changeset 8703:b5cb079ecaa4

Merge
author ewendeli
date Sun, 03 Feb 2013 22:43:57 +0100
parents 20b605466ccb (diff) 77443715ec55 (current diff)
children 1cabf9c80e84
files src/share/vm/interpreter/linkResolver.cpp src/share/vm/opto/loopTransform.cpp src/share/vm/runtime/arguments.cpp src/share/vm/runtime/globals.hpp
diffstat 773 files changed, 40992 insertions(+), 28197 deletions(-)
--- a/.hgtags	Mon Nov 05 17:03:33 2012 -0500
+++ b/.hgtags	Sun Feb 03 22:43:57 2013 +0100
@@ -288,3 +288,27 @@
 4547dc71db765276e027b0c2780b724bae0a07d3 jdk8-b61
 d0337c31c8be7716369b4e7c3bd5f352983c6a06 hs25-b06
 dccd40de8db1fa96f186e6179907818d75320440 jdk8-b62
+dc16fe422c535ecd4e9f80fb814a1bb9704da6f5 hs25-b07
+acabb5c282f59be7e3238920b2ea06b684ab68f7 jdk8-b63
+8cb93eadfb6dcab88d91b8e2cd3e0e07d0ac4048 hs25-b08
+5920f72e799c8133d1066c4a62fa1fafcb729966 jdk8-b64
+b4ee7b773144a88af8b6b92e4384dea82cb948d8 hs25-b09
+0f7290a03b24bd562583fa325d3566c21c51fb94 jdk8-b65
+cfc5309f03b7bd6c1567618b63cf1fc74c0f2a8f hs25-b10
+01684f7fee1b86222be69bc23841ec2a4416696c jdk8-b66
+b61d9c88b759d1594b8af1655598e8fa00393672 hs25-b11
+25bdce771bb3a7ae9825261a284d292cda700122 jdk8-b67
+a35a72dd2e1255239d31f796f9f693e49b36bc9f hs25-b12
+121aa71316af6cd877bf455e775fa3fdbcdd4b65 jdk8-b68
+b6c9c0109a608eedbb6b868d260952990e3c91fe hs25-b13
+cb8a4e04bc8c104de8a2f67463c7e31232bf8d68 jdk8-b69
+990bbd393c239d95310ccc38094e57923bbf1d4a hs25-b14
+e94068d4ff52849c8aa0786a53a59b63d1312a39 jdk8-b70
+0847210f85480bf3848dc90bc2ab23c0a4791b55 jdk8-b71
+d5cb5830f570d1304ea4b196dde672a291b55f29 jdk8-b72
+1e129851479e4f5df439109fca2c7be1f1613522 hs25-b15
+11619f33cd683c2f1d6ef72f1c6ff3dacf5a9f1c jdk8-b73
+70c89bd6b895a10d25ca70e08093c09ff2005fda hs25-b16
+1a3e54283c54aaa8b3437813e8507fbdc966e5b6 jdk8-b74
+b4391649e91ea8d37f66317a03d6d2573a93d10d hs25-b17
+6778d0b1659323a506ca47600ca29a9d9f8b383d jdk8-b75
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/doc/c2replay.html	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,41 @@
+<html>
+<head>
+<title>
+C2 Replay
+</title>
+</head>
+<body>
+
+<h1>C2 compiler replay</h1>
+<p>
+The C2 compiler replay is a function that repeats the compilation process from a Java process that crashed in a compiled method.<br>
+This function only exists in the debug version of the VM.
+</p>
+<h2>Usage</h2>
+<pre> 
+First, use SA to attach to the core file. If the attach succeeds, run:
+       clhsdb>dumpreplaydata <address> | -a | <thread_id> [> replay.txt]
+       This creates the file replay.txt; <address> is the address of a Method or an nmethod (CodeBlob).
+       clhsdb>buildreplayjars [all | boot | app]
+       This creates the following files:
+         all:
+           app.jar, boot.jar
+         boot:
+           boot.jar
+         app:
+           app.jar
+       Then exit SA.
+Second, run a debug version of java with the replay text file (replay.txt) and the jar files (app.jar and boot.jar):
+       java -Xbootclasspath/p:boot.jar -cp app.jar -XX:ReplayDataFile=<datafile> -XX:+ReplayCompiles ....
+       This replays the compilation process.
+
+       With ReplayCompiles, the replay recompiles all the methods in app.jar and boot.jar, emulating the compilation activity of the original Java application.
+
+Notes:
+       1) Most of the time boot.jar (the classes loaded from the JDK) is not needed; it only matters when an agent (JVMDI) is running and modifies the classes.
+       2) If you encounter an error such as "<flag>" not found, the SA is using a VMStructs that differs from the one in the core file. In this case, SA ships a utility tool, vmstructsdump, located at agent/src/os/<os>/proc/<os_platform>
+
+       Use this tool to dump the VM type library:
+       vmstructsdump libjvm.so > <type_name>.db
+
+       Then set the environment variable SA_TYPEDB=<type_name>.db (the syntax for setting environment variables differs between shells).
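
For concreteness, a complete session following the steps above might look like
this (the SA launch command, core file name, and application main class are
hypothetical):

       $ java -cp $JAVA_HOME/lib/sa-jdi.jar sun.jvm.hotspot.CLHSDB
       clhsdb> attach $JAVA_HOME/bin/java core.1234
       clhsdb> dumpreplaydata -a > replay.txt
       clhsdb> buildreplayjars all
       clhsdb> quit
       $ java -Xbootclasspath/p:boot.jar -cp app.jar \
              -XX:ReplayDataFile=replay.txt -XX:+ReplayCompiles MyApp
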
--- a/agent/doc/clhsdb.html	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/doc/clhsdb.html	Sun Feb 03 22:43:57 2013 +0100
@@ -37,12 +37,19 @@
 Available commands:
   assert true | false <font color="red">turn on/off asserts in SA code</font>
   attach pid | exec core  <font color="red">attach SA to a process or core</font>
+  buildreplayjars [all | boot | app] <font color="red">build jars for replay: boot.jar for boot classes, app.jar for application classes</font>
   class name <font color="red">find a Java class from debuggee and print oop</font>
   classes <font color="red">print all loaded Java classes with Klass*</font>
   detach <font color="red">detach SA from current target</font>
   dis address [ length ]  <font color="red">disassemble (sparc/x86) specified number of instructions from given address</font>
+  disassemble address <font color="red">disassemble nmethod</font>
+  dumpcfg -a | id <font color="red">dump the PhaseCFG for every compiler thread that has one live</font>
   dumpclass { address | name } [ directory ] <font color="red">dump .class file for given Klass* or class name</font>
+  dumpcodecache <font color="red">dump codecache contents</font>
   dumpheap [ file ] <font color="red">dump heap in hprof binary format</font>
+  dumpideal -a | id <font color="red">dump ideal graph like debug flag -XX:+PrintIdeal</font>
+  dumpilt -a | id <font color="red">dump inline tree for C2 compilation</font>
+  dumpreplaydata <address> | -a | <thread_id> [>replay.txt] <font color="red">dump replay data into a file</font>
   echo [ true | false ] <font color="red">turn on/off command echo mode</font>
   examine [ address/count ] | [ address,address] <font color="red">show contents of memory from given address</font>
   field [ type [ name fieldtype isStatic offset address ] ] <font color="red">print info about a field of HotSpot type</font>
@@ -51,29 +58,35 @@
   help [ command ] <font color="red">print help message for all commands or just given command</font>
   history <font color="red">show command history. usual !command-number syntax works.</font>
   inspect expression <font color="red">inspect a given oop</font>
+  intConstant [ name [ value ] ] <font color="red">print out hotspot integer constant(s)</font>
   jdis address <font color="red">show bytecode disassembly of a given Method*</font>
   jhisto <font color="red">show Java heap histogram</font>
   jseval script <font color="red">evaluate a given string as JavaScript code</font>
   jsload file <font color="red">load and evaluate a JavaScript file</font>
   jstack [-v] <font color="red">show Java stack trace of all Java threads. -v is verbose mode</font>
   livenmethods <font color="red">show all live nmethods</font>
+  longConstant [ name [ value ] ] <font color="red">print out hotspot long constant(s)</font>
   mem address [ length ] <font color="red">show contents of memory -- also shows closest ELF/COFF symbol if found</font>
   pmap <font color="red">show Solaris pmap-like output</font>
   print expression <font color="red">print given Klass*, Method* or arbitrary address</font>
   printas type expression <font color="red">print given address as given HotSpot type. eg. print JavaThread &lt;address&gt;</font>
+  printmdo -a | expression <font color="red">print method data oop</font>
   printstatics [ type ] <font color="red">print static fields of given HotSpot type (or all types if none specified)</font>
   pstack [-v] <font color="red">show mixed mode stack trace for all Java, non-Java threads. -v is verbose mode</font>
   quit <font color="red">quit CLHSDB tool</font>
   reattach <font color="red">detach and re-attach SA to current target</font>
+  revptrs  <font color="red">find liveness of oops</font>
   scanoops start end [ type ] <font color="red">scan a Oop from given start to end address</font>
   search [ heap | codecache | threads ] value <font color="red">search a value in heap or codecache or threads</font>
   source filename <font color="red">load and execute CLHSDB commands from given file</font>
   symbol name <font color="red">show address of a given ELF/COFF symbol</font>
   sysprops <font color="red">show all Java System properties</font>
+  thread id <font color="red">show the thread with the given id</font>
   threads <font color="red">show all Java threads</font>
   tokenize ...
   type [ type [ name super isOop isInteger isUnsigned size ] ] <font color="red">show info. on HotSpot type</font>
   universe <font color="red">print gc universe</font>
+  vmstructsdump <font color="red">dump hotspot type library in text</font>
   verbose true | false <font color="red">turn on/off verbose mode</font>
   versioncheck [ true | false ] <font color="red">turn on/off debuggee VM version check</font>
   whatis address <font color="red">print info about any arbitrary address</font>
@@ -114,5 +127,11 @@
 </code>
 </pre>
 
+<h3>C2 Compilation Replay</h3>
+<p>
+When a Java process crashes in a compiled method, a core file is usually saved.
+The C2 replay function can reproduce the compilation process from the core file.
+<a href="c2replay.html">c2replay.html</a>
+
 </body>
 </html>
--- a/agent/doc/index.html	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/doc/index.html	Sun Feb 03 22:43:57 2013 +0100
@@ -220,6 +220,12 @@
 </tr>
 </table>
 
+<h3>C2 Compilation Replay</h3>
+<p>
+When a Java process crashes in a compiled method, a core file is usually saved.
+The C2 replay function can reproduce the compilation process from the core file.
+<a href="c2replay.html">c2replay.html</a>
+
 <h3>Debugging transported core dumps</h3>
 <p>
 When a core dump is moved from the machine where it was produced to a
--- a/agent/make/Makefile	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/make/Makefile	Sun Feb 03 22:43:57 2013 +0100
@@ -58,10 +58,8 @@
 sun.jvm.hotspot.debugger.cdbg.basic.amd64 \
 sun.jvm.hotspot.debugger.cdbg.basic.x86 \
 sun.jvm.hotspot.debugger.dummy \
-sun.jvm.hotspot.debugger.ia64 \
 sun.jvm.hotspot.debugger.linux \
 sun.jvm.hotspot.debugger.linux.amd64 \
-sun.jvm.hotspot.debugger.linux.ia64 \
 sun.jvm.hotspot.debugger.linux.x86 \
 sun.jvm.hotspot.debugger.posix \
 sun.jvm.hotspot.debugger.posix.elf \
@@ -77,7 +75,6 @@
 sun.jvm.hotspot.debugger.win32.coff \
 sun.jvm.hotspot.debugger.windbg \
 sun.jvm.hotspot.debugger.windbg.amd64 \
-sun.jvm.hotspot.debugger.windbg.ia64 \
 sun.jvm.hotspot.debugger.windbg.x86 \
 sun.jvm.hotspot.debugger.x86 \
 sun.jvm.hotspot.gc_implementation \
@@ -97,10 +94,8 @@
 sun.jvm.hotspot.runtime.bsd \
 sun.jvm.hotspot.runtime.bsd_amd64 \
 sun.jvm.hotspot.runtime.bsd_x86 \
-sun.jvm.hotspot.runtime.ia64 \
 sun.jvm.hotspot.runtime.linux \
 sun.jvm.hotspot.runtime.linux_amd64 \
-sun.jvm.hotspot.runtime.linux_ia64 \
 sun.jvm.hotspot.runtime.linux_sparc \
 sun.jvm.hotspot.runtime.linux_x86 \
 sun.jvm.hotspot.runtime.posix \
@@ -109,7 +104,6 @@
 sun.jvm.hotspot.runtime.solaris_x86 \
 sun.jvm.hotspot.runtime.sparc \
 sun.jvm.hotspot.runtime.win32_amd64 \
-sun.jvm.hotspot.runtime.win32_ia64 \
 sun.jvm.hotspot.runtime.win32_x86 \
 sun.jvm.hotspot.runtime.x86 \
 sun.jvm.hotspot.tools \
@@ -152,7 +146,6 @@
 sun/jvm/hotspot/debugger/cdbg/basic/amd64/*.java \
 sun/jvm/hotspot/debugger/cdbg/basic/x86/*.java \
 sun/jvm/hotspot/debugger/dummy/*.java \
-sun/jvm/hotspot/debugger/ia64/*.java \
 sun/jvm/hotspot/debugger/linux/*.java \
 sun/jvm/hotspot/debugger/linux/x86/*.java \
 sun/jvm/hotspot/debugger/posix/*.java \
@@ -168,7 +161,6 @@
 sun/jvm/hotspot/debugger/sparc/*.java \
 sun/jvm/hotspot/debugger/win32/coff/*.java \
 sun/jvm/hotspot/debugger/windbg/*.java \
-sun/jvm/hotspot/debugger/windbg/ia64/*.java \
 sun/jvm/hotspot/debugger/windbg/x86/*.java \
 sun/jvm/hotspot/debugger/x86/*.java \
 sun/jvm/hotspot/gc_implementation/g1/*.java \
@@ -186,10 +178,8 @@
 sun/jvm/hotspot/runtime/bsd/*.java \
 sun/jvm/hotspot/runtime/bsd_amd64/*.java \
 sun/jvm/hotspot/runtime/bsd_x86/*.java \
-sun/jvm/hotspot/runtime/ia64/*.java \
 sun/jvm/hotspot/runtime/linux/*.java \
 sun/jvm/hotspot/runtime/linux_amd64/*.java \
-sun/jvm/hotspot/runtime/linux_ia64/*.java \
 sun/jvm/hotspot/runtime/linux_sparc/*.java \
 sun/jvm/hotspot/runtime/linux_x86/*.java \
 sun/jvm/hotspot/runtime/posix/*.java \
@@ -198,7 +188,6 @@
 sun/jvm/hotspot/runtime/solaris_x86/*.java \
 sun/jvm/hotspot/runtime/sparc/*.java \
 sun/jvm/hotspot/runtime/win32_amd64/*.java \
-sun/jvm/hotspot/runtime/win32_ia64/*.java \
 sun/jvm/hotspot/runtime/win32_x86/*.java \
 sun/jvm/hotspot/runtime/x86/*.java \
 sun/jvm/hotspot/tools/*.java \
@@ -258,6 +247,7 @@
 
 SA_PROPERTIES = $(OUTPUT_DIR)/sa.properties
 JAVAC = $(JDK_HOME)/bin/javac
+JAVA = $(JDK_HOME)/bin/java
 JAVADOC = $(JDK_HOME)/bin/javadoc
 RMIC = $(JDK_HOME)/bin/rmic
 
@@ -298,7 +288,7 @@
 
 .PHONY: natives
 natives:
-	cd ../src/os/`java -classpath $(OUTPUT_DIR) sun.jvm.hotspot.utilities.PlatformInfo`; $(MAKE) all
+	cd ../src/os/`$(JAVA) -classpath $(OUTPUT_DIR) sun.jvm.hotspot.utilities.PlatformInfo`; $(MAKE) all
 
 .PHONY: sa-jdi.jar
 sa-jdi.jar:
@@ -323,5 +313,5 @@
 
 clean::
 	rm -rf filelist
-	cd ../src/os/`java -classpath $(OUTPUT_DIR) sun.jvm.hotspot.utilities.PlatformInfo`; $(MAKE) clean
+	cd ../src/os/`$(JAVA) -classpath $(OUTPUT_DIR) sun.jvm.hotspot.utilities.PlatformInfo`; $(MAKE) clean
 	rm -rf $(BUILD_DIR)/*
--- a/agent/src/share/classes/sun/jvm/hotspot/CommandProcessor.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/CommandProcessor.java	Sun Feb 03 22:43:57 2013 +0100
@@ -33,6 +33,7 @@
 import sun.jvm.hotspot.types.Field;
 import sun.jvm.hotspot.HotSpotTypeDataBase;
 import sun.jvm.hotspot.types.basic.BasicType;
+import sun.jvm.hotspot.types.basic.BasicTypeDataBase;
 import sun.jvm.hotspot.types.CIntegerType;
 import sun.jvm.hotspot.code.*;
 import sun.jvm.hotspot.compiler.*;
@@ -448,6 +449,112 @@
                 }
             }
         },
+        new Command("dumpreplaydata", "dumpreplaydata { <address > | -a | <thread_id> }", false) {
+            // This is used to dump replay data from ciInstanceKlass, ciMethodData, etc.
+            // The default file name is replay.txt. If java crashes in a compiler
+            // thread, this file is also dumped as part of error processing.
+            public void doit(Tokens t) {
+                if (t.countTokens() != 1) {
+                    usage();
+                    return;
+                }
+                String name = t.nextToken();
+                Address a = null;
+                try {
+                    a = VM.getVM().getDebugger().parseAddress(name);
+                } catch (NumberFormatException e) { }
+                if (a != null) {
+                    // Only an nmethod, a Method, a MethodData, or an InstanceKlass
+                    // is needed to dump replay data.
+
+                    CodeBlob cb = VM.getVM().getCodeCache().findBlob(a);
+                    if (cb != null && (cb instanceof NMethod)) {
+                        ((NMethod)cb).dumpReplayData(out);
+                        return;
+                    }
+                    // assume it is Metadata
+                    Metadata meta = Metadata.instantiateWrapperFor(a);
+                    if (meta != null) {
+                        meta.dumpReplayData(out);
+                    } else {
+                        usage();
+                    }
+                    return;
+                }
+                // Not an address
+                boolean all = name.equals("-a");
+                Threads threads = VM.getVM().getThreads();
+                for (JavaThread thread = threads.first(); thread != null; thread = thread.next()) {
+                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+                    thread.printThreadIDOn(new PrintStream(bos));
+                    if (all || bos.toString().equals(name)) {
+                        if (thread instanceof CompilerThread) {
+                            CompilerThread ct = (CompilerThread)thread;
+                            ciEnv env = ct.env();
+                            if (env != null) {
+                               env.dumpReplayData(out);
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        new Command("buildreplayjars", "buildreplayjars [ all | app | boot ]  | [ prefix ]", false) {
+            // This is used to dump jar files of all the classes
+            // loaded in the core.  Everything on the bootclasspath
+            // will go in boot.jar and everything else will go in
+            // app.jar.  Then the classes can be loaded by the replay
+            // jvm using -Xbootclasspath/p:boot.jar -cp app.jar.  boot.jar is usually
+            // not needed, unless the boot classes were changed by jvmti.
+            public void doit(Tokens t) {
+                int tcount = t.countTokens();
+                if (tcount > 2) {
+                    usage();
+                    return;
+                }
+                try {
+                   String prefix = "";
+                   String option = "all"; // default
+                   switch(tcount) {
+                       case 0:
+                           break;
+                       case 1:
+                           option = t.nextToken();
+                           if (!option.equalsIgnoreCase("all") && !option.equalsIgnoreCase("app") &&
+                               !option.equalsIgnoreCase("boot")) {
+                              prefix = option;
+                              option = "all";
+                           }
+                           break;
+                       case 2:
+                           option = t.nextToken();
+                           prefix = t.nextToken();
+                           break;
+                       default:
+                           usage();
+                           return;
+                   }
+                   if (!option.equalsIgnoreCase("all") && !option.equalsIgnoreCase("app") &&
+                               !option.equalsIgnoreCase("boot")) {
+                       usage();
+                       return;
+                   }
+                   ClassDump cd = new ClassDump();
+                   if (option.equalsIgnoreCase("all") || option.equalsIgnoreCase("boot")) {
+                     cd.setClassFilter(new BootFilter());
+                     cd.setJarOutput(prefix + "boot.jar");
+                     cd.run();
+                   }
+                   if (option.equalsIgnoreCase("all") || option.equalsIgnoreCase("app")) {
+                     cd.setClassFilter(new NonBootFilter());
+                     cd.setJarOutput(prefix + "app.jar");
+                     cd.run();
+                   }
+                } catch (IOException ioe) {
+                   ioe.printStackTrace();
+                }
+            }
+        },
         new Command("findpc", "findpc address", false) {
             public void doit(Tokens t) {
                 if (t.countTokens() != 1) {
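
For reference, the two new commands compose from the clhsdb prompt roughly as
follows (the address and the jar prefix are invented):

       clhsdb> dumpreplaydata 0x00007f2b4c08d010 > replay.txt
               (an nmethod, Method, MethodData or InstanceKlass address)
       clhsdb> dumpreplaydata -a > replay.txt
               (dump from every compiler thread that has a live ciEnv)
       clhsdb> buildreplayjars app myrun-
               (option plus prefix: writes myrun-app.jar; with no arguments,
                both app.jar and boot.jar are written)
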
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciArrayKlass.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciArrayKlass.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciBaseObject.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciBaseObject.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
@@ -50,4 +50,8 @@
   public ciBaseObject(Address addr) {
     super(addr);
   }
+
+  public void dumpReplayData(PrintStream out) {
+    out.println("# Unknown ci type " + getAddress().getAddressAt(0));
+  }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciConstant.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciConstant.java	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
@@ -60,4 +60,8 @@
   public ciConstant(Address addr) {
     super(addr);
   }
+
+  public void dumpReplayData(PrintStream out) {
+    // Nothing to be done
+  }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciEnv.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciEnv.java	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
@@ -74,4 +74,29 @@
   public CompileTask task() {
     return new CompileTask(taskField.getValue(this.getAddress()));
   }
+
+  public void dumpReplayData(PrintStream out) {
+    out.println("JvmtiExport can_access_local_variables " +
+                (JvmtiExport.canAccessLocalVariables() ? '1' : '0'));
+    out.println("JvmtiExport can_hotswap_or_post_breakpoint " +
+                (JvmtiExport.canHotswapOrPostBreakpoint() ? '1' : '0'));
+    out.println("JvmtiExport can_post_on_exceptions " +
+                (JvmtiExport.canPostOnExceptions() ? '1' : '0'));
+
+    GrowableArray<ciMetadata> objects = factory().objects();
+    out.println("# " + objects.length() + " ciObject found");
+    for (int i = 0; i < objects.length(); i++) {
+      ciMetadata o = objects.at(i);
+      out.println("# ciMetadata" + i + " @ " + o);
+      o.dumpReplayData(out);
+    }
+    CompileTask task = task();
+    Method method = task.method();
+    int entryBci = task.osrBci();
+    Klass holder = method.getMethodHolder();
+    out.println("compile " + holder.getName().asString() + " " +
+                OopUtilities.escapeString(method.getName().asString()) + " " +
+                method.getSignature().asString() + " " +
+                entryBci);
+  }
 }
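
Given the println calls above, the head and tail of the resulting replay file
would look roughly like this (flag values, object count, and the compile line
are hypothetical; the entry bci is -1 for a non-OSR compilation):

       JvmtiExport can_access_local_variables 0
       JvmtiExport can_hotswap_or_post_breakpoint 0
       JvmtiExport can_post_on_exceptions 0
       # 2 ciObject found
       # ciMetadata0 @ sun.jvm.hotspot.ci.ciInstanceKlass@0x...
       ...   (one dumpReplayData block per ciMetadata object)
       compile java/lang/String hashCode ()I -1
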
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciField.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciField.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciInstance.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciInstance.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciInstanceKlass.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciInstanceKlass.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
@@ -80,4 +80,84 @@
   public boolean isInitialized() {
     return initState() == CLASS_STATE_FULLY_INITIALIZED;
   }
+
+  public void dumpReplayData(PrintStream out) {
+    InstanceKlass ik = (InstanceKlass)getMetadata();
+    ConstantPool cp = ik.getConstants();
+
+    // Try to record related loaded classes
+    Klass sub = ik.getSubklassKlass();
+    while (sub != null) {
+      if (sub instanceof InstanceKlass) {
+        out.println("instanceKlass " + sub.getName().asString());
+      }
+      sub = sub.getNextSiblingKlass();
+    }
+
+    final int length = (int) cp.getLength();
+    out.print("ciInstanceKlass " + name() + " " + (isLinked() ? 1 : 0) + " " + (isInitialized() ? 1 : 0) + " " + length);
+    for (int index = 1; index < length; index++) {
+      out.print(" " + cp.getTags().at(index));
+    }
+    out.println();
+    if (isInitialized()) {
+      Field[] staticFields = ik.getStaticFields();
+      for (int i = 0; i < staticFields.length; i++) {
+        Field f = staticFields[i];
+        Oop mirror = ik.getJavaMirror();
+        if (f.isFinal() && !f.hasInitialValue()) {
+          out.print("staticfield " + name() + " " +
+                    OopUtilities.escapeString(f.getID().getName()) + " " +
+                    f.getFieldType().getSignature().asString() + " ");
+          if (f instanceof ByteField) {
+            ByteField bf = (ByteField)f;
+            out.println(bf.getValue(mirror));
+          } else if (f instanceof BooleanField) {
+            BooleanField bf = (BooleanField)f;
+            out.println(bf.getValue(mirror) ? 1 : 0);
+          } else if (f instanceof ShortField) {
+            ShortField bf = (ShortField)f;
+            out.println(bf.getValue(mirror));
+          } else if (f instanceof CharField) {
+            CharField bf = (CharField)f;
+            out.println(bf.getValue(mirror) & 0xffff);
+          } else if (f instanceof IntField) {
+            IntField bf = (IntField)f;
+            out.println(bf.getValue(mirror));
+          } else  if (f instanceof LongField) {
+            LongField bf = (LongField)f;
+            out.println(bf.getValue(mirror));
+          } else if (f instanceof FloatField) {
+            FloatField bf = (FloatField)f;
+            out.println(Float.floatToRawIntBits(bf.getValue(mirror)));
+          } else if (f instanceof DoubleField) {
+            DoubleField bf = (DoubleField)f;
+            out.println(Double.doubleToRawLongBits(bf.getValue(mirror)));
+          } else if (f instanceof OopField) {
+            OopField bf = (OopField)f;
+            Oop value = bf.getValue(mirror);
+            if (value == null) {
+              out.println("null");
+            } else if (value.isInstance()) {
+              Instance inst = (Instance)value;
+              if (inst.isA(SystemDictionary.getStringKlass())) {
+                out.println("\"" + OopUtilities.stringOopToEscapedString(inst) + "\"");
+              } else {
+                out.println(inst.getKlass().getName().asString());
+              }
+            } else if (value.isObjArray()) {
+              ObjArray oa = (ObjArray)value;
+              Klass ek = (ObjArrayKlass)oa.getKlass();
+              out.println(oa.getLength() + " " + ek.getName().asString());
+            } else if (value.isTypeArray()) {
+              TypeArray ta = (TypeArray)value;
+              out.println(ta.getLength());
+            } else {
+              out.println(value);
+            }
+          }
+        }
+      }
+    }
+  }
 }
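
The staticfield encodings above are type-dependent: booleans print as 1/0,
chars as unsigned ints, floats and doubles as raw bit patterns, and Strings as
escaped quoted literals. Hypothetical output lines for a class Foo:

       staticfield Foo FLAG Z 1
       staticfield Foo SEED J 123456789
       staticfield Foo PI F 1078530011
       staticfield Foo NAME Ljava/lang/String; "foo"

(1078530011 is Float.floatToRawIntBits(3.1415927f).)
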
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciKlass.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciKlass.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciMetadata.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciMetadata.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethod.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethod.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
@@ -88,4 +88,19 @@
     st.printf(" %s::%s", method.getMethodHolder().getName().asString().replace('/', '.'),
               method.getName().asString());
   }
+
+  public void dumpReplayData(PrintStream out) {
+      Method method = (Method)getMetadata();
+      NMethod nm = method.getNativeMethod();
+      Klass holder = method.getMethodHolder();
+      out.println("ciMethod " +
+                  holder.getName().asString() + " " +
+                  OopUtilities.escapeString(method.getName().asString()) + " " +
+                  method.getSignature().asString() + " " +
+                  method.getInvocationCounter() + " " +
+                  method.getBackedgeCounter() + " " +
+                  interpreterInvocationCount() + " " +
+                  interpreterThrowoutCount() + " " +
+                  instructionsSize());
+  }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
@@ -174,4 +174,52 @@
     }
   }
 
+  public void dumpReplayData(PrintStream out) {
+    MethodData mdo = (MethodData)getMetadata();
+    Method method = mdo.getMethod();
+    Klass holder = method.getMethodHolder();
+    out.print("ciMethodData " +
+              holder.getName().asString() + " " +
+              OopUtilities.escapeString(method.getName().asString()) + " " +
+              method.getSignature().asString() + " " +
+              state() + " " + currentMileage());
+    byte[] orig = orig();
+    out.print(" orig " + orig.length);
+    for (int i = 0; i < orig.length; i++) {
+      out.print(" " + (orig[i] & 0xff));
+    }
+
+    long[] data = data();
+    out.print(" data " +  data.length);
+    for (int i = 0; i < data.length; i++) {
+      out.print(" 0x" + Long.toHexString(data[i]));
+    }
+    int count = 0;
+    for (int round = 0; round < 2; round++) {
+      if (round == 1) out.print(" oops " + count);
+      ProfileData pdata = firstData();
+      for ( ; isValid(pdata); pdata = nextData(pdata)) {
+        if (pdata instanceof ciReceiverTypeData) {
+          ciReceiverTypeData vdata = (ciReceiverTypeData)pdata;
+          for (int i = 0; i < vdata.rowLimit(); i++) {
+            ciKlass k = vdata.receiverAt(i);
+            if (k != null) {
+              if (round == 0) count++;
+              else out.print(" " + ((vdata.dp() + vdata.cellOffset(vdata.receiverCellIndex(i))) / MethodData.cellSize) + " " + k.name());
+            }
+          }
+        } else if (pdata instanceof ciVirtualCallData) {
+          ciVirtualCallData vdata = (ciVirtualCallData)pdata;
+          for (int i = 0; i < vdata.rowLimit(); i++) {
+            ciKlass k = vdata.receiverAt(i);
+            if (k != null) {
+              if (round == 0) count++;
+              else out.print(" " + ((vdata.dp() + vdata.cellOffset(vdata.receiverCellIndex(i))) / MethodData.cellSize + " " + k.name()));
+            }
+          }
+        }
+      }
+    }
+    out.println();
+  }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciObjArrayKlass.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciObjArrayKlass.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciObject.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciObject.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciObjectFactory.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciObjectFactory.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciReceiverTypeData.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciReceiverTypeData.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciSymbol.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciSymbol.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciType.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciType.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciTypeArrayKlass.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciTypeArrayKlass.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciVirtualCallData.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciVirtualCallData.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/classfile/ClassLoaderData.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/classfile/ClassLoaderData.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/code/NMethod.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/NMethod.java	Sun Feb 03 22:43:57 2013 +0100
@@ -498,6 +498,42 @@
            method.getSignature().asString();
   }
 
+  public void dumpReplayData(PrintStream out) {
+    HashMap h = new HashMap();
+    for (int i = 1; i < getMetadataLength(); i++) {
+      Metadata meta = Metadata.instantiateWrapperFor(getMetadataAt(i));
+      if (h.get(meta) != null) continue;
+      h.put(meta, meta);
+      if (meta instanceof InstanceKlass) {
+        ((InstanceKlass)meta).dumpReplayData(out);
+      } else if (meta instanceof Method) {
+        ((Method)meta).dumpReplayData(out);
+        MethodData mdo = ((Method)meta).getMethodData();
+        if (mdo != null) {
+          mdo.dumpReplayData(out);
+        }
+      }
+    }
+    Method method = getMethod();
+    if (h.get(method) == null) {
+      method.dumpReplayData(out);
+      MethodData mdo = method.getMethodData();
+      if (mdo != null) {
+        mdo.dumpReplayData(out);
+      }
+    }
+    if (h.get(method.getMethodHolder()) == null) {
+      ((InstanceKlass)method.getMethodHolder()).dumpReplayData(out);
+    }
+    Klass holder = method.getMethodHolder();
+    out.println("compile " + holder.getName().asString() + " " +
+                OopUtilities.escapeString(method.getName().asString()) + " " +
+                method.getSignature().asString() + " " +
+                getEntryBCI());
+
+  }
+
   //--------------------------------------------------------------------------------
   // Internals only below this point
   //
--- a/agent/src/share/classes/sun/jvm/hotspot/compiler/CompileTask.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/compiler/CompileTask.java	Sun Feb 03 22:43:57 2013 +0100
@@ -56,7 +56,7 @@
   }
 
   public Method method() {
-    Address oh =  methodField.getValue(getAddress()).getAddressAt(0);
+    Address oh =  methodField.getValue(getAddress());
     return (Method)Metadata.instantiateWrapperFor(oh);
   }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java	Sun Feb 03 22:43:57 2013 +0100
@@ -272,9 +272,10 @@
   public static final int _fast_aldc            = 229;
   public static final int _fast_aldc_w          = 230;
   public static final int _return_register_finalizer = 231;
-  public static final int _shouldnotreachhere   = 232; // For debugging
+  public static final int _invokehandle         = 232;
+  public static final int _shouldnotreachhere   = 233; // For debugging
 
-  public static final int number_of_codes       = 233;
+  public static final int number_of_codes       = 234;
 
   // Flag bits derived from format strings, can_trap, can_rewrite, etc.:
   // semantic flags:
@@ -787,20 +788,22 @@
     def(_fast_aaccess_0      , "fast_aaccess_0"      , "b_JJ" , null    , BasicType.getTObject() ,  1, true , _aload_0        );
     def(_fast_faccess_0      , "fast_faccess_0"      , "b_JJ" , null    , BasicType.getTObject() ,  1, true , _aload_0        );
 
-    def(_fast_iload          , "fast_iload"          , "bi"   , null    , BasicType.getTInt()    ,  1, false, _iload);
-    def(_fast_iload2         , "fast_iload2"         , "bi_i" , null    , BasicType.getTInt()    ,  2, false, _iload);
-    def(_fast_icaload        , "fast_icaload"        , "bi_"  , null    , BasicType.getTInt()    ,  0, false, _iload);
+    def(_fast_iload          , "fast_iload"          , "bi"   , null    , BasicType.getTInt()    ,  1, false, _iload          );
+    def(_fast_iload2         , "fast_iload2"         , "bi_i" , null    , BasicType.getTInt()    ,  2, false, _iload          );
+    def(_fast_icaload        , "fast_icaload"        , "bi_"  , null    , BasicType.getTInt()    ,  0, false, _iload          );
 
     // Faster method invocation.
-    def(_fast_invokevfinal   , "fast_invokevfinal"   , "bJJ"  , null    , BasicType.getTIllegal(), -1, true, _invokevirtual);
+    def(_fast_invokevfinal   , "fast_invokevfinal"   , "bJJ"  , null    , BasicType.getTIllegal(), -1, true, _invokevirtual   );
 
     def(_fast_linearswitch   , "fast_linearswitch"   , ""     , null    , BasicType.getTVoid()   , -1, false, _lookupswitch   );
     def(_fast_binaryswitch   , "fast_binaryswitch"   , ""     , null    , BasicType.getTVoid()   , -1, false, _lookupswitch   );
+    def(_fast_aldc           , "fast_aldc"           , "bj"   , null    , BasicType.getTObject(),   1, true,  _ldc            );
+    def(_fast_aldc_w         , "fast_aldc_w"         , "bJJ"  , null    , BasicType.getTObject(),   1, true,  _ldc_w          );
 
     def(_return_register_finalizer, "return_register_finalizer", "b"    , null    , BasicType.getTVoid()   , 0, true, _return );
 
-    def(_fast_aldc           , "fast_aldc"           , "bj"   , null    , BasicType.getTObject(),   1, true,  _ldc   );
-    def(_fast_aldc_w         , "fast_aldc_w"         , "bJJ"  , null    , BasicType.getTObject(),   1, true,  _ldc_w );
+    // special handling of signature-polymorphic methods
+    def(_invokehandle        , "invokehandle"        , "bJJ"  , null    , BasicType.getTIllegal(), -1, true, _invokevirtual   );
 
     def(_shouldnotreachhere  , "_shouldnotreachhere" , "b"    , null    , BasicType.getTVoid()   ,  0, false);
 
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/LoaderConstraintTable.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/LoaderConstraintTable.java	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/BitData.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/BitData.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java	Sun Feb 03 22:43:57 2013 +0100
@@ -48,6 +48,7 @@
   private static int HAS_CHECKED_EXCEPTIONS;
   private static int HAS_LOCALVARIABLE_TABLE;
   private static int HAS_EXCEPTION_TABLE;
+  private static int HAS_GENERIC_SIGNATURE;
 
   private static synchronized void initialize(TypeDataBase db) throws WrongTypeException {
     Type type                  = db.lookupType("ConstMethod");
@@ -60,13 +61,16 @@
     HAS_CHECKED_EXCEPTIONS     = db.lookupIntConstant("ConstMethod::_has_checked_exceptions").intValue();
     HAS_LOCALVARIABLE_TABLE   = db.lookupIntConstant("ConstMethod::_has_localvariable_table").intValue();
     HAS_EXCEPTION_TABLE       = db.lookupIntConstant("ConstMethod::_has_exception_table").intValue();
+    HAS_GENERIC_SIGNATURE     = db.lookupIntConstant("ConstMethod::_has_generic_signature").intValue();
 
     // Size of Java bytecodes allocated immediately after ConstMethod*.
     codeSize                   = new CIntField(type.getCIntegerField("_code_size"), 0);
     nameIndex                  = new CIntField(type.getCIntegerField("_name_index"), 0);
     signatureIndex             = new CIntField(type.getCIntegerField("_signature_index"), 0);
-    genericSignatureIndex      = new CIntField(type.getCIntegerField("_generic_signature_index"),0);
     idnum                      = new CIntField(type.getCIntegerField("_method_idnum"), 0);
+    maxStack                   = new CIntField(type.getCIntegerField("_max_stack"), 0);
+    maxLocals                  = new CIntField(type.getCIntegerField("_max_locals"), 0);
+    sizeOfParameters           = new CIntField(type.getCIntegerField("_size_of_parameters"), 0);
 
     // start of byte code
     bytecodeOffset = type.getSize();
@@ -92,8 +96,10 @@
   private static CIntField codeSize;
   private static CIntField nameIndex;
   private static CIntField signatureIndex;
-  private static CIntField genericSignatureIndex;
   private static CIntField idnum;
+  private static CIntField maxStack;
+  private static CIntField maxLocals;
+  private static CIntField sizeOfParameters;
 
   // start of bytecode
   private static long bytecodeOffset;
@@ -134,13 +140,29 @@
   }
 
   public long getGenericSignatureIndex() {
-    return genericSignatureIndex.getValue(this);
+    if (hasGenericSignature()) {
+      return getAddress().getCIntegerAt(offsetOfGenericSignatureIndex(), 2, true);
+    } else {
+      return 0;
+    }
   }
 
   public long getIdNum() {
     return idnum.getValue(this);
   }
 
+  public long getMaxStack() {
+    return maxStack.getValue(this);
+  }
+
+  public long getMaxLocals() {
+    return maxLocals.getValue(this);
+  }
+
+  public long getSizeOfParameters() {
+    return sizeOfParameters.getValue(this);
+  }
+
   public Symbol getName() {
     return getMethod().getName();
   }
@@ -235,8 +257,10 @@
       visitor.doCInt(codeSize, true);
       visitor.doCInt(nameIndex, true);
       visitor.doCInt(signatureIndex, true);
-      visitor.doCInt(genericSignatureIndex, true);
       visitor.doCInt(codeSize, true);
+      visitor.doCInt(maxStack, true);
+      visitor.doCInt(maxLocals, true);
+      visitor.doCInt(sizeOfParameters, true);
     }
 
   // Accessors
@@ -353,6 +377,10 @@
     return ret;
   }
 
+  private boolean hasGenericSignature() {
+    return (getFlags() & HAS_GENERIC_SIGNATURE) != 0;
+  }
+
 
   //---------------------------------------------------------------------------
   // Internals only below this point
@@ -377,8 +405,14 @@
     return getSize() * VM.getVM().getObjectHeap().getOopSize() - 2;
   }
 
+  // Offset of the generic signature index
+  private long offsetOfGenericSignatureIndex() {
+    return offsetOfLastU2Element();
+  }
+
   private long offsetOfCheckedExceptionsLength() {
-    return offsetOfLastU2Element();
+    return hasGenericSignature() ? offsetOfLastU2Element() - 2 :
+                                   offsetOfLastU2Element();
   }
 
   private int getCheckedExceptionsLength() {
@@ -431,7 +465,8 @@
     } else if (hasCheckedExceptions()) {
       return offsetOfCheckedExceptions() - 2;
     } else {
-      return offsetOfLastU2Element();
+      return hasGenericSignature() ? offsetOfLastU2Element() - 2 :
+                                     offsetOfLastU2Element();
     }
   }
 
@@ -460,7 +495,8 @@
     if (hasCheckedExceptions()) {
       return offsetOfCheckedExceptions() - 2;
     } else {
-      return offsetOfLastU2Element();
+      return hasGenericSignature() ? offsetOfLastU2Element() - 2 :
+                                     offsetOfLastU2Element();
     }
   }
 
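The offset arithmetic above encodes a tail layout in which the generic
signature index, when present, is the very last u2 in the ConstMethod, so each
other trailing section shifts down by two bytes. A sketch of the end of the
object, assuming both sections are present:

       ...[checked exceptions][generic signature index (u2)]
                               ^ offsetOfLastU2Element()

Without HAS_GENERIC_SIGNATURE, the checked exceptions length itself occupies
offsetOfLastU2Element().
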
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Sun Feb 03 22:43:57 2013 +0100
@@ -121,7 +121,7 @@
     Address addr = cache.getValue(getAddress());
     return (ConstantPoolCache) VMObjectFactory.newObject(ConstantPoolCache.class, addr);
   }
-  public Klass             getPoolHolder() { return (Klass)             poolHolder.getValue(this); }
+  public InstanceKlass     getPoolHolder() { return (InstanceKlass)poolHolder.getValue(this); }
   public int               getLength()     { return (int)length.getValue(getAddress()); }
   public Oop               getResolvedReferences() {
     Address handle = resolvedReferences.getValue(getAddress());
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPoolCache.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPoolCache.java	Sun Feb 03 22:43:57 2013 +0100
@@ -86,7 +86,7 @@
 
 
   public void printValueOn(PrintStream tty) {
-    tty.print("ConstantPoolCache for " + getConstants().getPoolHolder().getName().asString());
+    tty.print("ConstantPoolCache for " + getConstants().getPoolHolder().getName().asString() + " address = " + getAddress() + " offset = " + baseOffset);
   }
 
   public int getLength() {
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/Field.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/Field.java	Sun Feb 03 22:43:57 2013 +0100
@@ -110,6 +110,8 @@
   public Symbol getSignature() { return signature; }
   public Symbol getGenericSignature() { return genericSignature; }
 
+  public boolean hasInitialValue()           { return holder.getFieldInitialValueIndex(fieldIndex) != 0;    }
+
   //
   // Following acccessors are for named, non-VM fields only
   //
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java	Sun Feb 03 22:43:57 2013 +0100
@@ -52,6 +52,9 @@
   private static int LOW_OFFSET;
   private static int HIGH_OFFSET;
   private static int FIELD_SLOTS;
+  private static short FIELDINFO_TAG_SIZE;
+  private static short FIELDINFO_TAG_MASK;
+  private static short FIELDINFO_TAG_OFFSET;
 
   // ClassState constants
   private static int CLASS_STATE_ALLOCATED;
@@ -96,9 +99,13 @@
     NAME_INDEX_OFFSET              = db.lookupIntConstant("FieldInfo::name_index_offset").intValue();
     SIGNATURE_INDEX_OFFSET         = db.lookupIntConstant("FieldInfo::signature_index_offset").intValue();
     INITVAL_INDEX_OFFSET           = db.lookupIntConstant("FieldInfo::initval_index_offset").intValue();
-    LOW_OFFSET                     = db.lookupIntConstant("FieldInfo::low_offset").intValue();
-    HIGH_OFFSET                    = db.lookupIntConstant("FieldInfo::high_offset").intValue();
+    LOW_OFFSET                     = db.lookupIntConstant("FieldInfo::low_packed_offset").intValue();
+    HIGH_OFFSET                    = db.lookupIntConstant("FieldInfo::high_packed_offset").intValue();
     FIELD_SLOTS                    = db.lookupIntConstant("FieldInfo::field_slots").intValue();
+    FIELDINFO_TAG_SIZE             = db.lookupIntConstant("FIELDINFO_TAG_SIZE").shortValue();
+    FIELDINFO_TAG_MASK             = db.lookupIntConstant("FIELDINFO_TAG_MASK").shortValue();
+    FIELDINFO_TAG_OFFSET           = db.lookupIntConstant("FIELDINFO_TAG_OFFSET").shortValue();
+
     // read ClassState constants
     CLASS_STATE_ALLOCATED = db.lookupIntConstant("InstanceKlass::allocated").intValue();
     CLASS_STATE_LOADED = db.lookupIntConstant("InstanceKlass::loaded").intValue();
@@ -278,7 +285,7 @@
   }
 
   public short getFieldGenericSignatureIndex(int index) {
-     int len = getFields().length();
+    // int len = getFields().length();
     int allFieldsCount = getAllFieldsCount();
     int generic_signature_slot = allFieldsCount * FIELD_SLOTS;
     for (int i = 0; i < allFieldsCount; i++) {
@@ -314,8 +321,12 @@
 
   public int getFieldOffset(int index) {
     U2Array fields = getFields();
-    return VM.getVM().buildIntFromShorts(fields.at(index * FIELD_SLOTS + LOW_OFFSET),
-                                         fields.at(index * FIELD_SLOTS + HIGH_OFFSET));
+    short lo = fields.at(index * FIELD_SLOTS + LOW_OFFSET);
+    short hi = fields.at(index * FIELD_SLOTS + HIGH_OFFSET);
+    if ((lo & FIELDINFO_TAG_MASK) == FIELDINFO_TAG_OFFSET) {
+      return VM.getVM().buildIntFromShorts(lo, hi) >> FIELDINFO_TAG_SIZE;
+    }
+    throw new RuntimeException("should not reach here");
   }
 
   // Accessors for declared fields
@@ -325,7 +336,7 @@
   public KlassArray   getTransitiveInterfaces() { return new KlassArray(transitiveInterfaces.getValue(getAddress())); }
   public int       getJavaFieldsCount()     { return                (int) javaFieldsCount.getValue(this); }
   public int       getAllFieldsCount()      {
-     int len = getFields().length();
+    int len = getFields().length();
     int allFieldsCount = 0;
     for (; allFieldsCount*FIELD_SLOTS < len; allFieldsCount++) {
       short flags = getFieldAccessFlags(allFieldsCount);
@@ -581,6 +592,19 @@
     }
   }
 
+  public Field[] getStaticFields() {
+    U2Array fields = getFields();
+    int length = getJavaFieldsCount();
+    ArrayList result = new ArrayList();
+    for (int index = 0; index < length; index++) {
+      Field f = newField(index);
+      if (f.isStatic()) {
+        result.add(f);
+      }
+    }
+    return (Field[])result.toArray(new Field[result.size()]);
+  }
+
   public void iterateNonStaticFields(OopVisitor visitor, Oop obj) {
     if (getSuper() != null) {
       ((InstanceKlass) getSuper()).iterateNonStaticFields(visitor, obj);
@@ -979,4 +1003,84 @@
     }
     return -1;
   }
+
+  public void dumpReplayData(PrintStream out) {
+    ConstantPool cp = getConstants();
+
+    // Try to record related loaded classes
+    Klass sub = getSubklassKlass();
+    while (sub != null) {
+        if (sub instanceof InstanceKlass) {
+            out.println("instanceKlass " + sub.getName().asString());
+        }
+        sub = sub.getNextSiblingKlass();
+    }
+
+    final int length = (int) cp.getLength();
+    out.print("ciInstanceKlass " + getName().asString() + " " + (isLinked() ? 1 : 0) + " " + (isInitialized() ? 1 : 0) + " " + length);
+    for (int index = 1; index < length; index++) {
+      out.print(" " + cp.getTags().at(index));
+    }
+    out.println();
+    if (isInitialized()) {
+      Field[] staticFields = getStaticFields();
+      for (int i = 0; i < staticFields.length; i++) {
+        Field f = staticFields[i];
+        Oop mirror = getJavaMirror();
+        if (f.isFinal() && !f.hasInitialValue()) {
+          out.print("staticfield " + getName().asString() + " " +
+                    OopUtilities.escapeString(f.getID().getName()) + " " +
+                    f.getFieldType().getSignature().asString() + " ");
+          if (f instanceof ByteField) {
+            ByteField bf = (ByteField)f;
+            out.println(bf.getValue(mirror));
+          } else if (f instanceof BooleanField) {
+            BooleanField bf = (BooleanField)f;
+            out.println(bf.getValue(mirror) ? 1 : 0);
+          } else if (f instanceof ShortField) {
+            ShortField bf = (ShortField)f;
+            out.println(bf.getValue(mirror));
+          } else if (f instanceof CharField) {
+            CharField bf = (CharField)f;
+            out.println(bf.getValue(mirror) & 0xffff);
+          } else if (f instanceof IntField) {
+            IntField bf = (IntField)f;
+            out.println(bf.getValue(mirror));
+          } else if (f instanceof LongField) {
+            LongField bf = (LongField)f;
+            out.println(bf.getValue(mirror));
+          } else if (f instanceof FloatField) {
+            FloatField bf = (FloatField)f;
+            out.println(Float.floatToRawIntBits(bf.getValue(mirror)));
+          } else if (f instanceof DoubleField) {
+            DoubleField bf = (DoubleField)f;
+            out.println(Double.doubleToRawLongBits(bf.getValue(mirror)));
+          } else if (f instanceof OopField) {
+            OopField bf = (OopField)f;
+
+            Oop value = bf.getValue(mirror);
+            if (value == null) {
+              out.println("null");
+            } else if (value.isInstance()) {
+              Instance inst = (Instance)value;
+              if (inst.isA(SystemDictionary.getStringKlass())) {
+                out.println("\"" + OopUtilities.stringOopToEscapedString(inst) + "\"");
+              } else {
+                out.println(inst.getKlass().getName().asString());
+              }
+            } else if (value.isObjArray()) {
+              ObjArray oa = (ObjArray)value;
+              Klass ek = (ObjArrayKlass)oa.getKlass();
+              out.println(oa.getLength() + " " + ek.getName().asString());
+            } else if (value.isTypeArray()) {
+              TypeArray ta = (TypeArray)value;
+              out.println(ta.getLength());
+            } else {
+              out.println(value);
+            }
+          }
+        }
+      }
+    }
+  }
 }
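
The packed-offset decoding added to getFieldOffset above can be exercised in
isolation. A minimal sketch, assuming placeholder tag constants; the SA reads
the real FIELDINFO_TAG_* values from the target VM, so the numbers below are
illustrative only:

    // Standalone sketch of the FieldInfo packed-offset decoding; the tag
    // constants are illustrative placeholders, not the VM's actual values.
    public class PackedFieldOffsetSketch {
        static final short TAG_SIZE   = 2;
        static final short TAG_MASK   = 0x3;
        static final short TAG_OFFSET = 0x3;  // assumed "slot holds an offset" tag

        // Mirrors VM.buildIntFromShorts: low short in the low 16 bits.
        static int buildIntFromShorts(short lo, short hi) {
            return (hi << 16) | (lo & 0xFFFF);
        }

        static int fieldOffset(short lo, short hi) {
            if ((lo & TAG_MASK) == TAG_OFFSET) {
                return buildIntFromShorts(lo, hi) >> TAG_SIZE;
            }
            throw new RuntimeException("slot is not offset-tagged");
        }

        public static void main(String[] args) {
            int packed = (12 << TAG_SIZE) | TAG_OFFSET;  // pack offset 12
            short lo = (short) (packed & 0xFFFF);
            short hi = (short) (packed >>> 16);
            System.out.println(fieldOffset(lo, hi));     // prints 12
        }
    }
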
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/Metadata.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/Metadata.java	Sun Feb 03 22:43:57 2013 +0100
@@ -79,4 +79,7 @@
   }
 
   abstract public void printValueOn(PrintStream tty);
+  public void dumpReplayData(PrintStream out) {
+      out.println("# Unknown Metadata");
+  }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java	Sun Feb 03 22:43:57 2013 +0100
@@ -50,9 +50,6 @@
     constMethod                = type.getAddressField("_constMethod");
     methodData                 = type.getAddressField("_method_data");
     methodSize                 = new CIntField(type.getCIntegerField("_method_size"), 0);
-    maxStack                   = new CIntField(type.getCIntegerField("_max_stack"), 0);
-    maxLocals                  = new CIntField(type.getCIntegerField("_max_locals"), 0);
-    sizeOfParameters           = new CIntField(type.getCIntegerField("_size_of_parameters"), 0);
     accessFlags                = new CIntField(type.getCIntegerField("_access_flags"), 0);
     code                       = type.getAddressField("_code");
     vtableIndex                = new CIntField(type.getCIntegerField("_vtable_index"), 0);
@@ -84,9 +81,6 @@
   private static AddressField  constMethod;
   private static AddressField  methodData;
   private static CIntField methodSize;
-  private static CIntField maxStack;
-  private static CIntField maxLocals;
-  private static CIntField sizeOfParameters;
   private static CIntField accessFlags;
   private static CIntField vtableIndex;
   private static CIntField invocationCounter;
@@ -135,9 +129,9 @@
   }
   /** WARNING: this is in words, not useful in this system; use getObjectSize() instead */
   public long         getMethodSize()                 { return                methodSize.getValue(this);        }
-  public long         getMaxStack()                   { return                maxStack.getValue(this);          }
-  public long         getMaxLocals()                  { return                maxLocals.getValue(this);         }
-  public long         getSizeOfParameters()           { return                sizeOfParameters.getValue(this);  }
+  public long         getMaxStack()                   { return                getConstMethod().getMaxStack();          }
+  public long         getMaxLocals()                  { return                getConstMethod().getMaxLocals();         }
+  public long         getSizeOfParameters()           { return                getConstMethod().getSizeOfParameters();  }
   public long         getNameIndex()                  { return                getConstMethod().getNameIndex();  }
   public long         getSignatureIndex()             { return            getConstMethod().getSignatureIndex(); }
   public long         getGenericSignatureIndex()      { return     getConstMethod().getGenericSignatureIndex(); }
@@ -177,7 +171,7 @@
       bci. It is required that there is currently a bytecode at this
       bci. */
   public int getOrigBytecodeAt(int bci) {
-    BreakpointInfo bp = ((InstanceKlass) getMethodHolder()).getBreakpoints();
+    BreakpointInfo bp = getMethodHolder().getBreakpoints();
     for (; bp != null; bp = bp.getNext()) {
       if (bp.match(this, bci)) {
         return bp.getOrigBytecode();
@@ -238,7 +232,7 @@
   }
 
   // Method holder (the Klass holding this method)
-  public Klass   getMethodHolder()  { return getConstants().getPoolHolder();                           }
+  public InstanceKlass   getMethodHolder()  { return getConstants().getPoolHolder();                   }
 
   // Access flags
   public boolean isPublic()         { return getAccessFlagsObj().isPublic();                           }
@@ -284,9 +278,6 @@
 
   public void iterateFields(MetadataVisitor visitor) {
       visitor.doCInt(methodSize, true);
-      visitor.doCInt(maxStack, true);
-      visitor.doCInt(maxLocals, true);
-      visitor.doCInt(sizeOfParameters, true);
       visitor.doCInt(accessFlags, true);
     }
 
@@ -358,6 +349,25 @@
     buf.append(")");
     return buf.toString().replace('/', '.');
   }
+
+  public void dumpReplayData(PrintStream out) {
+      NMethod nm = getNativeMethod();
+      int code_size = 0;
+      if (nm != null) {
+        code_size = (int)nm.codeEnd().minus(nm.getVerifiedEntryPoint());
+      }
+      Klass holder = getMethodHolder();
+      out.println("ciMethod " +
+                  holder.getName().asString() + " " +
+                  OopUtilities.escapeString(getName().asString()) + " " +
+                  getSignature().asString() + " " +
+                  getInvocationCounter() + " " +
+                  getBackedgeCounter() + " " +
+                  interpreterInvocationCount() + " " +
+                  interpreterThrowoutCount() + " " +
+                  code_size);
+  }
+
   public int interpreterThrowoutCount() {
     return (int) interpreterThrowoutCountField.getValue(this);
   }
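
For orientation, the dumpReplayData method added above emits one
whitespace-separated ciMethod record per method: holder, name, signature, the
two profile counters, the interpreter invocation and throwout counts, and the
compiled code size. With hypothetical counter values a line would look like:

    ciMethod java/lang/String hashCode ()I 512 10 512 0 0
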
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/MethodData.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/MethodData.java	Sun Feb 03 22:43:57 2013 +0100
@@ -332,4 +332,59 @@
   public int currentMileage() {
     return 20000;
   }
+
+  public void dumpReplayData(PrintStream out) {
+    Method method = getMethod();
+    Klass holder = method.getMethodHolder();
+    out.print("ciMethodData " +
+              holder.getName().asString() + " " +
+              OopUtilities.escapeString(method.getName().asString()) + " " +
+              method.getSignature().asString() + " " +
+              "2" + " " +
+              currentMileage());
+    byte[] orig = orig();
+    out.print(" orig " + orig.length);
+    for (int i = 0; i < orig.length; i++) {
+      out.print(" " + (orig[i] & 0xff));
+    }
+
+    long[] data = data();
+    out.print(" data " +  data.length);
+    for (int i = 0; i < data.length; i++) {
+      out.print(" 0x" + Long.toHexString(data[i]));
+    }
+    int count = 0;
+    for (int round = 0; round < 2; round++) {
+      if (round == 1) out.print(" oops " + count);
+      ProfileData pdata = firstData();
+      for ( ; isValid(pdata); pdata = nextData(pdata)) {
+        if (pdata instanceof ReceiverTypeData) {
+          ReceiverTypeData vdata = (ReceiverTypeData)pdata;
+          for (int i = 0; i < vdata.rowLimit(); i++) {
+            Klass k = vdata.receiver(i);
+            if (k != null) {
+              if (round == 0) count++;
+              else out.print(" " +
+                             (dpToDi(vdata.dp() +
+                              vdata.cellOffset(vdata.receiverCellIndex(i))) / cellSize) + " " +
+                             k.getName().asString());
+            }
+          }
+        } else if (pdata instanceof VirtualCallData) {
+          VirtualCallData vdata = (VirtualCallData)pdata;
+          for (int i = 0; i < vdata.rowLimit(); i++) {
+            Klass k = vdata.receiver(i);
+            if (k != null) {
+              if (round == 0) count++;
+              else out.print(" " +
+                             (dpToDi(vdata.dp() +
+                              vdata.cellOffset(vdata.receiverCellIndex(i))) / cellSize) + " " +
+                             k.getName().asString());
+            }
+          }
+        }
+      }
+    }
+    out.println();
+  }
 }
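
The oops section above is written with a two-pass loop: round 0 only counts
matching receivers so the "oops <count>" header can precede them, and round 1
prints each entry. A minimal sketch of the same pattern over plain strings
(names here are illustrative, not SA API):

    import java.io.PrintStream;
    import java.util.List;

    class TwoPassOopsDump {
        // Walk the data twice: first to count, then to emit "oops <n> ...".
        static void dump(List<String> receivers, PrintStream out) {
            int count = 0;
            for (int round = 0; round < 2; round++) {
                if (round == 1) out.print(" oops " + count);
                for (String r : receivers) {
                    if (round == 0) count++;
                    else out.print(" " + r);
                }
            }
            out.println();
        }
    }
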
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ProfileData.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ProfileData.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/RetData.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/RetData.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/Block.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/Block.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/Block_Array.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/Block_Array.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/Block_List.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/Block_List.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/CallDynamicJavaNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/CallDynamicJavaNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/CallJavaNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/CallJavaNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/CallNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/CallNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/CallRuntimeNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/CallRuntimeNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/CallStaticJavaNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/CallStaticJavaNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/Compile.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/Compile.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/HaltNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/HaltNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/InlineTree.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/InlineTree.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/JVMState.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/JVMState.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/LoopNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/LoopNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/MachCallJavaNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/MachCallJavaNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/MachCallNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/MachCallNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/MachCallRuntimeNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/MachCallRuntimeNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/MachCallStaticJavaNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/MachCallStaticJavaNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/MachIfNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/MachIfNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/MachNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/MachNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/MachReturnNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/MachReturnNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/MachSafePointNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/MachSafePointNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/MultiNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/MultiNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/Node.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/Node.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/Node_Array.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/Node_Array.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/Node_List.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/Node_List.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/Phase.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/Phase.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseRegAlloc.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseRegAlloc.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/PhiNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/PhiNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/ProjNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/ProjNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/RegionNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/RegionNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/RootNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/RootNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/SafePointNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/SafePointNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/TypeNode.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/TypeNode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/prims/JvmtiExport.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/prims/JvmtiExport.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Bytes.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Bytes.java	Sun Feb 03 22:43:57 2013 +0100
@@ -30,24 +30,10 @@
 /** Encapsulates some byte-swapping operations defined in the VM */
 
 public class Bytes {
-  // swap if client platform is different from server's.
   private boolean swap;
 
   public Bytes(MachineDescription machDesc) {
-    String cpu = PlatformInfo.getCPU();
-    if (cpu.equals("sparc")) {
-      if (machDesc.isBigEndian()) {
-        swap = false;
-      } else {
-        swap = true;
-      }
-    } else { // intel
-      if (machDesc.isBigEndian()) {
-        swap = true;
-      } else {
-        swap = false;
-      }
-    }
+    swap = !machDesc.isBigEndian();
   }
 
   /** Should only swap if the hardware's underlying byte order is
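
The rewritten constructor drops the per-CPU special cases: Java's integer
semantics are big-endian regardless of the host, so bytes read from the target
only need swapping when the target machine is little-endian. A self-contained
sketch of that decision (the boolean flag stands in for
MachineDescription.isBigEndian()):

    public class SwapSketch {
        public static void main(String[] args) {
            boolean targetBigEndian = false;       // e.g. an x86 core file
            boolean swap = !targetBigEndian;       // swap iff target is little-endian
            short raw = (short) 0x3412;            // 0x1234 as stored little-endian
            short value = swap ? Short.reverseBytes(raw) : raw;
            System.out.printf("0x%04X%n", value);  // prints 0x1234
        }
    }
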
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ByteCodeRewriter.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ByteCodeRewriter.java	Sun Feb 03 22:43:57 2013 +0100
@@ -29,6 +29,11 @@
 import sun.jvm.hotspot.utilities.*;
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.runtime.*;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.security.AccessControlContext;
+import java.security.PrivilegedExceptionAction;
+import java.security.PrivilegedActionException;
 
 public class ByteCodeRewriter
 {
@@ -38,8 +43,20 @@
     private byte[] code;
     private Bytes  bytes;
 
-    public static final boolean DEBUG = false;
     private static final int jintSize = 4;
+    public static final boolean DEBUG;
+
+    static {
+        String debug = (String) AccessController.doPrivileged(
+            new PrivilegedAction() {
+                public Object run() {
+                    return System.getProperty("sun.jvm.hotspot.tools.jcore.ByteCodeRewriter.DEBUG");
+                }
+            }
+        );
+        DEBUG = (debug != null ? debug.equalsIgnoreCase("true") : false);
+    }
+
 
     protected void debugMessage(String message) {
         System.out.println(message);
@@ -54,6 +71,18 @@
 
     }
 
+    protected short getConstantPoolIndexFromRefMap(int rawcode, int bci) {
+        int refIndex;
+        String fmt = Bytecodes.format(rawcode);
+        switch (fmt.length()) {
+            case 2: refIndex = 0xFF & method.getBytecodeByteArg(bci); break;
+            case 3: refIndex = 0xFFFF & bytes.swapShort(method.getBytecodeShortArg(bci)); break;
+            default: throw new IllegalArgumentException();
+        }
+
+        return (short)cpool.objectToCPIndex(refIndex);
+    }
+
     protected short getConstantPoolIndex(int rawcode, int bci) {
        // get ConstantPool index from ConstantPoolCacheIndex at given bci
        String fmt = Bytecodes.format(rawcode);
@@ -95,6 +124,12 @@
         int hotspotcode = Bytecodes._illegal;
         int len = 0;
 
+        if (DEBUG) {
+            String msg = method.getMethodHolder().getName().asString() + "." +
+                         method.getName().asString() +
+                         method.getSignature().asString();
+            debugMessage(msg);
+        }
         for (int bci = 0; bci < code.length;) {
             hotspotcode = Bytecodes.codeAt(method, bci);
             bytecode = Bytecodes.javaCode(hotspotcode);
@@ -133,15 +168,15 @@
 
                 case Bytecodes._ldc_w:
                     if (hotspotcode != bytecode) {
-                        // fast_aldc_w puts constant in CP cache
-                        cpoolIndex = getConstantPoolIndex(hotspotcode, bci + 1);
+                        // fast_aldc_w puts constant in reference map
+                        cpoolIndex = getConstantPoolIndexFromRefMap(hotspotcode, bci + 1);
                         writeShort(code, bci + 1, cpoolIndex);
                     }
                     break;
                 case Bytecodes._ldc:
                     if (hotspotcode != bytecode) {
-                        // fast_aldc puts constant in CP cache
-                        cpoolIndex = getConstantPoolIndex(hotspotcode, bci + 1);
+                        // fast_aldc puts constant in reference map
+                        cpoolIndex = getConstantPoolIndexFromRefMap(hotspotcode, bci + 1);
                         code[bci + 1] = (byte)(cpoolIndex);
                     }
                     break;
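
With this change the rewriter's tracing is no longer a compile-time constant:
DEBUG is read once from the
sun.jvm.hotspot.tools.jcore.ByteCodeRewriter.DEBUG system property inside a
doPrivileged block, so it can be enabled at launch, e.g. (tool and arguments
shown are illustrative):

    java -Dsun.jvm.hotspot.tools.jcore.ByteCodeRewriter.DEBUG=true sun.jvm.hotspot.tools.jcore.ClassDump
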
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/GenericGrowableArray.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/GenericGrowableArray.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/GrowableArray.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/GrowableArray.java	Sun Feb 03 22:43:57 2013 +0100
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
--- a/agent/src/share/native/sadis.c	Mon Nov 05 17:03:33 2012 -0500
+++ b/agent/src/share/native/sadis.c	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012, Oracle and/or its affiliates. All Rights Reserved.
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/make/Makefile	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/Makefile	Sun Feb 03 22:43:57 2013 +0100
@@ -453,14 +453,30 @@
     ifeq ($(JVM_VARIANT_ZEROSHARK), true)
         $(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(SHARK_DIR)/%.$(LIBRARY_SUFFIX)
 		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo:	$(SHARK_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.diz:		$(SHARK_DIR)/%.diz
+		$(install-file)
         $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(SHARK_DIR)/%.$(LIBRARY_SUFFIX)
 		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.debuginfo:		$(SHARK_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.diz:			$(SHARK_DIR)/%.diz
+		$(install-file)
     endif
     ifeq ($(JVM_VARIANT_ZERO), true)
         $(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(ZERO_DIR)/%.$(LIBRARY_SUFFIX)
 		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo:		$(ZERO_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.diz:		$(ZERO_DIR)/%.diz
+		$(install-file)
         $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(ZERO_DIR)/%.$(LIBRARY_SUFFIX)
 		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.debuginfo:		$(ZERO_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.diz:			$(ZERO_DIR)/%.diz
+		$(install-file)
     endif
     ifeq ($(JVM_VARIANT_MINIMAL1), true)
         $(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX):	$(MINIMAL1_DIR)/%.$(LIBRARY_SUFFIX)
--- a/make/bsd/Makefile	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/Makefile	Sun Feb 03 22:43:57 2013 +0100
@@ -47,10 +47,10 @@
 
 # Along with VM, Serviceability Agent (SA) is built for SA/JDI binding.
 # JDI binding on SA produces two binaries:
-#  1. sa-jdi.jar       - This is build before building libjvm[_g].so
+#  1. sa-jdi.jar       - This is built before building libjvm.so
 #                        Please refer to ./makefiles/sa.make
-#  2. libsa[_g].so     - Native library for SA - This is built after
-#                        libjsig[_g].so (signal interposition library)
+#  2. libsa.so         - Native library for SA - This is built after
+#                        libjsig.so (signal interposition library)
 #                        Please refer to ./makefiles/vm.make
 # If $(GAMMADIR)/agent dir is not present, SA components are not built.
 
@@ -181,9 +181,9 @@
 #
 # What you get with each target:
 #
-# debug*     - "thin" libjvm_g - debug info linked into the gamma_g launcher
+# debug*     - "thin" libjvm - debug info linked into the gamma launcher
 # fastdebug* - optimized compile, but with asserts enabled
-# jvmg*      - "fat" libjvm_g - debug info linked into libjvm_g.so
+# jvmg*      - "fat" libjvm - debug info linked into libjvm.so
 # optimized* - optimized compile, no asserts
 # profiled*  - gprof
 # product*   - the shippable thing:  optimized compile, no asserts, -DPRODUCT
--- a/make/bsd/makefiles/buildtree.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/buildtree.make	Sun Feb 03 22:43:57 2013 +0100
@@ -449,12 +449,7 @@
 	echo "  exit 0"; \
 	echo "fi"; \
 	echo ""; \
-	echo "# Use gamma_g if it exists"; \
-	echo ""; \
 	echo "GAMMA_PROG=gamma"; \
-	echo "if [ -f gamma_g ]; then "; \
-	echo "  GAMMA_PROG=gamma_g"; \
-	echo "fi"; \
 	echo ""; \
 	echo "if [ \"$(OS_VENDOR)\" = \"Darwin\" ]; then "; \
 	echo "  # Ensure architecture for gamma and JAVA_HOME is the same."; \
--- a/make/bsd/makefiles/debug.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/debug.make	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -38,7 +38,6 @@
  "Please use 'make jvmg' to build debug JVM.                            \n" \
  "----------------------------------------------------------------------\n")
 
-G_SUFFIX = _g
 VERSION = debug
 SYSDEFS += -DASSERT -DDEBUG
 PICFLAGS = DEFAULT
--- a/make/bsd/makefiles/dtrace.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/dtrace.make	Sun Feb 03 22:43:57 2013 +0100
@@ -38,12 +38,10 @@
 # Bsd does not build libjvm_db, does not compile on macosx
 # disabled in build: rule in vm.make
 JVM_DB = libjvm_db
-#LIBJVM_DB = libjvm_db.dylib
-LIBJVM_DB = libjvm$(G_SUFFIX)_db.dylib
+LIBJVM_DB = libjvm_db.dylib
 
 JVM_DTRACE = jvm_dtrace
-#LIBJVM_DTRACE = libjvm_dtrace.dylib
-LIBJVM_DTRACE = libjvm$(G_SUFFIX)_dtrace.dylib
+LIBJVM_DTRACE = libjvm_dtrace.dylib
 
 JVMOFFS = JvmOffsets
 JVMOFFS.o = $(JVMOFFS).o
@@ -80,9 +78,7 @@
 ifneq ("${ISA}","${BUILDARCH}")
 
 XLIBJVM_DB = 64/$(LIBJVM_DB)
-XLIBJVM_DB_G = 64/$(LIBJVM_DB_G)
 XLIBJVM_DTRACE = 64/$(LIBJVM_DTRACE)
-XLIBJVM_DTRACE_G = 64/$(LIBJVM_DTRACE_G)
 XARCH = $(subst sparcv9,v9,$(shell echo $(ISA)))
 
 $(XLIBJVM_DB): $(DTRACE_SRCDIR)/$(JVM_DB).c $(JVMOFFS).h $(LIBJVM_DB_MAPFILE)
@@ -90,14 +86,12 @@
 	$(QUIETLY) mkdir -p 64/ ; \
 	$(CC) $(SYMFLAG) -xarch=$(XARCH) -D$(TYPE) -I. -I$(GENERATED) \
 		$(SHARED_FLAG) $(LFLAGS_JVM_DB) -o $@ $(DTRACE_SRCDIR)/$(JVM_DB).c #-lc
-#	[ -f $(XLIBJVM_DB_G) ] || { ln -s $(LIBJVM_DB) $(XLIBJVM_DB_G); }
 
 $(XLIBJVM_DTRACE): $(DTRACE_SRCDIR)/$(JVM_DTRACE).c $(DTRACE_SRCDIR)/$(JVM_DTRACE).h $(LIBJVM_DTRACE_MAPFILE)
 	@echo Making $@
 	$(QUIETLY) mkdir -p 64/ ; \
 	$(CC) $(SYMFLAG) -xarch=$(XARCH) -D$(TYPE) -I. \
 		$(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c #-lc -lthread -ldoor
-#	[ -f $(XLIBJVM_DTRACE_G) ] || { ln -s $(LIBJVM_DTRACE) $(XLIBJVM_DTRACE_G); }
 
 endif # ifneq ("${ISA}","${BUILDARCH}")
 
@@ -141,13 +135,11 @@
 	@echo Making $@
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) -D$(TYPE) -I. -I$(GENERATED) \
 		$(SHARED_FLAG) $(LFLAGS_JVM_DB) -o $@ $(DTRACE_SRCDIR)/$(JVM_DB).c -Wall # -lc
-#	[ -f $(LIBJVM_DB_G) ] || { ln -s $@ $(LIBJVM_DB_G); }
 
 $(LIBJVM_DTRACE): $(DTRACE_SRCDIR)/$(JVM_DTRACE).c $(XLIBJVM_DTRACE) $(DTRACE_SRCDIR)/$(JVM_DTRACE).h $(LIBJVM_DTRACE_MAPFILE)
 	@echo Making $@
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) -D$(TYPE) -I.  \
 		$(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c #-lc -lthread -ldoor
-#	[ -f $(LIBJVM_DTRACE_G) ] || { ln -s $@ $(LIBJVM_DTRACE_G); }
 
 #$(DTRACE).d: $(DTRACE_SRCDIR)/hotspot.d $(DTRACE_SRCDIR)/hotspot_jni.d \
 #             $(DTRACE_SRCDIR)/hs_private.d $(DTRACE_SRCDIR)/jhelper.d
--- a/make/bsd/makefiles/fastdebug.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/fastdebug.make	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -58,7 +58,6 @@
 # Linker mapfile
 MAPFILE = $(GAMMADIR)/make/bsd/makefiles/mapfile-vers-debug
 
-G_SUFFIX = _g
 VERSION = optimized
 SYSDEFS += -DASSERT -DFASTDEBUG
 PICFLAGS = DEFAULT
--- a/make/bsd/makefiles/gcc.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/gcc.make	Sun Feb 03 22:43:57 2013 +0100
@@ -284,9 +284,9 @@
 
 # Use the stabs format for debugging information (this is the default
 # on gcc-2.91). It's good enough, has all the information about line
-# numbers and local variables, and libjvm_g.so is only about 16M.
+# numbers and local variables, and libjvm.so is only about 16M.
 # Change this back to "-g" if you want the most expressive format.
-# (warning: that could easily inflate libjvm_g.so to 150M!)
+# (warning: that could easily inflate libjvm.so to 150M!)
 # Note: The Itanium gcc compiler crashes when using -gstabs.
 DEBUG_CFLAGS/ia64  = -g
 DEBUG_CFLAGS/amd64 = -g
--- a/make/bsd/makefiles/jsig.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/jsig.make	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -24,16 +24,13 @@
 
 # Rules to build signal interposition library, used by vm.make
 
-# libjsig[_g].so: signal interposition library
+# libjsig.so: signal interposition library
 JSIG   = jsig
-JSIG_G = $(JSIG)$(G_SUFFIX)
 
 ifeq ($(OS_VENDOR), Darwin)
   LIBJSIG   = lib$(JSIG).dylib
-  LIBJSIG_G = lib$(JSIG_G).dylib
 else
   LIBJSIG   = lib$(JSIG).so
-  LIBJSIG_G = lib$(JSIG_G).so
 endif
 
 JSIGSRCDIR = $(GAMMADIR)/src/os/$(Platform_os_family)/vm
@@ -58,7 +55,6 @@
 	@echo Making signal interposition lib...
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
                          $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) -o $@ $<
-	$(QUIETLY) [ -f $(LIBJSIG_G) ] || { ln -s $@ $(LIBJSIG_G); }
 
 install_jsig: $(LIBJSIG)
 	@echo "Copying $(LIBJSIG) to $(DEST_JSIG)"
--- a/make/bsd/makefiles/jvmg.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/jvmg.make	Sun Feb 03 22:43:57 2013 +0100
@@ -37,7 +37,6 @@
 # Linker mapfile
 MAPFILE = $(GAMMADIR)/make/bsd/makefiles/mapfile-vers-debug
 
-G_SUFFIX = _g
 VERSION = debug
 SYSDEFS += -DASSERT -DDEBUG
 PICFLAGS = DEFAULT
--- a/make/bsd/makefiles/mapfile-vers-debug	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/mapfile-vers-debug	Sun Feb 03 22:43:57 2013 +0100
@@ -1,9 +1,9 @@
 #
-# @(#)mapfile-vers-debug	1.18 07/10/25 16:47:35
+# @(#)mapfile-vers-debug        1.18 07/10/25 16:47:35
 #
 
 #
-# Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -126,8 +126,9 @@
                 JVM_GetClassModifiers;
                 JVM_GetClassName;
                 JVM_GetClassNameUTF;
-		JVM_GetClassSignature;
+                JVM_GetClassSignature;
                 JVM_GetClassSigners;
+                JVM_GetClassTypeAnnotations;
                 JVM_GetComponentType;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
@@ -154,6 +155,7 @@
                 JVM_GetMethodIxNameUTF;
                 JVM_GetMethodIxSignatureUTF;
                 JVM_GetMethodParameterAnnotations;
+                JVM_GetMethodParameters;
                 JVM_GetPrimitiveArrayElement;
                 JVM_GetProtectionDomain;
                 JVM_GetSockName;
@@ -203,7 +205,6 @@
                 JVM_NewMultiArray;
                 JVM_OnExit;
                 JVM_Open;
-                JVM_PrintStackTrace;
                 JVM_RaiseSignal;
                 JVM_RawMonitorCreate;
                 JVM_RawMonitorDestroy;
@@ -283,7 +284,7 @@
                 # This is for Forte Analyzer profiling support.
                 AsyncGetCallTrace;
 
-		# INSERT VTABLE SYMBOLS HERE
+                # INSERT VTABLE SYMBOLS HERE
 
         local:
                 *;
--- a/make/bsd/makefiles/mapfile-vers-product	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/mapfile-vers-product	Sun Feb 03 22:43:57 2013 +0100
@@ -3,7 +3,7 @@
 #
 
 #
-# Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -128,6 +128,7 @@
                 JVM_GetClassNameUTF;
                 JVM_GetClassSignature;
                 JVM_GetClassSigners;
+                JVM_GetClassTypeAnnotations;
                 JVM_GetComponentType;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
@@ -154,6 +155,7 @@
                 JVM_GetMethodIxNameUTF;
                 JVM_GetMethodIxSignatureUTF;
                 JVM_GetMethodParameterAnnotations;
+                JVM_GetMethodParameters;
                 JVM_GetPrimitiveArrayElement;
                 JVM_GetProtectionDomain;
                 JVM_GetSockName;
@@ -203,7 +205,6 @@
                 JVM_NewMultiArray;
                 JVM_OnExit;
                 JVM_Open;
-                JVM_PrintStackTrace;
                 JVM_RaiseSignal;
                 JVM_RawMonitorCreate;
                 JVM_RawMonitorDestroy;
--- a/make/bsd/makefiles/optimized.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/optimized.make	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -40,5 +40,4 @@
 # Linker mapfile
 MAPFILE = $(GAMMADIR)/make/bsd/makefiles/mapfile-vers-debug
 
-G_SUFFIX =
 VERSION = optimized
--- a/make/bsd/makefiles/product.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/product.make	Sun Feb 03 22:43:57 2013 +0100
@@ -40,7 +40,6 @@
 # Linker mapfile
 MAPFILE = $(GAMMADIR)/make/bsd/makefiles/mapfile-vers-product
 
-G_SUFFIX =
 SYSDEFS += -DPRODUCT
 VERSION = optimized
 
--- a/make/bsd/makefiles/saproc.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/saproc.make	Sun Feb 03 22:43:57 2013 +0100
@@ -24,16 +24,13 @@
 
 # Rules to build serviceability agent library, used by vm.make
 
-# libsaproc[_g].so: serviceability agent
+# libsaproc.so: serviceability agent
 SAPROC   = saproc
-SAPROC_G = $(SAPROC)$(G_SUFFIX)
 
 ifeq ($(OS_VENDOR), Darwin)
   LIBSAPROC   = lib$(SAPROC).dylib
-  LIBSAPROC_G = lib$(SAPROC_G).dylib
 else
   LIBSAPROC   = lib$(SAPROC).so
-  LIBSAPROC_G = lib$(SAPROC_G).so
 endif
 
 AGENT_DIR = $(GAMMADIR)/agent
@@ -114,7 +111,6 @@
 	           $(SA_DEBUG_CFLAGS)                                   \
 	           -o $@                                                \
 	           $(SALIBS)
-	$(QUIETLY) [ -f $(LIBSAPROC_G) ] || { ln -s $@ $(LIBSAPROC_G); }
 
 install_saproc: $(BUILDLIBSAPROC)
 	$(QUIETLY) if [ -e $(LIBSAPROC) ] ; then             \
--- a/make/bsd/makefiles/vm.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/bsd/makefiles/vm.make	Sun Feb 03 22:43:57 2013 +0100
@@ -138,11 +138,9 @@
 JVM    = jvm
 ifeq ($(OS_VENDOR), Darwin)
   LIBJVM   = lib$(JVM).dylib
-  LIBJVM_G = lib$(JVM)$(G_SUFFIX).dylib
   CFLAGS  += -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE
 else
   LIBJVM   = lib$(JVM).so
-  LIBJVM_G = lib$(JVM)$(G_SUFFIX).so
 endif
 
 SPECIAL_PATHS:=adlc c1 gc_implementation opto shark libadt
@@ -314,7 +312,6 @@
 		       $(LFLAGS_VM) -o $@ $(sort $(LIBJVM.o)) $(LIBS_VM); \
 	    $(LINK_LIB.CXX/POST_HOOK)                                    \
 	    rm -f $@.1; ln -s $@ $@.1;                                  \
-	    [ -f $(LIBJVM_G) ] || { ln -s $@ $(LIBJVM_G); ln -s $@.1 $(LIBJVM_G).1; }; \
 	}
 
 DEST_JVM = $(JDK_LIBDIR)/$(VM_SUBDIR)/$(LIBJVM)
--- a/make/excludeSrc.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/excludeSrc.make	Sun Feb 03 22:43:57 2013 +0100
@@ -79,10 +79,10 @@
       CXXFLAGS += -DSERIALGC
       CFLAGS += -DSERIALGC
       Src_Files_EXCLUDE += \
-	binaryTreeDictionary.cpp cmsAdaptiveSizePolicy.cpp cmsCollectorPolicy.cpp \
+	cmsAdaptiveSizePolicy.cpp cmsCollectorPolicy.cpp \
 	cmsGCAdaptivePolicyCounters.cpp cmsLockVerifier.cpp cmsPermGen.cpp compactibleFreeListSpace.cpp \
-	concurrentMarkSweepGeneration.cpp concurrentMarkSweepThread.cpp freeBlockDictionary.cpp \
-	freeChunk.cpp freeList.cpp promotionInfo.cpp vmCMSOperations.cpp collectionSetChooser.cpp \
+	concurrentMarkSweepGeneration.cpp concurrentMarkSweepThread.cpp \
+	freeChunk.cpp adaptiveFreeList.cpp promotionInfo.cpp vmCMSOperations.cpp collectionSetChooser.cpp \
 	concurrentG1Refine.cpp concurrentG1RefineThread.cpp concurrentMark.cpp concurrentMarkThread.cpp \
 	dirtyCardQueue.cpp g1AllocRegion.cpp g1BlockOffsetTable.cpp g1CollectedHeap.cpp g1GCPhaseTimes.cpp \
 	g1CollectorPolicy.cpp g1ErgoVerbose.cpp g1_globals.cpp g1HRPrinter.cpp g1MarkSweep.cpp \
--- a/make/hotspot_version	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/hotspot_version	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 # 
-# Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2013, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -31,11 +31,11 @@
 #
 
 # Don't put quotes (fail windows build).
-HOTSPOT_VM_COPYRIGHT=Copyright 2012
+HOTSPOT_VM_COPYRIGHT=Copyright 2013
 
 HS_MAJOR_VER=25
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=06
+HS_BUILD_NUMBER=17
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/linux/Makefile	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/Makefile	Sun Feb 03 22:43:57 2013 +0100
@@ -47,10 +47,10 @@
 
 # Along with VM, Serviceability Agent (SA) is built for SA/JDI binding.
 # JDI binding on SA produces two binaries:
-#  1. sa-jdi.jar       - This is build before building libjvm[_g].so
+#  1. sa-jdi.jar       - This is built before building libjvm.so
 #                        Please refer to ./makefiles/sa.make
-#  2. libsa[_g].so     - Native library for SA - This is built after
-#                        libjsig[_g].so (signal interposition library)
+#  2. libsa.so         - Native library for SA - This is built after
+#                        libjsig.so (signal interposition library)
 #                        Please refer to ./makefiles/vm.make
 # If $(GAMMADIR)/agent dir is not present, SA components are not built.
 
@@ -181,9 +181,9 @@
 #
 # What you get with each target:
 #
-# debug*     - "thin" libjvm_g - debug info linked into the gamma_g launcher
+# debug*     - "thin" libjvm - debug info linked into the gamma launcher
 # fastdebug* - optimized compile, but with asserts enabled
-# jvmg*      - "fat" libjvm_g - debug info linked into libjvm_g.so
+# jvmg*      - "fat" libjvm - debug info linked into libjvm.so
 # optimized* - optimized compile, no asserts
 # profiled*  - gprof
 # product*   - the shippable thing:  optimized compile, no asserts, -DPRODUCT
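
As a usage sketch, the flavor targets above map onto ordinary make invocations from the platform directory (paths and the exact entry point are illustrative, not mandated by this Makefile):

    # Illustrative only; builds are normally driven from the top-level make.
    cd make/linux
    make jvmg        # "fat" debug libjvm.so: asserts on, debug info in the .so
    make fastdebug   # optimized code, but with asserts enabled
    make product     # shippable: optimized, no asserts, -DPRODUCT
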
--- a/make/linux/makefiles/buildtree.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/buildtree.make	Sun Feb 03 22:43:57 2013 +0100
@@ -442,12 +442,7 @@
 	echo "  exit 0"; \
 	echo "fi"; \
 	echo ""; \
-	echo "# Use gamma_g if it exists"; \
-	echo ""; \
 	echo "GAMMA_PROG=gamma"; \
-	echo "if [ -f gamma_g ]; then "; \
-	echo "  GAMMA_PROG=gamma_g"; \
-	echo "fi"; \
 	echo ""; \
 	echo "if [ \"$(OS_VENDOR)\" = \"Darwin\" ]; then "; \
 	echo "  # Ensure architecture for gamma and JAVA_HOME is the same."; \
--- a/make/linux/makefiles/debug.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/debug.make	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -38,7 +38,6 @@
  "Please use 'make jvmg' to build debug JVM.                            \n" \
  "----------------------------------------------------------------------\n")
 
-G_SUFFIX = _g
 VERSION = debug
 SYSDEFS += -DASSERT -DDEBUG
 PICFLAGS = DEFAULT
--- a/make/linux/makefiles/defs.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/defs.make	Sun Feb 03 22:43:57 2013 +0100
@@ -170,68 +170,70 @@
   # overridden in some situations, e.g., a BUILD_FLAVOR != product
   # build.
 
-  ifeq ($(BUILD_FLAVOR), product)
-    FULL_DEBUG_SYMBOLS ?= 1
-    ENABLE_FULL_DEBUG_SYMBOLS = $(FULL_DEBUG_SYMBOLS)
-  else
-    # debug variants always get Full Debug Symbols (if available)
-    ENABLE_FULL_DEBUG_SYMBOLS = 1
-  endif
-  _JUNK_ := $(shell \
-    echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
-  # since objcopy is optional, we set ZIP_DEBUGINFO_FILES later
+  # Due to the multiple sub-make processes that occur, this logic gets
+  # executed multiple times. We reduce the noise by at least checking that
+  # BUILD_FLAVOR has been set.
+  ifneq ($(BUILD_FLAVOR),)
+    ifeq ($(BUILD_FLAVOR), product)
+      FULL_DEBUG_SYMBOLS ?= 1
+      ENABLE_FULL_DEBUG_SYMBOLS = $(FULL_DEBUG_SYMBOLS)
+    else
+      # debug variants always get Full Debug Symbols (if available)
+      ENABLE_FULL_DEBUG_SYMBOLS = 1
+    endif
+    _JUNK_ := $(shell \
+      echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
+    # since objcopy is optional, we set ZIP_DEBUGINFO_FILES later
 
-  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-    # Default OBJCOPY comes from GNU Binutils on Linux:
-    DEF_OBJCOPY=/usr/bin/objcopy
-    ifdef CROSS_COMPILE_ARCH
-      # don't try to generate .debuginfo files when cross compiling
-      _JUNK_ := $(shell \
-        echo >&2 "INFO: cross compiling for ARCH $(CROSS_COMPILE_ARCH)," \
-          "skipping .debuginfo generation.")
-      OBJCOPY=
-    else
+    ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+      # Default OBJCOPY comes from GNU Binutils on Linux
+      ifeq ($(CROSS_COMPILE_ARCH),)
+        DEF_OBJCOPY=/usr/bin/objcopy
+      else
+        # Assume objcopy is part of the cross-compilation toolset
+        ifneq ($(ALT_COMPILER_PATH),)
+          DEF_OBJCOPY=$(ALT_COMPILER_PATH)/objcopy
+        endif
+      endif
       OBJCOPY=$(shell test -x $(DEF_OBJCOPY) && echo $(DEF_OBJCOPY))
       ifneq ($(ALT_OBJCOPY),)
         _JUNK_ := $(shell echo >&2 "INFO: ALT_OBJCOPY=$(ALT_OBJCOPY)")
         OBJCOPY=$(shell test -x $(ALT_OBJCOPY) && echo $(ALT_OBJCOPY))
       endif
-    endif
-  else
-    OBJCOPY=
-  endif
 
-  ifeq ($(OBJCOPY),)
-    _JUNK_ := $(shell \
-      echo >&2 "INFO: no objcopy cmd found so cannot create .debuginfo files.")
-    ENABLE_FULL_DEBUG_SYMBOLS=0
-    _JUNK_ := $(shell \
-      echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
-  else
-    _JUNK_ := $(shell \
-      echo >&2 "INFO: $(OBJCOPY) cmd found so will create .debuginfo files.")
+      ifeq ($(OBJCOPY),)
+        _JUNK_ := $(shell \
+          echo >&2 "INFO: no objcopy cmd found so cannot create .debuginfo files. You may need to set ALT_OBJCOPY.")
+        ENABLE_FULL_DEBUG_SYMBOLS=0
+        _JUNK_ := $(shell \
+          echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
+      else
+        _JUNK_ := $(shell \
+          echo >&2 "INFO: $(OBJCOPY) cmd found so will create .debuginfo files.")
 
-    # Library stripping policies for .debuginfo configs:
-    #   all_strip - strips everything from the library
-    #   min_strip - strips most stuff from the library; leaves minimum symbols
-    #   no_strip  - does not strip the library at all
-    #
-    # Oracle security policy requires "all_strip". A waiver was granted on
-    # 2011.09.01 that permits using "min_strip" in the Java JDK and Java JRE.
-    #
-    # Currently, STRIP_POLICY is only used when Full Debug Symbols is enabled.
-    #
-    STRIP_POLICY ?= min_strip
+        # Library stripping policies for .debuginfo configs:
+        #   all_strip - strips everything from the library
+        #   min_strip - strips most stuff from the library; leaves minimum symbols
+        #   no_strip  - does not strip the library at all
+        #
+        # Oracle security policy requires "all_strip". A waiver was granted on
+        # 2011.09.01 that permits using "min_strip" in the Java JDK and Java JRE.
+        #
+        # Currently, STRIP_POLICY is only used when Full Debug Symbols is enabled.
+        #
+        STRIP_POLICY ?= min_strip
 
-    _JUNK_ := $(shell \
-      echo >&2 "INFO: STRIP_POLICY=$(STRIP_POLICY)")
+        _JUNK_ := $(shell \
+          echo >&2 "INFO: STRIP_POLICY=$(STRIP_POLICY)")
 
-    ZIP_DEBUGINFO_FILES ?= 1
+        ZIP_DEBUGINFO_FILES ?= 1
 
-    _JUNK_ := $(shell \
-      echo >&2 "INFO: ZIP_DEBUGINFO_FILES=$(ZIP_DEBUGINFO_FILES)")
-  endif
-endif
+        _JUNK_ := $(shell \
+          echo >&2 "INFO: ZIP_DEBUGINFO_FILES=$(ZIP_DEBUGINFO_FILES)")
+      endif
+    endif # ENABLE_FULL_DEBUG_SYMBOLS=1
+  endif # BUILD_FLAVOR
+endif # JDK_6_OR_EARLIER
 
 JDK_INCLUDE_SUBDIR=linux
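
The restructured block above reduces to a probe-and-fallback: pick the cross toolchain's objcopy when CROSS_COMPILE_ARCH is set, else /usr/bin/objcopy; honor ALT_OBJCOPY if it is executable; and disable full debug symbols when nothing usable is found. The same logic as a standalone shell sketch (the paths are the makefile's defaults, the rest is illustrative):

    # Mirrors the makefile's test -x probing of objcopy candidates.
    if [ -z "$CROSS_COMPILE_ARCH" ]; then
      DEF_OBJCOPY=/usr/bin/objcopy
    elif [ -n "$ALT_COMPILER_PATH" ]; then
      DEF_OBJCOPY="$ALT_COMPILER_PATH/objcopy"
    fi
    OBJCOPY=$(test -x "$DEF_OBJCOPY" && echo "$DEF_OBJCOPY")
    if [ -n "$ALT_OBJCOPY" ]; then
      OBJCOPY=$(test -x "$ALT_OBJCOPY" && echo "$ALT_OBJCOPY")
    fi
    if [ -z "$OBJCOPY" ]; then
      echo "INFO: no objcopy cmd found so cannot create .debuginfo files." >&2
      ENABLE_FULL_DEBUG_SYMBOLS=0
    fi
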
 
--- a/make/linux/makefiles/fastdebug.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/fastdebug.make	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -58,7 +58,6 @@
 # Linker mapfile
 MAPFILE = $(GAMMADIR)/make/linux/makefiles/mapfile-vers-debug
 
-G_SUFFIX = _g
 VERSION = optimized
 SYSDEFS += -DASSERT -DFASTDEBUG
 PICFLAGS = DEFAULT
--- a/make/linux/makefiles/gcc.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/gcc.make	Sun Feb 03 22:43:57 2013 +0100
@@ -229,9 +229,9 @@
 else
   # Use the stabs format for debugging information (this is the default
   # on gcc-2.91). It's good enough, has all the information about line
-  # numbers and local variables, and libjvm_g.so is only about 16M.
+  # numbers and local variables, and libjvm.so is only about 16M.
   # Change this back to "-g" if you want the most expressive format.
-  # (warning: that could easily inflate libjvm_g.so to 150M!)
+  # (warning: that could easily inflate libjvm.so to 150M!)
   # Note: The Itanium gcc compiler crashes when using -gstabs.
   DEBUG_CFLAGS/ia64  = -g
   DEBUG_CFLAGS/amd64 = -g
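
In short: stabs keeps libjvm.so around 16M while full DWARF (-g) can grow it toward 150M, so -gstabs stays the default except where it misbehaves. A condensed shell view of the per-arch choice (variable name illustrative; the flags are the ones set above):

    # Sketch of the effective debug-format selection.
    case "$BUILDARCH" in
      ia64|amd64) DEBUG_CFLAGS='-g'      ;;  # ia64 gcc crashes on -gstabs; amd64 also uses -g
      *)          DEBUG_CFLAGS='-gstabs' ;;  # compact stabs format elsewhere
    esac
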
--- a/make/linux/makefiles/jsig.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/jsig.make	Sun Feb 03 22:43:57 2013 +0100
@@ -24,17 +24,12 @@
 
 # Rules to build signal interposition library, used by vm.make
 
-# libjsig[_g].so: signal interposition library
+# libjsig.so: signal interposition library
 JSIG = jsig
 LIBJSIG = lib$(JSIG).so
 
-JSIG_G    = $(JSIG)$(G_SUFFIX)
-LIBJSIG_G = lib$(JSIG_G).so
-
 LIBJSIG_DEBUGINFO   = lib$(JSIG).debuginfo
 LIBJSIG_DIZ         = lib$(JSIG).diz
-LIBJSIG_G_DEBUGINFO = lib$(JSIG_G).debuginfo
-LIBJSIG_G_DIZ       = lib$(JSIG_G).diz
 
 JSIGSRCDIR = $(GAMMADIR)/src/os/$(Platform_os_family)/vm
 
@@ -60,7 +55,6 @@
 	@echo Making signal interposition lib...
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
                          $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) -o $@ $< -ldl
-	$(QUIETLY) [ -f $(LIBJSIG_G) ] || { ln -s $@ $(LIBJSIG_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJSIG_DEBUGINFO)
 	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJSIG_DEBUGINFO) $@
@@ -72,11 +66,9 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(LIBJSIG_G_DEBUGINFO) ] || { ln -s $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(LIBJSIG_DIZ) $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO)
-	$(RM) $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO)
-	[ -f $(LIBJSIG_G_DIZ) ] || { ln -s $(LIBJSIG_DIZ) $(LIBJSIG_G_DIZ); }
+	$(ZIPEXE) -q -y $(LIBJSIG_DIZ) $(LIBJSIG_DEBUGINFO)
+	$(RM) $(LIBJSIG_DEBUGINFO)
   endif
 endif
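
The recipe above is the standard GNU binutils split-debuginfo sequence: extract the debug sections into a .debuginfo file, stamp the library with a debuglink back to it, strip according to STRIP_POLICY, and optionally zip the result into a .diz. For one library in plain shell (file names assumed for illustration):

    # Split the debug info out of the freshly linked library.
    objcopy --only-keep-debug libjsig.so libjsig.debuginfo
    objcopy --add-gnu-debuglink=libjsig.debuginfo libjsig.so
    strip -g libjsig.so    # min_strip; plain strip for all_strip, nothing for no_strip
    # With ZIP_DEBUGINFO_FILES=1: archive and remove the loose .debuginfo.
    zip -q -y libjsig.diz libjsig.debuginfo
    rm libjsig.debuginfo
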
 
--- a/make/linux/makefiles/jvmg.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/jvmg.make	Sun Feb 03 22:43:57 2013 +0100
@@ -37,7 +37,6 @@
 # Linker mapfile
 MAPFILE = $(GAMMADIR)/make/linux/makefiles/mapfile-vers-debug
 
-G_SUFFIX = _g
 VERSION = debug
 SYSDEFS += -DASSERT -DDEBUG
 PICFLAGS = DEFAULT
--- a/make/linux/makefiles/mapfile-vers-debug	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/mapfile-vers-debug	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -124,6 +124,7 @@
                 JVM_GetClassNameUTF;
 		JVM_GetClassSignature;
                 JVM_GetClassSigners;
+                JVM_GetClassTypeAnnotations;
                 JVM_GetComponentType;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
@@ -150,6 +151,7 @@
                 JVM_GetMethodIxNameUTF;
                 JVM_GetMethodIxSignatureUTF;
                 JVM_GetMethodParameterAnnotations;
+                JVM_GetMethodParameters;
                 JVM_GetPrimitiveArrayElement;
                 JVM_GetProtectionDomain;
                 JVM_GetSockName;
@@ -199,7 +201,6 @@
                 JVM_NewMultiArray;
                 JVM_OnExit;
                 JVM_Open;
-                JVM_PrintStackTrace;
                 JVM_RaiseSignal;
                 JVM_RawMonitorCreate;
                 JVM_RawMonitorDestroy;
--- a/make/linux/makefiles/mapfile-vers-product	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/mapfile-vers-product	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -124,6 +124,7 @@
                 JVM_GetClassNameUTF;
                 JVM_GetClassSignature;
                 JVM_GetClassSigners;
+                JVM_GetClassTypeAnnotations;
                 JVM_GetComponentType;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
@@ -150,6 +151,7 @@
                 JVM_GetMethodIxNameUTF;
                 JVM_GetMethodIxSignatureUTF;
                 JVM_GetMethodParameterAnnotations;
+                JVM_GetMethodParameters;
                 JVM_GetPrimitiveArrayElement;
                 JVM_GetProtectionDomain;
                 JVM_GetSockName;
@@ -199,7 +201,6 @@
                 JVM_NewMultiArray;
                 JVM_OnExit;
                 JVM_Open;
-                JVM_PrintStackTrace;
                 JVM_RaiseSignal;
                 JVM_RawMonitorCreate;
                 JVM_RawMonitorDestroy;
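
These mapfiles are linker version scripts: symbols under global: are exported from libjvm.so and local: * hides everything else, so additions such as JVM_GetClassTypeAnnotations and JVM_GetMethodParameters take effect at link time. A quick way to confirm an export afterwards (a sketch, not part of the build):

    # Link with the version script, then inspect the dynamic symbol table.
    gcc -shared -o libjvm.so -Wl,--version-script=mapfile-vers-product $OBJS
    nm -D libjvm.so | grep JVM_GetMethodParameters
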
--- a/make/linux/makefiles/optimized.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/optimized.make	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -40,5 +40,4 @@
 # Linker mapfile
 MAPFILE = $(GAMMADIR)/make/linux/makefiles/mapfile-vers-debug
 
-G_SUFFIX =
 VERSION = optimized
--- a/make/linux/makefiles/product.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/product.make	Sun Feb 03 22:43:57 2013 +0100
@@ -40,7 +40,6 @@
 # Linker mapfile
 MAPFILE = $(GAMMADIR)/make/linux/makefiles/mapfile-vers-product
 
-G_SUFFIX =
 SYSDEFS += -DPRODUCT
 VERSION = optimized
 
--- a/make/linux/makefiles/saproc.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/saproc.make	Sun Feb 03 22:43:57 2013 +0100
@@ -26,18 +26,13 @@
 
 # Rules to build serviceability agent library, used by vm.make
 
-# libsaproc[_g].so: serviceability agent
+# libsaproc.so: serviceability agent
 
 SAPROC = saproc
 LIBSAPROC = lib$(SAPROC).so
 
-SAPROC_G = $(SAPROC)$(G_SUFFIX)
-LIBSAPROC_G = lib$(SAPROC_G).so
-
 LIBSAPROC_DEBUGINFO   = lib$(SAPROC).debuginfo
 LIBSAPROC_DIZ         = lib$(SAPROC).diz
-LIBSAPROC_G_DEBUGINFO = lib$(SAPROC_G).debuginfo
-LIBSAPROC_G_DIZ       = lib$(SAPROC_G).diz
 
 AGENT_DIR = $(GAMMADIR)/agent
 
@@ -99,7 +94,6 @@
 	           $(SA_DEBUG_CFLAGS)                                   \
 	           -o $@                                                \
 	           -lthread_db
-	$(QUIETLY) [ -f $(LIBSAPROC_G) ] || { ln -s $@ $(LIBSAPROC_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBSAPROC_DEBUGINFO)
 	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBSAPROC_DEBUGINFO) $@
@@ -111,11 +105,9 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(LIBSAPROC_G_DEBUGINFO) ] || { ln -s $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(LIBSAPROC_DIZ) $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO)
-	$(RM) $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO)
-	[ -f $(LIBSAPROC_G_DIZ) ] || { ln -s $(LIBSAPROC_DIZ) $(LIBSAPROC_G_DIZ); }
+	$(ZIPEXE) -q -y $(LIBSAPROC_DIZ) $(LIBSAPROC_DEBUGINFO)
+	$(RM) $(LIBSAPROC_DEBUGINFO)
   endif
 endif
 
--- a/make/linux/makefiles/vm.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/linux/makefiles/vm.make	Sun Feb 03 22:43:57 2013 +0100
@@ -138,12 +138,9 @@
 
 JVM      = jvm
 LIBJVM   = lib$(JVM).so
-LIBJVM_G = lib$(JVM)$(G_SUFFIX).so
 
 LIBJVM_DEBUGINFO   = lib$(JVM).debuginfo
 LIBJVM_DIZ         = lib$(JVM).diz
-LIBJVM_G_DEBUGINFO = lib$(JVM)$(G_SUFFIX).debuginfo
-LIBJVM_G_DIZ       = lib$(JVM)$(G_SUFFIX).diz
 
 SPECIAL_PATHS:=adlc c1 gc_implementation opto shark libadt
 
@@ -323,7 +320,6 @@
 		       $(LFLAGS_VM) -o $@ $(sort $(LIBJVM.o)) $(LIBS_VM);       \
 	    $(LINK_LIB.CXX/POST_HOOK)                                    \
 	    rm -f $@.1; ln -s $@ $@.1;                                  \
-	    [ -f $(LIBJVM_G) ] || { ln -s $@ $(LIBJVM_G); ln -s $@.1 $(LIBJVM_G).1; }; \
             if [ \"$(CROSS_COMPILE_ARCH)\" = \"\" ] ; then                    \
 	      if [ -x /usr/sbin/selinuxenabled ] ; then                 \
 	        /usr/sbin/selinuxenabled;                               \
@@ -336,24 +332,21 @@
 	      fi                                                        \
             fi 								\
 	}
-ifeq ($(CROSS_COMPILE_ARCH),)
-  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+
+ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJVM_DEBUGINFO)
 	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DEBUGINFO) $@
-    ifeq ($(STRIP_POLICY),all_strip)
+  ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
-    else
-      ifeq ($(STRIP_POLICY),min_strip)
+  else
+    ifeq ($(STRIP_POLICY),min_strip)
 	$(QUIETLY) $(STRIP) -g $@
-      # implied else here is no stripping at all
-      endif
+    # implied else here is no stripping at all
     endif
-	$(QUIETLY) [ -f $(LIBJVM_G_DEBUGINFO) ] || ln -s $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
-    ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(LIBJVM_DIZ) $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
-	$(RM) $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
-	[ -f $(LIBJVM_G_DIZ) ] || { ln -s $(LIBJVM_DIZ) $(LIBJVM_G_DIZ); }
-    endif
+  endif
+  ifeq ($(ZIP_DEBUGINFO_FILES),1)
+	$(ZIPEXE) -q -y $(LIBJVM_DIZ) $(LIBJVM_DEBUGINFO)
+	$(RM) $(LIBJVM_DEBUGINFO)
   endif
 endif
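
While the _g aliases are gone, the recipe above still refreshes the libjvm.so.1 link next to the library; in isolation the kept step is simply:

    # Still performed after linking: maintain the .1 alias.
    rm -f libjvm.so.1
    ln -s libjvm.so libjvm.so.1
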
 
--- a/make/solaris/Makefile	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/Makefile	Sun Feb 03 22:43:57 2013 +0100
@@ -38,10 +38,10 @@
 
 # Along with VM, Serviceability Agent (SA) is built for SA/JDI binding.
 # JDI binding on SA produces two binaries:
-#  1. sa-jdi.jar       - This is build before building libjvm[_g].so
+#  1. sa-jdi.jar       - This is built before building libjvm.so
 #                        Please refer to ./makefiles/sa.make
-#  2. libsaproc[_g].so - Native library for SA - This is built after
-#                        libjsig[_g].so (signal interposition library)
+#  2. libsaproc.so     - Native library for SA - This is built after
+#                        libjsig.so (signal interposition library)
 #                        Please refer to ./makefiles/vm.make
 # If $(GAMMADIR)/agent dir is not present, SA components are not built.
 
@@ -141,9 +141,9 @@
 #
 # What you get with each target:
 #
-# debug*     - "thin" libjvm_g - debug info linked into the gamma_g launcher
+# debug*     - "thin" libjvm - debug info linked into the gamma launcher
 # fastdebug* - optimized compile, but with asserts enabled
-# jvmg*      - "fat" libjvm_g - debug info linked into libjvm_g.so
+# jvmg*      - "fat" libjvm - debug info linked into libjvm.so
 # optimized* - optimized compile, no asserts
 # profiled*  - gprof
 # product*   - the shippable thing:  optimized compile, no asserts, -DPRODUCT
--- a/make/solaris/makefiles/buildtree.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/buildtree.make	Sun Feb 03 22:43:57 2013 +0100
@@ -436,12 +436,7 @@
 	echo "  exit 0"; \
 	echo "fi"; \
 	echo ""; \
-	echo "# Use gamma_g if it exists"; \
-	echo ""; \
 	echo "GAMMA_PROG=gamma"; \
-	echo "if [ -f gamma_g ]; then "; \
-	echo "  GAMMA_PROG=gamma_g"; \
-	echo "fi"; \
 	echo ""; \
 	echo "if [ \"$(OS_VENDOR)\" = \"Darwin\" ]; then "; \
 	echo "  # Ensure architecture for gamma and JAVA_HOME is the same."; \
--- a/make/solaris/makefiles/debug.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/debug.make	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1998, 2008, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -53,7 +53,6 @@
  "Please use 'gnumake jvmg' to build debug JVM.                            \n" \
  "-------------------------------------------------------------------------\n")
 
-G_SUFFIX = _g
 VERSION = debug
 SYSDEFS += -DASSERT -DDEBUG
 PICFLAGS = DEFAULT
--- a/make/solaris/makefiles/defs.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/defs.make	Sun Feb 03 22:43:57 2013 +0100
@@ -109,60 +109,63 @@
   # overridden in some situations, e.g., a BUILD_FLAVOR != product
   # build.
 
-  ifeq ($(BUILD_FLAVOR), product)
-    FULL_DEBUG_SYMBOLS ?= 1
-    ENABLE_FULL_DEBUG_SYMBOLS = $(FULL_DEBUG_SYMBOLS)
-  else
-    # debug variants always get Full Debug Symbols (if available)
-    ENABLE_FULL_DEBUG_SYMBOLS = 1
-  endif
-  _JUNK_ := $(shell \
-    echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
-  # since objcopy is optional, we set ZIP_DEBUGINFO_FILES later
-
-  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-    # Default OBJCOPY comes from the SUNWbinutils package:
-    DEF_OBJCOPY=/usr/sfw/bin/gobjcopy
-    OBJCOPY=$(shell test -x $(DEF_OBJCOPY) && echo $(DEF_OBJCOPY))
-    ifneq ($(ALT_OBJCOPY),)
-      _JUNK_ := $(shell echo >&2 "INFO: ALT_OBJCOPY=$(ALT_OBJCOPY)")
-      OBJCOPY=$(shell test -x $(ALT_OBJCOPY) && echo $(ALT_OBJCOPY))
+  # Due to the multiple sub-make processes that occur, this logic gets
+  # executed multiple times. We reduce the noise by at least checking that
+  # BUILD_FLAVOR has been set.
+  ifneq ($(BUILD_FLAVOR),)
+    ifeq ($(BUILD_FLAVOR), product)
+      FULL_DEBUG_SYMBOLS ?= 1
+      ENABLE_FULL_DEBUG_SYMBOLS = $(FULL_DEBUG_SYMBOLS)
+    else
+      # debug variants always get Full Debug Symbols (if available)
+      ENABLE_FULL_DEBUG_SYMBOLS = 1
     endif
-  else
-    OBJCOPY=
-  endif
-
-  ifeq ($(OBJCOPY),)
-    _JUNK_ := $(shell \
-      echo >&2 "INFO: no objcopy cmd found so cannot create .debuginfo files.")
-    ENABLE_FULL_DEBUG_SYMBOLS=0
     _JUNK_ := $(shell \
       echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
-  else
-    _JUNK_ := $(shell \
-      echo >&2 "INFO: $(OBJCOPY) cmd found so will create .debuginfo files.")
+    # since objcopy is optional, we set ZIP_DEBUGINFO_FILES later
+
+    ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+      # Default OBJCOPY comes from the SUNWbinutils package:
+      DEF_OBJCOPY=/usr/sfw/bin/gobjcopy
+      OBJCOPY=$(shell test -x $(DEF_OBJCOPY) && echo $(DEF_OBJCOPY))
+      ifneq ($(ALT_OBJCOPY),)
+        _JUNK_ := $(shell echo >&2 "INFO: ALT_OBJCOPY=$(ALT_OBJCOPY)")
+        OBJCOPY=$(shell test -x $(ALT_OBJCOPY) && echo $(ALT_OBJCOPY))
+      endif
+
+      ifeq ($(OBJCOPY),)
+        _JUNK_ := $(shell \
+          echo >&2 "INFO: no objcopy cmd found so cannot create .debuginfo files.")
+        ENABLE_FULL_DEBUG_SYMBOLS=0
+        _JUNK_ := $(shell \
+          echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
+      else
+        _JUNK_ := $(shell \
+          echo >&2 "INFO: $(OBJCOPY) cmd found so will create .debuginfo files.")
 
-    # Library stripping policies for .debuginfo configs:
-    #   all_strip - strips everything from the library
-    #   min_strip - strips most stuff from the library; leaves minimum symbols
-    #   no_strip  - does not strip the library at all
-    #
-    # Oracle security policy requires "all_strip". A waiver was granted on
-    # 2011.09.01 that permits using "min_strip" in the Java JDK and Java JRE.
-    #
-    # Currently, STRIP_POLICY is only used when Full Debug Symbols is enabled.
-    #
-    STRIP_POLICY ?= min_strip
+        # Library stripping policies for .debuginfo configs:
+        #   all_strip - strips everything from the library
+        #   min_strip - strips most stuff from the library; leaves minimum symbols
+        #   no_strip  - does not strip the library at all
+        #
+        # Oracle security policy requires "all_strip". A waiver was granted on
+        # 2011.09.01 that permits using "min_strip" in the Java JDK and Java JRE.
+        #
+        # Currently, STRIP_POLICY is only used when Full Debug Symbols is enabled.
+        #
+        STRIP_POLICY ?= min_strip
 
-    _JUNK_ := $(shell \
-      echo >&2 "INFO: STRIP_POLICY=$(STRIP_POLICY)")
+        _JUNK_ := $(shell \
+          echo >&2 "INFO: STRIP_POLICY=$(STRIP_POLICY)")
 
-    ZIP_DEBUGINFO_FILES ?= 1
+        ZIP_DEBUGINFO_FILES ?= 1
 
-    _JUNK_ := $(shell \
-      echo >&2 "INFO: ZIP_DEBUGINFO_FILES=$(ZIP_DEBUGINFO_FILES)")
-  endif
-endif
+        _JUNK_ := $(shell \
+          echo >&2 "INFO: ZIP_DEBUGINFO_FILES=$(ZIP_DEBUGINFO_FILES)")
+      endif
+    endif # ENABLE_FULL_DEBUG_SYMBOLS=1
+  endif # BUILD_FLAVOR
+endif # JDK_6_OR_EARLIER
 
 JDK_INCLUDE_SUBDIR=solaris
 
--- a/make/solaris/makefiles/dtrace.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/dtrace.make	Sun Feb 03 22:43:57 2013 +0100
@@ -39,21 +39,15 @@
 
 JVM_DB = libjvm_db
 LIBJVM_DB = libjvm_db.so
-LIBJVM_DB_G = libjvm$(G_SUFFIX)_db.so
 
 LIBJVM_DB_DEBUGINFO   = libjvm_db.debuginfo
 LIBJVM_DB_DIZ         = libjvm_db.diz
-LIBJVM_DB_G_DEBUGINFO = libjvm$(G_SUFFIX)_db.debuginfo
-LIBJVM_DB_G_DIZ       = libjvm$(G_SUFFIX)_db.diz
 
 JVM_DTRACE = jvm_dtrace
 LIBJVM_DTRACE = libjvm_dtrace.so
-LIBJVM_DTRACE_G = libjvm$(G_SUFFIX)_dtrace.so
 
 LIBJVM_DTRACE_DEBUGINFO   = libjvm_dtrace.debuginfo
 LIBJVM_DTRACE_DIZ         = libjvm_dtrace.diz
-LIBJVM_DTRACE_G_DEBUGINFO = libjvm$(G_SUFFIX)_dtrace.debuginfo
-LIBJVM_DTRACE_G_DIZ       = libjvm$(G_SUFFIX)_dtrace.diz
 
 JVMOFFS = JvmOffsets
 JVMOFFS.o = $(JVMOFFS).o
@@ -96,25 +90,18 @@
 
 XLIBJVM_DIR = 64
 XLIBJVM_DB = $(XLIBJVM_DIR)/$(LIBJVM_DB)
-XLIBJVM_DB_G = $(XLIBJVM_DIR)/$(LIBJVM_DB_G)
 XLIBJVM_DTRACE = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE)
-XLIBJVM_DTRACE_G = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_G)
 
 XLIBJVM_DB_DEBUGINFO       = $(XLIBJVM_DIR)/$(LIBJVM_DB_DEBUGINFO)
 XLIBJVM_DB_DIZ             = $(XLIBJVM_DIR)/$(LIBJVM_DB_DIZ)
-XLIBJVM_DB_G_DEBUGINFO     = $(XLIBJVM_DIR)/$(LIBJVM_DB_G_DEBUGINFO)
-XLIBJVM_DB_G_DIZ           = $(XLIBJVM_DIR)/$(LIBJVM_DB_G_DIZ)
 XLIBJVM_DTRACE_DEBUGINFO   = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_DEBUGINFO)
 XLIBJVM_DTRACE_DIZ         = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_DIZ)
-XLIBJVM_DTRACE_G_DEBUGINFO = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_G_DEBUGINFO)
-XLIBJVM_DTRACE_G_DIZ       = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_G_DIZ)
 
 $(XLIBJVM_DB): $(ADD_GNU_DEBUGLINK) $(FIX_EMPTY_SEC_HDR_FLAGS) $(DTRACE_SRCDIR)/$(JVM_DB).c $(JVMOFFS).h $(LIBJVM_DB_MAPFILE)
 	@echo Making $@
 	$(QUIETLY) mkdir -p $(XLIBJVM_DIR) ; \
 	$(CC) $(SYMFLAG) $(ARCHFLAG/$(ISA)) -D$(TYPE) -I. -I$(GENERATED) \
 		$(SHARED_FLAG) $(LFLAGS_JVM_DB) -o $@ $(DTRACE_SRCDIR)/$(JVM_DB).c -lc
-	[ -f $(XLIBJVM_DB_G) ] || { ln -s $(LIBJVM_DB) $(XLIBJVM_DB_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 # gobjcopy crashes on "empty" section headers with the SHF_ALLOC flag set.
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
@@ -137,13 +124,11 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(XLIBJVM_DB_G_DEBUGINFO) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
 # Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
 # in the archived name:
-	( cd $(XLIBJVM_DIR) && $(ZIPEXE) -q -y $(LIBJVM_DB_DIZ) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO) )
-	$(RM) $(XLIBJVM_DB_DEBUGINFO) $(XLIBJVM_DB_G_DEBUGINFO)
-	[ -f $(XLIBJVM_DB_G_DIZ) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DB_DIZ) $(LIBJVM_DB_G_DIZ); }
+	( cd $(XLIBJVM_DIR) && $(ZIPEXE) -q -y $(LIBJVM_DB_DIZ) $(LIBJVM_DB_DEBUGINFO) )
+	$(RM) $(XLIBJVM_DB_DEBUGINFO)
   endif
 endif
 
@@ -152,7 +137,6 @@
 	$(QUIETLY) mkdir -p $(XLIBJVM_DIR) ; \
 	$(CC) $(SYMFLAG) $(ARCHFLAG/$(ISA)) -D$(TYPE) -I. \
 		$(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c -lc -lthread -ldoor
-	[ -f $(XLIBJVM_DTRACE_G) ] || { ln -s $(LIBJVM_DTRACE) $(XLIBJVM_DTRACE_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
 	$(QUIETLY) $(FIX_EMPTY_SEC_HDR_FLAGS) $@
@@ -170,13 +154,11 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(XLIBJVM_DTRACE_G_DEBUGINFO) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
 # Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
 # in the archived name:
-	( cd $(XLIBJVM_DIR) && $(ZIPEXE) -q -y $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO) )
-	$(RM) $(XLIBJVM_DTRACE_DEBUGINFO) $(XLIBJVM_DTRACE_G_DEBUGINFO)
-	[ -f $(XLIBJVM_DTRACE_G_DIZ) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_G_DIZ); }
+	( cd $(XLIBJVM_DIR) && $(ZIPEXE) -q -y $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_DEBUGINFO))
+	$(RM) $(XLIBJVM_DTRACE_DEBUGINFO)
   endif
 endif
 
@@ -224,7 +206,6 @@
 	@echo Making $@
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) -D$(TYPE) -I. -I$(GENERATED) \
 		$(SHARED_FLAG) $(LFLAGS_JVM_DB) -o $@ $(DTRACE_SRCDIR)/$(JVM_DB).c -lc
-	[ -f $(LIBJVM_DB_G) ] || { ln -s $@ $(LIBJVM_DB_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
 	$(QUIETLY) $(FIX_EMPTY_SEC_HDR_FLAGS) $@
@@ -240,11 +221,9 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(LIBJVM_DB_G_DEBUGINFO) ] || { ln -s $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(LIBJVM_DB_DIZ) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO)
-	$(RM) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO)
-	[ -f $(LIBJVM_DB_G_DIZ) ] || { ln -s $(LIBJVM_DB_DIZ) $(LIBJVM_DB_G_DIZ); }
+	$(ZIPEXE) -q -y $(LIBJVM_DB_DIZ) $(LIBJVM_DB_DEBUGINFO)
+	$(RM) $(LIBJVM_DB_DEBUGINFO)
   endif
 endif
 
@@ -252,7 +231,6 @@
 	@echo Making $@
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) -D$(TYPE) -I.  \
 		$(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c -lc -lthread -ldoor
-	[ -f $(LIBJVM_DTRACE_G) ] || { ln -s $@ $(LIBJVM_DTRACE_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
 	$(QUIETLY) $(FIX_EMPTY_SEC_HDR_FLAGS) $@
@@ -268,11 +246,9 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(LIBJVM_DTRACE_G_DEBUGINFO) ] || { ln -s $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO)
-	$(RM) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO)
-	[ -f $(LIBJVM_DTRACE_G_DIZ) ] || { ln -s $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_G_DIZ); }
+	$(ZIPEXE) -q -y $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_DEBUGINFO)
+	$(RM) $(LIBJVM_DTRACE_DEBUGINFO)
   endif
 endif
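
A detail the recipes above preserve: the zip step for the 64-bit variants runs from inside $(XLIBJVM_DIR) because zip records entry paths as given, so changing directory first keeps the 64/ prefix out of the archive. Isolated to shell, with names assumed:

    # cd first so the archived entry is the bare file name, not 64/libjvm_db.debuginfo.
    ( cd 64 && zip -q -y libjvm_db.diz libjvm_db.debuginfo )
    rm -f 64/libjvm_db.debuginfo
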
 
--- a/make/solaris/makefiles/fastdebug.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/fastdebug.make	Sun Feb 03 22:43:57 2013 +0100
@@ -122,7 +122,6 @@
 # and mustn't be otherwise.
 MAPFILE_DTRACE = $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-$(TYPE)
 
-G_SUFFIX = _g
 VERSION = optimized
 SYSDEFS += -DASSERT -DFASTDEBUG -DCHECK_UNHANDLED_OOPS
 PICFLAGS = DEFAULT
--- a/make/solaris/makefiles/gcc.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/gcc.make	Sun Feb 03 22:43:57 2013 +0100
@@ -187,9 +187,9 @@
 
 # Use the stabs format for debugging information (this is the default 
 # on gcc-2.91). It's good enough, has all the information about line 
-# numbers and local variables, and libjvm_g.so is only about 16M. 
+# numbers and local variables, and libjvm.so is only about 16M. 
 # Change this back to "-g" if you want the most expressive format. 
-# (warning: that could easily inflate libjvm_g.so to 150M!) 
+# (warning: that could easily inflate libjvm.so to 150M!) 
 # Note: The Itanium gcc compiler crashes when using -gstabs. 
 DEBUG_CFLAGS/ia64  = -g 
 DEBUG_CFLAGS/amd64 = -g 
--- a/make/solaris/makefiles/jsig.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/jsig.make	Sun Feb 03 22:43:57 2013 +0100
@@ -24,17 +24,12 @@
 
 # Rules to build signal interposition library, used by vm.make
 
-# libjsig[_g].so: signal interposition library
+# libjsig.so: signal interposition library
 JSIG      = jsig
 LIBJSIG   = lib$(JSIG).so
 
-JSIG_G    = $(JSIG)$(G_SUFFIX)
-LIBJSIG_G = lib$(JSIG_G).so
-
 LIBJSIG_DEBUGINFO   = lib$(JSIG).debuginfo
 LIBJSIG_DIZ         = lib$(JSIG).diz
-LIBJSIG_G_DEBUGINFO = lib$(JSIG_G).debuginfo
-LIBJSIG_G_DIZ       = lib$(JSIG_G).diz
 
 JSIGSRCDIR = $(GAMMADIR)/src/os/$(Platform_os_family)/vm
 
@@ -56,7 +51,6 @@
 	@echo Making signal interposition lib...
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
                          $(LFLAGS_JSIG) -o $@ $(JSIGSRCDIR)/jsig.c -ldl
-	[ -f $(LIBJSIG_G) ] || { ln -s $@ $(LIBJSIG_G); }
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 # gobjcopy crashes on "empty" section headers with the SHF_ALLOC flag set.
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
@@ -77,11 +71,9 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(LIBJSIG_G_DEBUGINFO) ] || { ln -s $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(LIBJSIG_DIZ) $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO)
-	$(RM) $(LIBJSIG_DEBUGINFO) $(LIBJSIG_G_DEBUGINFO)
-	[ -f $(LIBJSIG_G_DIZ) ] || { ln -s $(LIBJSIG_DIZ) $(LIBJSIG_G_DIZ); }
+	$(ZIPEXE) -q -y $(LIBJSIG_DIZ) $(LIBJSIG_DEBUGINFO)
+	$(RM) $(LIBJSIG_DEBUGINFO)
   endif
 endif
 
--- a/make/solaris/makefiles/jvmg.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/jvmg.make	Sun Feb 03 22:43:57 2013 +0100
@@ -51,7 +51,6 @@
 # and mustn't be otherwise.
 MAPFILE_DTRACE = $(GAMMADIR)/make/solaris/makefiles/mapfile-vers-$(TYPE)
 
-G_SUFFIX = _g
 VERSION = debug
 SYSDEFS += -DASSERT -DDEBUG
 PICFLAGS = DEFAULT
--- a/make/solaris/makefiles/mapfile-vers	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/mapfile-vers	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -26,235 +26,236 @@
 
 SUNWprivate_1.1 {
         global:
-		# JNI
+                # JNI
                 JNI_CreateJavaVM;
                 JNI_GetCreatedJavaVMs;
                 JNI_GetDefaultJavaVMInitArgs;
-	
-		# JVM
-		JVM_Accept;
-		JVM_ActiveProcessorCount;
-		JVM_AllocateNewArray;
-		JVM_AllocateNewObject;
-		JVM_ArrayCopy;
-		JVM_AssertionStatusDirectives;
-		JVM_Available;
-		JVM_Bind;
-		JVM_ClassDepth;
-		JVM_ClassLoaderDepth;
-		JVM_Clone;
-		JVM_Close;
-		JVM_CX8Field;
-		JVM_CompileClass;
-		JVM_CompileClasses;
-		JVM_CompilerCommand;
-		JVM_Connect;
-		JVM_ConstantPoolGetClassAt;
-		JVM_ConstantPoolGetClassAtIfLoaded;
-		JVM_ConstantPoolGetDoubleAt;
-		JVM_ConstantPoolGetFieldAt;
-		JVM_ConstantPoolGetFieldAtIfLoaded;
-		JVM_ConstantPoolGetFloatAt;
-		JVM_ConstantPoolGetIntAt;
-		JVM_ConstantPoolGetLongAt;
-		JVM_ConstantPoolGetMethodAt;
-		JVM_ConstantPoolGetMethodAtIfLoaded;
-		JVM_ConstantPoolGetMemberRefInfoAt;
-		JVM_ConstantPoolGetSize;
-		JVM_ConstantPoolGetStringAt;
-		JVM_ConstantPoolGetUTF8At;
-		JVM_CountStackFrames;
-		JVM_CurrentClassLoader;
-		JVM_CurrentLoadedClass;
-		JVM_CurrentThread;
-		JVM_CurrentTimeMillis;
-		JVM_DefineClass;
-		JVM_DefineClassWithSource;
-		JVM_DefineClassWithSourceCond;
-		JVM_DesiredAssertionStatus;
-		JVM_DisableCompiler;
-		JVM_DoPrivileged;
-		JVM_DTraceGetVersion;
-		JVM_DTraceActivate;
-		JVM_DTraceIsProbeEnabled;
-		JVM_DTraceIsSupported;
-		JVM_DTraceDispose;
-		JVM_DumpAllStacks;
-		JVM_DumpThreads;
-		JVM_EnableCompiler;
-		JVM_Exit;
-		JVM_FillInStackTrace;
-		JVM_FindClassFromClass;
-		JVM_FindClassFromClassLoader;
-		JVM_FindClassFromBootLoader;
-		JVM_FindLibraryEntry;
-		JVM_FindLoadedClass;
-		JVM_FindPrimitiveClass;
-		JVM_FindSignal;
-		JVM_FreeMemory;
-		JVM_GC;
-		JVM_GetAllThreads;
-		JVM_GetArrayElement;
-		JVM_GetArrayLength;
-		JVM_GetCPClassNameUTF;
-		JVM_GetCPFieldClassNameUTF;
-		JVM_GetCPFieldModifiers;
-		JVM_GetCPFieldNameUTF;
-		JVM_GetCPFieldSignatureUTF;
-		JVM_GetCPMethodClassNameUTF;
-		JVM_GetCPMethodModifiers;
-		JVM_GetCPMethodNameUTF;
-		JVM_GetCPMethodSignatureUTF;
-		JVM_GetCallerClass;
-		JVM_GetClassAccessFlags;
-		JVM_GetClassAnnotations;
-		JVM_GetClassCPEntriesCount;
-		JVM_GetClassCPTypes;
-		JVM_GetClassConstantPool;
-		JVM_GetClassContext;
-		JVM_GetClassDeclaredConstructors;
-		JVM_GetClassDeclaredFields;
-		JVM_GetClassDeclaredMethods;
-		JVM_GetClassFieldsCount;
-		JVM_GetClassInterfaces;
-		JVM_GetClassLoader;
-		JVM_GetClassMethodsCount;
-		JVM_GetClassModifiers;
-		JVM_GetClassName;
-		JVM_GetClassNameUTF;
-		JVM_GetClassSignature;
-		JVM_GetClassSigners;
-		JVM_GetComponentType;
-		JVM_GetDeclaredClasses;
-		JVM_GetDeclaringClass;
-		JVM_GetEnclosingMethodInfo;
-	        JVM_GetFieldAnnotations;
-		JVM_GetFieldIxModifiers;
-		JVM_GetHostName;
-		JVM_GetInheritedAccessControlContext;
-		JVM_GetInterfaceVersion;
-		JVM_GetLastErrorString;
-		JVM_GetManagement;
-	        JVM_GetMethodAnnotations;
-	        JVM_GetMethodDefaultAnnotationValue;
-		JVM_GetMethodIxArgsSize;
-		JVM_GetMethodIxByteCode;
-		JVM_GetMethodIxByteCodeLength;
-		JVM_GetMethodIxExceptionIndexes;
-		JVM_GetMethodIxExceptionTableEntry;
-		JVM_GetMethodIxExceptionTableLength;
-		JVM_GetMethodIxExceptionsCount;
-		JVM_GetMethodIxLocalsCount;
-		JVM_GetMethodIxMaxStack;
-		JVM_GetMethodIxModifiers;
-		JVM_GetMethodIxNameUTF;
-		JVM_GetMethodIxSignatureUTF;
-	        JVM_GetMethodParameterAnnotations;
-		JVM_GetPrimitiveArrayElement;
-		JVM_GetProtectionDomain;
-		JVM_GetSockName;
-		JVM_GetSockOpt;
-		JVM_GetStackAccessControlContext;
-		JVM_GetStackTraceDepth;
-		JVM_GetStackTraceElement;
-		JVM_GetSystemPackage;
-		JVM_GetSystemPackages;
-		JVM_GetThreadStateNames;
-		JVM_GetThreadStateValues;
-		JVM_GetVersionInfo;
-		JVM_Halt;
-		JVM_HoldsLock;
-		JVM_IHashCode;
-		JVM_InitAgentProperties;
-		JVM_InitProperties;
-		JVM_InitializeCompiler;
-		JVM_InitializeSocketLibrary;
-		JVM_InternString;
-		JVM_Interrupt;
-		JVM_InvokeMethod;
-		JVM_IsArrayClass;
-		JVM_IsConstructorIx;
-		JVM_IsInterface;
-		JVM_IsInterrupted;
-		JVM_IsNaN;
-		JVM_IsPrimitiveClass;
-		JVM_IsSameClassPackage;
-		JVM_IsSilentCompiler;
-		JVM_IsSupportedJNIVersion;
-		JVM_IsThreadAlive;
-		JVM_LatestUserDefinedLoader;
-		JVM_Listen;
-		JVM_LoadClass0;
-		JVM_LoadLibrary;
-		JVM_Lseek;
-		JVM_MaxObjectInspectionAge;
-		JVM_MaxMemory;
-		JVM_MonitorNotify;
-		JVM_MonitorNotifyAll;
-		JVM_MonitorWait;
-		JVM_NativePath;
-		JVM_NanoTime;
-		JVM_NewArray;
-		JVM_NewInstanceFromConstructor;
-		JVM_NewMultiArray;
-		JVM_OnExit;
-		JVM_Open;
-		JVM_PrintStackTrace;
-		JVM_RaiseSignal;
-		JVM_RawMonitorCreate;
-		JVM_RawMonitorDestroy;
-		JVM_RawMonitorEnter;
-		JVM_RawMonitorExit;
-		JVM_Read;
-		JVM_Recv;
-		JVM_RecvFrom;
-		JVM_RegisterSignal;
-		JVM_ReleaseUTF;
-		JVM_ResolveClass;
-		JVM_ResumeThread;
-		JVM_Send;
-		JVM_SendTo;
-		JVM_SetArrayElement;
-		JVM_SetClassSigners;
-		JVM_SetLength;
+        
+                # JVM
+                JVM_Accept;
+                JVM_ActiveProcessorCount;
+                JVM_AllocateNewArray;
+                JVM_AllocateNewObject;
+                JVM_ArrayCopy;
+                JVM_AssertionStatusDirectives;
+                JVM_Available;
+                JVM_Bind;
+                JVM_ClassDepth;
+                JVM_ClassLoaderDepth;
+                JVM_Clone;
+                JVM_Close;
+                JVM_CX8Field;
+                JVM_CompileClass;
+                JVM_CompileClasses;
+                JVM_CompilerCommand;
+                JVM_Connect;
+                JVM_ConstantPoolGetClassAt;
+                JVM_ConstantPoolGetClassAtIfLoaded;
+                JVM_ConstantPoolGetDoubleAt;
+                JVM_ConstantPoolGetFieldAt;
+                JVM_ConstantPoolGetFieldAtIfLoaded;
+                JVM_ConstantPoolGetFloatAt;
+                JVM_ConstantPoolGetIntAt;
+                JVM_ConstantPoolGetLongAt;
+                JVM_ConstantPoolGetMethodAt;
+                JVM_ConstantPoolGetMethodAtIfLoaded;
+                JVM_ConstantPoolGetMemberRefInfoAt;
+                JVM_ConstantPoolGetSize;
+                JVM_ConstantPoolGetStringAt;
+                JVM_ConstantPoolGetUTF8At;
+                JVM_CountStackFrames;
+                JVM_CurrentClassLoader;
+                JVM_CurrentLoadedClass;
+                JVM_CurrentThread;
+                JVM_CurrentTimeMillis;
+                JVM_DefineClass;
+                JVM_DefineClassWithSource;
+                JVM_DefineClassWithSourceCond;
+                JVM_DesiredAssertionStatus;
+                JVM_DisableCompiler;
+                JVM_DoPrivileged;
+                JVM_DTraceGetVersion;
+                JVM_DTraceActivate;
+                JVM_DTraceIsProbeEnabled;
+                JVM_DTraceIsSupported;
+                JVM_DTraceDispose;
+                JVM_DumpAllStacks;
+                JVM_DumpThreads;
+                JVM_EnableCompiler;
+                JVM_Exit;
+                JVM_FillInStackTrace;
+                JVM_FindClassFromClass;
+                JVM_FindClassFromClassLoader;
+                JVM_FindClassFromBootLoader;
+                JVM_FindLibraryEntry;
+                JVM_FindLoadedClass;
+                JVM_FindPrimitiveClass;
+                JVM_FindSignal;
+                JVM_FreeMemory;
+                JVM_GC;
+                JVM_GetAllThreads;
+                JVM_GetArrayElement;
+                JVM_GetArrayLength;
+                JVM_GetCPClassNameUTF;
+                JVM_GetCPFieldClassNameUTF;
+                JVM_GetCPFieldModifiers;
+                JVM_GetCPFieldNameUTF;
+                JVM_GetCPFieldSignatureUTF;
+                JVM_GetCPMethodClassNameUTF;
+                JVM_GetCPMethodModifiers;
+                JVM_GetCPMethodNameUTF;
+                JVM_GetCPMethodSignatureUTF;
+                JVM_GetCallerClass;
+                JVM_GetClassAccessFlags;
+                JVM_GetClassAnnotations;
+                JVM_GetClassCPEntriesCount;
+                JVM_GetClassCPTypes;
+                JVM_GetClassConstantPool;
+                JVM_GetClassContext;
+                JVM_GetClassDeclaredConstructors;
+                JVM_GetClassDeclaredFields;
+                JVM_GetClassDeclaredMethods;
+                JVM_GetClassFieldsCount;
+                JVM_GetClassInterfaces;
+                JVM_GetClassLoader;
+                JVM_GetClassMethodsCount;
+                JVM_GetClassModifiers;
+                JVM_GetClassName;
+                JVM_GetClassNameUTF;
+                JVM_GetClassSignature;
+                JVM_GetClassSigners;
+                JVM_GetClassTypeAnnotations;
+                JVM_GetComponentType;
+                JVM_GetDeclaredClasses;
+                JVM_GetDeclaringClass;
+                JVM_GetEnclosingMethodInfo;
+                JVM_GetFieldAnnotations;
+                JVM_GetFieldIxModifiers;
+                JVM_GetHostName;
+                JVM_GetInheritedAccessControlContext;
+                JVM_GetInterfaceVersion;
+                JVM_GetLastErrorString;
+                JVM_GetManagement;
+                JVM_GetMethodAnnotations;
+                JVM_GetMethodDefaultAnnotationValue;
+                JVM_GetMethodIxArgsSize;
+                JVM_GetMethodIxByteCode;
+                JVM_GetMethodIxByteCodeLength;
+                JVM_GetMethodIxExceptionIndexes;
+                JVM_GetMethodIxExceptionTableEntry;
+                JVM_GetMethodIxExceptionTableLength;
+                JVM_GetMethodIxExceptionsCount;
+                JVM_GetMethodIxLocalsCount;
+                JVM_GetMethodIxMaxStack;
+                JVM_GetMethodIxModifiers;
+                JVM_GetMethodIxNameUTF;
+                JVM_GetMethodIxSignatureUTF;
+                JVM_GetMethodParameterAnnotations;
+                JVM_GetMethodParameters;
+                JVM_GetPrimitiveArrayElement;
+                JVM_GetProtectionDomain;
+                JVM_GetSockName;
+                JVM_GetSockOpt;
+                JVM_GetStackAccessControlContext;
+                JVM_GetStackTraceDepth;
+                JVM_GetStackTraceElement;
+                JVM_GetSystemPackage;
+                JVM_GetSystemPackages;
+                JVM_GetThreadStateNames;
+                JVM_GetThreadStateValues;
+                JVM_GetVersionInfo;
+                JVM_Halt;
+                JVM_HoldsLock;
+                JVM_IHashCode;
+                JVM_InitAgentProperties;
+                JVM_InitProperties;
+                JVM_InitializeCompiler;
+                JVM_InitializeSocketLibrary;
+                JVM_InternString;
+                JVM_Interrupt;
+                JVM_InvokeMethod;
+                JVM_IsArrayClass;
+                JVM_IsConstructorIx;
+                JVM_IsInterface;
+                JVM_IsInterrupted;
+                JVM_IsNaN;
+                JVM_IsPrimitiveClass;
+                JVM_IsSameClassPackage;
+                JVM_IsSilentCompiler;
+                JVM_IsSupportedJNIVersion;
+                JVM_IsThreadAlive;
+                JVM_LatestUserDefinedLoader;
+                JVM_Listen;
+                JVM_LoadClass0;
+                JVM_LoadLibrary;
+                JVM_Lseek;
+                JVM_MaxObjectInspectionAge;
+                JVM_MaxMemory;
+                JVM_MonitorNotify;
+                JVM_MonitorNotifyAll;
+                JVM_MonitorWait;
+                JVM_NativePath;
+                JVM_NanoTime;
+                JVM_NewArray;
+                JVM_NewInstanceFromConstructor;
+                JVM_NewMultiArray;
+                JVM_OnExit;
+                JVM_Open;
+                JVM_RaiseSignal;
+                JVM_RawMonitorCreate;
+                JVM_RawMonitorDestroy;
+                JVM_RawMonitorEnter;
+                JVM_RawMonitorExit;
+                JVM_Read;
+                JVM_Recv;
+                JVM_RecvFrom;
+                JVM_RegisterSignal;
+                JVM_ReleaseUTF;
+                JVM_ResolveClass;
+                JVM_ResumeThread;
+                JVM_Send;
+                JVM_SendTo;
+                JVM_SetArrayElement;
+                JVM_SetClassSigners;
+                JVM_SetLength;
                 JVM_SetNativeThreadName;
-		JVM_SetPrimitiveArrayElement;
-		JVM_SetProtectionDomain;
-		JVM_SetSockOpt;
-		JVM_SetThreadPriority;
-		JVM_Sleep;
-		JVM_Socket;
-		JVM_SocketAvailable;
-		JVM_SocketClose;
-		JVM_SocketShutdown;
-		JVM_StartThread;
-		JVM_StopThread;
-		JVM_SuspendThread;
-		JVM_SupportsCX8;
-		JVM_Sync;
-		JVM_Timeout;
-		JVM_TotalMemory;
-		JVM_TraceInstructions;
-		JVM_TraceMethodCalls;
-		JVM_UnloadLibrary;
-		JVM_Write;
-		JVM_Yield;
-		JVM_handle_solaris_signal;
+                JVM_SetPrimitiveArrayElement;
+                JVM_SetProtectionDomain;
+                JVM_SetSockOpt;
+                JVM_SetThreadPriority;
+                JVM_Sleep;
+                JVM_Socket;
+                JVM_SocketAvailable;
+                JVM_SocketClose;
+                JVM_SocketShutdown;
+                JVM_StartThread;
+                JVM_StopThread;
+                JVM_SuspendThread;
+                JVM_SupportsCX8;
+                JVM_Sync;
+                JVM_Timeout;
+                JVM_TotalMemory;
+                JVM_TraceInstructions;
+                JVM_TraceMethodCalls;
+                JVM_UnloadLibrary;
+                JVM_Write;
+                JVM_Yield;
+                JVM_handle_solaris_signal;
 
-		# miscellaneous functions
-		jio_fprintf;
-		jio_printf;
-		jio_snprintf;
-		jio_vfprintf;
-		jio_vsnprintf;
+                # miscellaneous functions
+                jio_fprintf;
+                jio_printf;
+                jio_snprintf;
+                jio_vfprintf;
+                jio_vsnprintf;
 
-		# Needed because there is no JVM interface for this.
-		sysThreadAvailableStackWithSlack;
+                # Needed because there is no JVM interface for this.
+                sysThreadAvailableStackWithSlack;
 
-		# This is for Forte Analyzer profiling support.
-		AsyncGetCallTrace;
+                # This is for Forte Analyzer profiling support.
+                AsyncGetCallTrace;
 
-		# INSERT VTABLE SYMBOLS HERE
+                # INSERT VTABLE SYMBOLS HERE
 
         local:
                 *;
--- a/make/solaris/makefiles/optimized.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/optimized.make	Sun Feb 03 22:43:57 2013 +0100
@@ -62,5 +62,4 @@
 # Set the environment variable HOTSPARC_GENERIC to "true"
 # to inhibit the effect of the previous line on CFLAGS.
 
-G_SUFFIX =
 VERSION = optimized
--- a/make/solaris/makefiles/product.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/product.make	Sun Feb 03 22:43:57 2013 +0100
@@ -78,6 +78,5 @@
 # and this macro is not used.
 # LINK_LIB.CXX/POST_HOOK += $(STRIP_LIB.CXX/POST_HOOK)
 
-G_SUFFIX =
 SYSDEFS += -DPRODUCT
 VERSION = optimized
--- a/make/solaris/makefiles/saproc.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/saproc.make	Sun Feb 03 22:43:57 2013 +0100
@@ -24,20 +24,15 @@
 
 # Rules to build serviceability agent library, used by vm.make
 
-# libsaproc[_g].so: serviceability agent
+# libsaproc.so: serviceability agent
 
 SAPROC = saproc
 SADIS = sadis
 LIBSAPROC = lib$(SAPROC).so
 SADISOBJ = $(SADIS).o
 
-SAPROC_G = $(SAPROC)$(G_SUFFIX)
-LIBSAPROC_G = lib$(SAPROC_G).so
-
 LIBSAPROC_DEBUGINFO   = lib$(SAPROC).debuginfo
 LIBSAPROC_DIZ         = lib$(SAPROC).diz
-LIBSAPROC_G_DEBUGINFO = lib$(SAPROC_G).debuginfo
-LIBSAPROC_G_DIZ       = lib$(SAPROC_G).diz
 
 AGENT_DIR = $(GAMMADIR)/agent
 
@@ -113,7 +108,6 @@
 	           $(SA_LFLAGS)                                         \
 	           -o $@                                                \
 	           -ldl -ldemangle -lthread -lc
-	[ -f $(LIBSAPROC_G) ] || { ln -s $@ $(LIBSAPROC_G); }
 
 $(SADISOBJ): $(SADISSRCFILES)
 	           $(QUIETLY) $(CC)                                     \
@@ -146,11 +140,9 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(LIBSAPROC_G_DEBUGINFO) ] || { ln -s $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(LIBSAPROC_DIZ) $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO)
-	$(RM) $(LIBSAPROC_DEBUGINFO) $(LIBSAPROC_G_DEBUGINFO)
-	[ -f $(LIBSAPROC_G_DIZ) ] || { ln -s $(LIBSAPROC_DIZ) $(LIBSAPROC_G_DIZ); }
+	$(ZIPEXE) -q -y $(LIBSAPROC_DIZ) $(LIBSAPROC_DEBUGINFO)
+	$(RM) $(LIBSAPROC_DEBUGINFO)
   endif
 endif
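
With the libsaproc_g.so artifacts gone, the Full Debug Symbols tail of
this rule now produces exactly one detached-debuginfo artifact per
library: when ZIP_DEBUGINFO_FILES=1 the .debuginfo file is packed into
a .diz archive and the raw file is removed. A sketch of the resulting
pattern (variable names as in the hunk above; the surrounding rule and
strip logic are elided):

    # Pack the detached debug info into one .diz and drop the raw file.
    ifeq ($(ZIP_DEBUGINFO_FILES),1)
	$(ZIPEXE) -q -y $(LIBSAPROC_DIZ) $(LIBSAPROC_DEBUGINFO)
	$(RM) $(LIBSAPROC_DEBUGINFO)
    endif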
 
--- a/make/solaris/makefiles/vm.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/solaris/makefiles/vm.make	Sun Feb 03 22:43:57 2013 +0100
@@ -157,12 +157,9 @@
 
 JVM      = jvm
 LIBJVM   = lib$(JVM).so
-LIBJVM_G = lib$(JVM)$(G_SUFFIX).so
 
 LIBJVM_DEBUGINFO   = lib$(JVM).debuginfo
 LIBJVM_DIZ         = lib$(JVM).diz
-LIBJVM_G_DEBUGINFO = lib$(JVM)$(G_SUFFIX).debuginfo
-LIBJVM_G_DIZ       = lib$(JVM)$(G_SUFFIX).diz
 
 SPECIAL_PATHS:=adlc c1 dist gc_implementation opto shark libadt
 
@@ -291,8 +288,6 @@
 	$(QUIETLY) $(LINK_VM) $(LFLAGS_VM) -o $@ $(sort $(LIBJVM.o)) $(LIBS_VM)
 	$(QUIETLY) $(LINK_LIB.CXX/POST_HOOK)
 	$(QUIETLY) rm -f $@.1 && ln -s $@ $@.1
-	$(QUIETLY) [ -f $(LIBJVM_G) ] || ln -s $@ $(LIBJVM_G)
-	$(QUIETLY) [ -f $(LIBJVM_G).1 ] || ln -s $@.1 $(LIBJVM_G).1
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 # gobjcopy crashes on "empty" section headers with the SHF_ALLOC flag set.
 # Clear the SHF_ALLOC flag (if set) from empty section headers.
@@ -313,11 +308,9 @@
     # implied else here is no stripping at all
     endif
   endif
-	$(QUIETLY) [ -f $(LIBJVM_G_DEBUGINFO) ] || ln -s $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(LIBJVM_DIZ) $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
-	$(RM) $(LIBJVM_DEBUGINFO) $(LIBJVM_G_DEBUGINFO)
-	[ -f $(LIBJVM_G_DIZ) ] || { ln -s $(LIBJVM_DIZ) $(LIBJVM_G_DIZ); }
+	$(ZIPEXE) -q -y $(LIBJVM_DIZ) $(LIBJVM_DEBUGINFO)
+	$(RM) $(LIBJVM_DEBUGINFO)
   endif
 endif
 endif # filter -sbfast -xsbfast
--- a/make/windows/build.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/windows/build.make	Sun Feb 03 22:43:57 2013 +0100
@@ -33,7 +33,7 @@
 # SA components are built if BUILD_WIN_SA=1 is specified.
 # See notes in README. This produces files:
 #  1. sa-jdi.jar       - This is built before building jvm.dll
-#  2. sawindbg[_g].dll - Native library for SA - This is built after jvm.dll
+#  2. sawindbg.dll     - Native library for SA - This is built after jvm.dll
 #                      - Also, .lib, .map, .pdb.
 #
 # Please refer to ./makefiles/sa.make
@@ -115,7 +115,7 @@
 !endif
 
 #########################################################################
-# Parameters for VERSIONINFO resource for jvm[_g].dll.
+# Parameters for VERSIONINFO resource for jvm.dll.
 # These can be overridden via the nmake.exe command line.
 # They are overridden by RE during the control builds.
 #
@@ -225,11 +225,6 @@
 
 #########################################################################
 
-# With the jvm_g.dll now being named jvm.dll, we can't build both and place
-#   the dll's in the same directory, so we only build one at a time,
-#   re-directing the output to different output directories (done by user
-#   of this makefile).
-#
 defaultTarget: product
 
 # The product or release build is an optimized build, and is the default
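
The VERSIONINFO fields mentioned above are ordinary nmake macros, so a
NAME=value pair on the nmake.exe command line overrides the makefile
default; this is how RE stamps the control builds. An illustrative
invocation (the macro name and target here are assumptions for the
example, not verified against this makefile):

    # nmake -f build.make HOTSPOT_RELEASE_VERSION=25.0-b17 product
    # Command-line macro assignments take precedence over assignments
    # made inside the makefile itself.
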
--- a/make/windows/makefiles/defs.make	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/windows/makefiles/defs.make	Sun Feb 03 22:43:57 2013 +0100
@@ -131,23 +131,29 @@
 # overridden in some situations, e.g., a BUILD_FLAVOR != product
 # build.
 
-ifeq ($(BUILD_FLAVOR), product)
-  FULL_DEBUG_SYMBOLS ?= 1
-  ENABLE_FULL_DEBUG_SYMBOLS = $(FULL_DEBUG_SYMBOLS)
-else
-  # debug variants always get Full Debug Symbols (if available)
-  ENABLE_FULL_DEBUG_SYMBOLS = 1
+# Due to the multiple sub-make processes that occur, this logic gets
+# executed multiple times. We reduce the noise by at least checking that
+# BUILD_FLAVOR has been set.
+ifneq ($(BUILD_FLAVOR),)
+  ifeq ($(BUILD_FLAVOR), product)
+    FULL_DEBUG_SYMBOLS ?= 1
+    ENABLE_FULL_DEBUG_SYMBOLS = $(FULL_DEBUG_SYMBOLS)
+  else
+    # debug variants always get Full Debug Symbols (if available)
+    ENABLE_FULL_DEBUG_SYMBOLS = 1
+  endif
+  _JUNK_ := $(shell \
+    echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
+  MAKE_ARGS += ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)
+
+  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+    ZIP_DEBUGINFO_FILES ?= 1
+  else
+    ZIP_DEBUGINFO_FILES=0
+  endif
+  MAKE_ARGS += ZIP_DEBUGINFO_FILES=$(ZIP_DEBUGINFO_FILES)
 endif
-_JUNK_ := $(shell \
-  echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
-MAKE_ARGS += ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)
 
-ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-  ZIP_DEBUGINFO_FILES ?= 1
-else
-  ZIP_DEBUGINFO_FILES=0
-endif
-MAKE_ARGS += ZIP_DEBUGINFO_FILES=$(ZIP_DEBUGINFO_FILES)
 MAKE_ARGS += RM="$(RM)"
 MAKE_ARGS += ZIPEXE=$(ZIPEXE)
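
Because defs.make is read again by every sub-make, the unguarded form
of this block printed its INFO line once per pass. Wrapping it in
ifneq ($(BUILD_FLAVOR),) restricts the work to invocations where the
flavor is already known, and MAKE_ARGS then pins the computed values
for the children. The general shape of the pattern, with an assumed
variable name for illustration:

    # Run noisy one-shot logic only when the controlling variable is
    # set, and pass the result down so every sub-make sees one value.
    ifneq ($(SOME_FLAG),)
      _JUNK_ := $(shell echo >&2 "INFO: SOME_FLAG=$(SOME_FLAG)")
      MAKE_ARGS += SOME_FLAG=$(SOME_FLAG)
    endif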
 
--- a/make/windows/projectfiles/common/Makefile	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/windows/projectfiles/common/Makefile	Sun Feb 03 22:43:57 2013 +0100
@@ -71,41 +71,36 @@
 
 !include $(HOTSPOTWORKSPACE)/make/hotspot_version
 
-!if "$(HOTSPOT_RELEASE_VERSION)" != ""
-HOTSPOT_RELEASE_VERSION="$(HOTSPOT_RELEASE_VERSION)"
+!if "$(USER_RELEASE_SUFFIX)" != ""
+HOTSPOT_BUILD_VERSION = internal-$(USER_RELEASE_SUFFIX)
 !else
-HOTSPOT_RELEASE_VERSION="$(HS_MAJOR_VER).$(HS_MINOR_VER)-b$(HS_BUILD_NUMBER)"
+HOTSPOT_BUILD_VERSION = internal
 !endif
-!if "$(USER_RELEASE_SUFFIX)" != ""
-HOTSPOT_BUILD_VERSION$(HOTSPOT_BUILD_VERSION) = internal-$(USER_RELEASE_SUFFIX)
+!if "$(HOTSPOT_RELEASE_VERSION)" != ""
+HOTSPOT_RELEASE_VERSION="\\\"$(HOTSPOT_RELEASE_VERSION)\\\""
 !else
-HOTSPOT_BUILD_VERSION$(HOTSPOT_BUILD_VERSION) = internal
-!endif
-!if "$(HOTSPOT_BUILD_VERSION)" != ""
-HOTSPOT_RELEASE_VERSION="$(HOTSPOT_RELEASE_VERSION)-$(HOTSPOT_BUILD_VERSION)"
+HOTSPOT_RELEASE_VERSION="\\\"$(HS_MAJOR_VER).$(HS_MINOR_VER)-b$(HS_BUILD_NUMBER)-$(HOTSPOT_BUILD_VERSION)\\\""
 !endif
 !if "$(JRE_RELEASE_VERSION)" != ""
-JRE_RELEASE_VERSION="$(JRE_RELEASE_VERSION)"
+JRE_RELEASE_VERSION="\\\"$(JRE_RELEASE_VERSION)\\\""
 !else
-JRE_RELEASE_VERSION="$(JDK_MAJOR_VER).$(JDK_MINOR_VER).$(JDK_MICRO_VER)"
+JRE_RELEASE_VERSION="\\\"$(JDK_MAJOR_VER).$(JDK_MINOR_VER).$(JDK_MICRO_VER)\\\""
 !endif
 
 # Define HOTSPOT_VM_DISTRO if HOTSPOT_VM_DISTRO is set,
 # and if it is not, see if we have the src/closed directory
 !if "$(HOTSPOT_VM_DISTRO)" != ""
-HOTSPOT_VM_DISTRO="$(HOTSPOT_VM_DISTRO)"
+HOTSPOT_VM_DISTRO=$(HOTSPOT_VM_DISTRO)
 !else
 !if exists($(HOTSPOTWORKSPACE)\src\closed)
-HOTSPOT_VM_DISTRO="Java HotSpot(TM)"
+HOTSPOT_VM_DISTRO="\\\"Java HotSpot(TM)\\\""
 !else
-HOTSPOT_VM_DISTRO="OpenJDK"
+HOTSPOT_VM_DISTRO="\\\"OpenJDK\\\""
 !endif
 !endif
 
-ProjectCreatorIDEOptions =       $(ProjectCreatorIDEOptions) \
-      -define              HOTSPOT_RELEASE_VERSION=\\\"$(HOTSPOT_RELEASE_VERSION)\\\" \
-      -define              JRE_RELEASE_VERSION=\\\"$(JRE_RELEASE_VERSION)\\\" \
-      -define              HOTSPOT_VM_DISTRO=\\\"$(HOTSPOT_VM_DISTRO)\\\"
+ReleaseOptions = -define HOTSPOT_RELEASE_VERSION=$(HOTSPOT_RELEASE_VERSION) -define JRE_RELEASE_VERSION=$(JRE_RELEASE_VERSION) -define HOTSPOT_VM_DISTRO=$(HOTSPOT_VM_DISTRO)
+ProjectCreatorIDEOptions = $(ProjectCreatorIDEOptions) $(ReleaseOptions)
 
 $(HOTSPOTBUILDSPACE)/$(ProjectFile): $(HOTSPOTBUILDSPACE)/classes/ProjectCreator.class
 	@$(RUN_JAVA) -Djava.class.path="$(HOTSPOTBUILDSPACE)/classes" ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions)
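
The quoting change above moves the escaped quotes into the version
macros themselves instead of appending them in the ProjectCreator
options. Roughly one layer of escaping is consumed at each hop, so
(illustrative, with a shortened value):

    #   makefile text:     HOTSPOT_VM_DISTRO="\\\"OpenJDK\\\""
    #   -define argument:  HOTSPOT_VM_DISTRO=\"OpenJDK\"
    #   compiled source:   #define HOTSPOT_VM_DISTRO "OpenJDK"
    # i.e. the macro ends up expanding to a C string literal in the
    # VM sources.
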
--- a/make/windows/projectfiles/compiler2/ADLCompiler.dsp	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/windows/projectfiles/compiler2/ADLCompiler.dsp	Sun Feb 03 22:43:57 2013 +0100
@@ -72,11 +72,11 @@
 # ADD RSC /l 0x409
 BSC32=bscmake.exe
 # ADD BASE BSC32 /nologo
-# ADD BSC32 /o".\adlc\Debug\adlc_g.bsc"
+# ADD BSC32 /o".\adlc\Debug\adlc.bsc"
 # SUBTRACT BSC32 /nologo
 LINK32=link.exe
 # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib uuid.lib /nologo /subsystem:console /debug /machine:I386 /out:".\bin\adlc_g.exe"
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib uuid.lib /nologo /subsystem:console /debug /machine:I386 /out:".\bin\adlc.exe"
 
 !ENDIF 
 
--- a/make/windows/projectfiles/tiered/ADLCompiler.dsp	Mon Nov 05 17:03:33 2012 -0500
+++ b/make/windows/projectfiles/tiered/ADLCompiler.dsp	Sun Feb 03 22:43:57 2013 +0100
@@ -72,11 +72,11 @@
 # ADD RSC /l 0x409
 BSC32=bscmake.exe
 # ADD BASE BSC32 /nologo
-# ADD BSC32 /o".\adlc\Debug\adlc_g.bsc"
+# ADD BSC32 /o".\adlc\Debug\adlc.bsc"
 # SUBTRACT BSC32 /nologo
 LINK32=link.exe
 # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib uuid.lib /nologo /subsystem:console /debug /machine:I386 /out:".\bin\adlc_g.exe"
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib uuid.lib /nologo /subsystem:console /debug /machine:I386 /out:".\bin\adlc.exe"
 
 !ENDIF 
 
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,4985 +24,8 @@
 
 #include "precompiled.hpp"
 #include "asm/assembler.hpp"
-#include "assembler_sparc.inline.hpp"
-#include "gc_interface/collectedHeap.inline.hpp"
-#include "interpreter/interpreter.hpp"
-#include "memory/cardTableModRefBS.hpp"
-#include "memory/resourceArea.hpp"
-#include "prims/methodHandles.hpp"
-#include "runtime/biasedLocking.hpp"
-#include "runtime/interfaceSupport.hpp"
-#include "runtime/objectMonitor.hpp"
-#include "runtime/os.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "runtime/stubRoutines.hpp"
-#ifndef SERIALGC
-#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
-#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
-#include "gc_implementation/g1/heapRegion.hpp"
-#endif
-
-#ifdef PRODUCT
-#define BLOCK_COMMENT(str) /* nothing */
-#define STOP(error) stop(error)
-#else
-#define BLOCK_COMMENT(str) block_comment(str)
-#define STOP(error) block_comment(error); stop(error)
-#endif
-
-// Convert the raw encoding form into the form expected by the
-// constructor for Address.
-Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
-  assert(scale == 0, "not supported");
-  RelocationHolder rspec;
-  if (disp_reloc != relocInfo::none) {
-    rspec = Relocation::spec_simple(disp_reloc);
-  }
-
-  Register rindex = as_Register(index);
-  if (rindex != G0) {
-    Address madr(as_Register(base), rindex);
-    madr._rspec = rspec;
-    return madr;
-  } else {
-    Address madr(as_Register(base), disp);
-    madr._rspec = rspec;
-    return madr;
-  }
-}
-
-Address Argument::address_in_frame() const {
-  // Warning: In LP64 mode disp will occupy more than 10 bits, but
-  //          op codes such as ld or ldx only access disp() to get
-  //          their simm13 argument.
-  int disp = ((_number - Argument::n_register_parameters + frame::memory_parameter_word_sp_offset) * BytesPerWord) + STACK_BIAS;
-  if (is_in())
-    return Address(FP, disp); // In argument.
-  else
-    return Address(SP, disp); // Out argument.
-}
-
-static const char* argumentNames[][2] = {
-  {"A0","P0"}, {"A1","P1"}, {"A2","P2"}, {"A3","P3"}, {"A4","P4"},
-  {"A5","P5"}, {"A6","P6"}, {"A7","P7"}, {"A8","P8"}, {"A9","P9"},
-  {"A(n>9)","P(n>9)"}
-};
-
-const char* Argument::name() const {
-  int nofArgs = sizeof argumentNames / sizeof argumentNames[0];
-  int num = number();
-  if (num >= nofArgs)  num = nofArgs - 1;
-  return argumentNames[num][is_in() ? 1 : 0];
-}
-
-void Assembler::print_instruction(int inst) {
-  const char* s;
-  switch (inv_op(inst)) {
-  default:         s = "????"; break;
-  case call_op:    s = "call"; break;
-  case branch_op:
-    switch (inv_op2(inst)) {
-      case fb_op2:     s = "fb";   break;
-      case fbp_op2:    s = "fbp";  break;
-      case br_op2:     s = "br";   break;
-      case bp_op2:     s = "bp";   break;
-      case cb_op2:     s = "cb";   break;
-      case bpr_op2: {
-        if (is_cbcond(inst)) {
-          s = is_cxb(inst) ? "cxb" : "cwb";
-        } else {
-          s = "bpr";
-        }
-        break;
-      }
-      default:         s = "????"; break;
-    }
-  }
-  ::tty->print("%s", s);
-}
-
-
-// Patch instruction inst at offset inst_pos to refer to dest_pos
-// and return the resulting instruction.
-// We should have pcs, not offsets, but since all is relative, it will work out
-// OK.
-int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
-
-  int m; // mask for displacement field
-  int v; // new value for displacement field
-  const int word_aligned_ones = -4;
-  switch (inv_op(inst)) {
-  default: ShouldNotReachHere();
-  case call_op:    m = wdisp(word_aligned_ones, 0, 30);  v = wdisp(dest_pos, inst_pos, 30); break;
-  case branch_op:
-    switch (inv_op2(inst)) {
-      case fbp_op2:    m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
-      case bp_op2:     m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
-      case fb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
-      case br_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
-      case cb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
-      case bpr_op2: {
-        if (is_cbcond(inst)) {
-          m = wdisp10(word_aligned_ones, 0);
-          v = wdisp10(dest_pos, inst_pos);
-        } else {
-          m = wdisp16(word_aligned_ones, 0);
-          v = wdisp16(dest_pos, inst_pos);
-        }
-        break;
-      }
-      default: ShouldNotReachHere();
-    }
-  }
-  return  inst & ~m  |  v;
-}
-
-// Return the offset of the branch destination of instruction inst
-// at offset pos.
-// Should have pcs, but since all is relative, it works out.
-int Assembler::branch_destination(int inst, int pos) {
-  int r;
-  switch (inv_op(inst)) {
-  default: ShouldNotReachHere();
-  case call_op:        r = inv_wdisp(inst, pos, 30);  break;
-  case branch_op:
-    switch (inv_op2(inst)) {
-      case fbp_op2:    r = inv_wdisp(  inst, pos, 19);  break;
-      case bp_op2:     r = inv_wdisp(  inst, pos, 19);  break;
-      case fb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
-      case br_op2:     r = inv_wdisp(  inst, pos, 22);  break;
-      case cb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
-      case bpr_op2: {
-        if (is_cbcond(inst)) {
-          r = inv_wdisp10(inst, pos);
-        } else {
-          r = inv_wdisp16(inst, pos);
-        }
-        break;
-      }
-      default: ShouldNotReachHere();
-    }
-  }
-  return r;
-}
+#include "asm/assembler.inline.hpp"
 
 int AbstractAssembler::code_fill_byte() {
   return 0x00;                  // illegal instruction 0x00000000
 }
-
-Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
-  switch (in) {
-  case rc_z:   return equal;
-  case rc_lez: return lessEqual;
-  case rc_lz:  return less;
-  case rc_nz:  return notEqual;
-  case rc_gz:  return greater;
-  case rc_gez: return greaterEqual;
-  default:
-    ShouldNotReachHere();
-  }
-  return equal;
-}
-
-// Generate a bunch 'o stuff (including v9's)
-#ifndef PRODUCT
-void Assembler::test_v9() {
-  add(    G0, G1, G2 );
-  add(    G3,  0, G4 );
-
-  addcc(  G5, G6, G7 );
-  addcc(  I0,  1, I1 );
-  addc(   I2, I3, I4 );
-  addc(   I5, -1, I6 );
-  addccc( I7, L0, L1 );
-  addccc( L2, (1 << 12) - 2, L3 );
-
-  Label lbl1, lbl2, lbl3;
-
-  bind(lbl1);
-
-  bpr( rc_z,    true, pn, L4, pc(),  relocInfo::oop_type );
-  delayed()->nop();
-  bpr( rc_lez, false, pt, L5, lbl1);
-  delayed()->nop();
-
-  fb( f_never,     true, pc() + 4,  relocInfo::none);
-  delayed()->nop();
-  fb( f_notEqual, false, lbl2 );
-  delayed()->nop();
-
-  fbp( f_notZero,        true, fcc0, pn, pc() - 4,  relocInfo::none);
-  delayed()->nop();
-  fbp( f_lessOrGreater, false, fcc1, pt, lbl3 );
-  delayed()->nop();
-
-  br( equal,  true, pc() + 1024, relocInfo::none);
-  delayed()->nop();
-  br( lessEqual, false, lbl1 );
-  delayed()->nop();
-  br( never, false, lbl1 );
-  delayed()->nop();
-
-  bp( less,               true, icc, pn, pc(), relocInfo::none);
-  delayed()->nop();
-  bp( lessEqualUnsigned, false, xcc, pt, lbl2 );
-  delayed()->nop();
-
-  call( pc(), relocInfo::none);
-  delayed()->nop();
-  call( lbl3 );
-  delayed()->nop();
-
-
-  casa(  L6, L7, O0 );
-  casxa( O1, O2, O3, 0 );
-
-  udiv(   O4, O5, O7 );
-  udiv(   G0, (1 << 12) - 1, G1 );
-  sdiv(   G1, G2, G3 );
-  sdiv(   G4, -((1 << 12) - 1), G5 );
-  udivcc( G6, G7, I0 );
-  udivcc( I1, -((1 << 12) - 2), I2 );
-  sdivcc( I3, I4, I5 );
-  sdivcc( I6, -((1 << 12) - 0), I7 );
-
-  done();
-  retry();
-
-  fadd( FloatRegisterImpl::S, F0,  F1, F2 );
-  fsub( FloatRegisterImpl::D, F34, F0, F62 );
-
-  fcmp(  FloatRegisterImpl::Q, fcc0, F0, F60);
-  fcmpe( FloatRegisterImpl::S, fcc1, F31, F30);
-
-  ftox( FloatRegisterImpl::D, F2, F4 );
-  ftoi( FloatRegisterImpl::Q, F4, F8 );
-
-  ftof( FloatRegisterImpl::S, FloatRegisterImpl::Q, F3, F12 );
-
-  fxtof( FloatRegisterImpl::S, F4, F5 );
-  fitof( FloatRegisterImpl::D, F6, F8 );
-
-  fmov( FloatRegisterImpl::Q, F16, F20 );
-  fneg( FloatRegisterImpl::S, F6, F7 );
-  fabs( FloatRegisterImpl::D, F10, F12 );
-
-  fmul( FloatRegisterImpl::Q,  F24, F28, F32 );
-  fmul( FloatRegisterImpl::S,  FloatRegisterImpl::D,  F8, F9, F14 );
-  fdiv( FloatRegisterImpl::S,  F10, F11, F12 );
-
-  fsqrt( FloatRegisterImpl::S, F13, F14 );
-
-  flush( L0, L1 );
-  flush( L2, -1 );
-
-  flushw();
-
-  illtrap( (1 << 22) - 2);
-
-  impdep1( 17, (1 << 19) - 1 );
-  impdep2( 3,  0 );
-
-  jmpl( L3, L4, L5 );
-  delayed()->nop();
-  jmpl( L6, -1, L7, Relocation::spec_simple(relocInfo::none));
-  delayed()->nop();
-
-
-  ldf(    FloatRegisterImpl::S, O0, O1, F15 );
-  ldf(    FloatRegisterImpl::D, O2, -1, F14 );
-
-
-  ldfsr(  O3, O4 );
-  ldfsr(  O5, -1 );
-  ldxfsr( O6, O7 );
-  ldxfsr( I0, -1 );
-
-  ldfa(  FloatRegisterImpl::D, I1, I2, 1, F16 );
-  ldfa(  FloatRegisterImpl::Q, I3, -1,    F36 );
-
-  ldsb(  I4, I5, I6 );
-  ldsb(  I7, -1, G0 );
-  ldsh(  G1, G3, G4 );
-  ldsh(  G5, -1, G6 );
-  ldsw(  G7, L0, L1 );
-  ldsw(  L2, -1, L3 );
-  ldub(  L4, L5, L6 );
-  ldub(  L7, -1, O0 );
-  lduh(  O1, O2, O3 );
-  lduh(  O4, -1, O5 );
-  lduw(  O6, O7, G0 );
-  lduw(  G1, -1, G2 );
-  ldx(   G3, G4, G5 );
-  ldx(   G6, -1, G7 );
-  ldd(   I0, I1, I2 );
-  ldd(   I3, -1, I4 );
-
-  ldsba(  I5, I6, 2, I7 );
-  ldsba(  L0, -1, L1 );
-  ldsha(  L2, L3, 3, L4 );
-  ldsha(  L5, -1, L6 );
-  ldswa(  L7, O0, (1 << 8) - 1, O1 );
-  ldswa(  O2, -1, O3 );
-  lduba(  O4, O5, 0, O6 );
-  lduba(  O7, -1, I0 );
-  lduha(  I1, I2, 1, I3 );
-  lduha(  I4, -1, I5 );
-  lduwa(  I6, I7, 2, L0 );
-  lduwa(  L1, -1, L2 );
-  ldxa(   L3, L4, 3, L5 );
-  ldxa(   L6, -1, L7 );
-  ldda(   G0, G1, 4, G2 );
-  ldda(   G3, -1, G4 );
-
-  ldstub(  G5, G6, G7 );
-  ldstub(  O0, -1, O1 );
-
-  ldstuba( O2, O3, 5, O4 );
-  ldstuba( O5, -1, O6 );
-
-  and3(    I0, L0, O0 );
-  and3(    G7, -1, O7 );
-  andcc(   L2, I2, G2 );
-  andcc(   L4, -1, G4 );
-  andn(    I5, I6, I7 );
-  andn(    I6, -1, I7 );
-  andncc(  I5, I6, I7 );
-  andncc(  I7, -1, I6 );
-  or3(     I5, I6, I7 );
-  or3(     I7, -1, I6 );
-  orcc(    I5, I6, I7 );
-  orcc(    I7, -1, I6 );
-  orn(     I5, I6, I7 );
-  orn(     I7, -1, I6 );
-  orncc(   I5, I6, I7 );
-  orncc(   I7, -1, I6 );
-  xor3(    I5, I6, I7 );
-  xor3(    I7, -1, I6 );
-  xorcc(   I5, I6, I7 );
-  xorcc(   I7, -1, I6 );
-  xnor(    I5, I6, I7 );
-  xnor(    I7, -1, I6 );
-  xnorcc(  I5, I6, I7 );
-  xnorcc(  I7, -1, I6 );
-
-  membar( Membar_mask_bits(StoreStore | LoadStore | StoreLoad | LoadLoad | Sync | MemIssue | Lookaside ) );
-  membar( StoreStore );
-  membar( LoadStore );
-  membar( StoreLoad );
-  membar( LoadLoad );
-  membar( Sync );
-  membar( MemIssue );
-  membar( Lookaside );
-
-  fmov( FloatRegisterImpl::S, f_ordered,  true, fcc2, F16, F17 );
-  fmov( FloatRegisterImpl::D, rc_lz, L5, F18, F20 );
-
-  movcc( overflowClear,  false, icc, I6, L4 );
-  movcc( f_unorderedOrEqual, true, fcc2, (1 << 10) - 1, O0 );
-
-  movr( rc_nz, I5, I6, I7 );
-  movr( rc_gz, L1, -1,  L2 );
-
-  mulx(  I5, I6, I7 );
-  mulx(  I7, -1, I6 );
-  sdivx( I5, I6, I7 );
-  sdivx( I7, -1, I6 );
-  udivx( I5, I6, I7 );
-  udivx( I7, -1, I6 );
-
-  umul(   I5, I6, I7 );
-  umul(   I7, -1, I6 );
-  smul(   I5, I6, I7 );
-  smul(   I7, -1, I6 );
-  umulcc( I5, I6, I7 );
-  umulcc( I7, -1, I6 );
-  smulcc( I5, I6, I7 );
-  smulcc( I7, -1, I6 );
-
-  mulscc(   I5, I6, I7 );
-  mulscc(   I7, -1, I6 );
-
-  nop();
-
-
-  popc( G0,  G1);
-  popc( -1, G2);
-
-  prefetch(   L1, L2,    severalReads );
-  prefetch(   L3, -1,    oneRead );
-  prefetcha(  O3, O2, 6, severalWritesAndPossiblyReads );
-  prefetcha(  G2, -1,    oneWrite );
-
-  rett( I7, I7);
-  delayed()->nop();
-  rett( G0, -1, relocInfo::none);
-  delayed()->nop();
-
-  save(    I5, I6, I7 );
-  save(    I7, -1, I6 );
-  restore( I5, I6, I7 );
-  restore( I7, -1, I6 );
-
-  saved();
-  restored();
-
-  sethi( 0xaaaaaaaa, I3, Relocation::spec_simple(relocInfo::none));
-
-  sll(  I5, I6, I7 );
-  sll(  I7, 31, I6 );
-  srl(  I5, I6, I7 );
-  srl(  I7,  0, I6 );
-  sra(  I5, I6, I7 );
-  sra(  I7, 30, I6 );
-  sllx( I5, I6, I7 );
-  sllx( I7, 63, I6 );
-  srlx( I5, I6, I7 );
-  srlx( I7,  0, I6 );
-  srax( I5, I6, I7 );
-  srax( I7, 62, I6 );
-
-  sir( -1 );
-
-  stbar();
-
-  stf(    FloatRegisterImpl::Q, F40, G0, I7 );
-  stf(    FloatRegisterImpl::S, F18, I3, -1 );
-
-  stfsr(  L1, L2 );
-  stfsr(  I7, -1 );
-  stxfsr( I6, I5 );
-  stxfsr( L4, -1 );
-
-  stfa(  FloatRegisterImpl::D, F22, I6, I7, 7 );
-  stfa(  FloatRegisterImpl::Q, F44, G0, -1 );
-
-  stb(  L5, O2, I7 );
-  stb(  I7, I6, -1 );
-  sth(  L5, O2, I7 );
-  sth(  I7, I6, -1 );
-  stw(  L5, O2, I7 );
-  stw(  I7, I6, -1 );
-  stx(  L5, O2, I7 );
-  stx(  I7, I6, -1 );
-  std(  L5, O2, I7 );
-  std(  I7, I6, -1 );
-
-  stba(  L5, O2, I7, 8 );
-  stba(  I7, I6, -1    );
-  stha(  L5, O2, I7, 9 );
-  stha(  I7, I6, -1    );
-  stwa(  L5, O2, I7, 0 );
-  stwa(  I7, I6, -1    );
-  stxa(  L5, O2, I7, 11 );
-  stxa(  I7, I6, -1     );
-  stda(  L5, O2, I7, 12 );
-  stda(  I7, I6, -1     );
-
-  sub(    I5, I6, I7 );
-  sub(    I7, -1, I6 );
-  subcc(  I5, I6, I7 );
-  subcc(  I7, -1, I6 );
-  subc(   I5, I6, I7 );
-  subc(   I7, -1, I6 );
-  subccc( I5, I6, I7 );
-  subccc( I7, -1, I6 );
-
-  swap( I5, I6, I7 );
-  swap( I7, -1, I6 );
-
-  swapa(   G0, G1, 13, G2 );
-  swapa(   I7, -1,     I6 );
-
-  taddcc(    I5, I6, I7 );
-  taddcc(    I7, -1, I6 );
-  taddcctv(  I5, I6, I7 );
-  taddcctv(  I7, -1, I6 );
-
-  tsubcc(    I5, I6, I7 );
-  tsubcc(    I7, -1, I6 );
-  tsubcctv(  I5, I6, I7 );
-  tsubcctv(  I7, -1, I6 );
-
-  trap( overflowClear, xcc, G0, G1 );
-  trap( lessEqual,     icc, I7, 17 );
-
-  bind(lbl2);
-  bind(lbl3);
-
-  code()->decode();
-}
-
-// Generate a bunch 'o stuff unique to V8
-void Assembler::test_v8_onlys() {
-  Label lbl1;
-
-  cb( cp_0or1or2, false, pc() - 4, relocInfo::none);
-  delayed()->nop();
-  cb( cp_never,    true, lbl1);
-  delayed()->nop();
-
-  cpop1(1, 2, 3, 4);
-  cpop2(5, 6, 7, 8);
-
-  ldc( I0, I1, 31);
-  ldc( I2, -1,  0);
-
-  lddc( I4, I4, 30);
-  lddc( I6,  0, 1 );
-
-  ldcsr( L0, L1, 0);
-  ldcsr( L1, (1 << 12) - 1, 17 );
-
-  stc( 31, L4, L5);
-  stc( 30, L6, -(1 << 12) );
-
-  stdc( 0, L7, G0);
-  stdc( 1, G1, 0 );
-
-  stcsr( 16, G2, G3);
-  stcsr( 17, G4, 1 );
-
-  stdcq( 4, G5, G6);
-  stdcq( 5, G7, -1 );
-
-  bind(lbl1);
-
-  code()->decode();
-}
-#endif
-
-// Implementation of MacroAssembler
-
-void MacroAssembler::null_check(Register reg, int offset) {
-  if (needs_explicit_null_check((intptr_t)offset)) {
-    // provoke OS NULL exception if reg = NULL by
-    // accessing M[reg] w/o changing any registers
-    ld_ptr(reg, 0, G0);
-  }
-  else {
-    // nothing to do, (later) access of M[reg + offset]
-    // will provoke OS NULL exception if reg = NULL
-  }
-}
-
-// Ring buffer jumps
-
-#ifndef PRODUCT
-void MacroAssembler::ret(  bool trace )   { if (trace) {
-                                                    mov(I7, O7); // traceable register
-                                                    JMP(O7, 2 * BytesPerInstWord);
-                                                  } else {
-                                                    jmpl( I7, 2 * BytesPerInstWord, G0 );
-                                                  }
-                                                }
-
-void MacroAssembler::retl( bool trace )  { if (trace) JMP(O7, 2 * BytesPerInstWord);
-                                                 else jmpl( O7, 2 * BytesPerInstWord, G0 ); }
-#endif /* PRODUCT */
-
-
-void MacroAssembler::jmp2(Register r1, Register r2, const char* file, int line ) {
-  assert_not_delayed();
-  // This can only be traceable if r1 & r2 are visible after a window save
-  if (TraceJumps) {
-#ifndef PRODUCT
-    save_frame(0);
-    verify_thread();
-    ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0);
-    add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1);
-    sll(O0, exact_log2(4*sizeof(intptr_t)), O2);
-    add(O2, O1, O1);
-
-    add(r1->after_save(), r2->after_save(), O2);
-    set((intptr_t)file, O3);
-    set(line, O4);
-    Label L;
-    // get nearby pc, store jmp target
-    call(L, relocInfo::none);  // No relocation for call to pc+0x8
-    delayed()->st(O2, O1, 0);
-    bind(L);
-
-    // store nearby pc
-    st(O7, O1, sizeof(intptr_t));
-    // store file
-    st(O3, O1, 2*sizeof(intptr_t));
-    // store line
-    st(O4, O1, 3*sizeof(intptr_t));
-    add(O0, 1, O0);
-    and3(O0, JavaThread::jump_ring_buffer_size  - 1, O0);
-    st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()));
-    restore();
-#endif /* PRODUCT */
-  }
-  jmpl(r1, r2, G0);
-}
-void MacroAssembler::jmp(Register r1, int offset, const char* file, int line ) {
-  assert_not_delayed();
-  // This can only be traceable if r1 is visible after a window save
-  if (TraceJumps) {
-#ifndef PRODUCT
-    save_frame(0);
-    verify_thread();
-    ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0);
-    add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1);
-    sll(O0, exact_log2(4*sizeof(intptr_t)), O2);
-    add(O2, O1, O1);
-
-    add(r1->after_save(), offset, O2);
-    set((intptr_t)file, O3);
-    set(line, O4);
-    Label L;
-    // get nearby pc, store jmp target
-    call(L, relocInfo::none);  // No relocation for call to pc+0x8
-    delayed()->st(O2, O1, 0);
-    bind(L);
-
-    // store nearby pc
-    st(O7, O1, sizeof(intptr_t));
-    // store file
-    st(O3, O1, 2*sizeof(intptr_t));
-    // store line
-    st(O4, O1, 3*sizeof(intptr_t));
-    add(O0, 1, O0);
-    and3(O0, JavaThread::jump_ring_buffer_size  - 1, O0);
-    st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()));
-    restore();
-#endif /* PRODUCT */
-  }
-  jmp(r1, offset);
-}
-
-// This code sequence is relocatable to any address, even on LP64.
-void MacroAssembler::jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line) {
-  assert_not_delayed();
-  // Force fixed length sethi because NativeJump and NativeFarCall don't handle
-  // variable length instruction streams.
-  patchable_sethi(addrlit, temp);
-  Address a(temp, addrlit.low10() + offset);  // Add the offset to the displacement.
-  if (TraceJumps) {
-#ifndef PRODUCT
-    // Must do the add here so relocation can find the remainder of the
-    // value to be relocated.
-    add(a.base(), a.disp(), a.base(), addrlit.rspec(offset));
-    save_frame(0);
-    verify_thread();
-    ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0);
-    add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1);
-    sll(O0, exact_log2(4*sizeof(intptr_t)), O2);
-    add(O2, O1, O1);
-
-    set((intptr_t)file, O3);
-    set(line, O4);
-    Label L;
-
-    // get nearby pc, store jmp target
-    call(L, relocInfo::none);  // No relocation for call to pc+0x8
-    delayed()->st(a.base()->after_save(), O1, 0);
-    bind(L);
-
-    // store nearby pc
-    st(O7, O1, sizeof(intptr_t));
-    // store file
-    st(O3, O1, 2*sizeof(intptr_t));
-    // store line
-    st(O4, O1, 3*sizeof(intptr_t));
-    add(O0, 1, O0);
-    and3(O0, JavaThread::jump_ring_buffer_size  - 1, O0);
-    st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()));
-    restore();
-    jmpl(a.base(), G0, d);
-#else
-    jmpl(a.base(), a.disp(), d);
-#endif /* PRODUCT */
-  } else {
-    jmpl(a.base(), a.disp(), d);
-  }
-}
-
-void MacroAssembler::jump(const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line) {
-  jumpl(addrlit, temp, G0, offset, file, line);
-}
-
-
-// Conditional breakpoint (for assertion checks in assembly code)
-void MacroAssembler::breakpoint_trap(Condition c, CC cc) {
-  trap(c, cc, G0, ST_RESERVED_FOR_USER_0);
-}
-
-// We want to use ST_BREAKPOINT here, but the debugger is confused by it.
-void MacroAssembler::breakpoint_trap() {
-  trap(ST_RESERVED_FOR_USER_0);
-}
-
-// flush windows (except current) using flushw instruction if avail.
-void MacroAssembler::flush_windows() {
-  if (VM_Version::v9_instructions_work())  flushw();
-  else                                     flush_windows_trap();
-}
-
-// Write serialization page so VM thread can do a pseudo remote membar
-// We use the current thread pointer to calculate a thread specific
-// offset to write to within the page. This minimizes bus traffic
-// due to cache line collision.
-void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
-  srl(thread, os::get_serialize_page_shift_count(), tmp2);
-  if (Assembler::is_simm13(os::vm_page_size())) {
-    and3(tmp2, (os::vm_page_size() - sizeof(int)), tmp2);
-  }
-  else {
-    set((os::vm_page_size() - sizeof(int)), tmp1);
-    and3(tmp2, tmp1, tmp2);
-  }
-  set(os::get_memory_serialize_page(), tmp1);
-  st(G0, tmp1, tmp2);
-}
-
-
-
-void MacroAssembler::enter() {
-  Unimplemented();
-}
-
-void MacroAssembler::leave() {
-  Unimplemented();
-}
-
-void MacroAssembler::mult(Register s1, Register s2, Register d) {
-  if(VM_Version::v9_instructions_work()) {
-    mulx (s1, s2, d);
-  } else {
-    smul (s1, s2, d);
-  }
-}
-
-void MacroAssembler::mult(Register s1, int simm13a, Register d) {
-  if(VM_Version::v9_instructions_work()) {
-    mulx (s1, simm13a, d);
-  } else {
-    smul (s1, simm13a, d);
-  }
-}
-
-
-#ifdef ASSERT
-void MacroAssembler::read_ccr_v8_assert(Register ccr_save) {
-  const Register s1 = G3_scratch;
-  const Register s2 = G4_scratch;
-  Label get_psr_test;
-  // Get the condition codes the V8 way.
-  read_ccr_trap(s1);
-  mov(ccr_save, s2);
-  // This is a test of V8 which has icc but not xcc
-  // so mask off the xcc bits
-  and3(s2, 0xf, s2);
-  // Compare condition codes from the V8 and V9 ways.
-  subcc(s2, s1, G0);
-  br(Assembler::notEqual, true, Assembler::pt, get_psr_test);
-  delayed()->breakpoint_trap();
-  bind(get_psr_test);
-}
-
-void MacroAssembler::write_ccr_v8_assert(Register ccr_save) {
-  const Register s1 = G3_scratch;
-  const Register s2 = G4_scratch;
-  Label set_psr_test;
-  // Write out the saved condition codes the V8 way
-  write_ccr_trap(ccr_save, s1, s2);
-  // Read back the condition codes using the V9 instruction
-  rdccr(s1);
-  mov(ccr_save, s2);
-  // This is a test of V8 which has icc but not xcc
-  // so mask off the xcc bits
-  and3(s2, 0xf, s2);
-  and3(s1, 0xf, s1);
-  // Compare the V8 way with the V9 way.
-  subcc(s2, s1, G0);
-  br(Assembler::notEqual, true, Assembler::pt, set_psr_test);
-  delayed()->breakpoint_trap();
-  bind(set_psr_test);
-}
-#else
-#define read_ccr_v8_assert(x)
-#define write_ccr_v8_assert(x)
-#endif // ASSERT
-
-void MacroAssembler::read_ccr(Register ccr_save) {
-  if (VM_Version::v9_instructions_work()) {
-    rdccr(ccr_save);
-    // Test code sequence used on V8.  Do not move above rdccr.
-    read_ccr_v8_assert(ccr_save);
-  } else {
-    read_ccr_trap(ccr_save);
-  }
-}
-
-void MacroAssembler::write_ccr(Register ccr_save) {
-  if (VM_Version::v9_instructions_work()) {
-    // Test code sequence used on V8.  Do not move below wrccr.
-    write_ccr_v8_assert(ccr_save);
-    wrccr(ccr_save);
-  } else {
-    const Register temp_reg1 = G3_scratch;
-    const Register temp_reg2 = G4_scratch;
-    write_ccr_trap(ccr_save, temp_reg1, temp_reg2);
-  }
-}
-
-
-// Calls to C land
-
-#ifdef ASSERT
-// a hook for debugging
-static Thread* reinitialize_thread() {
-  return ThreadLocalStorage::thread();
-}
-#else
-#define reinitialize_thread ThreadLocalStorage::thread
-#endif
-
-#ifdef ASSERT
-address last_get_thread = NULL;
-#endif
-
-// call this when G2_thread is not known to be valid
-void MacroAssembler::get_thread() {
-  save_frame(0);                // to avoid clobbering O0
-  mov(G1, L0);                  // avoid clobbering G1
-  mov(G5_method, L1);           // avoid clobbering G5
-  mov(G3, L2);                  // avoid clobbering G3 also
-  mov(G4, L5);                  // avoid clobbering G4
-#ifdef ASSERT
-  AddressLiteral last_get_thread_addrlit(&last_get_thread);
-  set(last_get_thread_addrlit, L3);
-  inc(L4, get_pc(L4) + 2 * BytesPerInstWord); // skip getpc() code + inc + st_ptr to point L4 at call
-  st_ptr(L4, L3, 0);
-#endif
-  call(CAST_FROM_FN_PTR(address, reinitialize_thread), relocInfo::runtime_call_type);
-  delayed()->nop();
-  mov(L0, G1);
-  mov(L1, G5_method);
-  mov(L2, G3);
-  mov(L5, G4);
-  restore(O0, 0, G2_thread);
-}
-
-static Thread* verify_thread_subroutine(Thread* gthread_value) {
-  Thread* correct_value = ThreadLocalStorage::thread();
-  guarantee(gthread_value == correct_value, "G2_thread value must be the thread");
-  return correct_value;
-}
-
-void MacroAssembler::verify_thread() {
-  if (VerifyThread) {
-    // NOTE: this chops off the heads of the 64-bit O registers.
-#ifdef CC_INTERP
-    save_frame(0);
-#else
-    // make sure G2_thread contains the right value
-    save_frame_and_mov(0, Lmethod, Lmethod);   // to avoid clobbering O0 (and propagate Lmethod for -Xprof)
-    mov(G1, L1);                // avoid clobbering G1
-    // G2 saved below
-    mov(G3, L3);                // avoid clobbering G3
-    mov(G4, L4);                // avoid clobbering G4
-    mov(G5_method, L5);         // avoid clobbering G5_method
-#endif /* CC_INTERP */
-#if defined(COMPILER2) && !defined(_LP64)
-    // Save & restore possible 64-bit Long arguments in G-regs
-    srlx(G1,32,L0);
-    srlx(G4,32,L6);
-#endif
-    call(CAST_FROM_FN_PTR(address,verify_thread_subroutine), relocInfo::runtime_call_type);
-    delayed()->mov(G2_thread, O0);
-
-    mov(L1, G1);                // Restore G1
-    // G2 restored below
-    mov(L3, G3);                // restore G3
-    mov(L4, G4);                // restore G4
-    mov(L5, G5_method);         // restore G5_method
-#if defined(COMPILER2) && !defined(_LP64)
-    // Save & restore possible 64-bit Long arguments in G-regs
-    sllx(L0,32,G2);             // Move old high G1 bits high in G2
-    srl(G1, 0,G1);              // Clear current high G1 bits
-    or3 (G1,G2,G1);             // Recover 64-bit G1
-    sllx(L6,32,G2);             // Move old high G4 bits high in G2
-    srl(G4, 0,G4);              // Clear current high G4 bits
-    or3 (G4,G2,G4);             // Recover 64-bit G4
-#endif
-    restore(O0, 0, G2_thread);
-  }
-}
-
-
-void MacroAssembler::save_thread(const Register thread_cache) {
-  verify_thread();
-  if (thread_cache->is_valid()) {
-    assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile");
-    mov(G2_thread, thread_cache);
-  }
-  if (VerifyThread) {
-    // smash G2_thread, as if the VM were about to anyway
-    set(0x67676767, G2_thread);
-  }
-}
-
-
-void MacroAssembler::restore_thread(const Register thread_cache) {
-  if (thread_cache->is_valid()) {
-    assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile");
-    mov(thread_cache, G2_thread);
-    verify_thread();
-  } else {
-    // do it the slow way
-    get_thread();
-  }
-}
-
-
-// %%% maybe get rid of [re]set_last_Java_frame
-void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_Java_pc) {
-  assert_not_delayed();
-  Address flags(G2_thread, JavaThread::frame_anchor_offset() +
-                           JavaFrameAnchor::flags_offset());
-  Address pc_addr(G2_thread, JavaThread::last_Java_pc_offset());
-
-  // Always set last_Java_pc and flags first because once last_Java_sp is visible
-  // has_last_Java_frame is true and users will look at the rest of the fields.
-  // (Note: flags should always be zero before we get here so it doesn't need to be set.)
-
-#ifdef ASSERT
-  // Verify that last_Java_pc was zeroed on return to Java
-  Label PcOk;
-  save_frame(0);                // to avoid clobbering O0
-  ld_ptr(pc_addr, L0);
-  br_null_short(L0, Assembler::pt, PcOk);
-  STOP("last_Java_pc not zeroed before leaving Java");
-  bind(PcOk);
-
-  // Verify that flags was zeroed on return to Java
-  Label FlagsOk;
-  ld(flags, L0);
-  tst(L0);
-  br(Assembler::zero, false, Assembler::pt, FlagsOk);
-  delayed() -> restore();
-  STOP("flags not zeroed before leaving Java");
-  bind(FlagsOk);
-#endif /* ASSERT */
-  //
-  // When returning from calling out from Java mode the frame anchor's last_Java_pc
-  // will always be set to NULL. It is set here so that if we are doing a call to
-  // native (not VM) that we capture the known pc and don't have to rely on the
-  // native call having a standard frame linkage where we can find the pc.
-
-  if (last_Java_pc->is_valid()) {
-    st_ptr(last_Java_pc, pc_addr);
-  }
-
-#ifdef _LP64
-#ifdef ASSERT
-  // Make sure that we have an odd stack
-  Label StackOk;
-  andcc(last_java_sp, 0x01, G0);
-  br(Assembler::notZero, false, Assembler::pt, StackOk);
-  delayed()->nop();
-  STOP("Stack Not Biased in set_last_Java_frame");
-  bind(StackOk);
-#endif // ASSERT
-  assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
-  add( last_java_sp, STACK_BIAS, G4_scratch );
-  st_ptr(G4_scratch, G2_thread, JavaThread::last_Java_sp_offset());
-#else
-  st_ptr(last_java_sp, G2_thread, JavaThread::last_Java_sp_offset());
-#endif // _LP64
-}
-
-void MacroAssembler::reset_last_Java_frame(void) {
-  assert_not_delayed();
-
-  Address sp_addr(G2_thread, JavaThread::last_Java_sp_offset());
-  Address pc_addr(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
-  Address flags  (G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
-
-#ifdef ASSERT
-  // check that it WAS previously set
-#ifdef CC_INTERP
-    save_frame(0);
-#else
-    save_frame_and_mov(0, Lmethod, Lmethod);     // Propagate Lmethod to helper frame for -Xprof
-#endif /* CC_INTERP */
-    ld_ptr(sp_addr, L0);
-    tst(L0);
-    breakpoint_trap(Assembler::zero, Assembler::ptr_cc);
-    restore();
-#endif // ASSERT
-
-  st_ptr(G0, sp_addr);
-  // Always return last_Java_pc to zero
-  st_ptr(G0, pc_addr);
-  // Always null flags after return to Java
-  st(G0, flags);
-}
-
-
-void MacroAssembler::call_VM_base(
-  Register        oop_result,
-  Register        thread_cache,
-  Register        last_java_sp,
-  address         entry_point,
-  int             number_of_arguments,
-  bool            check_exceptions)
-{
-  assert_not_delayed();
-
-  // determine last_java_sp register
-  if (!last_java_sp->is_valid()) {
-    last_java_sp = SP;
-  }
-  // debugging support
-  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
-
-  // 64-bit last_java_sp is biased!
-  set_last_Java_frame(last_java_sp, noreg);
-  if (VerifyThread)  mov(G2_thread, O0); // about to be smashed; pass early
-  save_thread(thread_cache);
-  // do the call
-  call(entry_point, relocInfo::runtime_call_type);
-  if (!VerifyThread)
-    delayed()->mov(G2_thread, O0);  // pass thread as first argument
-  else
-    delayed()->nop();             // (thread already passed)
-  restore_thread(thread_cache);
-  reset_last_Java_frame();
-
-  // check for pending exceptions. use Gtemp as scratch register.
-  if (check_exceptions) {
-    check_and_forward_exception(Gtemp);
-  }
-
-#ifdef ASSERT
-  set(badHeapWordVal, G3);
-  set(badHeapWordVal, G4);
-  set(badHeapWordVal, G5);
-#endif
-
-  // get oop result if there is one and reset the value in the thread
-  if (oop_result->is_valid()) {
-    get_vm_result(oop_result);
-  }
-}
-
-void MacroAssembler::check_and_forward_exception(Register scratch_reg)
-{
-  Label L;
-
-  check_and_handle_popframe(scratch_reg);
-  check_and_handle_earlyret(scratch_reg);
-
-  Address exception_addr(G2_thread, Thread::pending_exception_offset());
-  ld_ptr(exception_addr, scratch_reg);
-  br_null_short(scratch_reg, pt, L);
-  // we use O7 linkage so that forward_exception_entry has the issuing PC
-  call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
-  delayed()->nop();
-  bind(L);
-}
-
-
-void MacroAssembler::check_and_handle_popframe(Register scratch_reg) {
-}
-
-
-void MacroAssembler::check_and_handle_earlyret(Register scratch_reg) {
-}
-
-
-void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
-  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
-}
-
-
-void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
-  // O0 is reserved for the thread
-  mov(arg_1, O1);
-  call_VM(oop_result, entry_point, 1, check_exceptions);
-}
-
-
-void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
-  // O0 is reserved for the thread
-  mov(arg_1, O1);
-  mov(arg_2, O2); assert(arg_2 != O1, "smashed argument");
-  call_VM(oop_result, entry_point, 2, check_exceptions);
-}
-
-
-void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
-  // O0 is reserved for the thread
-  mov(arg_1, O1);
-  mov(arg_2, O2); assert(arg_2 != O1,                "smashed argument");
-  mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument");
-  call_VM(oop_result, entry_point, 3, check_exceptions);
-}
-
-
-
-// Note: The following call_VM overloadings are useful when a "save"
-// has already been performed by a stub, and the last Java frame is
-// the previous one.  In that case, last_java_sp must be passed as FP
-// instead of SP.
-
-
-void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
-  call_VM_base(oop_result, noreg, last_java_sp, entry_point, number_of_arguments, check_exceptions);
-}
-
-
-void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
-  // O0 is reserved for the thread
-  mov(arg_1, O1);
-  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
-}
-
-
-void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
-  // O0 is reserved for the thread
-  mov(arg_1, O1);
-  mov(arg_2, O2); assert(arg_2 != O1, "smashed argument");
-  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
-}
-
-
-void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
-  // O0 is reserved for the thread
-  mov(arg_1, O1);
-  mov(arg_2, O2); assert(arg_2 != O1,                "smashed argument");
-  mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument");
-  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
-}
-
-
-
-void MacroAssembler::call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments) {
-  assert_not_delayed();
-  save_thread(thread_cache);
-  // do the call
-  call(entry_point, relocInfo::runtime_call_type);
-  delayed()->nop();
-  restore_thread(thread_cache);
-#ifdef ASSERT
-  set(badHeapWordVal, G3);
-  set(badHeapWordVal, G4);
-  set(badHeapWordVal, G5);
-#endif
-}
-
-
-void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments) {
-  call_VM_leaf_base(thread_cache, entry_point, number_of_arguments);
-}
-
-
-void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1) {
-  mov(arg_1, O0);
-  call_VM_leaf(thread_cache, entry_point, 1);
-}
-
-
-void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2) {
-  mov(arg_1, O0);
-  mov(arg_2, O1); assert(arg_2 != O0, "smashed argument");
-  call_VM_leaf(thread_cache, entry_point, 2);
-}
-
-
-void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3) {
-  mov(arg_1, O0);
-  mov(arg_2, O1); assert(arg_2 != O0,                "smashed argument");
-  mov(arg_3, O2); assert(arg_3 != O0 && arg_3 != O1, "smashed argument");
-  call_VM_leaf(thread_cache, entry_point, 3);
-}
-
-
-void MacroAssembler::get_vm_result(Register oop_result) {
-  verify_thread();
-  Address vm_result_addr(G2_thread, JavaThread::vm_result_offset());
-  ld_ptr(    vm_result_addr, oop_result);
-  st_ptr(G0, vm_result_addr);
-  verify_oop(oop_result);
-}
-
-
-void MacroAssembler::get_vm_result_2(Register metadata_result) {
-  verify_thread();
-  Address vm_result_addr_2(G2_thread, JavaThread::vm_result_2_offset());
-  ld_ptr(vm_result_addr_2, metadata_result);
-  st_ptr(G0, vm_result_addr_2);
-}
-
-
-// We require that C code which does not return a value in vm_result will
-// leave it undisturbed.
-void MacroAssembler::set_vm_result(Register oop_result) {
-  verify_thread();
-  Address vm_result_addr(G2_thread, JavaThread::vm_result_offset());
-  verify_oop(oop_result);
-
-# ifdef ASSERT
-    // Check that we are not overwriting any other oop.
-#ifdef CC_INTERP
-    save_frame(0);
-#else
-    save_frame_and_mov(0, Lmethod, Lmethod);     // Propagate Lmethod for -Xprof
-#endif /* CC_INTERP */
-    ld_ptr(vm_result_addr, L0);
-    tst(L0);
-    restore();
-    breakpoint_trap(notZero, Assembler::ptr_cc);
-    // }
-# endif
-
-  st_ptr(oop_result, vm_result_addr);
-}
-
-
-void MacroAssembler::ic_call(address entry, bool emit_delay) {
-  RelocationHolder rspec = virtual_call_Relocation::spec(pc());
-  patchable_set((intptr_t)Universe::non_oop_word(), G5_inline_cache_reg);
-  relocate(rspec);
-  call(entry, relocInfo::none);
-  if (emit_delay) {
-    delayed()->nop();
-  }
-}
-
-
-void MacroAssembler::card_table_write(jbyte* byte_map_base,
-                                      Register tmp, Register obj) {
-#ifdef _LP64
-  srlx(obj, CardTableModRefBS::card_shift, obj);
-#else
-  srl(obj, CardTableModRefBS::card_shift, obj);
-#endif
-  assert(tmp != obj, "need separate temp reg");
-  set((address) byte_map_base, tmp);
-  stb(G0, tmp, obj);
-}
-
-
-void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
-  address save_pc;
-  int shiftcnt;
-#ifdef _LP64
-# ifdef CHECK_DELAY
-  assert_not_delayed((char*) "cannot put two instructions in delay slot");
-# endif
-  v9_dep();
-  save_pc = pc();
-
-  int msb32 = (int) (addrlit.value() >> 32);
-  int lsb32 = (int) (addrlit.value());
-
-  if (msb32 == 0 && lsb32 >= 0) {
-    Assembler::sethi(lsb32, d, addrlit.rspec());
-  }
-  else if (msb32 == -1) {
-    Assembler::sethi(~lsb32, d, addrlit.rspec());
-    xor3(d, ~low10(~0), d);
-  }
-  else {
-    Assembler::sethi(msb32, d, addrlit.rspec());  // msb 22-bits
-    if (msb32 & 0x3ff)                            // Any bits?
-      or3(d, msb32 & 0x3ff, d);                   // msb 32-bits are now in lsb 32
-    if (lsb32 & 0xFFFFFC00) {                     // done?
-      if ((lsb32 >> 20) & 0xfff) {                // Any bits set?
-        sllx(d, 12, d);                           // Make room for next 12 bits
-        or3(d, (lsb32 >> 20) & 0xfff, d);         // Or in next 12
-        shiftcnt = 0;                             // We already shifted
-      }
-      else
-        shiftcnt = 12;
-      if ((lsb32 >> 10) & 0x3ff) {
-        sllx(d, shiftcnt + 10, d);                // Make room for last 10 bits
-        or3(d, (lsb32 >> 10) & 0x3ff, d);         // Or in next 10
-        shiftcnt = 0;
-      }
-      else
-        shiftcnt = 10;
-      sllx(d, shiftcnt + 10, d);                  // Shift leaving disp field 0'd
-    }
-    else
-      sllx(d, 32, d);
-  }
-  // Pad out the instruction sequence so it can be patched later.
-  if (ForceRelocatable || (addrlit.rtype() != relocInfo::none &&
-                           addrlit.rtype() != relocInfo::runtime_call_type)) {
-    while (pc() < (save_pc + (7 * BytesPerInstWord)))
-      nop();
-  }
-#else
-  Assembler::sethi(addrlit.value(), d, addrlit.rspec());
-#endif
-}
-
-
-void MacroAssembler::sethi(const AddressLiteral& addrlit, Register d) {
-  internal_sethi(addrlit, d, false);
-}
-
-
-void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d) {
-  internal_sethi(addrlit, d, true);
-}
-
-
-int MacroAssembler::insts_for_sethi(address a, bool worst_case) {
-#ifdef _LP64
-  if (worst_case)  return 7;
-  intptr_t iaddr = (intptr_t) a;
-  int msb32 = (int) (iaddr >> 32);
-  int lsb32 = (int) (iaddr);
-  int count;
-  if (msb32 == 0 && lsb32 >= 0)
-    count = 1;
-  else if (msb32 == -1)
-    count = 2;
-  else {
-    count = 2;
-    if (msb32 & 0x3ff)
-      count++;
-    if (lsb32 & 0xFFFFFC00 ) {
-      if ((lsb32 >> 20) & 0xfff)  count += 2;
-      if ((lsb32 >> 10) & 0x3ff)  count += 2;
-    }
-  }
-  return count;
-#else
-  return 1;
-#endif
-}
-
-int MacroAssembler::worst_case_insts_for_set() {
-  return insts_for_sethi(NULL, true) + 1;
-}
-
-
-// Keep in sync with MacroAssembler::insts_for_internal_set
-void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
-  intptr_t value = addrlit.value();
-
-  if (!ForceRelocatable && addrlit.rspec().type() == relocInfo::none) {
-    // can optimize
-    if (-4096 <= value && value <= 4095) {
-      or3(G0, value, d); // setsw (this leaves upper 32 bits sign-extended)
-      return;
-    }
-    if (inv_hi22(hi22(value)) == value) {
-      sethi(addrlit, d);
-      return;
-    }
-  }
-  assert_not_delayed((char*) "cannot put two instructions in delay slot");
-  internal_sethi(addrlit, d, ForceRelocatable);
-  if (ForceRelocatable || addrlit.rspec().type() != relocInfo::none || addrlit.low10() != 0) {
-    add(d, addrlit.low10(), d, addrlit.rspec());
-  }
-}
-
-// Keep in sync with MacroAssembler::internal_set
-int MacroAssembler::insts_for_internal_set(intptr_t value) {
-  // can optimize
-  if (-4096 <= value && value <= 4095) {
-    return 1;
-  }
-  if (inv_hi22(hi22(value)) == value) {
-    return insts_for_sethi((address) value);
-  }
-  int count = insts_for_sethi((address) value);
-  AddressLiteral al(value);
-  if (al.low10() != 0) {
-    count++;
-  }
-  return count;
-}
-
-void MacroAssembler::set(const AddressLiteral& al, Register d) {
-  internal_set(al, d, false);
-}
-
-void MacroAssembler::set(intptr_t value, Register d) {
-  AddressLiteral al(value);
-  internal_set(al, d, false);
-}
-
-void MacroAssembler::set(address addr, Register d, RelocationHolder const& rspec) {
-  AddressLiteral al(addr, rspec);
-  internal_set(al, d, false);
-}
-
-void MacroAssembler::patchable_set(const AddressLiteral& al, Register d) {
-  internal_set(al, d, true);
-}
-
-void MacroAssembler::patchable_set(intptr_t value, Register d) {
-  AddressLiteral al(value);
-  internal_set(al, d, true);
-}
-
-
-void MacroAssembler::set64(jlong value, Register d, Register tmp) {
-  assert_not_delayed();
-  v9_dep();
-
-  int hi = (int)(value >> 32);
-  int lo = (int)(value & ~0);
-  // (Matcher::isSimpleConstant64 knows about the following optimizations.)
-  if (Assembler::is_simm13(lo) && value == lo) {
-    or3(G0, lo, d);
-  } else if (hi == 0) {
-    Assembler::sethi(lo, d);   // hardware version zero-extends to upper 32
-    if (low10(lo) != 0)
-      or3(d, low10(lo), d);
-  }
-  else if (hi == -1) {
-    Assembler::sethi(~lo, d);  // hardware version zero-extends to upper 32
-    xor3(d, low10(lo) ^ ~low10(~0), d);
-  }
-  else if (lo == 0) {
-    if (Assembler::is_simm13(hi)) {
-      or3(G0, hi, d);
-    } else {
-      Assembler::sethi(hi, d);   // hardware version zero-extends to upper 32
-      if (low10(hi) != 0)
-        or3(d, low10(hi), d);
-    }
-    sllx(d, 32, d);
-  }
-  else {
-    Assembler::sethi(hi, tmp);
-    Assembler::sethi(lo,   d); // macro assembler version sign-extends
-    if (low10(hi) != 0)
-      or3 (tmp, low10(hi), tmp);
-    if (low10(lo) != 0)
-      or3 (  d, low10(lo),   d);
-    sllx(tmp, 32, tmp);
-    or3 (d, tmp, d);
-  }
-}
-
-int MacroAssembler::insts_for_set64(jlong value) {
-  v9_dep();
-
-  int hi = (int) (value >> 32);
-  int lo = (int) (value & ~0);
-  int count = 0;
-
-  // (Matcher::isSimpleConstant64 knows about the following optimizations.)
-  if (Assembler::is_simm13(lo) && value == lo) {
-    count++;
-  } else if (hi == 0) {
-    count++;
-    if (low10(lo) != 0)
-      count++;
-  }
-  else if (hi == -1) {
-    count += 2;
-  }
-  else if (lo == 0) {
-    if (Assembler::is_simm13(hi)) {
-      count++;
-    } else {
-      count++;
-      if (low10(hi) != 0)
-        count++;
-    }
-    count++;
-  }
-  else {
-    count += 2;
-    if (low10(hi) != 0)
-      count++;
-    if (low10(lo) != 0)
-      count++;
-    count += 2;
-  }
-  return count;
-}
-
-// compute size in bytes of sparc frame, given
-// number of extraWords
-int MacroAssembler::total_frame_size_in_bytes(int extraWords) {
-
-  int nWords = frame::memory_parameter_word_sp_offset;
-
-  nWords += extraWords;
-
-  if (nWords & 1) ++nWords; // round up to double-word
-
-  return nWords * BytesPerWord;
-}
-
-
-// save_frame: given number of "extra" words in frame,
-// issue approp. save instruction (p 200, v8 manual)
-
-void MacroAssembler::save_frame(int extraWords) {
-  int delta = -total_frame_size_in_bytes(extraWords);
-  if (is_simm13(delta)) {
-    save(SP, delta, SP);
-  } else {
-    set(delta, G3_scratch);
-    save(SP, G3_scratch, SP);
-  }
-}
-
-
-void MacroAssembler::save_frame_c1(int size_in_bytes) {
-  if (is_simm13(-size_in_bytes)) {
-    save(SP, -size_in_bytes, SP);
-  } else {
-    set(-size_in_bytes, G3_scratch);
-    save(SP, G3_scratch, SP);
-  }
-}
-
-
-void MacroAssembler::save_frame_and_mov(int extraWords,
-                                        Register s1, Register d1,
-                                        Register s2, Register d2) {
-  assert_not_delayed();
-
-  // The trick here is to use precisely the same memory word
-  // that trap handlers also use to save the register.
-  // This word cannot be used for any other purpose, but
-  // it works fine to save the register's value, whether or not
-  // an interrupt flushes register windows at any given moment!
-  Address s1_addr;
-  if (s1->is_valid() && (s1->is_in() || s1->is_local())) {
-    s1_addr = s1->address_in_saved_window();
-    st_ptr(s1, s1_addr);
-  }
-
-  Address s2_addr;
-  if (s2->is_valid() && (s2->is_in() || s2->is_local())) {
-    s2_addr = s2->address_in_saved_window();
-    st_ptr(s2, s2_addr);
-  }
-
-  save_frame(extraWords);
-
-  if (s1_addr.base() == SP) {
-    ld_ptr(s1_addr.after_save(), d1);
-  } else if (s1->is_valid()) {
-    mov(s1->after_save(), d1);
-  }
-
-  if (s2_addr.base() == SP) {
-    ld_ptr(s2_addr.after_save(), d2);
-  } else if (s2->is_valid()) {
-    mov(s2->after_save(), d2);
-  }
-}
-
-
-AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
-  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
-  int index = oop_recorder()->allocate_metadata_index(obj);
-  RelocationHolder rspec = metadata_Relocation::spec(index);
-  return AddressLiteral((address)obj, rspec);
-}
-
-AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
-  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
-  int index = oop_recorder()->find_index(obj);
-  RelocationHolder rspec = metadata_Relocation::spec(index);
-  return AddressLiteral((address)obj, rspec);
-}
-
-
-AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
-  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "not an oop");
-  int oop_index = oop_recorder()->find_index(obj);
-  return AddressLiteral(obj, oop_Relocation::spec(oop_index));
-}
-
-void  MacroAssembler::set_narrow_oop(jobject obj, Register d) {
-  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int oop_index = oop_recorder()->find_index(obj);
-  RelocationHolder rspec = oop_Relocation::spec(oop_index);
-
-  assert_not_delayed();
-  // Relocation with special format (see relocInfo_sparc.hpp).
-  relocate(rspec, 1);
-  // Assembler::sethi(0x3fffff, d);
-  emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(0x3fffff) );
-  // Don't add relocation for 'add'. Do patching during 'sethi' processing.
-  add(d, 0x3ff, d);
-
-}
-
-void  MacroAssembler::set_narrow_klass(Klass* k, Register d) {
-  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int klass_index = oop_recorder()->find_index(k);
-  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  narrowOop encoded_k = oopDesc::encode_klass(k);
-
-  assert_not_delayed();
-  // Relocation with special format (see relocInfo_sparc.hpp).
-  relocate(rspec, 1);
-  // Assembler::sethi(encoded_k, d);
-  emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(encoded_k) );
-  // Don't add relocation for 'add'. Do patching during 'sethi' processing.
-  add(d, low10(encoded_k), d);
-
-}
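-
-// Sketch of the sethi/add split used above (not VM code): a 32-bit value
-// decomposes into its high 22 bits (set by sethi) and its low 10 bits
-// (or'd/added in), so the two-instruction pattern can be patched later by
-// rewriting both fields.
-#include <cassert>
-#include <cstdint>
-static uint32_t hi22_sk(uint32_t x)  { return x >> 10; }
-static uint32_t low10_sk(uint32_t x) { return x & 0x3ff; }
-int main() {
-  uint32_t v = 0x12345678;
-  assert(((hi22_sk(v) << 10) | low10_sk(v)) == v);  // lossless split
-  return 0;
-}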
-
-void MacroAssembler::align(int modulus) {
-  while (offset() % modulus != 0) nop();
-}
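-
-// Sketch of the padding arithmetic above (not VM code): each nop is one
-// 4-byte instruction (a SPARC assumption), so alignment costs however
-// many nops it takes to reach the next multiple of 'modulus'.
-#include <cassert>
-static int nops_to_align_sk(int offset, int modulus) {
-  int n = 0;
-  while (offset % modulus != 0) { offset += 4; ++n; }
-  return n;
-}
-int main() {
-  assert(nops_to_align_sk(12, 8) == 1);
-  assert(nops_to_align_sk(16, 8) == 0);
-  return 0;
-}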
-
-
-void MacroAssembler::safepoint() {
-  relocate(breakpoint_Relocation::spec(breakpoint_Relocation::safepoint));
-}
-
-
-void RegistersForDebugging::print(outputStream* s) {
-  FlagSetting fs(Debugging, true);
-  int j;
-  for (j = 0; j < 8; ++j) {
-    if (j != 6) { s->print("i%d = ", j); os::print_location(s, i[j]); }
-    else        { s->print( "fp = "   ); os::print_location(s, i[j]); }
-  }
-  s->cr();
-
-  for (j = 0;  j < 8;  ++j) {
-    s->print("l%d = ", j); os::print_location(s, l[j]);
-  }
-  s->cr();
-
-  for (j = 0; j < 8; ++j) {
-    if (j != 6) { s->print("o%d = ", j); os::print_location(s, o[j]); }
-    else        { s->print( "sp = "   ); os::print_location(s, o[j]); }
-  }
-  s->cr();
-
-  for (j = 0; j < 8; ++j) {
-    s->print("g%d = ", j); os::print_location(s, g[j]);
-  }
-  s->cr();
-
-  // print out floats with compression
-  for (j = 0; j < 32; ) {
-    jfloat val = f[j];
-    int last = j;
-    for ( ;  last+1 < 32;  ++last ) {
-      char b1[1024], b2[1024];
-      sprintf(b1, "%f", val);
-      sprintf(b2, "%f", f[last+1]);
-      if (strcmp(b1, b2))
-        break;
-    }
-    s->print("f%d", j);
-    if ( j != last )  s->print(" - f%d", last);
-    s->print(" = %f", val);
-    s->fill_to(25);
-    s->print_cr(" (0x%x)", val);
-    j = last + 1;
-  }
-  s->cr();
-
-  // and doubles (evens only)
-  for (j = 0; j < 32; ) {
-    jdouble val = d[j];
-    int last = j;
-    for ( ;  last+1 < 32;  ++last ) {
-      char b1[1024], b2[1024];
-      sprintf(b1, "%f", val);
-      sprintf(b2, "%f", d[last+1]);
-      if (strcmp(b1, b2))
-        break;
-    }
-    s->print("d%d", 2 * j);
-    if ( j != last )  s->print(" - d%d", last);
-    s->print(" = %f", val);
-    s->fill_to(30);
-    s->print("(0x%x)", *(int*)&val);
-    s->fill_to(42);
-    s->print_cr("(0x%x)", *(1 + (int*)&val));
-    j = last + 1;
-  }
-  s->cr();
-}
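-
-// Standalone sketch of the "compression" used above (not VM code):
-// consecutive registers are grouped while their printed forms match,
-// so runs of identical values collapse to a single "fN - fM = v" line.
-#include <cstdio>
-#include <cstring>
-static void print_compressed_sk(const float* f, int n) {
-  for (int j = 0; j < n; ) {
-    int last = j;
-    char b1[64], b2[64];
-    snprintf(b1, sizeof(b1), "%f", f[j]);
-    for (; last + 1 < n; ++last) {
-      snprintf(b2, sizeof(b2), "%f", f[last + 1]);
-      if (strcmp(b1, b2)) break;
-    }
-    if (j != last) printf("f%d - f%d = %f\n", j, last, f[j]);
-    else           printf("f%d = %f\n", j, f[j]);
-    j = last + 1;
-  }
-}
-int main() {
-  float f[4] = { 0.0f, 0.0f, 1.5f, 1.5f };
-  print_compressed_sk(f, 4);   // prints two grouped lines
-  return 0;
-}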
-
-void RegistersForDebugging::save_registers(MacroAssembler* a) {
-  a->sub(FP, round_to(sizeof(RegistersForDebugging), sizeof(jdouble)) - STACK_BIAS, O0);
-  a->flush_windows();
-  int i;
-  for (i = 0; i < 8; ++i) {
-    a->ld_ptr(as_iRegister(i)->address_in_saved_window().after_save(), L1);  a->st_ptr( L1, O0, i_offset(i));
-    a->ld_ptr(as_lRegister(i)->address_in_saved_window().after_save(), L1);  a->st_ptr( L1, O0, l_offset(i));
-    a->st_ptr(as_oRegister(i)->after_save(), O0, o_offset(i));
-    a->st_ptr(as_gRegister(i)->after_save(), O0, g_offset(i));
-  }
-  for (i = 0;  i < 32; ++i) {
-    a->stf(FloatRegisterImpl::S, as_FloatRegister(i), O0, f_offset(i));
-  }
-  for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
-    a->stf(FloatRegisterImpl::D, as_FloatRegister(i), O0, d_offset(i));
-  }
-}
-
-void RegistersForDebugging::restore_registers(MacroAssembler* a, Register r) {
-  for (int i = 1; i < 8;  ++i) {
-    a->ld_ptr(r, g_offset(i), as_gRegister(i));
-  }
-  for (int j = 0; j < 32; ++j) {
-    a->ldf(FloatRegisterImpl::S, O0, f_offset(j), as_FloatRegister(j));
-  }
-  for (int k = 0; k < (VM_Version::v9_instructions_work() ? 64 : 32); k += 2) {
-    a->ldf(FloatRegisterImpl::D, O0, d_offset(k), as_FloatRegister(k));
-  }
-}
-
-
-// pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
-void MacroAssembler::push_fTOS() {
-  // %%%%%% need to implement this
-}
-
-// pops double TOS element from CPU stack and pushes on FPU stack
-void MacroAssembler::pop_fTOS() {
-  // %%%%%% need to implement this
-}
-
-void MacroAssembler::empty_FPU_stack() {
-  // %%%%%% need to implement this
-}
-
-void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * file, int line) {
-  // plausibility check for oops
-  if (!VerifyOops) return;
-
-  if (reg == G0)  return;       // always NULL, which is always an oop
-
-  BLOCK_COMMENT("verify_oop {");
-  char buffer[64];
-#ifdef COMPILER1
-  if (CommentedAssembly) {
-    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
-    block_comment(buffer);
-  }
-#endif
-
-  int len = strlen(file) + strlen(msg) + 1 + 4;
-  sprintf(buffer, "%d", line);
-  len += strlen(buffer);
-  sprintf(buffer, " at offset %d ", offset());
-  len += strlen(buffer);
-  char * real_msg = new char[len];
-  sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);
-
-  // Call indirectly to solve generation ordering problem
-  AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address());
-
-  // Make some space on stack above the current register window.
-  // Enough to hold 8 64-bit registers.
-  add(SP,-8*8,SP);
-
-  // Save some 64-bit registers; a normal 'save' chops the heads off
-  // of 64-bit longs in the 32-bit build.
-  stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8);
-  stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8);
-  mov(reg,O0); // Move arg into O0; arg might be in O7 which is about to be crushed
-  stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8);
-
-  // Size of set() should stay the same
-  patchable_set((intptr_t)real_msg, O1);
-  // Load address to call to into O7
-  load_ptr_contents(a, O7);
-  // Register call to verify_oop_subroutine
-  callr(O7, G0);
-  delayed()->nop();
-  // recover frame size
-  add(SP, 8*8,SP);
-  BLOCK_COMMENT("} verify_oop");
-}
-
-void MacroAssembler::_verify_oop_addr(Address addr, const char* msg, const char * file, int line) {
-  // plausibility check for oops
-  if (!VerifyOops) return;
-
-  char buffer[64];
-  sprintf(buffer, "%d", line);
-  int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer);
-  sprintf(buffer, " at SP+%d ", addr.disp());
-  len += strlen(buffer);
-  char * real_msg = new char[len];
-  sprintf(real_msg, "%s at SP+%d (%s:%d)", msg, addr.disp(), file, line);
-
-  // Call indirectly to solve generation ordering problem
-  AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address());
-
-  // Make some space on stack above the current register window.
-  // Enough to hold 8 64-bit registers.
-  add(SP,-8*8,SP);
-
-  // Save some 64-bit registers; a normal 'save' chops the heads off
-  // of 64-bit longs in the 32-bit build.
-  stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8);
-  stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8);
-  ld_ptr(addr.base(), addr.disp() + 8*8, O0); // Load arg into O0; arg might be in O7 which is about to be crushed
-  stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8);
-
-  // Size of set() should stay the same
-  patchable_set((intptr_t)real_msg, O1);
-  // Load address to call to into O7
-  load_ptr_contents(a, O7);
-  // Register call to verify_oop_subroutine
-  callr(O7, G0);
-  delayed()->nop();
-  // recover frame size
-  add(SP, 8*8,SP);
-}
-
-// side-door communication with signalHandler in os_solaris.cpp
-address MacroAssembler::_verify_oop_implicit_branch[3] = { NULL };
-
-// This macro is expanded just once; it creates shared code.  Contract:
-// receives an oop in O0.  Must restore O0 & O7 from TLS.  Must not smash ANY
-// registers, including flags.  May not use a register 'save', as this blows
-// the high bits of the O-regs if they contain Long values.  Acts as a 'leaf'
-// call.
-void MacroAssembler::verify_oop_subroutine() {
-  assert( VM_Version::v9_instructions_work(), "VerifyOops not supported for V8" );
-
-  // Leaf call; no frame.
-  Label succeed, fail, null_or_fail;
-
-  // O0 and O7 were saved already (O0 in O0's TLS home, O7 in O5's TLS home).
-  // O0 is now the oop to be checked.  O7 is the return address.
-  Register O0_obj = O0;
-
-  // Save some more registers for temps.
-  stx(O2,SP,frame::register_save_words*wordSize+STACK_BIAS+2*8);
-  stx(O3,SP,frame::register_save_words*wordSize+STACK_BIAS+3*8);
-  stx(O4,SP,frame::register_save_words*wordSize+STACK_BIAS+4*8);
-  stx(O5,SP,frame::register_save_words*wordSize+STACK_BIAS+5*8);
-
-  // Save flags
-  Register O5_save_flags = O5;
-  rdccr( O5_save_flags );
-
-  { // count number of verifies
-    Register O2_adr   = O2;
-    Register O3_accum = O3;
-    inc_counter(StubRoutines::verify_oop_count_addr(), O2_adr, O3_accum);
-  }
-
-  Register O2_mask = O2;
-  Register O3_bits = O3;
-  Register O4_temp = O4;
-
-  // mark lower end of faulting range
-  assert(_verify_oop_implicit_branch[0] == NULL, "set once");
-  _verify_oop_implicit_branch[0] = pc();
-
-  // We can't check the mark oop because it could be in the process of
-  // locking or unlocking while this is running.
-  set(Universe::verify_oop_mask (), O2_mask);
-  set(Universe::verify_oop_bits (), O3_bits);
-
-  // assert((obj & oop_mask) == oop_bits);
-  and3(O0_obj, O2_mask, O4_temp);
-  cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail);
-
-  if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
-    // the null_or_fail case is useless; must test for null separately
-    br_null_short(O0_obj, pn, succeed);
-  }
-
-  // Check the Klass* of this object for being in the right area of memory.
-  // Cannot do the load in the delay slot above in case O0 is null
-  load_klass(O0_obj, O0_obj);
-  // assert(klass != NULL)
-  br_null_short(O0_obj, pn, fail);
-  // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers
-
-  wrccr( O5_save_flags ); // Restore CCR's
-
-  // mark upper end of faulting range
-  _verify_oop_implicit_branch[1] = pc();
-
-  //-----------------------
-  // all tests pass
-  bind(succeed);
-
-  // Restore prior 64-bit registers
-  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+0*8,O0);
-  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+1*8,O1);
-  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+2*8,O2);
-  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+3*8,O3);
-  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+4*8,O4);
-  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+5*8,O5);
-
-  retl();                       // Leaf return; restore prior O7 in delay slot
-  delayed()->ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+7*8,O7);
-
-  //-----------------------
-  bind(null_or_fail);           // nulls are less common but OK
-  br_null(O0_obj, false, pt, succeed);
-  delayed()->wrccr( O5_save_flags ); // Restore CCR's
-
-  //-----------------------
-  // report failure:
-  bind(fail);
-  _verify_oop_implicit_branch[2] = pc();
-
-  wrccr( O5_save_flags ); // Restore CCR's
-
-  save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));
-
-  // stop_subroutine expects message pointer in I1.
-  mov(I1, O1);
-
-  // Restore prior 64-bit registers
-  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+0*8,I0);
-  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+1*8,I1);
-  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+2*8,I2);
-  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+3*8,I3);
-  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+4*8,I4);
-  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+5*8,I5);
-
-  // factor long stop-sequence into subroutine to save space
-  assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet");
-
-  // call indirectly to solve generation ordering problem
-  AddressLiteral al(StubRoutines::Sparc::stop_subroutine_entry_address());
-  load_ptr_contents(al, O5);
-  jmpl(O5, 0, O7);
-  delayed()->nop();
-}
-
-
-void MacroAssembler::stop(const char* msg) {
-  // save frame first to get O7 for return address
-  // add one word to the size in case the struct is an odd number of words long.
-  // It must be doubleword-aligned for storing doubles into it.
-
-    save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));
-
-    // stop_subroutine expects message pointer in I1.
-    // Size of set() should stay the same
-    patchable_set((intptr_t)msg, O1);
-
-    // factor long stop-sequence into subroutine to save space
-    assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet");
-
-    // call indirectly to solve generation ordering problem
-    AddressLiteral a(StubRoutines::Sparc::stop_subroutine_entry_address());
-    load_ptr_contents(a, O5);
-    jmpl(O5, 0, O7);
-    delayed()->nop();
-
-    breakpoint_trap();   // make stop actually stop rather than writing
-                         // unnoticeable results in the output files.
-
-    // restore(); done in callee to save space!
-}
-
-
-void MacroAssembler::warn(const char* msg) {
-  save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));
-  RegistersForDebugging::save_registers(this);
-  mov(O0, L0);
-  // Size of set() should stay the same
-  patchable_set((intptr_t)msg, O0);
-  call( CAST_FROM_FN_PTR(address, warning) );
-  delayed()->nop();
-//  ret();
-//  delayed()->restore();
-  RegistersForDebugging::restore_registers(this, L0);
-  restore();
-}
-
-
-void MacroAssembler::untested(const char* what) {
-  // We must be able to turn interactive prompting off
-  // in order to run automated test scripts on the VM.
-  // Use the flag ShowMessageBoxOnError.
-
-  char* b = new char[1024];
-  sprintf(b, "untested: %s", what);
-
-  if (ShowMessageBoxOnError) { STOP(b); }
-  else                       { warn(b); }
-}
-
-
-void MacroAssembler::stop_subroutine() {
-  RegistersForDebugging::save_registers(this);
-
-  // for the sake of the debugger, stick a PC on the current frame
-  // (this assumes that the caller has performed an extra "save")
-  mov(I7, L7);
-  add(O7, -7 * BytesPerInt, I7);
-
-  save_frame(); // one more save to free up another O7 register
-  mov(I0, O1); // addr of reg save area
-
-  // We expect pointer to message in I1. Caller must set it up in O1
-  mov(I1, O0); // get msg
-  call (CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
-  delayed()->nop();
-
-  restore();
-
-  RegistersForDebugging::restore_registers(this, O0);
-
-  save_frame(0);
-  call(CAST_FROM_FN_PTR(address,breakpoint));
-  delayed()->nop();
-  restore();
-
-  mov(L7, I7);
-  retl();
-  delayed()->restore(); // see stop above
-}
-
-
-void MacroAssembler::debug(char* msg, RegistersForDebugging* regs) {
-  if ( ShowMessageBoxOnError ) {
-    JavaThread* thread = JavaThread::current();
-    JavaThreadState saved_state = thread->thread_state();
-    thread->set_thread_state(_thread_in_vm);
-    {
-      // In order to get locks to work, we need to fake an in_VM state
-      ttyLocker ttyl;
-      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
-      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
-        BytecodeCounter::print();
-      }
-      if (os::message_box(msg, "Execution stopped, print registers?"))
-        regs->print(::tty);
-    }
-    BREAKPOINT;
-    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
-  }
-  else {
-     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
-  }
-  assert(false, err_msg("DEBUG MESSAGE: %s", msg));
-}
-
-#ifndef PRODUCT
-void MacroAssembler::test() {
-  ResourceMark rm;
-
-  CodeBuffer cb("test", 10000, 10000);
-  MacroAssembler* a = new MacroAssembler(&cb);
-  VM_Version::allow_all();
-  a->test_v9();
-  a->test_v8_onlys();
-  VM_Version::revert();
-
-  StubRoutines::Sparc::test_stop_entry()();
-}
-#endif
-
-
-void MacroAssembler::calc_mem_param_words(Register Rparam_words, Register Rresult) {
-  subcc( Rparam_words, Argument::n_register_parameters, Rresult); // how many mem words?
-  Label no_extras;
-  br( negative, true, pt, no_extras ); // if neg, clear reg
-  delayed()->set(0, Rresult);          // annulled, so only if taken
-  bind( no_extras );
-}
-
-
-void MacroAssembler::calc_frame_size(Register Rextra_words, Register Rresult) {
-#ifdef _LP64
-  add(Rextra_words, frame::memory_parameter_word_sp_offset, Rresult);
-#else
-  add(Rextra_words, frame::memory_parameter_word_sp_offset + 1, Rresult);
-#endif
-  bclr(1, Rresult);
-  sll(Rresult, LogBytesPerWord, Rresult);  // Rresult has total frame bytes
-}
-
-
-void MacroAssembler::calc_frame_size_and_save(Register Rextra_words, Register Rresult) {
-  calc_frame_size(Rextra_words, Rresult);
-  neg(Rresult);
-  save(SP, Rresult, SP);
-}
-
-
-// ---------------------------------------------------------
-Assembler::RCondition cond2rcond(Assembler::Condition c) {
-  switch (c) {
-    /*case zero: */
-    case Assembler::equal:        return Assembler::rc_z;
-    case Assembler::lessEqual:    return Assembler::rc_lez;
-    case Assembler::less:         return Assembler::rc_lz;
-    /*case notZero:*/
-    case Assembler::notEqual:     return Assembler::rc_nz;
-    case Assembler::greater:      return Assembler::rc_gz;
-    case Assembler::greaterEqual: return Assembler::rc_gez;
-  }
-  ShouldNotReachHere();
-  return Assembler::rc_z;
-}
-
-// compares (32 bit) register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS
-void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {
-  tst(s1);
-  br (c, a, p, L);
-}
-
-// Compares a pointer register with zero and branches on null.
-// Does a test & branch on 32-bit systems and a register-branch on 64-bit.
-void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) {
-  assert_not_delayed();
-#ifdef _LP64
-  bpr( rc_z, a, p, s1, L );
-#else
-  tst(s1);
-  br ( zero, a, p, L );
-#endif
-}
-
-void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) {
-  assert_not_delayed();
-#ifdef _LP64
-  bpr( rc_nz, a, p, s1, L );
-#else
-  tst(s1);
-  br ( notZero, a, p, L );
-#endif
-}
-
-// Compare registers and branch with nop in delay slot or cbcond without delay slot.
-
-// Compare integer (32 bit) values (icc only).
-void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
-                                      Predict p, Label& L) {
-  assert_not_delayed();
-  if (use_cbcond(L)) {
-    Assembler::cbcond(c, icc, s1, s2, L);
-  } else {
-    cmp(s1, s2);
-    br(c, false, p, L);
-    delayed()->nop();
-  }
-}
-
-// Compare integer (32 bit) values (icc only).
-void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
-                                      Predict p, Label& L) {
-  assert_not_delayed();
-  if (is_simm(simm13a,5) && use_cbcond(L)) {
-    Assembler::cbcond(c, icc, s1, simm13a, L);
-  } else {
-    cmp(s1, simm13a);
-    br(c, false, p, L);
-    delayed()->nop();
-  }
-}
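-
-// Sketch of the is_simm(x, 5) eligibility test above (not VM code):
-// cbcond encodes only a 5-bit signed immediate, i.e. [-16, 15], so
-// larger constants fall back to the cmp/br/nop sequence.
-#include <cassert>
-static bool is_simm_sk(long x, int nbits) {
-  long lo = -(1L << (nbits - 1));
-  long hi =  (1L << (nbits - 1)) - 1;
-  return lo <= x && x <= hi;
-}
-int main() {
-  assert(is_simm_sk(15, 5) && is_simm_sk(-16, 5));
-  assert(!is_simm_sk(16, 5) && !is_simm_sk(-17, 5));
-  return 0;
-}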
-
-// Branch that tests xcc in LP64 and icc in !LP64
-void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c,
-                                       Predict p, Label& L) {
-  assert_not_delayed();
-  if (use_cbcond(L)) {
-    Assembler::cbcond(c, ptr_cc, s1, s2, L);
-  } else {
-    cmp(s1, s2);
-    brx(c, false, p, L);
-    delayed()->nop();
-  }
-}
-
-// Branch that tests xcc in LP64 and icc in !LP64
-void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c,
-                                       Predict p, Label& L) {
-  assert_not_delayed();
-  if (is_simm(simm13a,5) && use_cbcond(L)) {
-    Assembler::cbcond(c, ptr_cc, s1, simm13a, L);
-  } else {
-    cmp(s1, simm13a);
-    brx(c, false, p, L);
-    delayed()->nop();
-  }
-}
-
-// Short branch version for compares a pointer with zero.
-
-void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
-  assert_not_delayed();
-  if (use_cbcond(L)) {
-    Assembler::cbcond(zero, ptr_cc, s1, 0, L);
-    return;
-  }
-  br_null(s1, false, p, L);
-  delayed()->nop();
-}
-
-void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
-  assert_not_delayed();
-  if (use_cbcond(L)) {
-    Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
-    return;
-  }
-  br_notnull(s1, false, p, L);
-  delayed()->nop();
-}
-
-// Unconditional short branch
-void MacroAssembler::ba_short(Label& L) {
-  if (use_cbcond(L)) {
-    Assembler::cbcond(equal, icc, G0, G0, L);
-    return;
-  }
-  br(always, false, pt, L);
-  delayed()->nop();
-}
-
-// instruction sequences factored across compiler & interpreter
-
-
-void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
-                           Register Rb_hi, Register Rb_low,
-                           Register Rresult) {
-
-  Label check_low_parts, done;
-
-  cmp(Ra_hi, Rb_hi );  // compare hi parts
-  br(equal, true, pt, check_low_parts);
-  delayed()->cmp(Ra_low, Rb_low); // test low parts
-
-  // And, with an unsigned comparison, it does not matter if the numbers
-  // are negative or not.
-  // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff.
-  // The second one is bigger (unsignedly).
-
-  // Other notes:  The first move in each triplet can be unconditional
-  // (and therefore probably prefetchable).
-  // And the equals case for the high part does not need testing,
-  // since that triplet is reached only after finding the high halves differ.
-
-  if (VM_Version::v9_instructions_work()) {
-    mov(-1, Rresult);
-    ba(done);  delayed()-> movcc(greater, false, icc,  1, Rresult);
-  } else {
-    br(less,    true, pt, done); delayed()-> set(-1, Rresult);
-    br(greater, true, pt, done); delayed()-> set( 1, Rresult);
-  }
-
-  bind( check_low_parts );
-
-  if (VM_Version::v9_instructions_work()) {
-    mov(                               -1, Rresult);
-    movcc(equal,           false, icc,  0, Rresult);
-    movcc(greaterUnsigned, false, icc,  1, Rresult);
-  } else {
-    set(-1, Rresult);
-    br(equal,           true, pt, done); delayed()->set( 0, Rresult);
-    br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult);
-  }
-  bind( done );
-}
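-
-// Standalone sketch of the two-word compare above (not VM code): the
-// high halves decide when they differ (signed), otherwise the low
-// halves decide (unsigned) -- e.g. -2 vs -1 has lows 0xfffffffe vs
-// 0xffffffff, and the unsigned low compare gives the right answer.
-#include <cassert>
-#include <cstdint>
-static int lcmp_sk(int64_t a, int64_t b) {
-  int32_t  a_hi = (int32_t)(a >> 32), b_hi = (int32_t)(b >> 32);
-  uint32_t a_lo = (uint32_t)a,        b_lo = (uint32_t)b;
-  if (a_hi != b_hi) return a_hi < b_hi ? -1 : 1;   // signed high compare
-  if (a_lo == b_lo) return 0;
-  return a_lo < b_lo ? -1 : 1;                     // unsigned low compare
-}
-int main() {
-  assert(lcmp_sk(-2, -1) == -1);
-  assert(lcmp_sk((int64_t)1 << 33, 5) == 1);
-  assert(lcmp_sk(7, 7) == 0);
-  return 0;
-}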
-
-void MacroAssembler::lneg( Register Rhi, Register Rlow ) {
-  subcc(  G0, Rlow, Rlow );
-  subc(   G0, Rhi,  Rhi  );
-}
-
-void MacroAssembler::lshl( Register Rin_high,  Register Rin_low,
-                           Register Rcount,
-                           Register Rout_high, Register Rout_low,
-                           Register Rtemp ) {
-
-
-  Register Ralt_count = Rtemp;
-  Register Rxfer_bits = Rtemp;
-
-  assert( Ralt_count != Rin_high
-      &&  Ralt_count != Rin_low
-      &&  Ralt_count != Rcount
-      &&  Rxfer_bits != Rin_low
-      &&  Rxfer_bits != Rin_high
-      &&  Rxfer_bits != Rcount
-      &&  Rxfer_bits != Rout_low
-      &&  Rout_low   != Rin_high,
-        "register alias checks");
-
-  Label big_shift, done;
-
-  // This code can be optimized to use the 64 bit shifts in V9.
-  // Here we use the 32 bit shifts.
-
-  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
-  subcc(Rcount,   31, Ralt_count);
-  br(greater, true, pn, big_shift);
-  delayed()->dec(Ralt_count);
-
-  // shift < 32 bits, Ralt_count = Rcount-31
-
-  // We get the transfer bits by shifting right by 32-count the low
-  // register. This is done by shifting right by 31-count and then by one
-  // more to take care of the special (rare) case where count is zero
-  // (shifting by 32 would not work).
-
-  neg(Ralt_count);
-
-  // The order of the next two instructions is critical in the case where
-  // Rin and Rout are the same and should not be reversed.
-
-  srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count
-  if (Rcount != Rout_low) {
-    sll(Rin_low, Rcount, Rout_low); // low half
-  }
-  sll(Rin_high, Rcount, Rout_high);
-  if (Rcount == Rout_low) {
-    sll(Rin_low, Rcount, Rout_low); // low half
-  }
-  srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more
-  ba(done);
-  delayed()->or3(Rout_high, Rxfer_bits, Rout_high);   // new hi value: or in shifted old hi part and xfer from low
-
-  // shift >= 32 bits, Ralt_count = Rcount-32
-  bind(big_shift);
-  sll(Rin_low, Ralt_count, Rout_high  );
-  clr(Rout_low);
-
-  bind(done);
-}
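-
-// Standalone sketch of the long-shift decomposition above (not VM code).
-// The transfer bits move from the low half into the high half; shifting
-// right by (31 - count) and then by one more avoids the ill-defined
-// shift-by-32 when count is zero.
-#include <cassert>
-#include <cstdint>
-static void lshl_sk(uint32_t in_hi, uint32_t in_lo, int count,
-                    uint32_t* out_hi, uint32_t* out_lo) {
-  count &= 0x3f;                                   // least significant 6 bits
-  if (count >= 32) {                               // big shift
-    *out_hi = in_lo << (count - 32);
-    *out_lo = 0;
-  } else {
-    uint32_t xfer = (in_lo >> (31 - count)) >> 1;  // safe even for count == 0
-    *out_hi = (in_hi << count) | xfer;
-    *out_lo = in_lo << count;
-  }
-}
-int main() {
-  uint32_t hi, lo;
-  lshl_sk(0x00000001, 0x80000000, 1, &hi, &lo);
-  assert(hi == 0x00000003 && lo == 0x00000000);    // top bit carried over
-  lshl_sk(0, 1, 33, &hi, &lo);
-  assert(hi == 2 && lo == 0);
-  return 0;
-}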
-
-
-void MacroAssembler::lshr( Register Rin_high,  Register Rin_low,
-                           Register Rcount,
-                           Register Rout_high, Register Rout_low,
-                           Register Rtemp ) {
-
-  Register Ralt_count = Rtemp;
-  Register Rxfer_bits = Rtemp;
-
-  assert( Ralt_count != Rin_high
-      &&  Ralt_count != Rin_low
-      &&  Ralt_count != Rcount
-      &&  Rxfer_bits != Rin_low
-      &&  Rxfer_bits != Rin_high
-      &&  Rxfer_bits != Rcount
-      &&  Rxfer_bits != Rout_high
-      &&  Rout_high  != Rin_low,
-        "register alias checks");
-
-  Label big_shift, done;
-
-  // This code can be optimized to use the 64 bit shifts in V9.
-  // Here we use the 32 bit shifts.
-
-  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
-  subcc(Rcount,   31, Ralt_count);
-  br(greater, true, pn, big_shift);
-  delayed()->dec(Ralt_count);
-
-  // shift < 32 bits, Ralt_count = Rcount-31
-
-  // We get the transfer bits by shifting left by 32-count the high
-  // register. This is done by shifting left by 31-count and then by one
-  // more to take care of the special (rare) case where count is zero
-  // (shifting by 32 would not work).
-
-  neg(Ralt_count);
-  if (Rcount != Rout_low) {
-    srl(Rin_low, Rcount, Rout_low);
-  }
-
-  // The order of the next two instructions is critical in the case where
-  // Rin and Rout are the same and should not be reversed.
-
-  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
-  sra(Rin_high,     Rcount, Rout_high ); // high half
-  sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
-  if (Rcount == Rout_low) {
-    srl(Rin_low, Rcount, Rout_low);
-  }
-  ba(done);
-  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
-
-  // shift >= 32 bits, Ralt_count = Rcount-32
-  bind(big_shift);
-
-  sra(Rin_high, Ralt_count, Rout_low);
-  sra(Rin_high,         31, Rout_high); // sign into hi
-
-  bind( done );
-}
-
-
-
-void MacroAssembler::lushr( Register Rin_high,  Register Rin_low,
-                            Register Rcount,
-                            Register Rout_high, Register Rout_low,
-                            Register Rtemp ) {
-
-  Register Ralt_count = Rtemp;
-  Register Rxfer_bits = Rtemp;
-
-  assert( Ralt_count != Rin_high
-      &&  Ralt_count != Rin_low
-      &&  Ralt_count != Rcount
-      &&  Rxfer_bits != Rin_low
-      &&  Rxfer_bits != Rin_high
-      &&  Rxfer_bits != Rcount
-      &&  Rxfer_bits != Rout_high
-      &&  Rout_high  != Rin_low,
-        "register alias checks");
-
-  Label big_shift, done;
-
-  // This code can be optimized to use the 64 bit shifts in V9.
-  // Here we use the 32 bit shifts.
-
-  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
-  subcc(Rcount,   31, Ralt_count);
-  br(greater, true, pn, big_shift);
-  delayed()->dec(Ralt_count);
-
-  // shift < 32 bits, Ralt_count = Rcount-31
-
-  // We get the transfer bits by shifting left by 32-count the high
-  // register. This is done by shifting left by 31-count and then by one
-  // more to take care of the special (rare) case where count is zero
-  // (shifting by 32 would not work).
-
-  neg(Ralt_count);
-  if (Rcount != Rout_low) {
-    srl(Rin_low, Rcount, Rout_low);
-  }
-
-  // The order of the next two instructions is critical in the case where
-  // Rin and Rout are the same and should not be reversed.
-
-  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
-  srl(Rin_high,     Rcount, Rout_high ); // high half
-  sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
-  if (Rcount == Rout_low) {
-    srl(Rin_low, Rcount, Rout_low);
-  }
-  ba(done);
-  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
-
-  // shift >= 32 bits, Ralt_count = Rcount-32
-  bind(big_shift);
-
-  srl(Rin_high, Ralt_count, Rout_low);
-  clr(Rout_high);
-
-  bind( done );
-}
-
-#ifdef _LP64
-void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
-  cmp(Ra, Rb);
-  mov(-1, Rresult);
-  movcc(equal,   false, xcc,  0, Rresult);
-  movcc(greater, false, xcc,  1, Rresult);
-}
-#endif
-
-
-void MacroAssembler::load_sized_value(Address src, Register dst, size_t size_in_bytes, bool is_signed) {
-  switch (size_in_bytes) {
-  case  8:  ld_long(src, dst); break;
-  case  4:  ld(     src, dst); break;
-  case  2:  is_signed ? ldsh(src, dst) : lduh(src, dst); break;
-  case  1:  is_signed ? ldsb(src, dst) : ldub(src, dst); break;
-  default:  ShouldNotReachHere();
-  }
-}
-
-void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
-  switch (size_in_bytes) {
-  case  8:  st_long(src, dst); break;
-  case  4:  st(     src, dst); break;
-  case  2:  sth(    src, dst); break;
-  case  1:  stb(    src, dst); break;
-  default:  ShouldNotReachHere();
-  }
-}
-
-
-void MacroAssembler::float_cmp( bool is_float, int unordered_result,
-                                FloatRegister Fa, FloatRegister Fb,
-                                Register Rresult) {
-
-  fcmp(is_float ? FloatRegisterImpl::S : FloatRegisterImpl::D, fcc0, Fa, Fb);
-
-  Condition lt = unordered_result == -1 ? f_unorderedOrLess    : f_less;
-  Condition eq =                          f_equal;
-  Condition gt = unordered_result ==  1 ? f_unorderedOrGreater : f_greater;
-
-  if (VM_Version::v9_instructions_work()) {
-
-    mov(-1, Rresult);
-    movcc(eq, true, fcc0, 0, Rresult);
-    movcc(gt, true, fcc0, 1, Rresult);
-
-  } else {
-    Label done;
-
-    set( -1, Rresult );
-    //fb(lt, true, pn, done); delayed()->set( -1, Rresult );
-    fb( eq, true, pn, done);  delayed()->set(  0, Rresult );
-    fb( gt, true, pn, done);  delayed()->set(  1, Rresult );
-
-    bind (done);
-  }
-}
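-
-// Standalone sketch of the -1/0/1 convention above (not VM code),
-// showing how an unordered compare (NaN operand) folds into the
-// caller-chosen side, as in the fcmpl/fcmpg bytecodes:
-#include <cassert>
-#include <cmath>
-static int float_cmp_sk(float a, float b, int unordered_result) {
-  if (a != a || b != b) return unordered_result;  // NaN => unordered
-  if (a < b)  return -1;
-  if (a == b) return  0;
-  return 1;
-}
-int main() {
-  assert(float_cmp_sk(1.0f, 2.0f, -1) == -1);
-  assert(float_cmp_sk(2.0f, 2.0f,  1) ==  0);
-  assert(float_cmp_sk(NAN,  2.0f, -1) == -1);   // unordered-or-less
-  assert(float_cmp_sk(NAN,  2.0f,  1) ==  1);   // unordered-or-greater
-  return 0;
-}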
-
-
-void MacroAssembler::fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
-{
-  if (VM_Version::v9_instructions_work()) {
-    Assembler::fneg(w, s, d);
-  } else {
-    if (w == FloatRegisterImpl::S) {
-      Assembler::fneg(w, s, d);
-    } else if (w == FloatRegisterImpl::D) {
-      // number() does a sanity check on the alignment.
-      assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
-        ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");
-
-      Assembler::fneg(FloatRegisterImpl::S, s, d);
-      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
-    } else {
-      assert(w == FloatRegisterImpl::Q, "Invalid float register width");
-
-      // number() does a sanity check on the alignment.
-      assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
-        ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");
-
-      Assembler::fneg(FloatRegisterImpl::S, s, d);
-      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
-      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
-      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
-    }
-  }
-}
-
-void MacroAssembler::fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
-{
-  if (VM_Version::v9_instructions_work()) {
-    Assembler::fmov(w, s, d);
-  } else {
-    if (w == FloatRegisterImpl::S) {
-      Assembler::fmov(w, s, d);
-    } else if (w == FloatRegisterImpl::D) {
-      // number() does a sanity check on the alignment.
-      assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
-        ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");
-
-      Assembler::fmov(FloatRegisterImpl::S, s, d);
-      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
-    } else {
-      assert(w == FloatRegisterImpl::Q, "Invalid float register width");
-
-      // number() does a sanity check on the alignment.
-      assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
-        ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");
-
-      Assembler::fmov(FloatRegisterImpl::S, s, d);
-      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
-      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
-      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
-    }
-  }
-}
-
-void MacroAssembler::fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
-{
-  if (VM_Version::v9_instructions_work()) {
-    Assembler::fabs(w, s, d);
-  } else {
-    if (w == FloatRegisterImpl::S) {
-      Assembler::fabs(w, s, d);
-    } else if (w == FloatRegisterImpl::D) {
-      // number() does a sanity check on the alignment.
-      assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
-        ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");
-
-      Assembler::fabs(FloatRegisterImpl::S, s, d);
-      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
-    } else {
-      assert(w == FloatRegisterImpl::Q, "Invalid float register width");
-
-      // number() does a sanity check on the alignment.
-      assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
-       ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");
-
-      Assembler::fabs(FloatRegisterImpl::S, s, d);
-      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
-      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
-      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
-    }
-  }
-}
-
-void MacroAssembler::save_all_globals_into_locals() {
-  mov(G1,L1);
-  mov(G2,L2);
-  mov(G3,L3);
-  mov(G4,L4);
-  mov(G5,L5);
-  mov(G6,L6);
-  mov(G7,L7);
-}
-
-void MacroAssembler::restore_globals_from_locals() {
-  mov(L1,G1);
-  mov(L2,G2);
-  mov(L3,G3);
-  mov(L4,G4);
-  mov(L5,G5);
-  mov(L6,G6);
-  mov(L7,G7);
-}
-
-// Use for 64-bit operations.
-void MacroAssembler::casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm)
-{
-  // store ptr_reg as the new top value
-#ifdef _LP64
-  casx(top_ptr_reg, top_reg, ptr_reg);
-#else
-  cas_under_lock(top_ptr_reg, top_reg, ptr_reg, lock_addr, use_call_vm);
-#endif // _LP64
-}
-
-// [RGV] This routine does not handle 64-bit operations;
-//       use casx_under_lock() or casx directly!!!
-void MacroAssembler::cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm)
-{
-  // store ptr_reg as the new top value
-  if (VM_Version::v9_instructions_work()) {
-    cas(top_ptr_reg, top_reg, ptr_reg);
-  } else {
-
-    // If the register is neither an out nor a global, it is not
-    // visible after the save.  Allocate a register for it, and save
-    // its value in the register save area (the save may not flush
-    // registers to the save area).
-
-    Register top_ptr_reg_after_save;
-    Register top_reg_after_save;
-    Register ptr_reg_after_save;
-
-    if (top_ptr_reg->is_out() || top_ptr_reg->is_global()) {
-      top_ptr_reg_after_save = top_ptr_reg->after_save();
-    } else {
-      Address reg_save_addr = top_ptr_reg->address_in_saved_window();
-      top_ptr_reg_after_save = L0;
-      st(top_ptr_reg, reg_save_addr);
-    }
-
-    if (top_reg->is_out() || top_reg->is_global()) {
-      top_reg_after_save = top_reg->after_save();
-    } else {
-      Address reg_save_addr = top_reg->address_in_saved_window();
-      top_reg_after_save = L1;
-      st(top_reg, reg_save_addr);
-    }
-
-    if (ptr_reg->is_out() || ptr_reg->is_global()) {
-      ptr_reg_after_save = ptr_reg->after_save();
-    } else {
-      Address reg_save_addr = ptr_reg->address_in_saved_window();
-      ptr_reg_after_save = L2;
-      st(ptr_reg, reg_save_addr);
-    }
-
-    const Register& lock_reg = L3;
-    const Register& lock_ptr_reg = L4;
-    const Register& value_reg = L5;
-    const Register& yield_reg = L6;
-    const Register& yieldall_reg = L7;
-
-    save_frame();
-
-    if (top_ptr_reg_after_save == L0) {
-      ld(top_ptr_reg->address_in_saved_window().after_save(), top_ptr_reg_after_save);
-    }
-
-    if (top_reg_after_save == L1) {
-      ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save);
-    }
-
-    if (ptr_reg_after_save == L2) {
-      ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save);
-    }
-
-    Label retry_get_lock;
-    Label not_same;
-    Label dont_yield;
-
-    assert(lock_addr, "lock_addr should be non-null for V8");
-    set((intptr_t)lock_addr, lock_ptr_reg);
-    // Initialize yield counter
-    mov(G0,yield_reg);
-    mov(G0, yieldall_reg);
-    set(StubRoutines::Sparc::locked, lock_reg);
-
-    bind(retry_get_lock);
-    cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield);
-
-    if (use_call_vm) {
-      Untested("Need to verify global reg consistency");
-      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
-    } else {
-      // Save the regs and make space for a C call
-      save(SP, -96, SP);
-      save_all_globals_into_locals();
-      call(CAST_FROM_FN_PTR(address,os::yield_all));
-      delayed()->mov(yieldall_reg, O0);
-      restore_globals_from_locals();
-      restore();
-    }
-
-    // reset the counter
-    mov(G0,yield_reg);
-    add(yieldall_reg, 1, yieldall_reg);
-
-    bind(dont_yield);
-    // try to get lock
-    swap(lock_ptr_reg, 0, lock_reg);
-
-    // did we get the lock?
-    cmp(lock_reg, StubRoutines::Sparc::unlocked);
-    br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
-    delayed()->add(yield_reg,1,yield_reg);
-
-    // yes, got lock.  do we have the same top?
-    ld(top_ptr_reg_after_save, 0, value_reg);
-    cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same);
-
-    // yes, same top.
-    st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
-    membar(Assembler::StoreStore);
-
-    bind(not_same);
-    mov(value_reg, ptr_reg_after_save);
-    st(lock_reg, lock_ptr_reg, 0); // unlock
-
-    restore();
-  }
-}
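-
-// Standalone sketch of the V8 fallback above (not VM code): emulate
-// compare-and-swap by spinning on a lock word acquired with an atomic
-// exchange (the VM's swap instruction), then comparing and storing
-// under that lock.  The VM version also yields while spinning.
-#include <atomic>
-#include <cassert>
-static std::atomic<int> lock_word_sk(0);   // 0 = unlocked, 1 = locked
-static long cas_under_lock_sk(long* addr, long expected, long new_val) {
-  while (lock_word_sk.exchange(1) != 0) { /* spin */ }
-  long old = *addr;
-  if (old == expected) *addr = new_val;    // same-top case: install new value
-  lock_word_sk.store(0);                   // unlock
-  return old;                              // caller compares old == expected
-}
-int main() {
-  long top = 5;
-  assert(cas_under_lock_sk(&top, 5, 9) == 5 && top == 9);  // CAS succeeds
-  assert(cas_under_lock_sk(&top, 5, 7) == 9 && top == 9);  // CAS fails
-  return 0;
-}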
-
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
-                                                      Register tmp,
-                                                      int offset) {
-  intptr_t value = *delayed_value_addr;
-  if (value != 0)
-    return RegisterOrConstant(value + offset);
-
-  // load indirectly to solve generation ordering problem
-  AddressLiteral a(delayed_value_addr);
-  load_ptr_contents(a, tmp);
-
-#ifdef ASSERT
-  tst(tmp);
-  breakpoint_trap(zero, xcc);
-#endif
-
-  if (offset != 0)
-    add(tmp, offset, tmp);
-
-  return RegisterOrConstant(tmp);
-}
-
-
-RegisterOrConstant MacroAssembler::regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {
-  assert(d.register_or_noreg() != G0, "lost side effect");
-  if ((s2.is_constant() && s2.as_constant() == 0) ||
-      (s2.is_register() && s2.as_register() == G0)) {
-    // Do nothing, just move value.
-    if (s1.is_register()) {
-      if (d.is_constant())  d = temp;
-      mov(s1.as_register(), d.as_register());
-      return d;
-    } else {
-      return s1;
-    }
-  }
-
-  if (s1.is_register()) {
-    assert_different_registers(s1.as_register(), temp);
-    if (d.is_constant())  d = temp;
-    andn(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
-    return d;
-  } else {
-    if (s2.is_register()) {
-      assert_different_registers(s2.as_register(), temp);
-      if (d.is_constant())  d = temp;
-      set(s1.as_constant(), temp);
-      andn(temp, s2.as_register(), d.as_register());
-      return d;
-    } else {
-      intptr_t res = s1.as_constant() & ~s2.as_constant();
-      return res;
-    }
-  }
-}
-
-RegisterOrConstant MacroAssembler::regcon_inc_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {
-  assert(d.register_or_noreg() != G0, "lost side effect");
-  if ((s2.is_constant() && s2.as_constant() == 0) ||
-      (s2.is_register() && s2.as_register() == G0)) {
-    // Do nothing, just move value.
-    if (s1.is_register()) {
-      if (d.is_constant())  d = temp;
-      mov(s1.as_register(), d.as_register());
-      return d;
-    } else {
-      return s1;
-    }
-  }
-
-  if (s1.is_register()) {
-    assert_different_registers(s1.as_register(), temp);
-    if (d.is_constant())  d = temp;
-    add(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
-    return d;
-  } else {
-    if (s2.is_register()) {
-      assert_different_registers(s2.as_register(), temp);
-      if (d.is_constant())  d = temp;
-      add(s2.as_register(), ensure_simm13_or_reg(s1, temp), d.as_register());
-      return d;
-    } else {
-      intptr_t res = s1.as_constant() + s2.as_constant();
-      return res;
-    }
-  }
-}
-
-RegisterOrConstant MacroAssembler::regcon_sll_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {
-  assert(d.register_or_noreg() != G0, "lost side effect");
-  if (!is_simm13(s2.constant_or_zero()))
-    s2 = (s2.as_constant() & 0xFF);
-  if ((s2.is_constant() && s2.as_constant() == 0) ||
-      (s2.is_register() && s2.as_register() == G0)) {
-    // Do nothing, just move value.
-    if (s1.is_register()) {
-      if (d.is_constant())  d = temp;
-      mov(s1.as_register(), d.as_register());
-      return d;
-    } else {
-      return s1;
-    }
-  }
-
-  if (s1.is_register()) {
-    assert_different_registers(s1.as_register(), temp);
-    if (d.is_constant())  d = temp;
-    sll_ptr(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
-    return d;
-  } else {
-    if (s2.is_register()) {
-      assert_different_registers(s2.as_register(), temp);
-      if (d.is_constant())  d = temp;
-      set(s1.as_constant(), temp);
-      sll_ptr(temp, s2.as_register(), d.as_register());
-      return d;
-    } else {
-      intptr_t res = s1.as_constant() << s2.as_constant();
-      return res;
-    }
-  }
-}
-
-
-// Look up the method for a megamorphic invokeinterface call.
-// The target method is determined by <intf_klass, itable_index>.
-// The receiver klass is in recv_klass.
-// On success, the result will be in method_result, and execution falls through.
-// On failure, execution transfers to the given label.
-void MacroAssembler::lookup_interface_method(Register recv_klass,
-                                             Register intf_klass,
-                                             RegisterOrConstant itable_index,
-                                             Register method_result,
-                                             Register scan_temp,
-                                             Register sethi_temp,
-                                             Label& L_no_such_interface) {
-  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
-  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
-         "caller must use same register for non-constant itable index as for method");
-
-  Label L_no_such_interface_restore;
-  bool did_save = false;
-  if (scan_temp == noreg || sethi_temp == noreg) {
-    Register recv_2 = recv_klass->is_global() ? recv_klass : L0;
-    Register intf_2 = intf_klass->is_global() ? intf_klass : L1;
-    assert(method_result->is_global(), "must be able to return value");
-    scan_temp  = L2;
-    sethi_temp = L3;
-    save_frame_and_mov(0, recv_klass, recv_2, intf_klass, intf_2);
-    recv_klass = recv_2;
-    intf_klass = intf_2;
-    did_save = true;
-  }
-
-  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
-  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
-  int scan_step   = itableOffsetEntry::size() * wordSize;
-  int vte_size    = vtableEntry::size() * wordSize;
-
-  lduw(recv_klass, InstanceKlass::vtable_length_offset() * wordSize, scan_temp);
-  // %%% We should store the aligned, prescaled offset in the klassoop.
-  // Then the next several instructions would fold away.
-
-  int round_to_unit = ((HeapWordsPerLong > 1) ? BytesPerLong : 0);
-  int itb_offset = vtable_base;
-  if (round_to_unit != 0) {
-    // hoist first instruction of round_to(scan_temp, BytesPerLong):
-    itb_offset += round_to_unit - wordSize;
-  }
-  int itb_scale = exact_log2(vtableEntry::size() * wordSize);
-  sll(scan_temp, itb_scale,  scan_temp);
-  add(scan_temp, itb_offset, scan_temp);
-  if (round_to_unit != 0) {
-    // Round up to align_object_offset boundary
-    // see code for InstanceKlass::start_of_itable!
-    // Was: round_to(scan_temp, BytesPerLong);
-    // Hoisted: add(scan_temp, BytesPerLong-1, scan_temp);
-    and3(scan_temp, -round_to_unit, scan_temp);
-  }
-  add(recv_klass, scan_temp, scan_temp);
-
-  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
-  RegisterOrConstant itable_offset = itable_index;
-  itable_offset = regcon_sll_ptr(itable_index, exact_log2(itableMethodEntry::size() * wordSize), itable_offset);
-  itable_offset = regcon_inc_ptr(itable_offset, itableMethodEntry::method_offset_in_bytes(), itable_offset);
-  add(recv_klass, ensure_simm13_or_reg(itable_offset, sethi_temp), recv_klass);
-
-  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
-  //   if (scan->interface() == intf) {
-  //     result = (klass + scan->offset() + itable_index);
-  //   }
-  // }
-  Label L_search, L_found_method;
-
-  for (int peel = 1; peel >= 0; peel--) {
-    // %%%% Could load both offset and interface in one ldx, if they were
-    // in the opposite order.  This would save a load.
-    ld_ptr(scan_temp, itableOffsetEntry::interface_offset_in_bytes(), method_result);
-
-    // Check that this entry is non-null.  A null entry means that
-    // the receiver class doesn't implement the interface, and wasn't the
-    // same as when the caller was compiled.
-    bpr(Assembler::rc_z, false, Assembler::pn, method_result, did_save ? L_no_such_interface_restore : L_no_such_interface);
-    delayed()->cmp(method_result, intf_klass);
-
-    if (peel) {
-      brx(Assembler::equal,    false, Assembler::pt, L_found_method);
-    } else {
-      brx(Assembler::notEqual, false, Assembler::pn, L_search);
-      // (invert the test to fall through to found_method...)
-    }
-    delayed()->add(scan_temp, scan_step, scan_temp);
-
-    if (!peel)  break;
-
-    bind(L_search);
-  }
-
-  bind(L_found_method);
-
-  // Got a hit.
-  int ito_offset = itableOffsetEntry::offset_offset_in_bytes();
-  // scan_temp[-scan_step] points to the vtable offset we need
-  ito_offset -= scan_step;
-  lduw(scan_temp, ito_offset, scan_temp);
-  ld_ptr(recv_klass, scan_temp, method_result);
-
-  if (did_save) {
-    Label L_done;
-    ba(L_done);
-    delayed()->restore();
-
-    bind(L_no_such_interface_restore);
-    ba(L_no_such_interface);
-    delayed()->restore();
-
-    bind(L_done);
-  }
-}
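-
-// Standalone sketch of the itable scan above (not VM code; the struct
-// below is an illustrative simplification of itableOffsetEntry): walk
-// entries until a NULL interface terminates the table, returning the
-// matching entry's offset, or -1 for the no-such-interface path.
-#include <cassert>
-#include <cstddef>
-struct ItableOffsetEntrySk {
-  const void* interface_klass;   // NULL terminates the scan
-  int         offset;            // where that interface's methods start
-};
-static int find_itable_offset_sk(const ItableOffsetEntrySk* scan,
-                                 const void* intf) {
-  for (; scan->interface_klass != NULL; ++scan) {
-    if (scan->interface_klass == intf) return scan->offset;
-  }
-  return -1;
-}
-int main() {
-  int a, b, c;
-  ItableOffsetEntrySk itable[] = { { &a, 64 }, { &b, 96 }, { NULL, 0 } };
-  assert(find_itable_offset_sk(itable, &b) == 96);
-  assert(find_itable_offset_sk(itable, &c) == -1);
-  return 0;
-}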
-
-
-// virtual method calling
-void MacroAssembler::lookup_virtual_method(Register recv_klass,
-                                           RegisterOrConstant vtable_index,
-                                           Register method_result) {
-  assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg());
-  Register sethi_temp = method_result;
-  const int base = (InstanceKlass::vtable_start_offset() * wordSize +
-                    // method pointer offset within the vtable entry:
-                    vtableEntry::method_offset_in_bytes());
-  RegisterOrConstant vtable_offset = vtable_index;
-  // Each of the following three lines potentially generates an instruction.
-  // But the total number of address formation instructions will always be
-  // at most two, and will often be zero.  In any case, it will be optimal.
-  // If vtable_index is a register, we will have (sll_ptr N,x; inc_ptr B,x; ld_ptr k,x).
-  // If vtable_index is a constant, we will have at most (set B+X<<N,t; ld_ptr k,t).
-  vtable_offset = regcon_sll_ptr(vtable_index, exact_log2(vtableEntry::size() * wordSize), vtable_offset);
-  vtable_offset = regcon_inc_ptr(vtable_offset, base, vtable_offset, sethi_temp);
-  Address vtable_entry_addr(recv_klass, ensure_simm13_or_reg(vtable_offset, sethi_temp));
-  ld_ptr(vtable_entry_addr, method_result);
-}
-
-
-void MacroAssembler::check_klass_subtype(Register sub_klass,
-                                         Register super_klass,
-                                         Register temp_reg,
-                                         Register temp2_reg,
-                                         Label& L_success) {
-  Register sub_2 = sub_klass;
-  Register sup_2 = super_klass;
-  if (!sub_2->is_global())  sub_2 = L0;
-  if (!sup_2->is_global())  sup_2 = L1;
-  bool did_save = false;
-  if (temp_reg == noreg || temp2_reg == noreg) {
-    temp_reg = L2;
-    temp2_reg = L3;
-    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
-    sub_klass = sub_2;
-    super_klass = sup_2;
-    did_save = true;
-  }
-  Label L_failure, L_pop_to_failure, L_pop_to_success;
-  check_klass_subtype_fast_path(sub_klass, super_klass,
-                                temp_reg, temp2_reg,
-                                (did_save ? &L_pop_to_success : &L_success),
-                                (did_save ? &L_pop_to_failure : &L_failure), NULL);
-
-  if (!did_save)
-    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
-  check_klass_subtype_slow_path(sub_2, sup_2,
-                                L2, L3, L4, L5,
-                                NULL, &L_pop_to_failure);
-
-  // on success:
-  bind(L_pop_to_success);
-  restore();
-  ba_short(L_success);
-
-  // on failure:
-  bind(L_pop_to_failure);
-  restore();
-  bind(L_failure);
-}
-
-
-void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
-                                                   Register super_klass,
-                                                   Register temp_reg,
-                                                   Register temp2_reg,
-                                                   Label* L_success,
-                                                   Label* L_failure,
-                                                   Label* L_slow_path,
-                                        RegisterOrConstant super_check_offset) {
-  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
-  int sco_offset = in_bytes(Klass::super_check_offset_offset());
-
-  bool must_load_sco  = (super_check_offset.constant_or_zero() == -1);
-  bool need_slow_path = (must_load_sco ||
-                         super_check_offset.constant_or_zero() == sco_offset);
-
-  assert_different_registers(sub_klass, super_klass, temp_reg);
-  if (super_check_offset.is_register()) {
-    assert_different_registers(sub_klass, super_klass, temp_reg,
-                               super_check_offset.as_register());
-  } else if (must_load_sco) {
-    assert(temp2_reg != noreg, "supply either a temp or a register offset");
-  }
-
-  Label L_fallthrough;
-  int label_nulls = 0;
-  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
-  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
-  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
-  assert(label_nulls <= 1 ||
-         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
-         "at most one NULL in the batch, usually");
-
-  // If the pointers are equal, we are done (e.g., String[] elements).
-  // This self-check enables sharing of secondary supertype arrays among
-  // non-primary types such as array-of-interface.  Otherwise, each such
-  // type would need its own customized SSA.
-  // We move this check to the front of the fast path because many
-  // type checks are in fact trivially successful in this manner,
-  // so we get a nicely predicted branch right at the start of the check.
-  cmp(super_klass, sub_klass);
-  brx(Assembler::equal, false, Assembler::pn, *L_success);
-  delayed()->nop();
-
-  // Check the supertype display:
-  if (must_load_sco) {
-    // The super check offset is always positive...
-    lduw(super_klass, sco_offset, temp2_reg);
-    super_check_offset = RegisterOrConstant(temp2_reg);
-    // super_check_offset is register.
-    assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset.as_register());
-  }
-  ld_ptr(sub_klass, super_check_offset, temp_reg);
-  cmp(super_klass, temp_reg);
-
-  // This check has worked decisively for primary supers.
-  // Secondary supers are sought in the super_cache ('super_cache_addr').
-  // (Secondary supers are interfaces and very deeply nested subtypes.)
-  // This works in the same check above because of a tricky aliasing
-  // between the super_cache and the primary super display elements.
-  // (The 'super_check_addr' can address either, as the case requires.)
-  // Note that the cache is updated below if it does not help us find
-  // what we need immediately.
-  // So if it was a primary super, we can just fail immediately.
-  // Otherwise, it's the slow path for us (no success at this point).
-
-  // Hacked ba(), which may only be used just before L_fallthrough.
-#define FINAL_JUMP(label)            \
-  if (&(label) != &L_fallthrough) {  \
-    ba(label);  delayed()->nop();    \
-  }
-
-  if (super_check_offset.is_register()) {
-    brx(Assembler::equal, false, Assembler::pn, *L_success);
-    delayed()->cmp(super_check_offset.as_register(), sc_offset);
-
-    if (L_failure == &L_fallthrough) {
-      brx(Assembler::equal, false, Assembler::pt, *L_slow_path);
-      delayed()->nop();
-    } else {
-      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
-      delayed()->nop();
-      FINAL_JUMP(*L_slow_path);
-    }
-  } else if (super_check_offset.as_constant() == sc_offset) {
-    // Need a slow path; fast failure is impossible.
-    if (L_slow_path == &L_fallthrough) {
-      brx(Assembler::equal, false, Assembler::pt, *L_success);
-      delayed()->nop();
-    } else {
-      brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
-      delayed()->nop();
-      FINAL_JUMP(*L_success);
-    }
-  } else {
-    // No slow path; it's a fast decision.
-    if (L_failure == &L_fallthrough) {
-      brx(Assembler::equal, false, Assembler::pt, *L_success);
-      delayed()->nop();
-    } else {
-      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
-      delayed()->nop();
-      FINAL_JUMP(*L_success);
-    }
-  }
-
-  bind(L_fallthrough);
-
-#undef FINAL_JUMP
-}
-
-
-void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
-                                                   Register super_klass,
-                                                   Register count_temp,
-                                                   Register scan_temp,
-                                                   Register scratch_reg,
-                                                   Register coop_reg,
-                                                   Label* L_success,
-                                                   Label* L_failure) {
-  assert_different_registers(sub_klass, super_klass,
-                             count_temp, scan_temp, scratch_reg, coop_reg);
-
-  Label L_fallthrough, L_loop;
-  int label_nulls = 0;
-  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
-  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
-  assert(label_nulls <= 1, "at most one NULL in the batch");
-
-  // a couple of useful fields in sub_klass:
-  int ss_offset = in_bytes(Klass::secondary_supers_offset());
-  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
-
-  // Do a linear scan of the secondary super-klass chain.
-  // This code is rarely used, so simplicity is a virtue here.
-
-#ifndef PRODUCT
-  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
-  inc_counter((address) pst_counter, count_temp, scan_temp);
-#endif
-
-  // We will consult the secondary-super array.
-  ld_ptr(sub_klass, ss_offset, scan_temp);
-
-  Register search_key = super_klass;
-
-  // Load the array length.  (A positive 32-bit load does the right thing on LP64.)
-  lduw(scan_temp, Array<Klass*>::length_offset_in_bytes(), count_temp);
-
-  // Check for empty secondary super list
-  tst(count_temp);
-
-  // In the array of super classes elements are pointer sized.
-  int element_size = wordSize;
-
-  // Top of search loop
-  bind(L_loop);
-  br(Assembler::equal, false, Assembler::pn, *L_failure);
-  delayed()->add(scan_temp, element_size, scan_temp);
-
-  // Skip the array header in all array accesses.
-  int elem_offset = Array<Klass*>::base_offset_in_bytes();
-  elem_offset -= element_size;   // the scan pointer was pre-incremented also
-
-  // Load next super to check
-  ld_ptr(scan_temp, elem_offset, scratch_reg);
-
-  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
-  cmp(scratch_reg, search_key);
-
-  // A miss means we are NOT a subtype and need to keep looping
-  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
-  delayed()->deccc(count_temp); // decrement trip counter in delay slot
-
-  // Success.  Cache the super we found and proceed in triumph.
-  st_ptr(super_klass, sub_klass, sc_offset);
-
-  if (L_success != &L_fallthrough) {
-    ba(*L_success);
-    delayed()->nop();
-  }
-
-  bind(L_fallthrough);
-}
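
In outline, this slow path is a linear scan over the secondary-supers array, and a hit is written back into the one-element cache so the next fast-path probe succeeds immediately. A sketch under the same hypothetical layout as above:

    // Extends the illustrative Klass with secondary-supers bookkeeping.
    struct KlassWithSecondaries {
      KlassWithSecondaries** secondary_supers;  // scanned linearly below
      int                    secondary_supers_len;
      KlassWithSecondaries*  secondary_super_cache;
    };

    static bool slow_subtype_check(KlassWithSecondaries* sub,
                                   KlassWithSecondaries* super) {
      for (int i = 0; i < sub->secondary_supers_len; i++) {
        if (sub->secondary_supers[i] == super) {
          sub->secondary_super_cache = super;   // cache the hit, as above
          return true;
        }
      }
      return false;
    }
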
-
-
-RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot,
-                                                   Register temp_reg,
-                                                   int extra_slot_offset) {
-  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
-  int stackElementSize = Interpreter::stackElementSize;
-  int offset = extra_slot_offset * stackElementSize;
-  if (arg_slot.is_constant()) {
-    offset += arg_slot.as_constant() * stackElementSize;
-    return offset;
-  } else {
-    assert(temp_reg != noreg, "must specify");
-    sll_ptr(arg_slot.as_register(), exact_log2(stackElementSize), temp_reg);
-    if (offset != 0)
-      add(temp_reg, offset, temp_reg);
-    return temp_reg;
-  }
-}
-
-
-Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
-                                         Register temp_reg,
-                                         int extra_slot_offset) {
-  return Address(Gargs, argument_offset(arg_slot, temp_reg, extra_slot_offset));
-}
-
-
-void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
-                                          Register temp_reg,
-                                          Label& done, Label* slow_case,
-                                          BiasedLockingCounters* counters) {
-  assert(UseBiasedLocking, "why call this otherwise?");
-
-  if (PrintBiasedLockingStatistics) {
-    assert_different_registers(obj_reg, mark_reg, temp_reg, O7);
-    if (counters == NULL)
-      counters = BiasedLocking::counters();
-  }
-
-  Label cas_label;
-
-  // Biased locking
-  // See whether the lock is currently biased toward our thread and
-  // whether the epoch is still valid
-  // Note that the runtime guarantees sufficient alignment of JavaThread
-  // pointers to allow age to be placed into low bits
-  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
-  and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
-  cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);
-
-  load_klass(obj_reg, temp_reg);
-  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
-  or3(G2_thread, temp_reg, temp_reg);
-  xor3(mark_reg, temp_reg, temp_reg);
-  andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
-  if (counters != NULL) {
-    cond_inc(Assembler::equal, (address) counters->biased_lock_entry_count_addr(), mark_reg, temp_reg);
-    // Reload mark_reg as we may need it later
-    ld_ptr(Address(obj_reg, oopDesc::mark_offset_in_bytes()), mark_reg);
-  }
-  brx(Assembler::equal, true, Assembler::pt, done);
-  delayed()->nop();
-
-  Label try_revoke_bias;
-  Label try_rebias;
-  Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
-  assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-
-  // At this point we know that the header has the bias pattern and
-  // that we are not the bias owner in the current epoch. We need to
-  // figure out more details about the state of the header in order to
-  // know what operations can be legally performed on the object's
-  // header.
-
-  // If the low three bits in the xor result aren't clear, that means
-  // the prototype header is no longer biased and we have to revoke
-  // the bias on this object.
-  btst(markOopDesc::biased_lock_mask_in_place, temp_reg);
-  brx(Assembler::notZero, false, Assembler::pn, try_revoke_bias);
-
-  // Biasing is still enabled for this data type. See whether the
-  // epoch of the current bias is still valid, meaning that the epoch
-  // bits of the mark word are equal to the epoch bits of the
-  // prototype header. (Note that the prototype header's epoch bits
-  // only change at a safepoint.) If not, attempt to rebias the object
-  // toward the current thread. Note that we must be absolutely sure
-  // that the current epoch is invalid in order to do this because
-  // otherwise the manipulations it performs on the mark word are
-  // illegal.
-  delayed()->btst(markOopDesc::epoch_mask_in_place, temp_reg);
-  brx(Assembler::notZero, false, Assembler::pn, try_rebias);
-
-  // The epoch of the current bias is still valid but we know nothing
-  // about the owner; it might be set or it might be clear. Try to
-  // acquire the bias of the object using an atomic operation. If this
-  // fails we will go in to the runtime to revoke the object's bias.
-  // Note that we first construct the presumed unbiased header so we
-  // don't accidentally blow away another thread's valid bias.
-  delayed()->and3(mark_reg,
-                  markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
-                  mark_reg);
-  or3(G2_thread, mark_reg, temp_reg);
-  casn(mark_addr.base(), mark_reg, temp_reg);
-  // If the biasing toward our thread failed, this means that
-  // another thread succeeded in biasing it toward itself and we
-  // need to revoke that bias. The revocation will occur in the
-  // interpreter runtime in the slow case.
-  cmp(mark_reg, temp_reg);
-  if (counters != NULL) {
-    cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg);
-  }
-  if (slow_case != NULL) {
-    brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
-    delayed()->nop();
-  }
-  ba_short(done);
-
-  bind(try_rebias);
-  // At this point we know the epoch has expired, meaning that the
-  // current "bias owner", if any, is actually invalid. Under these
-  // circumstances _only_, we are allowed to use the current header's
-  // value as the comparison value when doing the cas to acquire the
-  // bias in the current epoch. In other words, we allow transfer of
-  // the bias from one thread to another directly in this situation.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  load_klass(obj_reg, temp_reg);
-  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
-  or3(G2_thread, temp_reg, temp_reg);
-  casn(mark_addr.base(), mark_reg, temp_reg);
-  // If the biasing toward our thread failed, this means that
-  // another thread succeeded in biasing it toward itself and we
-  // need to revoke that bias. The revocation will occur in the
-  // interpreter runtime in the slow case.
-  cmp(mark_reg, temp_reg);
-  if (counters != NULL) {
-    cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg);
-  }
-  if (slow_case != NULL) {
-    brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
-    delayed()->nop();
-  }
-  ba_short(done);
-
-  bind(try_revoke_bias);
-  // The prototype mark in the klass doesn't have the bias bit set any
-  // more, indicating that objects of this data type are not supposed
-  // to be biased any more. We are going to try to reset the mark of
-  // this object to the prototype value and fall through to the
-  // CAS-based locking scheme. Note that if our CAS fails, it means
-  // that another thread raced us for the privilege of revoking the
-  // bias of this particular object, so it's okay to continue in the
-  // normal locking code.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  load_klass(obj_reg, temp_reg);
-  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
-  casn(mark_addr.base(), mark_reg, temp_reg);
-  // Fall through to the normal CAS-based lock, because no matter what
-  // the result of the above CAS, some thread must have succeeded in
-  // removing the bias bit from the object's header.
-  if (counters != NULL) {
-    cmp(mark_reg, temp_reg);
-    cond_inc(Assembler::zero, (address) counters->revoked_lock_entry_count_addr(), mark_reg, temp_reg);
-  }
-
-  bind(cas_label);
-}
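
Stripped of delay-slot scheduling, the enter path xors the mark word against (prototype header | self) and dispatches on which bit groups differ, exactly as the comments describe. A hedged C++ rendering; the mask values are placeholders, not the real markOopDesc constants:

    #include <cstdint>

    // Placeholder masks; the real bit positions live in markOopDesc.
    const uintptr_t kBiasedLockMask = 0x7;    // low lock/bias bits
    const uintptr_t kEpochMask      = 0x300;  // assumed epoch position
    const uintptr_t kAgeMask        = 0x78;   // assumed age position

    enum BiasAction { kOwned, kRevokeBias, kRebias, kTryAcquireBias };

    static BiasAction classify_bias(uintptr_t mark, uintptr_t proto,
                                    uintptr_t self_thread) {
      uintptr_t diff = (mark ^ (proto | self_thread)) & ~kAgeMask;
      if (diff == 0)              return kOwned;       // biased to us already
      if (diff & kBiasedLockMask) return kRevokeBias;  // biasing switched off
      if (diff & kEpochMask)      return kRebias;      // epoch expired
      return kTryAcquireBias;                          // CAS our thread id in
    }
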
-
-void MacroAssembler::biased_locking_exit (Address mark_addr, Register temp_reg, Label& done,
-                                          bool allow_delay_slot_filling) {
-  // Check for biased locking unlock case, which is a no-op
-  // Note: we do not have to check the thread ID for two reasons.
-  // First, the interpreter checks for IllegalMonitorStateException at
-  // a higher level. Second, if the bias was revoked while we held the
-  // lock, the object could not be rebiased toward another thread, so
-  // the bias bit would be clear.
-  ld_ptr(mark_addr, temp_reg);
-  and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
-  cmp(temp_reg, markOopDesc::biased_lock_pattern);
-  brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done);
-  delayed();
-  if (!allow_delay_slot_filling) {
-    nop();
-  }
-}
-
-
-// CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
-// Solaris/SPARC's "as".  Another apt name would be cas_ptr()
-
-void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
-  casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
-}
-
-
-
-// compiler_lock_object() and compiler_unlock_object() are direct transliterations
-// of i486.ad fast_lock() and fast_unlock().  See those methods for detailed comments.
-// The code could be tightened up considerably.
-//
-// box->dhw disposition - post-conditions at DONE_LABEL.
-// -   Successful inflated lock:  box->dhw != 0.
-//     Any non-zero value suffices.
-//     Consider G2_thread, rsp, boxReg, or unused_mark()
-// -   Successful Stack-lock: box->dhw == mark.
-//     box->dhw must contain the displaced mark word value
-// -   Failure -- icc.ZFlag == 0 and box->dhw is undefined.
-//     The slow-path fast_enter() and slow_enter() operators
-//     are responsible for setting box->dhw = NonZero (typically ::unused_mark).
-// -   Biased: box->dhw is undefined
-//
-// SPARC refworkload performance - specifically jetstream and scimark - is
-// extremely sensitive to the size of the code emitted by compiler_lock_object
-// and compiler_unlock_object.  Critically, the key factor is code size, not path
-// length.  (Simple experiments to pad CLO with unexecuted NOPs demonstrate the
-// effect.)
-
-
-void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
-                                          Register Rbox, Register Rscratch,
-                                          BiasedLockingCounters* counters,
-                                          bool try_bias) {
-   Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
-
-   verify_oop(Roop);
-   Label done ;
-
-   if (counters != NULL) {
-     inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
-   }
-
-   if (EmitSync & 1) {
-     mov(3, Rscratch);
-     st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-     cmp(SP, G0);
-     return ;
-   }
-
-   if (EmitSync & 2) {
-
-     // Fetch object's markword
-     ld_ptr(mark_addr, Rmark);
-
-     if (try_bias) {
-        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
-     }
-
-     // Save Rbox in Rscratch to be used for the cas operation
-     mov(Rbox, Rscratch);
-
-     // set Rmark to markOop | markOopDesc::unlocked_value
-     or3(Rmark, markOopDesc::unlocked_value, Rmark);
-
-     // Initialize the box.  (Must happen before we update the object mark!)
-     st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-
-     // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
-     assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-     casx_under_lock(mark_addr.base(), Rmark, Rscratch,
-        (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
-
-     // if compare/exchange succeeded we found an unlocked object and we now have locked it
-     // hence we are done
-     cmp(Rmark, Rscratch);
-#ifdef _LP64
-     sub(Rscratch, STACK_BIAS, Rscratch);
-#endif
-     brx(Assembler::equal, false, Assembler::pt, done);
-     delayed()->sub(Rscratch, SP, Rscratch);  //pull next instruction into delay slot
-
-     // we did not find an unlocked object so see if this is a recursive case
-     // sub(Rscratch, SP, Rscratch);
-     assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-     andcc(Rscratch, 0xfffff003, Rscratch);
-     st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-     bind (done);
-     return ;
-   }
-
-   Label Egress ;
-
-   if (EmitSync & 256) {
-      Label IsInflated ;
-
-      ld_ptr(mark_addr, Rmark);           // fetch obj->mark
-      // Triage: biased, stack-locked, neutral, inflated
-      if (try_bias) {
-        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
-        // Invariant: if control reaches this point in the emitted stream
-        // then Rmark has not been modified.
-      }
-
-      // Store mark into displaced mark field in the on-stack basic-lock "box"
-      // Critically, this must happen before the CAS
-      // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
-      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-      andcc(Rmark, 2, G0);
-      brx(Assembler::notZero, false, Assembler::pn, IsInflated);
-      delayed()->
-
-      // Try stack-lock acquisition.
-      // Beware: the 1st instruction is in a delay slot
-      mov(Rbox,  Rscratch);
-      or3(Rmark, markOopDesc::unlocked_value, Rmark);
-      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-      casn(mark_addr.base(), Rmark, Rscratch);
-      cmp(Rmark, Rscratch);
-      brx(Assembler::equal, false, Assembler::pt, done);
-      delayed()->sub(Rscratch, SP, Rscratch);
-
-      // Stack-lock attempt failed - check for recursive stack-lock.
-      // See the comments below about how we might remove this case.
-#ifdef _LP64
-      sub(Rscratch, STACK_BIAS, Rscratch);
-#endif
-      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-      andcc(Rscratch, 0xfffff003, Rscratch);
-      br(Assembler::always, false, Assembler::pt, done);
-      delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-
-      bind(IsInflated);
-      if (EmitSync & 64) {
-         // If m->owner != null goto IsLocked
-         // Pessimistic form: Test-and-CAS vs CAS
-         // The optimistic form avoids RTS->RTO cache line upgrades.
-         ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
-         andcc(Rscratch, Rscratch, G0);
-         brx(Assembler::notZero, false, Assembler::pn, done);
-         delayed()->nop();
-         // m->owner == null : it's unlocked.
-      }
-
-      // Try to CAS m->owner from null to Self
-      // Invariant: if we acquire the lock then _recursions should be 0.
-      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
-      mov(G2_thread, Rscratch);
-      casn(Rmark, G0, Rscratch);
-      cmp(Rscratch, G0);
-      // Intentional fall-through into done
-   } else {
-      // Aggressively avoid the Store-before-CAS penalty
-      // Defer the store into box->dhw until after the CAS
-      Label IsInflated, Recursive ;
-
-// Anticipate CAS -- Avoid RTS->RTO upgrade
-// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
-
-      ld_ptr(mark_addr, Rmark);           // fetch obj->mark
-      // Triage: biased, stack-locked, neutral, inflated
-
-      if (try_bias) {
-        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
-        // Invariant: if control reaches this point in the emitted stream
-        // then Rmark has not been modified.
-      }
-      andcc(Rmark, 2, G0);
-      brx(Assembler::notZero, false, Assembler::pn, IsInflated);
-      delayed()->                         // Beware - dangling delay-slot
-
-      // Try stack-lock acquisition.
-      // Transiently install BUSY (0) encoding in the mark word.
-      // if the CAS of 0 into the mark was successful then we execute:
-      //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
-      //   ST obj->mark = box    -- overwrite transient 0 value
-      // This presumes TSO, of course.
-
-      mov(0, Rscratch);
-      or3(Rmark, markOopDesc::unlocked_value, Rmark);
-      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-      casn(mark_addr.base(), Rmark, Rscratch);
-// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
-      cmp(Rscratch, Rmark);
-      brx(Assembler::notZero, false, Assembler::pn, Recursive);
-      delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-      if (counters != NULL) {
-        cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
-      }
-      ba(done);
-      delayed()->st_ptr(Rbox, mark_addr);
-
-      bind(Recursive);
-      // Stack-lock attempt failed - check for recursive stack-lock.
-      // Tests show that we can remove the recursive case with no impact
-      // on refworkload 0.83.  If we need to reduce the size of the code
-      // emitted by compiler_lock_object(), the recursive case is a perfect
-      // candidate.
-      //
-      // A more extreme idea is to always inflate on stack-lock recursion.
-      // This lets us eliminate the recursive checks in compiler_lock_object
-      // and compiler_unlock_object and the (box->dhw == 0) encoding.
-      // A brief experiment - requiring changes to synchronizer.cpp, the
-      // interpreter, and so on - showed a performance *increase*.  In the
-      // same experiment I eliminated
-      // the fast-path stack-lock code from the interpreter and always passed
-      // control to the "slow" operators in synchronizer.cpp.
-
-      // Rscratch contains the fetched obj->mark value from the failed CASN.
-#ifdef _LP64
-      sub(Rscratch, STACK_BIAS, Rscratch);
-#endif
-      sub(Rscratch, SP, Rscratch);
-      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-      andcc(Rscratch, 0xfffff003, Rscratch);
-      if (counters != NULL) {
-        // Accounting needs the Rscratch register
-        st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-        cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
-        ba_short(done);
-      } else {
-        ba(done);
-        delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-      }
-
-      bind   (IsInflated);
-      if (EmitSync & 64) {
-         // If m->owner != null goto IsLocked
-         // Test-and-CAS vs CAS
-         // Pessimistic form avoids futile (doomed) CAS attempts
-         // The optimistic form avoids RTS->RTO cache line upgrades.
-         ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
-         andcc(Rscratch, Rscratch, G0);
-         brx(Assembler::notZero, false, Assembler::pn, done);
-         delayed()->nop();
-         // m->owner == null : it's unlocked.
-      }
-
-      // Try to CAS m->owner from null to Self
-      // Invariant: if we acquire the lock then _recursions should be 0.
-      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
-      mov(G2_thread, Rscratch);
-      casn(Rmark, G0, Rscratch);
-      cmp(Rscratch, G0);
-      // ST box->displaced_header = NonZero.
-      // Any non-zero value suffices:
-      //    unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
-      st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
-      // Intentional fall-through into done
-   }
-
-   bind   (done);
-}
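
The stack-lock protocol underneath all of these variants is compact: save the presumed-unlocked mark in the on-stack box, CAS the box address into the header, and on failure test whether the old mark already points into our own stack (the recursive case). A sketch of the plain variant (roughly the EmitSync & 2 shape), with std::atomic standing in for casn/casx and hypothetical types:

    #include <atomic>
    #include <cstdint>

    struct BasicLockBox { uintptr_t displaced_header; };

    // frame_mask plays the role of the 0xfffff003 test above: zero means
    // the old mark lies within our own stack page with the low bits clear.
    static bool fast_lock(std::atomic<uintptr_t>& mark_addr, BasicLockBox* box,
                          uintptr_t sp, uintptr_t frame_mask) {
      uintptr_t mark = mark_addr.load() | 1;   // presume unlocked (bit 0 set)
      box->displaced_header = mark;            // must happen before the CAS
      uintptr_t expected = mark;
      if (mark_addr.compare_exchange_strong(
              expected, reinterpret_cast<uintptr_t>(box)))
        return true;                           // we now hold the stack lock
      if (((expected - sp) & frame_mask) == 0) {
        box->displaced_header = 0;             // recursive stack lock
        return true;
      }
      return false;                            // inflated/contended: slow path
    }
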
-
-void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
-                                            Register Rbox, Register Rscratch,
-                                            bool try_bias) {
-   Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
-
-   Label done ;
-
-   if (EmitSync & 4) {
-     cmp(SP, G0);
-     return ;
-   }
-
-   if (EmitSync & 8) {
-     if (try_bias) {
-        biased_locking_exit(mark_addr, Rscratch, done);
-     }
-
-     // Test first if it is a fast recursive unlock
-     ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
-     br_null_short(Rmark, Assembler::pt, done);
-
-     // Check if it is still a lightweight lock; this is true if we see
-     // the stack address of the basicLock in the markOop of the object
-     assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-     casx_under_lock(mark_addr.base(), Rbox, Rmark,
-       (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
-     ba(done);
-     delayed()->cmp(Rbox, Rmark);
-     bind(done);
-     return ;
-   }
-
-   // Beware ... If the aggregate size of the code emitted by CLO and CUO
-   // is too large, performance rolls abruptly off a cliff.
-   // This could be related to inlining policies, code cache management, or
-   // I$ effects.
-   Label LStacked ;
-
-   if (try_bias) {
-      // TODO: eliminate redundant LDs of obj->mark
-      biased_locking_exit(mark_addr, Rscratch, done);
-   }
-
-   ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
-   ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
-   andcc(Rscratch, Rscratch, G0);
-   brx(Assembler::zero, false, Assembler::pn, done);
-   delayed()->nop();      // consider: relocate fetch of mark, above, into this DS
-   andcc(Rmark, 2, G0);
-   brx(Assembler::zero, false, Assembler::pt, LStacked);
-   delayed()->nop();
-
-   // It's inflated
-   // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
-   // the ST of 0 into _owner which releases the lock.  This prevents loads
-   // and stores within the critical section from reordering (floating)
-   // past the store that releases the lock.  But TSO is a strong memory model
-   // and that particular flavor of barrier is a noop, so we can safely elide it.
-   // Note that we use 1-0 locking by default for the inflated case.  We
-   // close the resultant (and rare) race by having contended threads in
-   // monitorenter periodically poll _owner.
-   ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
-   ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
-   xor3(Rscratch, G2_thread, Rscratch);
-   orcc(Rbox, Rscratch, Rbox);
-   brx(Assembler::notZero, false, Assembler::pn, done);
-   delayed()->
-   ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
-   ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
-   orcc(Rbox, Rscratch, G0);
-   if (EmitSync & 65536) {
-      Label LSucc ;
-      brx(Assembler::notZero, false, Assembler::pn, LSucc);
-      delayed()->nop();
-      ba(done);
-      delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
-
-      bind(LSucc);
-      st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
-      if (os::is_MP()) { membar (StoreLoad); }
-      ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
-      andcc(Rscratch, Rscratch, G0);
-      brx(Assembler::notZero, false, Assembler::pt, done);
-      delayed()->andcc(G0, G0, G0);
-      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
-      mov(G2_thread, Rscratch);
-      casn(Rmark, G0, Rscratch);
-      // invert icc.zf and goto done
-      br_notnull(Rscratch, false, Assembler::pt, done);
-      delayed()->cmp(G0, G0);
-      ba(done);
-      delayed()->cmp(G0, 1);
-   } else {
-      brx(Assembler::notZero, false, Assembler::pn, done);
-      delayed()->nop();
-      ba(done);
-      delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
-   }
-
-   bind   (LStacked);
-   // Consider: we could replace the expensive CAS in the exit
-   // path with a simple ST of the displaced mark value fetched from
-   // the on-stack basiclock box.  That admits a race where a thread T2
-   // in the slow lock path -- inflating with monitor M -- could race a
-   // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
-   // More precisely T1 in the stack-lock unlock path could "stomp" the
-   // inflated mark value M installed by T2, resulting in an orphan
-   // object monitor M and T2 becoming stranded.  We can remedy that situation
-   // by having T2 periodically poll the object's mark word using timed wait
-   // operations.  If T2 discovers that a stomp has occurred it vacates
-   // the monitor M and wakes any other threads stranded on the now-orphan M.
-   // In addition the monitor scavenger, which performs deflation,
-   // would also need to check for orphan monitors and stranded threads.
-   //
-   // Finally, inflation is also used when T2 needs to assign a hashCode
-   // to O and O is stack-locked by T1.  The "stomp" race could cause
-   // an assigned hashCode value to be lost.  We can avoid that condition
-   // and provide the necessary hashCode stability invariants by ensuring
-   // that hashCode generation is idempotent between copying GCs.
-   // For example we could compute the hashCode of an object O as
-   // O's heap address XOR some high quality RNG value that is refreshed
-   // at GC-time.  The monitor scavenger would install the hashCode
-   // found in any orphan monitors.  Again, the mechanism admits a
-   // lost-update "stomp" WAW race but detects and recovers as needed.
-   //
-   // A prototype implementation showed excellent results, although
-   // the scavenger and timeout code was rather involved.
-
-   casn(mark_addr.base(), Rbox, Rscratch);
-   cmp(Rbox, Rscratch);
-   // Intentional fall through into done ...
-
-   bind(done);
-}
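
Fast unlock is the mirror image: a zero displaced header marks a recursive stack lock (nothing to undo), an inflated mark defers to the ObjectMonitor exit protocol, and otherwise one CAS swings the displaced mark back into the header. A sketch under the same hypothetical encoding as the locking sketch above:

    #include <atomic>
    #include <cstdint>

    static bool fast_unlock(std::atomic<uintptr_t>& mark_addr,
                            uintptr_t displaced_header,
                            uintptr_t box_addr) {
      if (displaced_header == 0) return true;   // recursive: nothing to undo
      if (mark_addr.load() & 2)  return false;  // inflated: monitor exit path
      uintptr_t expected = box_addr;
      return mark_addr.compare_exchange_strong(expected, displaced_header);
    }
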
-
-
-
-void MacroAssembler::print_CPU_state() {
-  // %%%%% need to implement this
-}
-
-void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
-  // %%%%% need to implement this
-}
-
-void MacroAssembler::push_IU_state() {
-  // %%%%% need to implement this
-}
-
-
-void MacroAssembler::pop_IU_state() {
-  // %%%%% need to implement this
-}
-
-
-void MacroAssembler::push_FPU_state() {
-  // %%%%% need to implement this
-}
-
-
-void MacroAssembler::pop_FPU_state() {
-  // %%%%% need to implement this
-}
-
-
-void MacroAssembler::push_CPU_state() {
-  // %%%%% need to implement this
-}
-
-
-void MacroAssembler::pop_CPU_state() {
-  // %%%%% need to implement this
-}
-
-
-
-void MacroAssembler::verify_tlab() {
-#ifdef ASSERT
-  if (UseTLAB && VerifyOops) {
-    Label next, next2, ok;
-    Register t1 = L0;
-    Register t2 = L1;
-    Register t3 = L2;
-
-    save_frame(0);
-    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
-    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
-    or3(t1, t2, t3);
-    cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
-    STOP("assert(top >= start)");
-    should_not_reach_here();
-
-    bind(next);
-    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
-    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
-    or3(t3, t2, t3);
-    cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
-    STOP("assert(top <= end)");
-    should_not_reach_here();
-
-    bind(next2);
-    and3(t3, MinObjAlignmentInBytesMask, t3);
-    cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
-    STOP("assert(aligned)");
-    should_not_reach_here();
-
-    bind(ok);
-    restore();
-  }
-#endif
-}
-
-
-void MacroAssembler::eden_allocate(
-  Register obj,                        // result: pointer to object after successful allocation
-  Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
-  int      con_size_in_bytes,          // object size in bytes if   known at compile time
-  Register t1,                         // temp register
-  Register t2,                         // temp register
-  Label&   slow_case                   // continuation point if fast allocation fails
-){
-  // make sure arguments make sense
-  assert_different_registers(obj, var_size_in_bytes, t1, t2);
-  assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
-  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
-
-  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
-    // No allocation in the shared eden.
-    ba_short(slow_case);
-  } else {
-    // get eden boundaries
-    // note: we need both top & top_addr!
-    const Register top_addr = t1;
-    const Register end      = t2;
-
-    CollectedHeap* ch = Universe::heap();
-    set((intx)ch->top_addr(), top_addr);
-    intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
-    ld_ptr(top_addr, delta, end);
-    ld_ptr(top_addr, 0, obj);
-
-    // try to allocate
-    Label retry;
-    bind(retry);
-#ifdef ASSERT
-    // make sure eden top is properly aligned
-    {
-      Label L;
-      btst(MinObjAlignmentInBytesMask, obj);
-      br(Assembler::zero, false, Assembler::pt, L);
-      delayed()->nop();
-      STOP("eden top is not properly aligned");
-      bind(L);
-    }
-#endif // ASSERT
-    const Register free = end;
-    sub(end, obj, free);                                   // compute amount of free space
-    if (var_size_in_bytes->is_valid()) {
-      // size is unknown at compile time
-      cmp(free, var_size_in_bytes);
-      br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space, take the slow case
-      delayed()->add(obj, var_size_in_bytes, end);
-    } else {
-      // size is known at compile time
-      cmp(free, con_size_in_bytes);
-      br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space, take the slow case
-      delayed()->add(obj, con_size_in_bytes, end);
-    }
-    // Compare obj with the value at top_addr; if still equal, swap the value of
-    // end with the value at top_addr. If not equal, read the value at top_addr
-    // into end.
-    casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
-    // if someone beat us on the allocation, try again, otherwise continue
-    cmp(obj, end);
-    brx(Assembler::notEqual, false, Assembler::pn, retry);
-    delayed()->mov(end, obj);                              // nop if successful since obj == end
-
-#ifdef ASSERT
-    // make sure eden top is properly aligned
-    {
-      Label L;
-      const Register top_addr = t1;
-
-      set((intx)ch->top_addr(), top_addr);
-      ld_ptr(top_addr, 0, top_addr);
-      btst(MinObjAlignmentInBytesMask, top_addr);
-      br(Assembler::zero, false, Assembler::pt, L);
-      delayed()->nop();
-      STOP("eden top is not properly aligned");
-      bind(L);
-    }
-#endif // ASSERT
-  }
-}
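
The retry loop above is classic CAS bump allocation over a shared top pointer. The same shape in portable C++, with std::atomic standing in for casx_under_lock:

    #include <atomic>
    #include <cstddef>

    // Returns the object start, or nullptr to signal the slow case.
    static char* eden_allocate(std::atomic<char*>& top, char* end,
                               size_t size) {
      char* obj = top.load(std::memory_order_relaxed);
      for (;;) {
        if (static_cast<size_t>(end - obj) < size) return nullptr;
        char* new_top = obj + size;
        // On failure 'obj' is refreshed with the winner's value; retry.
        if (top.compare_exchange_weak(obj, new_top)) return obj;
      }
    }
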
-
-
-void MacroAssembler::tlab_allocate(
-  Register obj,                        // result: pointer to object after successful allocation
-  Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
-  int      con_size_in_bytes,          // object size in bytes if   known at compile time
-  Register t1,                         // temp register
-  Label&   slow_case                   // continuation point if fast allocation fails
-){
-  // make sure arguments make sense
-  assert_different_registers(obj, var_size_in_bytes, t1);
-  assert(0 <= con_size_in_bytes && is_simm13(con_size_in_bytes), "illegal object size");
-  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
-
-  const Register free  = t1;
-
-  verify_tlab();
-
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), obj);
-
-  // calculate amount of free space
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), free);
-  sub(free, obj, free);
-
-  Label done;
-  if (var_size_in_bytes == noreg) {
-    cmp(free, con_size_in_bytes);
-  } else {
-    cmp(free, var_size_in_bytes);
-  }
-  br(Assembler::less, false, Assembler::pn, slow_case);
-  // calculate the new top pointer
-  if (var_size_in_bytes == noreg) {
-    delayed()->add(obj, con_size_in_bytes, free);
-  } else {
-    delayed()->add(obj, var_size_in_bytes, free);
-  }
-
-  bind(done);
-
-#ifdef ASSERT
-  // make sure new free pointer is properly aligned
-  {
-    Label L;
-    btst(MinObjAlignmentInBytesMask, free);
-    br(Assembler::zero, false, Assembler::pt, L);
-    delayed()->nop();
-    STOP("updated TLAB free is not properly aligned");
-    bind(L);
-  }
-#endif // ASSERT
-
-  // update the tlab top pointer
-  st_ptr(free, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
-  verify_tlab();
-}
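
TLAB allocation needs none of that machinery: each thread owns its buffer outright, so a plain load, compare, and store suffice. The whole fast path, with a hypothetical two-field TLAB:

    #include <cstddef>

    struct Tlab { char* top; char* end; };   // illustrative, per-thread

    static char* tlab_allocate(Tlab& t, size_t size) {
      if (static_cast<size_t>(t.end - t.top) < size)
        return nullptr;                      // refill or slow case
      char* obj = t.top;
      t.top = obj + size;                    // non-atomic bump is safe here
      return obj;
    }
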
-
-
-void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
-  Register top = O0;
-  Register t1 = G1;
-  Register t2 = G3;
-  Register t3 = O1;
-  assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
-  Label do_refill, discard_tlab;
-
-  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
-    // No allocation in the shared eden.
-    ba_short(slow_case);
-  }
-
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);
-
-  // calculate amount of free space
-  sub(t1, top, t1);
-  srl_ptr(t1, LogHeapWordSize, t1);
-
-  // Retain tlab and allocate object in shared space if
-  // the amount free in the tlab is too large to discard.
-  cmp(t1, t2);
-  brx(Assembler::lessEqual, false, Assembler::pt, discard_tlab);
-
-  // increment waste limit to prevent getting stuck on this slow path
-  delayed()->add(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment(), t2);
-  st_ptr(t2, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
-  if (TLABStats) {
-    // increment number of slow_allocations
-    ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
-    add(t2, 1, t2);
-    stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
-  }
-  ba_short(try_eden);
-
-  bind(discard_tlab);
-  if (TLABStats) {
-    // increment number of refills
-    ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
-    add(t2, 1, t2);
-    stw(t2, G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()));
-    // accumulate wastage
-    ld(G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()), t2);
-    add(t2, t1, t2);
-    stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
-  }
-
-  // if tlab is currently allocated (top or end != null) then
-  // fill [top, end + alignment_reserve) with array object
-  br_null_short(top, Assembler::pn, do_refill);
-
-  set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
-  st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
-  // set klass to intArrayKlass
-  sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
-  add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
-  sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
-  st(t1, top, arrayOopDesc::length_offset_in_bytes());
-  set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
-  ld_ptr(t2, 0, t2);
-  // Store klass last: concurrent GCs assume the length is valid if the
-  // klass field is not null.
-  store_klass(t2, top);
-  verify_oop(top);
-
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t1);
-  sub(top, t1, t1); // size of tlab's allocated portion
-  incr_allocated_bytes(t1, t2, t3);
-
-  // refill the tlab with an eden allocation
-  bind(do_refill);
-  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
-  sll_ptr(t1, LogHeapWordSize, t1);
-  // allocate new tlab, address returned in top
-  eden_allocate(top, t1, 0, t2, t3, slow_case);
-
-  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset()));
-  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
-#ifdef ASSERT
-  // check that tlab_size (t1) is still valid
-  {
-    Label ok;
-    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
-    sll_ptr(t2, LogHeapWordSize, t2);
-    cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
-    STOP("assert(t1 == tlab_size)");
-    should_not_reach_here();
-
-    bind(ok);
-  }
-#endif // ASSERT
-  add(top, t1, top); // t1 is tlab_size
-  sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
-  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
-  verify_tlab();
-  ba_short(retry);
-}
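
The policy being implemented: if the TLAB still has more free space than the waste limit, keep it (bumping the limit so this path cannot repeat forever) and satisfy the request from shared eden; otherwise plug the unused tail with a filler int[] so the heap stays parseable, and refill from eden. The decision in C++, with hypothetical bookkeeping fields:

    #include <cstddef>

    struct TlabPolicy {
      size_t free_words;
      size_t refill_waste_limit;
      size_t waste_limit_increment;
    };

    // true => discard (fill remainder, refill from eden); false => retain.
    static bool should_discard_tlab(TlabPolicy& t) {
      if (t.free_words > t.refill_waste_limit) {
        t.refill_waste_limit += t.waste_limit_increment;  // unstick slow path
        return false;   // allocate this object in shared eden instead
      }
      return true;
    }
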
-
-void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
-                                          Register t1, Register t2) {
-  // Bump total bytes allocated by this thread
-  assert(t1->is_global(), "must be global reg"); // so all 64 bits are saved on a context switch
-  assert_different_registers(size_in_bytes.register_or_noreg(), t1, t2);
-  // v8 support has gone the way of the dodo
-  ldx(G2_thread, in_bytes(JavaThread::allocated_bytes_offset()), t1);
-  add(t1, ensure_simm13_or_reg(size_in_bytes, t2), t1);
-  stx(t1, G2_thread, in_bytes(JavaThread::allocated_bytes_offset()));
-}
-
-Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
-  switch (cond) {
-    // Note some conditions are synonyms for others
-    case Assembler::never:                return Assembler::always;
-    case Assembler::zero:                 return Assembler::notZero;
-    case Assembler::lessEqual:            return Assembler::greater;
-    case Assembler::less:                 return Assembler::greaterEqual;
-    case Assembler::lessEqualUnsigned:    return Assembler::greaterUnsigned;
-    case Assembler::lessUnsigned:         return Assembler::greaterEqualUnsigned;
-    case Assembler::negative:             return Assembler::positive;
-    case Assembler::overflowSet:          return Assembler::overflowClear;
-    case Assembler::always:               return Assembler::never;
-    case Assembler::notZero:              return Assembler::zero;
-    case Assembler::greater:              return Assembler::lessEqual;
-    case Assembler::greaterEqual:         return Assembler::less;
-    case Assembler::greaterUnsigned:      return Assembler::lessEqualUnsigned;
-    case Assembler::greaterEqualUnsigned: return Assembler::lessUnsigned;
-    case Assembler::positive:             return Assembler::negative;
-    case Assembler::overflowClear:        return Assembler::overflowSet;
-  }
-
-  ShouldNotReachHere(); return Assembler::overflowClear;
-}
-
-void MacroAssembler::cond_inc(Assembler::Condition cond, address counter_ptr,
-                              Register Rtmp1, Register Rtmp2 /*, Register Rtmp3, Register Rtmp4 */) {
-  Condition negated_cond = negate_condition(cond);
-  Label L;
-  brx(negated_cond, false, Assembler::pt, L);
-  delayed()->nop();
-  inc_counter(counter_ptr, Rtmp1, Rtmp2);
-  bind(L);
-}
-
-void MacroAssembler::inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2) {
-  AddressLiteral addrlit(counter_addr);
-  sethi(addrlit, Rtmp1);                 // Move hi22 bits into temporary register.
-  Address addr(Rtmp1, addrlit.low10());  // Build an address with low10 bits.
-  ld(addr, Rtmp2);
-  inc(Rtmp2);
-  st(Rtmp2, addr);
-}
-
-void MacroAssembler::inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2) {
-  inc_counter((address) counter_addr, Rtmp1, Rtmp2);
-}
-
-SkipIfEqual::SkipIfEqual(
-    MacroAssembler* masm, Register temp, const bool* flag_addr,
-    Assembler::Condition condition) {
-  _masm = masm;
-  AddressLiteral flag(flag_addr);
-  _masm->sethi(flag, temp);
-  _masm->ldub(temp, flag.low10(), temp);
-  _masm->tst(temp);
-  _masm->br(condition, false, Assembler::pt, _label);
-  _masm->delayed()->nop();
-}
-
-SkipIfEqual::~SkipIfEqual() {
-  _masm->bind(_label);
-}
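
SkipIfEqual is an RAII guard over emitted code: the constructor assembles the flag test and a forward branch, everything emitted in between is the conditionally executed body, and the destructor binds the branch target. A usage fragment (the flag name is made up):

    {
      // With Assembler::zero, the branch skips the body when the flag byte
      // is zero, i.e. the guarded code runs only when the flag is set.
      SkipIfEqual skip(&masm, G4, &SomeHypotheticalFlag, Assembler::zero);
      // ... emit instrumentation here ...
    }   // ~SkipIfEqual binds the label; the skip branch lands here
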
-
-
-// Writes to stack successive pages until offset reached to check for
-// stack overflow + shadow pages.  This clobbers tsp and scratch.
-void MacroAssembler::bang_stack_size(Register Rsize, Register Rtsp,
-                                     Register Rscratch) {
-  // Copy the stack pointer into the temp stack pointer.
-  mov(SP, Rtsp);
-
-  // Bang stack for total size given plus stack shadow page size.
-  // Bang one page at a time because a large size can overflow yellow and
-  // red zones (the bang will fail but stack overflow handling can't tell that
-  // it was a stack overflow bang vs a regular segv).
-  int offset = os::vm_page_size();
-  Register Roffset = Rscratch;
-
-  Label loop;
-  bind(loop);
-  set((-offset)+STACK_BIAS, Rscratch);
-  st(G0, Rtsp, Rscratch);
-  set(offset, Roffset);
-  sub(Rsize, Roffset, Rsize);
-  cmp(Rsize, G0);
-  br(Assembler::greater, false, Assembler::pn, loop);
-  delayed()->sub(Rtsp, Roffset, Rtsp);
-
-  // Bang down shadow pages too.
-  // The -1 because we already subtracted 1 page.
-  for (int i = 0; i < StackShadowPages-1; i++) {
-    set((-i*offset)+STACK_BIAS, Rscratch);
-    st(G0, Rtsp, Rscratch);
-  }
-}
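
What the loop does, restated portably: touch one word per page walking down from the stack pointer so a guard-page fault is recognizably a stack overflow, then touch the shadow pages below that. A C++ model with a 4K page assumed:

    #include <cstddef>

    static void bang_stack(volatile char* sp, size_t size, int shadow_pages) {
      const size_t page = 4096;              // assumption for the sketch
      volatile char* p = sp;
      for (size_t banged = 0; banged < size; banged += page) {
        p -= page;
        *p = 0;          // a fault here is reported as stack overflow
      }
      for (int i = 0; i < shadow_pages - 1; i++)   // -1: one page already done
        *(p - static_cast<size_t>(i) * page) = 0;
    }
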
-
-///////////////////////////////////////////////////////////////////////////////////
-#ifndef SERIALGC
-
-static address satb_log_enqueue_with_frame = NULL;
-static u_char* satb_log_enqueue_with_frame_end = NULL;
-
-static address satb_log_enqueue_frameless = NULL;
-static u_char* satb_log_enqueue_frameless_end = NULL;
-
-static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
-
-static void generate_satb_log_enqueue(bool with_frame) {
-  BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
-  CodeBuffer buf(bb);
-  MacroAssembler masm(&buf);
-
-#define __ masm.
-
-  address start = __ pc();
-  Register pre_val;
-
-  Label refill, restart;
-  if (with_frame) {
-    __ save_frame(0);
-    pre_val = I0;  // Was O0 before the save.
-  } else {
-    pre_val = O0;
-  }
-
-  int satb_q_index_byte_offset =
-    in_bytes(JavaThread::satb_mark_queue_offset() +
-             PtrQueue::byte_offset_of_index());
-
-  int satb_q_buf_byte_offset =
-    in_bytes(JavaThread::satb_mark_queue_offset() +
-             PtrQueue::byte_offset_of_buf());
-
-  assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
-         in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
-         "check sizes in assembly below");
-
-  __ bind(restart);
-
-  // Load the index into the SATB buffer. PtrQueue::_index is a size_t
-  // so ld_ptr is appropriate.
-  __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
-
-  // index == 0?
-  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
-
-  __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
-  __ sub(L0, oopSize, L0);
-
-  __ st_ptr(pre_val, L1, L0);  // [_buf + index] := pre_val
-  if (!with_frame) {
-    // Use return-from-leaf
-    __ retl();
-    __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
-  } else {
-    // Not delayed.
-    __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
-  }
-  if (with_frame) {
-    __ ret();
-    __ delayed()->restore();
-  }
-  __ bind(refill);
-
-  address handle_zero =
-    CAST_FROM_FN_PTR(address,
-                     &SATBMarkQueueSet::handle_zero_index_for_thread);
-  // This should be rare enough that we can afford to save all the
-  // scratch registers that the calling context might be using.
-  __ mov(G1_scratch, L0);
-  __ mov(G3_scratch, L1);
-  __ mov(G4, L2);
-  // We need the value of O0 above (for the write into the buffer), so we
-  // save and restore it.
-  __ mov(O0, L3);
-  // Since the call will overwrite O7, we save and restore that, as well.
-  __ mov(O7, L4);
-  __ call_VM_leaf(L5, handle_zero, G2_thread);
-  __ mov(L0, G1_scratch);
-  __ mov(L1, G3_scratch);
-  __ mov(L2, G4);
-  __ mov(L3, O0);
-  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
-  __ delayed()->mov(L4, O7);
-
-  if (with_frame) {
-    satb_log_enqueue_with_frame = start;
-    satb_log_enqueue_with_frame_end = __ pc();
-  } else {
-    satb_log_enqueue_frameless = start;
-    satb_log_enqueue_frameless_end = __ pc();
-  }
-
-#undef __
-}
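
Behind the stub is a simple descending queue: _index counts down in bytes, zero means the buffer is full, and a runtime call swaps in a fresh buffer. The same logic in C++ (queue layout as described by the code above; the refill helper is a stub standing in for handle_zero_index_for_thread):

    #include <cstddef>

    struct SatbQueue { size_t index; void** buf; };

    void refill_satb_queue(SatbQueue& q);   // runtime call, not shown

    static void satb_enqueue(SatbQueue& q, void* pre_val) {
      if (q.index == 0)
        refill_satb_queue(q);               // flush and reset the index
      q.index -= sizeof(void*);
      q.buf[q.index / sizeof(void*)] = pre_val;
    }
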
-
-static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
-  if (with_frame) {
-    if (satb_log_enqueue_with_frame == 0) {
-      generate_satb_log_enqueue(with_frame);
-      assert(satb_log_enqueue_with_frame != 0, "postcondition.");
-      if (G1SATBPrintStubs) {
-        tty->print_cr("Generated with-frame satb enqueue:");
-        Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
-                             satb_log_enqueue_with_frame_end,
-                             tty);
-      }
-    }
-  } else {
-    if (satb_log_enqueue_frameless == 0) {
-      generate_satb_log_enqueue(with_frame);
-      assert(satb_log_enqueue_frameless != 0, "postcondition.");
-      if (G1SATBPrintStubs) {
-        tty->print_cr("Generated frameless satb enqueue:");
-        Disassembler::decode((u_char*)satb_log_enqueue_frameless,
-                             satb_log_enqueue_frameless_end,
-                             tty);
-      }
-    }
-  }
-}
-
-void MacroAssembler::g1_write_barrier_pre(Register obj,
-                                          Register index,
-                                          int offset,
-                                          Register pre_val,
-                                          Register tmp,
-                                          bool preserve_o_regs) {
-  Label filtered;
-
-  if (obj == noreg) {
-    // We are not loading the previous value so make
-    // sure that we don't trash the value in pre_val
-    // with the code below.
-    assert_different_registers(pre_val, tmp);
-  } else {
-    // We will be loading the previous value
-    // in this code so...
-    assert(offset == 0 || index == noreg, "choose one");
-    assert(pre_val == noreg, "check this code");
-  }
-
-  // Is marking active?
-  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
-    ld(G2,
-       in_bytes(JavaThread::satb_mark_queue_offset() +
-                PtrQueue::byte_offset_of_active()),
-       tmp);
-  } else {
-    guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
-              "Assumption");
-    ldsb(G2,
-         in_bytes(JavaThread::satb_mark_queue_offset() +
-                  PtrQueue::byte_offset_of_active()),
-         tmp);
-  }
-
-  // If marking is inactive, skip the barrier.
-  cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
-
-  // Do we need to load the previous value?
-  if (obj != noreg) {
-    // Load the previous value...
-    if (index == noreg) {
-      if (Assembler::is_simm13(offset)) {
-        load_heap_oop(obj, offset, tmp);
-      } else {
-        set(offset, tmp);
-        load_heap_oop(obj, tmp, tmp);
-      }
-    } else {
-      load_heap_oop(obj, index, tmp);
-    }
-    // Previous value has been loaded into tmp
-    pre_val = tmp;
-  }
-
-  assert(pre_val != noreg, "must have a real register");
-
-  // Is the previous value null?
-  cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);
-
-  // OK, it's not filtered, so we'll need to call enqueue.  In the normal
-  // case, pre_val will be a scratch G-reg, but there are some cases in
-  // which it's an O-reg.  In the first case, do a normal call.  In the
-  // latter, do a save here and call the frameless version.
-
-  guarantee(pre_val->is_global() || pre_val->is_out(),
-            "Or we need to think harder.");
-
-  if (pre_val->is_global() && !preserve_o_regs) {
-    generate_satb_log_enqueue_if_necessary(true); // with frame
-
-    call(satb_log_enqueue_with_frame);
-    delayed()->mov(pre_val, O0);
-  } else {
-    generate_satb_log_enqueue_if_necessary(false); // frameless
-
-    save_frame(0);
-    call(satb_log_enqueue_frameless);
-    delayed()->mov(pre_val->after_save(), O0);
-    restore();
-  }
-
-  bind(filtered);
-}
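
Read top to bottom, the pre-barrier is three filters and an enqueue: bail out unless marking is active, load the previous value if the caller did not, bail out on null, and only then record it (snapshot-at-the-beginning needs the overwritten value, never the new one). In outline, reusing the SatbQueue sketch above:

    static void g1_pre_barrier(void** field, SatbQueue& q, bool marking) {
      if (!marking) return;              // cheapest filter first
      void* pre_val = *field;            // value about to be overwritten
      if (pre_val == nullptr) return;    // null never needs recording
      satb_enqueue(q, pre_val);          // as sketched above
    }
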
-
-static address dirty_card_log_enqueue = 0;
-static u_char* dirty_card_log_enqueue_end = 0;
-
-// This code may assume that O0 contains the object address.
-static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
-  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
-  CodeBuffer buf(bb);
-  MacroAssembler masm(&buf);
-#define __ masm.
-  address start = __ pc();
-
-  Label not_already_dirty, restart, refill;
-
-#ifdef _LP64
-  __ srlx(O0, CardTableModRefBS::card_shift, O0);
-#else
-  __ srl(O0, CardTableModRefBS::card_shift, O0);
-#endif
-  AddressLiteral addrlit(byte_map_base);
-  __ set(addrlit, O1); // O1 := <card table base>
-  __ ldub(O0, O1, O2); // O2 := [O0 + O1]
-
-  assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
-  __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
-
-  // We didn't take the branch, so we're already dirty: return.
-  // Use return-from-leaf
-  __ retl();
-  __ delayed()->nop();
-
-  // Not dirty.
-  __ bind(not_already_dirty);
-
-  // Get O0 + O1 into a reg by itself
-  __ add(O0, O1, O3);
-
-  // First, dirty it.
-  __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
-
-  int dirty_card_q_index_byte_offset =
-    in_bytes(JavaThread::dirty_card_queue_offset() +
-             PtrQueue::byte_offset_of_index());
-  int dirty_card_q_buf_byte_offset =
-    in_bytes(JavaThread::dirty_card_queue_offset() +
-             PtrQueue::byte_offset_of_buf());
-  __ bind(restart);
-
-  // Load the index into the update buffer. PtrQueue::_index is
-  // a size_t so ld_ptr is appropriate here.
-  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
-
-  // index == 0?
-  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
-
-  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
-  __ sub(L0, oopSize, L0);
-
-  __ st_ptr(O3, L1, L0);  // [_buf + index] := O3 (card address)
-  // Use return-from-leaf
-  __ retl();
-  __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
-
-  __ bind(refill);
-  address handle_zero =
-    CAST_FROM_FN_PTR(address,
-                     &DirtyCardQueueSet::handle_zero_index_for_thread);
-  // This should be rare enough that we can afford to save all the
-  // scratch registers that the calling context might be using.
-  __ mov(G1_scratch, L3);
-  __ mov(G3_scratch, L5);
-  // We need the value of O3 above (for the write into the buffer), so we
-  // save and restore it.
-  __ mov(O3, L6);
-  // Since the call will overwrite O7, we save and restore that, as well.
-  __ mov(O7, L4);
-
-  __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
-  __ mov(L3, G1_scratch);
-  __ mov(L5, G3_scratch);
-  __ mov(L6, O3);
-  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
-  __ delayed()->mov(L4, O7);
-
-  dirty_card_log_enqueue = start;
-  dirty_card_log_enqueue_end = __ pc();
-  // XXX Should have a guarantee here about not going off the end!
-  // Does it already do so?  Do an experiment...
-
-#undef __
-
-}
-
-static inline void
-generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
-  if (dirty_card_log_enqueue == 0) {
-    generate_dirty_card_log_enqueue(byte_map_base);
-    assert(dirty_card_log_enqueue != 0, "postcondition.");
-    if (G1SATBPrintStubs) {
-      tty->print_cr("Generated dirty_card enqueue:");
-      Disassembler::decode((u_char*)dirty_card_log_enqueue,
-                           dirty_card_log_enqueue_end,
-                           tty);
-    }
-  }
-}
-
-
-void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
-
-  Label filtered;
-  MacroAssembler* post_filter_masm = this;
-
-  if (new_val == G0) return;
-
-  G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::G1SATBCT ||
-         bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
-
-  if (G1RSBarrierRegionFilter) {
-    xor3(store_addr, new_val, tmp);
-#ifdef _LP64
-    srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
-#else
-    srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
-#endif
-
-    // XXX Should I predict this taken or not?  Does it matter?
-    cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
-  }
-
-  // If the "store_addr" register is an "in" or "local" register, move it to
-  // a scratch reg so we can pass it as an argument.
-  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
-  // Pick a scratch register different from "tmp".
-  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
-  // Make sure we use up the delay slot!
-  if (use_scr) {
-    post_filter_masm->mov(store_addr, scr);
-  } else {
-    post_filter_masm->nop();
-  }
-  generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
-  save_frame(0);
-  call(dirty_card_log_enqueue);
-  if (use_scr) {
-    delayed()->mov(scr, O0);
-  } else {
-    delayed()->mov(store_addr->after_save(), O0);
-  }
-  restore();
-
-  bind(filtered);
-}
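
So the post-barrier filters same-region and null stores with the xor/shift trick, then dirties the card and queues it for concurrent refinement only when it was not dirty already. An illustrative C++ rendering; the 512-byte card and the parameter names are assumptions, not the real CardTableModRefBS interface:

    #include <cstdint>

    typedef signed char jbyte;

    static void g1_post_barrier(void** field, void* new_val,
                                jbyte* card_table, int log_region_bytes,
                                void (*enqueue_card)(jbyte*)) {
      if (new_val == nullptr) return;
      uintptr_t f = (uintptr_t)field, v = (uintptr_t)new_val;
      if (((f ^ v) >> log_region_bytes) == 0) return;  // same region
      jbyte* card = &card_table[f >> 9];   // 512-byte cards assumed
      if (*card != 0) {                    // 0 == dirty, per the stub above
        *card = 0;
        enqueue_card(card);                // hand off to refinement
      }
    }
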
-
-#endif  // SERIALGC
-///////////////////////////////////////////////////////////////////////////////////
-
-void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
-  // If we're writing constant NULL, we can skip the write barrier.
-  if (new_val == G0) return;
-  CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableModRef ||
-         bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
-  card_table_write(bs->byte_map_base, tmp, store_addr);
-}
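
The non-G1 barrier reduces to an unconditional card mark: shift the store address down to a card index and store the dirty value into the byte map. As a one-liner (the shift of 9 for 512-byte cards is the conventional value, assumed here):

    #include <cstdint>

    typedef signed char jbyte;

    static void card_mark(jbyte* byte_map_base, void* store_addr) {
      byte_map_base[(uintptr_t)store_addr >> 9] = 0;   // 0 == dirty
    }
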
-
-void MacroAssembler::load_klass(Register src_oop, Register klass) {
-  // The number of bytes in this code is used by
-  // MachCallDynamicJavaNode::ret_addr_offset()
-  // if this changes, change that.
-  if (UseCompressedKlassPointers) {
-    lduw(src_oop, oopDesc::klass_offset_in_bytes(), klass);
-    decode_klass_not_null(klass);
-  } else {
-    ld_ptr(src_oop, oopDesc::klass_offset_in_bytes(), klass);
-  }
-}
-
-void MacroAssembler::store_klass(Register klass, Register dst_oop) {
-  if (UseCompressedKlassPointers) {
-    assert(dst_oop != klass, "not enough registers");
-    encode_klass_not_null(klass);
-    st(klass, dst_oop, oopDesc::klass_offset_in_bytes());
-  } else {
-    st_ptr(klass, dst_oop, oopDesc::klass_offset_in_bytes());
-  }
-}
-
-void MacroAssembler::store_klass_gap(Register s, Register d) {
-  if (UseCompressedKlassPointers) {
-    assert(s != d, "not enough registers");
-    st(s, d, oopDesc::klass_gap_offset_in_bytes());
-  }
-}
-
-void MacroAssembler::load_heap_oop(const Address& s, Register d) {
-  if (UseCompressedOops) {
-    lduw(s, d);
-    decode_heap_oop(d);
-  } else {
-    ld_ptr(s, d);
-  }
-}
-
-void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) {
-   if (UseCompressedOops) {
-    lduw(s1, s2, d);
-    decode_heap_oop(d, d);
-  } else {
-    ld_ptr(s1, s2, d);
-  }
-}
-
-void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) {
-   if (UseCompressedOops) {
-    lduw(s1, simm13a, d);
-    decode_heap_oop(d, d);
-  } else {
-    ld_ptr(s1, simm13a, d);
-  }
-}
-
-void MacroAssembler::load_heap_oop(Register s1, RegisterOrConstant s2, Register d) {
-  if (s2.is_constant())  load_heap_oop(s1, s2.as_constant(), d);
-  else                   load_heap_oop(s1, s2.as_register(), d);
-}
-
-void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) {
-  if (UseCompressedOops) {
-    assert(s1 != d && s2 != d, "not enough registers");
-    encode_heap_oop(d);
-    st(d, s1, s2);
-  } else {
-    st_ptr(d, s1, s2);
-  }
-}
-
-void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) {
-  if (UseCompressedOops) {
-    assert(s1 != d, "not enough registers");
-    encode_heap_oop(d);
-    st(d, s1, simm13a);
-  } else {
-    st_ptr(d, s1, simm13a);
-  }
-}
-
-void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) {
-  if (UseCompressedOops) {
-    assert(a.base() != d, "not enough registers");
-    encode_heap_oop(d);
-    st(d, a, offset);
-  } else {
-    st_ptr(d, a, offset);
-  }
-}
-
-
-void MacroAssembler::encode_heap_oop(Register src, Register dst) {
-  assert (UseCompressedOops, "must be compressed");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-  verify_oop(src);
-  if (Universe::narrow_oop_base() == NULL) {
-    srlx(src, LogMinObjAlignmentInBytes, dst);
-    return;
-  }
-  Label done;
-  if (src == dst) {
-    // optimize for frequent case src == dst
-    bpr(rc_nz, true, Assembler::pt, src, done);
-    delayed() -> sub(src, G6_heapbase, dst); // annulled if not taken
-    bind(done);
-    srlx(src, LogMinObjAlignmentInBytes, dst);
-  } else {
-    bpr(rc_z, false, Assembler::pn, src, done);
-    delayed() -> mov(G0, dst);
-    // The sub could be moved before the branch, with the delay slot annulled,
-    // but that may add some unneeded work when the oop is null.
-    sub(src, G6_heapbase, dst);
-    srlx(dst, LogMinObjAlignmentInBytes, dst);
-    bind(done);
-  }
-}
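-// Encoding sketch (illustrative):
-//   narrow = (src == NULL) ? 0 : ((src - narrow_oop_base) >> LogMinObjAlignmentInBytes);
-// with a NULL narrow_oop_base, the subtraction is skipped and only the shift remains.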
-
-
-void MacroAssembler::encode_heap_oop_not_null(Register r) {
-  assert (UseCompressedOops, "must be compressed");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-  verify_oop(r);
-  if (Universe::narrow_oop_base() != NULL)
-    sub(r, G6_heapbase, r);
-  srlx(r, LogMinObjAlignmentInBytes, r);
-}
-
-void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) {
-  assert (UseCompressedOops, "must be compressed");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-  verify_oop(src);
-  if (Universe::narrow_oop_base() == NULL) {
-    srlx(src, LogMinObjAlignmentInBytes, dst);
-  } else {
-    sub(src, G6_heapbase, dst);
-    srlx(dst, LogMinObjAlignmentInBytes, dst);
-  }
-}
-
-// Same algorithm as oops.inline.hpp decode_heap_oop.
-void  MacroAssembler::decode_heap_oop(Register src, Register dst) {
-  assert (UseCompressedOops, "must be compressed");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-  sllx(src, LogMinObjAlignmentInBytes, dst);
-  if (Universe::narrow_oop_base() != NULL) {
-    Label done;
-    bpr(rc_nz, true, Assembler::pt, dst, done);
-    delayed() -> add(dst, G6_heapbase, dst); // annulled if not taken
-    bind(done);
-  }
-  verify_oop(dst);
-}
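-// Decoding sketch (illustrative), matching the oops.inline.hpp note above:
-//   dst = narrow << LogMinObjAlignmentInBytes;
-//   if (dst != 0 && narrow_oop_base != NULL) dst += narrow_oop_base;  // NULL stays NULL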
-
-void  MacroAssembler::decode_heap_oop_not_null(Register r) {
-  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
-  // pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  assert (UseCompressedOops, "must be compressed");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-  sllx(r, LogMinObjAlignmentInBytes, r);
-  if (Universe::narrow_oop_base() != NULL)
-    add(r, G6_heapbase, r);
-}
-
-void  MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
-  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
-  // pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  assert (UseCompressedOops, "must be compressed");
-  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-  sllx(src, LogMinObjAlignmentInBytes, dst);
-  if (Universe::narrow_oop_base() != NULL)
-    add(dst, G6_heapbase, dst);
-}
-
-void MacroAssembler::encode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  if (Universe::narrow_klass_base() != NULL)
-    sub(r, G6_heapbase, r);
-  srlx(r, LogKlassAlignmentInBytes, r);
-}
-
-void MacroAssembler::encode_klass_not_null(Register src, Register dst) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  if (Universe::narrow_klass_base() == NULL) {
-    srlx(src, LogKlassAlignmentInBytes, dst);
-  } else {
-    sub(src, G6_heapbase, dst);
-    srlx(dst, LogKlassAlignmentInBytes, dst);
-  }
-}
-
-void  MacroAssembler::decode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
-  // pd_code_size_limit.
-  assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  sllx(r, LogKlassAlignmentInBytes, r);
-  if (Universe::narrow_klass_base() != NULL)
-    add(r, G6_heapbase, r);
-}
-
-void  MacroAssembler::decode_klass_not_null(Register src, Register dst) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
-  // pd_code_size_limit.
-  assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  sllx(src, LogKlassAlignmentInBytes, dst);
-  if (Universe::narrow_klass_base() != NULL)
-    add(dst, G6_heapbase, dst);
-}
-
-void MacroAssembler::reinit_heapbase() {
-  if (UseCompressedOops || UseCompressedKlassPointers) {
-    AddressLiteral base(Universe::narrow_ptrs_base_addr());
-    load_ptr_contents(base, G6_heapbase);
-  }
-}
-
-// Compare char[] arrays aligned to 4 bytes.
-void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
-                                        Register limit, Register result,
-                                        Register chr1, Register chr2, Label& Ldone) {
-  Label Lvector, Lloop;
-  assert(chr1 == result, "should be the same");
-
-  // Note: limit contains the byte count (2*char_elements) and is != 0.
-  andcc(limit, 0x2, chr1); // trailing character?
-  br(Assembler::zero, false, Assembler::pt, Lvector);
-  delayed()->nop();
-
-  // compare the trailing char
-  sub(limit, sizeof(jchar), limit);
-  lduh(ary1, limit, chr1);
-  lduh(ary2, limit, chr2);
-  cmp(chr1, chr2);
-  br(Assembler::notEqual, true, Assembler::pt, Ldone);
-  delayed()->mov(G0, result);     // not equal
-
-  // only one char?
-  cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
-  delayed()->add(G0, 1, result); // zero-length arrays are equal
-
-  // word-by-word compare, don't need alignment check
-  bind(Lvector);
-  // Shift ary1 and ary2 to the end of the arrays, negate limit
-  add(ary1, limit, ary1);
-  add(ary2, limit, ary2);
-  neg(limit, limit);
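-  // Index sketch (illustrative): ary1/ary2 now point one past the region to
-  // compare and limit == -nbytes, so [aryX + limit] addresses the first word;
-  // inccc then advances limit by 4 per pass and the loop exits at zero.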
-
-  lduw(ary1, limit, chr1);
-  bind(Lloop);
-  lduw(ary2, limit, chr2);
-  cmp(chr1, chr2);
-  br(Assembler::notEqual, true, Assembler::pt, Ldone);
-  delayed()->mov(G0, result);     // not equal
-  inccc(limit, 2*sizeof(jchar));
-  // annul LDUW if branch is not taken to prevent access past end of array
-  br(Assembler::notZero, true, Assembler::pt, Lloop);
-  delayed()->lduw(ary1, limit, chr1); // hoisted
-
-  // Caller should set it:
-  // add(G0, 1, result); // equals
-}
-
-// Use BIS for zeroing (count is in bytes).
-void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
-  assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
-  Register end = count;
-  int cache_line_size = VM_Version::prefetch_data_size();
-  // Minimum count for which BIS zeroing can be used, since
-  // it needs a membar, which is expensive.
-  int block_zero_size  = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit);
-
-  Label small_loop;
-  // Check if count is negative (dead code) or zero.
-  // Note: count uses 64 bits in a 64-bit VM.
-  cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone);
-
-  // Use BIS zeroing only for big arrays since it requires membar.
-  if (Assembler::is_simm13(block_zero_size)) { // < 4096
-    cmp(count, block_zero_size);
-  } else {
-    set(block_zero_size, temp);
-    cmp(count, temp);
-  }
-  br(Assembler::lessUnsigned, false, Assembler::pt, small_loop);
-  delayed()->add(to, count, end);
-
-  // Note: size is >= three (32-byte) cache lines.
-
-  // Clean the beginning of space up to next cache line.
-  for (int offs = 0; offs < cache_line_size; offs += 8) {
-    stx(G0, to, offs);
-  }
-
-  // align to next cache line
-  add(to, cache_line_size, to);
-  and3(to, -cache_line_size, to);
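-  // Alignment sketch (illustrative): to = (to + line) & -line rounds "to" up
-  // to the next cache-line boundary; the stx loop above already zeroed any
-  // bytes that this skips over.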
-
-  // Note: size left >= two (32-byte) cache lines.
-
-  // BIS should not be used to zero the tail (64 bytes),
-  // to avoid zeroing the header of the following object.
-  sub(end, (cache_line_size*2)-8, end);
-
-  Label bis_loop;
-  bind(bis_loop);
-  stxa(G0, to, G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
-  add(to, cache_line_size, to);
-  cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, bis_loop);
-
-  // BIS needs membar.
-  membar(Assembler::StoreLoad);
-
-  add(end, (cache_line_size*2)-8, end); // restore end
-  cmp_and_brx_short(to, end, Assembler::greaterEqualUnsigned, Assembler::pn, Ldone);
-
-  // Clean the tail.
-  bind(small_loop);
-  stx(G0, to, 0);
-  add(to, 8, to);
-  cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop);
-  nop(); // Separate short branches
-}
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,554 +25,13 @@
 #ifndef CPU_SPARC_VM_ASSEMBLER_SPARC_HPP
 #define CPU_SPARC_VM_ASSEMBLER_SPARC_HPP
 
-class BiasedLockingCounters;
-
-// <sys/trap.h> promises that the system will not use traps 16-31
-#define ST_RESERVED_FOR_USER_0 0x10
-
-/* Written: David Ungar 4/19/97 */
-
-// Contains all the definitions needed for sparc assembly code generation.
-
-// Register aliases for parts of the system:
-
-// 64 bit values can be kept in g1-g5, o1-o5 and o7 and all 64 bits are safe
-// across context switches in V8+ ABI.  Of course, there are no 64 bit regs
-// in V8 ABI. All 64 bits are preserved in V9 ABI for all registers.
-
-// g2-g4 are scratch registers called "application globals".  Their
-// meaning is reserved to the "compilation system"--which means us!
-// They are not supposed to be touched by ordinary C code, although
-// highly-optimized C code might steal them for temps.  They are safe
-// across thread switches, and the ABI requires that they be safe
-// across function calls.
-//
-// g1 and g3 are touched by more modules.  V8 allows g1 to be clobbered
-// across func calls, and V8+ also allows g5 to be clobbered across
-// func calls.  Also, g1 and g5 can get touched while doing shared
-// library loading.
-//
-// We must not touch g7 (it is the thread-self register) and g6 is
-// reserved for certain tools.  g0, of course, is always zero.
-//
-// (Sources:  SunSoft Compilers Group, thread library engineers.)
-
-// %%%% The interpreter should be revisited to reduce global scratch regs.
-
-// This global always holds the current JavaThread pointer:
-
-REGISTER_DECLARATION(Register, G2_thread , G2);
-REGISTER_DECLARATION(Register, G6_heapbase , G6);
-
-// The following globals are part of the Java calling convention:
-
-REGISTER_DECLARATION(Register, G5_method             , G5);
-REGISTER_DECLARATION(Register, G5_megamorphic_method , G5_method);
-REGISTER_DECLARATION(Register, G5_inline_cache_reg   , G5_method);
-
-// The following globals are used for the new C1 & interpreter calling convention:
-REGISTER_DECLARATION(Register, Gargs        , G4); // pointing to the last argument
-
-// This local is used to preserve G2_thread in the interpreter and in stubs:
-REGISTER_DECLARATION(Register, L7_thread_cache , L7);
-
-// These globals are used as scratch registers in the interpreter:
-
-REGISTER_DECLARATION(Register, Gframe_size   , G1); // SAME REG as G1_scratch
-REGISTER_DECLARATION(Register, G1_scratch    , G1); // also SAME
-REGISTER_DECLARATION(Register, G3_scratch    , G3);
-REGISTER_DECLARATION(Register, G4_scratch    , G4);
-
-// These globals are used as short-lived scratch registers in the compiler:
-
-REGISTER_DECLARATION(Register, Gtemp  , G5);
-
-// JSR 292 fixed register usages:
-REGISTER_DECLARATION(Register, G5_method_type        , G5);
-REGISTER_DECLARATION(Register, G3_method_handle      , G3);
-REGISTER_DECLARATION(Register, L7_mh_SP_save         , L7);
-
-// The compiler requires that G5_megamorphic_method is G5_inline_cache_klass,
-// because a single patchable "set" instruction (NativeMovConstReg,
-// or NativeMovConstPatching for compiler1)
-// serves to set up either quantity, depending on whether the compiled
-// call site is an inline cache or is megamorphic.  See the function
-// CompiledIC::set_to_megamorphic.
-//
-// If an inline cache targets an interpreted method, then the
-// G5 register will be used twice during the call.  First,
-// the call site will be patched to load a compiledICHolder
-// into G5. (This is an ordered pair of ic_klass, method.)
-// The c2i adapter will first check the ic_klass, then load
-// G5_method with the method part of the pair just before
-// jumping into the interpreter.
-//
-// Note that G5_method is only the method-self for the interpreter,
-// and is logically unrelated to G5_megamorphic_method.
-//
-// Invariants on G2_thread (the JavaThread pointer):
-//  - it should not be used for any other purpose anywhere
-//  - it must be re-initialized by StubRoutines::call_stub()
-//  - it must be preserved around every use of call_VM
-
-// We can consider using g2/g3/g4 to cache more values than the
-// JavaThread, such as the card-marking base or perhaps pointers into
-// Eden.  It's something of a waste to use them as scratch temporaries,
-// since they are not supposed to be volatile.  (Of course, if we find
-// that Java doesn't benefit from application globals, then we can just
-// use them as ordinary temporaries.)
-//
-// Since g1 and g5 (and/or g6) are the volatile (caller-save) registers,
-// it makes sense to use them routinely for procedure linkage,
-// whenever the On registers are not applicable.  Examples:  G5_method,
-// G5_inline_cache_klass, and a double handful of miscellaneous compiler
-// stubs.  This means that compiler stubs, etc., should be kept to a
-// maximum of two or three G-register arguments.
-
-
-// stub frames
-
-REGISTER_DECLARATION(Register, Lentry_args      , L0); // pointer to args passed to callee (interpreter) not stub itself
-
-// Interpreter frames
-
-#ifdef CC_INTERP
-REGISTER_DECLARATION(Register, Lstate           , L0); // interpreter state object pointer
-REGISTER_DECLARATION(Register, L1_scratch       , L1); // scratch
-REGISTER_DECLARATION(Register, Lmirror          , L1); // mirror (for native methods only)
-REGISTER_DECLARATION(Register, L2_scratch       , L2);
-REGISTER_DECLARATION(Register, L3_scratch       , L3);
-REGISTER_DECLARATION(Register, L4_scratch       , L4);
-REGISTER_DECLARATION(Register, Lscratch         , L5); // C1 uses
-REGISTER_DECLARATION(Register, Lscratch2        , L6); // C1 uses
-REGISTER_DECLARATION(Register, L7_scratch       , L7); // constant pool cache
-REGISTER_DECLARATION(Register, O5_savedSP       , O5);
-REGISTER_DECLARATION(Register, I5_savedSP       , I5); // Saved SP before bumping for locals.  This is simply
-                                                       // a copy of SP, so in 64-bit it's a biased value.  The bias
-                                                       // is added and removed as needed in the frame code.
-// Interface to signature handler
-REGISTER_DECLARATION(Register, Llocals          , L7); // pointer to locals for signature handler
-REGISTER_DECLARATION(Register, Lmethod          , L6); // Method* when calling signature handler
-
-#else
-REGISTER_DECLARATION(Register, Lesp             , L0); // expression stack pointer
-REGISTER_DECLARATION(Register, Lbcp             , L1); // pointer to next bytecode
-REGISTER_DECLARATION(Register, Lmethod          , L2);
-REGISTER_DECLARATION(Register, Llocals          , L3);
-REGISTER_DECLARATION(Register, Largs            , L3); // pointer to locals for signature handler
-                                                       // must match Llocals in asm interpreter
-REGISTER_DECLARATION(Register, Lmonitors        , L4);
-REGISTER_DECLARATION(Register, Lbyte_code       , L5);
-// When calling out from the interpreter we record SP so that we can remove any extra stack
-// space allocated during adapter transitions. This register is only live from the point
-// of the call until we return.
-REGISTER_DECLARATION(Register, Llast_SP         , L5);
-REGISTER_DECLARATION(Register, Lscratch         , L5);
-REGISTER_DECLARATION(Register, Lscratch2        , L6);
-REGISTER_DECLARATION(Register, LcpoolCache      , L6); // constant pool cache
-
-REGISTER_DECLARATION(Register, O5_savedSP       , O5);
-REGISTER_DECLARATION(Register, I5_savedSP       , I5); // Saved SP before bumping for locals.  This is simply
-                                                       // a copy of SP, so in 64-bit it's a biased value.  The bias
-                                                       // is added and removed as needed in the frame code.
-REGISTER_DECLARATION(Register, IdispatchTables  , I4); // Base address of the bytecode dispatch tables
-REGISTER_DECLARATION(Register, IdispatchAddress , I3); // Register which saves the dispatch address for each bytecode
-REGISTER_DECLARATION(Register, ImethodDataPtr   , I2); // Pointer to the current method data
-#endif /* CC_INTERP */
-
-// NOTE: Lscratch2 and LcpoolCache point to the same registers in
-//       the interpreter code. If Lscratch2 needs to be used for some
-//       purpose, then LcpoolCache should be restored afterwards for
-//       the interpreter to work right.
-// (These assignments must be compatible with L7_thread_cache; see above.)
-
-// Since Lbcp points into the middle of the method object,
-// it is temporarily converted into a "bcx" during GC.
-
-// Exception processing
-// These registers are passed into exception handlers.
-// All exception handlers require the exception object being thrown.
-// In addition, an nmethod's exception handler must be passed
-// the address of the call site within the nmethod, to allow
-// proper selection of the applicable catch block.
-// (Interpreter frames use their own bcp() for this purpose.)
-//
-// The Oissuing_pc value is not always needed.  When jumping to a
-// handler that is known to be interpreted, the Oissuing_pc value can be
-// omitted.  An actual catch block in compiled code receives (from its
-// nmethod's exception handler) the thrown exception in the Oexception,
-// but it doesn't need the Oissuing_pc.
-//
-// If an exception handler (either interpreted or compiled)
-// discovers there is no applicable catch block, it updates
-// the Oissuing_pc to the continuation PC of its own caller,
-// pops back to that caller's stack frame, and executes that
-// caller's exception handler.  Obviously, this process will
-// iterate until the control stack is popped back to a method
-// containing an applicable catch block.  A key invariant is
-// that the Oissuing_pc value is always a value local to
-// the method whose exception handler is currently executing.
-//
-// Note:  The issuing PC value is __not__ a raw return address (I7 value).
-// It is a "return pc", the address __following__ the call.
-// Raw return addresses are converted to issuing PCs by frame::pc(),
-// or by stubs.  Issuing PCs can be used directly with PC range tables.
-//
-REGISTER_DECLARATION(Register, Oexception  , O0); // exception being thrown
-REGISTER_DECLARATION(Register, Oissuing_pc , O1); // where the exception is coming from
-
-
-// These must occur after the declarations above
-#ifndef DONT_USE_REGISTER_DEFINES
-
-#define Gthread             AS_REGISTER(Register, Gthread)
-#define Gmethod             AS_REGISTER(Register, Gmethod)
-#define Gmegamorphic_method AS_REGISTER(Register, Gmegamorphic_method)
-#define Ginline_cache_reg   AS_REGISTER(Register, Ginline_cache_reg)
-#define Gargs               AS_REGISTER(Register, Gargs)
-#define Lthread_cache       AS_REGISTER(Register, Lthread_cache)
-#define Gframe_size         AS_REGISTER(Register, Gframe_size)
-#define Gtemp               AS_REGISTER(Register, Gtemp)
-
-#ifdef CC_INTERP
-#define Lstate              AS_REGISTER(Register, Lstate)
-#define Lesp                AS_REGISTER(Register, Lesp)
-#define L1_scratch          AS_REGISTER(Register, L1_scratch)
-#define Lmirror             AS_REGISTER(Register, Lmirror)
-#define L2_scratch          AS_REGISTER(Register, L2_scratch)
-#define L3_scratch          AS_REGISTER(Register, L3_scratch)
-#define L4_scratch          AS_REGISTER(Register, L4_scratch)
-#define Lscratch            AS_REGISTER(Register, Lscratch)
-#define Lscratch2           AS_REGISTER(Register, Lscratch2)
-#define L7_scratch          AS_REGISTER(Register, L7_scratch)
-#define Ostate              AS_REGISTER(Register, Ostate)
-#else
-#define Lesp                AS_REGISTER(Register, Lesp)
-#define Lbcp                AS_REGISTER(Register, Lbcp)
-#define Lmethod             AS_REGISTER(Register, Lmethod)
-#define Llocals             AS_REGISTER(Register, Llocals)
-#define Lmonitors           AS_REGISTER(Register, Lmonitors)
-#define Lbyte_code          AS_REGISTER(Register, Lbyte_code)
-#define Lscratch            AS_REGISTER(Register, Lscratch)
-#define Lscratch2           AS_REGISTER(Register, Lscratch2)
-#define LcpoolCache         AS_REGISTER(Register, LcpoolCache)
-#endif /* ! CC_INTERP */
-
-#define Lentry_args         AS_REGISTER(Register, Lentry_args)
-#define I5_savedSP          AS_REGISTER(Register, I5_savedSP)
-#define O5_savedSP          AS_REGISTER(Register, O5_savedSP)
-#define IdispatchAddress    AS_REGISTER(Register, IdispatchAddress)
-#define ImethodDataPtr      AS_REGISTER(Register, ImethodDataPtr)
-#define IdispatchTables     AS_REGISTER(Register, IdispatchTables)
-
-#define Oexception          AS_REGISTER(Register, Oexception)
-#define Oissuing_pc         AS_REGISTER(Register, Oissuing_pc)
-
-
-#endif
-
-// Address is an abstraction used to represent a memory location.
-//
-// Note: A register location is represented via a Register, not
-//       via an address for efficiency & simplicity reasons.
-
-class Address VALUE_OBJ_CLASS_SPEC {
- private:
-  Register           _base;           // Base register.
-  RegisterOrConstant _index_or_disp;  // Index register or constant displacement.
-  RelocationHolder   _rspec;
-
- public:
-  Address() : _base(noreg), _index_or_disp(noreg) {}
-
-  Address(Register base, RegisterOrConstant index_or_disp)
-    : _base(base),
-      _index_or_disp(index_or_disp) {
-  }
-
-  Address(Register base, Register index)
-    : _base(base),
-      _index_or_disp(index) {
-  }
-
-  Address(Register base, int disp)
-    : _base(base),
-      _index_or_disp(disp) {
-  }
-
-#ifdef ASSERT
-  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
-  Address(Register base, ByteSize disp)
-    : _base(base),
-      _index_or_disp(in_bytes(disp)) {
-  }
-#endif
-
-  // accessors
-  Register base()             const { return _base; }
-  Register index()            const { return _index_or_disp.as_register(); }
-  int      disp()             const { return _index_or_disp.as_constant(); }
-
-  bool     has_index()        const { return _index_or_disp.is_register(); }
-  bool     has_disp()         const { return _index_or_disp.is_constant(); }
-
-  bool     uses(Register reg) const { return base() == reg || (has_index() && index() == reg); }
-
-  const relocInfo::relocType rtype() { return _rspec.type(); }
-  const RelocationHolder&    rspec() { return _rspec; }
-
-  RelocationHolder rspec(int offset) const {
-    return offset == 0 ? _rspec : _rspec.plus(offset);
-  }
-
-  inline bool is_simm13(int offset = 0);  // check disp+offset for overflow
-
-  Address plus_disp(int plusdisp) const {     // bump disp by a small amount
-    assert(_index_or_disp.is_constant(), "must have a displacement");
-    Address a(base(), disp() + plusdisp);
-    return a;
-  }
-  bool is_same_address(Address a) const {
-    // disregard _rspec
-    return base() == a.base() && (has_index() ? index() == a.index() : disp() == a.disp());
-  }
-
-  Address after_save() const {
-    Address a = (*this);
-    a._base = a._base->after_save();
-    return a;
-  }
-
-  Address after_restore() const {
-    Address a = (*this);
-    a._base = a._base->after_restore();
-    return a;
-  }
-
-  // Convert the raw encoding form into the form expected by the
-  // constructor for Address.
-  static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
-
-  friend class Assembler;
-};
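-// Usage sketch (illustrative only; registers and offsets are made up):
-//   Address a(SP, 0x20);          // SP + 0x20 (constant displacement form)
-//   Address b = a.plus_disp(8);   // SP + 0x28
-//   Address c(G3, G4);            // G3 + G4  (register index form)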
-
-
-class AddressLiteral VALUE_OBJ_CLASS_SPEC {
- private:
-  address          _address;
-  RelocationHolder _rspec;
-
-  RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) {
-    switch (rtype) {
-    case relocInfo::external_word_type:
-      return external_word_Relocation::spec(addr);
-    case relocInfo::internal_word_type:
-      return internal_word_Relocation::spec(addr);
-#ifdef _LP64
-    case relocInfo::opt_virtual_call_type:
-      return opt_virtual_call_Relocation::spec();
-    case relocInfo::static_call_type:
-      return static_call_Relocation::spec();
-    case relocInfo::runtime_call_type:
-      return runtime_call_Relocation::spec();
-#endif
-    case relocInfo::none:
-      return RelocationHolder();
-    default:
-      ShouldNotReachHere();
-      return RelocationHolder();
-    }
-  }
-
- protected:
-  // creation
-  AddressLiteral() : _address(NULL), _rspec(NULL) {}
-
- public:
-  AddressLiteral(address addr, RelocationHolder const& rspec)
-    : _address(addr),
-      _rspec(rspec) {}
-
-  // Some constructors to avoid casting at the call site.
-  AddressLiteral(jobject obj, RelocationHolder const& rspec)
-    : _address((address) obj),
-      _rspec(rspec) {}
-
-  AddressLiteral(intptr_t value, RelocationHolder const& rspec)
-    : _address((address) value),
-      _rspec(rspec) {}
-
-  AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-    _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-  // Some constructors to avoid casting at the call site.
-  AddressLiteral(address* addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-    _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-  AddressLiteral(bool* addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-  AddressLiteral(const bool* addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-  AddressLiteral(signed char* addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-  AddressLiteral(int* addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-  AddressLiteral(intptr_t addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-#ifdef _LP64
-  // 32-bit complains about a multiple declaration for int*.
-  AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-#endif
-
-  AddressLiteral(Metadata* addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-  AddressLiteral(Metadata** addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-  AddressLiteral(float* addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-  AddressLiteral(double* addr, relocInfo::relocType rtype = relocInfo::none)
-    : _address((address) addr),
-      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
-
-  intptr_t value() const { return (intptr_t) _address; }
-  int      low10() const;
-
-  const relocInfo::relocType rtype() const { return _rspec.type(); }
-  const RelocationHolder&    rspec() const { return _rspec; }
-
-  RelocationHolder rspec(int offset) const {
-    return offset == 0 ? _rspec : _rspec.plus(offset);
-  }
-};
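-// Usage sketch (illustrative): an AddressLiteral pairs a raw address with the
-// relocation it needs if the generated code is ever moved, e.g.
-//   AddressLiteral lit((address) &some_global, relocInfo::external_word_type);
-//   intptr_t bits = lit.value();  // raw bits; low10() is the %lo(...) part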
-
-// Convenience classes
-class ExternalAddress: public AddressLiteral {
- private:
-  static relocInfo::relocType reloc_for_target(address target) {
-    // Sometimes ExternalAddress is used for values which aren't
-    // exactly addresses, like the card table base.
-    // external_word_type can't be used for values in the first page
-    // so just skip the reloc in that case.
-    return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
-  }
-
- public:
-  ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(          target)) {}
-  ExternalAddress(Metadata** target) : AddressLiteral(target, reloc_for_target((address) target)) {}
-};
-
-inline Address RegisterImpl::address_in_saved_window() const {
-   return (Address(SP, (sp_offset_in_saved_window() * wordSize) + STACK_BIAS));
-}
-
-
-
-// Argument is an abstraction used to represent an outgoing
-// actual argument or an incoming formal parameter, whether
-// it resides in memory or in a register, in a manner consistent
-// with the SPARC Application Binary Interface, or ABI.  This is
-// often referred to as the native or C calling convention.
-
-class Argument VALUE_OBJ_CLASS_SPEC {
- private:
-  int _number;
-  bool _is_in;
-
- public:
-#ifdef _LP64
-  enum {
-    n_register_parameters = 6,          // only 6 registers may contain integer parameters
-    n_float_register_parameters = 16    // Can have up to 16 floating registers
-  };
-#else
-  enum {
-    n_register_parameters = 6           // only 6 registers may contain integer parameters
-  };
-#endif
-
-  // creation
-  Argument(int number, bool is_in) : _number(number), _is_in(is_in) {}
-
-  int  number() const  { return _number;  }
-  bool is_in()  const  { return _is_in;   }
-  bool is_out() const  { return !is_in(); }
-
-  Argument successor() const  { return Argument(number() + 1, is_in()); }
-  Argument as_in()     const  { return Argument(number(), true ); }
-  Argument as_out()    const  { return Argument(number(), false); }
-
-  // locating register-based arguments:
-  bool is_register() const { return _number < n_register_parameters; }
-
-#ifdef _LP64
-  // locating Floating Point register-based arguments:
-  bool is_float_register() const { return _number < n_float_register_parameters; }
-
-  FloatRegister as_float_register() const {
-    assert(is_float_register(), "must be a register argument");
-    return as_FloatRegister(( number() *2 ) + 1);
-  }
-  FloatRegister as_double_register() const {
-    assert(is_float_register(), "must be a register argument");
-    return as_FloatRegister(( number() *2 ));
-  }
-#endif
-
-  Register as_register() const {
-    assert(is_register(), "must be a register argument");
-    return is_in() ? as_iRegister(number()) : as_oRegister(number());
-  }
-
-  // locating memory-based arguments
-  Address as_address() const {
-    assert(!is_register(), "must be a memory argument");
-    return address_in_frame();
-  }
-
-  // When applied to a register-based argument, give the corresponding address
-  // into the 6-word area "into which callee may store register arguments"
-  // (This is a different place than the corresponding register-save area location.)
-  Address address_in_frame() const;
-
-  // debugging
-  const char* name() const;
-
-  friend class Assembler;
-};
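-// Usage sketch (illustrative): the first outgoing integer argument is O0 on
-// the caller's side and appears as I0 inside the callee's register window:
-//   Argument a0(0, /*is_in=*/false);           // caller's (out) view
-//   Register r_out = a0.as_register();         // == O0
-//   Register r_in  = a0.as_in().as_register(); // == I0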
-
+#include "asm/register.hpp"
 
 // The SPARC Assembler: Pure assembler doing NO optimizations on the instruction
 // level; i.e., what you write
 // is what you get. The Assembler is generating code into a CodeBuffer.
 
 class Assembler : public AbstractAssembler  {
- protected:
-
-  static void print_instruction(int inst);
-  static int  patched_branch(int dest_pos, int inst, int inst_pos);
-  static int  branch_destination(int inst, int pos);
-
-
   friend class AbstractAssembler;
   friend class AddressLiteral;
 
@@ -1216,8 +675,8 @@
     AbstractAssembler::flush();
   }
 
-  inline void emit_long(int);  // shadows AbstractAssembler::emit_long
-  inline void emit_data(int x) { emit_long(x); }
+  inline void emit_int32(int);  // shadows AbstractAssembler::emit_int32
+  inline void emit_data(int x) { emit_int32(x); }
   inline void emit_data(int, RelocationHolder const&);
   inline void emit_data(int, relocInfo::relocType rtype);
   // helper for above fcns
@@ -1230,17 +689,14 @@
   // pp 135 (addc was addx in v8)
 
   inline void add(Register s1, Register s2, Register d );
-  inline void add(Register s1, int simm13a, Register d, relocInfo::relocType rtype = relocInfo::none);
-  inline void add(Register s1, int simm13a, Register d, RelocationHolder const& rspec);
-  inline void add(Register s1, RegisterOrConstant s2, Register d, int offset = 0);
-  inline void add(const Address& a, Register d, int offset = 0);
+  inline void add(Register s1, int simm13a, Register d );
 
-  void addcc(  Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3  | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void addcc(  Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3  | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void addc(   Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3             ) | rs1(s1) | rs2(s2) ); }
-  void addc(   Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void addccc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void addccc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void addcc(  Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3  | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void addcc(  Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3  | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void addc(   Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3             ) | rs1(s1) | rs2(s2) ); }
+  void addc(   Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void addccc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void addccc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
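+  // Encoding sketch (illustrative): each emit_int32 above composes one 32-bit
+  // instruction word from bit fields: op | rd | op3 | rs1 | (rs2, or
+  // immed(true) | simm(simm13a, 13) for the 13-bit signed immediate form).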
 
 
   // pp 136
@@ -1293,76 +749,76 @@
   // at address s1 is swapped with the data in d. If the values are not equal,
   // the contents of memory at s1 are loaded into d, without the swap.
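   // Semantics sketch (illustrative): old = mem[s1];
   //   if (old == s2) mem[s1] = d;  // the swap happens only on a match
   //   d = old;                     // d always receives the old memory value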
 
-  void casa(  Register s1, Register s2, Register d, int ia = -1 ) { v9_only();  emit_long( op(ldst_op) | rd(d) | op3(casa_op3 ) | rs1(s1) | (ia == -1  ? immed(true) : imm_asi(ia)) | rs2(s2)); }
-  void casxa( Register s1, Register s2, Register d, int ia = -1 ) { v9_only();  emit_long( op(ldst_op) | rd(d) | op3(casxa_op3) | rs1(s1) | (ia == -1  ? immed(true) : imm_asi(ia)) | rs2(s2)); }
+  void casa(  Register s1, Register s2, Register d, int ia = -1 ) { v9_only();  emit_int32( op(ldst_op) | rd(d) | op3(casa_op3 ) | rs1(s1) | (ia == -1  ? immed(true) : imm_asi(ia)) | rs2(s2)); }
+  void casxa( Register s1, Register s2, Register d, int ia = -1 ) { v9_only();  emit_int32( op(ldst_op) | rd(d) | op3(casxa_op3) | rs1(s1) | (ia == -1  ? immed(true) : imm_asi(ia)) | rs2(s2)); }
 
   // pp 152
 
-  void udiv(   Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(udiv_op3             ) | rs1(s1) | rs2(s2)); }
-  void udiv(   Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(udiv_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void sdiv(   Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sdiv_op3             ) | rs1(s1) | rs2(s2)); }
-  void sdiv(   Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sdiv_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void udivcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(udiv_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); }
-  void udivcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(udiv_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void sdivcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sdiv_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); }
-  void sdivcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sdiv_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void udiv(   Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(udiv_op3             ) | rs1(s1) | rs2(s2)); }
+  void udiv(   Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(udiv_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void sdiv(   Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sdiv_op3             ) | rs1(s1) | rs2(s2)); }
+  void sdiv(   Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sdiv_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void udivcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(udiv_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); }
+  void udivcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(udiv_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void sdivcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sdiv_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); }
+  void sdivcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sdiv_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 155
 
-  void done()  { v9_only();  cti();  emit_long( op(arith_op) | fcn(0) | op3(done_op3) ); }
-  void retry() { v9_only();  cti();  emit_long( op(arith_op) | fcn(1) | op3(retry_op3) ); }
+  void done()  { v9_only();  cti();  emit_int32( op(arith_op) | fcn(0) | op3(done_op3) ); }
+  void retry() { v9_only();  cti();  emit_int32( op(arith_op) | fcn(1) | op3(retry_op3) ); }
 
   // pp 156
 
-  void fadd( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x40 + w) | fs2(s2, w)); }
-  void fsub( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x44 + w) | fs2(s2, w)); }
+  void fadd( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x40 + w) | fs2(s2, w)); }
+  void fsub( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x44 + w) | fs2(s2, w)); }
 
   // pp 157
 
-  void fcmp(  FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2) { v8_no_cc(cc);  emit_long( op(arith_op) | cmpcc(cc) | op3(fpop2_op3) | fs1(s1, w) | opf(0x50 + w) | fs2(s2, w)); }
-  void fcmpe( FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2) { v8_no_cc(cc);  emit_long( op(arith_op) | cmpcc(cc) | op3(fpop2_op3) | fs1(s1, w) | opf(0x54 + w) | fs2(s2, w)); }
+  void fcmp(  FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2) { v8_no_cc(cc);  emit_int32( op(arith_op) | cmpcc(cc) | op3(fpop2_op3) | fs1(s1, w) | opf(0x50 + w) | fs2(s2, w)); }
+  void fcmpe( FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2) { v8_no_cc(cc);  emit_int32( op(arith_op) | cmpcc(cc) | op3(fpop2_op3) | fs1(s1, w) | opf(0x54 + w) | fs2(s2, w)); }
 
   // pp 159
 
-  void ftox( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v9_only();  emit_long( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(fpop1_op3) | opf(0x80 + w) | fs2(s, w)); }
-  void ftoi( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) {             emit_long( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(fpop1_op3) | opf(0xd0 + w) | fs2(s, w)); }
+  void ftox( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v9_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(fpop1_op3) | opf(0x80 + w) | fs2(s, w)); }
+  void ftoi( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) {             emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(fpop1_op3) | opf(0xd0 + w) | fs2(s, w)); }
 
   // pp 160
 
-  void ftof( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s, FloatRegister d ) { emit_long( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | opf(0xc0 + sw + dw*4) | fs2(s, sw)); }
+  void ftof( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | opf(0xc0 + sw + dw*4) | fs2(s, sw)); }
 
   // pp 161
 
-  void fxtof( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v9_only();  emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x80 + w*4) | fs2(s, FloatRegisterImpl::D)); }
-  void fitof( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) {             emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0xc0 + w*4) | fs2(s, FloatRegisterImpl::S)); }
+  void fxtof( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v9_only();  emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x80 + w*4) | fs2(s, FloatRegisterImpl::D)); }
+  void fitof( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) {             emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0xc0 + w*4) | fs2(s, FloatRegisterImpl::S)); }
 
   // pp 162
 
-  void fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w);  emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x00 + w) | fs2(s, w)); }
+  void fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w);  emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x00 + w) | fs2(s, w)); }
 
-  void fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w);  emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x04 + w) | fs2(s, w)); }
+  void fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w);  emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x04 + w) | fs2(s, w)); }
 
   // page 144 sparc v8 architecture (double prec works on v8 if the source and destination registers are the same). fnegs is the only instruction available
   // on v8 to do negation of single, double and quad precision floats.
 
-  void fneg( FloatRegisterImpl::Width w, FloatRegister sd ) { if (VM_Version::v9_instructions_work()) emit_long( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x04 + w) | fs2(sd, w)); else emit_long( op(arith_op) | fd(sd, w) | op3(fpop1_op3) |  opf(0x05) | fs2(sd, w)); }
+  void fneg( FloatRegisterImpl::Width w, FloatRegister sd ) { if (VM_Version::v9_instructions_work()) emit_int32( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x04 + w) | fs2(sd, w)); else emit_int32( op(arith_op) | fd(sd, w) | op3(fpop1_op3) |  opf(0x05) | fs2(sd, w)); }
 
-  void fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w);  emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x08 + w) | fs2(s, w)); }
+  void fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w);  emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x08 + w) | fs2(s, w)); }
 
   // page 144 sparc v8 architecture (double prec works on v8 if the source and destination registers are the same). fabss is the only instruction available
   // on v8 to do abs operation on single/double/quad precision floats.
 
-  void fabs( FloatRegisterImpl::Width w, FloatRegister sd ) { if (VM_Version::v9_instructions_work()) emit_long( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x08 + w) | fs2(sd, w)); else emit_long( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x09) | fs2(sd, w)); }
+  void fabs( FloatRegisterImpl::Width w, FloatRegister sd ) { if (VM_Version::v9_instructions_work()) emit_int32( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x08 + w) | fs2(sd, w)); else emit_int32( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x09) | fs2(sd, w)); }
 
   // pp 163
 
-  void fmul( FloatRegisterImpl::Width w,                            FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w)  | op3(fpop1_op3) | fs1(s1, w)  | opf(0x48 + w)         | fs2(s2, w)); }
-  void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw,  FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); }
-  void fdiv( FloatRegisterImpl::Width w,                            FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w)  | op3(fpop1_op3) | fs1(s1, w)  | opf(0x4c + w)         | fs2(s2, w)); }
+  void fmul( FloatRegisterImpl::Width w,                            FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w)  | op3(fpop1_op3) | fs1(s1, w)  | opf(0x48 + w)         | fs2(s2, w)); }
+  void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw,  FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); }
+  void fdiv( FloatRegisterImpl::Width w,                            FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w)  | op3(fpop1_op3) | fs1(s1, w)  | opf(0x4c + w)         | fs2(s2, w)); }
 
   // pp 164
 
-  void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); }
+  void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); }
 
   // pp 165
 
@@ -1371,22 +827,22 @@
 
   // pp 167
 
-  void flushw() { v9_only();  emit_long( op(arith_op) | op3(flushw_op3) ); }
+  void flushw() { v9_only();  emit_int32( op(arith_op) | op3(flushw_op3) ); }
 
   // pp 168
 
-  void illtrap( int const22a) { if (const22a != 0) v9_only();  emit_long( op(branch_op) | u_field(const22a, 21, 0) ); }
+  void illtrap( int const22a) { if (const22a != 0) v9_only();  emit_int32( op(branch_op) | u_field(const22a, 21, 0) ); }
   // v8 unimp == illtrap(0)
 
   // pp 169
 
-  void impdep1( int id1, int const19a ) { v9_only();  emit_long( op(arith_op) | fcn(id1) | op3(impdep1_op3) | u_field(const19a, 18, 0)); }
-  void impdep2( int id1, int const19a ) { v9_only();  emit_long( op(arith_op) | fcn(id1) | op3(impdep2_op3) | u_field(const19a, 18, 0)); }
+  void impdep1( int id1, int const19a ) { v9_only();  emit_int32( op(arith_op) | fcn(id1) | op3(impdep1_op3) | u_field(const19a, 18, 0)); }
+  void impdep2( int id1, int const19a ) { v9_only();  emit_int32( op(arith_op) | fcn(id1) | op3(impdep2_op3) | u_field(const19a, 18, 0)); }
 
   // pp 149 (v8)
 
-  void cpop1( int opc, int cr1, int cr2, int crd ) { v8_only();  emit_long( op(arith_op) | fcn(crd) | op3(impdep1_op3) | u_field(cr1, 18, 14) | opf(opc) | u_field(cr2, 4, 0)); }
-  void cpop2( int opc, int cr1, int cr2, int crd ) { v8_only();  emit_long( op(arith_op) | fcn(crd) | op3(impdep2_op3) | u_field(cr1, 18, 14) | opf(opc) | u_field(cr2, 4, 0)); }
+  void cpop1( int opc, int cr1, int cr2, int crd ) { v8_only();  emit_int32( op(arith_op) | fcn(crd) | op3(impdep1_op3) | u_field(cr1, 18, 14) | opf(opc) | u_field(cr2, 4, 0)); }
+  void cpop2( int opc, int cr1, int cr2, int crd ) { v8_only();  emit_int32( op(arith_op) | fcn(crd) | op3(impdep2_op3) | u_field(cr1, 18, 14) | opf(opc) | u_field(cr2, 4, 0)); }
 
   // pp 170
 
@@ -1395,12 +851,9 @@
 
   // 171
 
-  inline void ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d);
   inline void ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d);
   inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, RelocationHolder const& rspec = RelocationHolder());
 
-  inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0);
-
 
   inline void ldfsr(  Register s1, Register s2 );
   inline void ldfsr(  Register s1, int simm13a);
@@ -1419,8 +872,8 @@
 
   // 173
 
-  void ldfa(  FloatRegisterImpl::Width w, Register s1, Register s2, int ia, FloatRegister d ) { v9_only();  emit_long( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3 | alt_bit_op3, w) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void ldfa(  FloatRegisterImpl::Width w, Register s1, int simm13a,         FloatRegister d ) { v9_only();  emit_long( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3 | alt_bit_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void ldfa(  FloatRegisterImpl::Width w, Register s1, Register s2, int ia, FloatRegister d ) { v9_only();  emit_int32( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3 | alt_bit_op3, w) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void ldfa(  FloatRegisterImpl::Width w, Register s1, int simm13a,         FloatRegister d ) { v9_only();  emit_int32( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3 | alt_bit_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 175, lduw is ld on v8
 
@@ -1438,54 +891,27 @@
   inline void lduw(  Register s1, int simm13a, Register d);
   inline void ldx(   Register s1, Register s2, Register d );
   inline void ldx(   Register s1, int simm13a, Register d);
-  inline void ld(    Register s1, Register s2, Register d );
-  inline void ld(    Register s1, int simm13a, Register d);
   inline void ldd(   Register s1, Register s2, Register d );
   inline void ldd(   Register s1, int simm13a, Register d);
 
-#ifdef ASSERT
-  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
-  inline void ld(    Register s1, ByteSize simm13a, Register d);
-#endif
-
-  inline void ldsb(const Address& a, Register d, int offset = 0);
-  inline void ldsh(const Address& a, Register d, int offset = 0);
-  inline void ldsw(const Address& a, Register d, int offset = 0);
-  inline void ldub(const Address& a, Register d, int offset = 0);
-  inline void lduh(const Address& a, Register d, int offset = 0);
-  inline void lduw(const Address& a, Register d, int offset = 0);
-  inline void ldx( const Address& a, Register d, int offset = 0);
-  inline void ld(  const Address& a, Register d, int offset = 0);
-  inline void ldd( const Address& a, Register d, int offset = 0);
-
-  inline void ldub(  Register s1, RegisterOrConstant s2, Register d );
-  inline void ldsb(  Register s1, RegisterOrConstant s2, Register d );
-  inline void lduh(  Register s1, RegisterOrConstant s2, Register d );
-  inline void ldsh(  Register s1, RegisterOrConstant s2, Register d );
-  inline void lduw(  Register s1, RegisterOrConstant s2, Register d );
-  inline void ldsw(  Register s1, RegisterOrConstant s2, Register d );
-  inline void ldx(   Register s1, RegisterOrConstant s2, Register d );
-  inline void ld(    Register s1, RegisterOrConstant s2, Register d );
-  inline void ldd(   Register s1, RegisterOrConstant s2, Register d );
-
   // pp 177
 
-  void ldsba(  Register s1, Register s2, int ia, Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void ldsba(  Register s1, int simm13a,         Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void ldsha(  Register s1, Register s2, int ia, Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(ldsh_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void ldsha(  Register s1, int simm13a,         Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(ldsh_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void ldswa(  Register s1, Register s2, int ia, Register d ) { v9_only();  emit_long( op(ldst_op) | rd(d) | op3(ldsw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void ldswa(  Register s1, int simm13a,         Register d ) { v9_only();  emit_long( op(ldst_op) | rd(d) | op3(ldsw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void lduba(  Register s1, Register s2, int ia, Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(ldub_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void lduba(  Register s1, int simm13a,         Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(ldub_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void lduha(  Register s1, Register s2, int ia, Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(lduh_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void lduha(  Register s1, int simm13a,         Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(lduh_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void lduwa(  Register s1, Register s2, int ia, Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(lduw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void lduwa(  Register s1, int simm13a,         Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(lduw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void ldxa(   Register s1, Register s2, int ia, Register d ) { v9_only();  emit_long( op(ldst_op) | rd(d) | op3(ldx_op3  | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void ldxa(   Register s1, int simm13a,         Register d ) { v9_only();  emit_long( op(ldst_op) | rd(d) | op3(ldx_op3  | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void ldda(   Register s1, Register s2, int ia, Register d ) { v9_dep();   emit_long( op(ldst_op) | rd(d) | op3(ldd_op3  | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void ldda(   Register s1, int simm13a,         Register d ) { v9_dep();   emit_long( op(ldst_op) | rd(d) | op3(ldd_op3  | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void ldsba(  Register s1, Register s2, int ia, Register d ) {             emit_int32( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void ldsba(  Register s1, int simm13a,         Register d ) {             emit_int32( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void ldsha(  Register s1, Register s2, int ia, Register d ) {             emit_int32( op(ldst_op) | rd(d) | op3(ldsh_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void ldsha(  Register s1, int simm13a,         Register d ) {             emit_int32( op(ldst_op) | rd(d) | op3(ldsh_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void ldswa(  Register s1, Register s2, int ia, Register d ) { v9_only();  emit_int32( op(ldst_op) | rd(d) | op3(ldsw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void ldswa(  Register s1, int simm13a,         Register d ) { v9_only();  emit_int32( op(ldst_op) | rd(d) | op3(ldsw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void lduba(  Register s1, Register s2, int ia, Register d ) {             emit_int32( op(ldst_op) | rd(d) | op3(ldub_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void lduba(  Register s1, int simm13a,         Register d ) {             emit_int32( op(ldst_op) | rd(d) | op3(ldub_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void lduha(  Register s1, Register s2, int ia, Register d ) {             emit_int32( op(ldst_op) | rd(d) | op3(lduh_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void lduha(  Register s1, int simm13a,         Register d ) {             emit_int32( op(ldst_op) | rd(d) | op3(lduh_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void lduwa(  Register s1, Register s2, int ia, Register d ) {             emit_int32( op(ldst_op) | rd(d) | op3(lduw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void lduwa(  Register s1, int simm13a,         Register d ) {             emit_int32( op(ldst_op) | rd(d) | op3(lduw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void ldxa(   Register s1, Register s2, int ia, Register d ) { v9_only();  emit_int32( op(ldst_op) | rd(d) | op3(ldx_op3  | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void ldxa(   Register s1, int simm13a,         Register d ) { v9_only();  emit_int32( op(ldst_op) | rd(d) | op3(ldx_op3  | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void ldda(   Register s1, Register s2, int ia, Register d ) { v9_dep();   emit_int32( op(ldst_op) | rd(d) | op3(ldd_op3  | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void ldda(   Register s1, int simm13a,         Register d ) { v9_dep();   emit_int32( op(ldst_op) | rd(d) | op3(ldd_op3  | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
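
These loads all funnel through the same format-3 bit packing. A minimal standalone sketch of that packing, with illustrative helper names rather than the ones in this header (field layout per the SPARC V9 manual: op in bits 31..30, rd 29..25, op3 24..19, rs1 18..14, the i bit at 13, asi 12..5 or simm13 12..0, rs2 4..0):

    #include <cstdint>
    #include <cstdio>

    // Register+register form (i = 0): the 8-bit asi sits in bits 12..5.
    static uint32_t pack_rr(uint32_t op, uint32_t rd, uint32_t op3,
                            uint32_t rs1, uint32_t asi, uint32_t rs2) {
      return (op << 30) | (rd << 25) | (op3 << 19) | (rs1 << 14) | (asi << 5) | rs2;
    }

    // Register+immediate form (i = 1): a 13-bit signed immediate in bits 12..0.
    static uint32_t pack_ri(uint32_t op, uint32_t rd, uint32_t op3,
                            uint32_t rs1, int32_t simm13) {
      return (op << 30) | (rd << 25) | (op3 << 19) | (rs1 << 14)
           | (1u << 13) | (uint32_t(simm13) & 0x1fff);
    }

    int main() {
      printf("0x%08x\n", pack_rr(3, 3, 0x19, 1, 0x80, 2));  // ldsba [%g1+%g2]0x80, %g3
      printf("0x%08x\n", pack_ri(3, 3, 0x19, 1, -8));       // ldsba [%g1-8]%asi, %g3
      return 0;
    }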
 
   // pp 179
 
@@ -1494,113 +920,111 @@
 
   // pp 180
 
-  void ldstuba( Register s1, Register s2, int ia, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldstub_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void ldstuba( Register s1, int simm13a,         Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldstub_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void ldstuba( Register s1, Register s2, int ia, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(ldstub_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void ldstuba( Register s1, int simm13a,         Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(ldstub_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 181
 
-  void and3(    Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(and_op3              ) | rs1(s1) | rs2(s2) ); }
-  void and3(    Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(and_op3              ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void andcc(   Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(and_op3  | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void andcc(   Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(and_op3  | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void andn(    Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3             ) | rs1(s1) | rs2(s2) ); }
-  void andn(    Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void andn(    Register s1, RegisterOrConstant s2, Register d);
-  void andncc(  Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void andncc(  Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void or3(     Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(or_op3               ) | rs1(s1) | rs2(s2) ); }
-  void or3(     Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(or_op3               ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void orcc(    Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(or_op3   | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void orcc(    Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(or_op3   | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void orn(     Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(orn_op3) | rs1(s1) | rs2(s2) ); }
-  void orn(     Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(orn_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void orncc(   Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(orn_op3  | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void orncc(   Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(orn_op3  | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void xor3(    Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xor_op3              ) | rs1(s1) | rs2(s2) ); }
-  void xor3(    Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xor_op3              ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void xorcc(   Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xor_op3  | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void xorcc(   Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xor_op3  | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void xnor(    Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xnor_op3             ) | rs1(s1) | rs2(s2) ); }
-  void xnor(    Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xnor_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void xnorcc(  Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xnor_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void xnorcc(  Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xnor_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void and3(    Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(and_op3              ) | rs1(s1) | rs2(s2) ); }
+  void and3(    Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(and_op3              ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void andcc(   Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(and_op3  | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void andcc(   Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(and_op3  | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void andn(    Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(andn_op3             ) | rs1(s1) | rs2(s2) ); }
+  void andn(    Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(andn_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void andncc(  Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void andncc(  Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void or3(     Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(or_op3               ) | rs1(s1) | rs2(s2) ); }
+  void or3(     Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(or_op3               ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void orcc(    Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(or_op3   | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void orcc(    Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(or_op3   | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void orn(     Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(orn_op3) | rs1(s1) | rs2(s2) ); }
+  void orn(     Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(orn_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void orncc(   Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(orn_op3  | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void orncc(   Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(orn_op3  | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void xor3(    Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xor_op3              ) | rs1(s1) | rs2(s2) ); }
+  void xor3(    Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xor_op3              ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void xorcc(   Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xor_op3  | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void xorcc(   Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xor_op3  | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void xnor(    Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xnor_op3             ) | rs1(s1) | rs2(s2) ); }
+  void xnor(    Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xnor_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void xnorcc(  Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xnor_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void xnorcc(  Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xnor_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 183
 
-  void membar( Membar_mask_bits const7a ) { v9_only(); emit_long( op(arith_op) | op3(membar_op3) | rs1(O7) | immed(true) | u_field( int(const7a), 6, 0)); }
+  void membar( Membar_mask_bits const7a ) { v9_only(); emit_int32( op(arith_op) | op3(membar_op3) | rs1(O7) | immed(true) | u_field( int(const7a), 6, 0)); }
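
The 7-bit membar constant ORs together the SPARC V9 ordering and completion bits. As a reference sketch (constant values from the V9 spec; the names here are illustrative, not this header's):

    // mmask bits 3..0 order classes of memory operations; cmask bits 6..4
    // wait on completion events.
    enum MembarBits {
      LoadLoad  = 0x01, StoreLoad = 0x02, LoadStore = 0x04, StoreStore = 0x08,
      Lookaside = 0x10, MemIssue  = 0x20, Sync      = 0x40
    };
    const int full_fence = LoadLoad | StoreLoad | LoadStore | StoreStore;  // 0x0f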
 
   // pp 185
 
-  void fmov( FloatRegisterImpl::Width w, Condition c,  bool floatCC, CC cca, FloatRegister s2, FloatRegister d ) { v9_only();  emit_long( op(arith_op) | fd(d, w) | op3(fpop2_op3) | cond_mov(c) | opf_cc(cca, floatCC) | opf_low6(w) | fs2(s2, w)); }
+  void fmov( FloatRegisterImpl::Width w, Condition c,  bool floatCC, CC cca, FloatRegister s2, FloatRegister d ) { v9_only();  emit_int32( op(arith_op) | fd(d, w) | op3(fpop2_op3) | cond_mov(c) | opf_cc(cca, floatCC) | opf_low6(w) | fs2(s2, w)); }
 
   // pp 189
 
-  void fmov( FloatRegisterImpl::Width w, RCondition c, Register s1,  FloatRegister s2, FloatRegister d ) { v9_only();  emit_long( op(arith_op) | fd(d, w) | op3(fpop2_op3) | rs1(s1) | rcond(c) | opf_low5(4 + w) | fs2(s2, w)); }
+  void fmov( FloatRegisterImpl::Width w, RCondition c, Register s1,  FloatRegister s2, FloatRegister d ) { v9_only();  emit_int32( op(arith_op) | fd(d, w) | op3(fpop2_op3) | rs1(s1) | rcond(c) | opf_low5(4 + w) | fs2(s2, w)); }
 
   // pp 191
 
-  void movcc( Condition c, bool floatCC, CC cca, Register s2, Register d ) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(movcc_op3) | mov_cc(cca, floatCC) | cond_mov(c) | rs2(s2) ); }
-  void movcc( Condition c, bool floatCC, CC cca, int simm11a, Register d ) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(movcc_op3) | mov_cc(cca, floatCC) | cond_mov(c) | immed(true) | simm(simm11a, 11) ); }
+  void movcc( Condition c, bool floatCC, CC cca, Register s2, Register d ) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(movcc_op3) | mov_cc(cca, floatCC) | cond_mov(c) | rs2(s2) ); }
+  void movcc( Condition c, bool floatCC, CC cca, int simm11a, Register d ) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(movcc_op3) | mov_cc(cca, floatCC) | cond_mov(c) | immed(true) | simm(simm11a, 11) ); }
 
   // pp 195
 
-  void movr( RCondition c, Register s1, Register s2,  Register d ) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(movr_op3) | rs1(s1) | rcond(c) | rs2(s2) ); }
-  void movr( RCondition c, Register s1, int simm10a,  Register d ) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(movr_op3) | rs1(s1) | rcond(c) | immed(true) | simm(simm10a, 10) ); }
+  void movr( RCondition c, Register s1, Register s2,  Register d ) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(movr_op3) | rs1(s1) | rcond(c) | rs2(s2) ); }
+  void movr( RCondition c, Register s1, int simm10a,  Register d ) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(movr_op3) | rs1(s1) | rcond(c) | immed(true) | simm(simm10a, 10) ); }
 
   // pp 196
 
-  void mulx(  Register s1, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(mulx_op3 ) | rs1(s1) | rs2(s2) ); }
-  void mulx(  Register s1, int simm13a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(mulx_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void sdivx( Register s1, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(sdivx_op3) | rs1(s1) | rs2(s2) ); }
-  void sdivx( Register s1, int simm13a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(sdivx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void udivx( Register s1, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(udivx_op3) | rs1(s1) | rs2(s2) ); }
-  void udivx( Register s1, int simm13a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(udivx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void mulx(  Register s1, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(mulx_op3 ) | rs1(s1) | rs2(s2) ); }
+  void mulx(  Register s1, int simm13a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(mulx_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void sdivx( Register s1, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(sdivx_op3) | rs1(s1) | rs2(s2) ); }
+  void sdivx( Register s1, int simm13a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(sdivx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void udivx( Register s1, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(udivx_op3) | rs1(s1) | rs2(s2) ); }
+  void udivx( Register s1, int simm13a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(udivx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 197
 
-  void umul(   Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(umul_op3             ) | rs1(s1) | rs2(s2) ); }
-  void umul(   Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(umul_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void smul(   Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(smul_op3             ) | rs1(s1) | rs2(s2) ); }
-  void smul(   Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(smul_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void umulcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(umul_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void umulcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(umul_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void smulcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(smul_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void smulcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(smul_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void umul(   Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(umul_op3             ) | rs1(s1) | rs2(s2) ); }
+  void umul(   Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(umul_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void smul(   Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(smul_op3             ) | rs1(s1) | rs2(s2) ); }
+  void smul(   Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(smul_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void umulcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(umul_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void umulcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(umul_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void smulcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(smul_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void smulcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(smul_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 199
 
-  void mulscc(   Register s1, Register s2, Register d ) { v9_dep();  emit_long( op(arith_op) | rd(d) | op3(mulscc_op3) | rs1(s1) | rs2(s2) ); }
-  void mulscc(   Register s1, int simm13a, Register d ) { v9_dep();  emit_long( op(arith_op) | rd(d) | op3(mulscc_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void mulscc(   Register s1, Register s2, Register d ) { v9_dep();  emit_int32( op(arith_op) | rd(d) | op3(mulscc_op3) | rs1(s1) | rs2(s2) ); }
+  void mulscc(   Register s1, int simm13a, Register d ) { v9_dep();  emit_int32( op(arith_op) | rd(d) | op3(mulscc_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 201
 
-  void nop() { emit_long( op(branch_op) | op2(sethi_op2) ); }
+  void nop() { emit_int32( op(branch_op) | op2(sethi_op2) ); }
 
 
   // pp 202
 
-  void popc( Register s,  Register d) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(popc_op3) | rs2(s)); }
-  void popc( int simm13a, Register d) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(popc_op3) | immed(true) | simm(simm13a, 13)); }
+  void popc( Register s,  Register d) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(popc_op3) | rs2(s)); }
+  void popc( int simm13a, Register d) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(popc_op3) | immed(true) | simm(simm13a, 13)); }
 
   // pp 203
 
-  void prefetch(   Register s1, Register s2,         PrefetchFcn f);
-  void prefetch(   Register s1, int simm13a,         PrefetchFcn f);
-  void prefetcha(  Register s1, Register s2, int ia, PrefetchFcn f ) { v9_only();  emit_long( op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void prefetcha(  Register s1, int simm13a,         PrefetchFcn f ) { v9_only();  emit_long( op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void prefetch(   Register s1, Register s2, PrefetchFcn f) { v9_only();  emit_int32( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | rs2(s2) ); }
+  void prefetch(   Register s1, int simm13a, PrefetchFcn f) { v9_only();  emit_data( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-  inline void prefetch(const Address& a, PrefetchFcn F, int offset = 0);
+  void prefetcha(  Register s1, Register s2, int ia, PrefetchFcn f ) { v9_only();  emit_int32( op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void prefetcha(  Register s1, int simm13a,         PrefetchFcn f ) { v9_only();  emit_int32( op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 208
 
   // not implementing read privileged register
 
-  inline void rdy(    Register d) { v9_dep();  emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(0, 18, 14)); }
-  inline void rdccr(  Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(2, 18, 14)); }
-  inline void rdasi(  Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(3, 18, 14)); }
-  inline void rdtick( Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(4, 18, 14)); } // Spoon!
-  inline void rdpc(   Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(5, 18, 14)); }
-  inline void rdfprs( Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(6, 18, 14)); }
+  inline void rdy(    Register d) { v9_dep();  emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(0, 18, 14)); }
+  inline void rdccr(  Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(2, 18, 14)); }
+  inline void rdasi(  Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(3, 18, 14)); }
+  inline void rdtick( Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(4, 18, 14)); } // Spoon!
+  inline void rdpc(   Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(5, 18, 14)); }
+  inline void rdfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(6, 18, 14)); }
 
   // pp 213
 
@@ -1609,54 +1033,52 @@
 
   // pp 214
 
-  void save(    Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); }
+  void save(    Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); }
   void save(    Register s1, int simm13a, Register d ) {
     // make sure frame is at least large enough for the register save area
     assert(-simm13a >= 16 * wordSize, "frame too small");
-    emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) );
+    emit_int32( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) );
   }
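
A standalone sketch of the constraint that assert enforces (the word size and frame size here are illustrative): save() grows the stack downward, so the immediate is negative and must leave room for the 16 in/local registers of the new window.

    #include <cassert>
    int main() {
      const int wordSize   = 8;          // 64-bit VM assumed
      const int frameBytes = 176;        // illustrative frame size
      int simm13a = -frameBytes;         // what a caller would pass to save()
      assert(-simm13a >= 16 * wordSize); // same check as in save() above
      return 0;
    }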
 
-  void restore( Register s1 = G0,  Register s2 = G0, Register d = G0 ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); }
-  void restore( Register s1,       int simm13a,      Register d      ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void restore( Register s1 = G0,  Register s2 = G0, Register d = G0 ) { emit_int32( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); }
+  void restore( Register s1,       int simm13a,      Register d      ) { emit_int32( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 216
 
-  void saved()    { v9_only();  emit_long( op(arith_op) | fcn(0) | op3(saved_op3)); }
-  void restored() { v9_only();  emit_long( op(arith_op) | fcn(1) | op3(saved_op3)); }
+  void saved()    { v9_only();  emit_int32( op(arith_op) | fcn(0) | op3(saved_op3)); }
+  void restored() { v9_only();  emit_int32( op(arith_op) | fcn(1) | op3(saved_op3)); }
 
   // pp 217
 
   inline void sethi( int imm22a, Register d, RelocationHolder const& rspec = RelocationHolder() );
   // pp 218
 
-  void sll(  Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
-  void sll(  Register s1, int imm5a,   Register d ) { emit_long( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
-  void srl(  Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
-  void srl(  Register s1, int imm5a,   Register d ) { emit_long( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
-  void sra(  Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
-  void sra(  Register s1, int imm5a,   Register d ) { emit_long( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
+  void sll(  Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
+  void sll(  Register s1, int imm5a,   Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
+  void srl(  Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
+  void srl(  Register s1, int imm5a,   Register d ) { emit_int32( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
+  void sra(  Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
+  void sra(  Register s1, int imm5a,   Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
 
-  void sllx( Register s1, Register s2, Register d ) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
-  void sllx( Register s1, int imm6a,   Register d ) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
-  void srlx( Register s1, Register s2, Register d ) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
-  void srlx( Register s1, int imm6a,   Register d ) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
-  void srax( Register s1, Register s2, Register d ) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
-  void srax( Register s1, int imm6a,   Register d ) { v9_only();  emit_long( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
+  void sllx( Register s1, Register s2, Register d ) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
+  void sllx( Register s1, int imm6a,   Register d ) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
+  void srlx( Register s1, Register s2, Register d ) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
+  void srlx( Register s1, int imm6a,   Register d ) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
+  void srax( Register s1, Register s2, Register d ) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
+  void srax( Register s1, int imm6a,   Register d ) { v9_only();  emit_int32( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
 
   // pp 220
 
-  void sir( int simm13a ) { emit_long( op(arith_op) | fcn(15) | op3(sir_op3) | immed(true) | simm(simm13a, 13)); }
+  void sir( int simm13a ) { emit_int32( op(arith_op) | fcn(15) | op3(sir_op3) | immed(true) | simm(simm13a, 13)); }
 
   // pp 221
 
-  void stbar() { emit_long( op(arith_op) | op3(membar_op3) | u_field(15, 18, 14)); }
+  void stbar() { emit_int32( op(arith_op) | op3(membar_op3) | u_field(15, 18, 14)); }
 
   // pp 222
 
-  inline void stf(    FloatRegisterImpl::Width w, FloatRegister d, Register s1, RegisterOrConstant s2);
   inline void stf(    FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2);
   inline void stf(    FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a);
-  inline void stf(    FloatRegisterImpl::Width w, FloatRegister d, const Address& a, int offset = 0);
 
   inline void stfsr(  Register s1, Register s2 );
   inline void stfsr(  Register s1, int simm13a);
@@ -1665,8 +1087,8 @@
 
   //  pp 224
 
-  void stfa(  FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2, int ia ) { v9_only();  emit_long( op(ldst_op) | fd(d, w) | alt_op3(stf_op3 | alt_bit_op3, w) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void stfa(  FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a         ) { v9_only();  emit_long( op(ldst_op) | fd(d, w) | alt_op3(stf_op3 | alt_bit_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void stfa(  FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2, int ia ) { v9_only();  emit_int32( op(ldst_op) | fd(d, w) | alt_op3(stf_op3 | alt_bit_op3, w) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void stfa(  FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a         ) { v9_only();  emit_int32( op(ldst_op) | fd(d, w) | alt_op3(stf_op3 | alt_bit_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // p 226
 
@@ -1676,44 +1098,23 @@
   inline void sth(  Register d, Register s1, int simm13a);
   inline void stw(  Register d, Register s1, Register s2 );
   inline void stw(  Register d, Register s1, int simm13a);
-  inline void st(   Register d, Register s1, Register s2 );
-  inline void st(   Register d, Register s1, int simm13a);
   inline void stx(  Register d, Register s1, Register s2 );
   inline void stx(  Register d, Register s1, int simm13a);
   inline void std(  Register d, Register s1, Register s2 );
   inline void std(  Register d, Register s1, int simm13a);
 
-#ifdef ASSERT
-  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
-  inline void st(   Register d, Register s1, ByteSize simm13a);
-#endif
-
-  inline void stb(  Register d, const Address& a, int offset = 0 );
-  inline void sth(  Register d, const Address& a, int offset = 0 );
-  inline void stw(  Register d, const Address& a, int offset = 0 );
-  inline void stx(  Register d, const Address& a, int offset = 0 );
-  inline void st(   Register d, const Address& a, int offset = 0 );
-  inline void std(  Register d, const Address& a, int offset = 0 );
-
-  inline void stb(  Register d, Register s1, RegisterOrConstant s2 );
-  inline void sth(  Register d, Register s1, RegisterOrConstant s2 );
-  inline void stw(  Register d, Register s1, RegisterOrConstant s2 );
-  inline void stx(  Register d, Register s1, RegisterOrConstant s2 );
-  inline void std(  Register d, Register s1, RegisterOrConstant s2 );
-  inline void st(   Register d, Register s1, RegisterOrConstant s2 );
-
   // pp 177
 
-  void stba(  Register d, Register s1, Register s2, int ia ) {             emit_long( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void stba(  Register d, Register s1, int simm13a         ) {             emit_long( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void stha(  Register d, Register s1, Register s2, int ia ) {             emit_long( op(ldst_op) | rd(d) | op3(sth_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void stha(  Register d, Register s1, int simm13a         ) {             emit_long( op(ldst_op) | rd(d) | op3(sth_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void stwa(  Register d, Register s1, Register s2, int ia ) {             emit_long( op(ldst_op) | rd(d) | op3(stw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void stwa(  Register d, Register s1, int simm13a         ) {             emit_long( op(ldst_op) | rd(d) | op3(stw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void stxa(  Register d, Register s1, Register s2, int ia ) { v9_only();  emit_long( op(ldst_op) | rd(d) | op3(stx_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void stxa(  Register d, Register s1, int simm13a         ) { v9_only();  emit_long( op(ldst_op) | rd(d) | op3(stx_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void stda(  Register d, Register s1, Register s2, int ia ) {             emit_long( op(ldst_op) | rd(d) | op3(std_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void stda(  Register d, Register s1, int simm13a         ) {             emit_long( op(ldst_op) | rd(d) | op3(std_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void stba(  Register d, Register s1, Register s2, int ia ) {             emit_int32( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void stba(  Register d, Register s1, int simm13a         ) {             emit_int32( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void stha(  Register d, Register s1, Register s2, int ia ) {             emit_int32( op(ldst_op) | rd(d) | op3(sth_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void stha(  Register d, Register s1, int simm13a         ) {             emit_int32( op(ldst_op) | rd(d) | op3(sth_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void stwa(  Register d, Register s1, Register s2, int ia ) {             emit_int32( op(ldst_op) | rd(d) | op3(stw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void stwa(  Register d, Register s1, int simm13a         ) {             emit_int32( op(ldst_op) | rd(d) | op3(stw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void stxa(  Register d, Register s1, Register s2, int ia ) { v9_only();  emit_int32( op(ldst_op) | rd(d) | op3(stx_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void stxa(  Register d, Register s1, int simm13a         ) { v9_only();  emit_int32( op(ldst_op) | rd(d) | op3(stx_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void stda(  Register d, Register s1, Register s2, int ia ) {             emit_int32( op(ldst_op) | rd(d) | op3(std_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void stda(  Register d, Register s1, int simm13a         ) {             emit_int32( op(ldst_op) | rd(d) | op3(std_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 97 (v8)
 
@@ -1728,85 +1129,72 @@
 
   // pp 230
 
-  void sub(    Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3              ) | rs1(s1) | rs2(s2) ); }
-  void sub(    Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3              ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-
-  // Note: offset is added to s2.
-  inline void sub(Register s1, RegisterOrConstant s2, Register d, int offset = 0);
+  void sub(    Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sub_op3              ) | rs1(s1) | rs2(s2) ); }
+  void sub(    Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sub_op3              ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
-  void subcc(  Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3 ) | rs1(s1) | rs2(s2) ); }
-  void subcc(  Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void subc(   Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(subc_op3             ) | rs1(s1) | rs2(s2) ); }
-  void subc(   Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(subc_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void subccc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(subc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
-  void subccc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(subc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void subcc(  Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3 ) | rs1(s1) | rs2(s2) ); }
+  void subcc(  Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void subc(   Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(subc_op3             ) | rs1(s1) | rs2(s2) ); }
+  void subc(   Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(subc_op3             ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void subccc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(subc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+  void subccc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(subc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 231
 
   inline void swap( Register s1, Register s2, Register d );
   inline void swap( Register s1, int simm13a, Register d);
-  inline void swap( Address& a,               Register d, int offset = 0 );
 
   // pp 232
 
-  void swapa(   Register s1, Register s2, int ia, Register d ) { v9_dep();  emit_long( op(ldst_op) | rd(d) | op3(swap_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
-  void swapa(   Register s1, int simm13a,         Register d ) { v9_dep();  emit_long( op(ldst_op) | rd(d) | op3(swap_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void swapa(   Register s1, Register s2, int ia, Register d ) { v9_dep();  emit_int32( op(ldst_op) | rd(d) | op3(swap_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+  void swapa(   Register s1, int simm13a,         Register d ) { v9_dep();  emit_int32( op(ldst_op) | rd(d) | op3(swap_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 234, note op in book is wrong, see pp 268
 
-  void taddcc(    Register s1, Register s2, Register d ) {            emit_long( op(arith_op) | rd(d) | op3(taddcc_op3  ) | rs1(s1) | rs2(s2) ); }
-  void taddcc(    Register s1, int simm13a, Register d ) {            emit_long( op(arith_op) | rd(d) | op3(taddcc_op3  ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void taddcctv(  Register s1, Register s2, Register d ) { v9_dep();  emit_long( op(arith_op) | rd(d) | op3(taddcctv_op3) | rs1(s1) | rs2(s2) ); }
-  void taddcctv(  Register s1, int simm13a, Register d ) { v9_dep();  emit_long( op(arith_op) | rd(d) | op3(taddcctv_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void taddcc(    Register s1, Register s2, Register d ) {            emit_int32( op(arith_op) | rd(d) | op3(taddcc_op3  ) | rs1(s1) | rs2(s2) ); }
+  void taddcc(    Register s1, int simm13a, Register d ) {            emit_int32( op(arith_op) | rd(d) | op3(taddcc_op3  ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void taddcctv(  Register s1, Register s2, Register d ) { v9_dep();  emit_int32( op(arith_op) | rd(d) | op3(taddcctv_op3) | rs1(s1) | rs2(s2) ); }
+  void taddcctv(  Register s1, int simm13a, Register d ) { v9_dep();  emit_int32( op(arith_op) | rd(d) | op3(taddcctv_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 235
 
-  void tsubcc(    Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(tsubcc_op3  ) | rs1(s1) | rs2(s2) ); }
-  void tsubcc(    Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(tsubcc_op3  ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
-  void tsubcctv(  Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(tsubcctv_op3) | rs1(s1) | rs2(s2) ); }
-  void tsubcctv(  Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(tsubcctv_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void tsubcc(    Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(tsubcc_op3  ) | rs1(s1) | rs2(s2) ); }
+  void tsubcc(    Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(tsubcc_op3  ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+  void tsubcctv(  Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(tsubcctv_op3) | rs1(s1) | rs2(s2) ); }
+  void tsubcctv(  Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(tsubcctv_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
   // pp 237
 
-  void trap( Condition c, CC cc, Register s1, Register s2 ) { v8_no_cc(cc);  emit_long( op(arith_op) | cond(c) | op3(trap_op3) | rs1(s1) | trapcc(cc) | rs2(s2)); }
-  void trap( Condition c, CC cc, Register s1, int trapa   ) { v8_no_cc(cc);  emit_long( op(arith_op) | cond(c) | op3(trap_op3) | rs1(s1) | trapcc(cc) | immed(true) | u_field(trapa, 6, 0)); }
+  void trap( Condition c, CC cc, Register s1, Register s2 ) { v8_no_cc(cc);  emit_int32( op(arith_op) | cond(c) | op3(trap_op3) | rs1(s1) | trapcc(cc) | rs2(s2)); }
+  void trap( Condition c, CC cc, Register s1, int trapa   ) { v8_no_cc(cc);  emit_int32( op(arith_op) | cond(c) | op3(trap_op3) | rs1(s1) | trapcc(cc) | immed(true) | u_field(trapa, 6, 0)); }
   // simple uncond. trap
   void trap( int trapa ) { trap( always, icc, G0, trapa ); }
 
   // pp 239 omit write priv register for now
 
-  inline void wry(    Register d) { v9_dep();  emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(0, 29, 25)); }
-  inline void wrccr(Register s) { v9_only(); emit_long( op(arith_op) | rs1(s) | op3(wrreg_op3) | u_field(2, 29, 25)); }
-  inline void wrccr(Register s, int simm13a) { v9_only(); emit_long( op(arith_op) |
+  inline void wry(    Register d) { v9_dep();  emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(0, 29, 25)); }
+  inline void wrccr(Register s) { v9_only(); emit_int32( op(arith_op) | rs1(s) | op3(wrreg_op3) | u_field(2, 29, 25)); }
+  inline void wrccr(Register s, int simm13a) { v9_only(); emit_int32( op(arith_op) |
                                                                            rs1(s) |
                                                                            op3(wrreg_op3) |
                                                                            u_field(2, 29, 25) |
                                                                            immed(true) |
                                                                            simm(simm13a, 13)); }
-  inline void wrasi(Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
+  inline void wrasi(Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
   // wrasi(d, imm) stores (d xor imm) to asi
-  inline void wrasi(Register d, int simm13a) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) |
+  inline void wrasi(Register d, int simm13a) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) |
                                                u_field(3, 29, 25) | immed(true) | simm(simm13a, 13)); }
-  inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
+  inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
 
 
   // VIS3 instructions
 
-  void movstosw( FloatRegister s, Register d ) { vis3_only();  emit_long( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S)); }
-  void movstouw( FloatRegister s, Register d ) { vis3_only();  emit_long( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstouw_opf) | fs2(s, FloatRegisterImpl::S)); }
-  void movdtox(  FloatRegister s, Register d ) { vis3_only();  emit_long( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mdtox_opf) | fs2(s, FloatRegisterImpl::D)); }
-
-  void movwtos( Register s, FloatRegister d ) { vis3_only();  emit_long( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
-  void movxtod( Register s, FloatRegister d ) { vis3_only();  emit_long( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
-
+  void movstosw( FloatRegister s, Register d ) { vis3_only();  emit_int32( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S)); }
+  void movstouw( FloatRegister s, Register d ) { vis3_only();  emit_int32( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstouw_opf) | fs2(s, FloatRegisterImpl::S)); }
+  void movdtox(  FloatRegister s, Register d ) { vis3_only();  emit_int32( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mdtox_opf) | fs2(s, FloatRegisterImpl::D)); }
 
-
-
-  // For a given register condition, return the appropriate condition code
-  // Condition (the one you would use to get the same effect after "tst" on
-  // the target register.)
-  Assembler::Condition reg_cond_to_cc_cond(RCondition in);
-
+  void movwtos( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
+  void movxtod( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
 
   // Creation
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
@@ -1814,864 +1202,6 @@
     delay_state = no_delay;
 #endif
   }
-
-  // Testing
-#ifndef PRODUCT
-  void test_v9();
-  void test_v8_onlys();
-#endif
-};
-
-
-class RegistersForDebugging : public StackObj {
- public:
-  intptr_t i[8], l[8], o[8], g[8];
-  float    f[32];
-  double   d[32];
-
-  void print(outputStream* s);
-
-  static int i_offset(int j) { return offset_of(RegistersForDebugging, i[j]); }
-  static int l_offset(int j) { return offset_of(RegistersForDebugging, l[j]); }
-  static int o_offset(int j) { return offset_of(RegistersForDebugging, o[j]); }
-  static int g_offset(int j) { return offset_of(RegistersForDebugging, g[j]); }
-  static int f_offset(int j) { return offset_of(RegistersForDebugging, f[j]); }
-  static int d_offset(int j) { return offset_of(RegistersForDebugging, d[j / 2]); }
-
-  // gen asm code to save regs
-  static void save_registers(MacroAssembler* a);
-
-  // restore global registers in case C code disturbed them
-  static void restore_registers(MacroAssembler* a, Register r);
-
-
 };
 
-
-// MacroAssembler extends Assembler by a few frequently used macros.
-//
-// Most of the standard SPARC synthetic ops are defined here.
-// Instructions for which a 'better' code sequence exists depending
-// on arguments should also go in here.
-
-#define JMP2(r1, r2) jmp(r1, r2, __FILE__, __LINE__)
-#define JMP(r1, off) jmp(r1, off, __FILE__, __LINE__)
-#define JUMP(a, temp, off)     jump(a, temp, off, __FILE__, __LINE__)
-#define JUMPL(a, temp, d, off) jumpl(a, temp, d, off, __FILE__, __LINE__)
-
-
-class MacroAssembler: public Assembler {
- protected:
-  // Support for VM calls
-  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
-  // may customize this version by overriding it for its purposes (e.g., to save/restore
-  // additional registers when doing a VM call).
-#ifdef CC_INTERP
-  #define VIRTUAL
-#else
-  #define VIRTUAL virtual
-#endif
-
-  VIRTUAL void call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments);
-
-  //
-  // It is imperative that all calls into the VM are handled via the call_VM macros.
-  // They make sure that the stack linkage is setup correctly. call_VM's correspond
-  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
-  //
-  // This is the base routine called by the different versions of call_VM. The interpreter
-  // may customize this version by overriding it for its purposes (e.g., to save/restore
-  // additional registers when doing a VM call).
-  //
-  // A non-volatile java_thread_cache register should be specified so
-  // that the G2_thread value can be preserved across the call.
-  // (If java_thread_cache is noreg, then a slow get_thread call
-  // will re-initialize the G2_thread.) call_VM_base returns the register that contains the
-  // thread.
-  //
-  // If no last_java_sp is specified (noreg), then SP will be used instead.
-
-  virtual void call_VM_base(
-    Register        oop_result,             // where an oop-result ends up if any; use noreg otherwise
-    Register        java_thread_cache,      // the thread if computed before     ; use noreg otherwise
-    Register        last_java_sp,           // to set up last_Java_frame in stubs; use noreg otherwise
-    address         entry_point,            // the entry point
-    int             number_of_arguments,    // the number of arguments (w/o thread) to pop after call
-    bool            check_exception=true    // flag which indicates if exception should be checked
-  );
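-
-  // A hedged usage sketch of this base routine, with an assumed 'masm'
-  // pointer and a hypothetical VM entry address; register choices are
-  // illustrative only:
-  //
-  //   masm->call_VM_base(O0,      // oop result, if the entry returns one
-  //                      L7,      // java_thread_cache (non-volatile)
-  //                      noreg,   // last_java_sp: default to SP
-  //                      entry,   // hypothetical VM entry point address
-  //                      1,       // one argument besides the implicit thread
-  //                      true);   // check for a pending exception afterwards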
-
-  // This routine should emit JVMTI PopFrame and ForceEarlyReturn handling code.
-  // The implementation is only non-empty for the InterpreterMacroAssembler,
-  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
-  virtual void check_and_handle_popframe(Register scratch_reg);
-  virtual void check_and_handle_earlyret(Register scratch_reg);
-
- public:
-  MacroAssembler(CodeBuffer* code) : Assembler(code) {}
-
-  // Support for NULL-checks
-  //
-  // Generates code that causes a NULL OS exception if the content of reg is NULL.
-  // If the accessed location is M[reg + offset] and the offset is known, provide the
-  // offset.  No explicit code generation is needed if the offset is within a certain
-  // range (0 <= offset <= page_size).
-  //
-  // %%%%%% Currently not done for SPARC
-
-  void null_check(Register reg, int offset = -1);
-  static bool needs_explicit_null_check(intptr_t offset);
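
The comment above is the whole mechanism: a load from [reg + offset] with a small non-negative offset faults inside the first page when reg is NULL, so the OS trap stands in for an explicit test. A standalone sketch of the decision (page size illustrative, not queried from the VM):

    #include <cstdint>
    static bool needs_explicit_null_check_sketch(intptr_t offset) {
      const intptr_t page_size = 4096;           // illustrative
      return offset < 0 || offset >= page_size;  // outside the trapping window
    }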
-
-  // support for delayed instructions
-  MacroAssembler* delayed() { Assembler::delayed();  return this; }
-
-  // branches that use right instruction for v8 vs. v9
-  inline void br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
-  inline void br( Condition c, bool a, Predict p, Label& L );
-
-  inline void fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
-  inline void fb( Condition c, bool a, Predict p, Label& L );
-
-  // compares register with zero (32 bit) and branches (V9 and V8 instructions)
-  void cmp_zero_and_br( Condition c, Register s1, Label& L, bool a = false, Predict p = pn );
-  // Compares a pointer register with zero and branches on (not)null.
-  // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
-  void br_null   ( Register s1, bool a, Predict p, Label& L );
-  void br_notnull( Register s1, bool a, Predict p, Label& L );
-
-  //
-  // Compare registers and branch with nop in delay slot or cbcond without delay slot.
-  //
-  // ATTENTION: use these instructions with caution because the cbcond instruction
-  //            has a very short branch distance: 512 instructions (2 Kbyte); see
-  //            the reach sketch below.
-
-  // Compare integer (32 bit) values (icc only).
-  void cmp_and_br_short(Register s1, Register s2, Condition c, Predict p, Label& L);
-  void cmp_and_br_short(Register s1, int simm13a, Condition c, Predict p, Label& L);
-  // Platform-dependent version for pointer compare (icc on !LP64 and xcc on LP64).
-  void cmp_and_brx_short(Register s1, Register s2, Condition c, Predict p, Label& L);
-  void cmp_and_brx_short(Register s1, int simm13a, Condition c, Predict p, Label& L);
-
-  // Short branch versions for comparing a pointer with zero.
-  void br_null_short   ( Register s1, Predict p, Label& L );
-  void br_notnull_short( Register s1, Predict p, Label& L );
-
-  // unconditional short branch
-  void ba_short(Label& L);
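
A sketch of the reach limit behind the ATTENTION note: cbcond encodes a 10-bit word displacement, so a short branch only applies when the target lies within 512 instructions (2 Kbyte) of the branch; outside that window the macro assembler must fall back to a regular branch with a nop in the delay slot. The bounds below illustrate that budget:

    #include <cstdint>
    static bool within_cbcond_reach(intptr_t branch_pc, intptr_t target_pc) {
      intptr_t disp = target_pc - branch_pc;               // byte displacement
      return (disp & 3) == 0 && disp >= -2048 && disp < 2048;
    }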
-
-  inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
-  inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
-
-  // Branch that tests xcc in LP64 and icc in !LP64
-  inline void brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
-  inline void brx( Condition c, bool a, Predict p, Label& L );
-
-  // unconditional branch
-  inline void ba( Label& L );
-
-  // Branch that tests fp condition codes
-  inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
-  inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );
-
-  // get PC the best way
-  inline int get_pc( Register d );
-
-  // Sparc shorthands (pp 85, V8 manual, pp 289 V9 manual)
-  inline void cmp(  Register s1, Register s2 ) { subcc( s1, s2, G0 ); }
-  inline void cmp(  Register s1, int simm13a ) { subcc( s1, simm13a, G0 ); }
-
-  inline void jmp( Register s1, Register s2 );
-  inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
-
-  // Check if the call target is out of wdisp30 range (relative to the code cache)
-  static inline bool is_far_target(address d);
-  inline void call( address d,  relocInfo::relocType rt = relocInfo::runtime_call_type );
-  inline void call( Label& L,   relocInfo::relocType rt = relocInfo::runtime_call_type );
-  inline void callr( Register s1, Register s2 );
-  inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
-
-  // Emits nothing on V8
-  inline void iprefetch( address d, relocInfo::relocType rt = relocInfo::none );
-  inline void iprefetch( Label& L);
-
-  inline void tst( Register s ) { orcc( G0, s, G0 ); }
-
-#ifdef PRODUCT
-  inline void ret(  bool trace = TraceJumps )   { if (trace) {
-                                                    mov(I7, O7); // traceable register
-                                                    JMP(O7, 2 * BytesPerInstWord);
-                                                  } else {
-                                                    jmpl( I7, 2 * BytesPerInstWord, G0 );
-                                                  }
-                                                }
-
-  inline void retl( bool trace = TraceJumps )  { if (trace) JMP(O7, 2 * BytesPerInstWord);
-                                                 else jmpl( O7, 2 * BytesPerInstWord, G0 ); }
-#else
-  void ret(  bool trace = TraceJumps );
-  void retl( bool trace = TraceJumps );
-#endif /* PRODUCT */
-
-  // Required platform-specific helpers for Label::patch_instructions.
-  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
-  void pd_patch_instruction(address branch, address target);
-#ifndef PRODUCT
-  static void pd_print_patched_instruction(address branch);
-#endif
-
-  // sethi Macro handles optimizations and relocations
-private:
-  void internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable);
-public:
-  void sethi(const AddressLiteral& addrlit, Register d);
-  void patchable_sethi(const AddressLiteral& addrlit, Register d);
-
-  // compute the number of instructions for a sethi/set
-  static int  insts_for_sethi( address a, bool worst_case = false );
-  static int  worst_case_insts_for_set();
-
-  // set may be either setsw or setuw (high 32 bits may be zero or sign)
-private:
-  void internal_set(const AddressLiteral& al, Register d, bool ForceRelocatable);
-  static int insts_for_internal_set(intptr_t value);
-public:
-  void set(const AddressLiteral& addrlit, Register d);
-  void set(intptr_t value, Register d);
-  void set(address addr, Register d, RelocationHolder const& rspec);
-  static int insts_for_set(intptr_t value) { return insts_for_internal_set(value); }
-
-  void patchable_set(const AddressLiteral& addrlit, Register d);
-  void patchable_set(intptr_t value, Register d);
-  void set64(jlong value, Register d, Register tmp);
-  static int insts_for_set64(jlong value);
-
-  // sign-extend 32 to 64
-  inline void signx( Register s, Register d ) { sra( s, G0, d); }
-  inline void signx( Register d )             { sra( d, G0, d); }
-
-  inline void not1( Register s, Register d ) { xnor( s, G0, d ); }
-  inline void not1( Register d )             { xnor( d, G0, d ); }
-
-  inline void neg( Register s, Register d ) { sub( G0, s, d ); }
-  inline void neg( Register d )             { sub( G0, d, d ); }
-
-  inline void cas(  Register s1, Register s2, Register d) { casa( s1, s2, d, ASI_PRIMARY); }
-  inline void casx( Register s1, Register s2, Register d) { casxa(s1, s2, d, ASI_PRIMARY); }
-  // Functions for isolating 64 bit atomic swaps for LP64
-  // cas_ptr will perform cas for 32 bit VM's and casx for 64 bit VM's
-  inline void cas_ptr(  Register s1, Register s2, Register d) {
-#ifdef _LP64
-    casx( s1, s2, d );
-#else
-    cas( s1, s2, d );
-#endif
-  }
-
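-  // Illustrative sketch (register names hypothetical): cas_ptr compares the
-  // word at [s1] with s2 and, on a match, swaps d with memory; d always
-  // receives the old memory value.  A typical success test is:
-  //   mov(Rnew, Rres);                  // d is overwritten with the old value
-  //   cas_ptr(Raddr, Rexpected, Rres);
-  //   cmp(Rexpected, Rres);             // equal => the swap succeeded
-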
-  // Functions for isolating 64 bit shifts for LP64
-  inline void sll_ptr( Register s1, Register s2, Register d );
-  inline void sll_ptr( Register s1, int imm6a,   Register d );
-  inline void sll_ptr( Register s1, RegisterOrConstant s2, Register d );
-  inline void srl_ptr( Register s1, Register s2, Register d );
-  inline void srl_ptr( Register s1, int imm6a,   Register d );
-
-  // little-endian
-  inline void casl(  Register s1, Register s2, Register d) { casa( s1, s2, d, ASI_PRIMARY_LITTLE); }
-  inline void casxl( Register s1, Register s2, Register d) { casxa(s1, s2, d, ASI_PRIMARY_LITTLE); }
-
-  inline void inc(   Register d,  int const13 = 1 ) { add(   d, const13, d); }
-  inline void inccc( Register d,  int const13 = 1 ) { addcc( d, const13, d); }
-
-  inline void dec(   Register d,  int const13 = 1 ) { sub(   d, const13, d); }
-  inline void deccc( Register d,  int const13 = 1 ) { subcc( d, const13, d); }
-
-  inline void btst( Register s1,  Register s2 ) { andcc( s1, s2, G0 ); }
-  inline void btst( int simm13a,  Register s )  { andcc( s,  simm13a, G0 ); }
-
-  inline void bset( Register s1,  Register s2 ) { or3( s1, s2, s2 ); }
-  inline void bset( int simm13a,  Register s )  { or3( s,  simm13a, s ); }
-
-  inline void bclr( Register s1,  Register s2 ) { andn( s1, s2, s2 ); }
-  inline void bclr( int simm13a,  Register s )  { andn( s,  simm13a, s ); }
-
-  inline void btog( Register s1,  Register s2 ) { xor3( s1, s2, s2 ); }
-  inline void btog( int simm13a,  Register s )  { xor3( s,  simm13a, s ); }
-
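-  // For example (illustrative): bset(0x08, Rflags) sets bit 3 of Rflags in
-  // place (or3 writes its result back into s); bclr and btog clear and
-  // toggle bits the same way.
-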
-  inline void clr( Register d ) { or3( G0, G0, d ); }
-
-  inline void clrb( Register s1, Register s2);
-  inline void clrh( Register s1, Register s2);
-  inline void clr(  Register s1, Register s2);
-  inline void clrx( Register s1, Register s2);
-
-  inline void clrb( Register s1, int simm13a);
-  inline void clrh( Register s1, int simm13a);
-  inline void clr(  Register s1, int simm13a);
-  inline void clrx( Register s1, int simm13a);
-
-  // copy & clear upper word
-  inline void clruw( Register s, Register d ) { srl( s, G0, d); }
-  // clear upper word
-  inline void clruwu( Register d ) { srl( d, G0, d); }
-
-  // membar pseudo instruction.  Takes the target memory model into account.
-  inline void membar( Assembler::Membar_mask_bits const7a );
-
-  // returns whether membar generates anything.
-  inline bool membar_has_effect( Assembler::Membar_mask_bits const7a );
-
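-  // Note (illustrative): under TSO only a StoreLoad fence needs an actual
-  // instruction, so membar(Membar_mask_bits(StoreLoad)) emits code while a
-  // StoreStore-only request may emit nothing; membar_has_effect() lets a
-  // caller ask before reserving code space.
-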
-  // mov pseudo instructions
-  inline void mov( Register s,  Register d) {
-    if ( s != d )    or3( G0, s, d);
-    else             assert_not_delayed();  // Put something useful in the delay slot!
-  }
-
-  inline void mov_or_nop( Register s,  Register d) {
-    if ( s != d )    or3( G0, s, d);
-    else             nop();
-  }
-
-  inline void mov( int simm13a, Register d) { or3( G0, simm13a, d); }
-
-  // address pseudos: make these names unlike instruction names to avoid confusion
-  inline intptr_t load_pc_address( Register reg, int bytes_to_skip );
-  inline void load_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
-  inline void load_bool_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
-  inline void load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
-  inline void store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0);
-  inline void store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0);
-  inline void jumpl_to(const AddressLiteral& addrlit, Register temp, Register d, int offset = 0);
-  inline void jump_to(const AddressLiteral& addrlit, Register temp, int offset = 0);
-  inline void jump_indirect_to(Address& a, Register temp, int ld_offset = 0, int jmp_offset = 0);
-
-  // ring buffer traceable jumps
-
-  void jmp2( Register r1, Register r2, const char* file, int line );
-  void jmp ( Register r1, int offset,  const char* file, int line );
-
-  void jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line);
-  void jump (const AddressLiteral& addrlit, Register temp,             int offset, const char* file, int line);
-
-
-  // argument pseudos:
-
-  inline void load_argument( Argument& a, Register  d );
-  inline void store_argument( Register s, Argument& a );
-  inline void store_ptr_argument( Register s, Argument& a );
-  inline void store_float_argument( FloatRegister s, Argument& a );
-  inline void store_double_argument( FloatRegister s, Argument& a );
-  inline void store_long_argument( Register s, Argument& a );
-
-  // handy macros:
-
-  inline void round_to( Register r, int modulus ) {
-    assert_not_delayed();
-    inc( r, modulus - 1 );
-    and3( r, -modulus, r );
-  }
-
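-  // Worked example: with modulus == 8 and r == 13, inc yields 20 and and3
-  // with -8 (...11111000 in binary) yields 16; r is rounded up to the next
-  // multiple of 8 (modulus must be a power of two for this to work).
-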
-  // --------------------------------------------------
-
-  // Functions for isolating 64 bit loads for LP64
-  // ld_ptr will perform ld for 32 bit VM's and ldx for 64 bit VM's
-  // st_ptr will perform st for 32 bit VM's and stx for 64 bit VM's
-  inline void ld_ptr(Register s1, Register s2, Register d);
-  inline void ld_ptr(Register s1, int simm13a, Register d);
-  inline void ld_ptr(Register s1, RegisterOrConstant s2, Register d);
-  inline void ld_ptr(const Address& a, Register d, int offset = 0);
-  inline void st_ptr(Register d, Register s1, Register s2);
-  inline void st_ptr(Register d, Register s1, int simm13a);
-  inline void st_ptr(Register d, Register s1, RegisterOrConstant s2);
-  inline void st_ptr(Register d, const Address& a, int offset = 0);
-
-#ifdef ASSERT
-  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
-  inline void ld_ptr(Register s1, ByteSize simm13a, Register d);
-  inline void st_ptr(Register d, Register s1, ByteSize simm13a);
-#endif
-
-  // ld_long will perform ldd for 32 bit VM's and ldx for 64 bit VM's
-  // st_long will perform std for 32 bit VM's and stx for 64 bit VM's
-  inline void ld_long(Register s1, Register s2, Register d);
-  inline void ld_long(Register s1, int simm13a, Register d);
-  inline void ld_long(Register s1, RegisterOrConstant s2, Register d);
-  inline void ld_long(const Address& a, Register d, int offset = 0);
-  inline void st_long(Register d, Register s1, Register s2);
-  inline void st_long(Register d, Register s1, int simm13a);
-  inline void st_long(Register d, Register s1, RegisterOrConstant s2);
-  inline void st_long(Register d, const Address& a, int offset = 0);
-
-  // Helpers for address formation.
-  // - They emit only a move if s2 is a constant zero.
-  // - If dest is a constant and either s1 or s2 is a register, the temp argument is required and becomes the result.
-  // - If dest is a register and either s1 or s2 is a non-simm13 constant, the temp argument is required and used to materialize the constant.
-  RegisterOrConstant regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg);
-  RegisterOrConstant regcon_inc_ptr( RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg);
-  RegisterOrConstant regcon_sll_ptr( RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg);
-
-  RegisterOrConstant ensure_simm13_or_reg(RegisterOrConstant src, Register temp) {
-    if (is_simm13(src.constant_or_zero()))
-      return src;               // register or short constant
-    guarantee(temp != noreg, "constant offset overflow");
-    set(src.as_constant(), temp);
-    return temp;
-  }
-
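-  // Usage sketch (values hypothetical): a constant fitting in 13 signed bits
-  // is passed through unchanged; a larger one is materialized into temp:
-  //   RegisterOrConstant off = ensure_simm13_or_reg(RegisterOrConstant(1 << 20), Rtemp);
-  //   // off is now Rtemp holding 1 << 20; the guarantee fires if temp == noreg.
-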
-  // --------------------------------------------------
-
- public:
-  // traps as per trap.h (SPARC ABI?)
-
-  void breakpoint_trap();
-  void breakpoint_trap(Condition c, CC cc);
-  void flush_windows_trap();
-  void clean_windows_trap();
-  void get_psr_trap();
-  void set_psr_trap();
-
-  // V8/V9 flush_windows
-  void flush_windows();
-
-  // Support for serializing memory accesses between threads
-  void serialize_memory(Register thread, Register tmp1, Register tmp2);
-
-  // Stack frame creation/removal
-  void enter();
-  void leave();
-
-  // V8/V9 integer multiply
-  void mult(Register s1, Register s2, Register d);
-  void mult(Register s1, int simm13a, Register d);
-
-  // V8/V9 read and write of condition codes.
-  void read_ccr(Register d);
-  void write_ccr(Register s);
-
-  // Manipulation of C++ bools
-  // These are idioms to flag the need for care when accessing bools; on
-  // this platform we assume byte size.
-
-  inline void stbool(Register d, const Address& a) { stb(d, a); }
-  inline void ldbool(const Address& a, Register d) { ldub(a, d); }
-  inline void movbool( bool boolconst, Register d) { mov( (int) boolconst, d); }
-
-  // klass oop manipulations if compressed
-  void load_klass(Register src_oop, Register klass);
-  void store_klass(Register klass, Register dst_oop);
-  void store_klass_gap(Register s, Register dst_oop);
-
-  // oop manipulations
-  void load_heap_oop(const Address& s, Register d);
-  void load_heap_oop(Register s1, Register s2, Register d);
-  void load_heap_oop(Register s1, int simm13a, Register d);
-  void load_heap_oop(Register s1, RegisterOrConstant s2, Register d);
-  void store_heap_oop(Register d, Register s1, Register s2);
-  void store_heap_oop(Register d, Register s1, int simm13a);
-  void store_heap_oop(Register d, const Address& a, int offset = 0);
-
-  void encode_heap_oop(Register src, Register dst);
-  void encode_heap_oop(Register r) {
-    encode_heap_oop(r, r);
-  }
-  void decode_heap_oop(Register src, Register dst);
-  void decode_heap_oop(Register r) {
-    decode_heap_oop(r, r);
-  }
-  void encode_heap_oop_not_null(Register r);
-  void decode_heap_oop_not_null(Register r);
-  void encode_heap_oop_not_null(Register src, Register dst);
-  void decode_heap_oop_not_null(Register src, Register dst);
-
-  void encode_klass_not_null(Register r);
-  void decode_klass_not_null(Register r);
-  void encode_klass_not_null(Register src, Register dst);
-  void decode_klass_not_null(Register src, Register dst);
-
-  // Support for managing the JavaThread pointer (i.e.; the reference to
-  // thread-local information).
-  void get_thread();                                // load G2_thread
-  void verify_thread();                             // verify G2_thread contents
-  void save_thread   (const Register thread_cache); // save to cache
-  void restore_thread(const Register thread_cache); // restore from cache
-
-  // Support for last Java frame (but use call_VM instead where possible)
-  void set_last_Java_frame(Register last_java_sp, Register last_Java_pc);
-  void reset_last_Java_frame(void);
-
-  // Call into the VM.
-  // Passes the thread pointer (in O0) as a prepended argument.
-  // Makes sure oop return values are visible to the GC.
-  void call_VM(Register oop_result, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
-  void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
-  void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
-  void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
-
-  // these overloadings are not presently used on SPARC:
-  void call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
-  void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
-  void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
-  void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
-
-  void call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments = 0);
-  void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1);
-  void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2);
-  void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3);
-
-  void get_vm_result  (Register oop_result);
-  void get_vm_result_2(Register metadata_result);
-
-  // vm result is currently getting hijacked for oop preservation
-  void set_vm_result(Register oop_result);
-
-  // Emit the CompiledIC call idiom
-  void ic_call(address entry, bool emit_delay = true);
-
-  // if call_VM_base was called with check_exceptions=false, then call
-  // check_and_forward_exception to handle exceptions when it is safe
-  void check_and_forward_exception(Register scratch_reg);
-
- private:
-  // For V8
-  void read_ccr_trap(Register ccr_save);
-  void write_ccr_trap(Register ccr_save1, Register scratch1, Register scratch2);
-
-#ifdef ASSERT
-  // For V8 debugging.  Uses V8 instruction sequence and checks
-  // result with V9 instructions rdccr and wrccr.
-  // Uses Gscratch and Gscratch2
-  void read_ccr_v8_assert(Register ccr_save);
-  void write_ccr_v8_assert(Register ccr_save);
-#endif // ASSERT
-
- public:
-
-  // Write to card table - the tmp register is destroyed afterwards.
-  void card_table_write(jbyte* byte_map_base, Register tmp, Register obj);
-
-  void card_write_barrier_post(Register store_addr, Register new_val, Register tmp);
-
-#ifndef SERIALGC
-  // General G1 pre-barrier generator.
-  void g1_write_barrier_pre(Register obj, Register index, int offset, Register pre_val, Register tmp, bool preserve_o_regs);
-
-  // General G1 post-barrier generator
-  void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp);
-#endif // SERIALGC
-
-  // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
-  void push_fTOS();
-
-  // pops double TOS element from CPU stack and pushes on FPU stack
-  void pop_fTOS();
-
-  void empty_FPU_stack();
-
-  void push_IU_state();
-  void pop_IU_state();
-
-  void push_FPU_state();
-  void pop_FPU_state();
-
-  void push_CPU_state();
-  void pop_CPU_state();
-
-  // if heap base register is used - reinit it with the correct value
-  void reinit_heapbase();
-
-  // Debugging
-  void _verify_oop(Register reg, const char * msg, const char * file, int line);
-  void _verify_oop_addr(Address addr, const char * msg, const char * file, int line);
-
-  // TODO: verify_method and klass metadata (compare against vptr?)
-  void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
-  void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
-
-#define verify_oop(reg) _verify_oop(reg, "broken oop " #reg, __FILE__, __LINE__)
-#define verify_oop_addr(addr) _verify_oop_addr(addr, "broken oop addr ", __FILE__, __LINE__)
-#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
-#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
-
-  // the verify_* macros above are active only with +VerifyOops
-  void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); // only if +VerifyFPU
-  void stop(const char* msg);                          // prints msg, dumps registers and stops execution
-  void warn(const char* msg);                          // prints msg, but don't stop
-  void untested(const char* what = "");
-  void unimplemented(const char* what = "")      { char* b = new char[1024];  jio_snprintf(b, 1024, "unimplemented: %s", what);  stop(b); }
-  void should_not_reach_here()                   { stop("should not reach here"); }
-  void print_CPU_state();
-
-  // oops in code
-  AddressLiteral allocate_oop_address(jobject obj);                          // allocate_index
-  AddressLiteral constant_oop_address(jobject obj);                          // find_index
-  inline void    set_oop             (jobject obj, Register d);              // uses allocate_oop_address
-  inline void    set_oop_constant    (jobject obj, Register d);              // uses constant_oop_address
-  inline void    set_oop             (const AddressLiteral& obj_addr, Register d); // same as load_address
-
-  // metadata in code that we have to keep track of
-  AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index
-  AddressLiteral constant_metadata_address(Metadata* obj); // find_index
-  inline void    set_metadata             (Metadata* obj, Register d);              // uses allocate_metadata_address
-  inline void    set_metadata_constant    (Metadata* obj, Register d);              // uses constant_metadata_address
-  inline void    set_metadata             (const AddressLiteral& obj_addr, Register d); // same as load_address
-
-  void set_narrow_oop( jobject obj, Register d );
-  void set_narrow_klass( Klass* k, Register d );
-
-  // nop padding
-  void align(int modulus);
-
-  // declare a safepoint
-  void safepoint();
-
-  // factor out part of stop into subroutine to save space
-  void stop_subroutine();
-  // factor out part of verify_oop into subroutine to save space
-  void verify_oop_subroutine();
-
-  // side-door communication with signalHandler in os_solaris.cpp
-  static address _verify_oop_implicit_branch[3];
-
-#ifndef PRODUCT
-  static void test();
-#endif
-
-  int total_frame_size_in_bytes(int extraWords);
-
-  // used when extraWords known statically
-  void save_frame(int extraWords = 0);
-  void save_frame_c1(int size_in_bytes);
-  // make a frame, and simultaneously pass up one or two register values
-  // into the new register window
-  void save_frame_and_mov(int extraWords, Register s1, Register d1, Register s2 = Register(), Register d2 = Register());
-
-  // given no. of (outgoing) params, calc # of words needed on frame
-  void calc_mem_param_words(Register Rparam_words, Register Rresult);
-
-  // used to calculate frame size dynamically
-  // result is in bytes and must be negated for save inst
-  void calc_frame_size(Register extraWords, Register resultReg);
-
-  // calc and also save
-  void calc_frame_size_and_save(Register extraWords, Register resultReg);
-
-  static void debug(char* msg, RegistersForDebugging* outWindow);
-
-  // implementations of bytecodes used by both interpreter and compiler
-
-  void lcmp( Register Ra_hi, Register Ra_low,
-             Register Rb_hi, Register Rb_low,
-             Register Rresult);
-
-  void lneg( Register Rhi, Register Rlow );
-
-  void lshl(  Register Rin_high,  Register Rin_low,  Register Rcount,
-              Register Rout_high, Register Rout_low, Register Rtemp );
-
-  void lshr(  Register Rin_high,  Register Rin_low,  Register Rcount,
-              Register Rout_high, Register Rout_low, Register Rtemp );
-
-  void lushr( Register Rin_high,  Register Rin_low,  Register Rcount,
-              Register Rout_high, Register Rout_low, Register Rtemp );
-
-#ifdef _LP64
-  void lcmp( Register Ra, Register Rb, Register Rresult);
-#endif
-
-  // Load and store values by size and signed-ness
-  void load_sized_value( Address src, Register dst, size_t size_in_bytes, bool is_signed);
-  void store_sized_value(Register src, Address dst, size_t size_in_bytes);
-
-  void float_cmp( bool is_float, int unordered_result,
-                  FloatRegister Fa, FloatRegister Fb,
-                  Register Rresult);
-
-  void fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d);
-  void fneg( FloatRegisterImpl::Width w, FloatRegister sd ) { Assembler::fneg(w, sd); }
-  void fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d);
-  void fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d);
-
-  void save_all_globals_into_locals();
-  void restore_globals_from_locals();
-
-  void casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg,
-    address lock_addr=0, bool use_call_vm=false);
-  void cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg,
-    address lock_addr=0, bool use_call_vm=false);
-  void casn (Register addr_reg, Register cmp_reg, Register set_reg) ;
-
-  // These set the icc condition code to equal if the lock succeeded
-  // and notEqual if it failed and requires a slow case
-  void compiler_lock_object(Register Roop, Register Rmark, Register Rbox,
-                            Register Rscratch,
-                            BiasedLockingCounters* counters = NULL,
-                            bool try_bias = UseBiasedLocking);
-  void compiler_unlock_object(Register Roop, Register Rmark, Register Rbox,
-                              Register Rscratch,
-                              bool try_bias = UseBiasedLocking);
-
-  // Biased locking support
-  // Upon entry, lock_reg must point to the lock record on the stack,
-  // obj_reg must contain the target object, and mark_reg must contain
-  // the target object's header.
-  // Destroys mark_reg if an attempt is made to bias an anonymously
-  // biased lock. In this case a failure will go either to the slow
-  // case or fall through with the notEqual condition code set with
-  // the expectation that the slow case in the runtime will be called.
-  // In the fall-through case where the CAS-based lock is done,
-  // mark_reg is not destroyed.
-  void biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg,
-                            Label& done, Label* slow_case = NULL,
-                            BiasedLockingCounters* counters = NULL);
-  // Upon entry, the base register of mark_addr must contain the oop.
-  // Destroys temp_reg.
-
-  // If allow_delay_slot_filling is set to true, the next instruction
-  // emitted after this one will go in an annulled delay slot if the
-  // biased locking exit case failed.
-  void biased_locking_exit(Address mark_addr, Register temp_reg, Label& done, bool allow_delay_slot_filling = false);
-
-  // allocation
-  void eden_allocate(
-    Register obj,                      // result: pointer to object after successful allocation
-    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
-    int      con_size_in_bytes,        // object size in bytes if   known at compile time
-    Register t1,                       // temp register
-    Register t2,                       // temp register
-    Label&   slow_case                 // continuation point if fast allocation fails
-  );
-  void tlab_allocate(
-    Register obj,                      // result: pointer to object after successful allocation
-    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
-    int      con_size_in_bytes,        // object size in bytes if   known at compile time
-    Register t1,                       // temp register
-    Label&   slow_case                 // continuation point if fast allocation fails
-  );
-  void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
-  void incr_allocated_bytes(RegisterOrConstant size_in_bytes,
-                            Register t1, Register t2);
-
-  // interface method calling
-  void lookup_interface_method(Register recv_klass,
-                               Register intf_klass,
-                               RegisterOrConstant itable_index,
-                               Register method_result,
-                               Register temp_reg, Register temp2_reg,
-                               Label& no_such_interface);
-
-  // virtual method calling
-  void lookup_virtual_method(Register recv_klass,
-                             RegisterOrConstant vtable_index,
-                             Register method_result);
-
-  // Test sub_klass against super_klass, with fast and slow paths.
-
-  // The fast path produces a tri-state answer: yes / no / maybe-slow.
-  // One of the three labels can be NULL, meaning take the fall-through.
-  // If super_check_offset is -1, the value is loaded up from super_klass.
-  // No registers are killed, except temp_reg and temp2_reg.
-  // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
-  void check_klass_subtype_fast_path(Register sub_klass,
-                                     Register super_klass,
-                                     Register temp_reg,
-                                     Register temp2_reg,
-                                     Label* L_success,
-                                     Label* L_failure,
-                                     Label* L_slow_path,
-                RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
-
-  // The rest of the type check; must be wired to a corresponding fast path.
-  // It does not repeat the fast path logic, so don't use it standalone.
-  // The temp_reg can be noreg, if no temps are available.
-  // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
-  // Updates the sub's secondary super cache as necessary.
-  void check_klass_subtype_slow_path(Register sub_klass,
-                                     Register super_klass,
-                                     Register temp_reg,
-                                     Register temp2_reg,
-                                     Register temp3_reg,
-                                     Register temp4_reg,
-                                     Label* L_success,
-                                     Label* L_failure);
-
-  // Simplified, combined version, good for typical uses.
-  // Falls through on failure.
-  void check_klass_subtype(Register sub_klass,
-                           Register super_klass,
-                           Register temp_reg,
-                           Register temp2_reg,
-                           Label& L_success);
-
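-  // Combined-use sketch (register choices illustrative): the fast path's
-  // slow-case label is wired to a matching slow path call:
-  //   Label L_ok, L_bad, L_slow;
-  //   check_klass_subtype_fast_path(Rsub, Rsup, Rt1, Rt2, &L_ok, &L_bad, &L_slow);
-  //   bind(L_slow);
-  //   check_klass_subtype_slow_path(Rsub, Rsup, Rt1, Rt2, Rt3, Rt4, &L_ok, &L_bad);
-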
-  // method handles (JSR 292)
-  // offset relative to Gargs of argument at tos[arg_slot].
-  // (arg_slot == 0 means the last argument, not the first).
-  RegisterOrConstant argument_offset(RegisterOrConstant arg_slot,
-                                     Register temp_reg,
-                                     int extra_slot_offset = 0);
-  // Address of Gargs and argument_offset.
-  Address            argument_address(RegisterOrConstant arg_slot,
-                                      Register temp_reg = noreg,
-                                      int extra_slot_offset = 0);
-
-  // Stack overflow checking
-
-  // Note: this clobbers G3_scratch
-  void bang_stack_with_offset(int offset) {
-    // stack grows down, caller passes positive offset
-    assert(offset > 0, "offset must be positive (the probe goes below SP)");
-    set((-offset)+STACK_BIAS, G3_scratch);
-    st(G0, SP, G3_scratch);
-  }
-
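-  // E.g. bang_stack_with_offset(os::vm_page_size()) stores G0 at
-  // [SP + STACK_BIAS - page_size], touching the guard area one page below
-  // the (biased) stack pointer.  (Illustrative; callers choose the probe
-  // distance.)
-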
-  // Writes to stack successive pages until offset reached to check for
-  // stack overflow + shadow pages.  Clobbers tsp and scratch registers.
-  void bang_stack_size(Register Rsize, Register Rtsp, Register Rscratch);
-
-  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset);
-
-  void verify_tlab();
-
-  Condition negate_condition(Condition cond);
-
-  // Helper functions for statistics gathering.
-  // Conditionally (non-atomically) increments passed counter address, preserving condition codes.
-  void cond_inc(Condition cond, address counter_addr, Register Rtemp1, Register Rtemp2);
-  // Unconditional increment.
-  void inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2);
-  void inc_counter(int*    counter_addr, Register Rtmp1, Register Rtmp2);
-
-  // Compare char[] arrays aligned to 4 bytes.
-  void char_arrays_equals(Register ary1, Register ary2,
-                          Register limit, Register result,
-                          Register chr1, Register chr2, Label& Ldone);
-  // Use BIS for zeroing
-  void bis_zeroing(Register to, Register count, Register temp, Label& Ldone);
-
-#undef VIRTUAL
-
-};
-
-/**
- * class SkipIfEqual:
- *
- * Instantiating this class will result in assembly code being output that will
- * jump around any code emitted between the creation of the instance and its
- * automatic destruction at the end of a scope block, depending on the value of
- * the flag passed to the constructor, which will be checked at run-time.
- */
-class SkipIfEqual : public StackObj {
- private:
-  MacroAssembler* _masm;
-  Label _label;
-
- public:
-   // 'temp' is a temp register that this object can use (and trash)
-   SkipIfEqual(MacroAssembler*, Register temp,
-               const bool* flag_addr, Assembler::Condition condition);
-   ~SkipIfEqual();
-};
-
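-// Usage sketch (flag name hypothetical): with Assembler::zero the guarded
-// code is skipped whenever the flag byte reads zero at run time:
-//   {
-//     SkipIfEqual skip(masm, Rtemp, &SomeDiagnosticFlag, Assembler::zero);
-//     ... // code emitted here runs only when SomeDiagnosticFlag is true
-//   }   // destructor binds the skip-around label
-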
-#ifdef ASSERT
-// On RISC, there's no benefit to verifying instruction boundaries.
-inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
-#endif
-
 #endif // CPU_SPARC_VM_ASSEMBLER_SPARC_HPP
--- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,33 +25,8 @@
 #ifndef CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP
 #define CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP
 
-#include "asm/assembler.inline.hpp"
-#include "asm/codeBuffer.hpp"
-#include "code/codeCache.hpp"
-#include "runtime/handles.inline.hpp"
-
-inline void MacroAssembler::pd_patch_instruction(address branch, address target) {
-  jint& stub_inst = *(jint*) branch;
-  stub_inst = patched_branch(target - branch, stub_inst, 0);
-}
+#include "asm/assembler.hpp"
 
-#ifndef PRODUCT
-inline void MacroAssembler::pd_print_patched_instruction(address branch) {
-  jint stub_inst = *(jint*) branch;
-  print_instruction(stub_inst);
-  ::tty->print("%s", " (unresolved)");
-}
-#endif // PRODUCT
-
-inline bool Address::is_simm13(int offset) { return Assembler::is_simm13(disp() + offset); }
-
-
-inline int AddressLiteral::low10() const {
-  return Assembler::low10(value());
-}
-
-
-// inlines for SPARC assembler -- dmu 5/97
 
 inline void Assembler::check_delay() {
 # ifdef CHECK_DELAY
@@ -60,25 +35,24 @@
 # endif
 }
 
-inline void Assembler::emit_long(int x) {
+inline void Assembler::emit_int32(int x) {
   check_delay();
-  AbstractAssembler::emit_long(x);
+  AbstractAssembler::emit_int32(x);
 }
 
 inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
   relocate(rtype);
-  emit_long(x);
+  emit_int32(x);
 }
 
 inline void Assembler::emit_data(int x, RelocationHolder const& rspec) {
   relocate(rspec);
-  emit_long(x);
+  emit_int32(x);
 }
 
 
-inline void Assembler::add(Register s1, Register s2, Register d )                             { emit_long( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | rs2(s2) ); }
-inline void Assembler::add(Register s1, int simm13a, Register d, relocInfo::relocType rtype ) { emit_data( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rtype ); }
-inline void Assembler::add(Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { emit_data( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec ); }
+inline void Assembler::add(Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::add(Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
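+// Worked example (illustrative): for add %g1, %g2, %g3 the fields compose as
+//   op(arith_op) = 2 << 30, rd(G3) = 3 << 25, op3(add_op3) = 0 << 19,
+//   rs1(G1) = 1 << 14, rs2(G2) = 2,
+// so emit_int32 writes the word 0x86004002.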
 
 inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only();  cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt);  has_delay_slot(); }
 inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { bpr( c, a, p, s1, target(L)); }
@@ -105,792 +79,93 @@
 inline void Assembler::call( address d,  relocInfo::relocType rt ) { cti();  emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt);  has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); }
 inline void Assembler::call( Label& L,   relocInfo::relocType rt ) { call( target(L), rt); }
 
-inline void Assembler::flush( Register s1, Register s2) { emit_long( op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2)); }
+inline void Assembler::flush( Register s1, Register s2) { emit_int32( op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2)); }
 inline void Assembler::flush( Register s1, int simm13a) { emit_data( op(arith_op) | op3(flush_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::jmpl( Register s1, Register s2, Register d ) { cti();  emit_long( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2));  has_delay_slot(); }
+inline void Assembler::jmpl( Register s1, Register s2, Register d ) { cti();  emit_int32( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2));  has_delay_slot(); }
 inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { cti();  emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);  has_delay_slot(); }
 
-inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d) {
-  if (s2.is_register()) ldf(w, s1, s2.as_register(), d);
-  else                  ldf(w, s1, s2.as_constant(), d);
-}
-
-inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { emit_long( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { emit_int32( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, RelocationHolder const& rspec) { emit_data( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); }
 
-inline void Assembler::ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset) { relocate(a.rspec(offset)); ldf( w, a.base(), a.disp() + offset, d); }
-
-inline void Assembler::ldfsr(  Register s1, Register s2) { v9_dep();   emit_long( op(ldst_op) |             op3(ldfsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldfsr(  Register s1, Register s2) { v9_dep();   emit_int32( op(ldst_op) |             op3(ldfsr_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldfsr(  Register s1, int simm13a) { v9_dep();   emit_data( op(ldst_op) |             op3(ldfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldxfsr( Register s1, Register s2) { v9_only();  emit_long( op(ldst_op) | rd(G1)    | op3(ldfsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldxfsr( Register s1, Register s2) { v9_only();  emit_int32( op(ldst_op) | rd(G1)    | op3(ldfsr_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldxfsr( Register s1, int simm13a) { v9_only();  emit_data( op(ldst_op) | rd(G1)    | op3(ldfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::ldc(   Register s1, Register s2, int crd) { v8_only();  emit_long( op(ldst_op) | fcn(crd) | op3(ldc_op3  ) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldc(   Register s1, Register s2, int crd) { v8_only();  emit_int32( op(ldst_op) | fcn(crd) | op3(ldc_op3  ) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldc(   Register s1, int simm13a, int crd) { v8_only();  emit_data( op(ldst_op) | fcn(crd) | op3(ldc_op3  ) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::lddc(  Register s1, Register s2, int crd) { v8_only();  emit_long( op(ldst_op) | fcn(crd) | op3(lddc_op3 ) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::lddc(  Register s1, Register s2, int crd) { v8_only();  emit_int32( op(ldst_op) | fcn(crd) | op3(lddc_op3 ) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::lddc(  Register s1, int simm13a, int crd) { v8_only();  emit_data( op(ldst_op) | fcn(crd) | op3(lddc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldcsr( Register s1, Register s2, int crd) { v8_only();  emit_long( op(ldst_op) | fcn(crd) | op3(ldcsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldcsr( Register s1, Register s2, int crd) { v8_only();  emit_int32( op(ldst_op) | fcn(crd) | op3(ldcsr_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldcsr( Register s1, int simm13a, int crd) { v8_only();  emit_data( op(ldst_op) | fcn(crd) | op3(ldcsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::ldsb(  Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldsb_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldsb(  Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(ldsb_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldsb(  Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldsb_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::ldsh(  Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldsh_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldsh(  Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(ldsh_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldsh(  Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldsh_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldsw(  Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldsw_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldsw(  Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(ldsw_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldsw(  Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldsw_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldub(  Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldub_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldub(  Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(ldub_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldub(  Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldub_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::lduh(  Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(lduh_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::lduh(  Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(lduh_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::lduh(  Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(lduh_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::lduw(  Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(lduw_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::lduw(  Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(lduw_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::lduw(  Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(lduw_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::ldx(   Register s1, Register s2, Register d) { v9_only();  emit_long( op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldx(   Register s1, Register s2, Register d) { v9_only();  emit_int32( op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldx(   Register s1, int simm13a, Register d) { v9_only();  emit_data( op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldd(   Register s1, Register s2, Register d) { v9_dep(); assert(d->is_even(), "not even"); emit_long( op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldd(   Register s1, Register s2, Register d) { v9_dep(); assert(d->is_even(), "not even"); emit_int32( op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldd(   Register s1, int simm13a, Register d) { v9_dep(); assert(d->is_even(), "not even"); emit_data( op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-#ifdef _LP64
-// Make all 32 bit loads signed so 64 bit registers maintain proper sign
-inline void Assembler::ld(  Register s1, Register s2, Register d)      { ldsw( s1, s2, d); }
-inline void Assembler::ld(  Register s1, int simm13a, Register d)      { ldsw( s1, simm13a, d); }
-#else
-inline void Assembler::ld(  Register s1, Register s2, Register d)      { lduw( s1, s2, d); }
-inline void Assembler::ld(  Register s1, int simm13a, Register d)      { lduw( s1, simm13a, d); }
-#endif
-
-#ifdef ASSERT
-  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
-# ifdef _LP64
-inline void Assembler::ld(  Register s1, ByteSize simm13a, Register d) { ldsw( s1, in_bytes(simm13a), d); }
-# else
-inline void Assembler::ld(  Register s1, ByteSize simm13a, Register d) { lduw( s1, in_bytes(simm13a), d); }
-# endif
-#endif
-
-inline void Assembler::ld(  const Address& a, Register d, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); ld(  a.base(), a.index(),         d); }
-  else               {                          ld(  a.base(), a.disp() + offset, d); }
-}
-inline void Assembler::ldsb(const Address& a, Register d, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); ldsb(a.base(), a.index(),         d); }
-  else               {                          ldsb(a.base(), a.disp() + offset, d); }
-}
-inline void Assembler::ldsh(const Address& a, Register d, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); ldsh(a.base(), a.index(),         d); }
-  else               {                          ldsh(a.base(), a.disp() + offset, d); }
-}
-inline void Assembler::ldsw(const Address& a, Register d, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); ldsw(a.base(), a.index(),         d); }
-  else               {                          ldsw(a.base(), a.disp() + offset, d); }
-}
-inline void Assembler::ldub(const Address& a, Register d, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); ldub(a.base(), a.index(),         d); }
-  else               {                          ldub(a.base(), a.disp() + offset, d); }
-}
-inline void Assembler::lduh(const Address& a, Register d, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); lduh(a.base(), a.index(),         d); }
-  else               {                          lduh(a.base(), a.disp() + offset, d); }
-}
-inline void Assembler::lduw(const Address& a, Register d, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); lduw(a.base(), a.index(),         d); }
-  else               {                          lduw(a.base(), a.disp() + offset, d); }
-}
-inline void Assembler::ldd( const Address& a, Register d, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); ldd( a.base(), a.index(),         d); }
-  else               {                          ldd( a.base(), a.disp() + offset, d); }
-}
-inline void Assembler::ldx( const Address& a, Register d, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); ldx( a.base(), a.index(),         d); }
-  else               {                          ldx( a.base(), a.disp() + offset, d); }
-}
-
-inline void Assembler::ldub(Register s1, RegisterOrConstant s2, Register d) { ldub(Address(s1, s2), d); }
-inline void Assembler::ldsb(Register s1, RegisterOrConstant s2, Register d) { ldsb(Address(s1, s2), d); }
-inline void Assembler::lduh(Register s1, RegisterOrConstant s2, Register d) { lduh(Address(s1, s2), d); }
-inline void Assembler::ldsh(Register s1, RegisterOrConstant s2, Register d) { ldsh(Address(s1, s2), d); }
-inline void Assembler::lduw(Register s1, RegisterOrConstant s2, Register d) { lduw(Address(s1, s2), d); }
-inline void Assembler::ldsw(Register s1, RegisterOrConstant s2, Register d) { ldsw(Address(s1, s2), d); }
-inline void Assembler::ldx( Register s1, RegisterOrConstant s2, Register d) { ldx( Address(s1, s2), d); }
-inline void Assembler::ld(  Register s1, RegisterOrConstant s2, Register d) { ld(  Address(s1, s2), d); }
-inline void Assembler::ldd( Register s1, RegisterOrConstant s2, Register d) { ldd( Address(s1, s2), d); }
-
-// form effective addresses this way:
-inline void Assembler::add(const Address& a, Register d, int offset) {
-  if (a.has_index())   add(a.base(), a.index(),         d);
-  else               { add(a.base(), a.disp() + offset, d, a.rspec(offset)); offset = 0; }
-  if (offset != 0)     add(d,        offset,            d);
-}
-inline void Assembler::add(Register s1, RegisterOrConstant s2, Register d, int offset) {
-  if (s2.is_register())  add(s1, s2.as_register(),          d);
-  else                 { add(s1, s2.as_constant() + offset, d); offset = 0; }
-  if (offset != 0)       add(d,  offset,                    d);
-}
-
-inline void Assembler::andn(Register s1, RegisterOrConstant s2, Register d) {
-  if (s2.is_register())  andn(s1, s2.as_register(), d);
-  else                   andn(s1, s2.as_constant(), d);
-}
-
-inline void Assembler::ldstub(  Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldstub_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldstub(  Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(ldstub_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldstub(  Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldstub_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-
-inline void Assembler::prefetch(Register s1, Register s2, PrefetchFcn f) { v9_only();  emit_long( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | rs2(s2) ); }
-inline void Assembler::prefetch(Register s1, int simm13a, PrefetchFcn f) { v9_only();  emit_data( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-
-inline void Assembler::prefetch(const Address& a, PrefetchFcn f, int offset) { v9_only(); relocate(a.rspec(offset)); prefetch(a.base(), a.disp() + offset, f); }
-
-
-inline void Assembler::rett( Register s1, Register s2                         ) { cti();  emit_long( op(arith_op) | op3(rett_op3) | rs1(s1) | rs2(s2));  has_delay_slot(); }
+inline void Assembler::rett( Register s1, Register s2                         ) { cti();  emit_int32( op(arith_op) | op3(rett_op3) | rs1(s1) | rs2(s2));  has_delay_slot(); }
 inline void Assembler::rett( Register s1, int simm13a, relocInfo::relocType rt) { cti();  emit_data( op(arith_op) | op3(rett_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rt);  has_delay_slot(); }
 
 inline void Assembler::sethi( int imm22a, Register d, RelocationHolder const& rspec ) { emit_data( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(imm22a), rspec); }
 
   // pp 222
 
-inline void Assembler::stf(    FloatRegisterImpl::Width w, FloatRegister d, Register s1, RegisterOrConstant s2) {
-  if (s2.is_register()) stf(w, d, s1, s2.as_register());
-  else                  stf(w, d, s1, s2.as_constant());
-}
-
-inline void Assembler::stf(    FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2) { emit_long( op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stf(    FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2) { emit_int32( op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::stf(    FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a) { emit_data( op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::stf(    FloatRegisterImpl::Width w, FloatRegister d, const Address& a, int offset) {
-  relocate(a.rspec(offset));
-  if (a.has_index()) { assert(offset == 0, ""); stf(w, d, a.base(), a.index()        ); }
-  else               {                          stf(w, d, a.base(), a.disp() + offset); }
-}
-
-inline void Assembler::stfsr(  Register s1, Register s2) { v9_dep();   emit_long( op(ldst_op) |             op3(stfsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stfsr(  Register s1, Register s2) { v9_dep();   emit_int32( op(ldst_op) |             op3(stfsr_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::stfsr(  Register s1, int simm13a) { v9_dep();   emit_data( op(ldst_op) |             op3(stfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stxfsr( Register s1, Register s2) { v9_only();  emit_long( op(ldst_op) | rd(G1)    | op3(stfsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stxfsr( Register s1, Register s2) { v9_only();  emit_int32( op(ldst_op) | rd(G1)    | op3(stfsr_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::stxfsr( Register s1, int simm13a) { v9_only();  emit_data( op(ldst_op) | rd(G1)    | op3(stfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
   // p 226
 
-inline void Assembler::stb(  Register d, Register s1, Register s2) { emit_long( op(ldst_op) | rd(d) | op3(stb_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stb(  Register d, Register s1, Register s2) { emit_int32( op(ldst_op) | rd(d) | op3(stb_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::stb(  Register d, Register s1, int simm13a) { emit_data( op(ldst_op) | rd(d) | op3(stb_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::sth(  Register d, Register s1, Register s2) { emit_long( op(ldst_op) | rd(d) | op3(sth_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::sth(  Register d, Register s1, Register s2) { emit_int32( op(ldst_op) | rd(d) | op3(sth_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::sth(  Register d, Register s1, int simm13a) { emit_data( op(ldst_op) | rd(d) | op3(sth_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stw(  Register d, Register s1, Register s2) { emit_long( op(ldst_op) | rd(d) | op3(stw_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stw(  Register d, Register s1, Register s2) { emit_int32( op(ldst_op) | rd(d) | op3(stw_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::stw(  Register d, Register s1, int simm13a) { emit_data( op(ldst_op) | rd(d) | op3(stw_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
 
-inline void Assembler::stx(  Register d, Register s1, Register s2) { v9_only();  emit_long( op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stx(  Register d, Register s1, Register s2) { v9_only();  emit_int32( op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::stx(  Register d, Register s1, int simm13a) { v9_only();  emit_data( op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::std(  Register d, Register s1, Register s2) { v9_dep(); assert(d->is_even(), "not even"); emit_long( op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::std(  Register d, Register s1, Register s2) { v9_dep(); assert(d->is_even(), "not even"); emit_int32( op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::std(  Register d, Register s1, int simm13a) { v9_dep(); assert(d->is_even(), "not even"); emit_data( op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::st( Register d, Register s1, Register s2)      { stw(d, s1, s2); }
-inline void Assembler::st( Register d, Register s1, int simm13a)      { stw(d, s1, simm13a); }
-
-#ifdef ASSERT
-// ByteSize is only a class when ASSERT is defined, otherwise it's an int.
-inline void Assembler::st( Register d, Register s1, ByteSize simm13a) { stw(d, s1, in_bytes(simm13a)); }
-#endif
-
-inline void Assembler::stb(Register d, const Address& a, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); stb(d, a.base(), a.index()        ); }
-  else               {                          stb(d, a.base(), a.disp() + offset); }
-}
-inline void Assembler::sth(Register d, const Address& a, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); sth(d, a.base(), a.index()        ); }
-  else               {                          sth(d, a.base(), a.disp() + offset); }
-}
-inline void Assembler::stw(Register d, const Address& a, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); stw(d, a.base(), a.index()        ); }
-  else               {                          stw(d, a.base(), a.disp() + offset); }
-}
-inline void Assembler::st( Register d, const Address& a, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); st( d, a.base(), a.index()        ); }
-  else               {                          st( d, a.base(), a.disp() + offset); }
-}
-inline void Assembler::std(Register d, const Address& a, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); std(d, a.base(), a.index()        ); }
-  else               {                          std(d, a.base(), a.disp() + offset); }
-}
-inline void Assembler::stx(Register d, const Address& a, int offset) {
-  if (a.has_index()) { assert(offset == 0, ""); stx(d, a.base(), a.index()        ); }
-  else               {                          stx(d, a.base(), a.disp() + offset); }
-}
-
-inline void Assembler::stb(Register d, Register s1, RegisterOrConstant s2) { stb(d, Address(s1, s2)); }
-inline void Assembler::sth(Register d, Register s1, RegisterOrConstant s2) { sth(d, Address(s1, s2)); }
-inline void Assembler::stw(Register d, Register s1, RegisterOrConstant s2) { stw(d, Address(s1, s2)); }
-inline void Assembler::stx(Register d, Register s1, RegisterOrConstant s2) { stx(d, Address(s1, s2)); }
-inline void Assembler::std(Register d, Register s1, RegisterOrConstant s2) { std(d, Address(s1, s2)); }
-inline void Assembler::st( Register d, Register s1, RegisterOrConstant s2) { st( d, Address(s1, s2)); }
-
 // v8 p 99
 
-inline void Assembler::stc(    int crd, Register s1, Register s2) { v8_only();  emit_long( op(ldst_op) | fcn(crd) | op3(stc_op3 ) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stc(    int crd, Register s1, Register s2) { v8_only();  emit_int32( op(ldst_op) | fcn(crd) | op3(stc_op3 ) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::stc(    int crd, Register s1, int simm13a) { v8_only();  emit_data( op(ldst_op) | fcn(crd) | op3(stc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stdc(   int crd, Register s1, Register s2) { v8_only();  emit_long( op(ldst_op) | fcn(crd) | op3(stdc_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stdc(   int crd, Register s1, Register s2) { v8_only();  emit_int32( op(ldst_op) | fcn(crd) | op3(stdc_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::stdc(   int crd, Register s1, int simm13a) { v8_only();  emit_data( op(ldst_op) | fcn(crd) | op3(stdc_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stcsr(  int crd, Register s1, Register s2) { v8_only();  emit_long( op(ldst_op) | fcn(crd) | op3(stcsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stcsr(  int crd, Register s1, Register s2) { v8_only();  emit_int32( op(ldst_op) | fcn(crd) | op3(stcsr_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::stcsr(  int crd, Register s1, int simm13a) { v8_only();  emit_data( op(ldst_op) | fcn(crd) | op3(stcsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stdcq(  int crd, Register s1, Register s2) { v8_only();  emit_long( op(ldst_op) | fcn(crd) | op3(stdcq_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stdcq(  int crd, Register s1, Register s2) { v8_only();  emit_int32( op(ldst_op) | fcn(crd) | op3(stdcq_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::stdcq(  int crd, Register s1, int simm13a) { v8_only();  emit_data( op(ldst_op) | fcn(crd) | op3(stdcq_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::sub(Register s1, RegisterOrConstant s2, Register d, int offset) {
-  if (s2.is_register())  sub(s1, s2.as_register(),          d);
-  else                 { sub(s1, s2.as_constant() + offset, d); offset = 0; }
-  if (offset != 0)       sub(d,  offset,                    d);
-}
-
 // pp 231
 
-inline void Assembler::swap(    Register s1, Register s2, Register d) { v9_dep();  emit_long( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::swap(    Register s1, Register s2, Register d) { v9_dep();  emit_int32( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::swap(    Register s1, int simm13a, Register d) { v9_dep();  emit_data( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::swap(    Address& a, Register d, int offset ) {
-  relocate(a.rspec(offset));
-  if (a.has_index()) { assert(offset == 0, ""); swap( a.base(), a.index(), d         ); }
-  else               {                          swap( a.base(), a.disp() + offset, d ); }
-}
-
-
-// Use the right loads/stores for the platform
-inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) {
-#ifdef _LP64
-  Assembler::ldx(s1, s2, d);
-#else
-  Assembler::ld( s1, s2, d);
-#endif
-}
-
-inline void MacroAssembler::ld_ptr( Register s1, int simm13a, Register d ) {
-#ifdef _LP64
-  Assembler::ldx(s1, simm13a, d);
-#else
-  Assembler::ld( s1, simm13a, d);
-#endif
-}
-
-#ifdef ASSERT
-// ByteSize is only a class when ASSERT is defined, otherwise it's an int.
-inline void MacroAssembler::ld_ptr( Register s1, ByteSize simm13a, Register d ) {
-  ld_ptr(s1, in_bytes(simm13a), d);
-}
-#endif
-
-inline void MacroAssembler::ld_ptr( Register s1, RegisterOrConstant s2, Register d ) {
-#ifdef _LP64
-  Assembler::ldx(s1, s2, d);
-#else
-  Assembler::ld( s1, s2, d);
-#endif
-}
-
-inline void MacroAssembler::ld_ptr(const Address& a, Register d, int offset) {
-#ifdef _LP64
-  Assembler::ldx(a, d, offset);
-#else
-  Assembler::ld( a, d, offset);
-#endif
-}
-
-inline void MacroAssembler::st_ptr( Register d, Register s1, Register s2 ) {
-#ifdef _LP64
-  Assembler::stx(d, s1, s2);
-#else
-  Assembler::st( d, s1, s2);
-#endif
-}
-
-inline void MacroAssembler::st_ptr( Register d, Register s1, int simm13a ) {
-#ifdef _LP64
-  Assembler::stx(d, s1, simm13a);
-#else
-  Assembler::st( d, s1, simm13a);
-#endif
-}
-
-#ifdef ASSERT
-// ByteSize is only a class when ASSERT is defined, otherwise it's an int.
-inline void MacroAssembler::st_ptr( Register d, Register s1, ByteSize simm13a ) {
-  st_ptr(d, s1, in_bytes(simm13a));
-}
-#endif
-
-inline void MacroAssembler::st_ptr( Register d, Register s1, RegisterOrConstant s2 ) {
-#ifdef _LP64
-  Assembler::stx(d, s1, s2);
-#else
-  Assembler::st( d, s1, s2);
-#endif
-}
-
-inline void MacroAssembler::st_ptr(Register d, const Address& a, int offset) {
-#ifdef _LP64
-  Assembler::stx(d, a, offset);
-#else
-  Assembler::st( d, a, offset);
-#endif
-}
-
-// Use the right loads/stores for the platform
-inline void MacroAssembler::ld_long( Register s1, Register s2, Register d ) {
-#ifdef _LP64
-  Assembler::ldx(s1, s2, d);
-#else
-  Assembler::ldd(s1, s2, d);
-#endif
-}
-
-inline void MacroAssembler::ld_long( Register s1, int simm13a, Register d ) {
-#ifdef _LP64
-  Assembler::ldx(s1, simm13a, d);
-#else
-  Assembler::ldd(s1, simm13a, d);
-#endif
-}
-
-inline void MacroAssembler::ld_long( Register s1, RegisterOrConstant s2, Register d ) {
-#ifdef _LP64
-  Assembler::ldx(s1, s2, d);
-#else
-  Assembler::ldd(s1, s2, d);
-#endif
-}
-
-inline void MacroAssembler::ld_long(const Address& a, Register d, int offset) {
-#ifdef _LP64
-  Assembler::ldx(a, d, offset);
-#else
-  Assembler::ldd(a, d, offset);
-#endif
-}
-
-inline void MacroAssembler::st_long( Register d, Register s1, Register s2 ) {
-#ifdef _LP64
-  Assembler::stx(d, s1, s2);
-#else
-  Assembler::std(d, s1, s2);
-#endif
-}
-
-inline void MacroAssembler::st_long( Register d, Register s1, int simm13a ) {
-#ifdef _LP64
-  Assembler::stx(d, s1, simm13a);
-#else
-  Assembler::std(d, s1, simm13a);
-#endif
-}
-
-inline void MacroAssembler::st_long( Register d, Register s1, RegisterOrConstant s2 ) {
-#ifdef _LP64
-  Assembler::stx(d, s1, s2);
-#else
-  Assembler::std(d, s1, s2);
-#endif
-}
-
-inline void MacroAssembler::st_long( Register d, const Address& a, int offset ) {
-#ifdef _LP64
-  Assembler::stx(d, a, offset);
-#else
-  Assembler::std(d, a, offset);
-#endif
-}
-
-// Functions for isolating 64 bit shifts for LP64
-
-inline void MacroAssembler::sll_ptr( Register s1, Register s2, Register d ) {
-#ifdef _LP64
-  Assembler::sllx(s1, s2, d);
-#else
-  Assembler::sll( s1, s2, d);
-#endif
-}
-
-inline void MacroAssembler::sll_ptr( Register s1, int imm6a,   Register d ) {
-#ifdef _LP64
-  Assembler::sllx(s1, imm6a, d);
-#else
-  Assembler::sll( s1, imm6a, d);
-#endif
-}
-
-inline void MacroAssembler::srl_ptr( Register s1, Register s2, Register d ) {
-#ifdef _LP64
-  Assembler::srlx(s1, s2, d);
-#else
-  Assembler::srl( s1, s2, d);
-#endif
-}
-
-inline void MacroAssembler::srl_ptr( Register s1, int imm6a,   Register d ) {
-#ifdef _LP64
-  Assembler::srlx(s1, imm6a, d);
-#else
-  Assembler::srl( s1, imm6a, d);
-#endif
-}
-
-inline void MacroAssembler::sll_ptr( Register s1, RegisterOrConstant s2, Register d ) {
-  if (s2.is_register())  sll_ptr(s1, s2.as_register(), d);
-  else                   sll_ptr(s1, s2.as_constant(), d);
-}
-
-// Use the right branch for the platform
-
-inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) {
-  if (VM_Version::v9_instructions_work())
-    Assembler::bp(c, a, icc, p, d, rt);
-  else
-    Assembler::br(c, a, d, rt);
-}
-
-inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
-  br(c, a, p, target(L));
-}
-
-
-// Branch that tests either xcc or icc depending on the
-// architecture compiled (LP64 or not)
-inline void MacroAssembler::brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) {
-#ifdef _LP64
-    Assembler::bp(c, a, xcc, p, d, rt);
-#else
-    MacroAssembler::br(c, a, p, d, rt);
-#endif
-}
-
-inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) {
-  brx(c, a, p, target(L));
-}
-
-inline void MacroAssembler::ba( Label& L ) {
-  br(always, false, pt, L);
-}
-
-// Warning: V9 only functions
-inline void MacroAssembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) {
-  Assembler::bp(c, a, cc, p, d, rt);
-}
-
-inline void MacroAssembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) {
-  Assembler::bp(c, a, cc, p, L);
-}
-
-inline void MacroAssembler::fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) {
-  if (VM_Version::v9_instructions_work())
-    fbp(c, a, fcc0, p, d, rt);
-  else
-    Assembler::fb(c, a, d, rt);
-}
-
-inline void MacroAssembler::fb( Condition c, bool a, Predict p, Label& L ) {
-  fb(c, a, p, target(L));
-}
-
-inline void MacroAssembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) {
-  Assembler::fbp(c, a, cc, p, d, rt);
-}
-
-inline void MacroAssembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) {
-  Assembler::fbp(c, a, cc, p, L);
-}
-
-inline void MacroAssembler::jmp( Register s1, Register s2 ) { jmpl( s1, s2, G0 ); }
-inline void MacroAssembler::jmp( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, G0, rspec); }
-
-inline bool MacroAssembler::is_far_target(address d) {
-  if (ForceUnreachable) {
-    // References outside the code cache should be treated as far
-    return d < CodeCache::low_bound() || d > CodeCache::high_bound();
-  }
-  return !is_in_wdisp30_range(d, CodeCache::low_bound()) || !is_in_wdisp30_range(d, CodeCache::high_bound());
-}
-
-// Call with a check to see if we need to deal with the added
-// expense of relocation and if we overflow the displacement
-// of the quick call instruction.
-inline void MacroAssembler::call( address d, relocInfo::relocType rt ) {
-#ifdef _LP64
-  intptr_t disp;
-  // NULL is ok because it will be relocated later.
-  // Must change NULL to a reachable address in order to
-  // pass asserts here and in wdisp.
-  if ( d == NULL )
-    d = pc();
-
-  // Is this address within range of the call instruction?
-  // If not, use the expensive instruction sequence
-  if (is_far_target(d)) {
-    relocate(rt);
-    AddressLiteral dest(d);
-    jumpl_to(dest, O7, O7);
-  } else {
-    Assembler::call(d, rt);
-  }
-#else
-  Assembler::call( d, rt );
-#endif
-}
-
-inline void MacroAssembler::call( Label& L,   relocInfo::relocType rt ) {
-  MacroAssembler::call( target(L), rt);
-}
-
-
-
-inline void MacroAssembler::callr( Register s1, Register s2 ) { jmpl( s1, s2, O7 ); }
-inline void MacroAssembler::callr( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, O7, rspec); }
-
-// prefetch instruction
-inline void MacroAssembler::iprefetch( address d, relocInfo::relocType rt ) {
-  if (VM_Version::v9_instructions_work())
-    Assembler::bp( never, true, xcc, pt, d, rt );
-}
-inline void MacroAssembler::iprefetch( Label& L) { iprefetch( target(L) ); }
-
-
-// clobbers o7 on V8!!
-// returns delta from gotten pc to addr after
-inline int MacroAssembler::get_pc( Register d ) {
-  int x = offset();
-  if (VM_Version::v9_instructions_work())
-    rdpc(d);
-  else {
-    Label lbl;
-    Assembler::call(lbl, relocInfo::none);  // No relocation as this is call to pc+0x8
-    if (d == O7)  delayed()->nop();
-    else          delayed()->mov(O7, d);
-    bind(lbl);
-  }
-  return offset() - x;
-}
-
-
-// Note:  All MacroAssembler::set_foo functions are defined out-of-line.
-
-
-// Loads the current PC of the following instruction as an immediate value in
-// 2 instructions.  All PCs in the CodeCache are within 2 Gig of each other.
-inline intptr_t MacroAssembler::load_pc_address( Register reg, int bytes_to_skip ) {
-  intptr_t thepc = (intptr_t)pc() + 2*BytesPerInstWord + bytes_to_skip;
-#ifdef _LP64
-  Unimplemented();
-#else
-  Assembler::sethi(  thepc & ~0x3ff, reg, internal_word_Relocation::spec((address)thepc));
-  Assembler::add(reg,thepc &  0x3ff, reg, internal_word_Relocation::spec((address)thepc));
-#endif
-  return thepc;
-}
-
-
-inline void MacroAssembler::load_contents(const AddressLiteral& addrlit, Register d, int offset) {
-  assert_not_delayed();
-  if (ForceUnreachable) {
-    patchable_sethi(addrlit, d);
-  } else {
-    sethi(addrlit, d);
-  }
-  ld(d, addrlit.low10() + offset, d);
-}
-
-
-inline void MacroAssembler::load_bool_contents(const AddressLiteral& addrlit, Register d, int offset) {
-  assert_not_delayed();
-  if (ForceUnreachable) {
-    patchable_sethi(addrlit, d);
-  } else {
-    sethi(addrlit, d);
-  }
-  ldub(d, addrlit.low10() + offset, d);
-}
-
-
-inline void MacroAssembler::load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset) {
-  assert_not_delayed();
-  if (ForceUnreachable) {
-    patchable_sethi(addrlit, d);
-  } else {
-    sethi(addrlit, d);
-  }
-  ld_ptr(d, addrlit.low10() + offset, d);
-}
-
-
-inline void MacroAssembler::store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) {
-  assert_not_delayed();
-  if (ForceUnreachable) {
-    patchable_sethi(addrlit, temp);
-  } else {
-    sethi(addrlit, temp);
-  }
-  st(s, temp, addrlit.low10() + offset);
-}
-
-
-inline void MacroAssembler::store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) {
-  assert_not_delayed();
-  if (ForceUnreachable) {
-    patchable_sethi(addrlit, temp);
-  } else {
-    sethi(addrlit, temp);
-  }
-  st_ptr(s, temp, addrlit.low10() + offset);
-}
-
-
-// This code sequence is relocatable to any address, even on LP64.
-inline void MacroAssembler::jumpl_to(const AddressLiteral& addrlit, Register temp, Register d, int offset) {
-  assert_not_delayed();
-  // Force fixed length sethi because NativeJump and NativeFarCall don't handle
-  // variable length instruction streams.
-  patchable_sethi(addrlit, temp);
-  jmpl(temp, addrlit.low10() + offset, d);
-}
-
-
-inline void MacroAssembler::jump_to(const AddressLiteral& addrlit, Register temp, int offset) {
-  jumpl_to(addrlit, temp, G0, offset);
-}
-
-
-inline void MacroAssembler::jump_indirect_to(Address& a, Register temp,
-                                             int ld_offset, int jmp_offset) {
-  assert_not_delayed();
-  //sethi(al);                   // sethi is caller responsibility for this one
-  ld_ptr(a, temp, ld_offset);
-  jmp(temp, jmp_offset);
-}
-
-
-inline void MacroAssembler::set_metadata(Metadata* obj, Register d) {
-  set_metadata(allocate_metadata_address(obj), d);
-}
-
-inline void MacroAssembler::set_metadata_constant(Metadata* obj, Register d) {
-  set_metadata(constant_metadata_address(obj), d);
-}
-
-inline void MacroAssembler::set_metadata(const AddressLiteral& obj_addr, Register d) {
-  assert(obj_addr.rspec().type() == relocInfo::metadata_type, "must be a metadata reloc");
-  set(obj_addr, d);
-}
-
-inline void MacroAssembler::set_oop(jobject obj, Register d) {
-  set_oop(allocate_oop_address(obj), d);
-}
-
-
-inline void MacroAssembler::set_oop_constant(jobject obj, Register d) {
-  set_oop(constant_oop_address(obj), d);
-}
-
-
-inline void MacroAssembler::set_oop(const AddressLiteral& obj_addr, Register d) {
-  assert(obj_addr.rspec().type() == relocInfo::oop_type, "must be an oop reloc");
-  set(obj_addr, d);
-}
-
-
-inline void MacroAssembler::load_argument( Argument& a, Register  d ) {
-  if (a.is_register())
-    mov(a.as_register(), d);
-  else
-    ld (a.as_address(),  d);
-}
-
-inline void MacroAssembler::store_argument( Register s, Argument& a ) {
-  if (a.is_register())
-    mov(s, a.as_register());
-  else
-    st_ptr (s, a.as_address());         // ABI says everything is right justified.
-}
-
-inline void MacroAssembler::store_ptr_argument( Register s, Argument& a ) {
-  if (a.is_register())
-    mov(s, a.as_register());
-  else
-    st_ptr (s, a.as_address());
-}
-
-
-#ifdef _LP64
-inline void MacroAssembler::store_float_argument( FloatRegister s, Argument& a ) {
-  if (a.is_float_register())
-// V9 ABI has F1, F3, F5 are used to pass instead of O0, O1, O2
-    fmov(FloatRegisterImpl::S, s, a.as_float_register() );
-  else
-    // Floats are stored in the high half of the stack entry
-    // The low half is undefined per the ABI.
-    stf(FloatRegisterImpl::S, s, a.as_address(), sizeof(jfloat));
-}
-
-inline void MacroAssembler::store_double_argument( FloatRegister s, Argument& a ) {
-  if (a.is_float_register())
-// V9 ABI has D0, D2, D4 are used to pass instead of O0, O1, O2
-    fmov(FloatRegisterImpl::D, s, a.as_double_register() );
-  else
-    stf(FloatRegisterImpl::D, s, a.as_address());
-}
-
-inline void MacroAssembler::store_long_argument( Register s, Argument& a ) {
-  if (a.is_register())
-    mov(s, a.as_register());
-  else
-    stx(s, a.as_address());
-}
-#endif
-
-inline void MacroAssembler::clrb( Register s1, Register s2) {  stb( G0, s1, s2 ); }
-inline void MacroAssembler::clrh( Register s1, Register s2) {  sth( G0, s1, s2 ); }
-inline void MacroAssembler::clr(  Register s1, Register s2) {  stw( G0, s1, s2 ); }
-inline void MacroAssembler::clrx( Register s1, Register s2) {  stx( G0, s1, s2 ); }
-
-inline void MacroAssembler::clrb( Register s1, int simm13a) { stb( G0, s1, simm13a); }
-inline void MacroAssembler::clrh( Register s1, int simm13a) { sth( G0, s1, simm13a); }
-inline void MacroAssembler::clr(  Register s1, int simm13a) { stw( G0, s1, simm13a); }
-inline void MacroAssembler::clrx( Register s1, int simm13a) { stx( G0, s1, simm13a); }
-
-// returns if membar generates anything, obviously this code should mirror
-// membar below.
-inline bool MacroAssembler::membar_has_effect( Membar_mask_bits const7a ) {
-  if( !os::is_MP() ) return false;  // Not needed on single CPU
-  if( VM_Version::v9_instructions_work() ) {
-    const Membar_mask_bits effective_mask =
-        Membar_mask_bits(const7a & ~(LoadLoad | LoadStore | StoreStore));
-    return (effective_mask != 0);
-  } else {
-    return true;
-  }
-}
-
-inline void MacroAssembler::membar( Membar_mask_bits const7a ) {
-  // Uniprocessors do not need memory barriers
-  if (!os::is_MP()) return;
-  // Weakened for current Sparcs and TSO.  See the v9 manual, sections 8.4.3,
-  // 8.4.4.3, a.31 and a.50.
-  if( VM_Version::v9_instructions_work() ) {
-    // Under TSO, setting bit 3, 2, or 0 is redundant, so the only value
-    // of the mmask subfield of const7a that does anything that isn't done
-    // implicitly is StoreLoad.
-    const Membar_mask_bits effective_mask =
-        Membar_mask_bits(const7a & ~(LoadLoad | LoadStore | StoreStore));
-    if ( effective_mask != 0 ) {
-      Assembler::membar( effective_mask );
-    }
-  } else {
-    // stbar is the closest there is on v8.  Equivalent to membar(StoreStore).  We
-    // do not issue the stbar because to my knowledge all v8 machines implement TSO,
-    // which guarantees that all stores behave as if an stbar were issued just after
-    // each one of them.  On these machines, stbar ought to be a nop.  There doesn't
-    // appear to be an equivalent of membar(StoreLoad) on v8: TSO doesn't require it,
-    // it can't be specified by stbar, nor have I come up with a way to simulate it.
-    //
-    // Addendum.  Dave says that ldstub guarantees a write buffer flush to coherent
-    // space.  Put one here to be on the safe side.
-    Assembler::ldstub(SP, 0, G0);
-  }
-}
-
 #endif // CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -298,7 +298,7 @@
     for (int i = 0; i < _bytes_to_copy; i++) {
       address ptr = (address)(_pc_start + i);
       int a_byte = (*ptr) & 0xFF;
-      __ a_byte (a_byte);
+      __ emit_int8 (a_byte);
     }
   }
 
@@ -340,10 +340,10 @@
   int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
 
   // Emit the patch record.  We need to emit a full word, so emit an extra empty byte
-  __ a_byte(0);
-  __ a_byte(being_initialized_entry_offset);
-  __ a_byte(bytes_to_skip);
-  __ a_byte(_bytes_to_copy);
+  __ emit_int8(0);
+  __ emit_int8(being_initialized_entry_offset);
+  __ emit_int8(bytes_to_skip);
+  __ emit_int8(_bytes_to_copy);
   address patch_info_pc = __ pc();
   assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
 
--- a/src/cpu/sparc/vm/codeBuffer_sparc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/codeBuffer_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,9 +31,4 @@
 public:
   void flush_bundle(bool start_new_bundle) {}
 
-  // Heuristic for pre-packing the pt/pn bit of a predicted branch.
-  bool is_backward_branch(Label& L) {
-    return L.is_bound() && insts_end() <= locator_address(L.loc());
-  }
-
 #endif // CPU_SPARC_VM_CODEBUFFER_SPARC_HPP
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -137,7 +137,7 @@
   }
   __ ret();                           // return from interpreter activation
   __ delayed()->restore(I5_savedSP, G0, SP);  // remove interpreter frame
-  NOT_PRODUCT(__ emit_long(0);)       // marker for disassembly
+  NOT_PRODUCT(__ emit_int32(0);)       // marker for disassembly
   return entry;
 }
 
@@ -232,7 +232,7 @@
   }
   __ retl();                          // return from interpreter activation
   __ delayed()->nop();                // schedule this better
-  NOT_PRODUCT(__ emit_long(0);)       // marker for disassembly
+  NOT_PRODUCT(__ emit_int32(0);)       // marker for disassembly
   return entry;
 }
 
@@ -582,7 +582,9 @@
   // the following temporary registers are used during frame creation
   const Register Gtmp1 = G3_scratch ;
   const Register Gtmp2 = G1_scratch;
-  const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset()));
+  const Register RconstMethod = Gtmp1;
+  const Address constMethod(G5_method, 0, in_bytes(Method::const_offset()));
+  const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset()));
 
   bool inc_counter  = UseCompiler || CountCompiledCalls;
 
@@ -618,6 +620,7 @@
   }
 #endif // ASSERT
 
+  __ ld_ptr(constMethod, RconstMethod);
   __ lduh(size_of_parameters, Gtmp1);
   __ sll(Gtmp1, LogBytesPerWord, Gtmp2);       // parameter size in bytes
   __ add(Gargs, Gtmp2, Gargs);                 // points to first local + BytesPerWord
@@ -1047,9 +1050,6 @@
   const Register Gtmp = G3_scratch;
   const Address constMethod       (G5_method, 0, in_bytes(Method::const_offset()));
   const Address access_flags      (G5_method, 0, in_bytes(Method::access_flags_offset()));
-  const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset()));
-  const Address max_stack         (G5_method, 0, in_bytes(Method::max_stack_offset()));
-  const Address size_of_locals    (G5_method, 0, in_bytes(Method::size_of_locals_offset()));
 
   // slop factor is two extra slots on the expression stack so that
   // we always have room to store a result when returning from a call without parameters
@@ -1067,10 +1067,15 @@
   // Now compute new frame size
 
   if (native) {
+    const Register RconstMethod = Gtmp;
+    const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset()));
+    __ ld_ptr(constMethod, RconstMethod);
     __ lduh( size_of_parameters, Gtmp );
     __ calc_mem_param_words(Gtmp, Gtmp);     // space for native call parameters passed on the stack in words
   } else {
-    __ lduh(max_stack, Gtmp);                // Full size expression stack
+    // Full size expression stack
+    __ ld_ptr(constMethod, Gtmp);
+    __ lduh(Gtmp, in_bytes(ConstMethod::max_stack_offset()), Gtmp);
   }
   __ add(Gtmp, fixed_size, Gtmp);           // plus the fixed portion
 
@@ -1206,7 +1211,9 @@
   __ sub(O2, wordSize, O2);                    // prepush
   __ st_ptr(O2, XXX_STATE(_stack));                // PREPUSH
 
-  __ lduh(max_stack, O3);                      // Full size expression stack
+  // Full size expression stack
+  __ ld_ptr(constMethod, O3);
+  __ lduh(O3, in_bytes(ConstMethod::max_stack_offset()), O3);
   guarantee(!EnableInvokeDynamic, "no support yet for java.lang.invoke.MethodHandle"); //6815692
   //6815692//if (EnableInvokeDynamic)
   //6815692//  __ inc(O3, Method::extra_stack_entries());
@@ -1233,9 +1240,13 @@
     }
     if (init_value != noreg) {
       Label clear_loop;
+      const Register RconstMethod = O1;
+      const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset()));
+      const Address size_of_locals    (RconstMethod, 0, in_bytes(ConstMethod::size_of_locals_offset()));
 
       // NOTE: If you change the frame layout, this code will need to
       // be updated!
+      __ ld_ptr( constMethod, RconstMethod );
       __ lduh( size_of_locals, O2 );
       __ lduh( size_of_parameters, O1 );
       __ sll( O2, LogBytesPerWord, O2);
@@ -1462,7 +1473,7 @@
     __ brx(Assembler::equal, false, Assembler::pt, skip);         \
     __ delayed()->nop();                                          \
     __ breakpoint_trap();                                         \
-    __ emit_long(marker);                                         \
+    __ emit_int32(marker);                                         \
     __ bind(skip);                                                \
   }
 #else
@@ -1480,13 +1491,16 @@
 //
 //  assert_different_registers(state, prev_state);
   const Register Gtmp = G3_scratch;
+  const Register RconstMethod = G3_scratch;
   const Register tmp = O2;
-  const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset()));
-  const Address size_of_locals    (G5_method, 0, in_bytes(Method::size_of_locals_offset()));
+  const Address constMethod(G5_method, 0, in_bytes(Method::const_offset()));
+  const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset()));
+  const Address size_of_locals    (RconstMethod, 0, in_bytes(ConstMethod::size_of_locals_offset()));
 
+  __ ld_ptr(constMethod, RconstMethod);
   __ lduh(size_of_parameters, tmp);
-  __ sll(tmp, LogBytesPerWord, Gtmp);       // parameter size in bytes
-  __ add(args, Gtmp, Gargs);                // points to first local + BytesPerWord
+  __ sll(tmp, LogBytesPerWord, Gargs);       // parameter size in bytes
+  __ add(args, Gargs, Gargs);                // points to first local + BytesPerWord
   // NEW
   __ add(Gargs, -wordSize, Gargs);             // points to first local[0]
   // determine extra space for non-argument locals & adjust caller's SP
@@ -1538,9 +1552,6 @@
 
   const Address constMethod       (G5_method, 0, in_bytes(Method::const_offset()));
   const Address access_flags      (G5_method, 0, in_bytes(Method::access_flags_offset()));
-  const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset()));
-  const Address max_stack         (G5_method, 0, in_bytes(Method::max_stack_offset()));
-  const Address size_of_locals    (G5_method, 0, in_bytes(Method::size_of_locals_offset()));
 
   address entry_point = __ pc();
   __ mov(G0, prevState);                                                 // no current activation
@@ -1748,7 +1759,9 @@
 
   __ ld_ptr(STATE(_result._to_call._callee), L4_scratch);                        // called method
   __ ld_ptr(STATE(_stack), L1_scratch);                                          // get top of java expr stack
-  __ lduh(L4_scratch, in_bytes(Method::size_of_parameters_offset()), L2_scratch); // get parameter size
+  // get parameter size
+  __ ld_ptr(L4_scratch, in_bytes(Method::const_offset()), L2_scratch);
+  __ lduh(L2_scratch, in_bytes(ConstMethod::size_of_parameters_offset()), L2_scratch);
   __ sll(L2_scratch, LogBytesPerWord, L2_scratch     );                           // parameter size in bytes
   __ add(L1_scratch, L2_scratch, L1_scratch);                                      // stack destination for result
   __ ld(L4_scratch, in_bytes(Method::result_index_offset()), L3_scratch); // called method result type index
--- a/src/cpu/sparc/vm/frame_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/frame_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -648,7 +648,7 @@
   Method* m = *interpreter_frame_method_addr();
 
   // validate the method we'd find in this potential sender
-  if (!Universe::heap()->is_valid_method(m)) return false;
+  if (!m->is_valid_method()) return false;
 
   // stack frames shouldn't be much larger than max_stack elements
 
--- a/src/cpu/sparc/vm/frame_sparc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/frame_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -204,25 +204,6 @@
   intptr_t*     out_register_addr(Register reg) const {
     return younger_sp_addr_at(reg->after_save()->sp_offset_in_saved_window());
   }
-  intptr_t* memory_param_addr(int param_ix, bool is_in) const {
-    int offset = callee_register_argument_save_area_sp_offset + param_ix;
-    if (is_in)
-      return fp_addr_at(offset);
-    else
-      return sp_addr_at(offset);
-  }
-  intptr_t*        param_addr(int param_ix, bool is_in) const {
-    if (param_ix >= callee_register_argument_save_area_words)
-      return memory_param_addr(param_ix, is_in);
-    else if (is_in)
-      return register_addr(Argument(param_ix, true).as_register());
-    else {
-      // the registers are stored in the next younger frame
-      // %%% is this really necessary?
-      ShouldNotReachHere();
-      return NULL;
-    }
-  }
 
 
   // Interpreter frames
@@ -269,12 +250,8 @@
 #ifndef CC_INTERP
 
   // where Lmonitors is saved:
-  BasicObjectLock**  interpreter_frame_monitors_addr() const {
-    return (BasicObjectLock**) sp_addr_at(Lmonitors->sp_offset_in_saved_window());
-  }
-  intptr_t** interpreter_frame_esp_addr() const {
-    return (intptr_t**)sp_addr_at(Lesp->sp_offset_in_saved_window());
-  }
+  inline BasicObjectLock** interpreter_frame_monitors_addr() const;
+  inline intptr_t** interpreter_frame_esp_addr() const;
 
   inline void interpreter_frame_set_tos_address(intptr_t* x);
 
--- a/src/cpu/sparc/vm/frame_sparc.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/frame_sparc.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,8 @@
 #ifndef CPU_SPARC_VM_FRAME_SPARC_INLINE_HPP
 #define CPU_SPARC_VM_FRAME_SPARC_INLINE_HPP
 
+#include "asm/macroAssembler.hpp"
+
 // Inline functions for SPARC frames:
 
 // Constructors
@@ -185,6 +187,13 @@
   return *interpreter_frame_esp_addr() + 1;
 }
 
+inline BasicObjectLock** frame::interpreter_frame_monitors_addr() const {
+  return (BasicObjectLock**) sp_addr_at(Lmonitors->sp_offset_in_saved_window());
+}
+inline intptr_t** frame::interpreter_frame_esp_addr() const {
+  return (intptr_t**)sp_addr_at(Lesp->sp_offset_in_saved_window());
+}
+
 inline void frame::interpreter_frame_set_tos_address( intptr_t* x ) {
   *interpreter_frame_esp_addr() = x - 1;
 }
--- a/src/cpu/sparc/vm/icBuffer_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/icBuffer_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "code/icBuffer.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "interpreter/bytecodes.hpp"
--- a/src/cpu/sparc/vm/icache_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/icache_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "runtime/icache.hpp"
 
 #define __ _masm->
--- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -36,12 +36,7 @@
 #include "runtime/basicLock.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/sharedRuntime.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 #ifndef CC_INTERP
 #ifndef FAST_DISPATCH
@@ -523,7 +518,8 @@
   delayed()->nop();
 
   // Compute max expression stack+register save area
-  lduh(Lmethod, in_bytes(Method::max_stack_offset()), Gframe_size);  // Load max stack.
+  ld_ptr(Lmethod, in_bytes(Method::const_offset()), Gframe_size);
+  lduh(Gframe_size, in_bytes(ConstMethod::max_stack_offset()), Gframe_size);  // Load max stack.
   add( Gframe_size, frame::memory_parameter_word_sp_offset, Gframe_size );
 
   //
--- a/src/cpu/sparc/vm/interp_masm_sparc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/interp_masm_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,7 +25,7 @@
 #ifndef CPU_SPARC_VM_INTERP_MASM_SPARC_HPP
 #define CPU_SPARC_VM_INTERP_MASM_SPARC_HPP
 
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "interpreter/invocationCounter.hpp"
 
 // This file specializes the assember with interpreter-specific macros
--- a/src/cpu/sparc/vm/interpreter_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/interpreter_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterGenerator.hpp"
--- a/src/cpu/sparc/vm/jniFastGetField_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/jniFastGetField_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "prims/jniFastGetField.hpp"
 #include "prims/jvm_misc.hpp"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,4582 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/cardTableModRefBS.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#endif
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#define STOP(error) block_comment(error); stop(error)
+#endif
+
+// Convert the raw encoding form into the form expected by the
+// constructor for Address.
+Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
+  assert(scale == 0, "not supported");
+  RelocationHolder rspec;
+  if (disp_reloc != relocInfo::none) {
+    rspec = Relocation::spec_simple(disp_reloc);
+  }
+
+  Register rindex = as_Register(index);
+  if (rindex != G0) {
+    Address madr(as_Register(base), rindex);
+    madr._rspec = rspec;
+    return madr;
+  } else {
+    Address madr(as_Register(base), disp);
+    madr._rspec = rspec;
+    return madr;
+  }
+}
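+
+// A minimal usage sketch (hypothetical register choices, not code from
+// this file): with index == G0 the displacement form is chosen, otherwise
+// the register-index form:
+//
+//   Address a1 = Address::make_raw(L0->encoding(), G0->encoding(), 0, 8, relocInfo::none);
+//   // => Address(L0, 8), i.e. [L0 + 8]
+//   Address a2 = Address::make_raw(L0->encoding(), L1->encoding(), 0, 0, relocInfo::none);
+//   // => Address(L0, L1), i.e. [L0 + L1]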
+
+Address Argument::address_in_frame() const {
+  // Warning: In LP64 mode, disp will occupy more than 10 bits, but
+  //          op codes such as ld or ldx only access disp() to get
+  //          their simm13 argument.
+  int disp = ((_number - Argument::n_register_parameters + frame::memory_parameter_word_sp_offset) * BytesPerWord) + STACK_BIAS;
+  if (is_in())
+    return Address(FP, disp); // In argument.
+  else
+    return Address(SP, disp); // Out argument.
+}
+
+static const char* argumentNames[][2] = {
+  {"A0","P0"}, {"A1","P1"}, {"A2","P2"}, {"A3","P3"}, {"A4","P4"},
+  {"A5","P5"}, {"A6","P6"}, {"A7","P7"}, {"A8","P8"}, {"A9","P9"},
+  {"A(n>9)","P(n>9)"}
+};
+
+const char* Argument::name() const {
+  int nofArgs = sizeof argumentNames / sizeof argumentNames[0];
+  int num = number();
+  if (num >= nofArgs)  num = nofArgs - 1;
+  return argumentNames[num][is_in() ? 1 : 0];
+}
+
+#ifdef ASSERT
+// On RISC, there's no benefit to verifying instruction boundaries.
+bool AbstractAssembler::pd_check_instruction_mark() { return false; }
+#endif
+
+// Patch instruction inst at offset inst_pos to refer to dest_pos
+// and return the resulting instruction.
+// We should have pcs, not offsets, but since all is relative, it will work out
+// OK.
+int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) {
+  int m; // mask for displacement field
+  int v; // new value for displacement field
+  const int word_aligned_ones = -4;
+  switch (inv_op(inst)) {
+  default: ShouldNotReachHere();
+  case call_op:    m = wdisp(word_aligned_ones, 0, 30);  v = wdisp(dest_pos, inst_pos, 30); break;
+  case branch_op:
+    switch (inv_op2(inst)) {
+      case fbp_op2:    m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
+      case bp_op2:     m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
+      case fb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
+      case br_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
+      case cb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
+      case bpr_op2: {
+        if (is_cbcond(inst)) {
+          m = wdisp10(word_aligned_ones, 0);
+          v = wdisp10(dest_pos, inst_pos);
+        } else {
+          m = wdisp16(word_aligned_ones, 0);
+          v = wdisp16(dest_pos, inst_pos);
+        }
+        break;
+      }
+      default: ShouldNotReachHere();
+    }
+  }
+  return (inst & ~m) | v;
+}
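+
+// For intuition, the mask-and-merge idiom above in isolation (a minimal
+// standalone sketch, not the HotSpot wdisp() encoding itself):
+//
+//   int patch_field(int inst, int field_mask, int new_bits) {
+//     return (inst & ~field_mask) | new_bits;  // clear old field, install new one
+//   }
+//
+// field_mask comes from running the encoder over all-ones input (the
+// word_aligned_ones trick above), so it selects exactly the displacement
+// bits; new_bits encodes the word distance (dest_pos - inst_pos) >> 2.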
+
+// Return the offset of the branch destination of instruction inst
+// at offset pos.
+// Should have pcs, but since all is relative, it works out.
+int MacroAssembler::branch_destination(int inst, int pos) {
+  int r;
+  switch (inv_op(inst)) {
+  default: ShouldNotReachHere();
+  case call_op:        r = inv_wdisp(inst, pos, 30);  break;
+  case branch_op:
+    switch (inv_op2(inst)) {
+      case fbp_op2:    r = inv_wdisp(  inst, pos, 19);  break;
+      case bp_op2:     r = inv_wdisp(  inst, pos, 19);  break;
+      case fb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
+      case br_op2:     r = inv_wdisp(  inst, pos, 22);  break;
+      case cb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
+      case bpr_op2: {
+        if (is_cbcond(inst)) {
+          r = inv_wdisp10(inst, pos);
+        } else {
+          r = inv_wdisp16(inst, pos);
+        }
+        break;
+      }
+      default: ShouldNotReachHere();
+    }
+  }
+  return r;
+}
+
+void MacroAssembler::null_check(Register reg, int offset) {
+  if (needs_explicit_null_check((intptr_t)offset)) {
+    // provoke OS NULL exception if reg = NULL by
+    // accessing M[reg] w/o changing any registers
+    ld_ptr(reg, 0, G0);
+  }
+  else {
+    // nothing to do, (later) access of M[reg + offset]
+    // will provoke OS NULL exception if reg = NULL
+  }
+}
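+
+// A sketch of the two cases (offsets illustrative only):
+//
+//   null_check(O0, 8);        // small offset: emit nothing; the later
+//                             // access M[O0 + 8] itself traps if O0 == NULL
+//   null_check(O0, 1 << 20);  // large offset: M[O0 + disp] could land on a
+//                             // mapped page even when O0 == NULL, so probe
+//                             // M[O0] explicitly with ld_ptr(reg, 0, G0)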
+
+// Ring buffer jumps
+
+#ifndef PRODUCT
+void MacroAssembler::ret(bool trace) {
+  if (trace) {
+    mov(I7, O7); // traceable register
+    JMP(O7, 2 * BytesPerInstWord);
+  } else {
+    jmpl(I7, 2 * BytesPerInstWord, G0);
+  }
+}
+
+void MacroAssembler::retl(bool trace) {
+  if (trace) {
+    JMP(O7, 2 * BytesPerInstWord);
+  } else {
+    jmpl(O7, 2 * BytesPerInstWord, G0);
+  }
+}
+#endif /* PRODUCT */
+
+
+void MacroAssembler::jmp2(Register r1, Register r2, const char* file, int line ) {
+  assert_not_delayed();
+  // This can only be traceable if r1 & r2 are visible after a window save
+  if (TraceJumps) {
+#ifndef PRODUCT
+    save_frame(0);
+    verify_thread();
+    ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0);
+    add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1);
+    sll(O0, exact_log2(4*sizeof(intptr_t)), O2);
+    add(O2, O1, O1);
+
+    add(r1->after_save(), r2->after_save(), O2);
+    set((intptr_t)file, O3);
+    set(line, O4);
+    Label L;
+    // get nearby pc, store jmp target
+    call(L, relocInfo::none);  // No relocation for call to pc+0x8
+    delayed()->st(O2, O1, 0);
+    bind(L);
+
+    // store nearby pc
+    st(O7, O1, sizeof(intptr_t));
+    // store file
+    st(O3, O1, 2*sizeof(intptr_t));
+    // store line
+    st(O4, O1, 3*sizeof(intptr_t));
+    add(O0, 1, O0);
+    and3(O0, JavaThread::jump_ring_buffer_size  - 1, O0);
+    st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()));
+    restore();
+#endif /* PRODUCT */
+  }
+  jmpl(r1, r2, G0);
+}
+void MacroAssembler::jmp(Register r1, int offset, const char* file, int line ) {
+  assert_not_delayed();
+  // This can only be traceable if r1 is visible after a window save
+  if (TraceJumps) {
+#ifndef PRODUCT
+    save_frame(0);
+    verify_thread();
+    ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0);
+    add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1);
+    sll(O0, exact_log2(4*sizeof(intptr_t)), O2);
+    add(O2, O1, O1);
+
+    add(r1->after_save(), offset, O2);
+    set((intptr_t)file, O3);
+    set(line, O4);
+    Label L;
+    // get nearby pc, store jmp target
+    call(L, relocInfo::none);  // No relocation for call to pc+0x8
+    delayed()->st(O2, O1, 0);
+    bind(L);
+
+    // store nearby pc
+    st(O7, O1, sizeof(intptr_t));
+    // store file
+    st(O3, O1, 2*sizeof(intptr_t));
+    // store line
+    st(O4, O1, 3*sizeof(intptr_t));
+    add(O0, 1, O0);
+    and3(O0, JavaThread::jump_ring_buffer_size  - 1, O0);
+    st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()));
+    restore();
+#endif /* PRODUCT */
+  }
+  jmp(r1, offset);
+}
+
+// This code sequence is relocatable to any address, even on LP64.
+void MacroAssembler::jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line) {
+  assert_not_delayed();
+  // Force fixed length sethi because NativeJump and NativeFarCall don't handle
+  // variable length instruction streams.
+  patchable_sethi(addrlit, temp);
+  Address a(temp, addrlit.low10() + offset);  // Add the offset to the displacement.
+  if (TraceJumps) {
+#ifndef PRODUCT
+    // Must do the add here so relocation can find the remainder of the
+    // value to be relocated.
+    add(a.base(), a.disp(), a.base(), addrlit.rspec(offset));
+    save_frame(0);
+    verify_thread();
+    ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0);
+    add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1);
+    sll(O0, exact_log2(4*sizeof(intptr_t)), O2);
+    add(O2, O1, O1);
+
+    set((intptr_t)file, O3);
+    set(line, O4);
+    Label L;
+
+    // get nearby pc, store jmp target
+    call(L, relocInfo::none);  // No relocation for call to pc+0x8
+    delayed()->st(a.base()->after_save(), O1, 0);
+    bind(L);
+
+    // store nearby pc
+    st(O7, O1, sizeof(intptr_t));
+    // store file
+    st(O3, O1, 2*sizeof(intptr_t));
+    // store line
+    st(O4, O1, 3*sizeof(intptr_t));
+    add(O0, 1, O0);
+    and3(O0, JavaThread::jump_ring_buffer_size  - 1, O0);
+    st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()));
+    restore();
+    jmpl(a.base(), G0, d);
+#else
+    jmpl(a.base(), a.disp(), d);
+#endif /* PRODUCT */
+  } else {
+    jmpl(a.base(), a.disp(), d);
+  }
+}
+
+void MacroAssembler::jump(const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line) {
+  jumpl(addrlit, temp, G0, offset, file, line);
+}
+
+
+// Conditional breakpoint (for assertion checks in assembly code)
+void MacroAssembler::breakpoint_trap(Condition c, CC cc) {
+  trap(c, cc, G0, ST_RESERVED_FOR_USER_0);
+}
+
+// We want to use ST_BREAKPOINT here, but the debugger is confused by it.
+void MacroAssembler::breakpoint_trap() {
+  trap(ST_RESERVED_FOR_USER_0);
+}
+
+// flush windows (except current) using flushw instruction if avail.
+void MacroAssembler::flush_windows() {
+  if (VM_Version::v9_instructions_work())  flushw();
+  else                                     flush_windows_trap();
+}
+
+// Write serialization page so the VM thread can do a pseudo remote membar.
+// We use the current thread pointer to calculate a thread-specific
+// offset to write to within the page. This minimizes bus traffic
+// due to cache line collisions.
+void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
+  srl(thread, os::get_serialize_page_shift_count(), tmp2);
+  if (Assembler::is_simm13(os::vm_page_size())) {
+    and3(tmp2, (os::vm_page_size() - sizeof(int)), tmp2);
+  }
+  else {
+    set((os::vm_page_size() - sizeof(int)), tmp1);
+    and3(tmp2, tmp1, tmp2);
+  }
+  set(os::get_memory_serialize_page(), tmp1);
+  st(G0, tmp1, tmp2);
+}
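+
+// Worked example of the offset computation above (numbers illustrative;
+// the real shift count comes from os::get_serialize_page_shift_count()):
+// with a 4K page and a shift of 3, a thread pointer ending in 0x1230 gives
+//   tmp2 = (0x1230 >> 3) & (4096 - 4) = 0x246 & 0xffc = 0x244
+// so each thread hits its own int-aligned slot in the serialize page and
+// distinct threads rarely collide on a cache line.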
+
+
+
+void MacroAssembler::enter() {
+  Unimplemented();
+}
+
+void MacroAssembler::leave() {
+  Unimplemented();
+}
+
+void MacroAssembler::mult(Register s1, Register s2, Register d) {
+  if (VM_Version::v9_instructions_work()) {
+    mulx(s1, s2, d);
+  } else {
+    smul(s1, s2, d);
+  }
+}
+
+void MacroAssembler::mult(Register s1, int simm13a, Register d) {
+  if (VM_Version::v9_instructions_work()) {
+    mulx(s1, simm13a, d);
+  } else {
+    smul(s1, simm13a, d);
+  }
+}
+
+
+#ifdef ASSERT
+void MacroAssembler::read_ccr_v8_assert(Register ccr_save) {
+  const Register s1 = G3_scratch;
+  const Register s2 = G4_scratch;
+  Label get_psr_test;
+  // Get the condition codes the V8 way.
+  read_ccr_trap(s1);
+  mov(ccr_save, s2);
+  // This is a test of V8 which has icc but not xcc
+  // so mask off the xcc bits
+  and3(s2, 0xf, s2);
+  // Compare condition codes from the V8 and V9 ways.
+  subcc(s2, s1, G0);
+  br(Assembler::notEqual, true, Assembler::pt, get_psr_test);
+  delayed()->breakpoint_trap();
+  bind(get_psr_test);
+}
+
+void MacroAssembler::write_ccr_v8_assert(Register ccr_save) {
+  const Register s1 = G3_scratch;
+  const Register s2 = G4_scratch;
+  Label set_psr_test;
+  // Write out the saved condition codes the V8 way
+  write_ccr_trap(ccr_save, s1, s2);
+  // Read back the condition codes using the V9 instruction
+  rdccr(s1);
+  mov(ccr_save, s2);
+  // This is a test of V8 which has icc but not xcc
+  // so mask off the xcc bits
+  and3(s2, 0xf, s2);
+  and3(s1, 0xf, s1);
+  // Compare the V8 way with the V9 way.
+  subcc(s2, s1, G0);
+  br(Assembler::notEqual, true, Assembler::pt, set_psr_test);
+  delayed()->breakpoint_trap();
+  bind(set_psr_test);
+}
+#else
+#define read_ccr_v8_assert(x)
+#define write_ccr_v8_assert(x)
+#endif // ASSERT
+
+void MacroAssembler::read_ccr(Register ccr_save) {
+  if (VM_Version::v9_instructions_work()) {
+    rdccr(ccr_save);
+    // Test code sequence used on V8.  Do not move above rdccr.
+    read_ccr_v8_assert(ccr_save);
+  } else {
+    read_ccr_trap(ccr_save);
+  }
+}
+
+void MacroAssembler::write_ccr(Register ccr_save) {
+  if (VM_Version::v9_instructions_work()) {
+    // Test code sequence used on V8.  Do not move below wrccr.
+    write_ccr_v8_assert(ccr_save);
+    wrccr(ccr_save);
+  } else {
+    const Register temp_reg1 = G3_scratch;
+    const Register temp_reg2 = G4_scratch;
+    write_ccr_trap(ccr_save, temp_reg1, temp_reg2);
+  }
+}
+
+
+// Calls to C land
+
+#ifdef ASSERT
+// a hook for debugging
+static Thread* reinitialize_thread() {
+  return ThreadLocalStorage::thread();
+}
+#else
+#define reinitialize_thread ThreadLocalStorage::thread
+#endif
+
+#ifdef ASSERT
+address last_get_thread = NULL;
+#endif
+
+// call this when G2_thread is not known to be valid
+void MacroAssembler::get_thread() {
+  save_frame(0);                // to avoid clobbering O0
+  mov(G1, L0);                  // avoid clobbering G1
+  mov(G5_method, L1);           // avoid clobbering G5
+  mov(G3, L2);                  // avoid clobbering G3 also
+  mov(G4, L5);                  // avoid clobbering G4
+#ifdef ASSERT
+  AddressLiteral last_get_thread_addrlit(&last_get_thread);
+  set(last_get_thread_addrlit, L3);
+  inc(L4, get_pc(L4) + 2 * BytesPerInstWord); // skip getpc() code + inc + st_ptr to point L4 at call
+  st_ptr(L4, L3, 0);
+#endif
+  call(CAST_FROM_FN_PTR(address, reinitialize_thread), relocInfo::runtime_call_type);
+  delayed()->nop();
+  mov(L0, G1);
+  mov(L1, G5_method);
+  mov(L2, G3);
+  mov(L5, G4);
+  restore(O0, 0, G2_thread);
+}
+
+static Thread* verify_thread_subroutine(Thread* gthread_value) {
+  Thread* correct_value = ThreadLocalStorage::thread();
+  guarantee(gthread_value == correct_value, "G2_thread value must be the thread");
+  return correct_value;
+}
+
+void MacroAssembler::verify_thread() {
+  if (VerifyThread) {
+    // NOTE: this chops off the heads of the 64-bit O registers.
+#ifdef CC_INTERP
+    save_frame(0);
+#else
+    // make sure G2_thread contains the right value
+    save_frame_and_mov(0, Lmethod, Lmethod);   // to avoid clobbering O0 (and propagate Lmethod for -Xprof)
+    mov(G1, L1);                // avoid clobbering G1
+    // G2 saved below
+    mov(G3, L3);                // avoid clobbering G3
+    mov(G4, L4);                // avoid clobbering G4
+    mov(G5_method, L5);         // avoid clobbering G5_method
+#endif /* CC_INTERP */
+#if defined(COMPILER2) && !defined(_LP64)
+    // Save & restore possible 64-bit Long arguments in G-regs
+    srlx(G1,32,L0);
+    srlx(G4,32,L6);
+#endif
+    call(CAST_FROM_FN_PTR(address,verify_thread_subroutine), relocInfo::runtime_call_type);
+    delayed()->mov(G2_thread, O0);
+
+    mov(L1, G1);                // Restore G1
+    // G2 restored below
+    mov(L3, G3);                // restore G3
+    mov(L4, G4);                // restore G4
+    mov(L5, G5_method);         // restore G5_method
+#if defined(COMPILER2) && !defined(_LP64)
+    // Save & restore possible 64-bit Long arguments in G-regs
+    sllx(L0,32,G2);             // Move old high G1 bits high in G2
+    srl(G1, 0,G1);              // Clear current high G1 bits
+    or3 (G1,G2,G1);             // Recover 64-bit G1
+    sllx(L6,32,G2);             // Move old high G4 bits high in G2
+    srl(G4, 0,G4);              // Clear current high G4 bits
+    or3 (G4,G2,G4);             // Recover 64-bit G4
+#endif
+    restore(O0, 0, G2_thread);
+  }
+}
+
+
+void MacroAssembler::save_thread(const Register thread_cache) {
+  verify_thread();
+  if (thread_cache->is_valid()) {
+    assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile");
+    mov(G2_thread, thread_cache);
+  }
+  if (VerifyThread) {
+    // smash G2_thread, as if the VM were about to anyway
+    set(0x67676767, G2_thread);
+  }
+}
+
+
+void MacroAssembler::restore_thread(const Register thread_cache) {
+  if (thread_cache->is_valid()) {
+    assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile");
+    mov(thread_cache, G2_thread);
+    verify_thread();
+  } else {
+    // do it the slow way
+    get_thread();
+  }
+}
+
+
+// %%% maybe get rid of [re]set_last_Java_frame
+void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_Java_pc) {
+  assert_not_delayed();
+  Address flags(G2_thread, JavaThread::frame_anchor_offset() +
+                           JavaFrameAnchor::flags_offset());
+  Address pc_addr(G2_thread, JavaThread::last_Java_pc_offset());
+
+  // Always set last_Java_pc and flags first because once last_Java_sp is visible,
+  // has_last_Java_frame is true and users will look at the rest of the fields.
+  // (Note: flags should always be zero before we get here, so it doesn't need to be set.)
+
+#ifdef ASSERT
+  // Verify that last_Java_pc was zeroed on return to Java
+  Label PcOk;
+  save_frame(0);                // to avoid clobbering O0
+  ld_ptr(pc_addr, L0);
+  br_null_short(L0, Assembler::pt, PcOk);
+  STOP("last_Java_pc not zeroed before leaving Java");
+  bind(PcOk);
+
+  // Verify that flags was zeroed on return to Java
+  Label FlagsOk;
+  ld(flags, L0);
+  tst(L0);
+  br(Assembler::zero, false, Assembler::pt, FlagsOk);
+  delayed()->restore();
+  STOP("flags not zeroed before leaving Java");
+  bind(FlagsOk);
+#endif /* ASSERT */
+  //
+  // When returning from calling out from Java mode the frame anchor's last_Java_pc
+  // will always be set to NULL. It is set here so that if we are doing a call to
+  // native (not VM) that we capture the known pc and don't have to rely on the
+  // native call having a standard frame linkage where we can find the pc.
+
+  if (last_Java_pc->is_valid()) {
+    st_ptr(last_Java_pc, pc_addr);
+  }
+
+#ifdef _LP64
+#ifdef ASSERT
+  // Make sure that we have an odd stack
+  Label StackOk;
+  andcc(last_java_sp, 0x01, G0);
+  br(Assembler::notZero, false, Assembler::pt, StackOk);
+  delayed()->nop();
+  STOP("Stack Not Biased in set_last_Java_frame");
+  bind(StackOk);
+#endif // ASSERT
+  assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
+  add( last_java_sp, STACK_BIAS, G4_scratch );
+  st_ptr(G4_scratch, G2_thread, JavaThread::last_Java_sp_offset());
+#else
+  st_ptr(last_java_sp, G2_thread, JavaThread::last_Java_sp_offset());
+#endif // _LP64
+}
+
+void MacroAssembler::reset_last_Java_frame(void) {
+  assert_not_delayed();
+
+  Address sp_addr(G2_thread, JavaThread::last_Java_sp_offset());
+  Address pc_addr(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
+  Address flags  (G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
+
+#ifdef ASSERT
+  // check that it WAS previously set
+#ifdef CC_INTERP
+    save_frame(0);
+#else
+    save_frame_and_mov(0, Lmethod, Lmethod);     // Propagate Lmethod to helper frame for -Xprof
+#endif /* CC_INTERP */
+    ld_ptr(sp_addr, L0);
+    tst(L0);
+    breakpoint_trap(Assembler::zero, Assembler::ptr_cc);
+    restore();
+#endif // ASSERT
+
+  st_ptr(G0, sp_addr);
+  // Always return last_Java_pc to zero
+  st_ptr(G0, pc_addr);
+  // Always null flags after return to Java
+  st(G0, flags);
+}
+
+
+void MacroAssembler::call_VM_base(
+  Register        oop_result,
+  Register        thread_cache,
+  Register        last_java_sp,
+  address         entry_point,
+  int             number_of_arguments,
+  bool            check_exceptions)
+{
+  assert_not_delayed();
+
+  // determine last_java_sp register
+  if (!last_java_sp->is_valid()) {
+    last_java_sp = SP;
+  }
+  // debugging support
+  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
+
+  // 64-bit last_java_sp is biased!
+  set_last_Java_frame(last_java_sp, noreg);
+  if (VerifyThread)  mov(G2_thread, O0); // about to be smashed; pass early
+  save_thread(thread_cache);
+  // do the call
+  call(entry_point, relocInfo::runtime_call_type);
+  if (!VerifyThread)
+    delayed()->mov(G2_thread, O0);  // pass thread as first argument
+  else
+    delayed()->nop();             // (thread already passed)
+  restore_thread(thread_cache);
+  reset_last_Java_frame();
+
+  // check for pending exceptions. use Gtemp as scratch register.
+  if (check_exceptions) {
+    check_and_forward_exception(Gtemp);
+  }
+
+#ifdef ASSERT
+  set(badHeapWordVal, G3);
+  set(badHeapWordVal, G4);
+  set(badHeapWordVal, G5);
+#endif
+
+  // get oop result if there is one and reset the value in the thread
+  if (oop_result->is_valid()) {
+    get_vm_result(oop_result);
+  }
+}
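+
+// The protocol above, condensed (ordering sketch only, no extra code is
+// emitted):
+//
+//   set_last_Java_frame(last_java_sp, noreg);  // publish anchor for stack walks
+//   save_thread(thread_cache);                 // callee may smash G2_thread
+//   call(entry_point);                         // thread passed in O0 (delay slot)
+//   restore_thread(thread_cache);
+//   reset_last_Java_frame();
+//   check_and_forward_exception(Gtemp);        // only if check_exceptions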
+
+void MacroAssembler::check_and_forward_exception(Register scratch_reg)
+{
+  Label L;
+
+  check_and_handle_popframe(scratch_reg);
+  check_and_handle_earlyret(scratch_reg);
+
+  Address exception_addr(G2_thread, Thread::pending_exception_offset());
+  ld_ptr(exception_addr, scratch_reg);
+  br_null_short(scratch_reg, pt, L);
+  // we use O7 linkage so that forward_exception_entry has the issuing PC
+  call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
+  delayed()->nop();
+  bind(L);
+}
+
+
+void MacroAssembler::check_and_handle_popframe(Register scratch_reg) {
+}
+
+
+void MacroAssembler::check_and_handle_earlyret(Register scratch_reg) {
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
+  // O0 is reserved for the thread
+  mov(arg_1, O1);
+  call_VM(oop_result, entry_point, 1, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+  // O0 is reserved for the thread
+  mov(arg_1, O1);
+  mov(arg_2, O2); assert(arg_2 != O1, "smashed argument");
+  call_VM(oop_result, entry_point, 2, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
+  // O0 is reserved for the thread
+  mov(arg_1, O1);
+  mov(arg_2, O2); assert(arg_2 != O1,                "smashed argument");
+  mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument");
+  call_VM(oop_result, entry_point, 3, check_exceptions);
+}
+
+
+
+// Note: The following call_VM overloadings are useful when a "save"
+// has already been performed by a stub, and the last Java frame is
+// the previous one.  In that case, last_java_sp must be passed as FP
+// instead of SP.
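+//
+// Illustrative sketch (hypothetical stub code, not emitted anywhere in this
+// file; some_vm_entry is a placeholder, not a real entry point):
+//
+//   __ save_frame(0);                 // stub gets its own register window
+//   ...
+//   // the last Java frame is the caller's, so pass FP rather than SP:
+//   __ call_VM(noreg, FP, CAST_FROM_FN_PTR(address, some_vm_entry), G0);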
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
+  call_VM_base(oop_result, noreg, last_java_sp, entry_point, number_of_arguments, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
+  // O0 is reserved for the thread
+  mov(arg_1, O1);
+  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+  // O0 is reserved for the thread
+  mov(arg_1, O1);
+  mov(arg_2, O2); assert(arg_2 != O1, "smashed argument");
+  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
+  // O0 is reserved for the thread
+  mov(arg_1, O1);
+  mov(arg_2, O2); assert(arg_2 != O1,                "smashed argument");
+  mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument");
+  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
+}
+
+
+
+void MacroAssembler::call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments) {
+  assert_not_delayed();
+  save_thread(thread_cache);
+  // do the call
+  call(entry_point, relocInfo::runtime_call_type);
+  delayed()->nop();
+  restore_thread(thread_cache);
+#ifdef ASSERT
+  set(badHeapWordVal, G3);
+  set(badHeapWordVal, G4);
+  set(badHeapWordVal, G5);
+#endif
+}
+
+
+void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments) {
+  call_VM_leaf_base(thread_cache, entry_point, number_of_arguments);
+}
+
+
+void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1) {
+  mov(arg_1, O0);
+  call_VM_leaf(thread_cache, entry_point, 1);
+}
+
+
+void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2) {
+  mov(arg_1, O0);
+  mov(arg_2, O1); assert(arg_2 != O0, "smashed argument");
+  call_VM_leaf(thread_cache, entry_point, 2);
+}
+
+
+void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3) {
+  mov(arg_1, O0);
+  mov(arg_2, O1); assert(arg_2 != O0,                "smashed argument");
+  mov(arg_3, O2); assert(arg_3 != O0 && arg_3 != O1, "smashed argument");
+  call_VM_leaf(thread_cache, entry_point, 3);
+}
+
+
+void MacroAssembler::get_vm_result(Register oop_result) {
+  verify_thread();
+  Address vm_result_addr(G2_thread, JavaThread::vm_result_offset());
+  ld_ptr(    vm_result_addr, oop_result);
+  st_ptr(G0, vm_result_addr);
+  verify_oop(oop_result);
+}
+
+
+void MacroAssembler::get_vm_result_2(Register metadata_result) {
+  verify_thread();
+  Address vm_result_addr_2(G2_thread, JavaThread::vm_result_2_offset());
+  ld_ptr(vm_result_addr_2, metadata_result);
+  st_ptr(G0, vm_result_addr_2);
+}
+
+
+// We require that C code which does not return a value in vm_result will
+// leave it undisturbed.
+void MacroAssembler::set_vm_result(Register oop_result) {
+  verify_thread();
+  Address vm_result_addr(G2_thread, JavaThread::vm_result_offset());
+  verify_oop(oop_result);
+
+#ifdef ASSERT
+    // Check that we are not overwriting any other oop.
+#ifdef CC_INTERP
+    save_frame(0);
+#else
+    save_frame_and_mov(0, Lmethod, Lmethod);     // Propagate Lmethod for -Xprof
+#endif /* CC_INTERP */
+    ld_ptr(vm_result_addr, L0);
+    tst(L0);
+    restore();
+    breakpoint_trap(notZero, Assembler::ptr_cc);
+#endif // ASSERT
+
+  st_ptr(oop_result, vm_result_addr);
+}
+
+
+void MacroAssembler::ic_call(address entry, bool emit_delay) {
+  RelocationHolder rspec = virtual_call_Relocation::spec(pc());
+  patchable_set((intptr_t)Universe::non_oop_word(), G5_inline_cache_reg);
+  relocate(rspec);
+  call(entry, relocInfo::none);
+  if (emit_delay) {
+    delayed()->nop();
+  }
+}
+
+
+void MacroAssembler::card_table_write(jbyte* byte_map_base,
+                                      Register tmp, Register obj) {
+#ifdef _LP64
+  srlx(obj, CardTableModRefBS::card_shift, obj);
+#else
+  srl(obj, CardTableModRefBS::card_shift, obj);
+#endif
+  assert(tmp != obj, "need separate temp reg");
+  set((address) byte_map_base, tmp);
+  stb(G0, tmp, obj);
+}
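+
+// Illustrative effect (assuming the usual 512-byte cards, i.e. card_shift
+// == 9): the card for an address A is byte_map_base[A >> 9], so the stb of
+// G0 (zero) at (tmp + obj) above dirties exactly the card covering obj.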
+
+
+void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
+  address save_pc;
+  int shiftcnt;
+#ifdef _LP64
+# ifdef CHECK_DELAY
+  assert_not_delayed((char*) "cannot put two instructions in delay slot");
+# endif
+  v9_dep();
+  save_pc = pc();
+
+  int msb32 = (int) (addrlit.value() >> 32);
+  int lsb32 = (int) (addrlit.value());
+
+  if (msb32 == 0 && lsb32 >= 0) {
+    Assembler::sethi(lsb32, d, addrlit.rspec());
+  }
+  else if (msb32 == -1) {
+    Assembler::sethi(~lsb32, d, addrlit.rspec());
+    xor3(d, ~low10(~0), d);
+  }
+  else {
+    Assembler::sethi(msb32, d, addrlit.rspec());  // msb 22-bits
+    if (msb32 & 0x3ff)                            // Any bits?
+      or3(d, msb32 & 0x3ff, d);                   // msb 32-bits are now in lsb 32
+    if (lsb32 & 0xFFFFFC00) {                     // any bits above the low 10?
+      if ((lsb32 >> 20) & 0xfff) {                // Any bits set?
+        sllx(d, 12, d);                           // Make room for next 12 bits
+        or3(d, (lsb32 >> 20) & 0xfff, d);         // Or in next 12
+        shiftcnt = 0;                             // We already shifted
+      }
+      else
+        shiftcnt = 12;
+      if ((lsb32 >> 10) & 0x3ff) {
+        sllx(d, shiftcnt + 10, d);                // Make room for last 10 bits
+        or3(d, (lsb32 >> 10) & 0x3ff, d);         // Or in next 10
+        shiftcnt = 0;
+      }
+      else
+        shiftcnt = 10;
+      sllx(d, shiftcnt + 10, d);                  // Shift leaving disp field 0'd
+    }
+    else
+      sllx(d, 32, d);
+  }
+  // Pad out the instruction sequence so it can be patched later.
+  if (ForceRelocatable || (addrlit.rtype() != relocInfo::none &&
+                           addrlit.rtype() != relocInfo::runtime_call_type)) {
+    while (pc() < (save_pc + (7 * BytesPerInstWord)))
+      nop();
+  }
+#else
+  Assembler::sethi(addrlit.value(), d, addrlit.rspec());
+#endif
+}
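+
+// Illustrative trace (not emitted code): for the 64-bit constant
+// 0x0000001234567890, msb32 == 0x12 and lsb32 == 0x34567890, so the
+// general case above produces:
+//   sethi %hi(0x12), d    // d = 0
+//   or3   d, 0x12,  d     // d = 0x12
+//   sllx  d, 12,    d     // d = 0x12000
+//   or3   d, 0x345, d     // d = 0x12345
+//   sllx  d, 10,    d
+//   or3   d, 0x19e, d     // d = 0x48d159e
+//   sllx  d, 10,    d     // d = 0x1234567800, disp field left zeroed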
+
+
+void MacroAssembler::sethi(const AddressLiteral& addrlit, Register d) {
+  internal_sethi(addrlit, d, false);
+}
+
+
+void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d) {
+  internal_sethi(addrlit, d, true);
+}
+
+
+int MacroAssembler::insts_for_sethi(address a, bool worst_case) {
+#ifdef _LP64
+  if (worst_case)  return 7;
+  intptr_t iaddr = (intptr_t) a;
+  int msb32 = (int) (iaddr >> 32);
+  int lsb32 = (int) (iaddr);
+  int count;
+  if (msb32 == 0 && lsb32 >= 0)
+    count = 1;
+  else if (msb32 == -1)
+    count = 2;
+  else {
+    count = 2;
+    if (msb32 & 0x3ff)
+      count++;
+    if (lsb32 & 0xFFFFFC00 ) {
+      if ((lsb32 >> 20) & 0xfff)  count += 2;
+      if ((lsb32 >> 10) & 0x3ff)  count += 2;
+    }
+  }
+  return count;
+#else
+  return 1;
+#endif
+}
+
+int MacroAssembler::worst_case_insts_for_set() {
+  return insts_for_sethi(NULL, true) + 1;
+}
+
+
+// Keep in sync with MacroAssembler::insts_for_internal_set
+void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
+  intptr_t value = addrlit.value();
+
+  if (!ForceRelocatable && addrlit.rspec().type() == relocInfo::none) {
+    // can optimize
+    if (-4096 <= value && value <= 4095) {
+      or3(G0, value, d); // setsw (this leaves upper 32 bits sign-extended)
+      return;
+    }
+    if (inv_hi22(hi22(value)) == value) {
+      sethi(addrlit, d);
+      return;
+    }
+  }
+  assert_not_delayed((char*) "cannot put two instructions in delay slot");
+  internal_sethi(addrlit, d, ForceRelocatable);
+  if (ForceRelocatable || addrlit.rspec().type() != relocInfo::none || addrlit.low10() != 0) {
+    add(d, addrlit.low10(), d, addrlit.rspec());
+  }
+}
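+
+// Illustrative fast paths (assuming no relocation): set(0x123, d) collapses
+// to a single "or3 %g0, 0x123, d", and set(0x40000000, d), whose value is
+// exactly a hi22 pattern, collapses to a single sethi; everything else goes
+// through internal_sethi plus an optional trailing add of low10(value).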
+
+// Keep in sync with MacroAssembler::internal_set
+int MacroAssembler::insts_for_internal_set(intptr_t value) {
+  // can optimize
+  if (-4096 <= value && value <= 4095) {
+    return 1;
+  }
+  if (inv_hi22(hi22(value)) == value) {
+    return insts_for_sethi((address) value);
+  }
+  int count = insts_for_sethi((address) value);
+  AddressLiteral al(value);
+  if (al.low10() != 0) {
+    count++;
+  }
+  return count;
+}
+
+void MacroAssembler::set(const AddressLiteral& al, Register d) {
+  internal_set(al, d, false);
+}
+
+void MacroAssembler::set(intptr_t value, Register d) {
+  AddressLiteral al(value);
+  internal_set(al, d, false);
+}
+
+void MacroAssembler::set(address addr, Register d, RelocationHolder const& rspec) {
+  AddressLiteral al(addr, rspec);
+  internal_set(al, d, false);
+}
+
+void MacroAssembler::patchable_set(const AddressLiteral& al, Register d) {
+  internal_set(al, d, true);
+}
+
+void MacroAssembler::patchable_set(intptr_t value, Register d) {
+  AddressLiteral al(value);
+  internal_set(al, d, true);
+}
+
+
+void MacroAssembler::set64(jlong value, Register d, Register tmp) {
+  assert_not_delayed();
+  v9_dep();
+
+  int hi = (int)(value >> 32);
+  int lo = (int)(value & ~0);
+  // (Matcher::isSimpleConstant64 knows about the following optimizations.)
+  if (Assembler::is_simm13(lo) && value == lo) {
+    or3(G0, lo, d);
+  } else if (hi == 0) {
+    Assembler::sethi(lo, d);   // hardware version zero-extends to upper 32
+    if (low10(lo) != 0)
+      or3(d, low10(lo), d);
+  }
+  else if (hi == -1) {
+    Assembler::sethi(~lo, d);  // hardware version zero-extends to upper 32
+    xor3(d, low10(lo) ^ ~low10(~0), d);
+  }
+  else if (lo == 0) {
+    if (Assembler::is_simm13(hi)) {
+      or3(G0, hi, d);
+    } else {
+      Assembler::sethi(hi, d);   // hardware version zero-extends to upper 32
+      if (low10(hi) != 0)
+        or3(d, low10(hi), d);
+    }
+    sllx(d, 32, d);
+  }
+  else {
+    Assembler::sethi(hi, tmp);
+    Assembler::sethi(lo,   d); // macro assembler version sign-extends
+    if (low10(hi) != 0)
+      or3 (tmp, low10(hi), tmp);
+    if (low10(lo) != 0)
+      or3 (  d, low10(lo),   d);
+    sllx(tmp, 32, tmp);
+    or3 (d, tmp, d);
+  }
+}
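+
+// Illustrative decompositions (values chosen to hit each path above):
+//   set64(0x000007ff, d, tmp)  -> or3 %g0, 0x7ff, d             (simm13)
+//   set64(0x12345678, d, tmp)  -> sethi %hi(...), d; or3        (hi == 0)
+//   set64(0x1234LL << 32, ..)  -> sethi/or3 into d; sllx d,32,d (lo == 0)
+//   anything else              -> builds hi in tmp and lo in d, then
+//                                 sllx tmp,32,tmp; or3 d,tmp,d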
+
+int MacroAssembler::insts_for_set64(jlong value) {
+  v9_dep();
+
+  int hi = (int) (value >> 32);
+  int lo = (int) (value & ~0);
+  int count = 0;
+
+  // (Matcher::isSimpleConstant64 knows about the following optimizations.)
+  if (Assembler::is_simm13(lo) && value == lo) {
+    count++;
+  } else if (hi == 0) {
+    count++;
+    if (low10(lo) != 0)
+      count++;
+  }
+  else if (hi == -1) {
+    count += 2;
+  }
+  else if (lo == 0) {
+    if (Assembler::is_simm13(hi)) {
+      count++;
+    } else {
+      count++;
+      if (low10(hi) != 0)
+        count++;
+    }
+    count++;
+  }
+  else {
+    count += 2;
+    if (low10(hi) != 0)
+      count++;
+    if (low10(lo) != 0)
+      count++;
+    count += 2;
+  }
+  return count;
+}
+
+// Compute the size in bytes of a SPARC frame, given the
+// number of extraWords.
+int MacroAssembler::total_frame_size_in_bytes(int extraWords) {
+
+  int nWords = frame::memory_parameter_word_sp_offset;
+
+  nWords += extraWords;
+
+  if (nWords & 1) ++nWords; // round up to double-word
+
+  return nWords * BytesPerWord;
+}
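+
+// For example, if frame::memory_parameter_word_sp_offset were 23 (a value
+// chosen here purely for illustration), total_frame_size_in_bytes(2) would
+// round 25 words up to 26 and return 26 * BytesPerWord.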
+
+
+// save_frame: given the number of "extra" words in the frame,
+// issue the appropriate save instruction (see p. 200 of the V8 manual)
+
+void MacroAssembler::save_frame(int extraWords) {
+  int delta = -total_frame_size_in_bytes(extraWords);
+  if (is_simm13(delta)) {
+    save(SP, delta, SP);
+  } else {
+    set(delta, G3_scratch);
+    save(SP, G3_scratch, SP);
+  }
+}
+
+
+void MacroAssembler::save_frame_c1(int size_in_bytes) {
+  if (is_simm13(-size_in_bytes)) {
+    save(SP, -size_in_bytes, SP);
+  } else {
+    set(-size_in_bytes, G3_scratch);
+    save(SP, G3_scratch, SP);
+  }
+}
+
+
+void MacroAssembler::save_frame_and_mov(int extraWords,
+                                        Register s1, Register d1,
+                                        Register s2, Register d2) {
+  assert_not_delayed();
+
+  // The trick here is to use precisely the same memory word
+  // that trap handlers also use to save the register.
+  // This word cannot be used for any other purpose, but
+  // it works fine to save the register's value, whether or not
+  // an interrupt flushes register windows at any given moment!
+  Address s1_addr;
+  if (s1->is_valid() && (s1->is_in() || s1->is_local())) {
+    s1_addr = s1->address_in_saved_window();
+    st_ptr(s1, s1_addr);
+  }
+
+  Address s2_addr;
+  if (s2->is_valid() && (s2->is_in() || s2->is_local())) {
+    s2_addr = s2->address_in_saved_window();
+    st_ptr(s2, s2_addr);
+  }
+
+  save_frame(extraWords);
+
+  if (s1_addr.base() == SP) {
+    ld_ptr(s1_addr.after_save(), d1);
+  } else if (s1->is_valid()) {
+    mov(s1->after_save(), d1);
+  }
+
+  if (s2_addr.base() == SP) {
+    ld_ptr(s2_addr.after_save(), d2);
+  } else if (s2->is_valid()) {
+    mov(s2->after_save(), d2);
+  }
+}
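+
+// Typical use (see the ASSERT blocks above): save_frame_and_mov(0, Lmethod,
+// Lmethod) parks Lmethod in its own window-save slot, performs the save, and
+// reloads the value into the new window's Lmethod, so the helper frame sees
+// it even if a trap flushed the register windows in between.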
+
+
+AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
+  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
+  int index = oop_recorder()->allocate_metadata_index(obj);
+  RelocationHolder rspec = metadata_Relocation::spec(index);
+  return AddressLiteral((address)obj, rspec);
+}
+
+AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
+  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
+  int index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = metadata_Relocation::spec(index);
+  return AddressLiteral((address)obj, rspec);
+}
+
+
+AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "not an oop");
+  int oop_index = oop_recorder()->find_index(obj);
+  return AddressLiteral(obj, oop_Relocation::spec(oop_index));
+}
+
+void  MacroAssembler::set_narrow_oop(jobject obj, Register d) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+
+  assert_not_delayed();
+  // Relocation with special format (see relocInfo_sparc.hpp).
+  relocate(rspec, 1);
+  // Assembler::sethi(0x3fffff, d);
+  emit_int32( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(0x3fffff) );
+  // Don't add relocation for 'add'. Do patching during 'sethi' processing.
+  add(d, 0x3ff, d);
+
+}
+
+void  MacroAssembler::set_narrow_klass(Klass* k, Register d) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int klass_index = oop_recorder()->find_index(k);
+  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
+  narrowOop encoded_k = oopDesc::encode_klass(k);
+
+  assert_not_delayed();
+  // Relocation with special format (see relocInfo_sparc.hpp).
+  relocate(rspec, 1);
+  // Assembler::sethi(encoded_k, d);
+  emit_int32( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(encoded_k) );
+  // Don't add relocation for 'add'. Do patching during 'sethi' processing.
+  add(d, low10(encoded_k), d);
+
+}
+
+void MacroAssembler::align(int modulus) {
+  while (offset() % modulus != 0) nop();
+}
+
+
+void MacroAssembler::safepoint() {
+  relocate(breakpoint_Relocation::spec(breakpoint_Relocation::safepoint));
+}
+
+
+void RegistersForDebugging::print(outputStream* s) {
+  FlagSetting fs(Debugging, true);
+  int j;
+  for (j = 0; j < 8; ++j) {
+    if (j != 6) { s->print("i%d = ", j); os::print_location(s, i[j]); }
+    else        { s->print( "fp = "   ); os::print_location(s, i[j]); }
+  }
+  s->cr();
+
+  for (j = 0;  j < 8;  ++j) {
+    s->print("l%d = ", j); os::print_location(s, l[j]);
+  }
+  s->cr();
+
+  for (j = 0; j < 8; ++j) {
+    if (j != 6) { s->print("o%d = ", j); os::print_location(s, o[j]); }
+    else        { s->print( "sp = "   ); os::print_location(s, o[j]); }
+  }
+  s->cr();
+
+  for (j = 0; j < 8; ++j) {
+    s->print("g%d = ", j); os::print_location(s, g[j]);
+  }
+  s->cr();
+
+  // print out floats with compression
+  for (j = 0; j < 32; ) {
+    jfloat val = f[j];
+    int last = j;
+    for ( ;  last+1 < 32;  ++last ) {
+      char b1[1024], b2[1024];
+      sprintf(b1, "%f", val);
+      sprintf(b2, "%f", f[last+1]);
+      if (strcmp(b1, b2))
+        break;
+    }
+    s->print("f%d", j);
+    if ( j != last )  s->print(" - f%d", last);
+    s->print(" = %f", val);
+    s->fill_to(25);
+    s->print_cr(" (0x%x)", *(int*)&val);
+    j = last + 1;
+  }
+  s->cr();
+
+  // and doubles (evens only)
+  for (j = 0; j < 32; ) {
+    jdouble val = d[j];
+    int last = j;
+    for ( ;  last+1 < 32;  ++last ) {
+      char b1[1024], b2[1024];
+      sprintf(b1, "%f", val);
+      sprintf(b2, "%f", d[last+1]);
+      if (strcmp(b1, b2))
+        break;
+    }
+    s->print("d%d", 2 * j);
+    if ( j != last )  s->print(" - d%d", 2 * last);
+    s->print(" = %f", val);
+    s->fill_to(30);
+    s->print("(0x%x)", *(int*)&val);
+    s->fill_to(42);
+    s->print_cr("(0x%x)", *(1 + (int*)&val));
+    j = last + 1;
+  }
+  s->cr();
+}
+
+void RegistersForDebugging::save_registers(MacroAssembler* a) {
+  a->sub(FP, round_to(sizeof(RegistersForDebugging), sizeof(jdouble)) - STACK_BIAS, O0);
+  a->flush_windows();
+  int i;
+  for (i = 0; i < 8; ++i) {
+    a->ld_ptr(as_iRegister(i)->address_in_saved_window().after_save(), L1);  a->st_ptr( L1, O0, i_offset(i));
+    a->ld_ptr(as_lRegister(i)->address_in_saved_window().after_save(), L1);  a->st_ptr( L1, O0, l_offset(i));
+    a->st_ptr(as_oRegister(i)->after_save(), O0, o_offset(i));
+    a->st_ptr(as_gRegister(i)->after_save(), O0, g_offset(i));
+  }
+  for (i = 0;  i < 32; ++i) {
+    a->stf(FloatRegisterImpl::S, as_FloatRegister(i), O0, f_offset(i));
+  }
+  for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
+    a->stf(FloatRegisterImpl::D, as_FloatRegister(i), O0, d_offset(i));
+  }
+}
+
+void RegistersForDebugging::restore_registers(MacroAssembler* a, Register r) {
+  for (int i = 1; i < 8;  ++i) {
+    a->ld_ptr(r, g_offset(i), as_gRegister(i));
+  }
+  for (int j = 0; j < 32; ++j) {
+    a->ldf(FloatRegisterImpl::S, O0, f_offset(j), as_FloatRegister(j));
+  }
+  for (int k = 0; k < (VM_Version::v9_instructions_work() ? 64 : 32); k += 2) {
+    a->ldf(FloatRegisterImpl::D, O0, d_offset(k), as_FloatRegister(k));
+  }
+}
+
+
+// pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
+void MacroAssembler::push_fTOS() {
+  // %%%%%% need to implement this
+}
+
+// pops double TOS element from CPU stack and pushes on FPU stack
+void MacroAssembler::pop_fTOS() {
+  // %%%%%% need to implement this
+}
+
+void MacroAssembler::empty_FPU_stack() {
+  // %%%%%% need to implement this
+}
+
+void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * file, int line) {
+  // plausibility check for oops
+  if (!VerifyOops) return;
+
+  if (reg == G0)  return;       // always NULL, which is always an oop
+
+  BLOCK_COMMENT("verify_oop {");
+  char buffer[64];
+#ifdef COMPILER1
+  if (CommentedAssembly) {
+    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
+    block_comment(buffer);
+  }
+#endif
+
+  int len = strlen(file) + strlen(msg) + 1 + 4;
+  sprintf(buffer, "%d", line);
+  len += strlen(buffer);
+  sprintf(buffer, " at offset %d ", offset());
+  len += strlen(buffer);
+  char * real_msg = new char[len];
+  sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);
+
+  // Call indirectly to solve generation ordering problem
+  AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address());
+
+  // Make some space on stack above the current register window.
+  // Enough to hold 8 64-bit registers.
+  add(SP,-8*8,SP);
+
+  // Save some 64-bit registers; a normal 'save' chops the heads off
+  // of 64-bit longs in the 32-bit build.
+  stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8);
+  stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8);
+  mov(reg,O0); // Move arg into O0; arg might be in O7 which is about to be crushed
+  stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8);
+
+  // Size of set() should stay the same
+  patchable_set((intptr_t)real_msg, O1);
+  // Load address to call to into O7
+  load_ptr_contents(a, O7);
+  // Register call to verify_oop_subroutine
+  callr(O7, G0);
+  delayed()->nop();
+  // recover frame size
+  add(SP, 8*8,SP);
+  BLOCK_COMMENT("} verify_oop");
+}
+
+void MacroAssembler::_verify_oop_addr(Address addr, const char* msg, const char * file, int line) {
+  // plausibility check for oops
+  if (!VerifyOops) return;
+
+  char buffer[64];
+  sprintf(buffer, "%d", line);
+  int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer);
+  sprintf(buffer, " at SP+%d ", addr.disp());
+  len += strlen(buffer);
+  char * real_msg = new char[len];
+  sprintf(real_msg, "%s at SP+%d (%s:%d)", msg, addr.disp(), file, line);
+
+  // Call indirectly to solve generation ordering problem
+  AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address());
+
+  // Make some space on stack above the current register window.
+  // Enough to hold 8 64-bit registers.
+  add(SP,-8*8,SP);
+
+  // Save some 64-bit registers; a normal 'save' chops the heads off
+  // of 64-bit longs in the 32-bit build.
+  stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8);
+  stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8);
+  ld_ptr(addr.base(), addr.disp() + 8*8, O0); // Load arg into O0; arg might be in O7 which is about to be crushed
+  stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8);
+
+  // Size of set() should stay the same
+  patchable_set((intptr_t)real_msg, O1);
+  // Load address to call to into O7
+  load_ptr_contents(a, O7);
+  // Register call to verify_oop_subroutine
+  callr(O7, G0);
+  delayed()->nop();
+  // recover frame size
+  add(SP, 8*8,SP);
+}
+
+// side-door communication with signalHandler in os_solaris.cpp
+address MacroAssembler::_verify_oop_implicit_branch[3] = { NULL };
+
+// This macro is expanded just once; it creates shared code.  Contract:
+// receives an oop in O0.  Must restore O0 & O7 from TLS.  Must not smash ANY
+// registers, including flags.  May not use a register 'save', as this blows
+// the high bits of the O-regs if they contain Long values.  Acts as a 'leaf'
+// call.
+void MacroAssembler::verify_oop_subroutine() {
+  assert( VM_Version::v9_instructions_work(), "VerifyOops not supported for V8" );
+
+  // Leaf call; no frame.
+  Label succeed, fail, null_or_fail;
+
+  // O0 and O7 were saved already (O0 in O0's TLS home, O7 in O5's TLS home).
+  // O0 is now the oop to be checked.  O7 is the return address.
+  Register O0_obj = O0;
+
+  // Save some more registers for temps.
+  stx(O2,SP,frame::register_save_words*wordSize+STACK_BIAS+2*8);
+  stx(O3,SP,frame::register_save_words*wordSize+STACK_BIAS+3*8);
+  stx(O4,SP,frame::register_save_words*wordSize+STACK_BIAS+4*8);
+  stx(O5,SP,frame::register_save_words*wordSize+STACK_BIAS+5*8);
+
+  // Save flags
+  Register O5_save_flags = O5;
+  rdccr( O5_save_flags );
+
+  { // count number of verifies
+    Register O2_adr   = O2;
+    Register O3_accum = O3;
+    inc_counter(StubRoutines::verify_oop_count_addr(), O2_adr, O3_accum);
+  }
+
+  Register O2_mask = O2;
+  Register O3_bits = O3;
+  Register O4_temp = O4;
+
+  // mark lower end of faulting range
+  assert(_verify_oop_implicit_branch[0] == NULL, "set once");
+  _verify_oop_implicit_branch[0] = pc();
+
+  // We can't check the mark oop because it could be in the process of
+  // locking or unlocking while this is running.
+  set(Universe::verify_oop_mask (), O2_mask);
+  set(Universe::verify_oop_bits (), O3_bits);
+
+  // assert((obj & oop_mask) == oop_bits);
+  and3(O0_obj, O2_mask, O4_temp);
+  cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail);
+
+  if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
+    // the null_or_fail case is useless; must test for null separately
+    br_null_short(O0_obj, pn, succeed);
+  }
+
+  // Check the Klass* of this object for being in the right area of memory.
+  // Cannot do the load in the delay slot above in case O0 is null
+  load_klass(O0_obj, O0_obj);
+  // assert(klass != NULL)
+  br_null_short(O0_obj, pn, fail);
+  // TODO: future assert that klass is in the lower 4G of memory for UseCompressedKlassPointers
+
+  wrccr( O5_save_flags ); // Restore CCR's
+
+  // mark upper end of faulting range
+  _verify_oop_implicit_branch[1] = pc();
+
+  //-----------------------
+  // all tests pass
+  bind(succeed);
+
+  // Restore prior 64-bit registers
+  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+0*8,O0);
+  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+1*8,O1);
+  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+2*8,O2);
+  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+3*8,O3);
+  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+4*8,O4);
+  ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+5*8,O5);
+
+  retl();                       // Leaf return; restore prior O7 in delay slot
+  delayed()->ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+7*8,O7);
+
+  //-----------------------
+  bind(null_or_fail);           // nulls are less common but OK
+  br_null(O0_obj, false, pt, succeed);
+  delayed()->wrccr( O5_save_flags ); // Restore CCR's
+
+  //-----------------------
+  // report failure:
+  bind(fail);
+  _verify_oop_implicit_branch[2] = pc();
+
+  wrccr( O5_save_flags ); // Restore CCR's
+
+  save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));
+
+  // stop_subroutine expects message pointer in I1.
+  mov(I1, O1);
+
+  // Restore prior 64-bit registers
+  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+0*8,I0);
+  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+1*8,I1);
+  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+2*8,I2);
+  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+3*8,I3);
+  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+4*8,I4);
+  ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+5*8,I5);
+
+  // factor long stop-sequence into subroutine to save space
+  assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet");
+
+  // call indirectly to solve generation ordering problem
+  AddressLiteral al(StubRoutines::Sparc::stop_subroutine_entry_address());
+  load_ptr_contents(al, O5);
+  jmpl(O5, 0, O7);
+  delayed()->nop();
+}
+
+
+void MacroAssembler::stop(const char* msg) {
+  // Save the frame first to get O7 for the return address.
+  // Add one word to the size in case the struct is an odd number of words
+  // long; it must be doubleword-aligned for storing doubles into it.
+
+  save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));
+
+  // stop_subroutine expects message pointer in I1.
+  // Size of set() should stay the same
+  patchable_set((intptr_t)msg, O1);
+
+  // factor long stop-sequence into subroutine to save space
+  assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet");
+
+  // call indirectly to solve generation ordering problem
+  AddressLiteral a(StubRoutines::Sparc::stop_subroutine_entry_address());
+  load_ptr_contents(a, O5);
+  jmpl(O5, 0, O7);
+  delayed()->nop();
+
+  breakpoint_trap();   // make stop actually stop rather than writing
+                       // unnoticeable results in the output files.
+
+  // restore(); done in callee to save space!
+}
+
+
+void MacroAssembler::warn(const char* msg) {
+  save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2));
+  RegistersForDebugging::save_registers(this);
+  mov(O0, L0);
+  // Size of set() should stay the same
+  patchable_set((intptr_t)msg, O0);
+  call( CAST_FROM_FN_PTR(address, warning) );
+  delayed()->nop();
+//  ret();
+//  delayed()->restore();
+  RegistersForDebugging::restore_registers(this, L0);
+  restore();
+}
+
+
+void MacroAssembler::untested(const char* what) {
+  // We must be able to turn interactive prompting off
+  // in order to run automated test scripts on the VM.
+  // Use the flag ShowMessageBoxOnError.
+
+  char* b = new char[1024];
+  sprintf(b, "untested: %s", what);
+
+  if (ShowMessageBoxOnError) { STOP(b); }
+  else                       { warn(b); }
+}
+
+
+void MacroAssembler::stop_subroutine() {
+  RegistersForDebugging::save_registers(this);
+
+  // for the sake of the debugger, stick a PC on the current frame
+  // (this assumes that the caller has performed an extra "save")
+  mov(I7, L7);
+  add(O7, -7 * BytesPerInt, I7);
+
+  save_frame(); // one more save to free up another O7 register
+  mov(I0, O1); // addr of reg save area
+
+  // We expect pointer to message in I1. Caller must set it up in O1
+  mov(I1, O0); // get msg
+  call (CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
+  delayed()->nop();
+
+  restore();
+
+  RegistersForDebugging::restore_registers(this, O0);
+
+  save_frame(0);
+  call(CAST_FROM_FN_PTR(address,breakpoint));
+  delayed()->nop();
+  restore();
+
+  mov(L7, I7);
+  retl();
+  delayed()->restore(); // see stop above
+}
+
+
+void MacroAssembler::debug(char* msg, RegistersForDebugging* regs) {
+  if (ShowMessageBoxOnError) {
+    JavaThread* thread = JavaThread::current();
+    JavaThreadState saved_state = thread->thread_state();
+    thread->set_thread_state(_thread_in_vm);
+    {
+      // In order to make locks work, we need to fake an in_VM state.
+      ttyLocker ttyl;
+      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
+      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+        BytecodeCounter::print();
+      }
+      if (os::message_box(msg, "Execution stopped, print registers?"))
+        regs->print(::tty);
+    }
+    BREAKPOINT;
+    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
+  } else {
+    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
+  }
+  assert(false, err_msg("DEBUG MESSAGE: %s", msg));
+}
+
+
+void MacroAssembler::calc_mem_param_words(Register Rparam_words, Register Rresult) {
+  subcc( Rparam_words, Argument::n_register_parameters, Rresult); // how many mem words?
+  Label no_extras;
+  br( negative, true, pt, no_extras ); // if neg, clear reg
+  delayed()->set(0, Rresult);          // annulled, so only if taken
+  bind( no_extras );
+}
+
+
+void MacroAssembler::calc_frame_size(Register Rextra_words, Register Rresult) {
+#ifdef _LP64
+  add(Rextra_words, frame::memory_parameter_word_sp_offset, Rresult);
+#else
+  add(Rextra_words, frame::memory_parameter_word_sp_offset + 1, Rresult);
+#endif
+  bclr(1, Rresult);
+  sll(Rresult, LogBytesPerWord, Rresult);  // Rresult has total frame bytes
+}
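+
+// Note: "(x + 1) & ~1" rounds x up to the next even number, so on 32-bit the
+// "+ 1" followed by bclr(1, Rresult) keeps the word count doubleword-aligned;
+// e.g. (illustrative) a count of 27 words becomes 28, while 26 stays 26.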
+
+
+void MacroAssembler::calc_frame_size_and_save(Register Rextra_words, Register Rresult) {
+  calc_frame_size(Rextra_words, Rresult);
+  neg(Rresult);
+  save(SP, Rresult, SP);
+}
+
+
+// ---------------------------------------------------------
+Assembler::RCondition cond2rcond(Assembler::Condition c) {
+  switch (c) {
+    /*case zero: */
+    case Assembler::equal:        return Assembler::rc_z;
+    case Assembler::lessEqual:    return Assembler::rc_lez;
+    case Assembler::less:         return Assembler::rc_lz;
+    /*case notZero:*/
+    case Assembler::notEqual:     return Assembler::rc_nz;
+    case Assembler::greater:      return Assembler::rc_gz;
+    case Assembler::greaterEqual: return Assembler::rc_gez;
+  }
+  ShouldNotReachHere();
+  return Assembler::rc_z;
+}
+
+// Compares a (32-bit) register with zero and branches.  NOT FOR USE WITH 64-BIT POINTERS.
+void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {
+  tst(s1);
+  br (c, a, p, L);
+}
+
+// Compares a pointer register with zero and branches on null.
+// Does a test & branch on 32-bit systems and a register-branch on 64-bit.
+void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) {
+  assert_not_delayed();
+#ifdef _LP64
+  bpr( rc_z, a, p, s1, L );
+#else
+  tst(s1);
+  br ( zero, a, p, L );
+#endif
+}
+
+void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) {
+  assert_not_delayed();
+#ifdef _LP64
+  bpr( rc_nz, a, p, s1, L );
+#else
+  tst(s1);
+  br ( notZero, a, p, L );
+#endif
+}
+
+// Compare registers and branch with nop in delay slot or cbcond without delay slot.
+
+// Compare integer (32 bit) values (icc only).
+void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
+                                      Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(c, icc, s1, s2, L);
+  } else {
+    cmp(s1, s2);
+    br(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Compare integer (32 bit) values (icc only).
+void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
+                                      Predict p, Label& L) {
+  assert_not_delayed();
+  if (is_simm(simm13a,5) && use_cbcond(L)) {
+    Assembler::cbcond(c, icc, s1, simm13a, L);
+  } else {
+    cmp(s1, simm13a);
+    br(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Branch that tests xcc in LP64 and icc in !LP64
+void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c,
+                                       Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(c, ptr_cc, s1, s2, L);
+  } else {
+    cmp(s1, s2);
+    brx(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Branch that tests xcc in LP64 and icc in !LP64
+void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c,
+                                       Predict p, Label& L) {
+  assert_not_delayed();
+  if (is_simm(simm13a,5) && use_cbcond(L)) {
+    Assembler::cbcond(c, ptr_cc, s1, simm13a, L);
+  } else {
+    cmp(s1, simm13a);
+    brx(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Short branch version for compares a pointer with zero.
+
+void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(zero, ptr_cc, s1, 0, L);
+    return;
+  }
+  br_null(s1, false, p, L);
+  delayed()->nop();
+}
+
+void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
+    return;
+  }
+  br_notnull(s1, false, p, L);
+  delayed()->nop();
+}
+
+// Unconditional short branch
+void MacroAssembler::ba_short(Label& L) {
+  if (use_cbcond(L)) {
+    Assembler::cbcond(equal, icc, G0, G0, L);
+    return;
+  }
+  br(always, false, pt, L);
+  delayed()->nop();
+}
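+
+// Illustrative expansion (assuming use_cbcond(L) is false, e.g. no CBCOND
+// hardware or the label too far away):
+//   cmp_and_br_short(O0, O1, equal, pt, L)
+// emits, roughly, "cmp %o0, %o1; be,pt %icc, L; nop", whereas with cbcond
+// available it folds into a single compare-and-branch with no delay slot.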
+
+// instruction sequences factored across compiler & interpreter
+
+
+void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
+                           Register Rb_hi, Register Rb_low,
+                           Register Rresult) {
+
+  Label check_low_parts, done;
+
+  cmp(Ra_hi, Rb_hi );  // compare hi parts
+  br(equal, true, pt, check_low_parts);
+  delayed()->cmp(Ra_low, Rb_low); // test low parts
+
+  // With an unsigned comparison, it does not matter whether the numbers
+  // are negative or not.
+  // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff.
+  // The second one is bigger (unsignedly).
+
+  // Other notes:  The first move in each triplet can be unconditional
+  // (and therefore probably prefetchable).
+  // And the equals case for the high part does not need testing,
+  // since that triplet is reached only after finding the high halves differ.
+
+  if (VM_Version::v9_instructions_work()) {
+    mov(-1, Rresult);
+    ba(done);  delayed()-> movcc(greater, false, icc,  1, Rresult);
+  } else {
+    br(less,    true, pt, done); delayed()-> set(-1, Rresult);
+    br(greater, true, pt, done); delayed()-> set( 1, Rresult);
+  }
+
+  bind( check_low_parts );
+
+  if (VM_Version::v9_instructions_work()) {
+    mov(                               -1, Rresult);
+    movcc(equal,           false, icc,  0, Rresult);
+    movcc(greaterUnsigned, false, icc,  1, Rresult);
+  } else {
+    set(-1, Rresult);
+    br(equal,           true, pt, done); delayed()->set( 0, Rresult);
+    br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult);
+  }
+  bind( done );
+}
+
+void MacroAssembler::lneg( Register Rhi, Register Rlow ) {
+  subcc(  G0, Rlow, Rlow );
+  subc(   G0, Rhi,  Rhi  );
+}
+
+void MacroAssembler::lshl( Register Rin_high,  Register Rin_low,
+                           Register Rcount,
+                           Register Rout_high, Register Rout_low,
+                           Register Rtemp ) {
+
+
+  Register Ralt_count = Rtemp;
+  Register Rxfer_bits = Rtemp;
+
+  assert( Ralt_count != Rin_high
+      &&  Ralt_count != Rin_low
+      &&  Ralt_count != Rcount
+      &&  Rxfer_bits != Rin_low
+      &&  Rxfer_bits != Rin_high
+      &&  Rxfer_bits != Rcount
+      &&  Rxfer_bits != Rout_low
+      &&  Rout_low   != Rin_high,
+        "register alias checks");
+
+  Label big_shift, done;
+
+  // This code can be optimized to use the 64 bit shifts in V9.
+  // Here we use the 32 bit shifts.
+
+  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
+  subcc(Rcount,   31, Ralt_count);
+  br(greater, true, pn, big_shift);
+  delayed()->dec(Ralt_count);
+
+  // shift < 32 bits, Ralt_count = Rcount-31
+
+  // We get the transfer bits by shifting right by 32-count the low
+  // register. This is done by shifting right by 31-count and then by one
+  // more to take care of the special (rare) case where count is zero
+  // (shifting by 32 would not work).
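+  //
+  // E.g., for count == 5 the branch above falls through with Ralt_count ==
+  // -26; neg() turns it into 26, and srl by 26 followed by srl by 1 shifts
+  // Rin_low right by 27 (== 32 - 5), leaving its top 5 bits at the bottom of
+  // Rxfer_bits, ready to be OR'd into the new high word.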
+
+  neg(Ralt_count);
+
+  // The order of the next two instructions is critical in the case where
+  // Rin and Rout are the same and should not be reversed.
+
+  srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count
+  if (Rcount != Rout_low) {
+    sll(Rin_low, Rcount, Rout_low); // low half
+  }
+  sll(Rin_high, Rcount, Rout_high);
+  if (Rcount == Rout_low) {
+    sll(Rin_low, Rcount, Rout_low); // low half
+  }
+  srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more
+  ba(done);
+  delayed()->or3(Rout_high, Rxfer_bits, Rout_high);   // new hi value: or in shifted old hi part and xfer from low
+
+  // shift >= 32 bits, Ralt_count = Rcount-32
+  bind(big_shift);
+  sll(Rin_low, Ralt_count, Rout_high  );
+  clr(Rout_low);
+
+  bind(done);
+}
+
+
+void MacroAssembler::lshr( Register Rin_high,  Register Rin_low,
+                           Register Rcount,
+                           Register Rout_high, Register Rout_low,
+                           Register Rtemp ) {
+
+  Register Ralt_count = Rtemp;
+  Register Rxfer_bits = Rtemp;
+
+  assert( Ralt_count != Rin_high
+      &&  Ralt_count != Rin_low
+      &&  Ralt_count != Rcount
+      &&  Rxfer_bits != Rin_low
+      &&  Rxfer_bits != Rin_high
+      &&  Rxfer_bits != Rcount
+      &&  Rxfer_bits != Rout_high
+      &&  Rout_high  != Rin_low,
+        "register alias checks");
+
+  Label big_shift, done;
+
+  // This code can be optimized to use the 64 bit shifts in V9.
+  // Here we use the 32 bit shifts.
+
+  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
+  subcc(Rcount,   31, Ralt_count);
+  br(greater, true, pn, big_shift);
+  delayed()->dec(Ralt_count);
+
+  // shift < 32 bits, Ralt_count = Rcount-31
+
+  // We get the transfer bits by shifting left by 32-count the high
+  // register. This is done by shifting left by 31-count and then by one
+  // more to take care of the special (rare) case where count is zero
+  // (shifting by 32 would not work).
+
+  neg(Ralt_count);
+  if (Rcount != Rout_low) {
+    srl(Rin_low, Rcount, Rout_low);
+  }
+
+  // The order of the next two instructions is critical in the case where
+  // Rin and Rout are the same and should not be reversed.
+
+  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
+  sra(Rin_high,     Rcount, Rout_high ); // high half
+  sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
+  if (Rcount == Rout_low) {
+    srl(Rin_low, Rcount, Rout_low);
+  }
+  ba(done);
+  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
+
+  // shift >= 32 bits, Ralt_count = Rcount-32
+  bind(big_shift);
+
+  sra(Rin_high, Ralt_count, Rout_low);
+  sra(Rin_high,         31, Rout_high); // sign into hi
+
+  bind( done );
+}
+
+
+
+void MacroAssembler::lushr( Register Rin_high,  Register Rin_low,
+                            Register Rcount,
+                            Register Rout_high, Register Rout_low,
+                            Register Rtemp ) {
+
+  Register Ralt_count = Rtemp;
+  Register Rxfer_bits = Rtemp;
+
+  assert( Ralt_count != Rin_high
+      &&  Ralt_count != Rin_low
+      &&  Ralt_count != Rcount
+      &&  Rxfer_bits != Rin_low
+      &&  Rxfer_bits != Rin_high
+      &&  Rxfer_bits != Rcount
+      &&  Rxfer_bits != Rout_high
+      &&  Rout_high  != Rin_low,
+        "register alias checks");
+
+  Label big_shift, done;
+
+  // This code can be optimized to use the 64 bit shifts in V9.
+  // Here we use the 32 bit shifts.
+
+  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
+  subcc(Rcount,   31, Ralt_count);
+  br(greater, true, pn, big_shift);
+  delayed()->dec(Ralt_count);
+
+  // shift < 32 bits, Ralt_count = Rcount-31
+
+  // We get the transfer bits by shifting left by 32-count the high
+  // register. This is done by shifting left by 31-count and then by one
+  // more to take care of the special (rare) case where count is zero
+  // (shifting by 32 would not work).
+
+  neg(Ralt_count);
+  if (Rcount != Rout_low) {
+    srl(Rin_low, Rcount, Rout_low);
+  }
+
+  // The order of the next two instructions is critical in the case where
+  // Rin and Rout are the same and should not be reversed.
+
+  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
+  srl(Rin_high,     Rcount, Rout_high ); // high half
+  sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
+  if (Rcount == Rout_low) {
+    srl(Rin_low, Rcount, Rout_low);
+  }
+  ba(done);
+  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
+
+  // shift >= 32 bits, Ralt_count = Rcount-32
+  bind(big_shift);
+
+  srl(Rin_high, Ralt_count, Rout_low);
+  clr(Rout_high);
+
+  bind( done );
+}
+
+#ifdef _LP64
+void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
+  cmp(Ra, Rb);
+  mov(-1, Rresult);
+  movcc(equal,   false, xcc,  0, Rresult);
+  movcc(greater, false, xcc,  1, Rresult);
+}
+#endif
+
+
+void MacroAssembler::load_sized_value(Address src, Register dst, size_t size_in_bytes, bool is_signed) {
+  switch (size_in_bytes) {
+  case  8:  ld_long(src, dst); break;
+  case  4:  ld(     src, dst); break;
+  case  2:  is_signed ? ldsh(src, dst) : lduh(src, dst); break;
+  case  1:  is_signed ? ldsb(src, dst) : ldub(src, dst); break;
+  default:  ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
+  switch (size_in_bytes) {
+  case  8:  st_long(src, dst); break;
+  case  4:  st(     src, dst); break;
+  case  2:  sth(    src, dst); break;
+  case  1:  stb(    src, dst); break;
+  default:  ShouldNotReachHere();
+  }
+}
+
+
+void MacroAssembler::float_cmp( bool is_float, int unordered_result,
+                                FloatRegister Fa, FloatRegister Fb,
+                                Register Rresult) {
+
+  fcmp(is_float ? FloatRegisterImpl::S : FloatRegisterImpl::D, fcc0, Fa, Fb);
+
+  Condition lt = unordered_result == -1 ? f_unorderedOrLess    : f_less;
+  Condition eq =                          f_equal;
+  Condition gt = unordered_result ==  1 ? f_unorderedOrGreater : f_greater;
+
+  if (VM_Version::v9_instructions_work()) {
+
+    mov(-1, Rresult);
+    movcc(eq, true, fcc0, 0, Rresult);
+    movcc(gt, true, fcc0, 1, Rresult);
+
+  } else {
+    Label done;
+
+    set( -1, Rresult );
+    //fb(lt, true, pn, done); delayed()->set( -1, Rresult );
+    fb( eq, true, pn, done);  delayed()->set(  0, Rresult );
+    fb( gt, true, pn, done);  delayed()->set(  1, Rresult );
+
+    bind (done);
+  }
+}
+
+
+void MacroAssembler::fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
+{
+  if (VM_Version::v9_instructions_work()) {
+    Assembler::fneg(w, s, d);
+  } else {
+    if (w == FloatRegisterImpl::S) {
+      Assembler::fneg(w, s, d);
+    } else if (w == FloatRegisterImpl::D) {
+      // number() does a sanity check on the alignment.
+      assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
+        ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");
+
+      Assembler::fneg(FloatRegisterImpl::S, s, d);
+      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
+    } else {
+      assert(w == FloatRegisterImpl::Q, "Invalid float register width");
+
+      // number() does a sanity check on the alignment.
+      assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
+        ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");
+
+      Assembler::fneg(FloatRegisterImpl::S, s, d);
+      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
+      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
+      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
+    }
+  }
+}
+
+void MacroAssembler::fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
+{
+  if (VM_Version::v9_instructions_work()) {
+    Assembler::fmov(w, s, d);
+  } else {
+    if (w == FloatRegisterImpl::S) {
+      Assembler::fmov(w, s, d);
+    } else if (w == FloatRegisterImpl::D) {
+      // number() does a sanity check on the alignment.
+      assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
+        ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");
+
+      Assembler::fmov(FloatRegisterImpl::S, s, d);
+      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
+    } else {
+      assert(w == FloatRegisterImpl::Q, "Invalid float register width");
+
+      // number() does a sanity check on the alignment.
+      assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
+        ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");
+
+      Assembler::fmov(FloatRegisterImpl::S, s, d);
+      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
+      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
+      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
+    }
+  }
+}
+
+void MacroAssembler::fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d)
+{
+  if (VM_Version::v9_instructions_work()) {
+    Assembler::fabs(w, s, d);
+  } else {
+    if (w == FloatRegisterImpl::S) {
+      Assembler::fabs(w, s, d);
+    } else if (w == FloatRegisterImpl::D) {
+      // number() does a sanity check on the alignment.
+      assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) &&
+        ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check");
+
+      Assembler::fabs(FloatRegisterImpl::S, s, d);
+      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
+    } else {
+      assert(w == FloatRegisterImpl::Q, "Invalid float register width");
+
+      // number() does a sanity check on the alignment.
+      assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) &&
+       ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check");
+
+      Assembler::fabs(FloatRegisterImpl::S, s, d);
+      Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor());
+      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor());
+      Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor());
+    }
+  }
+}
+
+void MacroAssembler::save_all_globals_into_locals() {
+  mov(G1,L1);
+  mov(G2,L2);
+  mov(G3,L3);
+  mov(G4,L4);
+  mov(G5,L5);
+  mov(G6,L6);
+  mov(G7,L7);
+}
+
+void MacroAssembler::restore_globals_from_locals() {
+  mov(L1,G1);
+  mov(L2,G2);
+  mov(L3,G3);
+  mov(L4,G4);
+  mov(L5,G5);
+  mov(L6,G6);
+  mov(L7,G7);
+}
+
+// Use for 64 bit operation.
+void MacroAssembler::casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm)
+{
+  // store ptr_reg as the new top value
+#ifdef _LP64
+  casx(top_ptr_reg, top_reg, ptr_reg);
+#else
+  cas_under_lock(top_ptr_reg, top_reg, ptr_reg, lock_addr, use_call_vm);
+#endif // _LP64
+}
+
+// [RGV] This routine does not handle 64-bit operations;
+//       use casx_under_lock() or casx directly!
+void MacroAssembler::cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm)
+{
+  // store ptr_reg as the new top value
+  if (VM_Version::v9_instructions_work()) {
+    cas(top_ptr_reg, top_reg, ptr_reg);
+  } else {
+
+    // If the register is not an out nor global, it is not visible
+    // after the save.  Allocate a register for it, save its
+    // value in the register save area (the save may not flush
+    // registers to the save area).
+
+    Register top_ptr_reg_after_save;
+    Register top_reg_after_save;
+    Register ptr_reg_after_save;
+
+    if (top_ptr_reg->is_out() || top_ptr_reg->is_global()) {
+      top_ptr_reg_after_save = top_ptr_reg->after_save();
+    } else {
+      Address reg_save_addr = top_ptr_reg->address_in_saved_window();
+      top_ptr_reg_after_save = L0;
+      st(top_ptr_reg, reg_save_addr);
+    }
+
+    if (top_reg->is_out() || top_reg->is_global()) {
+      top_reg_after_save = top_reg->after_save();
+    } else {
+      Address reg_save_addr = top_reg->address_in_saved_window();
+      top_reg_after_save = L1;
+      st(top_reg, reg_save_addr);
+    }
+
+    if (ptr_reg->is_out() || ptr_reg->is_global()) {
+      ptr_reg_after_save = ptr_reg->after_save();
+    } else {
+      Address reg_save_addr = ptr_reg->address_in_saved_window();
+      ptr_reg_after_save = L2;
+      st(ptr_reg, reg_save_addr);
+    }
+
+    const Register& lock_reg = L3;
+    const Register& lock_ptr_reg = L4;
+    const Register& value_reg = L5;
+    const Register& yield_reg = L6;
+    const Register& yieldall_reg = L7;
+
+    save_frame();
+
+    if (top_ptr_reg_after_save == L0) {
+      ld(top_ptr_reg->address_in_saved_window().after_save(), top_ptr_reg_after_save);
+    }
+
+    if (top_reg_after_save == L1) {
+      ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save);
+    }
+
+    if (ptr_reg_after_save == L2) {
+      ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save);
+    }
+
+    Label retry_get_lock;
+    Label not_same;
+    Label dont_yield;
+
+    assert(lock_addr, "lock_addr should be non-null for V8");
+    set((intptr_t)lock_addr, lock_ptr_reg);
+    // Initialize yield counter
+    mov(G0,yield_reg);
+    mov(G0, yieldall_reg);
+    set(StubRoutines::Sparc::locked, lock_reg);
+
+    bind(retry_get_lock);
+    cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield);
+
+    if (use_call_vm) {
+      Untested("Need to verify global reg consistency");
+      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
+    } else {
+      // Save the regs and make space for a C call
+      save(SP, -96, SP);
+      save_all_globals_into_locals();
+      call(CAST_FROM_FN_PTR(address,os::yield_all));
+      delayed()->mov(yieldall_reg, O0);
+      restore_globals_from_locals();
+      restore();
+    }
+
+    // reset the counter
+    mov(G0,yield_reg);
+    add(yieldall_reg, 1, yieldall_reg);
+
+    bind(dont_yield);
+    // try to get lock
+    Assembler::swap(lock_ptr_reg, 0, lock_reg);
+
+    // did we get the lock?
+    cmp(lock_reg, StubRoutines::Sparc::unlocked);
+    br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
+    delayed()->add(yield_reg,1,yield_reg);
+
+    // yes, got lock.  do we have the same top?
+    ld(top_ptr_reg_after_save, 0, value_reg);
+    cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same);
+
+    // yes, same top.
+    st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
+    membar(Assembler::StoreStore);
+
+    bind(not_same);
+    mov(value_reg, ptr_reg_after_save);
+    st(lock_reg, lock_ptr_reg, 0); // unlock
+
+    restore();
+  }
+}
+
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp,
+                                                      int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0)
+    return RegisterOrConstant(value + offset);
+
+  // load indirectly to solve generation ordering problem
+  AddressLiteral a(delayed_value_addr);
+  load_ptr_contents(a, tmp);
+
+#ifdef ASSERT
+  tst(tmp);
+  breakpoint_trap(zero, xcc);
+#endif
+
+  if (offset != 0)
+    add(tmp, offset, tmp);
+
+  return RegisterOrConstant(tmp);
+}
+
+
+RegisterOrConstant MacroAssembler::regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {
+  assert(d.register_or_noreg() != G0, "lost side effect");
+  if ((s2.is_constant() && s2.as_constant() == 0) ||
+      (s2.is_register() && s2.as_register() == G0)) {
+    // Do nothing, just move value.
+    if (s1.is_register()) {
+      if (d.is_constant())  d = temp;
+      mov(s1.as_register(), d.as_register());
+      return d;
+    } else {
+      return s1;
+    }
+  }
+
+  if (s1.is_register()) {
+    assert_different_registers(s1.as_register(), temp);
+    if (d.is_constant())  d = temp;
+    andn(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
+    return d;
+  } else {
+    if (s2.is_register()) {
+      assert_different_registers(s2.as_register(), temp);
+      if (d.is_constant())  d = temp;
+      set(s1.as_constant(), temp);
+      andn(temp, s2.as_register(), d.as_register());
+      return d;
+    } else {
+      intptr_t res = s1.as_constant() & ~s2.as_constant();
+      return res;
+    }
+  }
+}
+
+RegisterOrConstant MacroAssembler::regcon_inc_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {
+  assert(d.register_or_noreg() != G0, "lost side effect");
+  if ((s2.is_constant() && s2.as_constant() == 0) ||
+      (s2.is_register() && s2.as_register() == G0)) {
+    // Do nothing, just move value.
+    if (s1.is_register()) {
+      if (d.is_constant())  d = temp;
+      mov(s1.as_register(), d.as_register());
+      return d;
+    } else {
+      return s1;
+    }
+  }
+
+  if (s1.is_register()) {
+    assert_different_registers(s1.as_register(), temp);
+    if (d.is_constant())  d = temp;
+    add(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
+    return d;
+  } else {
+    if (s2.is_register()) {
+      assert_different_registers(s2.as_register(), temp);
+      if (d.is_constant())  d = temp;
+      add(s2.as_register(), ensure_simm13_or_reg(s1, temp), d.as_register());
+      return d;
+    } else {
+      intptr_t res = s1.as_constant() + s2.as_constant();
+      return res;
+    }
+  }
+}
+
+RegisterOrConstant MacroAssembler::regcon_sll_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) {
+  assert(d.register_or_noreg() != G0, "lost side effect");
+  if (!is_simm13(s2.constant_or_zero()))
+    s2 = (s2.as_constant() & 0xFF);
+  if ((s2.is_constant() && s2.as_constant() == 0) ||
+      (s2.is_register() && s2.as_register() == G0)) {
+    // Do nothing, just move value.
+    if (s1.is_register()) {
+      if (d.is_constant())  d = temp;
+      mov(s1.as_register(), d.as_register());
+      return d;
+    } else {
+      return s1;
+    }
+  }
+
+  if (s1.is_register()) {
+    assert_different_registers(s1.as_register(), temp);
+    if (d.is_constant())  d = temp;
+    sll_ptr(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register());
+    return d;
+  } else {
+    if (s2.is_register()) {
+      assert_different_registers(s2.as_register(), temp);
+      if (d.is_constant())  d = temp;
+      set(s1.as_constant(), temp);
+      sll_ptr(temp, s2.as_register(), d.as_register());
+      return d;
+    } else {
+      intptr_t res = s1.as_constant() << s2.as_constant();
+      return res;
+    }
+  }
+}
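+
+// Hypothetical usage of the regcon_* helpers above (editor's note): when both
+// operands are constants the result folds at assembly time and no instruction
+// is emitted; otherwise at most one ALU instruction is generated per call.
+// With illustrative names entry_size and header_offset:
+//
+//   RegisterOrConstant off = itable_index;
+//   off = regcon_sll_ptr(itable_index, exact_log2(entry_size), off); // index * entry_size
+//   off = regcon_inc_ptr(off, header_offset, off);                   // ... + header_offset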
+
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Register sethi_temp,
+                                             Label& L_no_such_interface) {
+  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  Label L_no_such_interface_restore;
+  bool did_save = false;
+  if (scan_temp == noreg || sethi_temp == noreg) {
+    Register recv_2 = recv_klass->is_global() ? recv_klass : L0;
+    Register intf_2 = intf_klass->is_global() ? intf_klass : L1;
+    assert(method_result->is_global(), "must be able to return value");
+    scan_temp  = L2;
+    sethi_temp = L3;
+    save_frame_and_mov(0, recv_klass, recv_2, intf_klass, intf_2);
+    recv_klass = recv_2;
+    intf_klass = intf_2;
+    did_save = true;
+  }
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
+  int scan_step   = itableOffsetEntry::size() * wordSize;
+  int vte_size    = vtableEntry::size() * wordSize;
+
+  lduw(recv_klass, InstanceKlass::vtable_length_offset() * wordSize, scan_temp);
+  // %%% We should store the aligned, prescaled offset in the klassoop.
+  // Then the next several instructions would fold away.
+
+  int round_to_unit = ((HeapWordsPerLong > 1) ? BytesPerLong : 0);
+  int itb_offset = vtable_base;
+  if (round_to_unit != 0) {
+    // hoist first instruction of round_to(scan_temp, BytesPerLong):
+    itb_offset += round_to_unit - wordSize;
+  }
+  int itb_scale = exact_log2(vtableEntry::size() * wordSize);
+  sll(scan_temp, itb_scale,  scan_temp);
+  add(scan_temp, itb_offset, scan_temp);
+  if (round_to_unit != 0) {
+    // Round up to align_object_offset boundary
+    // see code for InstanceKlass::start_of_itable!
+    // Was: round_to(scan_temp, BytesPerLong);
+    // Hoisted: add(scan_temp, BytesPerLong-1, scan_temp);
+    and3(scan_temp, -round_to_unit, scan_temp);
+  }
+  add(recv_klass, scan_temp, scan_temp);
+
+  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+  RegisterOrConstant itable_offset = itable_index;
+  itable_offset = regcon_sll_ptr(itable_index, exact_log2(itableMethodEntry::size() * wordSize), itable_offset);
+  itable_offset = regcon_inc_ptr(itable_offset, itableMethodEntry::method_offset_in_bytes(), itable_offset);
+  add(recv_klass, ensure_simm13_or_reg(itable_offset, sethi_temp), recv_klass);
+
+  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+  //   if (scan->interface() == intf) {
+  //     result = (klass + scan->offset() + itable_index);
+  //   }
+  // }
+  Label L_search, L_found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {
+    // %%%% Could load both offset and interface in one ldx, if they were
+    // in the opposite order.  This would save a load.
+    ld_ptr(scan_temp, itableOffsetEntry::interface_offset_in_bytes(), method_result);
+
+    // Check that this entry is non-null.  A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    bpr(Assembler::rc_z, false, Assembler::pn, method_result, did_save ? L_no_such_interface_restore : L_no_such_interface);
+    delayed()->cmp(method_result, intf_klass);
+
+    if (peel) {
+      brx(Assembler::equal,    false, Assembler::pt, L_found_method);
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, L_search);
+      // (invert the test to fall through to found_method...)
+    }
+    delayed()->add(scan_temp, scan_step, scan_temp);
+
+    if (!peel)  break;
+
+    bind(L_search);
+  }
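+  // Note: the peel == 1 pass above emits the first itable probe inline, so the
+  // common case (a hit on the very first entry) falls straight through to
+  // L_found_method without executing the loop-back branch.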
+
+  bind(L_found_method);
+
+  // Got a hit.
+  int ito_offset = itableOffsetEntry::offset_offset_in_bytes();
+  // scan_temp[-scan_step] points to the vtable offset we need
+  ito_offset -= scan_step;
+  lduw(scan_temp, ito_offset, scan_temp);
+  ld_ptr(recv_klass, scan_temp, method_result);
+
+  if (did_save) {
+    Label L_done;
+    ba(L_done);
+    delayed()->restore();
+
+    bind(L_no_such_interface_restore);
+    ba(L_no_such_interface);
+    delayed()->restore();
+
+    bind(L_done);
+  }
+}
+
+
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg());
+  Register sethi_temp = method_result;
+  const int base = (InstanceKlass::vtable_start_offset() * wordSize +
+                    // method pointer offset within the vtable entry:
+                    vtableEntry::method_offset_in_bytes());
+  RegisterOrConstant vtable_offset = vtable_index;
+  // Each of the following three lines potentially generates an instruction.
+  // But the total number of address formation instructions will always be
+  // at most two, and will often be zero.  In any case, it will be optimal.
+  // If vtable_index is a register, we will have (sll_ptr N,x; inc_ptr B,x; ld_ptr k,x).
+  // If vtable_index is a constant, we will have at most (set B+X<<N,t; ld_ptr k,t).
+  vtable_offset = regcon_sll_ptr(vtable_index, exact_log2(vtableEntry::size() * wordSize), vtable_offset);
+  vtable_offset = regcon_inc_ptr(vtable_offset, base, vtable_offset, sethi_temp);
+  Address vtable_entry_addr(recv_klass, ensure_simm13_or_reg(vtable_offset, sethi_temp));
+  ld_ptr(vtable_entry_addr, method_result);
+}
+
+
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp_reg,
+                                         Register temp2_reg,
+                                         Label& L_success) {
+  Register sub_2 = sub_klass;
+  Register sup_2 = super_klass;
+  if (!sub_2->is_global())  sub_2 = L0;
+  if (!sup_2->is_global())  sup_2 = L1;
+  bool did_save = false;
+  if (temp_reg == noreg || temp2_reg == noreg) {
+    temp_reg = L2;
+    temp2_reg = L3;
+    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+    sub_klass = sub_2;
+    super_klass = sup_2;
+    did_save = true;
+  }
+  Label L_failure, L_pop_to_failure, L_pop_to_success;
+  check_klass_subtype_fast_path(sub_klass, super_klass,
+                                temp_reg, temp2_reg,
+                                (did_save ? &L_pop_to_success : &L_success),
+                                (did_save ? &L_pop_to_failure : &L_failure), NULL);
+
+  if (!did_save)
+    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+  check_klass_subtype_slow_path(sub_2, sup_2,
+                                L2, L3, L4, L5,
+                                NULL, &L_pop_to_failure);
+
+  // on success:
+  bind(L_pop_to_success);
+  restore();
+  ba_short(L_success);
+
+  // on failure:
+  bind(L_pop_to_failure);
+  restore();
+  bind(L_failure);
+}
+
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                        RegisterOrConstant super_check_offset) {
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
+
+  bool must_load_sco  = (super_check_offset.constant_or_zero() == -1);
+  bool need_slow_path = (must_load_sco ||
+                         super_check_offset.constant_or_zero() == sco_offset);
+
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass, temp_reg,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp2_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1 ||
+         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
+         "at most one NULL in the batch, usually");
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface.  Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmp(super_klass, sub_klass);
+  brx(Assembler::equal, false, Assembler::pn, *L_success);
+  delayed()->nop();
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    // The super check offset is always positive...
+    lduw(super_klass, sco_offset, temp2_reg);
+    super_check_offset = RegisterOrConstant(temp2_reg);
+    // super_check_offset is register.
+    assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset.as_register());
+  }
+  ld_ptr(sub_klass, super_check_offset, temp_reg);
+  cmp(super_klass, temp_reg);
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  // Hacked ba(), which may only be used just before L_fallthrough.
+#define FINAL_JUMP(label)            \
+  if (&(label) != &L_fallthrough) {  \
+    ba(label);  delayed()->nop();    \
+  }
+
+  if (super_check_offset.is_register()) {
+    brx(Assembler::equal, false, Assembler::pn, *L_success);
+    delayed()->cmp(super_check_offset.as_register(), sc_offset);
+
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, false, Assembler::pt, *L_slow_path);
+      delayed()->nop();
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
+      delayed()->nop();
+      FINAL_JUMP(*L_slow_path);
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      brx(Assembler::equal, false, Assembler::pt, *L_success);
+      delayed()->nop();
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
+      delayed()->nop();
+      FINAL_JUMP(*L_success);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, false, Assembler::pt, *L_success);
+      delayed()->nop();
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
+      delayed()->nop();
+      FINAL_JUMP(*L_success);
+    }
+  }
+
+  bind(L_fallthrough);
+
+#undef FINAL_JUMP
+}
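+
+// Editor's sketch (not emitted code) of the fast path above:
+//
+//   if (sub_klass == super_klass) return success;            // self check
+//   if (*(sub_klass + super_check_offset) == super_klass)
+//     return success;            // primary super, or secondary-cache hit
+//   if (super_check_offset == secondary_super_cache_offset)
+//     goto slow_path;            // a cache miss is not decisive
+//   return failure;              // a primary-display miss is decisive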
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register count_temp,
+                                                   Register scan_temp,
+                                                   Register scratch_reg,
+                                                   Register coop_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure) {
+  assert_different_registers(sub_klass, super_klass,
+                             count_temp, scan_temp, scratch_reg, coop_reg);
+
+  Label L_fallthrough, L_loop;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+
+#ifndef PRODUCT
+  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+  inc_counter((address) pst_counter, count_temp, scan_temp);
+#endif
+
+  // We will consult the secondary-super array.
+  ld_ptr(sub_klass, ss_offset, scan_temp);
+
+  Register search_key = super_klass;
+
+  // Load the array length.  (lduw zero-extends, which does the right thing on LP64.)
+  lduw(scan_temp, Array<Klass*>::length_offset_in_bytes(), count_temp);
+
+  // Check for empty secondary super list
+  tst(count_temp);
+
+  // Elements of the secondary-supers array are pointer-sized.
+  int element_size = wordSize;
+
+  // Top of search loop
+  bind(L_loop);
+  br(Assembler::equal, false, Assembler::pn, *L_failure);
+  delayed()->add(scan_temp, element_size, scan_temp);
+
+  // Skip the array header in all array accesses.
+  int elem_offset = Array<Klass*>::base_offset_in_bytes();
+  elem_offset -= element_size;   // the scan pointer was pre-incremented also
+
+  // Load next super to check
+  ld_ptr(scan_temp, elem_offset, scratch_reg);
+
+  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+  cmp(scratch_reg, search_key);
+
+  // A miss means we are NOT a subtype and need to keep looping
+  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
+  delayed()->deccc(count_temp); // decrement trip counter in delay slot
+
+  // Success.  Cache the super we found and proceed in triumph.
+  st_ptr(super_klass, sub_klass, sc_offset);
+
+  if (L_success != &L_fallthrough) {
+    ba(*L_success);
+    delayed()->nop();
+  }
+
+  bind(L_fallthrough);
+}
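+
+// Editor's sketch (not emitted code) of the scan above, with illustrative
+// accessor names:
+//
+//   Array<Klass*>* ss = sub_klass->secondary_supers();
+//   for (int i = 0; i < ss->length(); i++) {
+//     if (ss->at(i) == super_klass) {
+//       sub_klass->set_secondary_super_cache(super_klass);  // cache the hit
+//       return success;
+//     }
+//   }
+//   return failure;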
+
+
+RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot,
+                                                   Register temp_reg,
+                                                   int extra_slot_offset) {
+  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+  int stackElementSize = Interpreter::stackElementSize;
+  int offset = extra_slot_offset * stackElementSize;
+  if (arg_slot.is_constant()) {
+    offset += arg_slot.as_constant() * stackElementSize;
+    return offset;
+  } else {
+    assert(temp_reg != noreg, "must specify");
+    sll_ptr(arg_slot.as_register(), exact_log2(stackElementSize), temp_reg);
+    if (offset != 0)
+      add(temp_reg, offset, temp_reg);
+    return temp_reg;
+  }
+}
+
+
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+                                         Register temp_reg,
+                                         int extra_slot_offset) {
+  return Address(Gargs, argument_offset(arg_slot, temp_reg, extra_slot_offset));
+}
+
+
+void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
+                                          Register temp_reg,
+                                          Label& done, Label* slow_case,
+                                          BiasedLockingCounters* counters) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+
+  if (PrintBiasedLockingStatistics) {
+    assert_different_registers(obj_reg, mark_reg, temp_reg, O7);
+    if (counters == NULL)
+      counters = BiasedLocking::counters();
+  }
+
+  Label cas_label;
+
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
+  cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);
+
+  load_klass(obj_reg, temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
+  or3(G2_thread, temp_reg, temp_reg);
+  xor3(mark_reg, temp_reg, temp_reg);
+  andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
+  if (counters != NULL) {
+    cond_inc(Assembler::equal, (address) counters->biased_lock_entry_count_addr(), mark_reg, temp_reg);
+    // Reload mark_reg as we may need it later
+    ld_ptr(Address(obj_reg, oopDesc::mark_offset_in_bytes()), mark_reg);
+  }
+  brx(Assembler::equal, true, Assembler::pt, done);
+  delayed()->nop();
+
+  Label try_revoke_bias;
+  Label try_rebias;
+  Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
+  assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  btst(markOopDesc::biased_lock_mask_in_place, temp_reg);
+  brx(Assembler::notZero, false, Assembler::pn, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  delayed()->btst(markOopDesc::epoch_mask_in_place, temp_reg);
+  brx(Assembler::notZero, false, Assembler::pn, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  delayed()->and3(mark_reg,
+                  markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
+                  mark_reg);
+  or3(G2_thread, mark_reg, temp_reg);
+  casn(mark_addr.base(), mark_reg, temp_reg);
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+  cmp(mark_reg, temp_reg);
+  if (counters != NULL) {
+    cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg);
+  }
+  if (slow_case != NULL) {
+    brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
+    delayed()->nop();
+  }
+  ba_short(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  load_klass(obj_reg, temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
+  or3(G2_thread, temp_reg, temp_reg);
+  casn(mark_addr.base(), mark_reg, temp_reg);
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+  cmp(mark_reg, temp_reg);
+  if (counters != NULL) {
+    cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg);
+  }
+  if (slow_case != NULL) {
+    brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
+    delayed()->nop();
+  }
+  ba_short(done);
+
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  load_klass(obj_reg, temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
+  casn(mark_addr.base(), mark_reg, temp_reg);
+  // Fall through to the normal CAS-based lock, because no matter what
+  // the result of the above CAS, some thread must have succeeded in
+  // removing the bias bit from the object's header.
+  if (counters != NULL) {
+    cmp(mark_reg, temp_reg);
+    cond_inc(Assembler::zero, (address) counters->revoked_lock_entry_count_addr(), mark_reg, temp_reg);
+  }
+
+  bind(cas_label);
+}
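+
+// Editor's sketch (not emitted code) of the bias triage above:
+//
+//   if ((mark & biased_lock_mask) != biased_lock_pattern) goto cas_label;
+//   xor = mark ^ (thread | klass->prototype_header());
+//   if ((xor & ~age_mask) == 0)  goto done;             // already biased to us
+//   if (xor & biased_lock_mask)  goto try_revoke_bias;  // biasing now disabled
+//   if (xor & epoch_mask)        goto try_rebias;       // epoch expired
+//   CAS in (thread | unbiased header) to claim the anonymous bias.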
+
+void MacroAssembler::biased_locking_exit (Address mark_addr, Register temp_reg, Label& done,
+                                          bool allow_delay_slot_filling) {
+  // Check for biased locking unlock case, which is a no-op
+  // Note: we do not have to check the thread ID for two reasons.
+  // First, the interpreter checks for IllegalMonitorStateException at
+  // a higher level. Second, if the bias was revoked while we held the
+  // lock, the object could not be rebiased toward another thread, so
+  // the bias bit would be clear.
+  ld_ptr(mark_addr, temp_reg);
+  and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
+  cmp(temp_reg, markOopDesc::biased_lock_pattern);
+  brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done);
+  delayed();
+  if (!allow_delay_slot_filling) {
+    nop();
+  }
+}
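+
+// Editor's sketch (not emitted code): the check above is simply
+//
+//   if ((obj->mark() & biased_lock_mask) == biased_lock_pattern) goto done;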
+
+
+// CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
+// Solaris/SPARC's "as".  Another apt name would be cas_ptr().
+
+void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
+  casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+}
+
+
+
+// compiler_lock_object() and compiler_unlock_object() are direct transliterations
+// of i486.ad fast_lock() and fast_unlock().  See those methods for detailed comments.
+// The code could be tightened up considerably.
+//
+// box->dhw disposition - post-conditions at DONE_LABEL.
+// -   Successful inflated lock:  box->dhw != 0.
+//     Any non-zero value suffices.
+//     Consider G2_thread, rsp, boxReg, or unused_mark()
+// -   Successful Stack-lock: box->dhw == mark.
+//     box->dhw must contain the displaced mark word value
+// -   Failure -- icc.ZFlag == 0 and box->dhw is undefined.
+//     The slow-path fast_enter() and slow_enter() operators
+//     are responsible for setting box->dhw = NonZero (typically ::unused_mark).
+// -   Biased: box->dhw is undefined
+//
+// SPARC refworkload performance - specifically jetstream and scimark - are
+// extremely sensitive to the size of the code emitted by compiler_lock_object
+// and compiler_unlock_object.  Critically, the key factor is code size, not path
+// length.  (Simple experiments that padded CLO with unexecuted NOPs demonstrate
+// the effect).
+
+
+void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
+                                          Register Rbox, Register Rscratch,
+                                          BiasedLockingCounters* counters,
+                                          bool try_bias) {
+   Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
+
+   verify_oop(Roop);
+   Label done ;
+
+   if (counters != NULL) {
+     inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
+   }
+
+   if (EmitSync & 1) {
+     mov(3, Rscratch);
+     st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+     cmp(SP, G0);
+     return ;
+   }
+
+   if (EmitSync & 2) {
+
+     // Fetch object's markword
+     ld_ptr(mark_addr, Rmark);
+
+     if (try_bias) {
+        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
+     }
+
+     // Save Rbox in Rscratch to be used for the cas operation
+     mov(Rbox, Rscratch);
+
+     // set Rmark to markOop | markOopDesc::unlocked_value
+     or3(Rmark, markOopDesc::unlocked_value, Rmark);
+
+     // Initialize the box.  (Must happen before we update the object mark!)
+     st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
+
+     // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
+     assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+     casx_under_lock(mark_addr.base(), Rmark, Rscratch,
+        (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+
+     // if compare/exchange succeeded we found an unlocked object and we now have locked it
+     // hence we are done
+     cmp(Rmark, Rscratch);
+#ifdef _LP64
+     sub(Rscratch, STACK_BIAS, Rscratch);
+#endif
+     brx(Assembler::equal, false, Assembler::pt, done);
+     delayed()->sub(Rscratch, SP, Rscratch);  //pull next instruction into delay slot
+
+     // we did not find an unlocked object so see if this is a recursive case
+     // sub(Rscratch, SP, Rscratch);
+     assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
+     andcc(Rscratch, 0xfffff003, Rscratch);
+     st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+     bind (done);
+     return ;
+   }
+
+   Label Egress ;
+
+   if (EmitSync & 256) {
+      Label IsInflated ;
+
+      ld_ptr(mark_addr, Rmark);           // fetch obj->mark
+      // Triage: biased, stack-locked, neutral, inflated
+      if (try_bias) {
+        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
+        // Invariant: if control reaches this point in the emitted stream
+        // then Rmark has not been modified.
+      }
+
+      // Store mark into displaced mark field in the on-stack basic-lock "box"
+      // Critically, this must happen before the CAS
+      // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
+      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
+      andcc(Rmark, 2, G0);
+      brx(Assembler::notZero, false, Assembler::pn, IsInflated);
+      delayed()->
+
+      // Try stack-lock acquisition.
+      // Beware: the 1st instruction is in a delay slot
+      mov(Rbox,  Rscratch);
+      or3(Rmark, markOopDesc::unlocked_value, Rmark);
+      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+      casn(mark_addr.base(), Rmark, Rscratch);
+      cmp(Rmark, Rscratch);
+      brx(Assembler::equal, false, Assembler::pt, done);
+      delayed()->sub(Rscratch, SP, Rscratch);
+
+      // Stack-lock attempt failed - check for recursive stack-lock.
+      // See the comments below about how we might remove this case.
+#ifdef _LP64
+      sub(Rscratch, STACK_BIAS, Rscratch);
+#endif
+      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
+      andcc(Rscratch, 0xfffff003, Rscratch);
+      br(Assembler::always, false, Assembler::pt, done);
+      delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+
+      bind(IsInflated);
+      if (EmitSync & 64) {
+         // If m->owner != null goto IsLocked
+         // Pessimistic form: Test-and-CAS vs CAS
+         // The optimistic form avoids RTS->RTO cache line upgrades.
+         ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
+         andcc(Rscratch, Rscratch, G0);
+         brx(Assembler::notZero, false, Assembler::pn, done);
+         delayed()->nop();
+         // m->owner == null : it's unlocked.
+      }
+
+      // Try to CAS m->owner from null to Self
+      // Invariant: if we acquire the lock then _recursions should be 0.
+      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
+      mov(G2_thread, Rscratch);
+      casn(Rmark, G0, Rscratch);
+      cmp(Rscratch, G0);
+      // Intentional fall-through into done
+   } else {
+      // Aggressively avoid the Store-before-CAS penalty
+      // Defer the store into box->dhw until after the CAS
+      Label IsInflated, Recursive ;
+
+// Anticipate CAS -- Avoid RTS->RTO upgrade
+// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
+
+      ld_ptr(mark_addr, Rmark);           // fetch obj->mark
+      // Triage: biased, stack-locked, neutral, inflated
+
+      if (try_bias) {
+        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
+        // Invariant: if control reaches this point in the emitted stream
+        // then Rmark has not been modified.
+      }
+      andcc(Rmark, 2, G0);
+      brx(Assembler::notZero, false, Assembler::pn, IsInflated);
+      delayed()->                         // Beware - dangling delay-slot
+
+      // Try stack-lock acquisition.
+      // Transiently install BUSY (0) encoding in the mark word.
+      // if the CAS of 0 into the mark was successful then we execute:
+      //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
+      //   ST obj->mark = box    -- overwrite transient 0 value
+      // This presumes TSO, of course.
+
+      mov(0, Rscratch);
+      or3(Rmark, markOopDesc::unlocked_value, Rmark);
+      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+      casn(mark_addr.base(), Rmark, Rscratch);
+// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
+      cmp(Rscratch, Rmark);
+      brx(Assembler::notZero, false, Assembler::pn, Recursive);
+      delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
+      if (counters != NULL) {
+        cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
+      }
+      ba(done);
+      delayed()->st_ptr(Rbox, mark_addr);
+
+      bind(Recursive);
+      // Stack-lock attempt failed - check for recursive stack-lock.
+      // Tests show that we can remove the recursive case with no impact
+      // on refworkload 0.83.  If we need to reduce the size of the code
+      // emitted by compiler_lock_object() the recursive case is a perfect
+      // candidate.
+      //
+      // A more extreme idea is to always inflate on stack-lock recursion.
+      // This lets us eliminate the recursive checks in compiler_lock_object
+      // and compiler_unlock_object and the (box->dhw == 0) encoding.
+      // A brief experiment - requiring changes to synchronizer.cpp and the
+      // interpreter - showed a performance *increase*.  In the same experiment I
+      // eliminated
+      // the fast-path stack-lock code from the interpreter and always passed
+      // control to the "slow" operators in synchronizer.cpp.
+
+      // Rscratch contains the fetched obj->mark value from the failed CASN.
+#ifdef _LP64
+      sub(Rscratch, STACK_BIAS, Rscratch);
+#endif
+      sub(Rscratch, SP, Rscratch);
+      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
+      andcc(Rscratch, 0xfffff003, Rscratch);
+      if (counters != NULL) {
+        // Accounting needs the Rscratch register
+        st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+        cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
+        ba_short(done);
+      } else {
+        ba(done);
+        delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+      }
+
+      bind   (IsInflated);
+      if (EmitSync & 64) {
+         // If m->owner != null goto IsLocked
+         // Test-and-CAS vs CAS
+         // Pessimistic form avoids futile (doomed) CAS attempts
+         // The optimistic form avoids RTS->RTO cache line upgrades.
+         ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
+         andcc(Rscratch, Rscratch, G0);
+         brx(Assembler::notZero, false, Assembler::pn, done);
+         delayed()->nop();
+         // m->owner == null : it's unlocked.
+      }
+
+      // Try to CAS m->owner from null to Self
+      // Invariant: if we acquire the lock then _recursions should be 0.
+      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
+      mov(G2_thread, Rscratch);
+      casn(Rmark, G0, Rscratch);
+      cmp(Rscratch, G0);
+      // ST box->displaced_header = NonZero.
+      // Any non-zero value suffices:
+      //    unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
+      st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
+      // Intentional fall-through into done
+   }
+
+   bind   (done);
+}
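+
+// Editor's sketch (not emitted code) of the default stack-lock fast path above:
+//
+//   mark = obj->mark();
+//   if (mark & 2) goto inflated;                    // monitor bit set
+//   if (cas(&obj->mark(), mark | unlocked_value, 0) succeeds) {
+//     box->dhw  = mark | unlocked_value;            // save displaced header
+//     obj->mark = box;                              // overwrite transient 0
+//   } else {
+//     ... recursive stack-lock check, else fail with icc.ZFlag == 0 ...
+//   }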
+
+void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
+                                            Register Rbox, Register Rscratch,
+                                            bool try_bias) {
+   Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
+
+   Label done ;
+
+   if (EmitSync & 4) {
+     cmp(SP, G0);
+     return ;
+   }
+
+   if (EmitSync & 8) {
+     if (try_bias) {
+        biased_locking_exit(mark_addr, Rscratch, done);
+     }
+
+     // Test first if it is a fast recursive unlock
+     ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
+     br_null_short(Rmark, Assembler::pt, done);
+
+     // Check if it is still a lightweight lock; this is true if we see
+     // the stack address of the basicLock in the markOop of the object.
+     assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+     casx_under_lock(mark_addr.base(), Rbox, Rmark,
+       (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+     ba(done);
+     delayed()->cmp(Rbox, Rmark);
+     bind(done);
+     return ;
+   }
+
+   // Beware ... if the aggregate size of the code emitted by CLO and CUO
+   // is too large, performance rolls abruptly off a cliff.
+   // This could be related to inlining policies, code cache management, or
+   // I$ effects.
+   Label LStacked ;
+
+   if (try_bias) {
+      // TODO: eliminate redundant LDs of obj->mark
+      biased_locking_exit(mark_addr, Rscratch, done);
+   }
+
+   ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
+   ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
+   andcc(Rscratch, Rscratch, G0);
+   brx(Assembler::zero, false, Assembler::pn, done);
+   delayed()->nop();      // consider: relocate fetch of mark, above, into this DS
+   andcc(Rmark, 2, G0);
+   brx(Assembler::zero, false, Assembler::pt, LStacked);
+   delayed()->nop();
+
+   // It's inflated
+   // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
+   // the ST of 0 into _owner which releases the lock.  This prevents loads
+   // and stores within the critical section from reordering (floating)
+   // past the store that releases the lock.  But TSO is a strong memory model
+   // and that particular flavor of barrier is a noop, so we can safely elide it.
+   // Note that we use 1-0 locking by default for the inflated case.  We
+   // close the resultant (and rare) race by having contended threads in
+   // monitorenter periodically poll _owner.
+   ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
+   ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
+   xor3(Rscratch, G2_thread, Rscratch);
+   orcc(Rbox, Rscratch, Rbox);
+   brx(Assembler::notZero, false, Assembler::pn, done);
+   delayed()->
+   ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
+   ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
+   orcc(Rbox, Rscratch, G0);
+   if (EmitSync & 65536) {
+      Label LSucc ;
+      brx(Assembler::notZero, false, Assembler::pn, LSucc);
+      delayed()->nop();
+      ba(done);
+      delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
+
+      bind(LSucc);
+      st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
+      if (os::is_MP()) { membar (StoreLoad); }
+      ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
+      andcc(Rscratch, Rscratch, G0);
+      brx(Assembler::notZero, false, Assembler::pt, done);
+      delayed()->andcc(G0, G0, G0);
+      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
+      mov(G2_thread, Rscratch);
+      casn(Rmark, G0, Rscratch);
+      // invert icc.zf and goto done
+      br_notnull(Rscratch, false, Assembler::pt, done);
+      delayed()->cmp(G0, G0);
+      ba(done);
+      delayed()->cmp(G0, 1);
+   } else {
+      brx(Assembler::notZero, false, Assembler::pn, done);
+      delayed()->nop();
+      ba(done);
+      delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
+   }
+
+   bind   (LStacked);
+   // Consider: we could replace the expensive CAS in the exit
+   // path with a simple ST of the displaced mark value fetched from
+   // the on-stack basiclock box.  That admits a race where a thread T2
+   // in the slow lock path -- inflating with monitor M -- could race a
+   // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
+   // More precisely T1 in the stack-lock unlock path could "stomp" the
+   // inflated mark value M installed by T2, resulting in an orphan
+   // object monitor M and T2 becoming stranded.  We can remedy that situation
+   // by having T2 periodically poll the object's mark word using timed wait
+   // operations.  If T2 discovers that a stomp has occurred it vacates
+   // the monitor M and wakes any other threads stranded on the now-orphan M.
+   // In addition the monitor scavenger, which performs deflation,
+   // would also need to check for orphan monitors and stranded threads.
+   //
+   // Finally, inflation is also used when T2 needs to assign a hashCode
+   // to O and O is stack-locked by T1.  The "stomp" race could cause
+   // an assigned hashCode value to be lost.  We can avoid that condition
+   // and provide the necessary hashCode stability invariants by ensuring
+   // that hashCode generation is idempotent between copying GCs.
+   // For example we could compute the hashCode of an object O as
+   // O's heap address XOR some high quality RNG value that is refreshed
+   // at GC-time.  The monitor scavenger would install the hashCode
+   // found in any orphan monitors.  Again, the mechanism admits a
+   // lost-update "stomp" WAW race but detects and recovers as needed.
+   //
+   // A prototype implementation showed excellent results, although
+   // the scavenger and timeout code was rather involved.
+
+   casn(mark_addr.base(), Rbox, Rscratch);
+   cmp(Rbox, Rscratch);
+   // Intentional fall through into done ...
+
+   bind(done);
+}
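+
+// Editor's sketch (not emitted code) of the default inflated ("1-0") unlock
+// path above:
+//
+//   if ((m->owner ^ self) | m->recursions) fail;  // not our simple lock
+//   if (m->cxq | m->EntryList) fail;              // waiters: slow path wakes them
+//   m->owner = 0;                                 // release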
+
+
+
+void MacroAssembler::print_CPU_state() {
+  // %%%%% need to implement this
+}
+
+void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
+  // %%%%% need to implement this
+}
+
+void MacroAssembler::push_IU_state() {
+  // %%%%% need to implement this
+}
+
+
+void MacroAssembler::pop_IU_state() {
+  // %%%%% need to implement this
+}
+
+
+void MacroAssembler::push_FPU_state() {
+  // %%%%% need to implement this
+}
+
+
+void MacroAssembler::pop_FPU_state() {
+  // %%%%% need to implement this
+}
+
+
+void MacroAssembler::push_CPU_state() {
+  // %%%%% need to implement this
+}
+
+
+void MacroAssembler::pop_CPU_state() {
+  // %%%%% need to implement this
+}
+
+
+
+void MacroAssembler::verify_tlab() {
+#ifdef ASSERT
+  if (UseTLAB && VerifyOops) {
+    Label next, next2, ok;
+    Register t1 = L0;
+    Register t2 = L1;
+    Register t3 = L2;
+
+    save_frame(0);
+    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
+    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
+    or3(t1, t2, t3);
+    cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
+    STOP("assert(top >= start)");
+    should_not_reach_here();
+
+    bind(next);
+    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
+    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
+    or3(t3, t2, t3);
+    cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
+    STOP("assert(top <= end)");
+    should_not_reach_here();
+
+    bind(next2);
+    and3(t3, MinObjAlignmentInBytesMask, t3);
+    cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
+    STOP("assert(aligned)");
+    should_not_reach_here();
+
+    bind(ok);
+    restore();
+  }
+#endif
+}
+
+
+void MacroAssembler::eden_allocate(
+  Register obj,                        // result: pointer to object after successful allocation
+  Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
+  int      con_size_in_bytes,          // object size in bytes if   known at compile time
+  Register t1,                         // temp register
+  Register t2,                         // temp register
+  Label&   slow_case                   // continuation point if fast allocation fails
+){
+  // make sure arguments make sense
+  assert_different_registers(obj, var_size_in_bytes, t1, t2);
+  assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
+  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
+
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+    // No allocation in the shared eden.
+    ba_short(slow_case);
+  } else {
+    // get eden boundaries
+    // note: we need both top & top_addr!
+    const Register top_addr = t1;
+    const Register end      = t2;
+
+    CollectedHeap* ch = Universe::heap();
+    set((intx)ch->top_addr(), top_addr);
+    intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
+    ld_ptr(top_addr, delta, end);
+    ld_ptr(top_addr, 0, obj);
+
+    // try to allocate
+    Label retry;
+    bind(retry);
+#ifdef ASSERT
+    // make sure eden top is properly aligned
+    {
+      Label L;
+      btst(MinObjAlignmentInBytesMask, obj);
+      br(Assembler::zero, false, Assembler::pt, L);
+      delayed()->nop();
+      STOP("eden top is not properly aligned");
+      bind(L);
+    }
+#endif // ASSERT
+    const Register free = end;
+    sub(end, obj, free);                                   // compute amount of free space
+    if (var_size_in_bytes->is_valid()) {
+      // size is unknown at compile time
+      cmp(free, var_size_in_bytes);
+      br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
+      delayed()->add(obj, var_size_in_bytes, end);
+    } else {
+      // size is known at compile time
+      cmp(free, con_size_in_bytes);
+      br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
+      delayed()->add(obj, con_size_in_bytes, end);
+    }
+    // Compare obj with the value at top_addr; if still equal, swap the value of
+    // end with the value at top_addr. If not equal, read the value at top_addr
+    // into end.
+    casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+    // if someone beat us on the allocation, try again, otherwise continue
+    cmp(obj, end);
+    brx(Assembler::notEqual, false, Assembler::pn, retry);
+    delayed()->mov(end, obj);                              // nop if successful since obj == end
+
+#ifdef ASSERT
+    // make sure eden top is properly aligned
+    {
+      Label L;
+      const Register top_addr = t1;
+
+      set((intx)ch->top_addr(), top_addr);
+      ld_ptr(top_addr, 0, top_addr);
+      btst(MinObjAlignmentInBytesMask, top_addr);
+      br(Assembler::zero, false, Assembler::pt, L);
+      delayed()->nop();
+      STOP("eden top is not properly aligned");
+      bind(L);
+    }
+#endif // ASSERT
+  }
+}
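+
+// Editor's sketch (not emitted code) of the shared-eden bump allocation above:
+//
+//   do {
+//     obj = *top_addr;
+//     if (*end_addr - obj < size) goto slow_case;
+//   } while (cas(top_addr, obj, obj + size) fails);  // retry if another thread won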
+
+
+void MacroAssembler::tlab_allocate(
+  Register obj,                        // result: pointer to object after successful allocation
+  Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
+  int      con_size_in_bytes,          // object size in bytes if   known at compile time
+  Register t1,                         // temp register
+  Label&   slow_case                   // continuation point if fast allocation fails
+){
+  // make sure arguments make sense
+  assert_different_registers(obj, var_size_in_bytes, t1);
+  assert(0 <= con_size_in_bytes && is_simm13(con_size_in_bytes), "illegal object size");
+  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
+
+  const Register free  = t1;
+
+  verify_tlab();
+
+  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), obj);
+
+  // calculate amount of free space
+  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), free);
+  sub(free, obj, free);
+
+  Label done;
+  if (var_size_in_bytes == noreg) {
+    cmp(free, con_size_in_bytes);
+  } else {
+    cmp(free, var_size_in_bytes);
+  }
+  br(Assembler::less, false, Assembler::pn, slow_case);
+  // calculate the new top pointer
+  if (var_size_in_bytes == noreg) {
+    delayed()->add(obj, con_size_in_bytes, free);
+  } else {
+    delayed()->add(obj, var_size_in_bytes, free);
+  }
+
+  bind(done);
+
+#ifdef ASSERT
+  // make sure new free pointer is properly aligned
+  {
+    Label L;
+    btst(MinObjAlignmentInBytesMask, free);
+    br(Assembler::zero, false, Assembler::pt, L);
+    delayed()->nop();
+    STOP("updated TLAB free is not properly aligned");
+    bind(L);
+  }
+#endif // ASSERT
+
+  // update the tlab top pointer
+  st_ptr(free, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
+  verify_tlab();
+}
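+
+// Editor's sketch (not emitted code) of the TLAB bump allocation above:
+//
+//   obj = thread->tlab_top;
+//   if (thread->tlab_end - obj < size) goto slow_case;
+//   thread->tlab_top = obj + size;   // no atomics: the TLAB is thread-local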
+
+
+void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
+  Register top = O0;
+  Register t1 = G1;
+  Register t2 = G3;
+  Register t3 = O1;
+  assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
+  Label do_refill, discard_tlab;
+
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+    // No allocation in the shared eden.
+    ba_short(slow_case);
+  }
+
+  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
+  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
+  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);
+
+  // calculate amount of free space
+  sub(t1, top, t1);
+  srl_ptr(t1, LogHeapWordSize, t1);
+
+  // Retain tlab and allocate object in shared space if
+  // the amount free in the tlab is too large to discard.
+  cmp(t1, t2);
+  brx(Assembler::lessEqual, false, Assembler::pt, discard_tlab);
+
+  // increment waste limit to prevent getting stuck on this slow path
+  delayed()->add(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment(), t2);
+  st_ptr(t2, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
+  if (TLABStats) {
+    // increment number of slow_allocations
+    ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
+    add(t2, 1, t2);
+    stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
+  }
+  ba_short(try_eden);
+
+  bind(discard_tlab);
+  if (TLABStats) {
+    // increment number of refills
+    ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
+    add(t2, 1, t2);
+    stw(t2, G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()));
+    // accumulate wastage
+    ld(G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()), t2);
+    add(t2, t1, t2);
+    stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
+  }
+
+  // if tlab is currently allocated (top or end != null) then
+  // fill [top, end + alignment_reserve) with array object
+  br_null_short(top, Assembler::pn, do_refill);
+
+  set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
+  st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
+  // set klass to intArrayKlass
+  sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
+  add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
+  sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
+  st(t1, top, arrayOopDesc::length_offset_in_bytes());
+  set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
+  ld_ptr(t2, 0, t2);
+  // Store klass last: concurrent GCs assume the length is valid if the
+  // klass field is not null.
+  store_klass(t2, top);
+  verify_oop(top);
+
+  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t1);
+  sub(top, t1, t1); // size of tlab's allocated portion
+  incr_allocated_bytes(t1, t2, t3);
+
+  // refill the tlab with an eden allocation
+  bind(do_refill);
+  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
+  sll_ptr(t1, LogHeapWordSize, t1);
+  // allocate new tlab, address returned in top
+  eden_allocate(top, t1, 0, t2, t3, slow_case);
+
+  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset()));
+  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
+#ifdef ASSERT
+  // check that tlab_size (t1) is still valid
+  {
+    Label ok;
+    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
+    sll_ptr(t2, LogHeapWordSize, t2);
+    cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
+    STOP("assert(t1 == tlab_size)");
+    should_not_reach_here();
+
+    bind(ok);
+  }
+#endif // ASSERT
+  add(top, t1, top); // t1 is tlab_size
+  sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
+  st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
+  verify_tlab();
+  ba_short(retry);
+}
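+
+// Editor's sketch (not emitted code) of the refill policy above:
+//
+//   if (tlab_free_words > refill_waste_limit) {
+//     refill_waste_limit += increment;  // retain the tlab; allocate in eden
+//     goto try_eden;
+//   }
+//   // otherwise: fill the dead tlab with a dummy int[] (keeps the heap
+//   // parseable), then allocate a fresh tlab from eden and retry.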
+
+void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
+                                          Register t1, Register t2) {
+  // Bump total bytes allocated by this thread
+  assert(t1->is_global(), "must be global reg"); // so all 64 bits are saved on a context switch
+  assert_different_registers(size_in_bytes.register_or_noreg(), t1, t2);
+  // v8 support has gone the way of the dodo
+  ldx(G2_thread, in_bytes(JavaThread::allocated_bytes_offset()), t1);
+  add(t1, ensure_simm13_or_reg(size_in_bytes, t2), t1);
+  stx(t1, G2_thread, in_bytes(JavaThread::allocated_bytes_offset()));
+}
+
+Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
+  switch (cond) {
+    // Note some conditions are synonyms for others
+    case Assembler::never:                return Assembler::always;
+    case Assembler::zero:                 return Assembler::notZero;
+    case Assembler::lessEqual:            return Assembler::greater;
+    case Assembler::less:                 return Assembler::greaterEqual;
+    case Assembler::lessEqualUnsigned:    return Assembler::greaterUnsigned;
+    case Assembler::lessUnsigned:         return Assembler::greaterEqualUnsigned;
+    case Assembler::negative:             return Assembler::positive;
+    case Assembler::overflowSet:          return Assembler::overflowClear;
+    case Assembler::always:               return Assembler::never;
+    case Assembler::notZero:              return Assembler::zero;
+    case Assembler::greater:              return Assembler::lessEqual;
+    case Assembler::greaterEqual:         return Assembler::less;
+    case Assembler::greaterUnsigned:      return Assembler::lessEqualUnsigned;
+    case Assembler::greaterEqualUnsigned: return Assembler::lessUnsigned;
+    case Assembler::positive:             return Assembler::negative;
+    case Assembler::overflowClear:        return Assembler::overflowSet;
+  }
+
+  ShouldNotReachHere(); return Assembler::overflowClear;
+}
+
+void MacroAssembler::cond_inc(Assembler::Condition cond, address counter_ptr,
+                              Register Rtmp1, Register Rtmp2 /*, Register Rtmp3, Register Rtmp4 */) {
+  Condition negated_cond = negate_condition(cond);
+  Label L;
+  brx(negated_cond, false, Assembler::pt, L);
+  delayed()->nop();
+  inc_counter(counter_ptr, Rtmp1, Rtmp2);
+  bind(L);
+}
+
+void MacroAssembler::inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2) {
+  AddressLiteral addrlit(counter_addr);
+  sethi(addrlit, Rtmp1);                 // Move hi22 bits into temporary register.
+  Address addr(Rtmp1, addrlit.low10());  // Build an address with low10 bits.
+  ld(addr, Rtmp2);
+  inc(Rtmp2);
+  st(Rtmp2, addr);
+}
+
+void MacroAssembler::inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2) {
+  inc_counter((address) counter_addr, Rtmp1, Rtmp2);
+}
+
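+// SkipIfEqual emits, in its constructor, a test of the flag byte at
+// flag_addr and a conditional branch over whatever code is generated
+// while the object is in scope; its destructor binds the branch target,
+// so the guarded code is skipped when the loaded flag satisfies the
+// given condition.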
+SkipIfEqual::SkipIfEqual(
+    MacroAssembler* masm, Register temp, const bool* flag_addr,
+    Assembler::Condition condition) {
+  _masm = masm;
+  AddressLiteral flag(flag_addr);
+  _masm->sethi(flag, temp);
+  _masm->ldub(temp, flag.low10(), temp);
+  _masm->tst(temp);
+  _masm->br(condition, false, Assembler::pt, _label);
+  _masm->delayed()->nop();
+}
+
+SkipIfEqual::~SkipIfEqual() {
+  _masm->bind(_label);
+}
+
+
+// Writes to successive stack pages, one word per page, until the given
+// size is covered, to check for stack overflow plus shadow pages.
+// Clobbers Rtsp and Rscratch.
+void MacroAssembler::bang_stack_size(Register Rsize, Register Rtsp,
+                                     Register Rscratch) {
+  // Work on a copy of the stack pointer.
+  mov(SP, Rtsp);
+
+  // Bang stack for total size given plus stack shadow page size.
+  // Bang one page at a time because a large size can overflow yellow and
+  // red zones (the bang will fail but stack overflow handling can't tell that
+  // it was a stack overflow bang vs a regular segv).
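+  // For example, assuming a 4K page size and a total size of 40K, the
+  // loop below writes one word at SP-4K, SP-8K, ..., SP-40K, and the
+  // trailing loop then bangs the remaining shadow pages below that.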
+  int offset = os::vm_page_size();
+  Register Roffset = Rscratch;
+
+  Label loop;
+  bind(loop);
+  set((-offset)+STACK_BIAS, Rscratch);
+  st(G0, Rtsp, Rscratch);
+  set(offset, Roffset);
+  sub(Rsize, Roffset, Rsize);
+  cmp(Rsize, G0);
+  br(Assembler::greater, false, Assembler::pn, loop);
+  delayed()->sub(Rtsp, Roffset, Rtsp);
+
+  // Bang down shadow pages too.
+  // The -1 because we already subtracted 1 page.
+  for (int i = 0; i < StackShadowPages-1; i++) {
+    set((-i*offset)+STACK_BIAS, Rscratch);
+    st(G0, Rtsp, Rscratch);
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+static address satb_log_enqueue_with_frame = NULL;
+static u_char* satb_log_enqueue_with_frame_end = NULL;
+
+static address satb_log_enqueue_frameless = NULL;
+static u_char* satb_log_enqueue_frameless_end = NULL;
+
+static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
+
+static void generate_satb_log_enqueue(bool with_frame) {
+  BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
+  CodeBuffer buf(bb);
+  MacroAssembler masm(&buf);
+
+#define __ masm.
+
+  address start = __ pc();
+  Register pre_val;
+
+  Label refill, restart;
+  if (with_frame) {
+    __ save_frame(0);
+    pre_val = I0;  // Was O0 before the save.
+  } else {
+    pre_val = O0;
+  }
+
+  int satb_q_index_byte_offset =
+    in_bytes(JavaThread::satb_mark_queue_offset() +
+             PtrQueue::byte_offset_of_index());
+
+  int satb_q_buf_byte_offset =
+    in_bytes(JavaThread::satb_mark_queue_offset() +
+             PtrQueue::byte_offset_of_buf());
+
+  assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
+         in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
+         "check sizes in assembly below");
+
+  __ bind(restart);
+
+  // Load the index into the SATB buffer. PtrQueue::_index is a size_t
+  // so ld_ptr is appropriate.
+  __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
+
+  // index == 0?
+  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
+
+  __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
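+  // Claim a slot: the queue is filled from the top down, so the index
+  // is decremented before the store.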
+  __ sub(L0, oopSize, L0);
+
+  __ st_ptr(pre_val, L1, L0);  // [_buf + index] := pre_val
+  if (!with_frame) {
+    // Use return-from-leaf
+    __ retl();
+    __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+  } else {
+    // Not delayed.
+    __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+  }
+  if (with_frame) {
+    __ ret();
+    __ delayed()->restore();
+  }
+  __ bind(refill);
+
+  address handle_zero =
+    CAST_FROM_FN_PTR(address,
+                     &SATBMarkQueueSet::handle_zero_index_for_thread);
+  // This should be rare enough that we can afford to save all the
+  // scratch registers that the calling context might be using.
+  __ mov(G1_scratch, L0);
+  __ mov(G3_scratch, L1);
+  __ mov(G4, L2);
+  // We need the value of O0 above (for the write into the buffer), so we
+  // save and restore it.
+  __ mov(O0, L3);
+  // Since the call will overwrite O7, we save and restore that, as well.
+  __ mov(O7, L4);
+  __ call_VM_leaf(L5, handle_zero, G2_thread);
+  __ mov(L0, G1_scratch);
+  __ mov(L1, G3_scratch);
+  __ mov(L2, G4);
+  __ mov(L3, O0);
+  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+  __ delayed()->mov(L4, O7);
+
+  if (with_frame) {
+    satb_log_enqueue_with_frame = start;
+    satb_log_enqueue_with_frame_end = __ pc();
+  } else {
+    satb_log_enqueue_frameless = start;
+    satb_log_enqueue_frameless_end = __ pc();
+  }
+
+#undef __
+}
+
+static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
+  if (with_frame) {
+    if (satb_log_enqueue_with_frame == 0) {
+      generate_satb_log_enqueue(with_frame);
+      assert(satb_log_enqueue_with_frame != 0, "postcondition.");
+      if (G1SATBPrintStubs) {
+        tty->print_cr("Generated with-frame satb enqueue:");
+        Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
+                             satb_log_enqueue_with_frame_end,
+                             tty);
+      }
+    }
+  } else {
+    if (satb_log_enqueue_frameless == 0) {
+      generate_satb_log_enqueue(with_frame);
+      assert(satb_log_enqueue_frameless != 0, "postcondition.");
+      if (G1SATBPrintStubs) {
+        tty->print_cr("Generated frameless satb enqueue:");
+        Disassembler::decode((u_char*)satb_log_enqueue_frameless,
+                             satb_log_enqueue_frameless_end,
+                             tty);
+      }
+    }
+  }
+}
+
+void MacroAssembler::g1_write_barrier_pre(Register obj,
+                                          Register index,
+                                          int offset,
+                                          Register pre_val,
+                                          Register tmp,
+                                          bool preserve_o_regs) {
+  Label filtered;
+
+  if (obj == noreg) {
+    // We are not loading the previous value so make
+    // sure that we don't trash the value in pre_val
+    // with the code below.
+    assert_different_registers(pre_val, tmp);
+  } else {
+    // We will be loading the previous value
+    // in this code so...
+    assert(offset == 0 || index == noreg, "choose one");
+    assert(pre_val == noreg, "check this code");
+  }
+
+  // Load the flag that indicates whether SATB marking is active.
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    ld(G2,
+       in_bytes(JavaThread::satb_mark_queue_offset() +
+                PtrQueue::byte_offset_of_active()),
+       tmp);
+  } else {
+    guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
+              "Assumption");
+    ldsb(G2,
+         in_bytes(JavaThread::satb_mark_queue_offset() +
+                  PtrQueue::byte_offset_of_active()),
+         tmp);
+  }
+
+  // Is marking active?
+  cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    // Load the previous value...
+    if (index == noreg) {
+      if (Assembler::is_simm13(offset)) {
+        load_heap_oop(obj, offset, tmp);
+      } else {
+        set(offset, tmp);
+        load_heap_oop(obj, tmp, tmp);
+      }
+    } else {
+      load_heap_oop(obj, index, tmp);
+    }
+    // Previous value has been loaded into tmp
+    pre_val = tmp;
+  }
+
+  assert(pre_val != noreg, "must have a real register");
+
+  // Is the previous value null?
+  cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);
+
+  // OK, it's not filtered, so we'll need to call enqueue.  In the normal
+  // case, pre_val will be a scratch G-reg, but there are some cases in
+  // which it's an O-reg.  In the former case, do a normal call.  In the
+  // latter, do a save here and call the frameless version.
+
+  guarantee(pre_val->is_global() || pre_val->is_out(),
+            "Or we need to think harder.");
+
+  if (pre_val->is_global() && !preserve_o_regs) {
+    generate_satb_log_enqueue_if_necessary(true); // with frame
+
+    call(satb_log_enqueue_with_frame);
+    delayed()->mov(pre_val, O0);
+  } else {
+    generate_satb_log_enqueue_if_necessary(false); // frameless
+
+    save_frame(0);
+    call(satb_log_enqueue_frameless);
+    delayed()->mov(pre_val->after_save(), O0);
+    restore();
+  }
+
+  bind(filtered);
+}
+
+static address dirty_card_log_enqueue = 0;
+static u_char* dirty_card_log_enqueue_end = 0;
+
+// This code assumes that O0 contains the store address being card marked.
+static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
+  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
+  CodeBuffer buf(bb);
+  MacroAssembler masm(&buf);
+#define __ masm.
+  address start = __ pc();
+
+  Label not_already_dirty, restart, refill;
+
+#ifdef _LP64
+  __ srlx(O0, CardTableModRefBS::card_shift, O0);
+#else
+  __ srl(O0, CardTableModRefBS::card_shift, O0);
+#endif
+  AddressLiteral addrlit(byte_map_base);
+  __ set(addrlit, O1); // O1 := <card table base>
+  __ ldub(O0, O1, O2); // O2 := [O0 + O1]
+
+  assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
+  __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
+
+  // We didn't take the branch, so we're already dirty: return.
+  // Use return-from-leaf
+  __ retl();
+  __ delayed()->nop();
+
+  // Not dirty.
+  __ bind(not_already_dirty);
+
+  // Get O0 + O1 into a reg by itself
+  __ add(O0, O1, O3);
+
+  // First, dirty it.
+  __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
+
+  int dirty_card_q_index_byte_offset =
+    in_bytes(JavaThread::dirty_card_queue_offset() +
+             PtrQueue::byte_offset_of_index());
+  int dirty_card_q_buf_byte_offset =
+    in_bytes(JavaThread::dirty_card_queue_offset() +
+             PtrQueue::byte_offset_of_buf());
+  __ bind(restart);
+
+  // Load the index into the update buffer. PtrQueue::_index is
+  // a size_t so ld_ptr is appropriate here.
+  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
+
+  // index == 0?
+  __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
+
+  __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
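+  // As with the SATB queue above, claim a slot by decrementing the index.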
+  __ sub(L0, oopSize, L0);
+
+  __ st_ptr(O3, L1, L0);  // [_buf + index] := O3 (the card address)
+  // Use return-from-leaf
+  __ retl();
+  __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
+
+  __ bind(refill);
+  address handle_zero =
+    CAST_FROM_FN_PTR(address,
+                     &DirtyCardQueueSet::handle_zero_index_for_thread);
+  // This should be rare enough that we can afford to save all the
+  // scratch registers that the calling context might be using.
+  __ mov(G1_scratch, L3);
+  __ mov(G3_scratch, L5);
+  // We need the value of O3 above (for the write into the buffer), so we
+  // save and restore it.
+  __ mov(O3, L6);
+  // Since the call will overwrite O7, we save and restore that, as well.
+  __ mov(O7, L4);
+
+  __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
+  __ mov(L3, G1_scratch);
+  __ mov(L5, G3_scratch);
+  __ mov(L6, O3);
+  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+  __ delayed()->mov(L4, O7);
+
+  dirty_card_log_enqueue = start;
+  dirty_card_log_enqueue_end = __ pc();
+  // XXX Should have a guarantee here about not going off the end!
+  // Does it already do so?  Do an experiment...
+
+#undef __
+
+}
+
+static inline void
+generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
+  if (dirty_card_log_enqueue == 0) {
+    generate_dirty_card_log_enqueue(byte_map_base);
+    assert(dirty_card_log_enqueue != 0, "postcondition.");
+    if (G1SATBPrintStubs) {
+      tty->print_cr("Generated dirty_card enqueue:");
+      Disassembler::decode((u_char*)dirty_card_log_enqueue,
+                           dirty_card_log_enqueue_end,
+                           tty);
+    }
+  }
+}
+
+
+void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
+
+  Label filtered;
+  MacroAssembler* post_filter_masm = this;
+
+  if (new_val == G0) return;
+
+  G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::G1SATBCT ||
+         bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
+
+  if (G1RSBarrierRegionFilter) {
+    xor3(store_addr, new_val, tmp);
+#ifdef _LP64
+    srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
+#else
+    srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
+#endif
+
+    // XXX Should I predict this taken or not?  Does it matter?
+    cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
+  }
+
+  // If the "store_addr" register is an "in" or "local" register, move it to
+  // a scratch reg so we can pass it as an argument.
+  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
+  // Pick a scratch register different from "tmp".
+  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
+  // Make sure we use up the delay slot!
+  if (use_scr) {
+    post_filter_masm->mov(store_addr, scr);
+  } else {
+    post_filter_masm->nop();
+  }
+  generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
+  save_frame(0);
+  call(dirty_card_log_enqueue);
+  if (use_scr) {
+    delayed()->mov(scr, O0);
+  } else {
+    delayed()->mov(store_addr->after_save(), O0);
+  }
+  restore();
+
+  bind(filtered);
+}
+
+#endif  // SERIALGC
+///////////////////////////////////////////////////////////////////////////////////
+
+void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
+  // If we're writing constant NULL, we can skip the write barrier.
+  if (new_val == G0) return;
+  CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableModRef ||
+         bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
+  card_table_write(bs->byte_map_base, tmp, store_addr);
+}
+
+void MacroAssembler::load_klass(Register src_oop, Register klass) {
+  // The number of bytes in this code is used by
+  // MachCallDynamicJavaNode::ret_addr_offset()
+  // if this changes, change that.
+  if (UseCompressedKlassPointers) {
+    lduw(src_oop, oopDesc::klass_offset_in_bytes(), klass);
+    decode_klass_not_null(klass);
+  } else {
+    ld_ptr(src_oop, oopDesc::klass_offset_in_bytes(), klass);
+  }
+}
+
+void MacroAssembler::store_klass(Register klass, Register dst_oop) {
+  if (UseCompressedKlassPointers) {
+    assert(dst_oop != klass, "not enough registers");
+    encode_klass_not_null(klass);
+    st(klass, dst_oop, oopDesc::klass_offset_in_bytes());
+  } else {
+    st_ptr(klass, dst_oop, oopDesc::klass_offset_in_bytes());
+  }
+}
+
+void MacroAssembler::store_klass_gap(Register s, Register d) {
+  if (UseCompressedKlassPointers) {
+    assert(s != d, "not enough registers");
+    st(s, d, oopDesc::klass_gap_offset_in_bytes());
+  }
+}
+
+void MacroAssembler::load_heap_oop(const Address& s, Register d) {
+  if (UseCompressedOops) {
+    lduw(s, d);
+    decode_heap_oop(d);
+  } else {
+    ld_ptr(s, d);
+  }
+}
+
+void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) {
+  if (UseCompressedOops) {
+    lduw(s1, s2, d);
+    decode_heap_oop(d, d);
+  } else {
+    ld_ptr(s1, s2, d);
+  }
+}
+
+void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) {
+  if (UseCompressedOops) {
+    lduw(s1, simm13a, d);
+    decode_heap_oop(d, d);
+  } else {
+    ld_ptr(s1, simm13a, d);
+  }
+}
+
+void MacroAssembler::load_heap_oop(Register s1, RegisterOrConstant s2, Register d) {
+  if (s2.is_constant())  load_heap_oop(s1, s2.as_constant(), d);
+  else                   load_heap_oop(s1, s2.as_register(), d);
+}
+
+void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) {
+  if (UseCompressedOops) {
+    assert(s1 != d && s2 != d, "not enough registers");
+    encode_heap_oop(d);
+    st(d, s1, s2);
+  } else {
+    st_ptr(d, s1, s2);
+  }
+}
+
+void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) {
+  if (UseCompressedOops) {
+    assert(s1 != d, "not enough registers");
+    encode_heap_oop(d);
+    st(d, s1, simm13a);
+  } else {
+    st_ptr(d, s1, simm13a);
+  }
+}
+
+void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) {
+  if (UseCompressedOops) {
+    assert(a.base() != d, "not enough registers");
+    encode_heap_oop(d);
+    st(d, a, offset);
+  } else {
+    st_ptr(d, a, offset);
+  }
+}
+
+
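+// Encode a heap oop: dst = (src == NULL) ? 0
+//                         : (src - narrow_oop_base) >> LogMinObjAlignmentInBytes,
+// where the subtraction is omitted entirely for a NULL (zero) base.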
+void MacroAssembler::encode_heap_oop(Register src, Register dst) {
+  assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+    return;
+  }
+  Label done;
+  if (src == dst) {
+    // optimize for frequent case src == dst
+    bpr(rc_nz, true, Assembler::pt, src, done);
+    delayed()->sub(src, G6_heapbase, dst); // annulled if not taken
+    bind(done);
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+  } else {
+    bpr(rc_z, false, Assembler::pn, src, done);
+    delayed()->mov(G0, dst);
+    // The subtract could be moved before the branch with an annulled
+    // delay slot, but that would add unneeded work when the oop is null.
+    sub(src, G6_heapbase, dst);
+    srlx(dst, LogMinObjAlignmentInBytes, dst);
+    bind(done);
+  }
+}
+
+
+void MacroAssembler::encode_heap_oop_not_null(Register r) {
+  assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  verify_oop(r);
+  if (Universe::narrow_oop_base() != NULL)
+    sub(r, G6_heapbase, r);
+  srlx(r, LogMinObjAlignmentInBytes, r);
+}
+
+void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) {
+  assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+  } else {
+    sub(src, G6_heapbase, dst);
+    srlx(dst, LogMinObjAlignmentInBytes, dst);
+  }
+}
+
+// Same algorithm as oops.inline.hpp decode_heap_oop.
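+// dst = (src == 0) ? NULL : (src << LogMinObjAlignmentInBytes) + narrow_oop_base.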
+void  MacroAssembler::decode_heap_oop(Register src, Register dst) {
+  assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  sllx(src, LogMinObjAlignmentInBytes, dst);
+  if (Universe::narrow_oop_base() != NULL) {
+    Label done;
+    bpr(rc_nz, true, Assembler::pt, dst, done);
+    delayed()->add(dst, G6_heapbase, dst); // annulled if not taken
+    bind(done);
+  }
+  verify_oop(dst);
+}
+
+void  MacroAssembler::decode_heap_oop_not_null(Register r) {
+  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
+  // pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  sllx(r, LogMinObjAlignmentInBytes, r);
+  if (Universe::narrow_oop_base() != NULL)
+    add(r, G6_heapbase, r);
+}
+
+void  MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
+  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
+  // pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  assert (UseCompressedOops, "must be compressed");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  sllx(src, LogMinObjAlignmentInBytes, dst);
+  if (Universe::narrow_oop_base() != NULL)
+    add(dst, G6_heapbase, dst);
+}
+
+void MacroAssembler::encode_klass_not_null(Register r) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+  assert (UseCompressedKlassPointers, "must be compressed");
+  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+  if (Universe::narrow_klass_base() != NULL)
+    sub(r, G6_heapbase, r);
+  srlx(r, LogKlassAlignmentInBytes, r);
+}
+
+void MacroAssembler::encode_klass_not_null(Register src, Register dst) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+  assert (UseCompressedKlassPointers, "must be compressed");
+  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+  if (Universe::narrow_klass_base() == NULL) {
+    srlx(src, LogKlassAlignmentInBytes, dst);
+  } else {
+    sub(src, G6_heapbase, dst);
+    srlx(dst, LogKlassAlignmentInBytes, dst);
+  }
+}
+
+void  MacroAssembler::decode_klass_not_null(Register r) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
+  // pd_code_size_limit.
+  assert (UseCompressedKlassPointers, "must be compressed");
+  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+  sllx(r, LogKlassAlignmentInBytes, r);
+  if (Universe::narrow_klass_base() != NULL)
+    add(r, G6_heapbase, r);
+}
+
+void  MacroAssembler::decode_klass_not_null(Register src, Register dst) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
+  // pd_code_size_limit.
+  assert (UseCompressedKlassPointers, "must be compressed");
+  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+  sllx(src, LogKlassAlignmentInBytes, dst);
+  if (Universe::narrow_klass_base() != NULL)
+    add(dst, G6_heapbase, dst);
+}
+
+void MacroAssembler::reinit_heapbase() {
+  if (UseCompressedOops || UseCompressedKlassPointers) {
+    AddressLiteral base(Universe::narrow_ptrs_base_addr());
+    load_ptr_contents(base, G6_heapbase);
+  }
+}
+
+// Compare char[] arrays aligned to 4 bytes.
+void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
+                                        Register limit, Register result,
+                                        Register chr1, Register chr2, Label& Ldone) {
+  Label Lvector, Lloop;
+  assert(chr1 == result, "should be the same");
+
+  // Note: limit contains number of bytes (2*char_elements) != 0.
+  andcc(limit, 0x2, chr1); // trailing character ?
+  br(Assembler::zero, false, Assembler::pt, Lvector);
+  delayed()->nop();
+
+  // compare the trailing char
+  sub(limit, sizeof(jchar), limit);
+  lduh(ary1, limit, chr1);
+  lduh(ary2, limit, chr2);
+  cmp(chr1, chr2);
+  br(Assembler::notEqual, true, Assembler::pt, Ldone);
+  delayed()->mov(G0, result);     // not equal
+
+  // only one char ?
+  cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
+  delayed()->add(G0, 1, result); // zero-length arrays are equal
+
+  // word-by-word compare; no alignment check needed
+  bind(Lvector);
+  // Shift ary1 and ary2 to the end of the arrays, negate limit
+  add(ary1, limit, ary1);
+  add(ary2, limit, ary2);
+  neg(limit, limit);
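+  // limit is now negative and counts up to zero, so [aryN + limit]
+  // walks forward through both arrays.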
+
+  lduw(ary1, limit, chr1);
+  bind(Lloop);
+  lduw(ary2, limit, chr2);
+  cmp(chr1, chr2);
+  br(Assembler::notEqual, true, Assembler::pt, Ldone);
+  delayed()->mov(G0, result);     // not equal
+  inccc(limit, 2*sizeof(jchar));
+  // annul LDUW if branch is not taken to prevent access past end of array
+  br(Assembler::notZero, true, Assembler::pt, Lloop);
+  delayed()->lduw(ary1, limit, chr1); // hoisted
+
+  // Caller should set it:
+  // add(G0, 1, result); // equals
+}
+
+// Use BIS for zeroing (count is in bytes).
+void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
+  assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
+  Register end = count;
+  int cache_line_size = VM_Version::prefetch_data_size();
+  // Minimum count for which BIS zeroing is used, since it needs a
+  // membar, which is expensive.
+  int block_zero_size  = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit);
+
+  Label small_loop;
+  // Check if count is negative (dead code) or zero.
+  // Note: count is 64 bits wide in a 64-bit VM.
+  cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone);
+
+  // Use BIS zeroing only for big arrays since it requires membar.
+  if (Assembler::is_simm13(block_zero_size)) { // < 4096
+    cmp(count, block_zero_size);
+  } else {
+    set(block_zero_size, temp);
+    cmp(count, temp);
+  }
+  br(Assembler::lessUnsigned, false, Assembler::pt, small_loop);
+  delayed()->add(to, count, end);
+
+  // Note: size is at least three (32-byte) cache lines.
+
+  // Clean the beginning of space up to next cache line.
+  for (int offs = 0; offs < cache_line_size; offs += 8) {
+    stx(G0, to, offs);
+  }
+
+  // align to next cache line
+  add(to, cache_line_size, to);
+  and3(to, -cache_line_size, to);
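+  // 'to' now sits on a cache-line boundary.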
+
+  // Note: size left is at least two (32-byte) cache lines.
+
+  // BIS should not be used to zero the tail (64 bytes),
+  // to avoid zeroing the header of the following object.
+  sub(end, (cache_line_size*2)-8, end);
+
+  Label bis_loop;
+  bind(bis_loop);
+  stxa(G0, to, G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
+  add(to, cache_line_size, to);
+  cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, bis_loop);
+
+  // BIS needs membar.
+  membar(Assembler::StoreLoad);
+
+  add(end, (cache_line_size*2)-8, end); // restore end
+  cmp_and_brx_short(to, end, Assembler::greaterEqualUnsigned, Assembler::pn, Ldone);
+
+  // Clean the tail.
+  bind(small_loop);
+  stx(G0, to, 0);
+  add(to, 8, to);
+  cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop);
+  nop(); // Separate short branches
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,1500 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_VM_MACROASSEMBLER_SPARC_HPP
+#define CPU_SPARC_VM_MACROASSEMBLER_SPARC_HPP
+
+#include "asm/assembler.hpp"
+
+// <sys/trap.h> promises that the system will not use traps 16-31
+#define ST_RESERVED_FOR_USER_0 0x10
+
+class BiasedLockingCounters;
+
+
+// Register aliases for parts of the system:
+
+// 64 bit values can be kept in g1-g5, o1-o5 and o7 and all 64 bits are safe
+// across context switches in V8+ ABI.  Of course, there are no 64 bit regs
+// in V8 ABI. All 64 bits are preserved in V9 ABI for all registers.
+
+// g2-g4 are scratch registers called "application globals".  Their
+// meaning is reserved to the "compilation system"--which means us!
+// They are not supposed to be touched by ordinary C code, although
+// highly-optimized C code might steal them for temps.  They are safe
+// across thread switches, and the ABI requires that they be safe
+// across function calls.
+//
+// g1 and g3 are touched by more modules.  V8 allows g1 to be clobbered
+// across func calls, and V8+ also allows g5 to be clobbered across
+// func calls.  Also, g1 and g5 can get touched while doing shared
+// library loading.
+//
+// We must not touch g7 (it is the thread-self register) and g6 is
+// reserved for certain tools.  g0, of course, is always zero.
+//
+// (Sources:  SunSoft Compilers Group, thread library engineers.)
+
+// %%%% The interpreter should be revisited to reduce global scratch regs.
+
+// This global always holds the current JavaThread pointer:
+
+REGISTER_DECLARATION(Register, G2_thread , G2);
+REGISTER_DECLARATION(Register, G6_heapbase , G6);
+
+// The following globals are part of the Java calling convention:
+
+REGISTER_DECLARATION(Register, G5_method             , G5);
+REGISTER_DECLARATION(Register, G5_megamorphic_method , G5_method);
+REGISTER_DECLARATION(Register, G5_inline_cache_reg   , G5_method);
+
+// The following globals are used for the new C1 & interpreter calling convention:
+REGISTER_DECLARATION(Register, Gargs        , G4); // pointing to the last argument
+
+// This local is used to preserve G2_thread in the interpreter and in stubs:
+REGISTER_DECLARATION(Register, L7_thread_cache , L7);
+
+// These globals are used as scratch registers in the interpreter:
+
+REGISTER_DECLARATION(Register, Gframe_size   , G1); // SAME REG as G1_scratch
+REGISTER_DECLARATION(Register, G1_scratch    , G1); // also SAME
+REGISTER_DECLARATION(Register, G3_scratch    , G3);
+REGISTER_DECLARATION(Register, G4_scratch    , G4);
+
+// These globals are used as short-lived scratch registers in the compiler:
+
+REGISTER_DECLARATION(Register, Gtemp  , G5);
+
+// JSR 292 fixed register usages:
+REGISTER_DECLARATION(Register, G5_method_type        , G5);
+REGISTER_DECLARATION(Register, G3_method_handle      , G3);
+REGISTER_DECLARATION(Register, L7_mh_SP_save         , L7);
+
+// The compiler requires that G5_megamorphic_method is G5_inline_cache_reg,
+// because a single patchable "set" instruction (NativeMovConstReg,
+// or NativeMovConstPatching for compiler1)
+// serves to set up either quantity, depending on whether the compiled
+// call site is an inline cache or is megamorphic.  See the function
+// CompiledIC::set_to_megamorphic.
+//
+// If an inline cache targets an interpreted method, then the
+// G5 register will be used twice during the call.  First,
+// the call site will be patched to load a compiledICHolder
+// into G5. (This is an ordered pair of ic_klass, method.)
+// The c2i adapter will first check the ic_klass, then load
+// G5_method with the method part of the pair just before
+// jumping into the interpreter.
+//
+// Note that G5_method is only the method-self for the interpreter,
+// and is logically unrelated to G5_megamorphic_method.
+//
+// Invariants on G2_thread (the JavaThread pointer):
+//  - it should not be used for any other purpose anywhere
+//  - it must be re-initialized by StubRoutines::call_stub()
+//  - it must be preserved around every use of call_VM
+
+// We can consider using g2/g3/g4 to cache more values than the
+// JavaThread, such as the card-marking base or perhaps pointers into
+// Eden.  It's something of a waste to use them as scratch temporaries,
+// since they are not supposed to be volatile.  (Of course, if we find
+// that Java doesn't benefit from application globals, then we can just
+// use them as ordinary temporaries.)
+//
+// Since g1 and g5 (and/or g6) are the volatile (caller-save) registers,
+// it makes sense to use them routinely for procedure linkage,
+// whenever the On registers are not applicable.  Examples:  G5_method,
+// G5_inline_cache_klass, and a double handful of miscellaneous compiler
+// stubs.  This means that compiler stubs, etc., should be kept to a
+// maximum of two or three G-register arguments.
+
+
+// stub frames
+
+REGISTER_DECLARATION(Register, Lentry_args      , L0); // pointer to args passed to callee (interpreter) not stub itself
+
+// Interpreter frames
+
+#ifdef CC_INTERP
+REGISTER_DECLARATION(Register, Lstate           , L0); // interpreter state object pointer
+REGISTER_DECLARATION(Register, L1_scratch       , L1); // scratch
+REGISTER_DECLARATION(Register, Lmirror          , L1); // mirror (for native methods only)
+REGISTER_DECLARATION(Register, L2_scratch       , L2);
+REGISTER_DECLARATION(Register, L3_scratch       , L3);
+REGISTER_DECLARATION(Register, L4_scratch       , L4);
+REGISTER_DECLARATION(Register, Lscratch         , L5); // C1 uses
+REGISTER_DECLARATION(Register, Lscratch2        , L6); // C1 uses
+REGISTER_DECLARATION(Register, L7_scratch       , L7); // constant pool cache
+REGISTER_DECLARATION(Register, O5_savedSP       , O5);
+REGISTER_DECLARATION(Register, I5_savedSP       , I5); // Saved SP before bumping for locals.  This is simply
+                                                       // a copy of SP, so in 64-bit it's a biased value.  The bias
+                                                       // is added and removed as needed in the frame code.
+// Interface to signature handler
+REGISTER_DECLARATION(Register, Llocals          , L7); // pointer to locals for signature handler
+REGISTER_DECLARATION(Register, Lmethod          , L6); // Method* when calling signature handler
+
+#else
+REGISTER_DECLARATION(Register, Lesp             , L0); // expression stack pointer
+REGISTER_DECLARATION(Register, Lbcp             , L1); // pointer to next bytecode
+REGISTER_DECLARATION(Register, Lmethod          , L2);
+REGISTER_DECLARATION(Register, Llocals          , L3);
+REGISTER_DECLARATION(Register, Largs            , L3); // pointer to locals for signature handler
+                                                       // must match Llocals in asm interpreter
+REGISTER_DECLARATION(Register, Lmonitors        , L4);
+REGISTER_DECLARATION(Register, Lbyte_code       , L5);
+// When calling out from the interpreter we record SP so that we can remove any extra stack
+// space allocated during adapter transitions. This register is only live from the point
+// of the call until we return.
+REGISTER_DECLARATION(Register, Llast_SP         , L5);
+REGISTER_DECLARATION(Register, Lscratch         , L5);
+REGISTER_DECLARATION(Register, Lscratch2        , L6);
+REGISTER_DECLARATION(Register, LcpoolCache      , L6); // constant pool cache
+
+REGISTER_DECLARATION(Register, O5_savedSP       , O5);
+REGISTER_DECLARATION(Register, I5_savedSP       , I5); // Saved SP before bumping for locals.  This is simply
+                                                       // a copy of SP, so in 64-bit it's a biased value.  The bias
+                                                       // is added and removed as needed in the frame code.
+REGISTER_DECLARATION(Register, IdispatchTables  , I4); // Base address of the bytecode dispatch tables
+REGISTER_DECLARATION(Register, IdispatchAddress , I3); // Register which saves the dispatch address for each bytecode
+REGISTER_DECLARATION(Register, ImethodDataPtr   , I2); // Pointer to the current method data
+#endif /* CC_INTERP */
+
+// NOTE: Lscratch2 and LcpoolCache point to the same registers in
+//       the interpreter code. If Lscratch2 needs to be used for some
+//       purpose, then LcpoolCache should be restored afterwards for
+//       the interpreter to work correctly.
+// (These assignments must be compatible with L7_thread_cache; see above.)
+
+// Since Lbcp points into the middle of the method object,
+// it is temporarily converted into a "bcx" during GC.
+
+// Exception processing
+// These registers are passed into exception handlers.
+// All exception handlers require the exception object being thrown.
+// In addition, an nmethod's exception handler must be passed
+// the address of the call site within the nmethod, to allow
+// proper selection of the applicable catch block.
+// (Interpreter frames use their own bcp() for this purpose.)
+//
+// The Oissuing_pc value is not always needed.  When jumping to a
+// handler that is known to be interpreted, the Oissuing_pc value can be
+// omitted.  An actual catch block in compiled code receives (from its
+// nmethod's exception handler) the thrown exception in the Oexception,
+// but it doesn't need the Oissuing_pc.
+//
+// If an exception handler (either interpreted or compiled)
+// discovers there is no applicable catch block, it updates
+// the Oissuing_pc to the continuation PC of its own caller,
+// pops back to that caller's stack frame, and executes that
+// caller's exception handler.  Obviously, this process will
+// iterate until the control stack is popped back to a method
+// containing an applicable catch block.  A key invariant is
+// that the Oissuing_pc value is always a value local to
+// the method whose exception handler is currently executing.
+//
+// Note:  The issuing PC value is __not__ a raw return address (I7 value).
+// It is a "return pc", the address __following__ the call.
+// Raw return addresses are converted to issuing PCs by frame::pc(),
+// or by stubs.  Issuing PCs can be used directly with PC range tables.
+//
+REGISTER_DECLARATION(Register, Oexception  , O0); // exception being thrown
+REGISTER_DECLARATION(Register, Oissuing_pc , O1); // where the exception is coming from
+
+
+// These must occur after the declarations above
+#ifndef DONT_USE_REGISTER_DEFINES
+
+#define Gthread             AS_REGISTER(Register, Gthread)
+#define Gmethod             AS_REGISTER(Register, Gmethod)
+#define Gmegamorphic_method AS_REGISTER(Register, Gmegamorphic_method)
+#define Ginline_cache_reg   AS_REGISTER(Register, Ginline_cache_reg)
+#define Gargs               AS_REGISTER(Register, Gargs)
+#define Lthread_cache       AS_REGISTER(Register, Lthread_cache)
+#define Gframe_size         AS_REGISTER(Register, Gframe_size)
+#define Gtemp               AS_REGISTER(Register, Gtemp)
+
+#ifdef CC_INTERP
+#define Lstate              AS_REGISTER(Register, Lstate)
+#define Lesp                AS_REGISTER(Register, Lesp)
+#define L1_scratch          AS_REGISTER(Register, L1_scratch)
+#define Lmirror             AS_REGISTER(Register, Lmirror)
+#define L2_scratch          AS_REGISTER(Register, L2_scratch)
+#define L3_scratch          AS_REGISTER(Register, L3_scratch)
+#define L4_scratch          AS_REGISTER(Register, L4_scratch)
+#define Lscratch            AS_REGISTER(Register, Lscratch)
+#define Lscratch2           AS_REGISTER(Register, Lscratch2)
+#define L7_scratch          AS_REGISTER(Register, L7_scratch)
+#define Ostate              AS_REGISTER(Register, Ostate)
+#else
+#define Lesp                AS_REGISTER(Register, Lesp)
+#define Lbcp                AS_REGISTER(Register, Lbcp)
+#define Lmethod             AS_REGISTER(Register, Lmethod)
+#define Llocals             AS_REGISTER(Register, Llocals)
+#define Lmonitors           AS_REGISTER(Register, Lmonitors)
+#define Lbyte_code          AS_REGISTER(Register, Lbyte_code)
+#define Lscratch            AS_REGISTER(Register, Lscratch)
+#define Lscratch2           AS_REGISTER(Register, Lscratch2)
+#define LcpoolCache         AS_REGISTER(Register, LcpoolCache)
+#endif /* ! CC_INTERP */
+
+#define Lentry_args         AS_REGISTER(Register, Lentry_args)
+#define I5_savedSP          AS_REGISTER(Register, I5_savedSP)
+#define O5_savedSP          AS_REGISTER(Register, O5_savedSP)
+#define IdispatchAddress    AS_REGISTER(Register, IdispatchAddress)
+#define ImethodDataPtr      AS_REGISTER(Register, ImethodDataPtr)
+#define IdispatchTables     AS_REGISTER(Register, IdispatchTables)
+
+#define Oexception          AS_REGISTER(Register, Oexception)
+#define Oissuing_pc         AS_REGISTER(Register, Oissuing_pc)
+
+#endif
+
+
+// Address is an abstraction used to represent a memory location.
+//
+// Note: A register location is represented via a Register, not
+//       via an address for efficiency & simplicity reasons.
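+//
+// Typical forms are Address(base, disp) for register+displacement
+// operands and Address(base, index) for register+register addressing;
+// see the constructors below.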
+
+class Address VALUE_OBJ_CLASS_SPEC {
+ private:
+  Register           _base;           // Base register.
+  RegisterOrConstant _index_or_disp;  // Index register or constant displacement.
+  RelocationHolder   _rspec;
+
+ public:
+  Address() : _base(noreg), _index_or_disp(noreg) {}
+
+  Address(Register base, RegisterOrConstant index_or_disp)
+    : _base(base),
+      _index_or_disp(index_or_disp) {
+  }
+
+  Address(Register base, Register index)
+    : _base(base),
+      _index_or_disp(index) {
+  }
+
+  Address(Register base, int disp)
+    : _base(base),
+      _index_or_disp(disp) {
+  }
+
+#ifdef ASSERT
+  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+  Address(Register base, ByteSize disp)
+    : _base(base),
+      _index_or_disp(in_bytes(disp)) {
+  }
+#endif
+
+  // accessors
+  Register base()             const { return _base; }
+  Register index()            const { return _index_or_disp.as_register(); }
+  int      disp()             const { return _index_or_disp.as_constant(); }
+
+  bool     has_index()        const { return _index_or_disp.is_register(); }
+  bool     has_disp()         const { return _index_or_disp.is_constant(); }
+
+  bool     uses(Register reg) const { return base() == reg || (has_index() && index() == reg); }
+
+  const relocInfo::relocType rtype() { return _rspec.type(); }
+  const RelocationHolder&    rspec() { return _rspec; }
+
+  RelocationHolder rspec(int offset) const {
+    return offset == 0 ? _rspec : _rspec.plus(offset);
+  }
+
+  inline bool is_simm13(int offset = 0);  // check disp+offset for overflow
+
+  Address plus_disp(int plusdisp) const {     // bump disp by a small amount
+    assert(_index_or_disp.is_constant(), "must have a displacement");
+    Address a(base(), disp() + plusdisp);
+    return a;
+  }
+  bool is_same_address(Address a) const {
+    // disregard _rspec
+    return base() == a.base() && (has_index() ? index() == a.index() : disp() == a.disp());
+  }
+
+  Address after_save() const {
+    Address a = (*this);
+    a._base = a._base->after_save();
+    return a;
+  }
+
+  Address after_restore() const {
+    Address a = (*this);
+    a._base = a._base->after_restore();
+    return a;
+  }
+
+  // Convert the raw encoding form into the form expected by the
+  // constructor for Address.
+  static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
+
+  friend class Assembler;
+};
+
+
+class AddressLiteral VALUE_OBJ_CLASS_SPEC {
+ private:
+  address          _address;
+  RelocationHolder _rspec;
+
+  RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) {
+    switch (rtype) {
+    case relocInfo::external_word_type:
+      return external_word_Relocation::spec(addr);
+    case relocInfo::internal_word_type:
+      return internal_word_Relocation::spec(addr);
+#ifdef _LP64
+    case relocInfo::opt_virtual_call_type:
+      return opt_virtual_call_Relocation::spec();
+    case relocInfo::static_call_type:
+      return static_call_Relocation::spec();
+    case relocInfo::runtime_call_type:
+      return runtime_call_Relocation::spec();
+#endif
+    case relocInfo::none:
+      return RelocationHolder();
+    default:
+      ShouldNotReachHere();
+      return RelocationHolder();
+    }
+  }
+
+ protected:
+  // creation
+  AddressLiteral() : _address(NULL), _rspec(NULL) {}
+
+ public:
+  AddressLiteral(address addr, RelocationHolder const& rspec)
+    : _address(addr),
+      _rspec(rspec) {}
+
+  // Some constructors to avoid casting at the call site.
+  AddressLiteral(jobject obj, RelocationHolder const& rspec)
+    : _address((address) obj),
+      _rspec(rspec) {}
+
+  AddressLiteral(intptr_t value, RelocationHolder const& rspec)
+    : _address((address) value),
+      _rspec(rspec) {}
+
+  AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+    _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  // Some constructors to avoid casting at the call site.
+  AddressLiteral(address* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+    _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(bool* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(const bool* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(signed char* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(int* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(intptr_t addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+#ifdef _LP64
+  // 32-bit complains about a multiple declaration for int*.
+  AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+#endif
+
+  AddressLiteral(Metadata* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(Metadata** addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(float* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(double* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  intptr_t value() const { return (intptr_t) _address; }
+  int      low10() const;
+
+  const relocInfo::relocType rtype() const { return _rspec.type(); }
+  const RelocationHolder&    rspec() const { return _rspec; }
+
+  RelocationHolder rspec(int offset) const {
+    return offset == 0 ? _rspec : _rspec.plus(offset);
+  }
+};
+
+// Convenience classes
+class ExternalAddress: public AddressLiteral {
+ private:
+  static relocInfo::relocType reloc_for_target(address target) {
+    // Sometimes ExternalAddress is used for values which aren't
+    // exactly addresses, like the card table base.
+    // external_word_type can't be used for values in the first page
+    // so just skip the reloc in that case.
+    return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
+  }
+
+ public:
+  ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(          target)) {}
+  ExternalAddress(Metadata** target) : AddressLiteral(target, reloc_for_target((address) target)) {}
+};
+
+inline Address RegisterImpl::address_in_saved_window() const {
+   return (Address(SP, (sp_offset_in_saved_window() * wordSize) + STACK_BIAS));
+}
+
+
+
+// Argument is an abstraction used to represent an outgoing
+// actual argument or an incoming formal parameter, whether
+// it resides in memory or in a register, in a manner consistent
+// with the SPARC Application Binary Interface, or ABI.  This is
+// often referred to as the native or C calling convention.
+
+class Argument VALUE_OBJ_CLASS_SPEC {
+ private:
+  int _number;
+  bool _is_in;
+
+ public:
+#ifdef _LP64
+  enum {
+    n_register_parameters = 6,          // only 6 registers may contain integer parameters
+    n_float_register_parameters = 16    // Can have up to 16 floating registers
+  };
+#else
+  enum {
+    n_register_parameters = 6           // only 6 registers may contain integer parameters
+  };
+#endif
+
+  // creation
+  Argument(int number, bool is_in) : _number(number), _is_in(is_in) {}
+
+  int  number() const  { return _number;  }
+  bool is_in()  const  { return _is_in;   }
+  bool is_out() const  { return !is_in(); }
+
+  Argument successor() const  { return Argument(number() + 1, is_in()); }
+  Argument as_in()     const  { return Argument(number(), true ); }
+  Argument as_out()    const  { return Argument(number(), false); }
+
+  // locating register-based arguments:
+  bool is_register() const { return _number < n_register_parameters; }
+
+#ifdef _LP64
+  // locating Floating Point register-based arguments:
+  bool is_float_register() const { return _number < n_float_register_parameters; }
+
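+  // Argument slot n maps to the float register pair 2n/2n+1: singles
+  // are passed in the odd half and doubles in the even-aligned pair,
+  // which is what the arithmetic below computes.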
+  FloatRegister as_float_register() const {
+    assert(is_float_register(), "must be a register argument");
+    return as_FloatRegister(( number() *2 ) + 1);
+  }
+  FloatRegister as_double_register() const {
+    assert(is_float_register(), "must be a register argument");
+    return as_FloatRegister(( number() *2 ));
+  }
+#endif
+
+  Register as_register() const {
+    assert(is_register(), "must be a register argument");
+    return is_in() ? as_iRegister(number()) : as_oRegister(number());
+  }
+
+  // locating memory-based arguments
+  Address as_address() const {
+    assert(!is_register(), "must be a memory argument");
+    return address_in_frame();
+  }
+
+  // When applied to a register-based argument, give the corresponding address
+  // into the 6-word area "into which callee may store register arguments"
+  // (This is a different place than the corresponding register-save area location.)
+  Address address_in_frame() const;
+
+  // debugging
+  const char* name() const;
+
+  friend class Assembler;
+};
+
+
+class RegistersForDebugging : public StackObj {
+ public:
+  intptr_t i[8], l[8], o[8], g[8];
+  float    f[32];
+  double   d[32];
+
+  void print(outputStream* s);
+
+  static int i_offset(int j) { return offset_of(RegistersForDebugging, i[j]); }
+  static int l_offset(int j) { return offset_of(RegistersForDebugging, l[j]); }
+  static int o_offset(int j) { return offset_of(RegistersForDebugging, o[j]); }
+  static int g_offset(int j) { return offset_of(RegistersForDebugging, g[j]); }
+  static int f_offset(int j) { return offset_of(RegistersForDebugging, f[j]); }
+  static int d_offset(int j) { return offset_of(RegistersForDebugging, d[j / 2]); }
+
+  // gen asm code to save regs
+  static void save_registers(MacroAssembler* a);
+
+  // restore global registers in case C code disturbed them
+  static void restore_registers(MacroAssembler* a, Register r);
+};
+
+
+// MacroAssembler extends Assembler by a few frequently used macros.
+//
+// Most of the standard SPARC synthetic ops are defined here.
+// Instructions for which a 'better' code sequence exists depending
+// on arguments should also go in here.
+
+#define JMP2(r1, r2) jmp(r1, r2, __FILE__, __LINE__)
+#define JMP(r1, off) jmp(r1, off, __FILE__, __LINE__)
+#define JUMP(a, temp, off)     jump(a, temp, off, __FILE__, __LINE__)
+#define JUMPL(a, temp, d, off) jumpl(a, temp, d, off, __FILE__, __LINE__)
+
+
+class MacroAssembler : public Assembler {
+  // code patchers need various routines like inv_wdisp()
+  friend class NativeInstruction;
+  friend class NativeGeneralJump;
+  friend class Relocation;
+  friend class Label;
+
+ protected:
+  static int  patched_branch(int dest_pos, int inst, int inst_pos);
+  static int  branch_destination(int inst, int pos);
+
+  // Support for VM calls
+  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
+  // may customize this version by overriding it for its purposes (e.g., to save/restore
+  // additional registers when doing a VM call).
+#ifdef CC_INTERP
+  #define VIRTUAL
+#else
+  #define VIRTUAL virtual
+#endif
+
+  VIRTUAL void call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments);
+
+  //
+  // It is imperative that all calls into the VM are handled via the call_VM macros.
+  // They make sure that the stack linkage is setup correctly. call_VM's correspond
+  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
+  //
+  // This is the base routine called by the different versions of call_VM. The interpreter
+  // may customize this version by overriding it for its purposes (e.g., to save/restore
+  // additional registers when doing a VM call).
+  //
+  // A non-volatile java_thread_cache register should be specified so
+  // that the G2_thread value can be preserved across the call.
+  // (If java_thread_cache is noreg, then a slow get_thread call
+  // will re-initialize the G2_thread.) call_VM_base returns the register that contains the
+  // thread.
+  //
+  // If no last_java_sp is specified (noreg) then SP will be used instead.
+
+  virtual void call_VM_base(
+    Register        oop_result,             // where an oop-result ends up if any; use noreg otherwise
+    Register        java_thread_cache,      // the thread if computed before     ; use noreg otherwise
+    Register        last_java_sp,           // to set up last_Java_frame in stubs; use noreg otherwise
+    address         entry_point,            // the entry point
+    int             number_of_arguments,    // the number of arguments (w/o thread) to pop after call
+    bool            check_exception=true    // flag which indicates if exception should be checked
+  );
+
+  // This routine should emit JVMTI PopFrame and ForceEarlyReturn handling code.
+  // The implementation is only non-empty for the InterpreterMacroAssembler,
+  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
+  virtual void check_and_handle_popframe(Register scratch_reg);
+  virtual void check_and_handle_earlyret(Register scratch_reg);
+
+ public:
+  MacroAssembler(CodeBuffer* code) : Assembler(code) {}
+
+  // Support for NULL-checks
+  //
+  // Generates code that causes a NULL OS exception if the content of reg is NULL.
+  // If the accessed location is M[reg + offset] and the offset is known, provide the
+  // offset.  No explicit code generation is needed if the offset is within a certain
+  // range (0 <= offset <= page_size).
+  //
+  // %%%%%% Currently not done for SPARC
+
+  void null_check(Register reg, int offset = -1);
+  static bool needs_explicit_null_check(intptr_t offset);
+
+  // support for delayed instructions
+  MacroAssembler* delayed() { Assembler::delayed();  return this; }
+
+  // branches that use the right instruction for V8 vs. V9
+  inline void br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
+  inline void br( Condition c, bool a, Predict p, Label& L );
+
+  inline void fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
+  inline void fb( Condition c, bool a, Predict p, Label& L );
+
+  // compares register with zero (32 bit) and branches (V9 and V8 instructions)
+  void cmp_zero_and_br( Condition c, Register s1, Label& L, bool a = false, Predict p = pn );
+  // Compares a pointer register with zero and branches on (not)null.
+  // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
+  void br_null   ( Register s1, bool a, Predict p, Label& L );
+  void br_notnull( Register s1, bool a, Predict p, Label& L );
+
+  //
+  // Compare registers and branch with nop in delay slot or cbcond without delay slot.
+  //
+  // ATTENTION: use these instructions with caution because the cbcond
+  //            instruction has a very short reach: 512 instructions (2KB).
+
+  // Compare integer (32 bit) values (icc only).
+  void cmp_and_br_short(Register s1, Register s2, Condition c, Predict p, Label& L);
+  void cmp_and_br_short(Register s1, int simm13a, Condition c, Predict p, Label& L);
+  // Platform-dependent version for pointer compares (icc on !LP64 and xcc on LP64).
+  void cmp_and_brx_short(Register s1, Register s2, Condition c, Predict p, Label& L);
+  void cmp_and_brx_short(Register s1, int simm13a, Condition c, Predict p, Label& L);
+
+  // Short branch versions for comparing a pointer with zero.
+  void br_null_short   ( Register s1, Predict p, Label& L );
+  void br_notnull_short( Register s1, Predict p, Label& L );
+
+  // unconditional short branch
+  void ba_short(Label& L);
+
+  inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
+  inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
+
+  // Branch that tests xcc in LP64 and icc in !LP64
+  inline void brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
+  inline void brx( Condition c, bool a, Predict p, Label& L );
+
+  // unconditional branch
+  inline void ba( Label& L );
+
+  // Branch that tests fp condition codes
+  inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
+  inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );
+
+  // get PC the best way
+  inline int get_pc( Register d );
+
+  // SPARC shorthands (p. 85 V8 manual, p. 289 V9 manual)
+  inline void cmp(  Register s1, Register s2 ) { subcc( s1, s2, G0 ); }
+  inline void cmp(  Register s1, int simm13a ) { subcc( s1, simm13a, G0 ); }
+
+  inline void jmp( Register s1, Register s2 );
+  inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
+
+  // Check if the call target is out of wdisp30 range (relative to the code cache)
+  static inline bool is_far_target(address d);
+  inline void call( address d,  relocInfo::relocType rt = relocInfo::runtime_call_type );
+  inline void call( Label& L,   relocInfo::relocType rt = relocInfo::runtime_call_type );
+  inline void callr( Register s1, Register s2 );
+  inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
+
+  // Emits nothing on V8
+  inline void iprefetch( address d, relocInfo::relocType rt = relocInfo::none );
+  inline void iprefetch( Label& L);
+
+  inline void tst( Register s ) { orcc( G0, s, G0 ); }
+
+#ifdef PRODUCT
+  inline void ret(  bool trace = TraceJumps )   { if (trace) {
+                                                    mov(I7, O7); // traceable register
+                                                    JMP(O7, 2 * BytesPerInstWord);
+                                                  } else {
+                                                    jmpl( I7, 2 * BytesPerInstWord, G0 );
+                                                  }
+                                                }
+
+  inline void retl( bool trace = TraceJumps )  { if (trace) JMP(O7, 2 * BytesPerInstWord);
+                                                 else jmpl( O7, 2 * BytesPerInstWord, G0 ); }
+#else
+  void ret(  bool trace = TraceJumps );
+  void retl( bool trace = TraceJumps );
+#endif /* PRODUCT */
+
+  // Required platform-specific helpers for Label::patch_instructions.
+  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
+  void pd_patch_instruction(address branch, address target);
+
+  // The sethi macro handles optimizations and relocations
+private:
+  void internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable);
+public:
+  void sethi(const AddressLiteral& addrlit, Register d);
+  void patchable_sethi(const AddressLiteral& addrlit, Register d);
+
+  // compute the number of instructions for a sethi/set
+  static int  insts_for_sethi( address a, bool worst_case = false );
+  static int  worst_case_insts_for_set();
+
+  // set may be either setsw or setuw (high 32 bits may be zero or sign)
+private:
+  void internal_set(const AddressLiteral& al, Register d, bool ForceRelocatable);
+  static int insts_for_internal_set(intptr_t value);
+public:
+  void set(const AddressLiteral& addrlit, Register d);
+  void set(intptr_t value, Register d);
+  void set(address addr, Register d, RelocationHolder const& rspec);
+  static int insts_for_set(intptr_t value) { return insts_for_internal_set(value); }
+
+  void patchable_set(const AddressLiteral& addrlit, Register d);
+  void patchable_set(intptr_t value, Register d);
+  void set64(jlong value, Register d, Register tmp);
+  static int insts_for_set64(jlong value);
+
+  // sign-extend 32 to 64
+  inline void signx( Register s, Register d ) { sra( s, G0, d); }
+  inline void signx( Register d )             { sra( d, G0, d); }
+
+  inline void not1( Register s, Register d ) { xnor( s, G0, d ); }
+  inline void not1( Register d )             { xnor( d, G0, d ); }
+
+  inline void neg( Register s, Register d ) { sub( G0, s, d ); }
+  inline void neg( Register d )             { sub( G0, d, d ); }
+
+  inline void cas(  Register s1, Register s2, Register d) { casa( s1, s2, d, ASI_PRIMARY); }
+  inline void casx( Register s1, Register s2, Register d) { casxa(s1, s2, d, ASI_PRIMARY); }
+  // Functions for isolating 64 bit atomic swaps for LP64
+  // cas_ptr will perform cas for 32 bit VM's and casx for 64 bit VM's
+  inline void cas_ptr(  Register s1, Register s2, Register d) {
+#ifdef _LP64
+    casx( s1, s2, d );
+#else
+    cas( s1, s2, d );
+#endif
+  }
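+  // Illustrative semantics (register names hypothetical): cas_ptr(Raddr,
+  // Rcmp, Rset) compares M[Raddr] with Rcmp and, on a match, stores Rset to
+  // M[Raddr]; Rset always receives the prior memory contents either way.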
+
+  // Functions for isolating 64 bit shifts for LP64
+  inline void sll_ptr( Register s1, Register s2, Register d );
+  inline void sll_ptr( Register s1, int imm6a,   Register d );
+  inline void sll_ptr( Register s1, RegisterOrConstant s2, Register d );
+  inline void srl_ptr( Register s1, Register s2, Register d );
+  inline void srl_ptr( Register s1, int imm6a,   Register d );
+
+  // little-endian
+  inline void casl(  Register s1, Register s2, Register d) { casa( s1, s2, d, ASI_PRIMARY_LITTLE); }
+  inline void casxl( Register s1, Register s2, Register d) { casxa(s1, s2, d, ASI_PRIMARY_LITTLE); }
+
+  inline void inc(   Register d,  int const13 = 1 ) { add(   d, const13, d); }
+  inline void inccc( Register d,  int const13 = 1 ) { addcc( d, const13, d); }
+
+  inline void dec(   Register d,  int const13 = 1 ) { sub(   d, const13, d); }
+  inline void deccc( Register d,  int const13 = 1 ) { subcc( d, const13, d); }
+
+  using Assembler::add;
+  inline void add(Register s1, int simm13a, Register d, relocInfo::relocType rtype);
+  inline void add(Register s1, int simm13a, Register d, RelocationHolder const& rspec);
+  inline void add(Register s1, RegisterOrConstant s2, Register d, int offset = 0);
+  inline void add(const Address& a, Register d, int offset = 0);
+
+  using Assembler::andn;
+  inline void andn(  Register s1, RegisterOrConstant s2, Register d);
+
+  inline void btst( Register s1,  Register s2 ) { andcc( s1, s2, G0 ); }
+  inline void btst( int simm13a,  Register s )  { andcc( s,  simm13a, G0 ); }
+
+  inline void bset( Register s1,  Register s2 ) { or3( s1, s2, s2 ); }
+  inline void bset( int simm13a,  Register s )  { or3( s,  simm13a, s ); }
+
+  inline void bclr( Register s1,  Register s2 ) { andn( s1, s2, s2 ); }
+  inline void bclr( int simm13a,  Register s )  { andn( s,  simm13a, s ); }
+
+  inline void btog( Register s1,  Register s2 ) { xor3( s1, s2, s2 ); }
+  inline void btog( int simm13a,  Register s )  { xor3( s,  simm13a, s ); }
+
+  inline void clr( Register d ) { or3( G0, G0, d ); }
+
+  inline void clrb( Register s1, Register s2);
+  inline void clrh( Register s1, Register s2);
+  inline void clr(  Register s1, Register s2);
+  inline void clrx( Register s1, Register s2);
+
+  inline void clrb( Register s1, int simm13a);
+  inline void clrh( Register s1, int simm13a);
+  inline void clr(  Register s1, int simm13a);
+  inline void clrx( Register s1, int simm13a);
+
+  // copy & clear upper word
+  inline void clruw( Register s, Register d ) { srl( s, G0, d); }
+  // clear upper word
+  inline void clruwu( Register d ) { srl( d, G0, d); }
+
+  using Assembler::ldsb;
+  using Assembler::ldsh;
+  using Assembler::ldsw;
+  using Assembler::ldub;
+  using Assembler::lduh;
+  using Assembler::lduw;
+  using Assembler::ldx;
+  using Assembler::ldd;
+
+#ifdef ASSERT
+  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+  inline void ld(Register s1, ByteSize simm13a, Register d);
+#endif
+
+  inline void ld(Register s1, Register s2, Register d);
+  inline void ld(Register s1, int simm13a, Register d);
+
+  inline void ldsb(const Address& a, Register d, int offset = 0);
+  inline void ldsh(const Address& a, Register d, int offset = 0);
+  inline void ldsw(const Address& a, Register d, int offset = 0);
+  inline void ldub(const Address& a, Register d, int offset = 0);
+  inline void lduh(const Address& a, Register d, int offset = 0);
+  inline void lduw(const Address& a, Register d, int offset = 0);
+  inline void ldx( const Address& a, Register d, int offset = 0);
+  inline void ld(  const Address& a, Register d, int offset = 0);
+  inline void ldd( const Address& a, Register d, int offset = 0);
+
+  inline void ldub(Register s1, RegisterOrConstant s2, Register d );
+  inline void ldsb(Register s1, RegisterOrConstant s2, Register d );
+  inline void lduh(Register s1, RegisterOrConstant s2, Register d );
+  inline void ldsh(Register s1, RegisterOrConstant s2, Register d );
+  inline void lduw(Register s1, RegisterOrConstant s2, Register d );
+  inline void ldsw(Register s1, RegisterOrConstant s2, Register d );
+  inline void ldx( Register s1, RegisterOrConstant s2, Register d );
+  inline void ld(  Register s1, RegisterOrConstant s2, Register d );
+  inline void ldd( Register s1, RegisterOrConstant s2, Register d );
+
+  using Assembler::ldf;
+  inline void ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d);
+  inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0);
+
+  // membar pseudo instruction.  Takes the target memory model into account.
+  inline void membar( Assembler::Membar_mask_bits const7a );
+
+  // returns whether membar generates anything.
+  inline bool membar_has_effect( Assembler::Membar_mask_bits const7a );
+
+  // mov pseudo instructions
+  inline void mov( Register s,  Register d) {
+    if ( s != d )    or3( G0, s, d);
+    else             assert_not_delayed();  // Put something useful in the delay slot!
+  }
+
+  inline void mov_or_nop( Register s,  Register d) {
+    if ( s != d )    or3( G0, s, d);
+    else             nop();
+  }
+
+  inline void mov( int simm13a, Register d) { or3( G0, simm13a, d); }
+
+  using Assembler::prefetch;
+  inline void prefetch(const Address& a, PrefetchFcn F, int offset = 0);
+
+  using Assembler::stb;
+  using Assembler::sth;
+  using Assembler::stw;
+  using Assembler::stx;
+  using Assembler::std;
+
+#ifdef ASSERT
+  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+  inline void st(Register d, Register s1, ByteSize simm13a);
+#endif
+
+  inline void st(Register d, Register s1, Register s2);
+  inline void st(Register d, Register s1, int simm13a);
+
+  inline void stb(Register d, const Address& a, int offset = 0 );
+  inline void sth(Register d, const Address& a, int offset = 0 );
+  inline void stw(Register d, const Address& a, int offset = 0 );
+  inline void stx(Register d, const Address& a, int offset = 0 );
+  inline void st( Register d, const Address& a, int offset = 0 );
+  inline void std(Register d, const Address& a, int offset = 0 );
+
+  inline void stb(Register d, Register s1, RegisterOrConstant s2 );
+  inline void sth(Register d, Register s1, RegisterOrConstant s2 );
+  inline void stw(Register d, Register s1, RegisterOrConstant s2 );
+  inline void stx(Register d, Register s1, RegisterOrConstant s2 );
+  inline void std(Register d, Register s1, RegisterOrConstant s2 );
+  inline void st( Register d, Register s1, RegisterOrConstant s2 );
+
+  using Assembler::stf;
+  inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, RegisterOrConstant s2);
+  inline void stf(FloatRegisterImpl::Width w, FloatRegister d, const Address& a, int offset = 0);
+
+  // Note: offset is added to s2.
+  using Assembler::sub;
+  inline void sub(Register s1, RegisterOrConstant s2, Register d, int offset = 0);
+
+  using Assembler::swap;
+  inline void swap(Address& a, Register d, int offset = 0);
+
+  // address pseudos: make these names unlike instruction names to avoid confusion
+  inline intptr_t load_pc_address( Register reg, int bytes_to_skip );
+  inline void load_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
+  inline void load_bool_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
+  inline void load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
+  inline void store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0);
+  inline void store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0);
+  inline void jumpl_to(const AddressLiteral& addrlit, Register temp, Register d, int offset = 0);
+  inline void jump_to(const AddressLiteral& addrlit, Register temp, int offset = 0);
+  inline void jump_indirect_to(Address& a, Register temp, int ld_offset = 0, int jmp_offset = 0);
+
+  // ring buffer traceable jumps
+
+  void jmp2( Register r1, Register r2, const char* file, int line );
+  void jmp ( Register r1, int offset,  const char* file, int line );
+
+  void jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line);
+  void jump (const AddressLiteral& addrlit, Register temp,             int offset, const char* file, int line);
+
+
+  // argument pseudos:
+
+  inline void load_argument( Argument& a, Register  d );
+  inline void store_argument( Register s, Argument& a );
+  inline void store_ptr_argument( Register s, Argument& a );
+  inline void store_float_argument( FloatRegister s, Argument& a );
+  inline void store_double_argument( FloatRegister s, Argument& a );
+  inline void store_long_argument( Register s, Argument& a );
+
+  // handy macros:
+
+  inline void round_to( Register r, int modulus ) {
+    assert_not_delayed();
+    inc( r, modulus - 1 );
+    and3( r, -modulus, r );
+  }
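+
+  // e.g. round_to(r, 8) computes (r + 7) & -8, mapping 13 -> 16 and 16 -> 16.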
+
+  // --------------------------------------------------
+
+  // Functions for isolating 64 bit loads for LP64
+  // ld_ptr will perform ld for 32 bit VM's and ldx for 64 bit VM's
+  // st_ptr will perform st for 32 bit VM's and stx for 64 bit VM's
+  inline void ld_ptr(Register s1, Register s2, Register d);
+  inline void ld_ptr(Register s1, int simm13a, Register d);
+  inline void ld_ptr(Register s1, RegisterOrConstant s2, Register d);
+  inline void ld_ptr(const Address& a, Register d, int offset = 0);
+  inline void st_ptr(Register d, Register s1, Register s2);
+  inline void st_ptr(Register d, Register s1, int simm13a);
+  inline void st_ptr(Register d, Register s1, RegisterOrConstant s2);
+  inline void st_ptr(Register d, const Address& a, int offset = 0);
+
+#ifdef ASSERT
+  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+  inline void ld_ptr(Register s1, ByteSize simm13a, Register d);
+  inline void st_ptr(Register d, Register s1, ByteSize simm13a);
+#endif
+
+  // ld_long will perform ldd for 32 bit VM's and ldx for 64 bit VM's
+  // st_long will perform std for 32 bit VM's and stx for 64 bit VM's
+  inline void ld_long(Register s1, Register s2, Register d);
+  inline void ld_long(Register s1, int simm13a, Register d);
+  inline void ld_long(Register s1, RegisterOrConstant s2, Register d);
+  inline void ld_long(const Address& a, Register d, int offset = 0);
+  inline void st_long(Register d, Register s1, Register s2);
+  inline void st_long(Register d, Register s1, int simm13a);
+  inline void st_long(Register d, Register s1, RegisterOrConstant s2);
+  inline void st_long(Register d, const Address& a, int offset = 0);
+
+  // Helpers for address formation.
+  // - They emit only a move if s2 is a constant zero.
+  // - If dest is a constant and either s1 or s2 is a register, the temp argument is required and becomes the result.
+  // - If dest is a register and either s1 or s2 is a non-simm13 constant, the temp argument is required and used to materialize the constant.
+  RegisterOrConstant regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg);
+  RegisterOrConstant regcon_inc_ptr( RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg);
+  RegisterOrConstant regcon_sll_ptr( RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg);
+
+  RegisterOrConstant ensure_simm13_or_reg(RegisterOrConstant src, Register temp) {
+    if (is_simm13(src.constant_or_zero()))
+      return src;               // register or short constant
+    guarantee(temp != noreg, "constant offset overflow");
+    set(src.as_constant(), temp);
+    return temp;
+  }
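+
+  // simm13 spans [-4096, 4095]; e.g. a constant offset of 5000 does not fit,
+  // so ensure_simm13_or_reg(RegisterOrConstant(5000), Rtemp) materializes
+  // 5000 into Rtemp (a caller-supplied scratch register) and returns it.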
+
+  // --------------------------------------------------
+
+ public:
+  // traps as per trap.h (SPARC ABI?)
+
+  void breakpoint_trap();
+  void breakpoint_trap(Condition c, CC cc);
+  void flush_windows_trap();
+  void clean_windows_trap();
+  void get_psr_trap();
+  void set_psr_trap();
+
+  // V8/V9 flush_windows
+  void flush_windows();
+
+  // Support for serializing memory accesses between threads
+  void serialize_memory(Register thread, Register tmp1, Register tmp2);
+
+  // Stack frame creation/removal
+  void enter();
+  void leave();
+
+  // V8/V9 integer multiply
+  void mult(Register s1, Register s2, Register d);
+  void mult(Register s1, int simm13a, Register d);
+
+  // V8/V9 read and write of condition codes.
+  void read_ccr(Register d);
+  void write_ccr(Register s);
+
+  // Manipulation of C++ bools
+  // These are idioms to flag the need for care when accessing bools;
+  // on this platform we assume byte size.
+
+  inline void stbool(Register d, const Address& a) { stb(d, a); }
+  inline void ldbool(const Address& a, Register d) { ldub(a, d); }
+  inline void movbool( bool boolconst, Register d) { mov( (int) boolconst, d); }
+
+  // klass oop manipulations if compressed
+  void load_klass(Register src_oop, Register klass);
+  void store_klass(Register klass, Register dst_oop);
+  void store_klass_gap(Register s, Register dst_oop);
+
+   // oop manipulations
+  void load_heap_oop(const Address& s, Register d);
+  void load_heap_oop(Register s1, Register s2, Register d);
+  void load_heap_oop(Register s1, int simm13a, Register d);
+  void load_heap_oop(Register s1, RegisterOrConstant s2, Register d);
+  void store_heap_oop(Register d, Register s1, Register s2);
+  void store_heap_oop(Register d, Register s1, int simm13a);
+  void store_heap_oop(Register d, const Address& a, int offset = 0);
+
+  void encode_heap_oop(Register src, Register dst);
+  void encode_heap_oop(Register r) {
+    encode_heap_oop(r, r);
+  }
+  void decode_heap_oop(Register src, Register dst);
+  void decode_heap_oop(Register r) {
+    decode_heap_oop(r, r);
+  }
+  void encode_heap_oop_not_null(Register r);
+  void decode_heap_oop_not_null(Register r);
+  void encode_heap_oop_not_null(Register src, Register dst);
+  void decode_heap_oop_not_null(Register src, Register dst);
+
+  void encode_klass_not_null(Register r);
+  void decode_klass_not_null(Register r);
+  void encode_klass_not_null(Register src, Register dst);
+  void decode_klass_not_null(Register src, Register dst);
+
+  // Support for managing the JavaThread pointer (i.e., the reference to
+  // thread-local information).
+  void get_thread();                                // load G2_thread
+  void verify_thread();                             // verify G2_thread contents
+  void save_thread   (const Register thread_cache); // save to cache
+  void restore_thread(const Register thread_cache); // restore from cache
+
+  // Support for last Java frame (but use call_VM instead where possible)
+  void set_last_Java_frame(Register last_java_sp, Register last_Java_pc);
+  void reset_last_Java_frame(void);
+
+  // Call into the VM.
+  // Passes the thread pointer (in O0) as a prepended argument.
+  // Makes sure oop return values are visible to the GC.
+  void call_VM(Register oop_result, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
+  void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
+  void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
+  void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
+
+  // these overloadings are not presently used on SPARC:
+  void call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
+  void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
+  void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
+  void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
+
+  void call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments = 0);
+  void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1);
+  void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2);
+  void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3);
+
+  void get_vm_result  (Register oop_result);
+  void get_vm_result_2(Register metadata_result);
+
+  // the vm result is currently being hijacked for oop preservation
+  void set_vm_result(Register oop_result);
+
+  // Emit the CompiledIC call idiom
+  void ic_call(address entry, bool emit_delay = true);
+
+  // if call_VM_base was called with check_exceptions=false, then call
+  // check_and_forward_exception to handle exceptions when it is safe
+  void check_and_forward_exception(Register scratch_reg);
+
+ private:
+  // For V8
+  void read_ccr_trap(Register ccr_save);
+  void write_ccr_trap(Register ccr_save1, Register scratch1, Register scratch2);
+
+#ifdef ASSERT
+  // For V8 debugging.  Uses V8 instruction sequence and checks
+  // result with V9 instructions rdccr and wrccr.
+  // Uses Gscatch and Gscatch2
+  void read_ccr_v8_assert(Register ccr_save);
+  void write_ccr_v8_assert(Register ccr_save);
+#endif // ASSERT
+
+ public:
+
+  // Write to the card table - the tmp register is destroyed afterwards.
+  void card_table_write(jbyte* byte_map_base, Register tmp, Register obj);
+
+  void card_write_barrier_post(Register store_addr, Register new_val, Register tmp);
+
+#ifndef SERIALGC
+  // General G1 pre-barrier generator.
+  void g1_write_barrier_pre(Register obj, Register index, int offset, Register pre_val, Register tmp, bool preserve_o_regs);
+
+  // General G1 post-barrier generator
+  void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp);
+#endif // SERIALGC
+
+  // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
+  void push_fTOS();
+
+  // pops double TOS element from CPU stack and pushes on FPU stack
+  void pop_fTOS();
+
+  void empty_FPU_stack();
+
+  void push_IU_state();
+  void pop_IU_state();
+
+  void push_FPU_state();
+  void pop_FPU_state();
+
+  void push_CPU_state();
+  void pop_CPU_state();
+
+  // if heap base register is used - reinit it with the correct value
+  void reinit_heapbase();
+
+  // Debugging
+  void _verify_oop(Register reg, const char * msg, const char * file, int line);
+  void _verify_oop_addr(Address addr, const char * msg, const char * file, int line);
+
+  // TODO: verify_method and klass metadata (compare against vptr?)
+  void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
+  void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
+
+#define verify_oop(reg) _verify_oop(reg, "broken oop " #reg, __FILE__, __LINE__)
+#define verify_oop_addr(addr) _verify_oop_addr(addr, "broken oop addr ", __FILE__, __LINE__)
+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
+#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
+
+  // the verify_oop macros above are active only with +VerifyOops
+  void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); // active only with +VerifyFPU
+  void stop(const char* msg);                          // prints msg, dumps registers and stops execution
+  void warn(const char* msg);                          // prints msg but does not stop
+  void untested(const char* what = "");
+  void unimplemented(const char* what = "")      { char* b = new char[1024];  jio_snprintf(b, 1024, "unimplemented: %s", what);  stop(b); }
+  void should_not_reach_here()                   { stop("should not reach here"); }
+  void print_CPU_state();
+
+  // oops in code
+  AddressLiteral allocate_oop_address(jobject obj);                          // allocate_index
+  AddressLiteral constant_oop_address(jobject obj);                          // find_index
+  inline void    set_oop             (jobject obj, Register d);              // uses allocate_oop_address
+  inline void    set_oop_constant    (jobject obj, Register d);              // uses constant_oop_address
+  inline void    set_oop             (const AddressLiteral& obj_addr, Register d); // same as load_address
+
+  // metadata in code that we have to keep track of
+  AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index
+  AddressLiteral constant_metadata_address(Metadata* obj); // find_index
+  inline void    set_metadata             (Metadata* obj, Register d);              // uses allocate_metadata_address
+  inline void    set_metadata_constant    (Metadata* obj, Register d);              // uses constant_metadata_address
+  inline void    set_metadata             (const AddressLiteral& obj_addr, Register d); // same as load_address
+
+  void set_narrow_oop( jobject obj, Register d );
+  void set_narrow_klass( Klass* k, Register d );
+
+  // nop padding
+  void align(int modulus);
+
+  // declare a safepoint
+  void safepoint();
+
+  // factor out part of stop into subroutine to save space
+  void stop_subroutine();
+  // factor out part of verify_oop into subroutine to save space
+  void verify_oop_subroutine();
+
+  // side-door communication with signalHandler in os_solaris.cpp
+  static address _verify_oop_implicit_branch[3];
+
+  int total_frame_size_in_bytes(int extraWords);
+
+  // used when extraWords is known statically
+  void save_frame(int extraWords = 0);
+  void save_frame_c1(int size_in_bytes);
+  // make a frame, and simultaneously pass up one or two register values
+  // into the new register window
+  void save_frame_and_mov(int extraWords, Register s1, Register d1, Register s2 = Register(), Register d2 = Register());
+
+  // given the number of (outgoing) params, calculate the number of words needed on the frame
+  void calc_mem_param_words(Register Rparam_words, Register Rresult);
+
+  // used to calculate frame size dynamically
+  // result is in bytes and must be negated for the save instruction
+  void calc_frame_size(Register extraWords, Register resultReg);
+
+  // calc and also save
+  void calc_frame_size_and_save(Register extraWords, Register resultReg);
+
+  static void debug(char* msg, RegistersForDebugging* outWindow);
+
+  // implementations of bytecodes used by both interpreter and compiler
+
+  void lcmp( Register Ra_hi, Register Ra_low,
+             Register Rb_hi, Register Rb_low,
+             Register Rresult);
+
+  void lneg( Register Rhi, Register Rlow );
+
+  void lshl(  Register Rin_high,  Register Rin_low,  Register Rcount,
+              Register Rout_high, Register Rout_low, Register Rtemp );
+
+  void lshr(  Register Rin_high,  Register Rin_low,  Register Rcount,
+              Register Rout_high, Register Rout_low, Register Rtemp );
+
+  void lushr( Register Rin_high,  Register Rin_low,  Register Rcount,
+              Register Rout_high, Register Rout_low, Register Rtemp );
+
+#ifdef _LP64
+  void lcmp( Register Ra, Register Rb, Register Rresult);
+#endif
+
+  // Load and store values by size and signed-ness
+  void load_sized_value( Address src, Register dst, size_t size_in_bytes, bool is_signed);
+  void store_sized_value(Register src, Address dst, size_t size_in_bytes);
+
+  void float_cmp( bool is_float, int unordered_result,
+                  FloatRegister Fa, FloatRegister Fb,
+                  Register Rresult);
+
+  void fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d);
+  void fneg( FloatRegisterImpl::Width w, FloatRegister sd ) { Assembler::fneg(w, sd); }
+  void fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d);
+  void fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d);
+
+  void save_all_globals_into_locals();
+  void restore_globals_from_locals();
+
+  void casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg,
+    address lock_addr=0, bool use_call_vm=false);
+  void cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg,
+    address lock_addr=0, bool use_call_vm=false);
+  void casn (Register addr_reg, Register cmp_reg, Register set_reg) ;
+
+  // These set the icc condition code to equal if the lock succeeded
+  // and to notEqual if it failed and a slow case is required
+  void compiler_lock_object(Register Roop, Register Rmark, Register Rbox,
+                            Register Rscratch,
+                            BiasedLockingCounters* counters = NULL,
+                            bool try_bias = UseBiasedLocking);
+  void compiler_unlock_object(Register Roop, Register Rmark, Register Rbox,
+                              Register Rscratch,
+                              bool try_bias = UseBiasedLocking);
+
+  // Biased locking support
+  // Upon entry, lock_reg must point to the lock record on the stack,
+  // obj_reg must contain the target object, and mark_reg must contain
+  // the target object's header.
+  // Destroys mark_reg if an attempt is made to bias an anonymously
+  // biased lock. In this case a failure will go either to the slow
+  // case or fall through with the notEqual condition code set with
+  // the expectation that the slow case in the runtime will be called.
+  // In the fall-through case where the CAS-based lock is done,
+  // mark_reg is not destroyed.
+  void biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg,
+                            Label& done, Label* slow_case = NULL,
+                            BiasedLockingCounters* counters = NULL);
+  // Upon entry, the base register of mark_addr must contain the oop.
+  // Destroys temp_reg.
+
+  // If allow_delay_slot_filling is set to true, the next instruction
+  // emitted after this one will go in an annulled delay slot if the
+  // biased locking exit case failed.
+  void biased_locking_exit(Address mark_addr, Register temp_reg, Label& done, bool allow_delay_slot_filling = false);
+
+  // allocation
+  void eden_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+  void tlab_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+  void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
+  void incr_allocated_bytes(RegisterOrConstant size_in_bytes,
+                            Register t1, Register t2);
+
+  // interface method calling
+  void lookup_interface_method(Register recv_klass,
+                               Register intf_klass,
+                               RegisterOrConstant itable_index,
+                               Register method_result,
+                               Register temp_reg, Register temp2_reg,
+                               Label& no_such_interface);
+
+  // virtual method calling
+  void lookup_virtual_method(Register recv_klass,
+                             RegisterOrConstant vtable_index,
+                             Register method_result);
+
+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except temp_reg and temp2_reg.
+  // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The temp_reg can be noreg, if no temps are available.
+  // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
+  // Updates the sub's secondary super cache as necessary.
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Register temp3_reg,
+                                     Register temp4_reg,
+                                     Label* L_success,
+                                     Label* L_failure);
+
+  // Simplified, combined version, good for typical uses.
+  // Falls through on failure.
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Register temp2_reg,
+                           Label& L_success);
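+
+  // Typical (illustrative) instanceof-style use:
+  //   Label L_ok;
+  //   check_klass_subtype(Rsub, Rsuper, Rtmp1, Rtmp2, L_ok);
+  //   ... failure path (fall-through) ...
+  //   bind(L_ok);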
+
+  // method handles (JSR 292)
+  // offset relative to Gargs of argument at tos[arg_slot].
+  // (arg_slot == 0 means the last argument, not the first).
+  RegisterOrConstant argument_offset(RegisterOrConstant arg_slot,
+                                     Register temp_reg,
+                                     int extra_slot_offset = 0);
+  // Address of Gargs and argument_offset.
+  Address            argument_address(RegisterOrConstant arg_slot,
+                                      Register temp_reg = noreg,
+                                      int extra_slot_offset = 0);
+
+  // Stack overflow checking
+
+  // Note: this clobbers G3_scratch
+  void bang_stack_with_offset(int offset) {
+    // stack grows down, caller passes positive offset
+    assert(offset > 0, "must bang with positive offset");
+    set((-offset)+STACK_BIAS, G3_scratch);
+    st(G0, SP, G3_scratch);
+  }
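+
+  // e.g. bang_stack_with_offset(page_size) stores G0 to
+  // [SP + STACK_BIAS - page_size], touching the first page below the
+  // current stack pointer (sketch; page_size stands for the VM page size).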
+
+  // Writes to stack successive pages until offset reached to check for
+  // stack overflow + shadow pages.  Clobbers tsp and scratch registers.
+  void bang_stack_size(Register Rsize, Register Rtsp, Register Rscratch);
+
+  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset);
+
+  void verify_tlab();
+
+  Condition negate_condition(Condition cond);
+
+  // Helper functions for statistics gathering.
+  // Conditionally (non-atomically) increments passed counter address, preserving condition codes.
+  void cond_inc(Condition cond, address counter_addr, Register Rtemp1, Register Rtemp2);
+  // Unconditional increment.
+  void inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2);
+  void inc_counter(int*    counter_addr, Register Rtmp1, Register Rtmp2);
+
+  // Compare char[] arrays aligned to 4 bytes.
+  void char_arrays_equals(Register ary1, Register ary2,
+                          Register limit, Register result,
+                          Register chr1, Register chr2, Label& Ldone);
+  // Use BIS for zeroing
+  void bis_zeroing(Register to, Register count, Register temp, Label& Ldone);
+
+#undef VIRTUAL
+};
+
+/**
+ * class SkipIfEqual:
+ *
+ * Instantiating this class emits assembly code that jumps around any code
+ * emitted between the creation of the instance and its automatic destruction
+ * at the end of the scope block, depending on the run-time value of the flag
+ * passed to the constructor.
+ */
+class SkipIfEqual : public StackObj {
+ private:
+  MacroAssembler* _masm;
+  Label _label;
+
+ public:
+   // 'temp' is a temp register that this object can use (and trash)
+   SkipIfEqual(MacroAssembler*, Register temp,
+               const bool* flag_addr, Assembler::Condition condition);
+   ~SkipIfEqual();
+};
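+
+// A typical (illustrative) use guards instrumentation behind a runtime flag:
+//
+//   { SkipIfEqual skip(masm, temp_reg, &DTraceAllocProbes, Assembler::zero);
+//     // probe code: emitted, but jumped over when the flag byte is zero
+//   } // the destructor binds the skip-target label here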
+
+#endif // CPU_SPARC_VM_MACROASSEMBLER_SPARC_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,757 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SPARC_VM_MACROASSEMBLER_SPARC_INLINE_HPP
+#define CPU_SPARC_VM_MACROASSEMBLER_SPARC_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+
+inline bool Address::is_simm13(int offset) { return Assembler::is_simm13(disp() + offset); }
+
+
+inline int AddressLiteral::low10() const {
+  return Assembler::low10(value());
+}
+
+
+inline void MacroAssembler::pd_patch_instruction(address branch, address target) {
+  jint& stub_inst = *(jint*) branch;
+  stub_inst = patched_branch(target - branch, stub_inst, 0);
+}
+
+// Use the right loads/stores for the platform
+inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) {
+#ifdef _LP64
+  Assembler::ldx(s1, s2, d);
+#else
+             ld( s1, s2, d);
+#endif
+}
+
+inline void MacroAssembler::ld_ptr( Register s1, int simm13a, Register d ) {
+#ifdef _LP64
+  Assembler::ldx(s1, simm13a, d);
+#else
+             ld( s1, simm13a, d);
+#endif
+}
+
+#ifdef ASSERT
+// ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+inline void MacroAssembler::ld_ptr( Register s1, ByteSize simm13a, Register d ) {
+  ld_ptr(s1, in_bytes(simm13a), d);
+}
+#endif
+
+inline void MacroAssembler::ld_ptr( Register s1, RegisterOrConstant s2, Register d ) {
+#ifdef _LP64
+  ldx(s1, s2, d);
+#else
+  ld( s1, s2, d);
+#endif
+}
+
+inline void MacroAssembler::ld_ptr(const Address& a, Register d, int offset) {
+#ifdef _LP64
+  ldx(a, d, offset);
+#else
+  ld( a, d, offset);
+#endif
+}
+
+inline void MacroAssembler::st_ptr( Register d, Register s1, Register s2 ) {
+#ifdef _LP64
+  Assembler::stx(d, s1, s2);
+#else
+             st( d, s1, s2);
+#endif
+}
+
+inline void MacroAssembler::st_ptr( Register d, Register s1, int simm13a ) {
+#ifdef _LP64
+  Assembler::stx(d, s1, simm13a);
+#else
+             st( d, s1, simm13a);
+#endif
+}
+
+#ifdef ASSERT
+// ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+inline void MacroAssembler::st_ptr( Register d, Register s1, ByteSize simm13a ) {
+  st_ptr(d, s1, in_bytes(simm13a));
+}
+#endif
+
+inline void MacroAssembler::st_ptr( Register d, Register s1, RegisterOrConstant s2 ) {
+#ifdef _LP64
+  stx(d, s1, s2);
+#else
+  st( d, s1, s2);
+#endif
+}
+
+inline void MacroAssembler::st_ptr(Register d, const Address& a, int offset) {
+#ifdef _LP64
+  stx(d, a, offset);
+#else
+  st( d, a, offset);
+#endif
+}
+
+// Use the right loads/stores for the platform
+inline void MacroAssembler::ld_long( Register s1, Register s2, Register d ) {
+#ifdef _LP64
+  Assembler::ldx(s1, s2, d);
+#else
+  Assembler::ldd(s1, s2, d);
+#endif
+}
+
+inline void MacroAssembler::ld_long( Register s1, int simm13a, Register d ) {
+#ifdef _LP64
+  Assembler::ldx(s1, simm13a, d);
+#else
+  Assembler::ldd(s1, simm13a, d);
+#endif
+}
+
+inline void MacroAssembler::ld_long( Register s1, RegisterOrConstant s2, Register d ) {
+#ifdef _LP64
+  ldx(s1, s2, d);
+#else
+  ldd(s1, s2, d);
+#endif
+}
+
+inline void MacroAssembler::ld_long(const Address& a, Register d, int offset) {
+#ifdef _LP64
+  ldx(a, d, offset);
+#else
+  ldd(a, d, offset);
+#endif
+}
+
+inline void MacroAssembler::st_long( Register d, Register s1, Register s2 ) {
+#ifdef _LP64
+  Assembler::stx(d, s1, s2);
+#else
+  Assembler::std(d, s1, s2);
+#endif
+}
+
+inline void MacroAssembler::st_long( Register d, Register s1, int simm13a ) {
+#ifdef _LP64
+  Assembler::stx(d, s1, simm13a);
+#else
+  Assembler::std(d, s1, simm13a);
+#endif
+}
+
+inline void MacroAssembler::st_long( Register d, Register s1, RegisterOrConstant s2 ) {
+#ifdef _LP64
+  stx(d, s1, s2);
+#else
+  std(d, s1, s2);
+#endif
+}
+
+inline void MacroAssembler::st_long( Register d, const Address& a, int offset ) {
+#ifdef _LP64
+  stx(d, a, offset);
+#else
+  std(d, a, offset);
+#endif
+}
+
+// Functions for isolating 64 bit shifts for LP64
+
+inline void MacroAssembler::sll_ptr( Register s1, Register s2, Register d ) {
+#ifdef _LP64
+  Assembler::sllx(s1, s2, d);
+#else
+  Assembler::sll( s1, s2, d);
+#endif
+}
+
+inline void MacroAssembler::sll_ptr( Register s1, int imm6a,   Register d ) {
+#ifdef _LP64
+  Assembler::sllx(s1, imm6a, d);
+#else
+  Assembler::sll( s1, imm6a, d);
+#endif
+}
+
+inline void MacroAssembler::srl_ptr( Register s1, Register s2, Register d ) {
+#ifdef _LP64
+  Assembler::srlx(s1, s2, d);
+#else
+  Assembler::srl( s1, s2, d);
+#endif
+}
+
+inline void MacroAssembler::srl_ptr( Register s1, int imm6a,   Register d ) {
+#ifdef _LP64
+  Assembler::srlx(s1, imm6a, d);
+#else
+  Assembler::srl( s1, imm6a, d);
+#endif
+}
+
+inline void MacroAssembler::sll_ptr( Register s1, RegisterOrConstant s2, Register d ) {
+  if (s2.is_register())  sll_ptr(s1, s2.as_register(), d);
+  else                   sll_ptr(s1, s2.as_constant(), d);
+}
+
+// Use the right branch for the platform
+
+inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) {
+  if (VM_Version::v9_instructions_work())
+    Assembler::bp(c, a, icc, p, d, rt);
+  else
+    Assembler::br(c, a, d, rt);
+}
+
+inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
+  br(c, a, p, target(L));
+}
+
+
+// Branch that tests either xcc or icc depending on the
+// architecture compiled (LP64 or not)
+inline void MacroAssembler::brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) {
+#ifdef _LP64
+    Assembler::bp(c, a, xcc, p, d, rt);
+#else
+    MacroAssembler::br(c, a, p, d, rt);
+#endif
+}
+
+inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) {
+  brx(c, a, p, target(L));
+}
+
+inline void MacroAssembler::ba( Label& L ) {
+  br(always, false, pt, L);
+}
+
+// Warning: V9 only functions
+inline void MacroAssembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) {
+  Assembler::bp(c, a, cc, p, d, rt);
+}
+
+inline void MacroAssembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) {
+  Assembler::bp(c, a, cc, p, L);
+}
+
+inline void MacroAssembler::fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) {
+  if (VM_Version::v9_instructions_work())
+    fbp(c, a, fcc0, p, d, rt);
+  else
+    Assembler::fb(c, a, d, rt);
+}
+
+inline void MacroAssembler::fb( Condition c, bool a, Predict p, Label& L ) {
+  fb(c, a, p, target(L));
+}
+
+inline void MacroAssembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) {
+  Assembler::fbp(c, a, cc, p, d, rt);
+}
+
+inline void MacroAssembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) {
+  Assembler::fbp(c, a, cc, p, L);
+}
+
+inline void MacroAssembler::jmp( Register s1, Register s2 ) { jmpl( s1, s2, G0 ); }
+inline void MacroAssembler::jmp( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, G0, rspec); }
+
+inline bool MacroAssembler::is_far_target(address d) {
+  if (ForceUnreachable) {
+    // References outside the code cache should be treated as far
+    return d < CodeCache::low_bound() || d > CodeCache::high_bound();
+  }
+  return !is_in_wdisp30_range(d, CodeCache::low_bound()) || !is_in_wdisp30_range(d, CodeCache::high_bound());
+}
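+
+// wdisp30 is a 30-bit word displacement, i.e. roughly +/-2GB of reach, so a
+// target is "far" only when it cannot be reached from both ends of the code cache.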
+
+// Call with a check to see if we need to deal with the added
+// expense of relocation and if we overflow the displacement
+// of the quick call instruction.
+inline void MacroAssembler::call( address d, relocInfo::relocType rt ) {
+#ifdef _LP64
+  intptr_t disp;
+  // NULL is ok because it will be relocated later.
+  // Must change NULL to a reachable address in order to
+  // pass asserts here and in wdisp.
+  if ( d == NULL )
+    d = pc();
+
+  // Is this address within range of the call instruction?
+  // If not, use the expensive instruction sequence
+  if (is_far_target(d)) {
+    relocate(rt);
+    AddressLiteral dest(d);
+    jumpl_to(dest, O7, O7);
+  } else {
+    Assembler::call(d, rt);
+  }
+#else
+  Assembler::call( d, rt );
+#endif
+}
+
+inline void MacroAssembler::call( Label& L,   relocInfo::relocType rt ) {
+  MacroAssembler::call( target(L), rt);
+}
+
+
+
+inline void MacroAssembler::callr( Register s1, Register s2 ) { jmpl( s1, s2, O7 ); }
+inline void MacroAssembler::callr( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, O7, rspec); }
+
+// prefetch instruction
+inline void MacroAssembler::iprefetch( address d, relocInfo::relocType rt ) {
+  if (VM_Version::v9_instructions_work())
+    Assembler::bp( never, true, xcc, pt, d, rt );
+}
+inline void MacroAssembler::iprefetch( Label& L) { iprefetch( target(L) ); }
+
+
+// clobbers o7 on V8!!
+// returns the delta from the obtained pc to the address after
+inline int MacroAssembler::get_pc( Register d ) {
+  int x = offset();
+  if (VM_Version::v9_instructions_work())
+    rdpc(d);
+  else {
+    Label lbl;
+    Assembler::call(lbl, relocInfo::none);  // No relocation as this is call to pc+0x8
+    if (d == O7)  delayed()->nop();
+    else          delayed()->mov(O7, d);
+    bind(lbl);
+  }
+  return offset() - x;
+}
+
+
+// Note:  All MacroAssembler::set_foo functions are defined out-of-line.
+
+
+// Loads the current PC of the following instruction as an immediate value in
+// 2 instructions.  All PCs in the CodeCache are within 2 Gig of each other.
+inline intptr_t MacroAssembler::load_pc_address( Register reg, int bytes_to_skip ) {
+  intptr_t thepc = (intptr_t)pc() + 2*BytesPerInstWord + bytes_to_skip;
+#ifdef _LP64
+  Unimplemented();
+#else
+  Assembler::sethi(   thepc & ~0x3ff, reg, internal_word_Relocation::spec((address)thepc));
+             add(reg, thepc &  0x3ff, reg, internal_word_Relocation::spec((address)thepc));
+#endif
+  return thepc;
+}
+
+
+inline void MacroAssembler::load_contents(const AddressLiteral& addrlit, Register d, int offset) {
+  assert_not_delayed();
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, d);
+  } else {
+    sethi(addrlit, d);
+  }
+  ld(d, addrlit.low10() + offset, d);
+}
+
+
+inline void MacroAssembler::load_bool_contents(const AddressLiteral& addrlit, Register d, int offset) {
+  assert_not_delayed();
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, d);
+  } else {
+    sethi(addrlit, d);
+  }
+  ldub(d, addrlit.low10() + offset, d);
+}
+
+
+inline void MacroAssembler::load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset) {
+  assert_not_delayed();
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, d);
+  } else {
+    sethi(addrlit, d);
+  }
+  ld_ptr(d, addrlit.low10() + offset, d);
+}
+
+
+inline void MacroAssembler::store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) {
+  assert_not_delayed();
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, temp);
+  } else {
+    sethi(addrlit, temp);
+  }
+  st(s, temp, addrlit.low10() + offset);
+}
+
+
+inline void MacroAssembler::store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) {
+  assert_not_delayed();
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, temp);
+  } else {
+    sethi(addrlit, temp);
+  }
+  st_ptr(s, temp, addrlit.low10() + offset);
+}
+
+
+// This code sequence is relocatable to any address, even on LP64.
+inline void MacroAssembler::jumpl_to(const AddressLiteral& addrlit, Register temp, Register d, int offset) {
+  assert_not_delayed();
+  // Force fixed length sethi because NativeJump and NativeFarCall don't handle
+  // variable length instruction streams.
+  patchable_sethi(addrlit, temp);
+  jmpl(temp, addrlit.low10() + offset, d);
+}
+
+
+inline void MacroAssembler::jump_to(const AddressLiteral& addrlit, Register temp, int offset) {
+  jumpl_to(addrlit, temp, G0, offset);
+}
+
+
+inline void MacroAssembler::jump_indirect_to(Address& a, Register temp,
+                                             int ld_offset, int jmp_offset) {
+  assert_not_delayed();
+  //sethi(al);                   // emitting the sethi is the caller's responsibility here
+  ld_ptr(a, temp, ld_offset);
+  jmp(temp, jmp_offset);
+}
+
+
+inline void MacroAssembler::set_metadata(Metadata* obj, Register d) {
+  set_metadata(allocate_metadata_address(obj), d);
+}
+
+inline void MacroAssembler::set_metadata_constant(Metadata* obj, Register d) {
+  set_metadata(constant_metadata_address(obj), d);
+}
+
+inline void MacroAssembler::set_metadata(const AddressLiteral& obj_addr, Register d) {
+  assert(obj_addr.rspec().type() == relocInfo::metadata_type, "must be a metadata reloc");
+  set(obj_addr, d);
+}
+
+inline void MacroAssembler::set_oop(jobject obj, Register d) {
+  set_oop(allocate_oop_address(obj), d);
+}
+
+
+inline void MacroAssembler::set_oop_constant(jobject obj, Register d) {
+  set_oop(constant_oop_address(obj), d);
+}
+
+
+inline void MacroAssembler::set_oop(const AddressLiteral& obj_addr, Register d) {
+  assert(obj_addr.rspec().type() == relocInfo::oop_type, "must be an oop reloc");
+  set(obj_addr, d);
+}
+
+
+inline void MacroAssembler::load_argument( Argument& a, Register  d ) {
+  if (a.is_register())
+    mov(a.as_register(), d);
+  else
+    ld (a.as_address(),  d);
+}
+
+inline void MacroAssembler::store_argument( Register s, Argument& a ) {
+  if (a.is_register())
+    mov(s, a.as_register());
+  else
+    st_ptr (s, a.as_address());         // ABI says everything is right justified.
+}
+
+inline void MacroAssembler::store_ptr_argument( Register s, Argument& a ) {
+  if (a.is_register())
+    mov(s, a.as_register());
+  else
+    st_ptr (s, a.as_address());
+}
+
+
+#ifdef _LP64
+inline void MacroAssembler::store_float_argument( FloatRegister s, Argument& a ) {
+  if (a.is_float_register())
+    // The V9 ABI passes floats in F1, F3, F5 instead of O0, O1, O2.
+    fmov(FloatRegisterImpl::S, s, a.as_float_register() );
+  else
+    // Floats are stored in the high half of the stack entry
+    // The low half is undefined per the ABI.
+    stf(FloatRegisterImpl::S, s, a.as_address(), sizeof(jfloat));
+}
+
+inline void MacroAssembler::store_double_argument( FloatRegister s, Argument& a ) {
+  if (a.is_float_register())
+    // The V9 ABI passes doubles in D0, D2, D4 instead of O0, O1, O2.
+    fmov(FloatRegisterImpl::D, s, a.as_double_register() );
+  else
+    stf(FloatRegisterImpl::D, s, a.as_address());
+}
+
+inline void MacroAssembler::store_long_argument( Register s, Argument& a ) {
+  if (a.is_register())
+    mov(s, a.as_register());
+  else
+    stx(s, a.as_address());
+}
+#endif
+
+inline void MacroAssembler::add(Register s1, int simm13a, Register d, relocInfo::relocType rtype) {
+  relocate(rtype);
+  add(s1, simm13a, d);
+}
+inline void MacroAssembler::add(Register s1, int simm13a, Register d, RelocationHolder const& rspec) {
+  relocate(rspec);
+  add(s1, simm13a, d);
+}
+
+// form effective addresses this way:
+inline void MacroAssembler::add(const Address& a, Register d, int offset) {
+  if (a.has_index())   add(a.base(), a.index(),         d);
+  else               { add(a.base(), a.disp() + offset, d, a.rspec(offset)); offset = 0; }
+  if (offset != 0)     add(d,        offset,            d);
+}
+inline void MacroAssembler::add(Register s1, RegisterOrConstant s2, Register d, int offset) {
+  if (s2.is_register())  add(s1, s2.as_register(),          d);
+  else                 { add(s1, s2.as_constant() + offset, d); offset = 0; }
+  if (offset != 0)       add(d,  offset,                    d);
+}
+
+inline void MacroAssembler::andn(Register s1, RegisterOrConstant s2, Register d) {
+  if (s2.is_register())  andn(s1, s2.as_register(), d);
+  else                   andn(s1, s2.as_constant(), d);
+}
+
+inline void MacroAssembler::clrb( Register s1, Register s2) { stb( G0, s1, s2 ); }
+inline void MacroAssembler::clrh( Register s1, Register s2) { sth( G0, s1, s2 ); }
+inline void MacroAssembler::clr(  Register s1, Register s2) { stw( G0, s1, s2 ); }
+inline void MacroAssembler::clrx( Register s1, Register s2) { stx( G0, s1, s2 ); }
+
+inline void MacroAssembler::clrb( Register s1, int simm13a) { stb( G0, s1, simm13a); }
+inline void MacroAssembler::clrh( Register s1, int simm13a) { sth( G0, s1, simm13a); }
+inline void MacroAssembler::clr(  Register s1, int simm13a) { stw( G0, s1, simm13a); }
+inline void MacroAssembler::clrx( Register s1, int simm13a) { stx( G0, s1, simm13a); }
+
+#ifdef _LP64
+// Make all 32 bit loads signed so 64 bit registers maintain proper sign
+inline void MacroAssembler::ld(  Register s1, Register s2, Register d)      { ldsw( s1, s2, d); }
+inline void MacroAssembler::ld(  Register s1, int simm13a, Register d)      { ldsw( s1, simm13a, d); }
+#else
+inline void MacroAssembler::ld(  Register s1, Register s2, Register d)      { lduw( s1, s2, d); }
+inline void MacroAssembler::ld(  Register s1, int simm13a, Register d)      { lduw( s1, simm13a, d); }
+#endif
+
+#ifdef ASSERT
+  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+# ifdef _LP64
+inline void MacroAssembler::ld(Register s1, ByteSize simm13a, Register d) { ldsw( s1, in_bytes(simm13a), d); }
+# else
+inline void MacroAssembler::ld(Register s1, ByteSize simm13a, Register d) { lduw( s1, in_bytes(simm13a), d); }
+# endif
+#endif
+
+inline void MacroAssembler::ld(  const Address& a, Register d, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); ld(  a.base(), a.index(),         d); }
+  else               {                          ld(  a.base(), a.disp() + offset, d); }
+}
+
+inline void MacroAssembler::ldsb(const Address& a, Register d, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); ldsb(a.base(), a.index(),         d); }
+  else               {                          ldsb(a.base(), a.disp() + offset, d); }
+}
+inline void MacroAssembler::ldsh(const Address& a, Register d, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); ldsh(a.base(), a.index(),         d); }
+  else               {                          ldsh(a.base(), a.disp() + offset, d); }
+}
+inline void MacroAssembler::ldsw(const Address& a, Register d, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); ldsw(a.base(), a.index(),         d); }
+  else               {                          ldsw(a.base(), a.disp() + offset, d); }
+}
+inline void MacroAssembler::ldub(const Address& a, Register d, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); ldub(a.base(), a.index(),         d); }
+  else               {                          ldub(a.base(), a.disp() + offset, d); }
+}
+inline void MacroAssembler::lduh(const Address& a, Register d, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); lduh(a.base(), a.index(),         d); }
+  else               {                          lduh(a.base(), a.disp() + offset, d); }
+}
+inline void MacroAssembler::lduw(const Address& a, Register d, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); lduw(a.base(), a.index(),         d); }
+  else               {                          lduw(a.base(), a.disp() + offset, d); }
+}
+inline void MacroAssembler::ldd( const Address& a, Register d, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); ldd( a.base(), a.index(),         d); }
+  else               {                          ldd( a.base(), a.disp() + offset, d); }
+}
+inline void MacroAssembler::ldx( const Address& a, Register d, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); ldx( a.base(), a.index(),         d); }
+  else               {                          ldx( a.base(), a.disp() + offset, d); }
+}
+
+inline void MacroAssembler::ldub(Register s1, RegisterOrConstant s2, Register d) { ldub(Address(s1, s2), d); }
+inline void MacroAssembler::ldsb(Register s1, RegisterOrConstant s2, Register d) { ldsb(Address(s1, s2), d); }
+inline void MacroAssembler::lduh(Register s1, RegisterOrConstant s2, Register d) { lduh(Address(s1, s2), d); }
+inline void MacroAssembler::ldsh(Register s1, RegisterOrConstant s2, Register d) { ldsh(Address(s1, s2), d); }
+inline void MacroAssembler::lduw(Register s1, RegisterOrConstant s2, Register d) { lduw(Address(s1, s2), d); }
+inline void MacroAssembler::ldsw(Register s1, RegisterOrConstant s2, Register d) { ldsw(Address(s1, s2), d); }
+inline void MacroAssembler::ldx( Register s1, RegisterOrConstant s2, Register d) { ldx( Address(s1, s2), d); }
+inline void MacroAssembler::ld(  Register s1, RegisterOrConstant s2, Register d) { ld(  Address(s1, s2), d); }
+inline void MacroAssembler::ldd( Register s1, RegisterOrConstant s2, Register d) { ldd( Address(s1, s2), d); }
+
+inline void MacroAssembler::ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d) {
+  if (s2.is_register())  ldf(w, s1, s2.as_register(), d);
+  else                   ldf(w, s1, s2.as_constant(), d);
+}
+
+inline void MacroAssembler::ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset) {
+  relocate(a.rspec(offset));
+  ldf(w, a.base(), a.disp() + offset, d);
+}
+
+// Returns whether membar would emit any instruction; this predicate must
+// mirror the logic of membar() below.
+inline bool MacroAssembler::membar_has_effect( Membar_mask_bits const7a ) {
+  if( !os::is_MP() ) return false;  // Not needed on single CPU
+  if( VM_Version::v9_instructions_work() ) {
+    const Membar_mask_bits effective_mask =
+        Membar_mask_bits(const7a & ~(LoadLoad | LoadStore | StoreStore));
+    return (effective_mask != 0);
+  } else {
+    return true;
+  }
+}
+
+inline void MacroAssembler::membar( Membar_mask_bits const7a ) {
+  // Uniprocessors do not need memory barriers
+  if (!os::is_MP()) return;
+  // Weakened for current Sparcs and TSO.  See the v9 manual, sections 8.4.3,
+  // 8.4.4.3, a.31 and a.50.
+  if( VM_Version::v9_instructions_work() ) {
+    // Under TSO, setting bit 3, 2, or 0 is redundant, so the only value
+    // of the mmask subfield of const7a that does anything that isn't done
+    // implicitly is StoreLoad.
+    const Membar_mask_bits effective_mask =
+        Membar_mask_bits(const7a & ~(LoadLoad | LoadStore | StoreStore));
+    if ( effective_mask != 0 ) {
+      Assembler::membar( effective_mask );
+    }
+  } else {
+    // stbar is the closest there is on v8.  Equivalent to membar(StoreStore).  We
+    // do not issue the stbar because to my knowledge all v8 machines implement TSO,
+    // which guarantees that all stores behave as if an stbar were issued just after
+    // each one of them.  On these machines, stbar ought to be a nop.  There doesn't
+    // appear to be an equivalent of membar(StoreLoad) on v8: TSO doesn't require it,
+    // it can't be specified by stbar, nor have I come up with a way to simulate it.
+    //
+    // Addendum.  Dave says that ldstub guarantees a write buffer flush to coherent
+    // space.  Put one here to be on the safe side.
+    Assembler::ldstub(SP, 0, G0);
+  }
+}
+
+inline void MacroAssembler::prefetch(const Address& a, PrefetchFcn f, int offset) {
+  relocate(a.rspec(offset));
+  assert(!a.has_index(), "");
+  prefetch(a.base(), a.disp() + offset, f);
+}
+
+inline void MacroAssembler::st(Register d, Register s1, Register s2)      { stw(d, s1, s2); }
+inline void MacroAssembler::st(Register d, Register s1, int simm13a)      { stw(d, s1, simm13a); }
+
+#ifdef ASSERT
+// ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+inline void MacroAssembler::st(Register d, Register s1, ByteSize simm13a) { stw(d, s1, in_bytes(simm13a)); }
+#endif
+
+inline void MacroAssembler::st(Register d, const Address& a, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); st( d, a.base(), a.index()        ); }
+  else               {                          st( d, a.base(), a.disp() + offset); }
+}
+
+inline void MacroAssembler::stb(Register d, const Address& a, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); stb(d, a.base(), a.index()        ); }
+  else               {                          stb(d, a.base(), a.disp() + offset); }
+}
+inline void MacroAssembler::sth(Register d, const Address& a, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); sth(d, a.base(), a.index()        ); }
+  else               {                          sth(d, a.base(), a.disp() + offset); }
+}
+inline void MacroAssembler::stw(Register d, const Address& a, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); stw(d, a.base(), a.index()        ); }
+  else               {                          stw(d, a.base(), a.disp() + offset); }
+}
+inline void MacroAssembler::std(Register d, const Address& a, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); std(d, a.base(), a.index()        ); }
+  else               {                          std(d, a.base(), a.disp() + offset); }
+}
+inline void MacroAssembler::stx(Register d, const Address& a, int offset) {
+  if (a.has_index()) { assert(offset == 0, ""); stx(d, a.base(), a.index()        ); }
+  else               {                          stx(d, a.base(), a.disp() + offset); }
+}
+
+inline void MacroAssembler::stb(Register d, Register s1, RegisterOrConstant s2) { stb(d, Address(s1, s2)); }
+inline void MacroAssembler::sth(Register d, Register s1, RegisterOrConstant s2) { sth(d, Address(s1, s2)); }
+inline void MacroAssembler::stw(Register d, Register s1, RegisterOrConstant s2) { stw(d, Address(s1, s2)); }
+inline void MacroAssembler::stx(Register d, Register s1, RegisterOrConstant s2) { stx(d, Address(s1, s2)); }
+inline void MacroAssembler::std(Register d, Register s1, RegisterOrConstant s2) { std(d, Address(s1, s2)); }
+inline void MacroAssembler::st( Register d, Register s1, RegisterOrConstant s2) { st( d, Address(s1, s2)); }
+
+inline void MacroAssembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, RegisterOrConstant s2) {
+  if (s2.is_register())  stf(w, d, s1, s2.as_register());
+  else                   stf(w, d, s1, s2.as_constant());
+}
+
+inline void MacroAssembler::stf(FloatRegisterImpl::Width w, FloatRegister d, const Address& a, int offset) {
+  relocate(a.rspec(offset));
+  if (a.has_index()) { assert(offset == 0, ""); stf(w, d, a.base(), a.index()        ); }
+  else               {                          stf(w, d, a.base(), a.disp() + offset); }
+}
+
+inline void MacroAssembler::sub(Register s1, RegisterOrConstant s2, Register d, int offset) {
+  if (s2.is_register())  sub(s1, s2.as_register(),          d);
+  else                 { sub(s1, s2.as_constant() + offset, d); offset = 0; }
+  if (offset != 0)       sub(d,  offset,                    d);
+}
+
+inline void MacroAssembler::swap(Address& a, Register d, int offset) {
+  relocate(a.rspec(offset));
+  if (a.has_index()) { assert(offset == 0, ""); swap(a.base(), a.index(), d        ); }
+  else               {                          swap(a.base(), a.disp() + offset, d); }
+}
+
+#endif // CPU_SPARC_VM_MACROASSEMBLER_SPARC_INLINE_HPP
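The membar pair above encodes the key TSO fact: every ordering except store-then-load is already guaranteed, so only a mask containing StoreLoad emits an instruction. A minimal standalone C++ sketch of that weakening rule, assuming mask values that follow the v9 mmask encoding (illustrative, not the VM's real declarations):

    #include <cstdio>

    // SPARC v9 membar mmask bits (illustrative values; see the v9 manual).
    enum Membar_mask_bits {
      LoadLoad   = 1 << 0,
      StoreLoad  = 1 << 1,
      LoadStore  = 1 << 2,
      StoreStore = 1 << 3
    };

    // Under TSO, stripping the implicitly-ordered bits leaves something to
    // emit only when StoreLoad was requested.
    static bool membar_has_effect_under_tso(int const7a) {
      int effective = const7a & ~(LoadLoad | LoadStore | StoreStore);
      return effective != 0;
    }

    int main() {
      printf("%d\n", membar_has_effect_under_tso(StoreStore));            // 0: folds to a no-op
      printf("%d\n", membar_has_effect_under_tso(StoreLoad | LoadLoad));  // 1: membar #StoreLoad
      return 0;
    }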
--- a/src/cpu/sparc/vm/metaspaceShared_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/metaspaceShared_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "asm/codeBuffer.hpp"
 #include "memory/metaspaceShared.hpp"
 
 // Generate the self-patching vtable method:
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/allocation.inline.hpp"
 #include "prims/methodHandles.hpp"
@@ -170,7 +171,8 @@
 
   if (VerifyMethodHandles && !for_compiler_entry) {
     // make sure recv is already on stack
-    __ load_sized_value(Address(method_temp, Method::size_of_parameters_offset()),
+    __ ld_ptr(method_temp, in_bytes(Method::const_offset()), temp2);
+    __ load_sized_value(Address(temp2, ConstMethod::size_of_parameters_offset()),
                         temp2,
                         sizeof(u2), /*is_signed*/ false);
     // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
@@ -232,7 +234,8 @@
   int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
   assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
   if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
-    __ load_sized_value(Address(G5_method, Method::size_of_parameters_offset()),
+    __ ld_ptr(G5_method, in_bytes(Method::const_offset()), O4_param_size);
+    __ load_sized_value(Address(O4_param_size, ConstMethod::size_of_parameters_offset()),
                         O4_param_size,
                         sizeof(u2), /*is_signed*/ false);
     // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
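This two-step load (ld_ptr through Method::const_offset(), then the sized load off ConstMethod) recurs in templateInterpreter_sparc.cpp and templateTable_sparc.cpp below: the u2 parameter-size field now lives behind the ConstMethod pointer rather than directly in Method. A hypothetical C++ reduction of the layout change (type and field names are illustrative only):

    #include <cstdint>
    #include <cstdio>

    struct ConstMethod { uint16_t size_of_parameters; };  // the u2 now lives here
    struct Method      { ConstMethod* const_method; };    // hence one extra indirection

    // Old code: one lduh straight off Method.
    // New code: ld_ptr to fetch the ConstMethod*, then lduh for the u2.
    static uint16_t load_size_of_parameters(const Method* m) {
      return m->const_method->size_of_parameters;
    }

    int main() {
      ConstMethod cm = { 3 };
      Method m = { &cm };
      printf("%u\n", load_size_of_parameters(&m));  // prints 3
      return 0;
    }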
--- a/src/cpu/sparc/vm/nativeInst_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/nativeInst_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "memory/resourceArea.hpp"
 #include "nativeInst_sparc.hpp"
 #include "oops/oop.inline.hpp"
--- a/src/cpu/sparc/vm/nativeInst_sparc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/nativeInst_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,7 +25,7 @@
 #ifndef CPU_SPARC_VM_NATIVEINST_SPARC_HPP
 #define CPU_SPARC_VM_NATIVEINST_SPARC_HPP
 
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
 #include "memory/allocation.hpp"
 #include "runtime/icache.hpp"
 #include "runtime/os.hpp"
@@ -194,11 +194,10 @@
   static int inv_simm(    int x, int nbits ) { return Assembler::inv_simm(x, nbits); }
   static intptr_t inv_wdisp(   int x, int nbits ) { return Assembler::inv_wdisp(  x, 0, nbits); }
   static intptr_t inv_wdisp16( int x )            { return Assembler::inv_wdisp16(x, 0); }
-  static int branch_destination_offset(int x) { return Assembler::branch_destination(x, 0); }
+  static int branch_destination_offset(int x) { return MacroAssembler::branch_destination(x, 0); }
   static int patch_branch_destination_offset(int dest_offset, int x) {
-    return Assembler::patched_branch(dest_offset, x, 0);
+    return MacroAssembler::patched_branch(dest_offset, x, 0);
   }
-  void set_annul_bit() { set_long_at(0, long_at(0) | Assembler::annul(true)); }
 
   // utility for checking if x is either of 2 small constants
   static bool is_either(int x, int k1, int k2) {
@@ -889,7 +888,6 @@
     int patched_instr = patch_branch_destination_offset(dest - addr_at(0), long_at(0));
     set_long_at(0, patched_instr);
   }
-  void set_annul() { set_annul_bit(); }
   NativeInstruction *delay_slot_instr() { return nativeInstruction_at(addr_at(4));}
   void fill_delay_slot(int instr) { set_long_at(4, instr);}
   Assembler::Condition condition() {
--- a/src/cpu/sparc/vm/relocInfo_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/relocInfo_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.inline.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/assembler.hpp"
 #include "code/relocInfo.hpp"
 #include "nativeInst_sparc.hpp"
 #include "oops/oop.inline.hpp"
--- a/src/cpu/sparc/vm/runtime_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/runtime_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,8 +24,7 @@
 
 #include "precompiled.hpp"
 #ifdef COMPILER2
-#include "asm/assembler.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "code/vmreg.hpp"
 #include "interpreter/interpreter.hpp"
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "code/debugInfoRec.hpp"
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
@@ -2322,7 +2321,7 @@
   // Pre-load a static method's oop into O1.  Used both by locking code and
   // the normal JNI call code.
   if (method->is_static() && !is_critical_native) {
-    __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1);
+    __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), O1);
 
     // Now handlize the static class mirror in O1.  It's known not-null.
     __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
--- a/src/cpu/sparc/vm/sparc.ad	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/sparc.ad	Sun Feb 03 22:43:57 2013 +0100
@@ -10224,7 +10224,7 @@
 
 //---------- Zeros Count Instructions ------------------------------------------
 
-instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
+instruct countLeadingZerosI(iRegIsafe dst, iRegI src, iRegI tmp, flagsReg cr) %{
   predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
   match(Set dst (CountLeadingZerosI src));
   effect(TEMP dst, TEMP tmp, KILL cr);
@@ -10321,7 +10321,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{
+instruct countTrailingZerosI(iRegIsafe dst, iRegI src, flagsReg cr) %{
   predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
   match(Set dst (CountTrailingZerosI src));
   effect(TEMP dst, KILL cr);
@@ -10364,19 +10364,21 @@
 
 //---------- Population Count Instructions -------------------------------------
 
-instruct popCountI(iRegI dst, iRegI src) %{
+instruct popCountI(iRegIsafe dst, iRegI src) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountI src));
 
-  format %{ "POPC   $src, $dst" %}
-  ins_encode %{
-    __ popc($src$$Register, $dst$$Register);
+  format %{ "SRL    $src, G0, $dst\t! clear upper word for 64 bit POPC\n\t"
+            "POPC   $dst, $dst" %}
+  ins_encode %{
+    __ srl($src$$Register, G0, $dst$$Register);
+    __ popc($dst$$Register, $dst$$Register);
   %}
   ins_pipe(ialu_reg);
 %}
 
 // Note: Long.bitCount(long) returns an int.
-instruct popCountL(iRegI dst, iRegL src) %{
+instruct popCountL(iRegIsafe dst, iRegL src) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountL src));
 
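The extra SRL before POPC exists because the 64-bit popc instruction consumes the whole register: a sign-extended negative int would also count the 32 copied sign bits. A plain C++ illustration of the failure mode (not SPARC code):

    #include <cstdint>
    #include <cstdio>

    static int popc64(uint64_t x) { int n = 0; while (x) { n += (int)(x & 1); x >>= 1; } return n; }

    int main() {
      int32_t v = -1;                                // 32 one-bits as an int
      uint64_t widened = (uint64_t)(int64_t)v;       // sign-extended: 64 one-bits
      printf("%d\n", popc64(widened));               // 64: wrong for Integer.bitCount
      printf("%d\n", popc64(widened & 0xFFFFFFFFu)); // 32: what the SRL achieves
      return 0;
    }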
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "interpreter/interpreter.hpp"
 #include "nativeInst_sparc.hpp"
 #include "oops/instanceOop.hpp"
@@ -37,13 +36,8 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/top.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
 #ifdef COMPILER2
 #include "opto/runtime.hpp"
 #endif
--- a/src/cpu/sparc/vm/stubRoutines_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/stubRoutines_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,12 +26,7 @@
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/stubRoutines.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 // Implementation of the platform-specific part of StubRoutines - for
 // a description of how to extend it, see the stubRoutines.hpp file.
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterGenerator.hpp"
@@ -259,7 +259,7 @@
   }
   __ ret();                           // return from interpreter activation
   __ delayed()->restore(I5_savedSP, G0, SP);  // remove interpreter frame
-  NOT_PRODUCT(__ emit_long(0);)       // marker for disassembly
+  NOT_PRODUCT(__ emit_int32(0);)       // marker for disassembly
   return entry;
 }
 
@@ -434,7 +434,7 @@
 
   // the frame is greater than one page in size, so check against
   // the bottom of the stack
-  __ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check);
+  __ cmp_and_brx_short(SP, Rscratch, Assembler::greaterUnsigned, Assembler::pt, after_frame_check);
 
   // the stack will overflow, throw an exception
 
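greaterUnsigned is the right condition here because SP and Rscratch hold addresses; a signed compare misorders pointers whose top bit differs. A minimal C++ illustration with made-up addresses:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t lo = 0x7FFFFFFFFFFFF000ULL;           // top bit clear
      uint64_t hi = 0x8000000000001000ULL;           // top bit set
      printf("%d\n", (int64_t)hi > (int64_t)lo);     // 0: signed compare misorders
      printf("%d\n", hi > lo);                       // 1: unsigned compare is correct
      return 0;
    }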
@@ -494,9 +494,6 @@
   // (gri - 2/25/2000)
 
 
-  const Address size_of_parameters(G5_method, Method::size_of_parameters_offset());
-  const Address size_of_locals    (G5_method, Method::size_of_locals_offset());
-  const Address max_stack         (G5_method, Method::max_stack_offset());
   int rounded_vm_local_words = round_to( frame::interpreter_frame_vm_local_words, WordsPerLong );
 
   const int extra_space =
@@ -506,11 +503,15 @@
     (native_call ? frame::interpreter_frame_extra_outgoing_argument_words : 0);
 
   const Register Glocals_size = G3;
+  const Register RconstMethod = Glocals_size;
   const Register Otmp1 = O3;
   const Register Otmp2 = O4;
   // Lscratch can't be used as a temporary because the call_stub uses
   // it to assert that the stack frame was setup correctly.
+  const Address constMethod       (G5_method, Method::const_offset());
+  const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset());
 
+  __ ld_ptr( constMethod, RconstMethod );
   __ lduh( size_of_parameters, Glocals_size);
 
   // Gargs points to first local + BytesPerWord
@@ -530,6 +531,8 @@
     //
     // Compute number of locals in method apart from incoming parameters
     //
+    const Address size_of_locals    (Otmp1, ConstMethod::size_of_locals_offset());
+    __ ld_ptr( constMethod, Otmp1 );
     __ lduh( size_of_locals, Otmp1 );
     __ sub( Otmp1, Glocals_size, Glocals_size );
     __ round_to( Glocals_size, WordsPerLong );
@@ -538,7 +541,8 @@
     // see if the frame is greater than one page in size. If so,
     // then we need to verify there is enough stack space remaining
     // Frame_size = (max_stack + extra_space) * BytesPerWord;
-    __ lduh( max_stack, Gframe_size );
+    __ ld_ptr( constMethod, Gframe_size );
+    __ lduh( Gframe_size, in_bytes(ConstMethod::max_stack_offset()), Gframe_size );
     __ add( Gframe_size, extra_space, Gframe_size );
     __ round_to( Gframe_size, WordsPerLong );
     __ sll( Gframe_size, Interpreter::logStackElementSize, Gframe_size);
@@ -1255,8 +1259,7 @@
   // make sure registers are different!
   assert_different_registers(G2_thread, G5_method, Gargs, Gtmp1, Gtmp2);
 
-  const Address size_of_parameters(G5_method, Method::size_of_parameters_offset());
-  const Address size_of_locals    (G5_method, Method::size_of_locals_offset());
+  const Address constMethod       (G5_method, Method::const_offset());
   // Seems like G5_method is live at the point this is used. So we could make this look consistent
   // and use in the asserts.
   const Address access_flags      (Lmethod,   Method::access_flags_offset());
@@ -1306,8 +1309,13 @@
   init_value = G0;
   Label clear_loop;
 
+  const Register RconstMethod = O1;
+  const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset());
+  const Address size_of_locals    (RconstMethod, ConstMethod::size_of_locals_offset());
+
   // NOTE: If you change the frame layout, this code will need to
   // be updated!
+  __ ld_ptr( constMethod, RconstMethod );
   __ lduh( size_of_locals, O2 );
   __ lduh( size_of_parameters, O1 );
   __ sll( O2, Interpreter::logStackElementSize, O2);
@@ -1822,9 +1830,13 @@
 
     const Register Gtmp1 = G3_scratch;
     const Register Gtmp2 = G1_scratch;
+    const Register RconstMethod = Gtmp1;
+    const Address constMethod(Lmethod, Method::const_offset());
+    const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset());
 
     // Compute size of arguments for saving when returning to deoptimized caller
-    __ lduh(Lmethod, in_bytes(Method::size_of_parameters_offset()), Gtmp1);
+    __ ld_ptr(constMethod, RconstMethod);
+    __ lduh(size_of_parameters, Gtmp1);
     __ sll(Gtmp1, Interpreter::logStackElementSize, Gtmp1);
     __ sub(Llocals, Gtmp1, Gtmp2);
     __ add(Gtmp2, wordSize, Gtmp2);
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -3040,7 +3040,8 @@
   Register Rtemp = G4_scratch;
 
   // Load receiver from stack slot
-  __ lduh(G5_method, in_bytes(Method::size_of_parameters_offset()), G4_scratch);
+  __ ld_ptr(G5_method, in_bytes(Method::const_offset()), G4_scratch);
+  __ lduh(G4_scratch, in_bytes(ConstMethod::size_of_parameters_offset()), G4_scratch);
   __ load_receiver(G4_scratch, O0);
 
   // receiver NULL check
--- a/src/cpu/sparc/vm/vmStructs_sparc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/vmStructs_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,7 +29,7 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry)            \
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)            \
  \
   /******************************/                                                                                                   \
   /* JavaCallWrapper            */                                                                                                   \
@@ -37,22 +37,12 @@
   /******************************/                                                                                                   \
   /* JavaFrameAnchor            */                                                                                                   \
   /******************************/                                                                                                   \
-  volatile_nonstatic_field(JavaFrameAnchor,     _flags,                                          int)                                \
-                                                                                                                                     \
+  volatile_nonstatic_field(JavaFrameAnchor,     _flags,                                          int)
 
-  /* NOTE that we do not use the last_entry() macro here; it is used  */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_STRUCTS_OS_CPU macro (and must  */
-  /* be present there)                                                */
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
 
 
-#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry)                               \
-
-  /* NOTE that we do not use the last_entry() macro here; it is used  */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_TYPES_OS_CPU macro (and must    */
-  /* be present there)                                                */
-
-
-#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry)                                                              \
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)                                                              \
   /******************************/                                        \
   /* Register numbers (C2 only) */                                        \
   /******************************/                                        \
@@ -90,15 +80,6 @@
   declare_c2_constant(R_G6_num)                                           \
   declare_c2_constant(R_G7_num)
 
-
-  /* NOTE that we do not use the last_entry() macro here; it is used       */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */
-  /* be present there)                                                     */
-
-#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry)                                                              \
-
-  /* NOTE that we do not use the last_entry() macro here; it is used        */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */
-  /* be present there)                                                      */
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // CPU_SPARC_VM_VMSTRUCTS_SPARC_HPP
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "runtime/java.hpp"
 #include "runtime/stubCodeGenerator.hpp"
@@ -259,6 +259,10 @@
   if (!has_vis1()) // Drop to 0 if no VIS1 support
     UseVIS = 0;
 
+  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
+    (cache_line_size > ContendedPaddingWidth))
+    ContendedPaddingWidth = cache_line_size;
+
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
     tty->print("Allocation");
@@ -286,6 +290,9 @@
     if (PrefetchFieldsAhead > 0) {
       tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead);
     }
+    if (ContendedPaddingWidth > 0) {
+      tty->print_cr("ContendedPaddingWidth %d", ContendedPaddingWidth);
+    }
   }
 #endif // PRODUCT
 }
--- a/src/cpu/sparc/vm/vmreg_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/vmreg_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,6 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
 #include "code/vmreg.hpp"
 
 
--- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "code/vtableStubs.hpp"
 #include "interp_masm_sparc.hpp"
 #include "memory/resourceArea.hpp"
--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/cardTableModRefBS.hpp"
@@ -181,7 +182,7 @@
 // make this go away someday
 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
   if (rtype == relocInfo::none)
-        emit_long(data);
+        emit_int32(data);
   else  emit_data(data, Relocation::spec_simple(rtype), format);
 }
 
@@ -201,7 +202,7 @@
     else
       code_section()->relocate(inst_mark(), rspec, format);
   }
-  emit_long(data);
+  emit_int32(data);
 }
 
 static int encode(Register r) {
@@ -225,9 +226,9 @@
   assert(isByte(op1) && isByte(op2), "wrong opcode");
   assert(isByte(imm8), "not a byte");
   assert((op1 & 0x01) == 0, "should be 8bit operation");
-  emit_byte(op1);
-  emit_byte(op2 | encode(dst));
-  emit_byte(imm8);
+  emit_int8(op1);
+  emit_int8(op2 | encode(dst));
+  emit_int8(imm8);
 }
 
 
@@ -236,13 +237,13 @@
   assert((op1 & 0x01) == 1, "should be 32bit operation");
   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   if (is8bit(imm32)) {
-    emit_byte(op1 | 0x02); // set sign bit
-    emit_byte(op2 | encode(dst));
-    emit_byte(imm32 & 0xFF);
+    emit_int8(op1 | 0x02); // set sign bit
+    emit_int8(op2 | encode(dst));
+    emit_int8(imm32 & 0xFF);
   } else {
-    emit_byte(op1);
-    emit_byte(op2 | encode(dst));
-    emit_long(imm32);
+    emit_int8(op1);
+    emit_int8(op2 | encode(dst));
+    emit_int32(imm32);
   }
 }
 
@@ -251,9 +252,9 @@
   assert(isByte(op1) && isByte(op2), "wrong opcode");
   assert((op1 & 0x01) == 1, "should be 32bit operation");
   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
-  emit_byte(op1);
-  emit_byte(op2 | encode(dst));
-  emit_long(imm32);
+  emit_int8(op1);
+  emit_int8(op2 | encode(dst));
+  emit_int32(imm32);
 }
 
 // immediate-to-memory forms
@@ -261,21 +262,21 @@
   assert((op1 & 0x01) == 1, "should be 32bit operation");
   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   if (is8bit(imm32)) {
-    emit_byte(op1 | 0x02); // set sign bit
+    emit_int8(op1 | 0x02); // set sign bit
     emit_operand(rm, adr, 1);
-    emit_byte(imm32 & 0xFF);
+    emit_int8(imm32 & 0xFF);
   } else {
-    emit_byte(op1);
+    emit_int8(op1);
     emit_operand(rm, adr, 4);
-    emit_long(imm32);
+    emit_int32(imm32);
   }
 }
 
 
 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
   assert(isByte(op1) && isByte(op2), "wrong opcode");
-  emit_byte(op1);
-  emit_byte(op2 | encode(dst) << 3 | encode(src));
+  emit_int8(op1);
+  emit_int8(op2 | encode(dst) << 3 | encode(src));
 }
 
 
@@ -300,21 +301,21 @@
         // [base + index*scale]
         // [00 reg 100][ss index base]
         assert(index != rsp, "illegal addressing mode");
-        emit_byte(0x04 | regenc);
-        emit_byte(scale << 6 | indexenc | baseenc);
+        emit_int8(0x04 | regenc);
+        emit_int8(scale << 6 | indexenc | baseenc);
       } else if (is8bit(disp) && rtype == relocInfo::none) {
         // [base + index*scale + imm8]
         // [01 reg 100][ss index base] imm8
         assert(index != rsp, "illegal addressing mode");
-        emit_byte(0x44 | regenc);
-        emit_byte(scale << 6 | indexenc | baseenc);
-        emit_byte(disp & 0xFF);
+        emit_int8(0x44 | regenc);
+        emit_int8(scale << 6 | indexenc | baseenc);
+        emit_int8(disp & 0xFF);
       } else {
         // [base + index*scale + disp32]
         // [10 reg 100][ss index base] disp32
         assert(index != rsp, "illegal addressing mode");
-        emit_byte(0x84 | regenc);
-        emit_byte(scale << 6 | indexenc | baseenc);
+        emit_int8(0x84 | regenc);
+        emit_int8(scale << 6 | indexenc | baseenc);
         emit_data(disp, rspec, disp32_operand);
       }
     } else if (base == rsp LP64_ONLY(|| base == r12)) {
@@ -322,19 +323,19 @@
       if (disp == 0 && rtype == relocInfo::none) {
         // [rsp]
         // [00 reg 100][00 100 100]
-        emit_byte(0x04 | regenc);
-        emit_byte(0x24);
+        emit_int8(0x04 | regenc);
+        emit_int8(0x24);
       } else if (is8bit(disp) && rtype == relocInfo::none) {
         // [rsp + imm8]
         // [01 reg 100][00 100 100] disp8
-        emit_byte(0x44 | regenc);
-        emit_byte(0x24);
-        emit_byte(disp & 0xFF);
+        emit_int8(0x44 | regenc);
+        emit_int8(0x24);
+        emit_int8(disp & 0xFF);
       } else {
         // [rsp + imm32]
         // [10 reg 100][00 100 100] disp32
-        emit_byte(0x84 | regenc);
-        emit_byte(0x24);
+        emit_int8(0x84 | regenc);
+        emit_int8(0x24);
         emit_data(disp, rspec, disp32_operand);
       }
     } else {
@@ -344,16 +345,16 @@
           base != rbp LP64_ONLY(&& base != r13)) {
         // [base]
         // [00 reg base]
-        emit_byte(0x00 | regenc | baseenc);
+        emit_int8(0x00 | regenc | baseenc);
       } else if (is8bit(disp) && rtype == relocInfo::none) {
         // [base + disp8]
         // [01 reg base] disp8
-        emit_byte(0x40 | regenc | baseenc);
-        emit_byte(disp & 0xFF);
+        emit_int8(0x40 | regenc | baseenc);
+        emit_int8(disp & 0xFF);
       } else {
         // [base + disp32]
         // [10 reg base] disp32
-        emit_byte(0x80 | regenc | baseenc);
+        emit_int8(0x80 | regenc | baseenc);
         emit_data(disp, rspec, disp32_operand);
       }
     }
@@ -363,14 +364,14 @@
       // [index*scale + disp]
       // [00 reg 100][ss index 101] disp32
       assert(index != rsp, "illegal addressing mode");
-      emit_byte(0x04 | regenc);
-      emit_byte(scale << 6 | indexenc | 0x05);
+      emit_int8(0x04 | regenc);
+      emit_int8(scale << 6 | indexenc | 0x05);
       emit_data(disp, rspec, disp32_operand);
     } else if (rtype != relocInfo::none ) {
       // [disp] (64bit) RIP-RELATIVE (32bit) abs
       // [00 000 101] disp32
 
-      emit_byte(0x05 | regenc);
+      emit_int8(0x05 | regenc);
       // Note that the RIP-rel. correction applies to the generated
       // disp field, but _not_ to the target address in the rspec.
 
@@ -390,8 +391,8 @@
       // 32bit never did this, did everything as the rip-rel/disp code above
       // [disp] ABSOLUTE
       // [00 reg 100][00 100 101] disp32
-      emit_byte(0x04 | regenc);
-      emit_byte(0x25);
+      emit_int8(0x04 | regenc);
+      emit_int8(0x25);
       emit_data(disp, rspec, disp32_operand);
     }
   }
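The bracketed bit patterns in the comments above are ModRM and SIB bytes. A worked example of the "[01 reg 100][ss index base] imm8" case in plain C++, assuming the usual x86 register numbering (eax=0 ... edi=7):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int reg = 1, base = 3, index = 6, log_scale = 2, disp8 = 0x10;
      uint8_t modrm = (uint8_t)(0x40 | (reg << 3) | 0x04);        // mod=01, rm=100 -> SIB follows
      uint8_t sib   = (uint8_t)((log_scale << 6) | (index << 3) | base);
      // Prints "4c b3 10": after an 8B opcode this encodes mov ecx, [ebx+esi*4+0x10].
      printf("%02x %02x %02x\n", modrm, sib, disp8);
      return 0;
    }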
@@ -882,8 +883,8 @@
 void Assembler::emit_farith(int b1, int b2, int i) {
   assert(isByte(b1) && isByte(b2), "wrong opcode");
   assert(0 <= i &&  i < 8, "illegal stack offset");
-  emit_byte(b1);
-  emit_byte(b2 + i);
+  emit_int8(b1);
+  emit_int8(b2 + i);
 }
 
 
@@ -898,7 +899,7 @@
 void Assembler::adcl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x11);
+  emit_int8(0x11);
   emit_operand(src, dst);
 }
 
@@ -910,7 +911,7 @@
 void Assembler::adcl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x13);
+  emit_int8(0x13);
   emit_operand(dst, src);
 }
 
@@ -928,7 +929,7 @@
 void Assembler::addl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x01);
+  emit_int8(0x01);
   emit_operand(src, dst);
 }
 
@@ -940,7 +941,7 @@
 void Assembler::addl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x03);
+  emit_int8(0x03);
   emit_operand(dst, src);
 }
 
@@ -952,39 +953,41 @@
 void Assembler::addr_nop_4() {
   assert(UseAddressNop, "no CPU support");
   // 4 bytes: NOP DWORD PTR [EAX+0]
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
-  emit_byte(0);    // 8-bits offset (1 byte)
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
+  emit_int8(0);    // 8-bits offset (1 byte)
 }
 
 void Assembler::addr_nop_5() {
   assert(UseAddressNop, "no CPU support");
   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
-  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
-  emit_byte(0);    // 8-bits offset (1 byte)
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
+  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
+  emit_int8(0);    // 8-bits offset (1 byte)
 }
 
 void Assembler::addr_nop_7() {
   assert(UseAddressNop, "no CPU support");
   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
-  emit_long(0);    // 32-bits offset (4 bytes)
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8((unsigned char)0x80);
+                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
+  emit_int32(0);   // 32-bits offset (4 bytes)
 }
 
 void Assembler::addr_nop_8() {
   assert(UseAddressNop, "no CPU support");
   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
-  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
-  emit_long(0);    // 32-bits offset (4 bytes)
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8((unsigned char)0x84);
+                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
+  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
+  emit_int32(0);   // 32-bits offset (4 bytes)
 }
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
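A note on the (unsigned char) casts that appear throughout this rewrite: assuming emit_int8 takes a signed int8_t, as the name suggests, opcode literals of 0x80 and above do not fit and would draw narrowing warnings, so the cast makes the truncation explicit. A sketch of the convention (hypothetical stand-in for the real emitter):

    #include <cstdint>
    #include <cstdio>

    static void emit_int8(int8_t b) { printf("%02x ", (unsigned)(uint8_t)b); }

    int main() {
      emit_int8(0x0F);                 // <= 0x7F: fits int8_t as-is
      emit_int8((unsigned char)0x84);  // >= 0x80: cast documents the wrap-around
      printf("\n");                    // prints: 0f 84
      return 0;
    }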
@@ -1007,12 +1010,73 @@
   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
+void Assembler::aesdec(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8((unsigned char)0xDE);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8((unsigned char)0xDE);
+  emit_int8(0xC0 | encode);
+}
+
+void Assembler::aesdeclast(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8((unsigned char)0xDF);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8((unsigned char)0xDF);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::aesenc(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8((unsigned char)0xDC);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8((unsigned char)0xDC);
+  emit_int8(0xC0 | encode);
+}
+
+void Assembler::aesenclast(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8((unsigned char)0xDD);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8((unsigned char)0xDD);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+
 void Assembler::andl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rsp, dst, 4);
-  emit_long(imm32);
+  emit_int32(imm32);
 }
 
 void Assembler::andl(Register dst, int32_t imm32) {
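The new AES helpers all use the 66-prefixed 0F 38 opcode map: AESENC=0xDC, AESENCLAST=0xDD, AESDEC=0xDE, AESDECLAST=0xDF. A byte-level sketch of the register-register form, ignoring the REX/VEX machinery that simd_prefix_and_encode supplies (hypothetical helper, not VM code):

    #include <cstdint>
    #include <cstdio>

    static void emit_aes_rr(uint8_t opcode, int dst_enc, int src_enc) {
      const uint8_t bytes[] = {
        0x66, 0x0F, 0x38, opcode,
        (uint8_t)(0xC0 | (dst_enc << 3) | src_enc)   // ModRM, register-direct form
      };
      for (uint8_t b : bytes) printf("%02x ", b);
      printf("\n");
    }

    int main() {
      emit_aes_rr(0xDC, 0, 1);  // 66 0f 38 dc c1: aesenc xmm0, xmm1
      emit_aes_rr(0xDE, 2, 3);  // 66 0f 38 de d3: aesdec xmm2, xmm3
      return 0;
    }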
@@ -1023,7 +1087,7 @@
 void Assembler::andl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x23);
+  emit_int8(0x23);
   emit_operand(dst, src);
 }
 
@@ -1034,23 +1098,23 @@
 
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBC);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bsrl(Register dst, Register src) {
   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bswapl(Register reg) { // bswap
   int encode = prefix_and_encode(reg->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xC8 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)(0xC8 | encode));
 }
 
 void Assembler::call(Label& L, relocInfo::relocType rtype) {
@@ -1063,37 +1127,37 @@
     assert(offs <= 0, "assembler error");
     InstructionMark im(this);
     // 1110 1000 #32-bit disp
-    emit_byte(0xE8);
+    emit_int8((unsigned char)0xE8);
     emit_data(offs - long_size, rtype, operand);
   } else {
     InstructionMark im(this);
     // 1110 1000 #32-bit disp
     L.add_patch_at(code(), locator());
 
-    emit_byte(0xE8);
+    emit_int8((unsigned char)0xE8);
     emit_data(int(0), rtype, operand);
   }
 }
 
 void Assembler::call(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xD0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xD0 | encode));
 }
 
 
 void Assembler::call(Address adr) {
   InstructionMark im(this);
   prefix(adr);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rdx, adr);
 }
 
 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
   assert(entry != NULL, "call most probably wrong");
   InstructionMark im(this);
-  emit_byte(0xE8);
-  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
+  emit_int8((unsigned char)0xE8);
+  intptr_t disp = entry - (pc() + sizeof(int32_t));
   assert(is_simm32(disp), "must be 32bit offset (call2)");
   // Technically, should use call32_operand, but this format is
   // implied by the fact that we're emitting a call instruction.
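The disp arithmetic in call_literal (and in jmp_literal further down) is relative to the end of the 4-byte immediate, i.e. to the next instruction. A small worked example with made-up addresses:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t pc_after_opcode = 0x401005;  // address just past the 0xE8 byte
      uint64_t target          = 0x401000;  // branch back to the routine start
      int32_t disp = (int32_t)(target - (pc_after_opcode + sizeof(int32_t)));
      printf("%d\n", disp);                 // -9: execution resumes at 0x401009, +(-9) = 0x401000
      return 0;
    }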
@@ -1103,40 +1167,44 @@
 }
 
 void Assembler::cdql() {
-  emit_byte(0x99);
+  emit_int8((unsigned char)0x99);
+}
+
+void Assembler::cld() {
+  emit_int8((unsigned char)0xFC);
 }
 
 void Assembler::cmovl(Condition cc, Register dst, Register src) {
   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 void Assembler::cmovl(Condition cc, Register dst, Address src) {
   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpb(Address dst, int imm8) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x80);
+  emit_int8((unsigned char)0x80);
   emit_operand(rdi, dst, 1);
-  emit_byte(imm8);
+  emit_int8(imm8);
 }
 
 void Assembler::cmpl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, dst, 4);
-  emit_long(imm32);
+  emit_int32(imm32);
 }
 
 void Assembler::cmpl(Register dst, int32_t imm32) {
@@ -1153,17 +1221,17 @@
 void Assembler::cmpl(Register dst, Address  src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x3B);
+  emit_int8((unsigned char)0x3B);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpw(Address dst, int imm16) {
   InstructionMark im(this);
   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
-  emit_byte(0x66);
-  emit_byte(0x81);
+  emit_int8(0x66);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, dst, 2);
-  emit_word(imm16);
+  emit_int16(imm16);
 }
 
 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
@@ -1172,8 +1240,8 @@
 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
   InstructionMark im(this);
   prefix(adr, reg);
-  emit_byte(0x0F);
-  emit_byte(0xB1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB1);
   emit_operand(reg, adr);
 }
 
@@ -1199,6 +1267,11 @@
   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
 }
 
+void Assembler::cpuid() {
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xA2);
+}
+
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
@@ -1222,8 +1295,8 @@
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
@@ -1234,8 +1307,8 @@
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
@@ -1257,22 +1330,22 @@
 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::decl(Address dst) {
   // Don't use it directly. Use MacroAssembler::decrement() instead.
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rcx, dst);
 }
 
@@ -1298,44 +1371,44 @@
 
 void Assembler::emms() {
   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
-  emit_byte(0x0F);
-  emit_byte(0x77);
+  emit_int8(0x0F);
+  emit_int8(0x77);
 }
 
 void Assembler::hlt() {
-  emit_byte(0xF4);
+  emit_int8((unsigned char)0xF4);
 }
 
 void Assembler::idivl(Register src) {
   int encode = prefix_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::divl(Register src) { // Unsigned
   int encode = prefix_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xF0 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xF0 | encode));
 }
 
 void Assembler::imull(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xAF);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 void Assembler::imull(Register dst, Register src, int value) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   if (is8bit(value)) {
-    emit_byte(0x6B);
-    emit_byte(0xC0 | encode);
-    emit_byte(value & 0xFF);
+    emit_int8(0x6B);
+    emit_int8((unsigned char)(0xC0 | encode));
+    emit_int8(value & 0xFF);
   } else {
-    emit_byte(0x69);
-    emit_byte(0xC0 | encode);
-    emit_long(value);
+    emit_int8(0x69);
+    emit_int8((unsigned char)(0xC0 | encode));
+    emit_int32(value);
   }
 }
 
@@ -1343,7 +1416,7 @@
   // Don't use it directly. Use MacroAssembler::increment() instead.
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rax, dst);
 }
 
@@ -1356,18 +1429,18 @@
 
     const int short_size = 2;
     const int long_size = 6;
-    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
+    intptr_t offs = (intptr_t)dst - (intptr_t)pc();
     if (maybe_short && is8bit(offs - short_size)) {
       // 0111 tttn #8-bit disp
-      emit_byte(0x70 | cc);
-      emit_byte((offs - short_size) & 0xFF);
+      emit_int8(0x70 | cc);
+      emit_int8((offs - short_size) & 0xFF);
     } else {
       // 0000 1111 1000 tttn #32-bit disp
       assert(is_simm32(offs - long_size),
              "must be 32bit offset (call4)");
-      emit_byte(0x0F);
-      emit_byte(0x80 | cc);
-      emit_long(offs - long_size);
+      emit_int8(0x0F);
+      emit_int8((unsigned char)(0x80 | cc));
+      emit_int32(offs - long_size);
     }
   } else {
     // Note: could eliminate cond. jumps to this jump if condition
@@ -1375,9 +1448,9 @@
     // Note: use jccb() if label to be bound is very close to get
     //       an 8-bit displacement
     L.add_patch_at(code(), locator());
-    emit_byte(0x0F);
-    emit_byte(0x80 | cc);
-    emit_long(0);
+    emit_int8(0x0F);
+    emit_int8((unsigned char)(0x80 | cc));
+    emit_int32(0);
   }
 }
 
@@ -1386,29 +1459,29 @@
     const int short_size = 2;
     address entry = target(L);
 #ifdef ASSERT
-    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
+    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
     intptr_t delta = short_branch_delta();
     if (delta != 0) {
       dist += (dist < 0 ? (-delta) :delta);
     }
     assert(is8bit(dist), "Dispacement too large for a short jmp");
 #endif
-    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
+    intptr_t offs = (intptr_t)entry - (intptr_t)pc();
     // 0111 tttn #8-bit disp
-    emit_byte(0x70 | cc);
-    emit_byte((offs - short_size) & 0xFF);
+    emit_int8(0x70 | cc);
+    emit_int8((offs - short_size) & 0xFF);
   } else {
     InstructionMark im(this);
     L.add_patch_at(code(), locator());
-    emit_byte(0x70 | cc);
-    emit_byte(0);
+    emit_int8(0x70 | cc);
+    emit_int8(0);
   }
 }
 
 void Assembler::jmp(Address adr) {
   InstructionMark im(this);
   prefix(adr);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rsp, adr);
 }
 
@@ -1419,13 +1492,13 @@
     InstructionMark im(this);
     const int short_size = 2;
     const int long_size = 5;
-    intptr_t offs = entry - _code_pos;
+    intptr_t offs = entry - pc();
     if (maybe_short && is8bit(offs - short_size)) {
-      emit_byte(0xEB);
-      emit_byte((offs - short_size) & 0xFF);
+      emit_int8((unsigned char)0xEB);
+      emit_int8((offs - short_size) & 0xFF);
     } else {
-      emit_byte(0xE9);
-      emit_long(offs - long_size);
+      emit_int8((unsigned char)0xE9);
+      emit_int32(offs - long_size);
     }
   } else {
     // By default, forward jumps are always 32-bit displacements, since
@@ -1434,22 +1507,22 @@
     // force an 8-bit displacement.
     InstructionMark im(this);
     L.add_patch_at(code(), locator());
-    emit_byte(0xE9);
-    emit_long(0);
+    emit_int8((unsigned char)0xE9);
+    emit_int32(0);
   }
 }
 
 void Assembler::jmp(Register entry) {
   int encode = prefix_and_encode(entry->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
   InstructionMark im(this);
-  emit_byte(0xE9);
+  emit_int8((unsigned char)0xE9);
   assert(dest != NULL, "must have a target");
-  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
+  intptr_t disp = dest - (pc() + sizeof(int32_t));
   assert(is_simm32(disp), "must be 32bit offset (jmp)");
   emit_data(disp, rspec.reloc(), call32_operand);
 }
@@ -1460,21 +1533,21 @@
     address entry = target(L);
     assert(entry != NULL, "jmp most probably wrong");
 #ifdef ASSERT
-    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
+    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
     intptr_t delta = short_branch_delta();
     if (delta != 0) {
       dist += (dist < 0 ? (-delta) :delta);
     }
     assert(is8bit(dist), "Displacement too large for a short jmp");
 #endif
-    intptr_t offs = entry - _code_pos;
-    emit_byte(0xEB);
-    emit_byte((offs - short_size) & 0xFF);
+    intptr_t offs = entry - pc();
+    emit_int8((unsigned char)0xEB);
+    emit_int8((offs - short_size) & 0xFF);
   } else {
     InstructionMark im(this);
     L.add_patch_at(code(), locator());
-    emit_byte(0xEB);
-    emit_byte(0);
+    emit_int8((unsigned char)0xEB);
+    emit_int8(0);
   }
 }
 
@@ -1482,40 +1555,46 @@
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   prefix(src);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(2), src);
 }
 
 void Assembler::leal(Register dst, Address src) {
   InstructionMark im(this);
 #ifdef _LP64
-  emit_byte(0x67); // addr32
+  emit_int8(0x67); // addr32
   prefix(src, dst);
 #endif // LP64
-  emit_byte(0x8D);
+  emit_int8((unsigned char)0x8D);
   emit_operand(dst, src);
 }
 
+void Assembler::lfence() {
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
+  emit_int8((unsigned char)0xE8);
+}
+
 void Assembler::lock() {
-  emit_byte(0xF0);
+  emit_int8((unsigned char)0xF0);
 }
 
 void Assembler::lzcntl(Register dst, Register src) {
   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Emit mfence instruction
 void Assembler::mfence() {
   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
-  emit_byte( 0x0F );
-  emit_byte( 0xAE );
-  emit_byte( 0xF0 );
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
+  emit_int8((unsigned char)0xF0);
 }
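For reference, the new lfence and the existing mfence are both in the 0F AE group, with the ModRM reg field selecting the fence: /5 gives E8 (lfence), /6 gives F0 (mfence), /7 would give F8 (sfence). A tiny sketch (hypothetical helper):

    #include <cstdint>
    #include <cstdio>

    static void emit_fence(int reg_field) {  // 5 = lfence, 6 = mfence, 7 = sfence
      const uint8_t bytes[] = { 0x0F, 0xAE, (uint8_t)(0xC0 | (reg_field << 3)) };
      for (uint8_t b : bytes) printf("%02x ", b);
      printf("\n");
    }

    int main() {
      emit_fence(5);  // 0f ae e8
      emit_fence(6);  // 0f ae f0
      return 0;
    }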
 
 void Assembler::mov(Register dst, Register src) {
@@ -1535,15 +1614,15 @@
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
-  emit_byte(0x16);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x16);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movb(Register dst, Address src) {
   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
   InstructionMark im(this);
   prefix(src, dst, true);
-  emit_byte(0x8A);
+  emit_int8((unsigned char)0x8A);
   emit_operand(dst, src);
 }
 
@@ -1551,9 +1630,9 @@
 void Assembler::movb(Address dst, int imm8) {
   InstructionMark im(this);
    prefix(dst);
-  emit_byte(0xC6);
+  emit_int8((unsigned char)0xC6);
   emit_operand(rax, dst, 1);
-  emit_byte(imm8);
+  emit_int8(imm8);
 }
 
 
@@ -1561,30 +1640,30 @@
   assert(src->has_byte_register(), "must have byte register");
   InstructionMark im(this);
   prefix(dst, src, true);
-  emit_byte(0x88);
+  emit_int8((unsigned char)0x88);
   emit_operand(src, dst);
 }
 
 void Assembler::movdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x6E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x6E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
-  emit_byte(0x7E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x7E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x6E);
+  emit_int8(0x6E);
   emit_operand(dst, src);
 }
 
@@ -1592,7 +1671,7 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x7E);
+  emit_int8(0x7E);
   emit_operand(src, dst);
 }
 
@@ -1615,7 +1694,7 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x7F);
+  emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
@@ -1624,8 +1703,8 @@
   assert(UseAVX, "");
   bool vector256 = true;
   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmovdqu(XMMRegister dst, Address src) {
@@ -1633,7 +1712,7 @@
   InstructionMark im(this);
   bool vector256 = true;
   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
-  emit_byte(0x6F);
+  emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
@@ -1644,7 +1723,7 @@
   // swap src<->dst for encoding
   assert(src != xnoreg, "sanity");
   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
-  emit_byte(0x7F);
+  emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
@@ -1652,35 +1731,35 @@
 
 void Assembler::movl(Register dst, int32_t imm32) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
-  emit_long(imm32);
+  emit_int8((unsigned char)(0xB8 | encode));
+  emit_int32(imm32);
 }
 
 void Assembler::movl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x8B);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x8B);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x8B);
+  emit_int8((unsigned char)0x8B);
   emit_operand(dst, src);
 }
 
 void Assembler::movl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 4);
-  emit_long(imm32);
+  emit_int32(imm32);
 }
 
 void Assembler::movl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x89);
+  emit_int8((unsigned char)0x89);
   emit_operand(src, dst);
 }
 
@@ -1694,15 +1773,15 @@
 
 void Assembler::movq( MMXRegister dst, Address src ) {
   assert( VM_Version::supports_mmx(), "" );
-  emit_byte(0x0F);
-  emit_byte(0x6F);
+  emit_int8(0x0F);
+  emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::movq( Address dst, MMXRegister src ) {
   assert( VM_Version::supports_mmx(), "" );
-  emit_byte(0x0F);
-  emit_byte(0x7F);
+  emit_int8(0x0F);
+  emit_int8(0x7F);
   // workaround gcc (3.2.1-7a) bug
  // In that version of gcc, with only an emit_operand(MMX, Address) available,
  // gcc will tail jump and try to reverse the parameters completely
@@ -1716,7 +1795,7 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x7E);
+  emit_int8(0x7E);
   emit_operand(dst, src);
 }
 
@@ -1724,24 +1803,24 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0xD6);
+  emit_int8((unsigned char)0xD6);
   emit_operand(src, dst);
 }
 
 void Assembler::movsbl(Register dst, Address src) { // movsxb
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
   emit_operand(dst, src);
 }
 
 void Assembler::movsbl(Register dst, Register src) { // movsxb
   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0xBE);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
@@ -1758,7 +1837,7 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F2);
-  emit_byte(0x11);
+  emit_int8(0x11);
   emit_operand(src, dst);
 }
 
@@ -1776,93 +1855,93 @@
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x11);
+  emit_int8(0x11);
   emit_operand(src, dst);
 }
 
 void Assembler::movswl(Register dst, Address src) { // movsxw
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBF);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBF);
   emit_operand(dst, src);
 }
 
 void Assembler::movswl(Register dst, Register src) { // movsxw
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBF);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movw(Address dst, int imm16) {
   InstructionMark im(this);
 
-  emit_byte(0x66); // switch to 16-bit mode
+  emit_int8(0x66); // switch to 16-bit mode
   prefix(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 2);
-  emit_word(imm16);
+  emit_int16(imm16);
 }
 
 void Assembler::movw(Register dst, Address src) {
   InstructionMark im(this);
-  emit_byte(0x66);
+  emit_int8(0x66);
   prefix(src, dst);
-  emit_byte(0x8B);
+  emit_int8((unsigned char)0x8B);
   emit_operand(dst, src);
 }
 
 void Assembler::movw(Address dst, Register src) {
   InstructionMark im(this);
-  emit_byte(0x66);
+  emit_int8(0x66);
   prefix(dst, src);
-  emit_byte(0x89);
+  emit_int8((unsigned char)0x89);
   emit_operand(src, dst);
 }
 
 void Assembler::movzbl(Register dst, Address src) { // movzxb
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB6);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB6);
   emit_operand(dst, src);
 }
 
 void Assembler::movzbl(Register dst, Register src) { // movzxb
   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0xB6);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB6);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movzwl(Register dst, Address src) { // movzxw
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB7);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB7);
   emit_operand(dst, src);
 }
 
 void Assembler::movzwl(Register dst, Register src) { // movzxw
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB7);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB7);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::mull(Address src) {
   InstructionMark im(this);
   prefix(src);
-  emit_byte(0xF7);
+  emit_int8((unsigned char)0xF7);
   emit_operand(rsp, src);
 }
 
 void Assembler::mull(Register src) {
   int encode = prefix_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
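
Both mull forms use the x86 /digit convention: opcode 0xF7 names a whole group, and the ModRM reg field carries an opcode extension rather than a register. The memory form plants the extension by passing rsp (encoding 4) to emit_operand, and the register form folds the same 4 into 0xE0 = 0xC0 | (4 << 3). A sketch of the group, taken from the Intel SDM rather than this file:

    // F7 /digit group: /0 test, /2 not, /3 neg, /4 mul, /5 imul, /6 div, /7 idiv.
    inline unsigned char f7_modrm(int digit, int rm_enc) {
      return (unsigned char)(0xC0 | (digit << 3) | (rm_enc & 7));
    }
    // mul rax  ->  F7 E0   (f7_modrm(4, 0))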
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
@@ -1887,8 +1966,8 @@
 
 void Assembler::negl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD8 | encode));
 }
 
 void Assembler::nop(int i) {
@@ -1899,7 +1978,7 @@
   // speed is not an issue so simply use the single byte traditional nop
   // to do alignment.
 
-  for (; i > 0 ; i--) emit_byte(0x90);
+  for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
   return;
 
 #endif // ASSERT
@@ -1929,33 +2008,35 @@
     while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
       i -= 15;
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
       addr_nop_8();
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x90);   // nop
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8((unsigned char)0x90);   // nop
     }
     switch (i) {
       case 14:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 13:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 12:
         addr_nop_8();
-        emit_byte(0x66); // size prefix
-        emit_byte(0x66); // size prefix
-        emit_byte(0x66); // size prefix
-        emit_byte(0x90); // nop
+        emit_int8(0x66); // size prefix
+        emit_int8(0x66); // size prefix
+        emit_int8(0x66); // size prefix
+        emit_int8((unsigned char)0x90); // nop
         break;
       case 11:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 10:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 9:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 8:
         addr_nop_8();
         break;
@@ -1963,7 +2044,7 @@
         addr_nop_7();
         break;
       case 6:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 5:
         addr_nop_5();
         break;
@@ -1972,11 +2053,12 @@
         break;
       case 3:
        // Don't use "0x0F 0x1F 0x00" - needs patching-safe padding
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 2:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 1:
-        emit_byte(0x90); // nop
+        emit_int8((unsigned char)0x90); // nop
         break;
       default:
         assert(i == 0, " ");
@@ -2009,24 +2091,24 @@
 
     while(i >= 22) {
       i -= 11;
-      emit_byte(0x66); // size prefix
-      emit_byte(0x66); // size prefix
-      emit_byte(0x66); // size prefix
+      emit_int8(0x66); // size prefix
+      emit_int8(0x66); // size prefix
+      emit_int8(0x66); // size prefix
       addr_nop_8();
     }
     // Generate first nop for size between 21-12
     switch (i) {
       case 21:
         i -= 1;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 20:
       case 19:
         i -= 1;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 18:
       case 17:
         i -= 1;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 16:
       case 15:
         i -= 8;
@@ -2039,7 +2121,7 @@
         break;
       case 12:
         i -= 6;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
         addr_nop_5();
         break;
       default:
@@ -2049,11 +2131,11 @@
     // Generate second nop for size between 11-1
     switch (i) {
       case 11:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 10:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 9:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 8:
         addr_nop_8();
         break;
@@ -2061,7 +2143,7 @@
         addr_nop_7();
         break;
       case 6:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 5:
         addr_nop_5();
         break;
@@ -2070,11 +2152,12 @@
         break;
       case 3:
        // Don't use "0x0F 0x1F 0x00" - needs patching-safe padding
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 2:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 1:
-        emit_byte(0x90); // nop
+        emit_int8((unsigned char)0x90); // nop
         break;
       default:
         assert(i == 0, " ");
@@ -2097,42 +2180,43 @@
   //
   while(i > 12) {
     i -= 4;
-    emit_byte(0x66); // size prefix
-    emit_byte(0x66);
-    emit_byte(0x66);
-    emit_byte(0x90); // nop
+    emit_int8(0x66); // size prefix
+    emit_int8(0x66);
+    emit_int8(0x66);
+    emit_int8((unsigned char)0x90); // nop
   }
   // 1 - 12 nops
   if(i > 8) {
     if(i > 9) {
       i -= 1;
-      emit_byte(0x66);
+      emit_int8(0x66);
     }
     i -= 3;
-    emit_byte(0x66);
-    emit_byte(0x66);
-    emit_byte(0x90);
+    emit_int8(0x66);
+    emit_int8(0x66);
+    emit_int8((unsigned char)0x90);
   }
   // 1 - 8 nops
   if(i > 4) {
     if(i > 6) {
       i -= 1;
-      emit_byte(0x66);
+      emit_int8(0x66);
     }
     i -= 3;
-    emit_byte(0x66);
-    emit_byte(0x66);
-    emit_byte(0x90);
+    emit_int8(0x66);
+    emit_int8(0x66);
+    emit_int8((unsigned char)0x90);
   }
   switch (i) {
     case 4:
-      emit_byte(0x66);
+      emit_int8(0x66);
     case 3:
-      emit_byte(0x66);
+      emit_int8(0x66);
     case 2:
-      emit_byte(0x66);
+      emit_int8(0x66);
     case 1:
-      emit_byte(0x90);
+      emit_int8((unsigned char)0x90);
       break;
     default:
       assert(i == 0, " ");
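
All three padding paths stitch arbitrary lengths out of 0x66 operand-size prefixes and multi-byte "address nops" (the 0F 1F /0 family). For reference, the recommended 1/4/8/9-byte NOP patterns look like this; the exact bytes behind addr_nop_5()/addr_nop_7()/addr_nop_8() are defined elsewhere in this file, so this table is illustrative only:

    static const unsigned char nop1[] = {0x90};                            // nop
    static const unsigned char nop4[] = {0x0F,0x1F,0x40,0x00};             // nop [rax+0]
    static const unsigned char nop8[] = {0x0F,0x1F,0x84,0x00,0,0,0,0};     // nop [rax+rax*1+0]
    static const unsigned char nop9[] = {0x66,0x0F,0x1F,0x84,0x00,0,0,0,0};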
@@ -2141,8 +2225,8 @@
 
 void Assembler::notl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD0 | encode );
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD0 | encode));
 }
 
 void Assembler::orl(Address dst, int32_t imm32) {
@@ -2159,7 +2243,7 @@
 void Assembler::orl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0B);
+  emit_int8(0x0B);
   emit_operand(dst, src);
 }
 
@@ -2179,65 +2263,77 @@
   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
 }
 
+void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
+  emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) {
+  assert(VM_Version::supports_avx2(), "vpermq requires AVX2");
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector256);
+  emit_int8(0x00);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
-  emit_byte(0x61);
+  emit_int8(0x61);
   emit_operand(dst, src);
-  emit_byte(imm8);
+  emit_int8(imm8);
 }
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
-  emit_byte(0x61);
-  emit_byte(0xC0 | encode);
-  emit_byte(imm8);
+  emit_int8(0x61);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
 }
 
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x30);
+  emit_int8(0x30);
   emit_operand(dst, src);
 }
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x30);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x30);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // generic
 void Assembler::pop(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0x58 | encode);
+  emit_int8(0x58 | encode);
 }
 
 void Assembler::popcntl(Register dst, Address src) {
   assert(VM_Version::supports_popcnt(), "must support");
   InstructionMark im(this);
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB8);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB8);
   emit_operand(dst, src);
 }
 
 void Assembler::popcntl(Register dst, Register src) {
   assert(VM_Version::supports_popcnt(), "must support");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB8);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::popf() {
-  emit_byte(0x9D);
+  emit_int8((unsigned char)0x9D);
 }
 
 #ifndef _LP64 // no 32bit push/pop on amd64
@@ -2245,21 +2341,21 @@
   // NOTE: this will adjust stack by 8byte on 64bits
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x8F);
+  emit_int8((unsigned char)0x8F);
   emit_operand(rax, dst);
 }
 #endif
 
 void Assembler::prefetch_prefix(Address src) {
   prefix(src);
-  emit_byte(0x0F);
+  emit_int8(0x0F);
 }
 
 void Assembler::prefetchnta(Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rax, src); // 0, src
 }
 
@@ -2267,7 +2363,7 @@
   assert(VM_Version::supports_3dnow_prefetch(), "must support");
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x0D);
+  emit_int8(0x0D);
   emit_operand(rax, src); // 0, src
 }
 
@@ -2275,7 +2371,7 @@
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rcx, src); // 1, src
 }
 
@@ -2283,7 +2379,7 @@
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rdx, src); // 2, src
 }
 
@@ -2291,7 +2387,7 @@
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rbx, src); // 3, src
 }
 
@@ -2299,19 +2395,34 @@
   assert(VM_Version::supports_3dnow_prefetch(), "must support");
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x0D);
+  emit_int8(0x0D);
   emit_operand(rcx, src); // 1, src
 }
 
 void Assembler::prefix(Prefix p) {
-  a_byte(p);
+  emit_int8(p);
+}
+
+void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_ssse3(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8(0x00);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::pshufb(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_ssse3(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8(0x00);
+  emit_operand(dst, src);
 }
 
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 
 }
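
The pshufd immediate is a shuffle control: each two-bit field of mode selects which source dword lands in the corresponding destination dword. A worked example at a hypothetical call site:

    // bits [1:0] select the source dword for dst[0], [3:2] for dst[1],
    // [5:4] for dst[2], [7:6] for dst[3]. mode 0x1B = 0b00011011
    // therefore reverses the four dwords:
    //   pshufd(xmm0, xmm1, 0x1B);   // dst[0..3] = src[3..0]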
 
@@ -2321,16 +2432,16 @@
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x70);
+  emit_int8(0x70);
   emit_operand(dst, src);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 }
 
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 }
 
 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
@@ -2339,18 +2450,18 @@
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F2);
-  emit_byte(0x70);
+  emit_int8(0x70);
   emit_operand(dst, src);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 }
 
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift);
 }
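
psrldq is another /digit case, this time in the SSE shift-immediate group 66 0F 73: passing xmm3 as the first operand to simd_prefix_and_encode simply plants 3 (the /3 extension for psrldq) in the ModRM reg field, while the real destination travels in rm. A sketch of the group, from the SDM rather than this patch:

    // 66 0F 73 group: /2 psrlq, /3 psrldq, /6 psllq, /7 pslldq.
    inline unsigned char grp14_modrm(int ext, int xmm_enc) {
      return (unsigned char)(0xC0 | (ext << 3) | (xmm_enc & 7));
    }
    // psrldq xmm5, imm8  ->  66 0F 73 DD ib   (0xDD == grp14_modrm(3, 5))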
 
 void Assembler::ptest(XMMRegister dst, Address src) {
@@ -2358,15 +2469,35 @@
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x17);
+  emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x17);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x17);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vptest(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  bool vector256 = true;
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  emit_int8(0x17);
+  emit_operand(dst, src);
+}
+
+void Assembler::vptest(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  bool vector256 = true;
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  emit_int8(0x17);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
@@ -2399,18 +2530,18 @@
 void Assembler::push(int32_t imm32) {
   // in 64bits we push 64bits onto the stack but only
   // take a 32bit immediate
-  emit_byte(0x68);
-  emit_long(imm32);
+  emit_int8(0x68);
+  emit_int32(imm32);
 }
 
 void Assembler::push(Register src) {
   int encode = prefix_and_encode(src->encoding());
 
-  emit_byte(0x50 | encode);
+  emit_int8(0x50 | encode);
 }
 
 void Assembler::pushf() {
-  emit_byte(0x9C);
+  emit_int8((unsigned char)0x9C);
 }
 
 #ifndef _LP64 // no 32bit push/pop on amd64
@@ -2418,7 +2549,7 @@
   // Note this will push 64bit on 64bit
   InstructionMark im(this);
   prefix(src);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rsi, src);
 }
 #endif
@@ -2427,58 +2558,64 @@
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xD0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xD0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xD0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xD0 | encode));
+    emit_int8(imm8);
   }
 }
 
 // copies data from [esi] to [edi] using rcx pointer sized words
 // generic
 void Assembler::rep_mov() {
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   // MOVSQ
   LP64_ONLY(prefix(REX_W));
-  emit_byte(0xA5);
+  emit_int8((unsigned char)0xA5);
+}
+
+// sets rcx bytes at [edi] to the value in al
+void Assembler::rep_stosb() {
+  emit_int8((unsigned char)0xF3); // REP
+  LP64_ONLY(prefix(REX_W));
+  emit_int8((unsigned char)0xAA); // STOSB
 }
 
 // sets rcx pointer sized words at [edi] to the value in rax
 // generic
-void Assembler::rep_set() { // rep_set
-  emit_byte(0xF3);
-  // STOSQ
-  LP64_ONLY(prefix(REX_W));
-  emit_byte(0xAB);
+void Assembler::rep_stos() {
+  emit_int8((unsigned char)0xF3); // REP
+  LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
+  emit_int8((unsigned char)0xAB);
 }
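
The two string-store helpers differ only in operand size: on LP64 the REX.W prefix (0x48) widens STOS to quadword stores. As a sketch, the byte sequences a 64-bit build emits (the REX.W before STOSB is ignored by the CPU but is what the code above produces):

    //   rep_stosb():  F3 48 AA   rep stosb   (al  -> rcx bytes  at [rdi])
    //   rep_stos():   F3 48 AB   rep stosq   (rax -> rcx qwords at [rdi])
    // a 32-bit build drops the 0x48:  F3 AA / F3 AB  (stosb / stosd)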
 
 // scans rcx pointer sized words at [edi] for an occurrence of rax
 // generic
 void Assembler::repne_scan() { // repne_scan
-  emit_byte(0xF2);
+  emit_int8((unsigned char)0xF2);
   // SCASQ
   LP64_ONLY(prefix(REX_W));
-  emit_byte(0xAF);
+  emit_int8((unsigned char)0xAF);
 }
 
 #ifdef _LP64
 // scans rcx 4 byte words at [edi] for an occurrence of rax
 // generic
 void Assembler::repne_scanl() { // repne_scan
-  emit_byte(0xF2);
+  emit_int8((unsigned char)0xF2);
   // SCASL
-  emit_byte(0xAF);
+  emit_int8((unsigned char)0xAF);
 }
 #endif
 
 void Assembler::ret(int imm16) {
   if (imm16 == 0) {
-    emit_byte(0xC3);
+    emit_int8((unsigned char)0xC3);
   } else {
-    emit_byte(0xC2);
-    emit_word(imm16);
+    emit_int8((unsigned char)0xC2);
+    emit_int16(imm16);
   }
 }
 
@@ -2487,26 +2624,26 @@
   // Not supported in 64bit mode
   ShouldNotReachHere();
 #endif
-  emit_byte(0x9E);
+  emit_int8((unsigned char)0x9E);
 }
 
 void Assembler::sarl(Register dst, int imm8) {
   int encode = prefix_and_encode(dst->encoding());
   assert(isShiftCount(imm8), "illegal shift count");
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xF8 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xF8 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xF8 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xF8 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::sarl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::sbbl(Address dst, int32_t imm32) {
@@ -2524,7 +2661,7 @@
 void Assembler::sbbl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x1B);
+  emit_int8(0x1B);
   emit_operand(dst, src);
 }
 
@@ -2536,47 +2673,47 @@
 void Assembler::setb(Condition cc, Register dst) {
   assert(0 <= cc && cc < 16, "illegal cc");
   int encode = prefix_and_encode(dst->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0x90 | cc);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)(0x90 | cc));
+  emit_int8((unsigned char)(0xC0 | encode));
 }
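
setcc is the 0F 90+cc opcode family: the low nibble of the second opcode byte is the condition code, hence the OR into 0x90. Two concrete encodings as a worked example:

    //   sete  al  ->  0F 94 C0   (cc = 4 is E/Z,   ModRM = 0xC0 | 0 for al)
    //   setne cl  ->  0F 95 C1   (cc = 5 is NE/NZ, ModRM = 0xC0 | 1 for cl)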
 
 void Assembler::shll(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xE0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xE0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xE0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xE0 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::shll(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::shrl(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xC1);
-  emit_byte(0xE8 | encode);
-  emit_byte(imm8);
+  emit_int8((unsigned char)0xC1);
+  emit_int8((unsigned char)(0xE8 | encode));
+  emit_int8(imm8);
 }
 
 void Assembler::shrl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xE8 | encode));
 }
 
 // copies a single word from [esi] to [edi]
 void Assembler::smovl() {
-  emit_byte(0xA5);
+  emit_int8((unsigned char)0xA5);
 }
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
@@ -2594,6 +2731,10 @@
   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
+void Assembler::std() {
+  emit_int8((unsigned char)0xFD);
+}
+
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
@@ -2603,8 +2744,8 @@
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(3), dst);
 }
 
@@ -2617,7 +2758,7 @@
 void Assembler::subl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x29);
+  emit_int8(0x29);
   emit_operand(src, dst);
 }
 
@@ -2635,7 +2776,7 @@
 void Assembler::subl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x2B);
+  emit_int8(0x2B);
   emit_operand(dst, src);
 }
 
@@ -2676,13 +2817,13 @@
   // 8bit operands
   int encode = dst->encoding();
   if (encode == 0) {
-    emit_byte(0xA9);
+    emit_int8((unsigned char)0xA9);
   } else {
     encode = prefix_and_encode(encode);
-    emit_byte(0xF7);
-    emit_byte(0xC0 | encode);
+    emit_int8((unsigned char)0xF7);
+    emit_int8((unsigned char)(0xC0 | encode));
   }
-  emit_long(imm32);
+  emit_int32(imm32);
 }
 
 void Assembler::testl(Register dst, Register src) {
@@ -2693,7 +2834,7 @@
 void Assembler::testl(Register dst, Address  src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x85);
+  emit_int8((unsigned char)0x85);
   emit_operand(dst, src);
 }
 
@@ -2721,22 +2862,28 @@
 void Assembler::xaddl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x0F);
-  emit_byte(0xC1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xC1);
   emit_operand(src, dst);
 }
 
 void Assembler::xchgl(Register dst, Address src) { // xchg
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x87);
+  emit_int8((unsigned char)0x87);
   emit_operand(dst, src);
 }
 
 void Assembler::xchgl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x87);
-  emit_byte(0xc0 | encode);
+  emit_int8((unsigned char)0x87);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::xgetbv() {
+  emit_int8(0x0F);
+  emit_int8(0x01);
+  emit_int8((unsigned char)0xD0);
 }
 
 void Assembler::xorl(Register dst, int32_t imm32) {
@@ -2747,7 +2894,7 @@
 void Assembler::xorl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x33);
+  emit_int8(0x33);
   emit_operand(dst, src);
 }
 
@@ -3173,8 +3320,8 @@
 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x40);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x40);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
@@ -3185,8 +3332,8 @@
 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
-  emit_byte(0x40);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x40);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
@@ -3200,7 +3347,7 @@
   int dst_enc = dst->encoding();
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
-  emit_byte(0x40);
+  emit_int8(0x40);
   emit_operand(dst, src);
 }
 
@@ -3209,27 +3356,27 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
-  emit_byte(0x71);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::pslld(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
-  emit_byte(0x72);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psllq(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
@@ -3251,21 +3398,21 @@
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
   emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
@@ -3288,18 +3435,18 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x71);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrld(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x72);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrlq(XMMRegister dst, int shift) {
@@ -3308,9 +3455,9 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
@@ -3332,21 +3479,21 @@
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
   emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
@@ -3369,18 +3516,18 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
-  emit_byte(0x71);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrad(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
-  emit_byte(0x72);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
@@ -3397,14 +3544,14 @@
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
@@ -3469,11 +3616,11 @@
   assert(VM_Version::supports_avx(), "");
   bool vector256 = true;
   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
-  emit_byte(0x18);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x18);
+  emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
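
The trailing immediate selects the destination 128-bit lane; this helper hard-codes 0x01, so the xmm source always replaces the upper half while nds supplies the remaining bits. A hedged usage sketch, with illustrative register names only:

    //   imm8 = 0x00: src replaces bits 127:0 of the result
    //   imm8 = 0x01: src replaces bits 255:128 (what vinsertf128h emits)
    // assembling a 256-bit value from two halves:
    //   vinsertf128h(dst /* ymm */, lo /* ymm, low half kept */, hi /* xmm */);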
 
 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
@@ -3484,10 +3631,10 @@
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vextractf128h(Address dst, XMMRegister src) {
@@ -3497,21 +3644,21 @@
   assert(src != xnoreg, "sanity");
   int src_enc = src->encoding();
   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x19);
+  emit_int8(0x19);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
   bool vector256 = true;
   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
-  emit_byte(0x38);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x38);
+  emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vinserti128h(XMMRegister dst, Address src) {
@@ -3522,10 +3669,10 @@
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x38);
+  emit_int8(0x38);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vextracti128h(Address dst, XMMRegister src) {
@@ -3535,16 +3682,25 @@
   assert(src != xnoreg, "sanity");
   int src_enc = src->encoding();
   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x39);
+  emit_int8(0x39);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
+}
+
+// duplicate the 4-byte integer in src across the 8 dword locations in dst
+void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_avx2(), "");
+  bool vector256 = true;
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
-  emit_byte(0x77);
+  emit_int8(0x77);
 }
 
 
@@ -3554,15 +3710,15 @@
 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
   // NO PREFIX AS NEVER 64BIT
   InstructionMark im(this);
-  emit_byte(0x81);
-  emit_byte(0xF8 | src1->encoding());
+  emit_int8((unsigned char)0x81);
+  emit_int8((unsigned char)(0xF8 | src1->encoding()));
   emit_data(imm32, rspec, 0);
 }
 
 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
   InstructionMark im(this);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, src1);
   emit_data(imm32, rspec, 0);
 }
@@ -3572,14 +3728,14 @@
 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
 void Assembler::cmpxchg8(Address adr) {
   InstructionMark im(this);
-  emit_byte(0x0F);
-  emit_byte(0xc7);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rcx, adr);
 }
 
 void Assembler::decl(Register dst) {
   // Don't use it directly. Use MacroAssembler::decrementl() instead.
- emit_byte(0x48 | dst->encoding());
+ emit_int8(0x48 | dst->encoding());
 }
 
 #endif // _LP64
@@ -3587,8 +3743,8 @@
 // 64bit typically doesn't use the x87 but needs to for the trig funcs
 
 void Assembler::fabs() {
-  emit_byte(0xD9);
-  emit_byte(0xE1);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE1);
 }
 
 void Assembler::fadd(int i) {
@@ -3597,13 +3753,13 @@
 
 void Assembler::fadd_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rax, src);
 }
 
 void Assembler::fadd_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rax, src);
 }
 
@@ -3616,8 +3772,8 @@
 }
 
 void Assembler::fchs() {
-  emit_byte(0xD9);
-  emit_byte(0xE0);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE0);
 }
 
 void Assembler::fcom(int i) {
@@ -3630,29 +3786,29 @@
 
 void Assembler::fcomp_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rbx, src);
 }
 
 void Assembler::fcomp_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rbx, src);
 }
 
 void Assembler::fcompp() {
-  emit_byte(0xDE);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xDE);
+  emit_int8((unsigned char)0xD9);
 }
 
 void Assembler::fcos() {
-  emit_byte(0xD9);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFF);
 }
 
 void Assembler::fdecstp() {
-  emit_byte(0xD9);
-  emit_byte(0xF6);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF6);
 }
 
 void Assembler::fdiv(int i) {
@@ -3661,13 +3817,13 @@
 
 void Assembler::fdiv_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rsi, src);
 }
 
 void Assembler::fdiv_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rsi, src);
 }
 
@@ -3688,13 +3844,13 @@
 
 void Assembler::fdivr_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rdi, src);
 }
 
 void Assembler::fdivr_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rdi, src);
 }
 
@@ -3712,59 +3868,59 @@
 
 void Assembler::fild_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDF);
+  emit_int8((unsigned char)0xDF);
   emit_operand32(rbp, adr);
 }
 
 void Assembler::fild_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rax, adr);
 }
 
 void Assembler::fincstp() {
-  emit_byte(0xD9);
-  emit_byte(0xF7);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF7);
 }
 
 void Assembler::finit() {
-  emit_byte(0x9B);
-  emit_byte(0xDB);
-  emit_byte(0xE3);
+  emit_int8((unsigned char)0x9B);
+  emit_int8((unsigned char)0xDB);
+  emit_int8((unsigned char)0xE3);
 }
 
 void Assembler::fist_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rdx, adr);
 }
 
 void Assembler::fistp_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDF);
+  emit_int8((unsigned char)0xDF);
   emit_operand32(rdi, adr);
 }
 
 void Assembler::fistp_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rbx, adr);
 }
 
 void Assembler::fld1() {
-  emit_byte(0xD9);
-  emit_byte(0xE8);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE8);
 }
 
 void Assembler::fld_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rax, adr);
 }
 
 void Assembler::fld_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rax, adr);
 }
 
@@ -3775,35 +3931,35 @@
 
 void Assembler::fld_x(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rbp, adr);
 }
 
 void Assembler::fldcw(Address src) {
   InstructionMark im(this);
-  emit_byte(0xd9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rbp, src);
 }
 
 void Assembler::fldenv(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rsp, src);
 }
 
 void Assembler::fldlg2() {
-  emit_byte(0xD9);
-  emit_byte(0xEC);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xEC);
 }
 
 void Assembler::fldln2() {
-  emit_byte(0xD9);
-  emit_byte(0xED);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xED);
 }
 
 void Assembler::fldz() {
-  emit_byte(0xD9);
-  emit_byte(0xEE);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xEE);
 }
 
 void Assembler::flog() {
@@ -3824,13 +3980,13 @@
 
 void Assembler::fmul_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rcx, src);
 }
 
 void Assembler::fmul_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rcx, src);
 }
 
@@ -3844,63 +4000,63 @@
 
 void Assembler::fnsave(Address dst) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rsi, dst);
 }
 
 void Assembler::fnstcw(Address src) {
   InstructionMark im(this);
-  emit_byte(0x9B);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0x9B);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rdi, src);
 }
 
 void Assembler::fnstsw_ax() {
-  emit_byte(0xdF);
-  emit_byte(0xE0);
+  emit_int8((unsigned char)0xDF);
+  emit_int8((unsigned char)0xE0);
 }
 
 void Assembler::fprem() {
-  emit_byte(0xD9);
-  emit_byte(0xF8);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF8);
 }
 
 void Assembler::fprem1() {
-  emit_byte(0xD9);
-  emit_byte(0xF5);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF5);
 }
 
 void Assembler::frstor(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rsp, src);
 }
 
 void Assembler::fsin() {
-  emit_byte(0xD9);
-  emit_byte(0xFE);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFE);
 }
 
 void Assembler::fsqrt() {
-  emit_byte(0xD9);
-  emit_byte(0xFA);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFA);
 }
 
 void Assembler::fst_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rdx, adr);
 }
 
 void Assembler::fst_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rdx, adr);
 }
 
 void Assembler::fstp_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rbx, adr);
 }
 
@@ -3910,13 +4066,13 @@
 
 void Assembler::fstp_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rbx, adr);
 }
 
 void Assembler::fstp_x(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rdi, adr);
 }
 
@@ -3926,13 +4082,13 @@
 
 void Assembler::fsub_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rsp, src);
 }
 
 void Assembler::fsub_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rsp, src);
 }
 
@@ -3950,13 +4106,13 @@
 
 void Assembler::fsubr_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rbp, src);
 }
 
 void Assembler::fsubr_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rbp, src);
 }
 
@@ -3969,15 +4125,15 @@
 }
 
 void Assembler::ftan() {
-  emit_byte(0xD9);
-  emit_byte(0xF2);
-  emit_byte(0xDD);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)0xDD);
+  emit_int8((unsigned char)0xD8);
 }
 
 void Assembler::ftst() {
-  emit_byte(0xD9);
-  emit_byte(0xE4);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE4);
 }
 
 void Assembler::fucomi(int i) {
@@ -3993,7 +4149,7 @@
 }
 
 void Assembler::fwait() {
-  emit_byte(0x9B);
+  emit_int8((unsigned char)0x9B);
 }
 
 void Assembler::fxch(int i) {
@@ -4001,23 +4157,23 @@
 }
 
 void Assembler::fyl2x() {
-  emit_byte(0xD9);
-  emit_byte(0xF1);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF1);
 }
 
 void Assembler::frndint() {
-  emit_byte(0xD9);
-  emit_byte(0xFC);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFC);
 }
 
 void Assembler::f2xm1() {
-  emit_byte(0xD9);
-  emit_byte(0xF0);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF0);
 }
 
 void Assembler::fldl2e() {
-  emit_byte(0xD9);
-  emit_byte(0xEA);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xEA);
 }
 
 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
@@ -4028,7 +4184,7 @@
 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
   if (pre > 0) {
-    emit_byte(simd_pre[pre]);
+    emit_int8(simd_pre[pre]);
   }
   if (rex_w) {
     prefixq(adr, xreg);
@@ -4036,25 +4192,25 @@
     prefix(adr, xreg);
   }
   if (opc > 0) {
-    emit_byte(0x0F);
+    emit_int8(0x0F);
     int opc2 = simd_opc[opc];
     if (opc2 > 0) {
-      emit_byte(opc2);
+      emit_int8(opc2);
     }
   }
 }
 
 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
   if (pre > 0) {
-    emit_byte(simd_pre[pre]);
+    emit_int8(simd_pre[pre]);
   }
   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
                           prefix_and_encode(dst_enc, src_enc);
   if (opc > 0) {
-    emit_byte(0x0F);
+    emit_int8(0x0F);
     int opc2 = simd_opc[opc];
     if (opc2 > 0) {
-      emit_byte(opc2);
+      emit_int8(opc2);
     }
   }
   return encode;
@@ -4068,11 +4224,11 @@
     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
     byte1 = (~byte1) & 0xE0;
     byte1 |= opc;
-    a_byte(byte1);
+    emit_int8(byte1);
 
     int byte2 = ((~nds_enc) & 0xf) << 3;
     byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
-    emit_byte(byte2);
+    emit_int8(byte2);
   } else {
     prefix(VEX_2bytes);
 
@@ -4080,7 +4236,7 @@
     byte1 = (~byte1) & 0x80;
     byte1 |= ((~nds_enc) & 0xf) << 3;
     byte1 |= (vector256 ? 4 : 0) | pre;
-    emit_byte(byte1);
+    emit_int8(byte1);
   }
 }
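
For reference, the two layouts this routine builds: the three-byte form is 0xC4, then byte1 = inverted R/X/B plus the opcode map, then byte2 = W, inverted vvvv, L, pp; the two-byte form is 0xC5 with a single byte holding inverted R, inverted vvvv, L, pp. A worked two-byte example, assuming vaddps xmm1, xmm2, xmm3 (VEX.NDS.128.0F.WIG 58 /r):

    // byte = (~R & 0x80) | ((~vvvv & 0xF) << 3) | (L << 2) | pp
    //      = 0x80 | ((~2 & 0xF) << 3) | 0 | 0   // R = 0, vvvv = xmm2, L = 128-bit, pp = none
    //      = 0xE8
    // full instruction: C5 E8 58 CB             // ModRM 0xCB = 0xC0 | (1 << 3) | 3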
 
@@ -4126,28 +4282,28 @@
 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
   InstructionMark im(this);
   simd_prefix(dst, dst, src, pre);
-  emit_byte(opcode);
+  emit_int8(opcode);
   emit_operand(dst, src);
 }
 
 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
   int encode = simd_prefix_and_encode(dst, dst, src, pre);
-  emit_byte(opcode);
-  emit_byte(0xC0 | encode);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Versions with no second source register (non-destructive source).
 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
   InstructionMark im(this);
   simd_prefix(dst, xnoreg, src, pre);
-  emit_byte(opcode);
+  emit_int8(opcode);
   emit_operand(dst, src);
 }
 
 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
-  emit_byte(opcode);
-  emit_byte(0xC0 | encode);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // 3-operands AVX instructions
@@ -4155,22 +4311,22 @@
                                Address src, VexSimdPrefix pre, bool vector256) {
   InstructionMark im(this);
   vex_prefix(dst, nds, src, pre, vector256);
-  emit_byte(opcode);
+  emit_int8(opcode);
   emit_operand(dst, src);
 }
 
 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                                XMMRegister src, VexSimdPrefix pre, bool vector256) {
   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
-  emit_byte(opcode);
-  emit_byte(0xC0 | encode);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 #ifndef _LP64
 
 void Assembler::incl(Register dst) {
   // Don't use it directly. Use MacroAssembler::incrementl() instead.
-  emit_byte(0x40 | dst->encoding());
+  emit_int8(0x40 | dst->encoding());
 }
 
 void Assembler::lea(Register dst, Address src) {
@@ -4179,7 +4335,7 @@
 
 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
   InstructionMark im(this);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst);
   emit_data((int)imm32, rspec, 0);
 }
@@ -4187,49 +4343,49 @@
 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_data((int)imm32, rspec, 0);
 }
 
 void Assembler::popa() { // 32bit
-  emit_byte(0x61);
+  emit_int8(0x61);
 }
 
 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
-  emit_byte(0x68);
+  emit_int8(0x68);
   emit_data(imm32, rspec, 0);
 }
 
 void Assembler::pusha() { // 32bit
-  emit_byte(0x60);
+  emit_int8(0x60);
 }
 
 void Assembler::set_byte_if_not_zero(Register dst) {
-  emit_byte(0x0F);
-  emit_byte(0x95);
-  emit_byte(0xE0 | dst->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0x95);
+  emit_int8((unsigned char)(0xE0 | dst->encoding()));
 }
 
 void Assembler::shldl(Register dst, Register src) {
-  emit_byte(0x0F);
-  emit_byte(0xA5);
-  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xA5);
+  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
 }
 
 void Assembler::shrdl(Register dst, Register src) {
-  emit_byte(0x0F);
-  emit_byte(0xAD);
-  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAD);
+  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
 }
 
 #else // LP64
 
 void Assembler::set_byte_if_not_zero(Register dst) {
   int enc = prefix_and_encode(dst->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0x95);
-  emit_byte(0xE0 | enc);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0x95);
+  emit_int8((unsigned char)(0xE0 | enc));
 }
 
 // 64bit only pieces of the assembler
@@ -4284,7 +4440,7 @@
   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
   if (!is_simm32(disp)) return false;
 
-  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));
+  disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
 
   // Because rip relative is a disp + address_of_next_instruction and we
   // don't know the value of address_of_next_instruction we apply a fudge factor
@@ -4315,7 +4471,7 @@
                             relocInfo::relocType rtype,
                             int format) {
   if (rtype == relocInfo::none) {
-    emit_long64(data);
+    emit_int64(data);
   } else {
     emit_data64(data, Relocation::spec_simple(rtype), format);
   }
@@ -4333,7 +4489,7 @@
 #ifdef ASSERT
   check_relocation(rspec, format);
 #endif
-  emit_long64(data);
+  emit_int64(data);
 }
 
 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
@@ -4567,7 +4723,7 @@
 void Assembler::adcq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x13);
+  emit_int8(0x13);
   emit_operand(dst, src);
 }
 
@@ -4585,7 +4741,7 @@
 void Assembler::addq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x01);
+  emit_int8(0x01);
   emit_operand(src, dst);
 }
 
@@ -4597,7 +4753,7 @@
 void Assembler::addq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x03);
+  emit_int8(0x03);
   emit_operand(dst, src);
 }
 
@@ -4609,9 +4765,9 @@
 void Assembler::andq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rsp, dst, 4);
-  emit_long(imm32);
+  emit_int32(imm32);
 }
 
 void Assembler::andq(Register dst, int32_t imm32) {
@@ -4622,7 +4778,7 @@
 void Assembler::andq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x23);
+  emit_int8(0x23);
   emit_operand(dst, src);
 }
 
@@ -4633,58 +4789,58 @@
 
 void Assembler::bsfq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBC);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bsrq(Register dst, Register src) {
   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bswapq(Register reg) {
   int encode = prefixq_and_encode(reg->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xC8 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)(0xC8 | encode));
 }
 
 void Assembler::cdqq() {
   prefix(REX_W);
-  emit_byte(0x99);
+  emit_int8((unsigned char)0x99);
 }
 
 void Assembler::clflush(Address adr) {
   prefix(adr);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(rdi, adr);
 }
 
 void Assembler::cmovq(Condition cc, Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cmovq(Condition cc, Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, dst, 4);
-  emit_long(imm32);
+  emit_int32(imm32);
 }
 
 void Assembler::cmpq(Register dst, int32_t imm32) {
@@ -4695,7 +4851,7 @@
 void Assembler::cmpq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x3B);
+  emit_int8(0x3B);
   emit_operand(src, dst);
 }
 
@@ -4707,123 +4863,123 @@
 void Assembler::cmpq(Register dst, Address  src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x3B);
+  emit_int8(0x3B);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpxchgq(Register reg, Address adr) {
   InstructionMark im(this);
   prefixq(adr, reg);
-  emit_byte(0x0F);
-  emit_byte(0xB1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB1);
   emit_operand(reg, adr);
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
+  emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
+  emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::decl(Register dst) {
   // Don't use it directly. Use MacroAssembler::decrementl() instead.
   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC8 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC8 | encode));
 }
 
 void Assembler::decq(Register dst) {
   // Don't use it directly. Use MacroAssembler::decrementq() instead.
   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC8 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC8 | encode));
 }
 
 void Assembler::decq(Address dst) {
   // Don't use it directly. Use MacroAssembler::decrementq() instead.
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rcx, dst);
 }
 
 void Assembler::fxrstor(Address src) {
   prefixq(src);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(1), src);
 }
 
 void Assembler::fxsave(Address dst) {
   prefixq(dst);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(0), dst);
 }
 
 void Assembler::idivq(Register src) {
   int encode = prefixq_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::imulq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xAF);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::imulq(Register dst, Register src, int value) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
   if (is8bit(value)) {
-    emit_byte(0x6B);
-    emit_byte(0xC0 | encode);
-    emit_byte(value & 0xFF);
+    emit_int8(0x6B);
+    emit_int8((unsigned char)(0xC0 | encode));
+    emit_int8(value & 0xFF);
   } else {
-    emit_byte(0x69);
-    emit_byte(0xC0 | encode);
-    emit_long(value);
+    emit_int8(0x69);
+    emit_int8((unsigned char)(0xC0 | encode));
+    emit_int32(value);
   }
 }
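
The branch picks between IMUL's two immediate encodings: 0x6B carries a sign-extended 8-bit immediate, 0x69 a full 32-bit one, so the short form applies exactly when the value survives a round trip through a signed byte. A sketch of that test:

    #include <cstdio>

    // An immediate fits the short form iff it is representable as int8_t.
    bool is8bit(int value) {
      return -128 <= value && value <= 127;
    }

    int main() {
      printf("%d %d %d\n", is8bit(100), is8bit(-128), is8bit(200));  // 1 1 0
      return 0;
    }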
 
@@ -4831,23 +4987,23 @@
   // Don't use it directly. Use MacroAssembler::incrementl() instead.
   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::incq(Register dst) {
   // Don't use it directly. Use MacroAssembler::incrementq() instead.
   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::incq(Address dst) {
   // Don't use it directly. Use MacroAssembler::incrementq() instead.
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rax, dst);
 }
 
@@ -4858,35 +5014,35 @@
 void Assembler::leaq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x8D);
+  emit_int8((unsigned char)0x8D);
   emit_operand(dst, src);
 }
 
 void Assembler::mov64(Register dst, int64_t imm64) {
   InstructionMark im(this);
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
-  emit_long64(imm64);
+  emit_int8((unsigned char)(0xB8 | encode));
+  emit_int64(imm64);
 }
 
 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_data64(imm64, rspec);
 }
 
 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
 
 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 4);
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
@@ -4894,34 +5050,34 @@
 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefix_and_encode(src1->encoding());
-  emit_byte(0x81);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0x81);
+  emit_int8((unsigned char)(0xF8 | encode));
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
 
 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   prefix(src1);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rax, src1, 4);
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
 
 void Assembler::lzcntq(Register dst, Register src) {
   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
-  emit_byte(0x6E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x6E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdq(Register dst, XMMRegister src) {
@@ -4929,43 +5085,43 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
-  emit_byte(0x7E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x7E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x8B);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x8B);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x8B);
+  emit_int8((unsigned char)0x8B);
   emit_operand(dst, src);
 }
 
 void Assembler::movq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x89);
+  emit_int8((unsigned char)0x89);
   emit_operand(src, dst);
 }
 
 void Assembler::movsbq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
   emit_operand(dst, src);
 }
 
 void Assembler::movsbq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBE);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movslq(Register dst, int32_t imm32) {
@@ -4975,95 +5131,95 @@
   ShouldNotReachHere();
   InstructionMark im(this);
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xC7 | encode);
-  emit_long(imm32);
+  emit_int8((unsigned char)(0xC7 | encode));
+  emit_int32(imm32);
 }
 
 void Assembler::movslq(Address dst, int32_t imm32) {
   assert(is_simm32(imm32), "lost bits");
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 4);
-  emit_long(imm32);
+  emit_int32(imm32);
 }
 
 void Assembler::movslq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x63);
+  emit_int8(0x63);
   emit_operand(dst, src);
 }
 
 void Assembler::movslq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x63);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x63);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movswq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBF);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBF);
   emit_operand(dst, src);
 }
 
 void Assembler::movswq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBF);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xBF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movzbq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB6);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB6);
   emit_operand(dst, src);
 }
 
 void Assembler::movzbq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB6);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB6);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movzwq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB7);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB7);
   emit_operand(dst, src);
 }
 
 void Assembler::movzwq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB7);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB7);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::negq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD8 | encode));
 }
 
 void Assembler::notq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD0 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD0 | encode));
 }
 
 void Assembler::orq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rcx, dst, 4);
-  emit_long(imm32);
+  emit_int32(imm32);
 }
 
 void Assembler::orq(Register dst, int32_t imm32) {
@@ -5074,7 +5230,7 @@
 void Assembler::orq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0B);
+  emit_int8(0x0B);
   emit_operand(dst, src);
 }
 
@@ -5107,26 +5263,26 @@
 void Assembler::popcntq(Register dst, Address src) {
   assert(VM_Version::supports_popcnt(), "must support");
   InstructionMark im(this);
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB8);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB8);
   emit_operand(dst, src);
 }
 
 void Assembler::popcntq(Register dst, Register src) {
   assert(VM_Version::supports_popcnt(), "must support");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB8);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::popq(Address dst) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x8F);
+  emit_int8((unsigned char)0x8F);
   emit_operand(rax, dst);
 }
 
@@ -5158,7 +5314,7 @@
 void Assembler::pushq(Address src) {
   InstructionMark im(this);
   prefixq(src);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rsi, src);
 }
 
@@ -5166,31 +5322,31 @@
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xD0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xD0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xD0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xD0 | encode));
+    emit_int8(imm8);
   }
 }
 void Assembler::sarq(Register dst, int imm8) {
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xF8 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xF8 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xF8 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xF8 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::sarq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::sbbq(Address dst, int32_t imm32) {
@@ -5207,7 +5363,7 @@
 void Assembler::sbbq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x1B);
+  emit_int8(0x1B);
   emit_operand(dst, src);
 }
 
@@ -5220,33 +5376,33 @@
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xE0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xE0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xE0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xE0 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::shlq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::shrq(Register dst, int imm8) {
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xC1);
-  emit_byte(0xE8 | encode);
-  emit_byte(imm8);
+  emit_int8((unsigned char)0xC1);
+  emit_int8((unsigned char)(0xE8 | encode));
+  emit_int8(imm8);
 }
 
 void Assembler::shrq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xE8 | encode));
 }
 
 void Assembler::subq(Address dst, int32_t imm32) {
@@ -5258,7 +5414,7 @@
 void Assembler::subq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x29);
+  emit_int8(0x29);
   emit_operand(src, dst);
 }
 
@@ -5276,7 +5432,7 @@
 void Assembler::subq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x2B);
+  emit_int8(0x2B);
   emit_operand(dst, src);
 }
 
@@ -5292,13 +5448,13 @@
   int encode = dst->encoding();
   if (encode == 0) {
     prefix(REX_W);
-    emit_byte(0xA9);
+    emit_int8((unsigned char)0xA9);
   } else {
     encode = prefixq_and_encode(encode);
-    emit_byte(0xF7);
-    emit_byte(0xC0 | encode);
+    emit_int8((unsigned char)0xF7);
+    emit_int8((unsigned char)(0xC0 | encode));
   }
-  emit_long(imm32);
+  emit_int32(imm32);
 }
 
 void Assembler::testq(Register dst, Register src) {
@@ -5309,22 +5465,22 @@
 void Assembler::xaddq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x0F);
-  emit_byte(0xC1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xC1);
   emit_operand(src, dst);
 }
 
 void Assembler::xchgq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x87);
+  emit_int8((unsigned char)0x87);
   emit_operand(dst, src);
 }
 
 void Assembler::xchgq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x87);
-  emit_byte(0xc0 | encode);
+  emit_int8((unsigned char)0x87);
+  emit_int8((unsigned char)(0xc0 | encode));
 }
 
 void Assembler::xorq(Register dst, Register src) {
@@ -5335,6028 +5491,8 @@
 void Assembler::xorq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x33);
+  emit_int8(0x33);
   emit_operand(dst, src);
 }
 
 #endif // !LP64
-
-static Assembler::Condition reverse[] = {
-    Assembler::noOverflow     /* overflow      = 0x0 */ ,
-    Assembler::overflow       /* noOverflow    = 0x1 */ ,
-    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
-    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
-    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
-    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
-    Assembler::above          /* belowEqual    = 0x6 */ ,
-    Assembler::belowEqual     /* above         = 0x7 */ ,
-    Assembler::positive       /* negative      = 0x8 */ ,
-    Assembler::negative       /* positive      = 0x9 */ ,
-    Assembler::noParity       /* parity        = 0xa */ ,
-    Assembler::parity         /* noParity      = 0xb */ ,
-    Assembler::greaterEqual   /* less          = 0xc */ ,
-    Assembler::less           /* greaterEqual  = 0xd */ ,
-    Assembler::greater        /* lessEqual     = 0xe */ ,
-    Assembler::lessEqual      /* greater       = 0xf, */
-
-};
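
This table (on its way to macroAssembler_x86.cpp with the rest of the file) maps each x86 condition code, 0x0-0xF, to its logical negation. The pairing is systematic: a condition and its negation differ only in bit 0, so the table is equivalent to an XOR with 1, as this standalone check illustrates:

    #include <cstdio>

    // Low-bit flip mirrors what the reverse[] table encodes: on x86, cc and
    // its negation differ only in bit 0 (e.g. 0x4 "equal" vs 0x5 "notEqual").
    int negate_condition(int cc) {
      return cc ^ 1;
    }

    int main() {
      printf("%X %X\n", negate_condition(0x4), negate_condition(0xC));  // 5 D
      return 0;
    }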
-
-
-// Implementation of MacroAssembler
-
-// First all the versions that have distinct versions depending on 32/64 bit
-// Unless the difference is trivial (1 line or so).
-
-#ifndef _LP64
-
-// 32bit versions
-
-Address MacroAssembler::as_Address(AddressLiteral adr) {
-  return Address(adr.target(), adr.rspec());
-}
-
-Address MacroAssembler::as_Address(ArrayAddress adr) {
-  return Address::make_array(adr);
-}
-
-int MacroAssembler::biased_locking_enter(Register lock_reg,
-                                         Register obj_reg,
-                                         Register swap_reg,
-                                         Register tmp_reg,
-                                         bool swap_reg_contains_mark,
-                                         Label& done,
-                                         Label* slow_case,
-                                         BiasedLockingCounters* counters) {
-  assert(UseBiasedLocking, "why call this otherwise?");
-  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
-  assert_different_registers(lock_reg, obj_reg, swap_reg);
-
-  if (PrintBiasedLockingStatistics && counters == NULL)
-    counters = BiasedLocking::counters();
-
-  bool need_tmp_reg = false;
-  if (tmp_reg == noreg) {
-    need_tmp_reg = true;
-    tmp_reg = lock_reg;
-  } else {
-    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
-  }
-  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
-  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
-  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
-  Address saved_mark_addr(lock_reg, 0);
-
-  // Biased locking
-  // See whether the lock is currently biased toward our thread and
-  // whether the epoch is still valid
-  // Note that the runtime guarantees sufficient alignment of JavaThread
-  // pointers to allow age to be placed into low bits
-  // First check to see whether biasing is even enabled for this object
-  Label cas_label;
-  int null_check_offset = -1;
-  if (!swap_reg_contains_mark) {
-    null_check_offset = offset();
-    movl(swap_reg, mark_addr);
-  }
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  movl(tmp_reg, swap_reg);
-  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  jcc(Assembler::notEqual, cas_label);
-  // The bias pattern is present in the object's header. Need to check
-  // whether the bias owner and the epoch are both still current.
-  // Note that because there is no current thread register on x86 we
-  // need to store off the mark word we read out of the object to
-  // avoid reloading it and needing to recheck invariants below. This
-  // store is unfortunate but it makes the overall code shorter and
-  // simpler.
-  movl(saved_mark_addr, swap_reg);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  xorl(swap_reg, tmp_reg);
-  if (swap_reg_contains_mark) {
-    null_check_offset = offset();
-  }
-  movl(tmp_reg, klass_addr);
-  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
-  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
-  }
-  jcc(Assembler::equal, done);
-
-  Label try_revoke_bias;
-  Label try_rebias;
-
-  // At this point we know that the header has the bias pattern and
-  // that we are not the bias owner in the current epoch. We need to
-  // figure out more details about the state of the header in order to
-  // know what operations can be legally performed on the object's
-  // header.
-
-  // If the low three bits in the xor result aren't clear, that means
-  // the prototype header is no longer biased and we have to revoke
-  // the bias on this object.
-  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
-  jcc(Assembler::notZero, try_revoke_bias);
-
-  // Biasing is still enabled for this data type. See whether the
-  // epoch of the current bias is still valid, meaning that the epoch
-  // bits of the mark word are equal to the epoch bits of the
-  // prototype header. (Note that the prototype header's epoch bits
-  // only change at a safepoint.) If not, attempt to rebias the object
-  // toward the current thread. Note that we must be absolutely sure
-  // that the current epoch is invalid in order to do this because
-  // otherwise the manipulations it performs on the mark word are
-  // illegal.
-  testl(swap_reg, markOopDesc::epoch_mask_in_place);
-  jcc(Assembler::notZero, try_rebias);
-
-  // The epoch of the current bias is still valid but we know nothing
-  // about the owner; it might be set or it might be clear. Try to
-  // acquire the bias of the object using an atomic operation. If this
-  // fails we will go in to the runtime to revoke the object's bias.
-  // Note that we first construct the presumed unbiased header so we
-  // don't accidentally blow away another thread's valid bias.
-  movl(swap_reg, saved_mark_addr);
-  andl(swap_reg,
-       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  orl(tmp_reg, swap_reg);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // If the biasing toward our thread failed, this means that
-  // another thread succeeded in biasing it toward itself and we
-  // need to revoke that bias. The revocation will occur in the
-  // interpreter runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_rebias);
-  // At this point we know the epoch has expired, meaning that the
-  // current "bias owner", if any, is actually invalid. Under these
-  // circumstances _only_, we are allowed to use the current header's
-  // value as the comparison value when doing the cas to acquire the
-  // bias in the current epoch. In other words, we allow transfer of
-  // the bias from one thread to another directly in this situation.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  movl(swap_reg, klass_addr);
-  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
-  movl(swap_reg, saved_mark_addr);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // If the biasing toward our thread failed, then another thread
-  // succeeded in biasing it toward itself and we need to revoke that
-  // bias. The revocation will occur in the runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_revoke_bias);
-  // The prototype mark in the klass doesn't have the bias bit set any
-  // more, indicating that objects of this data type are not supposed
-  // to be biased any more. We are going to try to reset the mark of
-  // this object to the prototype value and fall through to the
-  // CAS-based locking scheme. Note that if our CAS fails, it means
-  // that another thread raced us for the privilege of revoking the
-  // bias of this particular object, so it's okay to continue in the
-  // normal locking code.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  movl(swap_reg, saved_mark_addr);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  movl(tmp_reg, klass_addr);
-  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // Fall through to the normal CAS-based lock, because no matter what
-  // the result of the above CAS, some thread must have succeeded in
-  // removing the bias bit from the object's header.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
-  }
-
-  bind(cas_label);
-
-  return null_check_offset;
-}
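
Stripped of the register shuffling, the fast path opens with a plain bit test on the mark word; the movl/andl/cmpl sequence above computes exactly this predicate. A sketch with assumed HotSpot constants (three low lock bits, biased pattern 0b101):

    #include <cstdint>
    #include <cstdio>

    // Assumed mark-word constants: the low 3 bits hold the lock state, and
    // 0b101 marks a biased (or biasable) header.
    const uintptr_t biased_lock_mask_in_place = 0x7;
    const uintptr_t biased_lock_pattern       = 0x5;

    bool has_bias_pattern(uintptr_t mark) {
      return (mark & biased_lock_mask_in_place) == biased_lock_pattern;
    }

    int main() {
      printf("%d %d\n", has_bias_pattern(0x5), has_bias_pattern(0x1));  // 1 0
      return 0;
    }
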
-void MacroAssembler::call_VM_leaf_base(address entry_point,
-                                       int number_of_arguments) {
-  call(RuntimeAddress(entry_point));
-  increment(rsp, number_of_arguments * wordSize);
-}
-
-void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
-  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
-  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::cmpoop(Address src1, jobject obj) {
-  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::cmpoop(Register src1, jobject obj) {
-  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::extend_sign(Register hi, Register lo) {
-  // According to Intel Doc. AP-526, "Integer Divide", p.18.
-  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
-    cdql();
-  } else {
-    movl(hi, lo);
-    sarl(hi, 31);
-  }
-}
-
-void MacroAssembler::jC2(Register tmp, Label& L) {
-  // set parity bit if FPU flag C2 is set (via rax)
-  save_rax(tmp);
-  fwait(); fnstsw_ax();
-  sahf();
-  restore_rax(tmp);
-  // branch
-  jcc(Assembler::parity, L);
-}
-
-void MacroAssembler::jnC2(Register tmp, Label& L) {
-  // set parity bit if FPU flag C2 is set (via rax)
-  save_rax(tmp);
-  fwait(); fnstsw_ax();
-  sahf();
-  restore_rax(tmp);
-  // branch
-  jcc(Assembler::noParity, L);
-}
-
-// 32bit can do a case table jump in one instruction but we no longer allow the base
-// to be installed in the Address class
-void MacroAssembler::jump(ArrayAddress entry) {
-  jmp(as_Address(entry));
-}
-
-// Note: y_lo will be destroyed
-void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
-  // Long compare for Java (semantics as described in JVM spec.)
-  Label high, low, done;
-
-  cmpl(x_hi, y_hi);
-  jcc(Assembler::less, low);
-  jcc(Assembler::greater, high);
-  // x_hi is the return register
-  xorl(x_hi, x_hi);
-  cmpl(x_lo, y_lo);
-  jcc(Assembler::below, low);
-  jcc(Assembler::equal, done);
-
-  bind(high);
-  xorl(x_hi, x_hi);
-  increment(x_hi);
-  jmp(done);
-
-  bind(low);
-  xorl(x_hi, x_hi);
-  decrementl(x_hi);
-
-  bind(done);
-}
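
The branch structure implements the JVM's three-way long comparison from 32-bit halves: a signed compare of the high words decides unless they tie, in which case an unsigned compare of the low words does. The same decision tree in portable C++:

    #include <cstdint>
    #include <cstdio>

    // Three-way compare of two 64-bit values given as 32-bit halves,
    // mirroring lcmp2int: signed on hi, unsigned on lo.
    int lcmp2int(int32_t x_hi, uint32_t x_lo, int32_t y_hi, uint32_t y_lo) {
      if (x_hi != y_hi) return x_hi < y_hi ? -1 : 1;   // signed high words
      if (x_lo != y_lo) return x_lo < y_lo ? -1 : 1;   // unsigned low words
      return 0;
    }

    int main() {
      printf("%d\n", lcmp2int(0, 0x80000000u, 0, 1));  // 1: low compare is unsigned
      return 0;
    }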
-
-void MacroAssembler::lea(Register dst, AddressLiteral src) {
-    mov_literal32(dst, (int32_t)src.target(), src.rspec());
-}
-
-void MacroAssembler::lea(Address dst, AddressLiteral adr) {
-  // leal(dst, as_Address(adr));
-  // see note in movl as to why we must use a move
-  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
-}
-
-void MacroAssembler::leave() {
-  mov(rsp, rbp);
-  pop(rbp);
-}
-
-void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
-  // Multiplication of two Java long values stored on the stack
-  // as illustrated below. Result is in rdx:rax.
-  //
-  // rsp ---> [  ??  ] \               \
-  //            ....    | y_rsp_offset  |
-  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
-  //          [ y_hi ]                  | (in bytes)
-  //            ....                    |
-  //          [ x_lo ]                 /
-  //          [ x_hi ]
-  //            ....
-  //
-  // Basic idea: lo(result) = lo(x_lo * y_lo)
-  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
-  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
-  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
-  Label quick;
-  // load x_hi, y_hi and check if quick
-  // multiplication is possible
-  movl(rbx, x_hi);
-  movl(rcx, y_hi);
-  movl(rax, rbx);
-  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
-  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
-  // do full multiplication
-  // 1st step
-  mull(y_lo);                                    // x_hi * y_lo
-  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
-  // 2nd step
-  movl(rax, x_lo);
-  mull(rcx);                                     // x_lo * y_hi
-  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
-  // 3rd step
-  bind(quick);                                   // note: rbx, = 0 if quick multiply!
-  movl(rax, x_lo);
-  mull(y_lo);                                    // x_lo * y_lo
-  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
-}
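
The "Basic idea" identity is ordinary partial-product arithmetic: only the low halves of the two cross terms can reach the upper word of a 64-bit result, so the x_hi * y_hi term drops out entirely. A self-contained check of the identity:

    #include <cstdint>
    #include <cstdio>

    // 64 = 32x32 multiply from halves: lo(result) = lo(x_lo * y_lo),
    // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi).
    uint64_t lmul(uint32_t x_hi, uint32_t x_lo, uint32_t y_hi, uint32_t y_lo) {
      uint64_t ll = (uint64_t)x_lo * y_lo;
      uint32_t hi = (uint32_t)(ll >> 32) + x_hi * y_lo + x_lo * y_hi;  // cross terms wrap mod 2^32
      return ((uint64_t)hi << 32) | (uint32_t)ll;
    }

    int main() {
      uint64_t x = 0x0123456789ABCDEFull, y = 0x0FEDCBA987654321ull;
      uint64_t got = lmul((uint32_t)(x >> 32), (uint32_t)x,
                          (uint32_t)(y >> 32), (uint32_t)y);
      printf("%d\n", got == x * y);  // 1
      return 0;
    }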
-
-void MacroAssembler::lneg(Register hi, Register lo) {
-  negl(lo);
-  adcl(hi, 0);
-  negl(hi);
-}
-
-void MacroAssembler::lshl(Register hi, Register lo) {
-  // Java shift left long support (semantics as described in JVM spec., p.305)
-  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
-  // shift value is in rcx !
-  assert(hi != rcx, "must not use rcx");
-  assert(lo != rcx, "must not use rcx");
-  const Register s = rcx;                        // shift count
-  const int      n = BitsPerWord;
-  Label L;
-  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
-  cmpl(s, n);                                    // if (s < n)
-  jcc(Assembler::less, L);                       // else (s >= n)
-  movl(hi, lo);                                  // x := x << n
-  xorl(lo, lo);
-  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
-  bind(L);                                       // s (mod n) < n
-  shldl(hi, lo);                                 // x := x << s
-  shll(lo);
-}
-
-
-void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
-  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
-  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
-  assert(hi != rcx, "must not use rcx");
-  assert(lo != rcx, "must not use rcx");
-  const Register s = rcx;                        // shift count
-  const int      n = BitsPerWord;
-  Label L;
-  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
-  cmpl(s, n);                                    // if (s < n)
-  jcc(Assembler::less, L);                       // else (s >= n)
-  movl(lo, hi);                                  // x := x >> n
-  if (sign_extension) sarl(hi, 31);
-  else                xorl(hi, hi);
-  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
-  bind(L);                                       // s (mod n) < n
-  shrdl(lo, hi);                                 // x := x >> s
-  if (sign_extension) sarl(hi);
-  else                shrl(hi);
-}
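
Both routines rest on the identity in the comments — for s >= 32, x << s == (x << 32) << (s - 32) — plus the hardware's implicit mod-32 shift count, which is why no subl(s, n) is needed. A sketch of lshl's two cases on plain integers:

    #include <cstdint>
    #include <cstdio>

    // Double-word shift left from 32-bit halves, following lshl's structure:
    // for s >= 32 move lo into hi first, then shift by s mod 32.
    void lshl(uint32_t& hi, uint32_t& lo, unsigned s) {
      s &= 0x3F;                       // Java masks long shift counts to 6 bits
      if (s >= 32) { hi = lo; lo = 0; s -= 32; }
      hi = (s == 0) ? hi : (hi << s) | (lo >> (32 - s));  // shldl(hi, lo)
      lo <<= s;
    }

    int main() {
      uint32_t hi = 0, lo = 0x80000001u;
      lshl(hi, lo, 33);
      printf("%08X %08X\n", hi, lo);   // prints: 00000002 00000000
      return 0;
    }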
-
-void MacroAssembler::movoop(Register dst, jobject obj) {
-  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::movoop(Address dst, jobject obj) {
-  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
-  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
-  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
-  if (src.is_lval()) {
-    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
-  } else {
-    movl(dst, as_Address(src));
-  }
-}
-
-void MacroAssembler::movptr(ArrayAddress dst, Register src) {
-  movl(as_Address(dst), src);
-}
-
-void MacroAssembler::movptr(Register dst, ArrayAddress src) {
-  movl(dst, as_Address(src));
-}
-
-// src should NEVER be a real pointer. Use AddressLiteral for true pointers
-void MacroAssembler::movptr(Address dst, intptr_t src) {
-  movl(dst, src);
-}
-
-
-void MacroAssembler::pop_callee_saved_registers() {
-  pop(rcx);
-  pop(rdx);
-  pop(rdi);
-  pop(rsi);
-}
-
-void MacroAssembler::pop_fTOS() {
-  fld_d(Address(rsp, 0));
-  addl(rsp, 2 * wordSize);
-}
-
-void MacroAssembler::push_callee_saved_registers() {
-  push(rsi);
-  push(rdi);
-  push(rdx);
-  push(rcx);
-}
-
-void MacroAssembler::push_fTOS() {
-  subl(rsp, 2 * wordSize);
-  fstp_d(Address(rsp, 0));
-}
-
-
-void MacroAssembler::pushoop(jobject obj) {
-  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::pushklass(Metadata* obj) {
-  push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::pushptr(AddressLiteral src) {
-  if (src.is_lval()) {
-    push_literal32((int32_t)src.target(), src.rspec());
-  } else {
-    pushl(as_Address(src));
-  }
-}
-
-void MacroAssembler::set_word_if_not_zero(Register dst) {
-  xorl(dst, dst);
-  set_byte_if_not_zero(dst);
-}
-
-static void pass_arg0(MacroAssembler* masm, Register arg) {
-  masm->push(arg);
-}
-
-static void pass_arg1(MacroAssembler* masm, Register arg) {
-  masm->push(arg);
-}
-
-static void pass_arg2(MacroAssembler* masm, Register arg) {
-  masm->push(arg);
-}
-
-static void pass_arg3(MacroAssembler* masm, Register arg) {
-  masm->push(arg);
-}
-
-#ifndef PRODUCT
-extern "C" void findpc(intptr_t x);
-#endif
-
-void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
-  // In order to get locks to work, we need to fake an in_VM state
-  JavaThread* thread = JavaThread::current();
-  JavaThreadState saved_state = thread->thread_state();
-  thread->set_thread_state(_thread_in_vm);
-  if (ShowMessageBoxOnError) {
-    JavaThread* thread = JavaThread::current();
-    JavaThreadState saved_state = thread->thread_state();
-    thread->set_thread_state(_thread_in_vm);
-    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
-      ttyLocker ttyl;
-      BytecodeCounter::print();
-    }
-    // To see where a verify_oop failed, get $ebx+40/X for this frame.
-    // This is the value of eip which points to where verify_oop will return.
-    if (os::message_box(msg, "Execution stopped, print registers?")) {
-      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
-      BREAKPOINT;
-    }
-  } else {
-    ttyLocker ttyl;
-    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
-  }
-  // Don't assert holding the ttyLock
-    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
-  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
-}
-
-void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
-  ttyLocker ttyl;
-  FlagSetting fs(Debugging, true);
-  tty->print_cr("eip = 0x%08x", eip);
-#ifndef PRODUCT
-  if ((WizardMode || Verbose) && PrintMiscellaneous) {
-    tty->cr();
-    findpc(eip);
-    tty->cr();
-  }
-#endif
-#define PRINT_REG(rax) \
-  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
-  PRINT_REG(rax);
-  PRINT_REG(rbx);
-  PRINT_REG(rcx);
-  PRINT_REG(rdx);
-  PRINT_REG(rdi);
-  PRINT_REG(rsi);
-  PRINT_REG(rbp);
-  PRINT_REG(rsp);
-#undef PRINT_REG
-  // Print some words near top of stack.
-  int* dump_sp = (int*) rsp;
-  for (int col1 = 0; col1 < 8; col1++) {
-    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
-    os::print_location(tty, *dump_sp++);
-  }
-  for (int row = 0; row < 16; row++) {
-    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
-    for (int col = 0; col < 8; col++) {
-      tty->print(" 0x%08x", *dump_sp++);
-    }
-    tty->cr();
-  }
-  // Print some instructions around pc:
-  Disassembler::decode((address)eip-64, (address)eip);
-  tty->print_cr("--------");
-  Disassembler::decode((address)eip, (address)eip+32);
-}
-
-void MacroAssembler::stop(const char* msg) {
-  ExternalAddress message((address)msg);
-  // push address of message
-  pushptr(message.addr());
-  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
-  pusha();                                            // push registers
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
-  hlt();
-}
-
-void MacroAssembler::warn(const char* msg) {
-  push_CPU_state();
-
-  ExternalAddress message((address) msg);
-  // push address of message
-  pushptr(message.addr());
-
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
-  addl(rsp, wordSize);       // discard argument
-  pop_CPU_state();
-}
-
-void MacroAssembler::print_state() {
-  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
-  pusha();                                            // push registers
-
-  push_CPU_state();
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
-  pop_CPU_state();
-
-  popa();
-  addl(rsp, wordSize);
-}
-
-#else // _LP64
-
-// 64 bit versions
-
-Address MacroAssembler::as_Address(AddressLiteral adr) {
-  // amd64 always does this as a pc-rel
-  // we can be absolute or disp based on the instruction type
-  // jmp/call are displacements others are absolute
-  assert(!adr.is_lval(), "must be rval");
-  assert(reachable(adr), "must be");
-  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
-
-}
-
-Address MacroAssembler::as_Address(ArrayAddress adr) {
-  AddressLiteral base = adr.base();
-  lea(rscratch1, base);
-  Address index = adr.index();
-  assert(index._disp == 0, "must not have disp"); // maybe it can?
-  Address array(rscratch1, index._index, index._scale, index._disp);
-  return array;
-}
-
-int MacroAssembler::biased_locking_enter(Register lock_reg,
-                                         Register obj_reg,
-                                         Register swap_reg,
-                                         Register tmp_reg,
-                                         bool swap_reg_contains_mark,
-                                         Label& done,
-                                         Label* slow_case,
-                                         BiasedLockingCounters* counters) {
-  assert(UseBiasedLocking, "why call this otherwise?");
-  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
-  assert(tmp_reg != noreg, "tmp_reg must be supplied");
-  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
-  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
-  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
-  Address saved_mark_addr(lock_reg, 0);
-
-  if (PrintBiasedLockingStatistics && counters == NULL)
-    counters = BiasedLocking::counters();
-
-  // Biased locking
-  // See whether the lock is currently biased toward our thread and
-  // whether the epoch is still valid
-  // Note that the runtime guarantees sufficient alignment of JavaThread
-  // pointers to allow age to be placed into low bits
-  // First check to see whether biasing is even enabled for this object
-  Label cas_label;
-  int null_check_offset = -1;
-  if (!swap_reg_contains_mark) {
-    null_check_offset = offset();
-    movq(swap_reg, mark_addr);
-  }
-  movq(tmp_reg, swap_reg);
-  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
-  jcc(Assembler::notEqual, cas_label);
-  // The bias pattern is present in the object's header. Need to check
-  // whether the bias owner and the epoch are both still current.
-  load_prototype_header(tmp_reg, obj_reg);
-  orq(tmp_reg, r15_thread);
-  xorq(tmp_reg, swap_reg);
-  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  jcc(Assembler::equal, done);
-
-  Label try_revoke_bias;
-  Label try_rebias;
-
-  // At this point we know that the header has the bias pattern and
-  // that we are not the bias owner in the current epoch. We need to
-  // figure out more details about the state of the header in order to
-  // know what operations can be legally performed on the object's
-  // header.
-
-  // If the low three bits in the xor result aren't clear, that means
-  // the prototype header is no longer biased and we have to revoke
-  // the bias on this object.
-  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  jcc(Assembler::notZero, try_revoke_bias);
-
-  // Biasing is still enabled for this data type. See whether the
-  // epoch of the current bias is still valid, meaning that the epoch
-  // bits of the mark word are equal to the epoch bits of the
-  // prototype header. (Note that the prototype header's epoch bits
-  // only change at a safepoint.) If not, attempt to rebias the object
-  // toward the current thread. Note that we must be absolutely sure
-  // that the current epoch is invalid in order to do this because
-  // otherwise the manipulations it performs on the mark word are
-  // illegal.
-  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
-  jcc(Assembler::notZero, try_rebias);
-
-  // The epoch of the current bias is still valid but we know nothing
-  // about the owner; it might be set or it might be clear. Try to
-  // acquire the bias of the object using an atomic operation. If this
-  // fails we will go in to the runtime to revoke the object's bias.
-  // Note that we first construct the presumed unbiased header so we
-  // don't accidentally blow away another thread's valid bias.
-  andq(swap_reg,
-       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
-  movq(tmp_reg, swap_reg);
-  orq(tmp_reg, r15_thread);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // If the biasing toward our thread failed, this means that
-  // another thread succeeded in biasing it toward itself and we
-  // need to revoke that bias. The revocation will occur in the
-  // interpreter runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_rebias);
-  // At this point we know the epoch has expired, meaning that the
-  // current "bias owner", if any, is actually invalid. Under these
-  // circumstances _only_, we are allowed to use the current header's
-  // value as the comparison value when doing the cas to acquire the
-  // bias in the current epoch. In other words, we allow transfer of
-  // the bias from one thread to another directly in this situation.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  load_prototype_header(tmp_reg, obj_reg);
-  orq(tmp_reg, r15_thread);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // If the biasing toward our thread failed, then another thread
-  // succeeded in biasing it toward itself and we need to revoke that
-  // bias. The revocation will occur in the runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_revoke_bias);
-  // The prototype mark in the klass doesn't have the bias bit set any
-  // more, indicating that objects of this data type are not supposed
-  // to be biased any more. We are going to try to reset the mark of
-  // this object to the prototype value and fall through to the
-  // CAS-based locking scheme. Note that if our CAS fails, it means
-  // that another thread raced us for the privilege of revoking the
-  // bias of this particular object, so it's okay to continue in the
-  // normal locking code.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  load_prototype_header(tmp_reg, obj_reg);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // Fall through to the normal CAS-based lock, because no matter what
-  // the result of the above CAS, some thread must have succeeded in
-  // removing the bias bit from the object's header.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
-  }
-
-  bind(cas_label);
-
-  return null_check_offset;
-}
-
-void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
-  Label L, E;
-
-#ifdef _WIN64
-  // Windows always allocates space for its register args
-  assert(num_args <= 4, "only register arguments supported");
-  subq(rsp,  frame::arg_reg_save_area_bytes);
-#endif
-
-  // Align stack if necessary
-  testl(rsp, 15);
-  jcc(Assembler::zero, L);
-
-  subq(rsp, 8);
-  {
-    call(RuntimeAddress(entry_point));
-  }
-  addq(rsp, 8);
-  jmp(E);
-
-  bind(L);
-  {
-    call(RuntimeAddress(entry_point));
-  }
-
-  bind(E);
-
-#ifdef _WIN64
-  // restore stack pointer
-  addq(rsp, frame::arg_reg_save_area_bytes);
-#endif
-
-}
-
-void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
-  assert(!src2.is_lval(), "should use cmpptr");
-
-  if (reachable(src2)) {
-    cmpq(src1, as_Address(src2));
-  } else {
-    lea(rscratch1, src2);
-    Assembler::cmpq(src1, Address(rscratch1, 0));
-  }
-}
-
-int MacroAssembler::corrected_idivq(Register reg) {
-  // Full implementation of Java ldiv and lrem; checks for special
-  // case as described in JVM spec., p.243 & p.271.  The function
-  // returns the (pc) offset of the idivl instruction - may be needed
-  // for implicit exceptions.
-  //
-  //         normal case                           special case
-  //
-  // input : rax: dividend                         min_long
-  //         reg: divisor   (may not be eax/edx)   -1
-  //
-  // output: rax: quotient  (= rax idiv reg)       min_long
-  //         rdx: remainder (= rax irem reg)       0
-  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
-  static const int64_t min_long = 0x8000000000000000;
-  Label normal_case, special_case;
-
-  // check for special case
-  cmp64(rax, ExternalAddress((address) &min_long));
-  jcc(Assembler::notEqual, normal_case);
-  xorl(rdx, rdx); // prepare rdx for possible special case (where
-                  // remainder = 0)
-  cmpq(reg, -1);
-  jcc(Assembler::equal, special_case);
-
-  // handle normal case
-  bind(normal_case);
-  cdqq();
-  int idivq_offset = offset();
-  idivq(reg);
-
-  // normal and special case exit
-  bind(special_case);
-
-  return idivq_offset;
-}
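
The special case exists because the hardware idivq instruction raises a divide error on min_long / -1, while the JVM spec requires that pair to yield min_long with remainder 0. A minimal C++ sketch of the semantics being implemented (names are illustrative, not HotSpot API):

#include <cstdint>

// Java ldiv/lrem semantics: INT64_MIN / -1 must not trap.
void java_ldiv(int64_t dividend, int64_t divisor, int64_t& quot, int64_t& rem) {
  if (dividend == INT64_MIN && divisor == -1) {
    quot = INT64_MIN;  // Long.MIN_VALUE / -1 == Long.MIN_VALUE
    rem  = 0;          // Long.MIN_VALUE % -1 == 0
  } else {
    quot = dividend / divisor;  // safe: no overflow possible here
    rem  = dividend % divisor;
  }
}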
-
-void MacroAssembler::decrementq(Register reg, int value) {
-  if (value == min_jint) { subq(reg, value); return; }
-  if (value <  0) { incrementq(reg, -value); return; }
-  if (value == 0) {                        ; return; }
-  if (value == 1 && UseIncDec) { decq(reg) ; return; }
-  /* else */      { subq(reg, value)       ; return; }
-}
-
-void MacroAssembler::decrementq(Address dst, int value) {
-  if (value == min_jint) { subq(dst, value); return; }
-  if (value <  0) { incrementq(dst, -value); return; }
-  if (value == 0) {                        ; return; }
-  if (value == 1 && UseIncDec) { decq(dst) ; return; }
-  /* else */      { subq(dst, value)       ; return; }
-}
-
-void MacroAssembler::incrementq(Register reg, int value) {
-  if (value == min_jint) { addq(reg, value); return; }
-  if (value <  0) { decrementq(reg, -value); return; }
-  if (value == 0) {                        ; return; }
-  if (value == 1 && UseIncDec) { incq(reg) ; return; }
-  /* else */      { addq(reg, value)       ; return; }
-}
-
-void MacroAssembler::incrementq(Address dst, int value) {
-  if (value == min_jint) { addq(dst, value); return; }
-  if (value <  0) { decrementq(dst, -value); return; }
-  if (value == 0) {                        ; return; }
-  if (value == 1 && UseIncDec) { incq(dst) ; return; }
-  /* else */      { addq(dst, value)       ; return; }
-}
-
-// 32bit can do a case table jump in one instruction but we no longer allow the base
-// to be installed in the Address class
-void MacroAssembler::jump(ArrayAddress entry) {
-  lea(rscratch1, entry.base());
-  Address dispatch = entry.index();
-  assert(dispatch._base == noreg, "must be");
-  dispatch._base = rscratch1;
-  jmp(dispatch);
-}
-
-void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
-  ShouldNotReachHere(); // 64bit doesn't use two regs
-  cmpq(x_lo, y_lo);
-}
-
-void MacroAssembler::lea(Register dst, AddressLiteral src) {
-    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
-}
-
-void MacroAssembler::lea(Address dst, AddressLiteral adr) {
-  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
-  movptr(dst, rscratch1);
-}
-
-void MacroAssembler::leave() {
-  // %%% is this really better? Why not on 32bit too?
-  emit_byte(0xC9); // LEAVE
-}
-
-void MacroAssembler::lneg(Register hi, Register lo) {
-  ShouldNotReachHere(); // 64bit doesn't use two regs
-  negq(lo);
-}
-
-void MacroAssembler::movoop(Register dst, jobject obj) {
-  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::movoop(Address dst, jobject obj) {
-  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
-  movq(dst, rscratch1);
-}
-
-void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
-  mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
-  mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
-  movq(dst, rscratch1);
-}
-
-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
-  if (src.is_lval()) {
-    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
-  } else {
-    if (reachable(src)) {
-      movq(dst, as_Address(src));
-    } else {
-      lea(rscratch1, src);
-      movq(dst, Address(rscratch1,0));
-    }
-  }
-}
-
-void MacroAssembler::movptr(ArrayAddress dst, Register src) {
-  movq(as_Address(dst), src);
-}
-
-void MacroAssembler::movptr(Register dst, ArrayAddress src) {
-  movq(dst, as_Address(src));
-}
-
-// src should NEVER be a real pointer. Use AddressLiteral for true pointers
-void MacroAssembler::movptr(Address dst, intptr_t src) {
-  mov64(rscratch1, src);
-  movq(dst, rscratch1);
-}
-
-// These are mostly for initializing NULL
-void MacroAssembler::movptr(Address dst, int32_t src) {
-  movslq(dst, src);
-}
-
-void MacroAssembler::movptr(Register dst, int32_t src) {
-  mov64(dst, (intptr_t)src);
-}
-
-void MacroAssembler::pushoop(jobject obj) {
-  movoop(rscratch1, obj);
-  push(rscratch1);
-}
-
-void MacroAssembler::pushklass(Metadata* obj) {
-  mov_metadata(rscratch1, obj);
-  push(rscratch1);
-}
-
-void MacroAssembler::pushptr(AddressLiteral src) {
-  lea(rscratch1, src);
-  if (src.is_lval()) {
-    push(rscratch1);
-  } else {
-    pushq(Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::reset_last_Java_frame(bool clear_fp,
-                                           bool clear_pc) {
-  // we must set sp to zero to clear frame
-  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
-  // must clear fp, so that compiled frames are not confused; it is
-  // possible that we need it only for debugging
-  if (clear_fp) {
-    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
-  }
-
-  if (clear_pc) {
-    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
-  }
-}
-
-void MacroAssembler::set_last_Java_frame(Register last_java_sp,
-                                         Register last_java_fp,
-                                         address  last_java_pc) {
-  // determine last_java_sp register
-  if (!last_java_sp->is_valid()) {
-    last_java_sp = rsp;
-  }
-
-  // last_java_fp is optional
-  if (last_java_fp->is_valid()) {
-    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
-           last_java_fp);
-  }
-
-  // last_java_pc is optional
-  if (last_java_pc != NULL) {
-    Address java_pc(r15_thread,
-                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
-    lea(rscratch1, InternalAddress(last_java_pc));
-    movptr(java_pc, rscratch1);
-  }
-
-  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
-}
-
-static void pass_arg0(MacroAssembler* masm, Register arg) {
-  if (c_rarg0 != arg ) {
-    masm->mov(c_rarg0, arg);
-  }
-}
-
-static void pass_arg1(MacroAssembler* masm, Register arg) {
-  if (c_rarg1 != arg ) {
-    masm->mov(c_rarg1, arg);
-  }
-}
-
-static void pass_arg2(MacroAssembler* masm, Register arg) {
-  if (c_rarg2 != arg ) {
-    masm->mov(c_rarg2, arg);
-  }
-}
-
-static void pass_arg3(MacroAssembler* masm, Register arg) {
-  if (c_rarg3 != arg ) {
-    masm->mov(c_rarg3, arg);
-  }
-}
-
-void MacroAssembler::stop(const char* msg) {
-  address rip = pc();
-  pusha(); // get regs on stack
-  lea(c_rarg0, ExternalAddress((address) msg));
-  lea(c_rarg1, InternalAddress(rip));
-  movq(c_rarg2, rsp); // pass pointer to regs array
-  andq(rsp, -16); // align stack as required by ABI
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
-  hlt();
-}
-
-void MacroAssembler::warn(const char* msg) {
-  push(rbp);
-  movq(rbp, rsp);
-  andq(rsp, -16);     // align stack as required by push_CPU_state and call
-  push_CPU_state();   // keeps alignment at 16 bytes
-  lea(c_rarg0, ExternalAddress((address) msg));
-  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
-  pop_CPU_state();
-  mov(rsp, rbp);
-  pop(rbp);
-}
-
-void MacroAssembler::print_state() {
-  address rip = pc();
-  pusha();            // get regs on stack
-  push(rbp);
-  movq(rbp, rsp);
-  andq(rsp, -16);     // align stack as required by push_CPU_state and call
-  push_CPU_state();   // keeps alignment at 16 bytes
-
-  lea(c_rarg0, InternalAddress(rip));
-  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
-  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);
-
-  pop_CPU_state();
-  mov(rsp, rbp);
-  pop(rbp);
-  popa();
-}
-
-#ifndef PRODUCT
-extern "C" void findpc(intptr_t x);
-#endif
-
-void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
-  // In order to get locks to work, we need to fake an in_VM state
-  if (ShowMessageBoxOnError) {
-    JavaThread* thread = JavaThread::current();
-    JavaThreadState saved_state = thread->thread_state();
-    thread->set_thread_state(_thread_in_vm);
-#ifndef PRODUCT
-    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
-      ttyLocker ttyl;
-      BytecodeCounter::print();
-    }
-#endif
-    // To see where a verify_oop failed, get $ebx+40/X for this frame.
-    // XXX correct this offset for amd64
-    // This is the value of eip which points to where verify_oop will return.
-    if (os::message_box(msg, "Execution stopped, print registers?")) {
-      print_state64(pc, regs);
-      BREAKPOINT;
-      assert(false, "start up GDB");
-    }
-    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
-  } else {
-    ttyLocker ttyl;
-    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
-                    msg);
-    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
-  }
-}
-
-void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
-  ttyLocker ttyl;
-  FlagSetting fs(Debugging, true);
-  tty->print_cr("rip = 0x%016lx", pc);
-#ifndef PRODUCT
-  tty->cr();
-  findpc(pc);
-  tty->cr();
-#endif
-#define PRINT_REG(rax, value) \
-  { tty->print("%s = ", #rax); os::print_location(tty, value); }
-  PRINT_REG(rax, regs[15]);
-  PRINT_REG(rbx, regs[12]);
-  PRINT_REG(rcx, regs[14]);
-  PRINT_REG(rdx, regs[13]);
-  PRINT_REG(rdi, regs[8]);
-  PRINT_REG(rsi, regs[9]);
-  PRINT_REG(rbp, regs[10]);
-  PRINT_REG(rsp, regs[11]);
-  PRINT_REG(r8 , regs[7]);
-  PRINT_REG(r9 , regs[6]);
-  PRINT_REG(r10, regs[5]);
-  PRINT_REG(r11, regs[4]);
-  PRINT_REG(r12, regs[3]);
-  PRINT_REG(r13, regs[2]);
-  PRINT_REG(r14, regs[1]);
-  PRINT_REG(r15, regs[0]);
-#undef PRINT_REG
-  // Print some words near the top of the stack.
-  int64_t* rsp = (int64_t*) regs[11];
-  int64_t* dump_sp = rsp;
-  for (int col1 = 0; col1 < 8; col1++) {
-    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
-    os::print_location(tty, *dump_sp++);
-  }
-  for (int row = 0; row < 25; row++) {
-    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
-    for (int col = 0; col < 4; col++) {
-      tty->print(" 0x%016lx", *dump_sp++);
-    }
-    tty->cr();
-  }
-  // Print some instructions around pc:
-  Disassembler::decode((address)pc-64, (address)pc);
-  tty->print_cr("--------");
-  Disassembler::decode((address)pc, (address)pc+32);
-}
-
-#endif // _LP64
-
-// Now versions that are common to 32/64 bit
-
-void MacroAssembler::addptr(Register dst, int32_t imm32) {
-  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
-}
-
-void MacroAssembler::addptr(Register dst, Register src) {
-  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
-}
-
-void MacroAssembler::addptr(Address dst, Register src) {
-  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
-}
-
-void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::addsd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::addsd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    addss(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    addss(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::align(int modulus) {
-  if (offset() % modulus != 0) {
-    nop(modulus - (offset() % modulus));
-  }
-}
-
-void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
-  // Used in sign-masking with aligned address.
-  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
-  if (reachable(src)) {
-    Assembler::andpd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::andpd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
-  // Used in sign-masking with aligned address.
-  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
-  if (reachable(src)) {
-    Assembler::andps(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::andps(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::andptr(Register dst, int32_t imm32) {
-  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
-}
-
-void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
-  pushf();
-  if (os::is_MP())
-    lock();
-  incrementl(counter_addr);
-  popf();
-}
-
-// Writes to successive stack pages, until the given offset is reached, to
-// check for stack overflow plus shadow pages.  This clobbers tmp.
-void MacroAssembler::bang_stack_size(Register size, Register tmp) {
-  movptr(tmp, rsp);
-  // Bang stack for total size given plus shadow page size.
-  // Bang one page at a time because large size can bang beyond yellow and
-  // red zones.
-  Label loop;
-  bind(loop);
-  movl(Address(tmp, (-os::vm_page_size())), size );
-  subptr(tmp, os::vm_page_size());
-  subl(size, os::vm_page_size());
-  jcc(Assembler::greater, loop);
-
-  // Bang down shadow pages too.
-  // The -1 is because we already subtracted 1 page.
-  for (int i = 0; i < StackShadowPages-1; i++) {
-    // this could be any sized move, but since it can serve as a debugging
-    // crumb, the bigger the better.
-    movptr(Address(tmp, (-i*os::vm_page_size())), size);
-  }
-}
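
As a rough model, the banging loop is a page-granular walk down the stack; a sketch assuming a 4096-byte page size for illustration:

#include <cstddef>

// Touch one word per page below sp so that exceeding the stack limit
// faults deterministically on a guard page, not at an arbitrary later point.
void bang_stack(volatile char* sp, size_t total_bytes) {
  const size_t page = 4096;  // assumed page size, for illustration only
  for (size_t off = page; off <= total_bytes; off += page) {
    *(sp - off) = 0;  // each store probes exactly one page
  }
}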
-
-void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
-  assert(UseBiasedLocking, "why call this otherwise?");
-
-  // Check for biased locking unlock case, which is a no-op
-  // Note: we do not have to check the thread ID for two reasons.
-  // First, the interpreter checks for IllegalMonitorStateException at
-  // a higher level. Second, if the bias was revoked while we held the
-  // lock, the object could not be rebiased toward another thread, so
-  // the bias bit would be clear.
-  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
-  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
-  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
-  jcc(Assembler::equal, done);
-}
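
The test above only inspects the low lock bits of the header word. A sketch, assuming HotSpot's mark-word layout where the three low bits encode the lock state and 0b101 means "biased":

#include <cstdint>

// Returns true when the mark word carries the biased-lock pattern.
bool has_biased_pattern(uintptr_t mark_word) {
  const uintptr_t biased_lock_mask    = 0x7;  // low three bits of the header
  const uintptr_t biased_lock_pattern = 0x5;  // 0b101 = biased
  return (mark_word & biased_lock_mask) == biased_lock_pattern;
}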
-
-void MacroAssembler::c2bool(Register x) {
-  // implements x == 0 ? 0 : 1
-  // note: must only look at least-significant byte of x
-  //       since C-style booleans are stored in one byte
-  //       only! (was bug)
-  andl(x, 0xFF);
-  setb(Assembler::notZero, x);
-}
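
In plain C++ the transformation is simply:

// c2bool: collapse a C-style boolean to exactly 0 or 1, looking only at
// the least-significant byte of the value.
int c2bool(unsigned int x) {
  return (x & 0xFF) != 0 ? 1 : 0;
}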
-
-// Wouldn't be needed if the AddressLiteral version had a new name
-void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
-  Assembler::call(L, rtype);
-}
-
-void MacroAssembler::call(Register entry) {
-  Assembler::call(entry);
-}
-
-void MacroAssembler::call(AddressLiteral entry) {
-  if (reachable(entry)) {
-    Assembler::call_literal(entry.target(), entry.rspec());
-  } else {
-    lea(rscratch1, entry);
-    Assembler::call(rscratch1);
-  }
-}
-
-void MacroAssembler::ic_call(address entry) {
-  RelocationHolder rh = virtual_call_Relocation::spec(pc());
-  movptr(rax, (intptr_t)Universe::non_oop_word());
-  call(AddressLiteral(entry, rh));
-}
-
-// Implementation of call_VM versions
-
-void MacroAssembler::call_VM(Register oop_result,
-                             address entry_point,
-                             bool check_exceptions) {
-  Label C, E;
-  call(C, relocInfo::none);
-  jmp(E);
-
-  bind(C);
-  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
-  ret(0);
-
-  bind(E);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
-                             address entry_point,
-                             Register arg_1,
-                             bool check_exceptions) {
-  Label C, E;
-  call(C, relocInfo::none);
-  jmp(E);
-
-  bind(C);
-  pass_arg1(this, arg_1);
-  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
-  ret(0);
-
-  bind(E);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
-                             address entry_point,
-                             Register arg_1,
-                             Register arg_2,
-                             bool check_exceptions) {
-  Label C, E;
-  call(C, relocInfo::none);
-  jmp(E);
-
-  bind(C);
-
-  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
-
-  pass_arg2(this, arg_2);
-  pass_arg1(this, arg_1);
-  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
-  ret(0);
-
-  bind(E);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
-                             address entry_point,
-                             Register arg_1,
-                             Register arg_2,
-                             Register arg_3,
-                             bool check_exceptions) {
-  Label C, E;
-  call(C, relocInfo::none);
-  jmp(E);
-
-  bind(C);
-
-  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
-  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
-  pass_arg3(this, arg_3);
-
-  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
-  pass_arg2(this, arg_2);
-
-  pass_arg1(this, arg_1);
-  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
-  ret(0);
-
-  bind(E);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
-                             Register last_java_sp,
-                             address entry_point,
-                             int number_of_arguments,
-                             bool check_exceptions) {
-  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
-  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
-                             Register last_java_sp,
-                             address entry_point,
-                             Register arg_1,
-                             bool check_exceptions) {
-  pass_arg1(this, arg_1);
-  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
-                             Register last_java_sp,
-                             address entry_point,
-                             Register arg_1,
-                             Register arg_2,
-                             bool check_exceptions) {
-
-  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
-  pass_arg2(this, arg_2);
-  pass_arg1(this, arg_1);
-  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
-                             Register last_java_sp,
-                             address entry_point,
-                             Register arg_1,
-                             Register arg_2,
-                             Register arg_3,
-                             bool check_exceptions) {
-  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
-  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
-  pass_arg3(this, arg_3);
-  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
-  pass_arg2(this, arg_2);
-  pass_arg1(this, arg_1);
-  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
-}
-
-void MacroAssembler::super_call_VM(Register oop_result,
-                                   Register last_java_sp,
-                                   address entry_point,
-                                   int number_of_arguments,
-                                   bool check_exceptions) {
-  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
-  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
-}
-
-void MacroAssembler::super_call_VM(Register oop_result,
-                                   Register last_java_sp,
-                                   address entry_point,
-                                   Register arg_1,
-                                   bool check_exceptions) {
-  pass_arg1(this, arg_1);
-  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
-}
-
-void MacroAssembler::super_call_VM(Register oop_result,
-                                   Register last_java_sp,
-                                   address entry_point,
-                                   Register arg_1,
-                                   Register arg_2,
-                                   bool check_exceptions) {
-
-  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
-  pass_arg2(this, arg_2);
-  pass_arg1(this, arg_1);
-  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
-}
-
-void MacroAssembler::super_call_VM(Register oop_result,
-                                   Register last_java_sp,
-                                   address entry_point,
-                                   Register arg_1,
-                                   Register arg_2,
-                                   Register arg_3,
-                                   bool check_exceptions) {
-  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
-  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
-  pass_arg3(this, arg_3);
-  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
-  pass_arg2(this, arg_2);
-  pass_arg1(this, arg_1);
-  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
-}
-
-void MacroAssembler::call_VM_base(Register oop_result,
-                                  Register java_thread,
-                                  Register last_java_sp,
-                                  address  entry_point,
-                                  int      number_of_arguments,
-                                  bool     check_exceptions) {
-  // determine java_thread register
-  if (!java_thread->is_valid()) {
-#ifdef _LP64
-    java_thread = r15_thread;
-#else
-    java_thread = rdi;
-    get_thread(java_thread);
-#endif // LP64
-  }
-  // determine last_java_sp register
-  if (!last_java_sp->is_valid()) {
-    last_java_sp = rsp;
-  }
-  // debugging support
-  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
-  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
-#ifdef ASSERT
-  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
-  // r12 is the heapbase.
-  LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
-#endif // ASSERT
-
-  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
-  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
-
-  // push java thread (becomes first argument of C function)
-
-  NOT_LP64(push(java_thread); number_of_arguments++);
-  LP64_ONLY(mov(c_rarg0, r15_thread));
-
-  // set last Java frame before call
-  assert(last_java_sp != rbp, "can't use ebp/rbp");
-
-  // Only interpreter should have to set fp
-  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
-
-  // do the call, remove parameters
-  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
-
-  // restore the thread (cannot use the pushed argument since arguments
-  // may be overwritten by C code generated by an optimizing compiler);
-  // however we can use the register value directly if it is callee-saved.
-  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
-    // rdi & rsi (also r15) are callee saved -> nothing to do
-#ifdef ASSERT
-    guarantee(java_thread != rax, "change this code");
-    push(rax);
-    { Label L;
-      get_thread(rax);
-      cmpptr(java_thread, rax);
-      jcc(Assembler::equal, L);
-      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
-      bind(L);
-    }
-    pop(rax);
-#endif
-  } else {
-    get_thread(java_thread);
-  }
-  // reset last Java frame
-  // Only interpreter should have to clear fp
-  reset_last_Java_frame(java_thread, true, false);
-
-#ifndef CC_INTERP
-   // C++ interp handles this in the interpreter
-  check_and_handle_popframe(java_thread);
-  check_and_handle_earlyret(java_thread);
-#endif /* CC_INTERP */
-
-  if (check_exceptions) {
-    // check for pending exceptions (java_thread is set upon return)
-    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
-#ifndef _LP64
-    jump_cc(Assembler::notEqual,
-            RuntimeAddress(StubRoutines::forward_exception_entry()));
-#else
-    // This used to conditionally jump to forward_exception; however, after
-    // relocation it is possible that the branch will not reach. So we must
-    // jump around it so the target is always reachable.
-
-    Label ok;
-    jcc(Assembler::equal, ok);
-    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
-    bind(ok);
-#endif // LP64
-  }
-
-  // get oop result if there is one and reset the value in the thread
-  if (oop_result->is_valid()) {
-    get_vm_result(oop_result, java_thread);
-  }
-}
-
-void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
-
-  // Calculating the value for last_Java_sp is somewhat subtle.
-  // call_VM does an intermediate call which places a return address
-  // on the stack just under the stack pointer as the user finished
-  // with it. This allows us to retrieve last_Java_pc from
-  // last_Java_sp[-1].
-  // On 32bit we then have to push additional args on the stack to accomplish
-  // the actual requested call. On 64bit call_VM can only use register args,
-  // so the only extra space is the return address that call_VM created.
-  // This hopefully explains the calculations here.
-
-#ifdef _LP64
-  // We've pushed one address, correct last_Java_sp
-  lea(rax, Address(rsp, wordSize));
-#else
-  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
-#endif // LP64
-
-  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
-
-}
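
A picture of the 64-bit layout the comment describes (the intermediate call has pushed exactly one word; this diagram is an illustration, not part of the original source):

// stack, higher addresses at the top:
//   [ caller's frame ... ]   <-- last_Java_sp = rsp + wordSize
//   [ return address     ]   <-- rsp; so last_Java_sp[-1] is last_Java_pc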
-
-void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
-  call_VM_leaf_base(entry_point, number_of_arguments);
-}
-
-void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
-  pass_arg0(this, arg_0);
-  call_VM_leaf(entry_point, 1);
-}
-
-void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
-
-  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
-  pass_arg1(this, arg_1);
-  pass_arg0(this, arg_0);
-  call_VM_leaf(entry_point, 2);
-}
-
-void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
-  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
-  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
-  pass_arg2(this, arg_2);
-  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
-  pass_arg1(this, arg_1);
-  pass_arg0(this, arg_0);
-  call_VM_leaf(entry_point, 3);
-}
-
-void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
-  pass_arg0(this, arg_0);
-  MacroAssembler::call_VM_leaf_base(entry_point, 1);
-}
-
-void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
-
-  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
-  pass_arg1(this, arg_1);
-  pass_arg0(this, arg_0);
-  MacroAssembler::call_VM_leaf_base(entry_point, 2);
-}
-
-void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
-  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
-  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
-  pass_arg2(this, arg_2);
-  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
-  pass_arg1(this, arg_1);
-  pass_arg0(this, arg_0);
-  MacroAssembler::call_VM_leaf_base(entry_point, 3);
-}
-
-void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
-  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
-  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
-  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
-  pass_arg3(this, arg_3);
-  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
-  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
-  pass_arg2(this, arg_2);
-  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
-  pass_arg1(this, arg_1);
-  pass_arg0(this, arg_0);
-  MacroAssembler::call_VM_leaf_base(entry_point, 4);
-}
-
-void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
-  movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
-  movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
-  verify_oop(oop_result, "broken oop in call_VM_base");
-}
-
-void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
-  movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
-  movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD);
-}
-
-void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
-}
-
-void MacroAssembler::check_and_handle_popframe(Register java_thread) {
-}
-
-void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
-  if (reachable(src1)) {
-    cmpl(as_Address(src1), imm);
-  } else {
-    lea(rscratch1, src1);
-    cmpl(Address(rscratch1, 0), imm);
-  }
-}
-
-void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
-  assert(!src2.is_lval(), "use cmpptr");
-  if (reachable(src2)) {
-    cmpl(src1, as_Address(src2));
-  } else {
-    lea(rscratch1, src2);
-    cmpl(src1, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::cmp32(Register src1, int32_t imm) {
-  Assembler::cmpl(src1, imm);
-}
-
-void MacroAssembler::cmp32(Register src1, Address src2) {
-  Assembler::cmpl(src1, src2);
-}
-
-void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
-  ucomisd(opr1, opr2);
-
-  Label L;
-  if (unordered_is_less) {
-    movl(dst, -1);
-    jcc(Assembler::parity, L);
-    jcc(Assembler::below , L);
-    movl(dst, 0);
-    jcc(Assembler::equal , L);
-    increment(dst);
-  } else { // unordered is greater
-    movl(dst, 1);
-    jcc(Assembler::parity, L);
-    jcc(Assembler::above , L);
-    movl(dst, 0);
-    jcc(Assembler::equal , L);
-    decrementl(dst);
-  }
-  bind(L);
-}
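
A scalar model of the flag-to-integer mapping above, with the caller choosing how an unordered (NaN) comparison collapses:

#include <cmath>

// Three-way double compare: -1, 0 or +1; NaN maps to whichever side the
// caller selects, mirroring the unordered_is_less parameter.
int cmp_double_to_int(double a, double b, bool unordered_is_less) {
  if (std::isnan(a) || std::isnan(b)) return unordered_is_less ? -1 : 1;
  if (a < b)  return -1;
  if (a == b) return  0;
  return 1;
}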
-
-void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
-  ucomiss(opr1, opr2);
-
-  Label L;
-  if (unordered_is_less) {
-    movl(dst, -1);
-    jcc(Assembler::parity, L);
-    jcc(Assembler::below , L);
-    movl(dst, 0);
-    jcc(Assembler::equal , L);
-    increment(dst);
-  } else { // unordered is greater
-    movl(dst, 1);
-    jcc(Assembler::parity, L);
-    jcc(Assembler::above , L);
-    movl(dst, 0);
-    jcc(Assembler::equal , L);
-    decrementl(dst);
-  }
-  bind(L);
-}
-
-
-void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
-  if (reachable(src1)) {
-    cmpb(as_Address(src1), imm);
-  } else {
-    lea(rscratch1, src1);
-    cmpb(Address(rscratch1, 0), imm);
-  }
-}
-
-void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
-#ifdef _LP64
-  if (src2.is_lval()) {
-    movptr(rscratch1, src2);
-    Assembler::cmpq(src1, rscratch1);
-  } else if (reachable(src2)) {
-    cmpq(src1, as_Address(src2));
-  } else {
-    lea(rscratch1, src2);
-    Assembler::cmpq(src1, Address(rscratch1, 0));
-  }
-#else
-  if (src2.is_lval()) {
-    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
-  } else {
-    cmpl(src1, as_Address(src2));
-  }
-#endif // _LP64
-}
-
-void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
-  assert(src2.is_lval(), "not a mem-mem compare");
-#ifdef _LP64
-  // moves src2's literal address
-  movptr(rscratch1, src2);
-  Assembler::cmpq(src1, rscratch1);
-#else
-  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
-#endif // _LP64
-}
-
-void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
-  if (reachable(adr)) {
-    if (os::is_MP())
-      lock();
-    cmpxchgptr(reg, as_Address(adr));
-  } else {
-    lea(rscratch1, adr);
-    if (os::is_MP())
-      lock();
-    cmpxchgptr(reg, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
-  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
-}
-
-void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::comisd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::comisd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::comiss(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::comiss(dst, Address(rscratch1, 0));
-  }
-}
-
-
-void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
-  Condition negated_cond = negate_condition(cond);
-  Label L;
-  jcc(negated_cond, L);
-  atomic_incl(counter_addr);
-  bind(L);
-}
-
-int MacroAssembler::corrected_idivl(Register reg) {
-  // Full implementation of Java idiv and irem; checks for
-  // special case as described in JVM spec., p.243 & p.271.
-  // The function returns the (pc) offset of the idivl
-  // instruction - may be needed for implicit exceptions.
-  //
-  //         normal case                           special case
-  //
-  // input : rax: dividend                        min_int
-  //         reg: divisor   (may not be rax/rdx)  -1
-  //
-  // output: rax: quotient  (= rax idiv reg)      min_int
-  //         rdx: remainder (= rax irem reg)      0
-  assert(reg != rax && reg != rdx, "reg cannot be the rax or rdx register");
-  const int min_int = 0x80000000;
-  Label normal_case, special_case;
-
-  // check for special case
-  cmpl(rax, min_int);
-  jcc(Assembler::notEqual, normal_case);
-  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
-  cmpl(reg, -1);
-  jcc(Assembler::equal, special_case);
-
-  // handle normal case
-  bind(normal_case);
-  cdql();
-  int idivl_offset = offset();
-  idivl(reg);
-
-  // normal and special case exit
-  bind(special_case);
-
-  return idivl_offset;
-}
-
-
-
-void MacroAssembler::decrementl(Register reg, int value) {
-  if (value == min_jint) {subl(reg, value) ; return; }
-  if (value <  0) { incrementl(reg, -value); return; }
-  if (value == 0) {                        ; return; }
-  if (value == 1 && UseIncDec) { decl(reg) ; return; }
-  /* else */      { subl(reg, value)       ; return; }
-}
-
-void MacroAssembler::decrementl(Address dst, int value) {
-  if (value == min_jint) {subl(dst, value) ; return; }
-  if (value <  0) { incrementl(dst, -value); return; }
-  if (value == 0) {                        ; return; }
-  if (value == 1 && UseIncDec) { decl(dst) ; return; }
-  /* else */      { subl(dst, value)       ; return; }
-}
-
-void MacroAssembler::division_with_shift (Register reg, int shift_value) {
-  assert (shift_value > 0, "illegal shift value");
-  Label _is_positive;
-  testl (reg, reg);
-  jcc (Assembler::positive, _is_positive);
-  int offset = (1 << shift_value) - 1 ;
-
-  if (offset == 1) {
-    incrementl(reg);
-  } else {
-    addl(reg, offset);
-  }
-
-  bind (_is_positive);
-  sarl(reg, shift_value);
-}
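
The correction exists because an arithmetic right shift rounds toward negative infinity, while Java division rounds toward zero; adding 2^shift - 1 to a negative dividend removes the bias. A sketch in C++ (x86 toolchains shift signed values arithmetically):

// Divide by 2^shift with round-toward-zero semantics.
int divide_by_pow2(int x, int shift) {
  if (x < 0) {
    x += (1 << shift) - 1;  // bias negative values so the shift truncates
  }
  return x >> shift;        // arithmetic shift on x86 compilers
}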
-
-void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::divsd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::divsd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::divss(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::divss(dst, Address(rscratch1, 0));
-  }
-}
-
-// !defined(COMPILER2) is because of stupid core builds
-#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
-void MacroAssembler::empty_FPU_stack() {
-  if (VM_Version::supports_mmx()) {
-    emms();
-  } else {
-    for (int i = 8; i-- > 0; ) ffree(i);
-  }
-}
-#endif // !LP64 || C1 || !C2
-
-
-// Defines obj, preserves var_size_in_bytes
-void MacroAssembler::eden_allocate(Register obj,
-                                   Register var_size_in_bytes,
-                                   int con_size_in_bytes,
-                                   Register t1,
-                                   Label& slow_case) {
-  assert(obj == rax, "obj must be in rax, for cmpxchg");
-  assert_different_registers(obj, var_size_in_bytes, t1);
-  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
-    jmp(slow_case);
-  } else {
-    Register end = t1;
-    Label retry;
-    bind(retry);
-    ExternalAddress heap_top((address) Universe::heap()->top_addr());
-    movptr(obj, heap_top);
-    if (var_size_in_bytes == noreg) {
-      lea(end, Address(obj, con_size_in_bytes));
-    } else {
-      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
-    }
-    // if end < obj then we wrapped around => object too long => slow case
-    cmpptr(end, obj);
-    jcc(Assembler::below, slow_case);
-    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
-    jcc(Assembler::above, slow_case);
-    // Compare obj with the top addr, and if still equal, store the new top addr in
-    // end at the address of the top addr pointer. Sets ZF if it was equal, and
-    // clears it otherwise. Use lock prefix for atomicity on MPs.
-    locked_cmpxchgptr(end, heap_top);
-    jcc(Assembler::notEqual, retry);
-  }
-}
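
The loop above is classic lock-free bump-pointer allocation. A sketch using std::atomic in place of lock cmpxchg (names are illustrative, not HotSpot API):

#include <atomic>
#include <cstddef>

char* bump_allocate(std::atomic<char*>& heap_top, char* heap_end, size_t size) {
  char* obj = heap_top.load();
  for (;;) {
    char* new_top = obj + size;
    if (new_top < obj || new_top > heap_end) {
      return nullptr;  // wrapped around, or heap exhausted: take the slow case
    }
    if (heap_top.compare_exchange_weak(obj, new_top)) {
      return obj;      // carved [obj, new_top) out of the eden space
    }
    // CAS failed: obj now holds the current top; retry.
  }
}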
-
-void MacroAssembler::enter() {
-  push(rbp);
-  mov(rbp, rsp);
-}
-
-// A 5 byte nop that is safe for patching (see patch_verified_entry)
-void MacroAssembler::fat_nop() {
-  if (UseAddressNop) {
-    addr_nop_5();
-  } else {
-    emit_byte(0x26); // es:
-    emit_byte(0x2e); // cs:
-    emit_byte(0x64); // fs:
-    emit_byte(0x65); // gs:
-    emit_byte(0x90);
-  }
-}
-
-void MacroAssembler::fcmp(Register tmp) {
-  fcmp(tmp, 1, true, true);
-}
-
-void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
-  assert(!pop_right || pop_left, "usage error");
-  if (VM_Version::supports_cmov()) {
-    assert(tmp == noreg, "unneeded temp");
-    if (pop_left) {
-      fucomip(index);
-    } else {
-      fucomi(index);
-    }
-    if (pop_right) {
-      fpop();
-    }
-  } else {
-    assert(tmp != noreg, "need temp");
-    if (pop_left) {
-      if (pop_right) {
-        fcompp();
-      } else {
-        fcomp(index);
-      }
-    } else {
-      fcom(index);
-    }
-    // convert FPU condition into eflags condition via rax
-    save_rax(tmp);
-    fwait(); fnstsw_ax();
-    sahf();
-    restore_rax(tmp);
-  }
-  // condition codes set as follows:
-  //
-  // CF (corresponds to C0) if x < y
-  // PF (corresponds to C2) if unordered
-  // ZF (corresponds to C3) if x = y
-}
-
-void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
-  fcmp2int(dst, unordered_is_less, 1, true, true);
-}
-
-void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
-  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
-  Label L;
-  if (unordered_is_less) {
-    movl(dst, -1);
-    jcc(Assembler::parity, L);
-    jcc(Assembler::below , L);
-    movl(dst, 0);
-    jcc(Assembler::equal , L);
-    increment(dst);
-  } else { // unordered is greater
-    movl(dst, 1);
-    jcc(Assembler::parity, L);
-    jcc(Assembler::above , L);
-    movl(dst, 0);
-    jcc(Assembler::equal , L);
-    decrementl(dst);
-  }
-  bind(L);
-}
-
-void MacroAssembler::fld_d(AddressLiteral src) {
-  fld_d(as_Address(src));
-}
-
-void MacroAssembler::fld_s(AddressLiteral src) {
-  fld_s(as_Address(src));
-}
-
-void MacroAssembler::fld_x(AddressLiteral src) {
-  Assembler::fld_x(as_Address(src));
-}
-
-void MacroAssembler::fldcw(AddressLiteral src) {
-  Assembler::fldcw(as_Address(src));
-}
-
-void MacroAssembler::pow_exp_core_encoding() {
-  // kills rax, rcx, rdx
-  subptr(rsp,sizeof(jdouble));
-  // computes 2^X. Stack: X ...
-  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
-  // keep it on the thread's stack to compute 2^int(X) later,
-  // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1.
-  // The final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
-  fld_s(0);                 // Stack: X X ...
-  frndint();                // Stack: int(X) X ...
-  fsuba(1);                 // Stack: int(X) X-int(X) ...
-  fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
-  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
-  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
-  faddp(1);                 // Stack: 2^(X-int(X))
-  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
-  // shift int(X)+1023 to exponent position.
-  // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11
-  // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
-  // values so detect them and set result to NaN.
-  movl(rax,Address(rsp,0));
-  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
-  addl(rax, 1023);
-  movl(rdx,rax);
-  shll(rax,20);
-  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
-  addl(rdx,1);
-  // Check that 1 < int(X)+1023+1 < 2048
-  // in 3 steps:
-  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
-  // 2- int(X)+1023+1 != 0
-  // 3- int(X)+1023+1 != 1
-  // Do 2- first because addl just updated the flags.
-  cmov32(Assembler::equal,rax,rcx);
-  cmpl(rdx,1);
-  cmov32(Assembler::equal,rax,rcx);
-  testl(rdx,rcx);
-  cmov32(Assembler::notEqual,rax,rcx);
-  movl(Address(rsp,4),rax);
-  movl(Address(rsp,0),0);
-  fmul_d(Address(rsp,0));   // Stack: 2^X ...
-  addptr(rsp,sizeof(jdouble));
-}
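
The 2^int(X) factor is built directly from the IEEE-754 encoding: int(X) + 1023 (the double exponent bias) is shifted into bits 62..52. A sketch, valid exactly in the range the code above checks:

#include <cstdint>
#include <cstring>

// Construct 2^k for 0 < k + 1023 < 2047 by writing the biased exponent
// field of a double; sign and mantissa are left zero.
double two_to_the(int k) {
  uint64_t bits = (uint64_t)(k + 1023) << 52;
  double d;
  std::memcpy(&d, &bits, sizeof(d));
  return d;
}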
-
-void MacroAssembler::increase_precision() {
-  subptr(rsp, BytesPerWord);
-  fnstcw(Address(rsp, 0));
-  movl(rax, Address(rsp, 0));
-  orl(rax, 0x300);
-  push(rax);
-  fldcw(Address(rsp, 0));
-  pop(rax);
-}
-
-void MacroAssembler::restore_precision() {
-  fldcw(Address(rsp, 0));
-  addptr(rsp, BytesPerWord);
-}
-
-void MacroAssembler::fast_pow() {
-  // computes X^Y = 2^(Y * log2(X))
-  // if fast computation is not possible, result is NaN. Requires
-  // fallback from user of this macro.
-  // increase precision for intermediate steps of the computation
-  increase_precision();
-  fyl2x();                 // Stack: (Y*log2(X)) ...
-  pow_exp_core_encoding(); // Stack: exp(X) ...
-  restore_precision();
-}
-
-void MacroAssembler::fast_exp() {
-  // computes exp(X) = 2^(X * log2(e))
-  // if fast computation is not possible, result is NaN. Requires
-  // fallback from user of this macro.
-  // increase precision for intermediate steps of the computation
-  increase_precision();
-  fldl2e();                // Stack: log2(e) X ...
-  fmulp(1);                // Stack: (X*log2(e)) ...
-  pow_exp_core_encoding(); // Stack: exp(X) ...
-  restore_precision();
-}
-
-void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
-  // kills rax, rcx, rdx
-  // pow and exp need 2 extra registers on the fpu stack.
-  Label slow_case, done;
-  Register tmp = noreg;
-  if (!VM_Version::supports_cmov()) {
-    // fcmp needs a temporary, so preserve rdx
-    tmp = rdx;
-  }
-  Register tmp2 = rax;
-  Register tmp3 = rcx;
-
-  if (is_exp) {
-    // Stack: X
-    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
-    fast_exp();                 // Stack: exp(X) X
-    fcmp(tmp, 0, false, false); // Stack: exp(X) X
-    // exp(X) not equal to itself: exp(X) is NaN, go to the slow case.
-    jcc(Assembler::parity, slow_case);
-    // get rid of duplicate argument. Stack: exp(X)
-    if (num_fpu_regs_in_use > 0) {
-      fxch();
-      fpop();
-    } else {
-      ffree(1);
-    }
-    jmp(done);
-  } else {
-    // Stack: X Y
-    Label x_negative, y_odd;
-
-    fldz();                     // Stack: 0 X Y
-    fcmp(tmp, 1, true, false);  // Stack: X Y
-    jcc(Assembler::above, x_negative);
-
-    // X >= 0
-
-    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
-    fld_s(1);                   // Stack: X Y X Y
-    fast_pow();                 // Stack: X^Y X Y
-    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
-    // X^Y not equal to itself: X^Y is NaN, go to the slow case.
-    jcc(Assembler::parity, slow_case);
-    // get rid of duplicate arguments. Stack: X^Y
-    if (num_fpu_regs_in_use > 0) {
-      fxch(); fpop();
-      fxch(); fpop();
-    } else {
-      ffree(2);
-      ffree(1);
-    }
-    jmp(done);
-
-    // X <= 0
-    bind(x_negative);
-
-    fld_s(1);                   // Stack: Y X Y
-    frndint();                  // Stack: int(Y) X Y
-    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
-    jcc(Assembler::notEqual, slow_case);
-
-    subptr(rsp, 8);
-
-    // For X^Y, when X < 0, Y has to be an integer and the final
-    // result depends on whether it's odd or even. We just checked
-    // that int(Y) == Y.  We move int(Y) to gp registers as a 64 bit
-    // integer to test its parity. If int(Y) is huge and doesn't fit
-    // in the 64 bit integer range, the integer indefinite value will
-    // end up in the gp registers. Huge numbers are all even, and the
-    // integer indefinite value is even too, so it's fine.
-
-#ifdef ASSERT
-    // Let's check we don't end up with an integer indefinite number
-    // when not expected. First test for huge numbers: check whether
-    // int(Y)+1 == int(Y) which is true for very large numbers and
-    // those are all even. A 64 bit integer is guaranteed to not
-    // overflow for numbers where y+1 != y (when precision is set to
-    // double precision).
-    Label y_not_huge;
-
-    fld1();                     // Stack: 1 int(Y) X Y
-    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y
-
-#ifdef _LP64
-    // trip to memory to force the precision down from double extended
-    // precision
-    fstp_d(Address(rsp, 0));
-    fld_d(Address(rsp, 0));
-#endif
-
-    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
-#endif
-
-    // move int(Y) as 64 bit integer to thread's stack
-    fistp_d(Address(rsp,0));    // Stack: X Y
-
-#ifdef ASSERT
-    jcc(Assembler::notEqual, y_not_huge);
-
-    // Y is huge so we know it's even. It may not fit in a 64 bit
-    // integer and we don't want the debug code below to see the
-    // integer indefinite value so overwrite int(Y) on the thread's
-    // stack with 0.
-    movl(Address(rsp, 0), 0);
-    movl(Address(rsp, 4), 0);
-
-    bind(y_not_huge);
-#endif
-
-    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
-    fld_s(1);                   // Stack: X Y X Y
-    fabs();                     // Stack: abs(X) Y X Y
-    fast_pow();                 // Stack: abs(X)^Y X Y
-    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
-    // abs(X)^Y not equal to itself: abs(X)^Y is NaN, go to the slow case.
-
-    pop(tmp2);
-    NOT_LP64(pop(tmp3));
-    jcc(Assembler::parity, slow_case);
-
-#ifdef ASSERT
-    // Check that int(Y) is not integer indefinite value (int
-    // overflow). Shouldn't happen because for values that would
-    // overflow, 1+int(Y)==Y which was tested earlier.
-#ifndef _LP64
-    {
-      Label integer;
-      testl(tmp2, tmp2);
-      jcc(Assembler::notZero, integer);
-      cmpl(tmp3, 0x80000000);
-      jcc(Assembler::notZero, integer);
-      STOP("integer indefinite value shouldn't be seen here");
-      bind(integer);
-    }
-#else
-    {
-      Label integer;
-      mov(tmp3, tmp2); // preserve tmp2 for parity check below
-      shlq(tmp3, 1);
-      jcc(Assembler::carryClear, integer);
-      jcc(Assembler::notZero, integer);
-      STOP("integer indefinite value shouldn't be seen here");
-      bind(integer);
-    }
-#endif
-#endif
-
-    // get rid of duplicate arguments. Stack: X^Y
-    if (num_fpu_regs_in_use > 0) {
-      fxch(); fpop();
-      fxch(); fpop();
-    } else {
-      ffree(2);
-      ffree(1);
-    }
-
-    testl(tmp2, 1);
-    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
-    // X <= 0, Y odd: X^Y = -abs(X)^Y
-
-    fchs();                     // Stack: -abs(X)^Y Y
-    jmp(done);
-  }
-
-  // slow case: runtime call
-  bind(slow_case);
-
-  fpop();                       // pop incorrect result or int(Y)
-
-  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
-                      is_exp ? 1 : 2, num_fpu_regs_in_use);
-
-  // Come here with result in F-TOS
-  bind(done);
-}
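
The negative-base branch implements the identity x^y = (-1)^y * |x|^y for integral y, with the sign decided by y's parity. A simplified model, ignoring the huge-|y| handling above (a sketch, not the runtime fallback):

#include <cmath>

// Assumes x < 0 and y is an exact integer that fits in long long.
double pow_negative_base(double x, double y) {
  double r = std::pow(-x, y);   // |x|^y
  long long yi = (long long)y;  // parity test, like fistp_d above
  return (yi & 1) ? -r : r;     // odd y flips the sign
}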
-
-void MacroAssembler::fpop() {
-  ffree();
-  fincstp();
-}
-
-void MacroAssembler::fremr(Register tmp) {
-  save_rax(tmp);
-  { Label L;
-    bind(L);
-    fprem();
-    fwait(); fnstsw_ax();
-#ifdef _LP64
-    testl(rax, 0x400);
-    jcc(Assembler::notEqual, L);
-#else
-    sahf();
-    jcc(Assembler::parity, L);
-#endif // _LP64
-  }
-  restore_rax(tmp);
-  // Result is in ST0.
-  // Note: fxch & fpop to get rid of ST1
-  // (otherwise FPU stack could overflow eventually)
-  fxch(1);
-  fpop();
-}
-
-
-void MacroAssembler::incrementl(AddressLiteral dst) {
-  if (reachable(dst)) {
-    incrementl(as_Address(dst));
-  } else {
-    lea(rscratch1, dst);
-    incrementl(Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::incrementl(ArrayAddress dst) {
-  incrementl(as_Address(dst));
-}
-
-void MacroAssembler::incrementl(Register reg, int value) {
-  if (value == min_jint) {addl(reg, value) ; return; }
-  if (value <  0) { decrementl(reg, -value); return; }
-  if (value == 0) {                        ; return; }
-  if (value == 1 && UseIncDec) { incl(reg) ; return; }
-  /* else */      { addl(reg, value)       ; return; }
-}
-
-void MacroAssembler::incrementl(Address dst, int value) {
-  if (value == min_jint) {addl(dst, value) ; return; }
-  if (value <  0) { decrementl(dst, -value); return; }
-  if (value == 0) {                        ; return; }
-  if (value == 1 && UseIncDec) { incl(dst) ; return; }
-  /* else */      { addl(dst, value)       ; return; }
-}
-
-void MacroAssembler::jump(AddressLiteral dst) {
-  if (reachable(dst)) {
-    jmp_literal(dst.target(), dst.rspec());
-  } else {
-    lea(rscratch1, dst);
-    jmp(rscratch1);
-  }
-}
-
-void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
-  if (reachable(dst)) {
-    InstructionMark im(this);
-    relocate(dst.reloc());
-    const int short_size = 2;
-    const int long_size = 6;
-    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
-    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
-      // 0111 tttn #8-bit disp
-      emit_byte(0x70 | cc);
-      emit_byte((offs - short_size) & 0xFF);
-    } else {
-      // 0000 1111 1000 tttn #32-bit disp
-      emit_byte(0x0F);
-      emit_byte(0x80 | cc);
-      emit_long(offs - long_size);
-    }
-  } else {
-#ifdef ASSERT
-    warning("reversing conditional branch");
-#endif /* ASSERT */
-    Label skip;
-    jccb(reverse[cc], skip);
-    lea(rscratch1, dst);
-    Assembler::jmp(rscratch1);
-    bind(skip);
-  }
-}
-
-void MacroAssembler::ldmxcsr(AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::ldmxcsr(as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::ldmxcsr(Address(rscratch1, 0));
-  }
-}
-
-int MacroAssembler::load_signed_byte(Register dst, Address src) {
-  int off;
-  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
-    off = offset();
-    movsbl(dst, src); // movsxb
-  } else {
-    off = load_unsigned_byte(dst, src);
-    shll(dst, 24);
-    sarl(dst, 24);
-  }
-  return off;
-}
-
-// Note: load_signed_short used to be called load_signed_word.
-// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
-// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
-// The term "word" in HotSpot means a 32- or 64-bit machine word.
-int MacroAssembler::load_signed_short(Register dst, Address src) {
-  int off;
-  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
-    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
-    // version, but this is what 64bit has always done. This seems to imply
-    // that users are only using 32 bits' worth.
-    off = offset();
-    movswl(dst, src); // movsxw
-  } else {
-    off = load_unsigned_short(dst, src);
-    shll(dst, 16);
-    sarl(dst, 16);
-  }
-  return off;
-}
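
The pre-P6 fallback sign-extends without a movsx-style instruction by parking the value at the top of the register and shifting it back down; in C++:

#include <cstdint>

// Sign-extend the low 16 bits of x to a full 32-bit integer.
int32_t sign_extend16(uint32_t x) {
  return (int32_t)(x << 16) >> 16;  // shll 16 then sarl 16
}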
-
-int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
-  // According to Intel Doc. AP-526 ("Zero-Extension of Short", p. 16,
-  // and "3.9 Partial Register Penalties", p. 22).
-  int off;
-  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
-    off = offset();
-    movzbl(dst, src); // movzxb
-  } else {
-    xorl(dst, dst);
-    off = offset();
-    movb(dst, src);
-  }
-  return off;
-}
-
-// Note: load_unsigned_short used to be called load_unsigned_word.
-int MacroAssembler::load_unsigned_short(Register dst, Address src) {
-  // According to Intel Doc. AP-526 ("Zero-Extension of Short", p. 16,
-  // and "3.9 Partial Register Penalties", p. 22).
-  int off;
-  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
-    off = offset();
-    movzwl(dst, src); // movzxw
-  } else {
-    xorl(dst, dst);
-    off = offset();
-    movw(dst, src);
-  }
-  return off;
-}
-
-void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
-  switch (size_in_bytes) {
-#ifndef _LP64
-  case  8:
-    assert(dst2 != noreg, "second dest register required");
-    movl(dst,  src);
-    movl(dst2, src.plus_disp(BytesPerInt));
-    break;
-#else
-  case  8:  movq(dst, src); break;
-#endif
-  case  4:  movl(dst, src); break;
-  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
-  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
-  default:  ShouldNotReachHere();
-  }
-}
-
-void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
-  switch (size_in_bytes) {
-#ifndef _LP64
-  case  8:
-    assert(src2 != noreg, "second source register required");
-    movl(dst,                        src);
-    movl(dst.plus_disp(BytesPerInt), src2);
-    break;
-#else
-  case  8:  movq(dst, src); break;
-#endif
-  case  4:  movl(dst, src); break;
-  case  2:  movw(dst, src); break;
-  case  1:  movb(dst, src); break;
-  default:  ShouldNotReachHere();
-  }
-}
-
-void MacroAssembler::mov32(AddressLiteral dst, Register src) {
-  if (reachable(dst)) {
-    movl(as_Address(dst), src);
-  } else {
-    lea(rscratch1, dst);
-    movl(Address(rscratch1, 0), src);
-  }
-}
-
-void MacroAssembler::mov32(Register dst, AddressLiteral src) {
-  if (reachable(src)) {
-    movl(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    movl(dst, Address(rscratch1, 0));
-  }
-}
-
-// C++ bool manipulation
-
-void MacroAssembler::movbool(Register dst, Address src) {
-  if(sizeof(bool) == 1)
-    movb(dst, src);
-  else if(sizeof(bool) == 2)
-    movw(dst, src);
-  else if(sizeof(bool) == 4)
-    movl(dst, src);
-  else
-    // unsupported
-    ShouldNotReachHere();
-}
-
-void MacroAssembler::movbool(Address dst, bool boolconst) {
-  if (sizeof(bool) == 1)
-    movb(dst, (int) boolconst);
-  else if (sizeof(bool) == 2)
-    movw(dst, (int) boolconst);
-  else if (sizeof(bool) == 4)
-    movl(dst, (int) boolconst);
-  else
-    // unsupported
-    ShouldNotReachHere();
-}
-
-void MacroAssembler::movbool(Address dst, Register src) {
-  if (sizeof(bool) == 1)
-    movb(dst, src);
-  else if (sizeof(bool) == 2)
-    movw(dst, src);
-  else if (sizeof(bool) == 4)
-    movl(dst, src);
-  else
-    // unsupported
-    ShouldNotReachHere();
-}
-
-void MacroAssembler::movbyte(ArrayAddress dst, int src) {
-  movb(as_Address(dst), src);
-}
-
-void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    movdl(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    movdl(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    movq(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    movq(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    if (UseXmmLoadAndClearUpper) {
-      movsd (dst, as_Address(src));
-    } else {
-      movlpd(dst, as_Address(src));
-    }
-  } else {
-    lea(rscratch1, src);
-    if (UseXmmLoadAndClearUpper) {
-      movsd (dst, Address(rscratch1, 0));
-    } else {
-      movlpd(dst, Address(rscratch1, 0));
-    }
-  }
-}
-
-void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    movss(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    movss(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::movptr(Register dst, Register src) {
-  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
-}
-
-void MacroAssembler::movptr(Register dst, Address src) {
-  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
-}
-
-// src should NEVER be a real pointer. Use AddressLiteral for true pointers
-void MacroAssembler::movptr(Register dst, intptr_t src) {
-  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
-}
-
-void MacroAssembler::movptr(Address dst, Register src) {
-  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
-}
-
-void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::movsd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::movsd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::movss(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::movss(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::mulsd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::mulsd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::mulss(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::mulss(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::null_check(Register reg, int offset) {
-  if (needs_explicit_null_check(offset)) {
-    // provoke OS NULL exception if reg == NULL by
-    // accessing M[reg] w/o changing any (non-CC) registers
-    // NOTE: cmpl is plenty here to provoke a segv
-    cmpptr(rax, Address(reg, 0));
-    // Note: should probably use testl(rax, Address(reg, 0));
-    //       may be shorter code (however, this version of
-    //       testl needs to be implemented first)
-  } else {
-    // nothing to do: a (later) access of M[reg + offset]
-    // will provoke an OS NULL exception if reg == NULL
-  }
-}
-
-void MacroAssembler::os_breakpoint() {
-  // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
-  // (e.g., MSVC can't call ps() otherwise)
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-}
-
-void MacroAssembler::pop_CPU_state() {
-  pop_FPU_state();
-  pop_IU_state();
-}
-
-void MacroAssembler::pop_FPU_state() {
-  NOT_LP64(frstor(Address(rsp, 0));)
-  LP64_ONLY(fxrstor(Address(rsp, 0));)
-  addptr(rsp, FPUStateSizeInWords * wordSize);
-}
-
-void MacroAssembler::pop_IU_state() {
-  popa();
-  LP64_ONLY(addq(rsp, 8));
-  popf();
-}
-
-// Save Integer and Float state
-// Warning: Stack must be 16-byte aligned (64-bit)
-void MacroAssembler::push_CPU_state() {
-  push_IU_state();
-  push_FPU_state();
-}
-
-void MacroAssembler::push_FPU_state() {
-  subptr(rsp, FPUStateSizeInWords * wordSize);
-#ifndef _LP64
-  fnsave(Address(rsp, 0));
-  fwait();
-#else
-  fxsave(Address(rsp, 0));
-#endif // _LP64
-}
-
-void MacroAssembler::push_IU_state() {
-  // Push flags first because pusha kills them
-  pushf();
-  // Make sure rsp stays 16-byte aligned
-  LP64_ONLY(subq(rsp, 8));
-  pusha();
-}
-
-void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
-  // determine java_thread register
-  if (!java_thread->is_valid()) {
-    java_thread = rdi;
-    get_thread(java_thread);
-  }
-  // we must set sp to zero to clear frame
-  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
-  if (clear_fp) {
-    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
-  }
-
-  if (clear_pc)
-    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
-
-}
-
-void MacroAssembler::restore_rax(Register tmp) {
-  if (tmp == noreg) pop(rax);
-  else if (tmp != rax) mov(rax, tmp);
-}
-
-void MacroAssembler::round_to(Register reg, int modulus) {
-  addptr(reg, modulus - 1);
-  andptr(reg, -modulus);
-}
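-
-// A small sketch (illustrative name, not part of this file) of the
-// align-up identity round_to() encodes: adding modulus - 1 and masking
-// with -modulus clears the low bits, assuming modulus is a power of two
-// as round_to's callers guarantee. E.g. align_up_sketch(13, 8) == 16.
-static inline long align_up_sketch(long x, long modulus) {
-  return (x + modulus - 1) & -modulus;  // same addptr/andptr pair as above
-}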
-
-void MacroAssembler::save_rax(Register tmp) {
-  if (tmp == noreg) push(rax);
-  else if (tmp != rax) mov(tmp, rax);
-}
-
-// Write serialization page so VM thread can do a pseudo remote membar.
-// We use the current thread pointer to calculate a thread specific
-// offset to write to within the page. This minimizes bus traffic
-// due to cache line collision.
-void MacroAssembler::serialize_memory(Register thread, Register tmp) {
-  movl(tmp, thread);
-  shrl(tmp, os::get_serialize_page_shift_count());
-  andl(tmp, (os::vm_page_size() - sizeof(int)));
-
-  Address index(noreg, tmp, Address::times_1);
-  ExternalAddress page(os::get_memory_serialize_page());
-
-  // Size of store must match masking code above
-  movl(as_Address(ArrayAddress(page, index)), tmp);
-}
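-
-// Sketch of the offset arithmetic above (the helper and its parameters
-// are illustrative, not HotSpot API): each thread hashes its own pointer
-// down to an int-aligned slot inside the serialization page, so stores
-// from different threads rarely land on the same cache line.
-static inline unsigned long serialize_slot_sketch(unsigned long thread,
-                                                  int shift,
-                                                  unsigned long page_size) {
-  return (thread >> shift) & (page_size - sizeof(int));  // matches shrl/andl above
-}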
-
-// Calls to C land
-//
-// When entering C land, the rbp & rsp of the last Java frame have to be recorded
-// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
-// has to be reset to 0. This is required to allow proper stack traversal.
-void MacroAssembler::set_last_Java_frame(Register java_thread,
-                                         Register last_java_sp,
-                                         Register last_java_fp,
-                                         address  last_java_pc) {
-  // determine java_thread register
-  if (!java_thread->is_valid()) {
-    java_thread = rdi;
-    get_thread(java_thread);
-  }
-  // determine last_java_sp register
-  if (!last_java_sp->is_valid()) {
-    last_java_sp = rsp;
-  }
-
-  // last_java_fp is optional
-
-  if (last_java_fp->is_valid()) {
-    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
-  }
-
-  // last_java_pc is optional
-
-  if (last_java_pc != NULL) {
-    lea(Address(java_thread,
-                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
-        InternalAddress(last_java_pc));
-
-  }
-  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
-}
-
-void MacroAssembler::shlptr(Register dst, int imm8) {
-  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
-}
-
-void MacroAssembler::shrptr(Register dst, int imm8) {
-  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
-}
-
-void MacroAssembler::sign_extend_byte(Register reg) {
-  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
-    movsbl(reg, reg); // movsxb
-  } else {
-    shll(reg, 24);
-    sarl(reg, 24);
-  }
-}
-
-void MacroAssembler::sign_extend_short(Register reg) {
-  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
-    movswl(reg, reg); // movsxw
-  } else {
-    shll(reg, 16);
-    sarl(reg, 16);
-  }
-}
-
-void MacroAssembler::testl(Register dst, AddressLiteral src) {
-  assert(reachable(src), "Address should be reachable");
-  testl(dst, as_Address(src));
-}
-
-void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::sqrtsd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::sqrtsd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::sqrtss(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::sqrtss(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::subsd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::subsd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::subss(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::subss(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::ucomisd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::ucomisd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    Assembler::ucomiss(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::ucomiss(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
-  // Used in sign-bit flipping with aligned address.
-  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
-  if (reachable(src)) {
-    Assembler::xorpd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::xorpd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
-  // Used in sign-bit flipping with aligned address.
-  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
-  if (reachable(src)) {
-    Assembler::xorps(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    Assembler::xorps(dst, Address(rscratch1, 0));
-  }
-}
-
-// AVX 3-operands instructions
-
-void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
-  if (reachable(src)) {
-    vaddsd(dst, nds, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    vaddsd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
-  if (reachable(src)) {
-    vaddss(dst, nds, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    vaddss(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
-  if (reachable(src)) {
-    vandpd(dst, nds, as_Address(src), vector256);
-  } else {
-    lea(rscratch1, src);
-    vandpd(dst, nds, Address(rscratch1, 0), vector256);
-  }
-}
-
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
-  if (reachable(src)) {
-    vandps(dst, nds, as_Address(src), vector256);
-  } else {
-    lea(rscratch1, src);
-    vandps(dst, nds, Address(rscratch1, 0), vector256);
-  }
-}
-
-void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
-  if (reachable(src)) {
-    vdivsd(dst, nds, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    vdivsd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
-  if (reachable(src)) {
-    vdivss(dst, nds, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    vdivss(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
-  if (reachable(src)) {
-    vmulsd(dst, nds, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    vmulsd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
-  if (reachable(src)) {
-    vmulss(dst, nds, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    vmulss(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
-  if (reachable(src)) {
-    vsubsd(dst, nds, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    vsubsd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
-  if (reachable(src)) {
-    vsubss(dst, nds, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    vsubss(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
-  if (reachable(src)) {
-    vxorpd(dst, nds, as_Address(src), vector256);
-  } else {
-    lea(rscratch1, src);
-    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
-  }
-}
-
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
-  if (reachable(src)) {
-    vxorps(dst, nds, as_Address(src), vector256);
-  } else {
-    lea(rscratch1, src);
-    vxorps(dst, nds, Address(rscratch1, 0), vector256);
-  }
-}
-
-
-//////////////////////////////////////////////////////////////////////////////////
-#ifndef SERIALGC
-
-void MacroAssembler::g1_write_barrier_pre(Register obj,
-                                          Register pre_val,
-                                          Register thread,
-                                          Register tmp,
-                                          bool tosca_live,
-                                          bool expand_call) {
-
-  // If expand_call is true then we expand the call_VM_leaf macro
-  // directly to skip generating the check by
-  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
-
-#ifdef _LP64
-  assert(thread == r15_thread, "must be");
-#endif // _LP64
-
-  Label done;
-  Label runtime;
-
-  assert(pre_val != noreg, "check this code");
-
-  if (obj != noreg) {
-    assert_different_registers(obj, pre_val, tmp);
-    assert(pre_val != rax, "check this code");
-  }
-
-  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_active()));
-  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_index()));
-  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_buf()));
-
-
-  // Is marking active?
-  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
-    cmpl(in_progress, 0);
-  } else {
-    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
-    cmpb(in_progress, 0);
-  }
-  jcc(Assembler::equal, done);
-
-  // Do we need to load the previous value?
-  if (obj != noreg) {
-    load_heap_oop(pre_val, Address(obj, 0));
-  }
-
-  // Is the previous value null?
-  cmpptr(pre_val, (int32_t) NULL_WORD);
-  jcc(Assembler::equal, done);
-
-  // Can we store original value in the thread's buffer?
-  // Is index == 0?
-  // (The index field is typed as size_t.)
-
-  movptr(tmp, index);                   // tmp := *index_adr
-  cmpptr(tmp, 0);                       // tmp == 0?
-  jcc(Assembler::equal, runtime);       // If yes, goto runtime
-
-  subptr(tmp, wordSize);                // tmp := tmp - wordSize
-  movptr(index, tmp);                   // *index_adr := tmp
-  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
-
-  // Record the previous value
-  movptr(Address(tmp, 0), pre_val);
-  jmp(done);
-
-  bind(runtime);
-  // save the live input values
-  if (tosca_live) push(rax);
-
-  if (obj != noreg && obj != rax)
-    push(obj);
-
-  if (pre_val != rax)
-    push(pre_val);
-
-  // Calling the runtime using the regular call_VM_leaf mechanism generates
-  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
-  // that checks that *(ebp + frame::interpreter_frame_last_sp) == NULL.
-  //
-  // If we are generating the pre-barrier without a frame (e.g. in the
-  // intrinsified Reference.get() routine) then ebp might be pointing to
-  // the caller frame and so this check will most likely fail at runtime.
-  //
-  // Expanding the call directly bypasses the generation of the check.
-  // So when we do not have a full interpreter frame on the stack,
-  // expand_call should be passed true.
-
-  NOT_LP64( push(thread); )
-
-  if (expand_call) {
-    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
-    pass_arg1(this, thread);
-    pass_arg0(this, pre_val);
-    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
-  } else {
-    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
-  }
-
-  NOT_LP64( pop(thread); )
-
-  // restore the live input values
-  if (pre_val != rax)
-    pop(pre_val);
-
-  if (obj != noreg && obj != rax)
-    pop(obj);
-
-  if (tosca_live) pop(rax);
-
-  bind(done);
-}
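-
-// The buffer-handling branch of the pre-barrier above, restated as a
-// compact host-C++ sketch; the struct and helper are illustrative
-// assumptions, not the VM's types.
-struct SATBQueueSketch { bool active; long index; void** buf; };
-static inline bool satb_pre_barrier_sketch(SATBQueueSketch* q, void* pre_val) {
-  if (!q->active || pre_val == 0) return true;  // marking off or nothing to log
-  if (q->index == 0) return false;              // buffer full: caller takes the runtime path
-  q->index -= sizeof(void*);                    // index counts down in bytes
-  q->buf[q->index / sizeof(void*)] = pre_val;   // record the previous value
-  return true;
-}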
-
-void MacroAssembler::g1_write_barrier_post(Register store_addr,
-                                           Register new_val,
-                                           Register thread,
-                                           Register tmp,
-                                           Register tmp2) {
-#ifdef _LP64
-  assert(thread == r15_thread, "must be");
-#endif // _LP64
-
-  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                       PtrQueue::byte_offset_of_index()));
-  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
-                                       PtrQueue::byte_offset_of_buf()));
-
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
-  Label done;
-  Label runtime;
-
-  // Does store cross heap regions?
-
-  movptr(tmp, store_addr);
-  xorptr(tmp, new_val);
-  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
-  jcc(Assembler::equal, done);
-
-  // crosses regions, storing NULL?
-
-  cmpptr(new_val, (int32_t) NULL_WORD);
-  jcc(Assembler::equal, done);
-
-  // storing region crossing non-NULL, is card already dirty?
-
-  ExternalAddress cardtable((address) ct->byte_map_base);
-  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
-#ifdef _LP64
-  const Register card_addr = tmp;
-
-  movq(card_addr, store_addr);
-  shrq(card_addr, CardTableModRefBS::card_shift);
-
-  lea(tmp2, cardtable);
-
-  // get the address of the card
-  addq(card_addr, tmp2);
-#else
-  const Register card_index = tmp;
-
-  movl(card_index, store_addr);
-  shrl(card_index, CardTableModRefBS::card_shift);
-
-  Address index(noreg, card_index, Address::times_1);
-  const Register card_addr = tmp;
-  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
-#endif
-  cmpb(Address(card_addr, 0), 0);
-  jcc(Assembler::equal, done);
-
-  // Storing a region-crossing, non-NULL oop and the card is clean:
-  // dirty the card and log it.
-
-  movb(Address(card_addr, 0), 0);
-
-  cmpl(queue_index, 0);
-  jcc(Assembler::equal, runtime);
-  subl(queue_index, wordSize);
-  movptr(tmp2, buffer);
-#ifdef _LP64
-  movslq(rscratch1, queue_index);
-  addq(tmp2, rscratch1);
-  movq(Address(tmp2, 0), card_addr);
-#else
-  addl(tmp2, queue_index);
-  movl(Address(tmp2, 0), card_index);
-#endif
-  jmp(done);
-
-  bind(runtime);
-  // save the live input values
-  push(store_addr);
-  push(new_val);
-#ifdef _LP64
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
-#else
-  push(thread);
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
-  pop(thread);
-#endif
-  pop(new_val);
-  pop(store_addr);
-
-  bind(done);
-}
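-
-// The two cheap filters at the top of the post-barrier, as a sketch
-// (names are illustrative; log_region_bytes stands in for
-// HeapRegion::LogOfHRGrainBytes):
-static inline bool g1_post_filter_sketch(unsigned long store_addr,
-                                         unsigned long new_val,
-                                         int log_region_bytes) {
-  if (((store_addr ^ new_val) >> log_region_bytes) == 0) return true;  // same region
-  if (new_val == 0) return true;                                       // storing NULL
-  return false;  // region-crossing non-NULL store: check and maybe dirty the card
-}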
-
-#endif // SERIALGC
-//////////////////////////////////////////////////////////////////////////////////
-
-
-void MacroAssembler::store_check(Register obj) {
-  // Does a store check for the oop in register obj. The content of
-  // register obj is destroyed afterwards.
-  store_check_part_1(obj);
-  store_check_part_2(obj);
-}
-
-void MacroAssembler::store_check(Register obj, Address dst) {
-  store_check(obj);
-}
-
-
-// split the store check operation so that other instructions can be scheduled in between
-void MacroAssembler::store_check_part_1(Register obj) {
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
-  shrptr(obj, CardTableModRefBS::card_shift);
-}
-
-void MacroAssembler::store_check_part_2(Register obj) {
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
-  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
-  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
-
-  // The calculation for byte_map_base is as follows:
-  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
-  // So this essentially converts an address to a displacement and
-  // it will never need to be relocated. On 64-bit, however, the value may be
-  // too large for a 32-bit displacement.
-
-  intptr_t disp = (intptr_t) ct->byte_map_base;
-  if (is_simm32(disp)) {
-    Address cardtable(noreg, obj, Address::times_1, disp);
-    movb(cardtable, 0);
-  } else {
-    // By doing it as an ExternalAddress, disp could be converted to a
-    // rip-relative displacement and done in a single instruction, given a
-    // favorable mapping and a smarter version of as_Address. Worst case it is
-    // two instructions, which is no worse than loading disp into a register
-    // and using a simple Address() as above.
-    // We can't use ExternalAddress as the only style since if disp == 0 we'll
-    // assert, because NULL isn't acceptable in a relocation (see 6644928).
-    // Either way, in some cases we'll get a single-instruction version.
-
-    ExternalAddress cardtable((address)disp);
-    Address index(noreg, obj, Address::times_1);
-    movb(as_Address(ArrayAddress(cardtable, index)), 0);
-  }
-}
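-
-// The displacement trick above in sketch arithmetic (illustrative names):
-// the card for an address is byte_map_base[addr >> card_shift], so when
-// byte_map_base fits a 32-bit displacement the shifted address can index
-// a single movb directly.
-static inline void dirty_card_sketch(volatile unsigned char* byte_map_base,
-                                     unsigned long addr, int card_shift) {
-  byte_map_base[addr >> card_shift] = 0;  // 0 marks the card dirty here
-}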
-
-void MacroAssembler::subptr(Register dst, int32_t imm32) {
-  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
-}
-
-// Force generation of a 4 byte immediate value even if it fits into 8bit
-void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
-  LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
-}
-
-void MacroAssembler::subptr(Register dst, Register src) {
-  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
-}
-
-// C++ bool manipulation
-void MacroAssembler::testbool(Register dst) {
-  if(sizeof(bool) == 1)
-    testb(dst, 0xff);
-  else if(sizeof(bool) == 2) {
-    // testw implementation needed for two byte bools
-    ShouldNotReachHere();
-  } else if(sizeof(bool) == 4)
-    testl(dst, dst);
-  else
-    // unsupported
-    ShouldNotReachHere();
-}
-
-void MacroAssembler::testptr(Register dst, Register src) {
-  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
-}
-
-// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
-void MacroAssembler::tlab_allocate(Register obj,
-                                   Register var_size_in_bytes,
-                                   int con_size_in_bytes,
-                                   Register t1,
-                                   Register t2,
-                                   Label& slow_case) {
-  assert_different_registers(obj, t1, t2);
-  assert_different_registers(obj, var_size_in_bytes, t1);
-  Register end = t2;
-  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
-
-  verify_tlab();
-
-  NOT_LP64(get_thread(thread));
-
-  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
-  if (var_size_in_bytes == noreg) {
-    lea(end, Address(obj, con_size_in_bytes));
-  } else {
-    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
-  }
-  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
-  jcc(Assembler::above, slow_case);
-
-  // update the tlab top pointer
-  movptr(Address(thread, JavaThread::tlab_top_offset()), end);
-
-  // recover var_size_in_bytes if necessary
-  if (var_size_in_bytes == end) {
-    subptr(var_size_in_bytes, obj);
-  }
-  verify_tlab();
-}
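-
-// tlab_allocate's fast path, restated as a host-C++ sketch (the struct
-// and helper names are illustrative assumptions):
-struct TlabSketch { char* top; char* end; };
-static inline void* tlab_bump_sketch(TlabSketch* t, unsigned long size_in_bytes) {
-  char* obj = t->top;
-  if (obj + size_in_bytes > t->end) return 0;  // would overflow: take the slow case
-  t->top = obj + size_in_bytes;                // bump the top pointer
-  return obj;
-}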
-
-// Preserves rbx, and rdx.
-Register MacroAssembler::tlab_refill(Label& retry,
-                                     Label& try_eden,
-                                     Label& slow_case) {
-  Register top = rax;
-  Register t1  = rcx;
-  Register t2  = rsi;
-  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
-  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
-  Label do_refill, discard_tlab;
-
-  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
-    // No allocation in the shared eden.
-    jmp(slow_case);
-  }
-
-  NOT_LP64(get_thread(thread_reg));
-
-  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
-  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
-
-  // calculate amount of free space
-  subptr(t1, top);
-  shrptr(t1, LogHeapWordSize);
-
-  // Retain tlab and allocate object in shared space if
-  // the amount free in the tlab is too large to discard.
-  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
-  jcc(Assembler::lessEqual, discard_tlab);
-
-  // Retain
-  // %%% yuck as movptr...
-  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
-  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
-  if (TLABStats) {
-    // increment number of slow_allocations
-    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
-  }
-  jmp(try_eden);
-
-  bind(discard_tlab);
-  if (TLABStats) {
-    // increment number of refills
-    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
-    // accumulate wastage -- t1 is amount free in tlab
-    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
-  }
-
-  // if tlab is currently allocated (top or end != null) then
-  // fill [top, end + alignment_reserve) with array object
-  testptr(top, top);
-  jcc(Assembler::zero, do_refill);
-
-  // set up the mark word
-  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
-  // set the length to the remaining space
-  subptr(t1, typeArrayOopDesc::header_size(T_INT));
-  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
-  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
-  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
-  // set klass to intArrayKlass
-  // dubious reloc: why not an oop reloc?
-  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
-  // store klass last: concurrent GCs assume the length is valid if the
-  // klass field is not null.
-  store_klass(top, t1);
-
-  movptr(t1, top);
-  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
-  incr_allocated_bytes(thread_reg, t1, 0);
-
-  // refill the tlab with an eden allocation
-  bind(do_refill);
-  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
-  shlptr(t1, LogHeapWordSize);
-  // allocate new tlab, address returned in top
-  eden_allocate(top, t1, 0, t2, slow_case);
-
-  // Check that t1 was preserved in eden_allocate.
-#ifdef ASSERT
-  if (UseTLAB) {
-    Label ok;
-    Register tsize = rsi;
-    assert_different_registers(tsize, thread_reg, t1);
-    push(tsize);
-    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
-    shlptr(tsize, LogHeapWordSize);
-    cmpptr(t1, tsize);
-    jcc(Assembler::equal, ok);
-    STOP("assert(t1 != tlab size)");
-    should_not_reach_here();
-
-    bind(ok);
-    pop(tsize);
-  }
-#endif
-  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
-  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
-  addptr(top, t1);
-  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
-  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
-  verify_tlab();
-  jmp(retry);
-
-  return thread_reg; // for use by caller
-}
-
-void MacroAssembler::incr_allocated_bytes(Register thread,
-                                          Register var_size_in_bytes,
-                                          int con_size_in_bytes,
-                                          Register t1) {
-  if (!thread->is_valid()) {
-#ifdef _LP64
-    thread = r15_thread;
-#else
-    assert(t1->is_valid(), "need temp reg");
-    thread = t1;
-    get_thread(thread);
-#endif
-  }
-
-#ifdef _LP64
-  if (var_size_in_bytes->is_valid()) {
-    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
-  } else {
-    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
-  }
-#else
-  if (var_size_in_bytes->is_valid()) {
-    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
-  } else {
-    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
-  }
-  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
-#endif
-}
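-
-// On 32-bit the addl/adcl pair above is a 64-bit add done in halves; a
-// sketch of the same carry propagation in portable C++ (the helper name
-// is an illustrative assumption):
-static inline void add64_in_halves_sketch(unsigned int* lo, unsigned int* hi,
-                                          unsigned int delta) {
-  unsigned int old_lo = *lo;
-  *lo += delta;
-  if (*lo < old_lo) *hi += 1;  // carry out of the low word, like adcl(..., 0)
-}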
-
-void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
-  pusha();
-
-  // if we are coming from c1, xmm registers may be live
-  int off = 0;
-  if (UseSSE == 1)  {
-    subptr(rsp, sizeof(jdouble)*8);
-    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
-    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
-    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
-    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
-    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
-    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
-    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
-    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
-  } else if (UseSSE >= 2)  {
-#ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      assert(UseAVX > 0, "256bit vectors are supported only with AVX");
-      // Save upper half of YMM registers
-      subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
-      vextractf128h(Address(rsp,  0),xmm0);
-      vextractf128h(Address(rsp, 16),xmm1);
-      vextractf128h(Address(rsp, 32),xmm2);
-      vextractf128h(Address(rsp, 48),xmm3);
-      vextractf128h(Address(rsp, 64),xmm4);
-      vextractf128h(Address(rsp, 80),xmm5);
-      vextractf128h(Address(rsp, 96),xmm6);
-      vextractf128h(Address(rsp,112),xmm7);
-#ifdef _LP64
-      vextractf128h(Address(rsp,128),xmm8);
-      vextractf128h(Address(rsp,144),xmm9);
-      vextractf128h(Address(rsp,160),xmm10);
-      vextractf128h(Address(rsp,176),xmm11);
-      vextractf128h(Address(rsp,192),xmm12);
-      vextractf128h(Address(rsp,208),xmm13);
-      vextractf128h(Address(rsp,224),xmm14);
-      vextractf128h(Address(rsp,240),xmm15);
-#endif
-    }
-#endif
-    // Save whole 128-bit (16 bytes) XMM registers
-    subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
-    movdqu(Address(rsp,off++*16),xmm0);
-    movdqu(Address(rsp,off++*16),xmm1);
-    movdqu(Address(rsp,off++*16),xmm2);
-    movdqu(Address(rsp,off++*16),xmm3);
-    movdqu(Address(rsp,off++*16),xmm4);
-    movdqu(Address(rsp,off++*16),xmm5);
-    movdqu(Address(rsp,off++*16),xmm6);
-    movdqu(Address(rsp,off++*16),xmm7);
-#ifdef _LP64
-    movdqu(Address(rsp,off++*16),xmm8);
-    movdqu(Address(rsp,off++*16),xmm9);
-    movdqu(Address(rsp,off++*16),xmm10);
-    movdqu(Address(rsp,off++*16),xmm11);
-    movdqu(Address(rsp,off++*16),xmm12);
-    movdqu(Address(rsp,off++*16),xmm13);
-    movdqu(Address(rsp,off++*16),xmm14);
-    movdqu(Address(rsp,off++*16),xmm15);
-#endif
-  }
-
-  // Preserve registers across runtime call
-  int incoming_argument_and_return_value_offset = -1;
-  if (num_fpu_regs_in_use > 1) {
-    // Must preserve all other FPU regs (could alternatively convert
-    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
-    // FPU state, but we cannot trust the C compiler)
-    NEEDS_CLEANUP;
-    // NOTE that in this case we also push the incoming argument(s) to
-    // the stack and restore it later; we also use this stack slot to
-    // hold the return value from dsin, dcos etc.
-    for (int i = 0; i < num_fpu_regs_in_use; i++) {
-      subptr(rsp, sizeof(jdouble));
-      fstp_d(Address(rsp, 0));
-    }
-    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
-    for (int i = nb_args-1; i >= 0; i--) {
-      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
-    }
-  }
-
-  subptr(rsp, nb_args*sizeof(jdouble));
-  for (int i = 0; i < nb_args; i++) {
-    fstp_d(Address(rsp, i*sizeof(jdouble)));
-  }
-
-#ifdef _LP64
-  if (nb_args > 0) {
-    movdbl(xmm0, Address(rsp, 0));
-  }
-  if (nb_args > 1) {
-    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
-  }
-  assert(nb_args <= 2, "unsupported number of args");
-#endif // _LP64
-
-  // NOTE: we must not use call_VM_leaf here because that requires a
-  // complete interpreter frame in debug mode -- same bug as 4387334
-  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
-  // do proper 64bit abi
-
-  NEEDS_CLEANUP;
-  // Need to add stack banging before this runtime call if it needs to
-  // be taken; however, there is no generic stack banging routine at
-  // the MacroAssembler level
-
-  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
-
-#ifdef _LP64
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-#endif // _LP64
-  addptr(rsp, sizeof(jdouble) * nb_args);
-  if (num_fpu_regs_in_use > 1) {
-    // Must save return value to stack and then restore entire FPU
-    // stack except incoming arguments
-    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
-    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
-      fld_d(Address(rsp, 0));
-      addptr(rsp, sizeof(jdouble));
-    }
-    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
-    addptr(rsp, sizeof(jdouble) * nb_args);
-  }
-
-  off = 0;
-  if (UseSSE == 1)  {
-    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
-    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
-    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
-    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
-    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
-    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
-    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
-    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
-    addptr(rsp, sizeof(jdouble)*8);
-  } else if (UseSSE >= 2)  {
-    // Restore whole 128-bit (16 bytes) XMM registers
-    movdqu(xmm0, Address(rsp,off++*16));
-    movdqu(xmm1, Address(rsp,off++*16));
-    movdqu(xmm2, Address(rsp,off++*16));
-    movdqu(xmm3, Address(rsp,off++*16));
-    movdqu(xmm4, Address(rsp,off++*16));
-    movdqu(xmm5, Address(rsp,off++*16));
-    movdqu(xmm6, Address(rsp,off++*16));
-    movdqu(xmm7, Address(rsp,off++*16));
-#ifdef _LP64
-    movdqu(xmm8, Address(rsp,off++*16));
-    movdqu(xmm9, Address(rsp,off++*16));
-    movdqu(xmm10, Address(rsp,off++*16));
-    movdqu(xmm11, Address(rsp,off++*16));
-    movdqu(xmm12, Address(rsp,off++*16));
-    movdqu(xmm13, Address(rsp,off++*16));
-    movdqu(xmm14, Address(rsp,off++*16));
-    movdqu(xmm15, Address(rsp,off++*16));
-#endif
-    addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
-#ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      // Restore upper half of YMM registers.
-      vinsertf128h(xmm0, Address(rsp,  0));
-      vinsertf128h(xmm1, Address(rsp, 16));
-      vinsertf128h(xmm2, Address(rsp, 32));
-      vinsertf128h(xmm3, Address(rsp, 48));
-      vinsertf128h(xmm4, Address(rsp, 64));
-      vinsertf128h(xmm5, Address(rsp, 80));
-      vinsertf128h(xmm6, Address(rsp, 96));
-      vinsertf128h(xmm7, Address(rsp,112));
-#ifdef _LP64
-      vinsertf128h(xmm8, Address(rsp,128));
-      vinsertf128h(xmm9, Address(rsp,144));
-      vinsertf128h(xmm10, Address(rsp,160));
-      vinsertf128h(xmm11, Address(rsp,176));
-      vinsertf128h(xmm12, Address(rsp,192));
-      vinsertf128h(xmm13, Address(rsp,208));
-      vinsertf128h(xmm14, Address(rsp,224));
-      vinsertf128h(xmm15, Address(rsp,240));
-#endif
-      addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
-    }
-#endif
-  }
-  popa();
-}
-
-static const double     pi_4 =  0.7853981633974483;
-
-void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
-  // A hand-coded argument reduction for values whose magnitude lies in (pi/4, pi/2)
-  // was attempted in this code; unfortunately it appears that the
-  // switch to 80-bit precision and back causes this to be
-  // unprofitable compared with simply performing a runtime call if
-  // the argument is out of the (-pi/4, pi/4) range.
-
-  Register tmp = noreg;
-  if (!VM_Version::supports_cmov()) {
-    // fcmp needs a temporary, so preserve rbx
-    tmp = rbx;
-    push(tmp);
-  }
-
-  Label slow_case, done;
-
-  ExternalAddress pi4_adr = (address)&pi_4;
-  if (reachable(pi4_adr)) {
-    // x ?<= pi/4
-    fld_d(pi4_adr);
-    fld_s(1);                // Stack:  X  PI/4  X
-    fabs();                  // Stack: |X| PI/4  X
-    fcmp(tmp);
-    jcc(Assembler::above, slow_case);
-
-    // fastest case: -pi/4 <= x <= pi/4
-    switch(trig) {
-    case 's':
-      fsin();
-      break;
-    case 'c':
-      fcos();
-      break;
-    case 't':
-      ftan();
-      break;
-    default:
-      assert(false, "bad intrinsic");
-      break;
-    }
-    jmp(done);
-  }
-
-  // slow case: runtime call
-  bind(slow_case);
-
-  switch(trig) {
-  case 's':
-    {
-      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
-    }
-    break;
-  case 'c':
-    {
-      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
-    }
-    break;
-  case 't':
-    {
-      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
-    }
-    break;
-  default:
-    assert(false, "bad intrinsic");
-    break;
-  }
-
-  // Come here with result in F-TOS
-  bind(done);
-
-  if (tmp != noreg) {
-    pop(tmp);
-  }
-}
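-
-// The dispatch policy above in sketch form: the x87 fsin/fcos/ftan fast
-// path is only taken for |x| <= pi/4; everything else goes through the
-// runtime for accurate argument reduction. std::sin stands in for the
-// fsin fast path and runtime_sin for SharedRuntime::dsin; both roles
-// here are illustrative.
-#include <cmath>
-static inline double sin_dispatch_sketch(double x, double (*runtime_sin)(double)) {
-  const double pi_4_sketch = 0.7853981633974483;
-  if (std::fabs(x) <= pi_4_sketch) return std::sin(x);  // fast case
-  return runtime_sin(x);                                // slow case
-}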
-
-
-// Look up the method for a megamorphic invokeinterface call.
-// The target method is determined by <intf_klass, itable_index>.
-// The receiver klass is in recv_klass.
-// On success, the result will be in method_result, and execution falls through.
-// On failure, execution transfers to the given label.
-void MacroAssembler::lookup_interface_method(Register recv_klass,
-                                             Register intf_klass,
-                                             RegisterOrConstant itable_index,
-                                             Register method_result,
-                                             Register scan_temp,
-                                             Label& L_no_such_interface) {
-  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
-  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
-         "caller must use same register for non-constant itable index as for method");
-
-  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
-  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
-  int itentry_off = itableMethodEntry::method_offset_in_bytes();
-  int scan_step   = itableOffsetEntry::size() * wordSize;
-  int vte_size    = vtableEntry::size() * wordSize;
-  Address::ScaleFactor times_vte_scale = Address::times_ptr;
-  assert(vte_size == wordSize, "else adjust times_vte_scale");
-
-  movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
-
-  // %%% Could store the aligned, prescaled offset in the klassoop.
-  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
-  if (HeapWordsPerLong > 1) {
-    // Round up to align_object_offset boundary
-    // see code for InstanceKlass::start_of_itable!
-    round_to(scan_temp, BytesPerLong);
-  }
-
-  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
-  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
-  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
-
-  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
-  //   if (scan->interface() == intf) {
-  //     result = (klass + scan->offset() + itable_index);
-  //   }
-  // }
-  Label search, found_method;
-
-  for (int peel = 1; peel >= 0; peel--) {
-    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
-    cmpptr(intf_klass, method_result);
-
-    if (peel) {
-      jccb(Assembler::equal, found_method);
-    } else {
-      jccb(Assembler::notEqual, search);
-      // (invert the test to fall through to found_method...)
-    }
-
-    if (!peel)  break;
-
-    bind(search);
-
-    // Check that the previous entry is non-null.  A null entry means that
-    // the receiver class doesn't implement the interface, and wasn't the
-    // same as when the caller was compiled.
-    testptr(method_result, method_result);
-    jcc(Assembler::zero, L_no_such_interface);
-    addptr(scan_temp, scan_step);
-  }
-
-  bind(found_method);
-
-  // Got a hit.
-  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
-  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
-}
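-
-// The peeled scan above over a plain host-C++ sketch of the itable
-// layout; the structs are illustrative assumptions, and the scaled
-// itable_index is pre-folded into itentry_off for simplicity.
-struct ItableOffsetEntrySketch { void* iface; int offset; };
-static inline void* lookup_itable_sketch(ItableOffsetEntrySketch* scan, void* intf,
-                                         char* recv_klass, int itentry_off) {
-  for (; scan->iface != 0; scan++) {  // NULL entry terminates the itable
-    if (scan->iface == intf) {
-      return *(void**)(recv_klass + scan->offset + itentry_off);  // method entry
-    }
-  }
-  return 0;  // receiver class does not implement the interface
-}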
-
-
-// virtual method calling
-void MacroAssembler::lookup_virtual_method(Register recv_klass,
-                                           RegisterOrConstant vtable_index,
-                                           Register method_result) {
-  const int base = InstanceKlass::vtable_start_offset() * wordSize;
-  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
-  Address vtable_entry_addr(recv_klass,
-                            vtable_index, Address::times_ptr,
-                            base + vtableEntry::method_offset_in_bytes());
-  movptr(method_result, vtable_entry_addr);
-}
-
-
-void MacroAssembler::check_klass_subtype(Register sub_klass,
-                           Register super_klass,
-                           Register temp_reg,
-                           Label& L_success) {
-  Label L_failure;
-  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
-  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
-  bind(L_failure);
-}
-
-
-void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
-                                                   Register super_klass,
-                                                   Register temp_reg,
-                                                   Label* L_success,
-                                                   Label* L_failure,
-                                                   Label* L_slow_path,
-                                        RegisterOrConstant super_check_offset) {
-  assert_different_registers(sub_klass, super_klass, temp_reg);
-  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
-  if (super_check_offset.is_register()) {
-    assert_different_registers(sub_klass, super_klass,
-                               super_check_offset.as_register());
-  } else if (must_load_sco) {
-    assert(temp_reg != noreg, "supply either a temp or a register offset");
-  }
-
-  Label L_fallthrough;
-  int label_nulls = 0;
-  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
-  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
-  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
-  assert(label_nulls <= 1, "at most one NULL in the batch");
-
-  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
-  int sco_offset = in_bytes(Klass::super_check_offset_offset());
-  Address super_check_offset_addr(super_klass, sco_offset);
-
-  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
-  // range of a jccb.  If this routine grows larger, reconsider at
-  // least some of these.
-#define local_jcc(assembler_cond, label)                                \
-  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
-  else                             jcc( assembler_cond, label) /*omit semi*/
-
-  // Hacked jmp, which may only be used just before L_fallthrough.
-#define final_jmp(label)                                                \
-  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
-  else                            jmp(label)                /*omit semi*/
-
-  // If the pointers are equal, we are done (e.g., String[] elements).
-  // This self-check enables sharing of secondary supertype arrays among
-  // non-primary types such as array-of-interface.  Otherwise, each such
-  // type would need its own customized SSA.
-  // We move this check to the front of the fast path because many
-  // type checks are in fact trivially successful in this manner,
-  // so we get a nicely predicted branch right at the start of the check.
-  cmpptr(sub_klass, super_klass);
-  local_jcc(Assembler::equal, *L_success);
-
-  // Check the supertype display:
-  if (must_load_sco) {
-    // Positive movl does right thing on LP64.
-    movl(temp_reg, super_check_offset_addr);
-    super_check_offset = RegisterOrConstant(temp_reg);
-  }
-  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
-  cmpptr(super_klass, super_check_addr); // load displayed supertype
-
-  // This check has worked decisively for primary supers.
-  // Secondary supers are sought in the super_cache ('super_cache_addr').
-  // (Secondary supers are interfaces and very deeply nested subtypes.)
-  // This works in the same check above because of a tricky aliasing
-  // between the super_cache and the primary super display elements.
-  // (The 'super_check_addr' can address either, as the case requires.)
-  // Note that the cache is updated below if it does not help us find
-  // what we need immediately.
-  // So if it was a primary super, we can just fail immediately.
-  // Otherwise, it's the slow path for us (no success at this point).
-
-  if (super_check_offset.is_register()) {
-    local_jcc(Assembler::equal, *L_success);
-    cmpl(super_check_offset.as_register(), sc_offset);
-    if (L_failure == &L_fallthrough) {
-      local_jcc(Assembler::equal, *L_slow_path);
-    } else {
-      local_jcc(Assembler::notEqual, *L_failure);
-      final_jmp(*L_slow_path);
-    }
-  } else if (super_check_offset.as_constant() == sc_offset) {
-    // Need a slow path; fast failure is impossible.
-    if (L_slow_path == &L_fallthrough) {
-      local_jcc(Assembler::equal, *L_success);
-    } else {
-      local_jcc(Assembler::notEqual, *L_slow_path);
-      final_jmp(*L_success);
-    }
-  } else {
-    // No slow path; it's a fast decision.
-    if (L_failure == &L_fallthrough) {
-      local_jcc(Assembler::equal, *L_success);
-    } else {
-      local_jcc(Assembler::notEqual, *L_failure);
-      final_jmp(*L_success);
-    }
-  }
-
-  bind(L_fallthrough);
-
-#undef local_jcc
-#undef final_jmp
-}
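-
-// The fast-path decision above as a sketch (illustrative types): the
-// aliasing trick is that super_check_offset points either at a primary
-// display slot or at the secondary-super cache, so one compare covers both.
-static inline int subtype_fast_path_sketch(char* sub, char* super,
-                                           int super_check_offset, int sc_offset) {
-  if (sub == super) return 1;                                  // trivial success
-  if (*(char**)(sub + super_check_offset) == super) return 1;  // display/cache hit
-  if (super_check_offset != sc_offset) return 0;               // primary miss: failure
-  return -1;  // cache miss: the caller must run the slow-path scan
-}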
-
-
-void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
-                                                   Register super_klass,
-                                                   Register temp_reg,
-                                                   Register temp2_reg,
-                                                   Label* L_success,
-                                                   Label* L_failure,
-                                                   bool set_cond_codes) {
-  assert_different_registers(sub_klass, super_klass, temp_reg);
-  if (temp2_reg != noreg)
-    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
-#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
-
-  Label L_fallthrough;
-  int label_nulls = 0;
-  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
-  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
-  assert(label_nulls <= 1, "at most one NULL in the batch");
-
-  // a couple of useful fields in sub_klass:
-  int ss_offset = in_bytes(Klass::secondary_supers_offset());
-  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
-  Address secondary_supers_addr(sub_klass, ss_offset);
-  Address super_cache_addr(     sub_klass, sc_offset);
-
-  // Do a linear scan of the secondary super-klass chain.
-  // This code is rarely used, so simplicity is a virtue here.
-  // The repne_scan instruction uses fixed registers, which we must spill.
-  // Don't worry too much about pre-existing connections with the input regs.
-
-  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
-  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
-
-  // Get super_klass value into rax (even if it was in rdi or rcx).
-  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
-  if (super_klass != rax || UseCompressedOops) {
-    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
-    mov(rax, super_klass);
-  }
-  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
-  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
-
-#ifndef PRODUCT
-  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
-  ExternalAddress pst_counter_addr((address) pst_counter);
-  NOT_LP64(  incrementl(pst_counter_addr) );
-  LP64_ONLY( lea(rcx, pst_counter_addr) );
-  LP64_ONLY( incrementl(Address(rcx, 0)) );
-#endif //PRODUCT
-
-  // We will consult the secondary-super array.
-  movptr(rdi, secondary_supers_addr);
-  // Load the array length.  (Positive movl does right thing on LP64.)
-  movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
-  // Skip to start of data.
-  addptr(rdi, Array<Klass*>::base_offset_in_bytes());
-
-  // Scan RCX words at [RDI] for an occurrence of RAX.
-  // Set NZ/Z based on last compare.
-  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
-  // not change flags (only the repeated scas instruction sets flags).
-  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
-
-  testptr(rax, rax); // Set Z = 0
-  repne_scan();
-
-  // Unspill the temp. registers:
-  if (pushed_rdi)  pop(rdi);
-  if (pushed_rcx)  pop(rcx);
-  if (pushed_rax)  pop(rax);
-
-  if (set_cond_codes) {
-    // Special hack for the AD files:  rdi is guaranteed non-zero.
-    assert(!pushed_rdi, "rdi must be left non-NULL");
-    // Also, the condition codes are properly set Z/NZ on succeed/failure.
-  }
-
-  if (L_failure == &L_fallthrough)
-        jccb(Assembler::notEqual, *L_failure);
-  else  jcc(Assembler::notEqual, *L_failure);
-
-  // Success.  Cache the super we found and proceed in triumph.
-  movptr(super_cache_addr, super_klass);
-
-  if (L_success != &L_fallthrough) {
-    jmp(*L_success);
-  }
-
-#undef IS_A_TEMP
-
-  bind(L_fallthrough);
-}
-
-
-void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
-  if (VM_Version::supports_cmov()) {
-    cmovl(cc, dst, src);
-  } else {
-    Label L;
-    jccb(negate_condition(cc), L);
-    movl(dst, src);
-    bind(L);
-  }
-}
-
-void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
-  if (VM_Version::supports_cmov()) {
-    cmovl(cc, dst, src);
-  } else {
-    Label L;
-    jccb(negate_condition(cc), L);
-    movl(dst, src);
-    bind(L);
-  }
-}
-
-void MacroAssembler::verify_oop(Register reg, const char* s) {
-  if (!VerifyOops) return;
-
-  // Pass register number to verify_oop_subroutine
-  char* b = new char[strlen(s) + 50];
-  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
-  BLOCK_COMMENT("verify_oop {");
-#ifdef _LP64
-  push(rscratch1);                    // save r10, trashed by movptr()
-#endif
-  push(rax);                          // save rax
-  push(reg);                          // pass register argument
-  ExternalAddress buffer((address) b);
-  // avoid using pushptr, as it modifies scratch registers
-  // and our contract is not to modify anything
-  movptr(rax, buffer.addr());
-  push(rax);
-  // call indirectly to solve generation ordering problem
-  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
-  call(rax);
-  // Caller pops the arguments (oop, message) and restores rax, r10
-  BLOCK_COMMENT("} verify_oop");
-}
-
-
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
-                                                      Register tmp,
-                                                      int offset) {
-  intptr_t value = *delayed_value_addr;
-  if (value != 0)
-    return RegisterOrConstant(value + offset);
-
-  // load indirectly to solve generation ordering problem
-  movptr(tmp, ExternalAddress((address) delayed_value_addr));
-
-#ifdef ASSERT
-  { Label L;
-    testptr(tmp, tmp);
-    if (WizardMode) {
-      jcc(Assembler::notZero, L);
-      char* buf = new char[40];
-      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
-      STOP(buf);
-    } else {
-      jccb(Assembler::notZero, L);
-      hlt();
-    }
-    bind(L);
-  }
-#endif
-
-  if (offset != 0)
-    addptr(tmp, offset);
-
-  return RegisterOrConstant(tmp);
-}
-
-
-Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
-                                         int extra_slot_offset) {
-  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
-  int stackElementSize = Interpreter::stackElementSize;
-  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
-#ifdef ASSERT
-  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
-  assert(offset1 - offset == stackElementSize, "correct arithmetic");
-#endif
-  Register             scale_reg    = noreg;
-  Address::ScaleFactor scale_factor = Address::no_scale;
-  if (arg_slot.is_constant()) {
-    offset += arg_slot.as_constant() * stackElementSize;
-  } else {
-    scale_reg    = arg_slot.as_register();
-    scale_factor = Address::times(stackElementSize);
-  }
-  offset += wordSize;           // return PC is on stack
-  return Address(rsp, scale_reg, scale_factor, offset);
-}
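-
-// A worked example of the arithmetic above (values illustrative): with
-// stackElementSize == 8 on 64-bit and a constant arg_slot of 2, the
-// operand resolves to [rsp + 2*8 + expr_offset + 8], where the trailing
-// wordSize skips the return PC sitting on the stack.
-static inline long arg_offset_sketch(long slot, long elem_size, long base_off) {
-  return slot * elem_size + base_off + (long)sizeof(void*);  // + return PC slot
-}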
-
-
-void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
-  if (!VerifyOops) return;
-
-  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
-  // Pass register number to verify_oop_subroutine
-  char* b = new char[strlen(s) + 50];
-  sprintf(b, "verify_oop_addr: %s", s);
-
-#ifdef _LP64
-  push(rscratch1);                    // save r10, trashed by movptr()
-#endif
-  push(rax);                          // save rax
-  // addr may contain rsp so we will have to adjust it based on the push
-  // we just did (and on 64 bit we do two pushes)
-  // NOTE: the 64-bit code appears to have had a bug here: it did
-  // movq(addr, rax), which stores rax into addr, the reverse of what was intended.
-  if (addr.uses(rsp)) {
-    lea(rax, addr);
-    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
-  } else {
-    pushptr(addr);
-  }
-
-  ExternalAddress buffer((address) b);
-  // pass msg argument
-  // avoid using pushptr, as it modifies scratch registers
-  // and our contract is not to modify anything
-  movptr(rax, buffer.addr());
-  push(rax);
-
-  // call indirectly to solve generation ordering problem
-  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
-  call(rax);
-  // Caller pops the arguments (addr, message) and restores rax, r10.
-}
-
-void MacroAssembler::verify_tlab() {
-#ifdef ASSERT
-  if (UseTLAB && VerifyOops) {
-    Label next, ok;
-    Register t1 = rsi;
-    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
-
-    push(t1);
-    NOT_LP64(push(thread_reg));
-    NOT_LP64(get_thread(thread_reg));
-
-    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
-    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
-    jcc(Assembler::aboveEqual, next);
-    STOP("assert(top >= start)");
-    should_not_reach_here();
-
-    bind(next);
-    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
-    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
-    jcc(Assembler::aboveEqual, ok);
-    STOP("assert(top <= end)");
-    should_not_reach_here();
-
-    bind(ok);
-    NOT_LP64(pop(thread_reg));
-    pop(t1);
-  }
-#endif
-}
-
-class ControlWord {
- public:
-  int32_t _value;
-
-  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
-  int  precision_control() const       { return  (_value >>  8) & 3      ; }
-  bool precision() const               { return ((_value >>  5) & 1) != 0; }
-  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
-  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
-  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
-  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
-  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
-
-  void print() const {
-    // rounding control
-    const char* rc;
-    switch (rounding_control()) {
-      case 0: rc = "round near"; break;
-      case 1: rc = "round down"; break;
-      case 2: rc = "round up  "; break;
-      case 3: rc = "chop      "; break;
-    };
-    // precision control
-    const char* pc;
-    switch (precision_control()) {
-      case 0: pc = "24 bits "; break;
-      case 1: pc = "reserved"; break;
-      case 2: pc = "53 bits "; break;
-      case 3: pc = "64 bits "; break;
-    };
-    // flags
-    char f[9];
-    f[0] = ' ';
-    f[1] = ' ';
-    f[2] = (precision   ()) ? 'P' : 'p';
-    f[3] = (underflow   ()) ? 'U' : 'u';
-    f[4] = (overflow    ()) ? 'O' : 'o';
-    f[5] = (zero_divide ()) ? 'Z' : 'z';
-    f[6] = (denormalized()) ? 'D' : 'd';
-    f[7] = (invalid     ()) ? 'I' : 'i';
-    f[8] = '\x0';
-    // output
-    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
-  }
-
-};
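
The accessors above are plain shift-and-mask decodes of the x87 control word. As a quick sanity check, 0x037F (a common x87 default) decodes as round-to-nearest, 64-bit precision, precision exception masked; a minimal sketch using the same bit positions:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t cw = 0x037F;     // typical x87 default control word
      int rc = (cw >> 10) & 3;  // 0 = round to nearest
      int pc = (cw >>  8) & 3;  // 3 = 64-bit (extended) precision
      int pm = (cw >>  5) & 1;  // 1 = precision exception masked
      printf("rc=%d pc=%d precision-mask=%d\n", rc, pc, pm);
      // prints: rc=0 pc=3 precision-mask=1
    }
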
-
-class StatusWord {
- public:
-  int32_t _value;
-
-  bool busy() const                    { return ((_value >> 15) & 1) != 0; }
-  bool C3() const                      { return ((_value >> 14) & 1) != 0; }
-  bool C2() const                      { return ((_value >> 10) & 1) != 0; }
-  bool C1() const                      { return ((_value >>  9) & 1) != 0; }
-  bool C0() const                      { return ((_value >>  8) & 1) != 0; }
-  int  top() const                     { return  (_value >> 11) & 7      ; }
-  bool error_status() const            { return ((_value >>  7) & 1) != 0; }
-  bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
-  bool precision() const               { return ((_value >>  5) & 1) != 0; }
-  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
-  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
-  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
-  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
-  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
-
-  void print() const {
-    // condition codes
-    char c[5];
-    c[0] = (C3()) ? '3' : '-';
-    c[1] = (C2()) ? '2' : '-';
-    c[2] = (C1()) ? '1' : '-';
-    c[3] = (C0()) ? '0' : '-';
-    c[4] = '\x0';
-    // flags
-    char f[9];
-    f[0] = (error_status()) ? 'E' : '-';
-    f[1] = (stack_fault ()) ? 'S' : '-';
-    f[2] = (precision   ()) ? 'P' : '-';
-    f[3] = (underflow   ()) ? 'U' : '-';
-    f[4] = (overflow    ()) ? 'O' : '-';
-    f[5] = (zero_divide ()) ? 'Z' : '-';
-    f[6] = (denormalized()) ? 'D' : '-';
-    f[7] = (invalid     ()) ? 'I' : '-';
-    f[8] = '\x0';
-    // output
-    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
-  }
-
-};
-
-class TagWord {
- public:
-  int32_t _value;
-
-  int tag_at(int i) const              { return (_value >> (i*2)) & 3; }
-
-  void print() const {
-    printf("%04x", _value & 0xFFFF);
-  }
-
-};
-
-class FPU_Register {
- public:
-  int32_t _m0;
-  int32_t _m1;
-  int16_t _ex;
-
-  bool is_indefinite() const           {
-    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
-  }
-
-  void print() const {
-    char  sign = (_ex < 0) ? '-' : '+';
-    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
-    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
-  };
-
-};
-
-class FPU_State {
- public:
-  enum {
-    register_size       = 10,
-    number_of_registers =  8,
-    register_mask       =  7
-  };
-
-  ControlWord  _control_word;
-  StatusWord   _status_word;
-  TagWord      _tag_word;
-  int32_t      _error_offset;
-  int32_t      _error_selector;
-  int32_t      _data_offset;
-  int32_t      _data_selector;
-  int8_t       _register[register_size * number_of_registers];
-
-  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
-  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
-
-  const char* tag_as_string(int tag) const {
-    switch (tag) {
-      case 0: return "valid";
-      case 1: return "zero";
-      case 2: return "special";
-      case 3: return "empty";
-    }
-    ShouldNotReachHere();
-    return NULL;
-  }
-
-  void print() const {
-    // print computation registers
-    { int t = _status_word.top();
-      for (int i = 0; i < number_of_registers; i++) {
-        int j = (i - t) & register_mask;
-        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
-        st(j)->print();
-        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
-      }
-    }
-    printf("\n");
-    // print control registers
-    printf("ctrl = "); _control_word.print(); printf("\n");
-    printf("stat = "); _status_word .print(); printf("\n");
-    printf("tags = "); _tag_word    .print(); printf("\n");
-  }
-
-};
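
tag_for_st() above maps a logical stack slot ST(i) to a physical x87 register by adding the status word's top-of-stack field modulo 8. A small sketch of just that ring arithmetic:

    #include <cstdio>

    // The x87 stack is a ring of 8 registers indexed from 'top'
    // (register_mask == 7), as in FPU_State::tag_for_st().
    int physical_reg(int top, int i) {
      return (top + i) & 7;
    }

    int main() {
      int top = 5;  // example top-of-stack value
      for (int i = 0; i < 3; i++)
        printf("ST(%d) -> r%d\n", i, physical_reg(top, i));
      // ST(0) -> r5, ST(1) -> r6, ST(2) -> r7
    }
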
-
-class Flag_Register {
- public:
-  int32_t _value;
-
-  bool overflow() const                { return ((_value >> 11) & 1) != 0; }
-  bool direction() const               { return ((_value >> 10) & 1) != 0; }
-  bool sign() const                    { return ((_value >>  7) & 1) != 0; }
-  bool zero() const                    { return ((_value >>  6) & 1) != 0; }
-  bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
-  bool parity() const                  { return ((_value >>  2) & 1) != 0; }
-  bool carry() const                   { return ((_value >>  0) & 1) != 0; }
-
-  void print() const {
-    // flags
-    char f[8];
-    f[0] = (overflow       ()) ? 'O' : '-';
-    f[1] = (direction      ()) ? 'D' : '-';
-    f[2] = (sign           ()) ? 'S' : '-';
-    f[3] = (zero           ()) ? 'Z' : '-';
-    f[4] = (auxiliary_carry()) ? 'A' : '-';
-    f[5] = (parity         ()) ? 'P' : '-';
-    f[6] = (carry          ()) ? 'C' : '-';
-    f[7] = '\x0';
-    // output
-    printf("%08x  flags = %s", _value, f);
-  }
-
-};
-
-class IU_Register {
- public:
-  int32_t _value;
-
-  void print() const {
-    printf("%08x  %11d", _value, _value);
-  }
-
-};
-
-class IU_State {
- public:
-  Flag_Register _eflags;
-  IU_Register   _rdi;
-  IU_Register   _rsi;
-  IU_Register   _rbp;
-  IU_Register   _rsp;
-  IU_Register   _rbx;
-  IU_Register   _rdx;
-  IU_Register   _rcx;
-  IU_Register   _rax;
-
-  void print() const {
-    // computation registers
-    printf("rax,  = "); _rax.print(); printf("\n");
-    printf("rbx,  = "); _rbx.print(); printf("\n");
-    printf("rcx  = "); _rcx.print(); printf("\n");
-    printf("rdx  = "); _rdx.print(); printf("\n");
-    printf("rdi  = "); _rdi.print(); printf("\n");
-    printf("rsi  = "); _rsi.print(); printf("\n");
-    printf("rbp,  = "); _rbp.print(); printf("\n");
-    printf("rsp  = "); _rsp.print(); printf("\n");
-    printf("\n");
-    // control registers
-    printf("flgs = "); _eflags.print(); printf("\n");
-  }
-};
-
-
-class CPU_State {
- public:
-  FPU_State _fpu_state;
-  IU_State  _iu_state;
-
-  void print() const {
-    printf("--------------------------------------------------\n");
-    _iu_state .print();
-    printf("\n");
-    _fpu_state.print();
-    printf("--------------------------------------------------\n");
-  }
-
-};
-
-
-static void _print_CPU_state(CPU_State* state) {
-  state->print();
-};
-
-
-void MacroAssembler::print_CPU_state() {
-  push_CPU_state();
-  push(rsp);                // pass CPU state
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
-  addptr(rsp, wordSize);       // discard argument
-  pop_CPU_state();
-}
-
-
-static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
-  static int counter = 0;
-  FPU_State* fs = &state->_fpu_state;
-  counter++;
-  // For leaf calls, only verify that the top few elements remain empty.
-  // We only need 1 empty at the top for C2 code.
-  if( stack_depth < 0 ) {
-    if( fs->tag_for_st(7) != 3 ) {
-      printf("FPR7 not empty\n");
-      state->print();
-      assert(false, "error");
-      return false;
-    }
-    return true;                // All other stack states do not matter
-  }
-
-  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
-         "bad FPU control word");
-
-  // compute stack depth
-  int i = 0;
-  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
-  int d = i;
-  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
-  // verify findings
-  if (i != FPU_State::number_of_registers) {
-    // stack not contiguous
-    printf("%s: stack not contiguous at ST%d\n", s, i);
-    state->print();
-    assert(false, "error");
-    return false;
-  }
-  // check if computed stack depth corresponds to expected stack depth
-  if (stack_depth < 0) {
-    // expected stack depth is -stack_depth or less
-    if (d > -stack_depth) {
-      // too many elements on the stack
-      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
-      state->print();
-      assert(false, "error");
-      return false;
-    }
-  } else {
-    // expected stack depth is stack_depth
-    if (d != stack_depth) {
-      // wrong stack depth
-      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
-      state->print();
-      assert(false, "error");
-      return false;
-    }
-  }
-  // everything is cool
-  return true;
-}
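
The two scan loops above compute the stack depth d as the number of in-use tags starting at ST(0), then require every remaining slot to be empty (tag 3). A standalone model of that contiguity-and-depth check, with tags[] standing in for tag_for_st(0..7):

    #include <cstdio>

    // tag < 3 means the slot is in use; tag == 3 means empty.
    bool check_depth(const int tags[8], int expected, int* depth) {
      int i = 0;
      while (i < 8 && tags[i] < 3) i++;   // count used slots from ST(0)
      *depth = i;
      while (i < 8 && tags[i] == 3) i++;  // the rest must be empty
      if (i != 8) return false;           // stack not contiguous
      return *depth == expected;
    }

    int main() {
      int tags[8] = {0, 0, 3, 3, 3, 3, 3, 3};  // two values on the stack
      int d;
      printf("ok=%d depth=%d\n", check_depth(tags, 2, &d), d);  // ok=1 depth=2
    }
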
-
-
-void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
-  if (!VerifyFPU) return;
-  push_CPU_state();
-  push(rsp);                // pass CPU state
-  ExternalAddress msg((address) s);
-  // pass message string s
-  pushptr(msg.addr());
-  push(stack_depth);        // pass stack depth
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
-  addptr(rsp, 3 * wordSize);   // discard arguments
-  // check for error
-  { Label L;
-    testl(rax, rax);
-    jcc(Assembler::notZero, L);
-    int3();                  // break if error condition
-    bind(L);
-  }
-  pop_CPU_state();
-}
-
-void MacroAssembler::load_klass(Register dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedKlassPointers) {
-    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    decode_klass_not_null(dst);
-  } else
-#endif
-    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-}
-
-void MacroAssembler::load_prototype_header(Register dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedKlassPointers) {
-    assert (Universe::heap() != NULL, "java heap should be initialized");
-    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    if (Universe::narrow_klass_shift() != 0) {
-      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
-      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
-    } else {
-      movq(dst, Address(dst, Klass::prototype_header_offset()));
-    }
-  } else
-#endif
-  {
-    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    movptr(dst, Address(dst, Klass::prototype_header_offset()));
-  }
-}
-
-void MacroAssembler::store_klass(Register dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedKlassPointers) {
-    encode_klass_not_null(src);
-    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
-  } else
-#endif
-    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
-}
-
-void MacroAssembler::load_heap_oop(Register dst, Address src) {
-#ifdef _LP64
-  // FIXME: Must change all places where we try to load the klass.
-  if (UseCompressedOops) {
-    movl(dst, src);
-    decode_heap_oop(dst);
-  } else
-#endif
-    movptr(dst, src);
-}
-
-// Doesn't do verification; generates fixed-size code
-void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
-#ifdef _LP64
-  if (UseCompressedOops) {
-    movl(dst, src);
-    decode_heap_oop_not_null(dst);
-  } else
-#endif
-    movptr(dst, src);
-}
-
-void MacroAssembler::store_heap_oop(Address dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedOops) {
-    assert(!dst.uses(src), "not enough registers");
-    encode_heap_oop(src);
-    movl(dst, src);
-  } else
-#endif
-    movptr(dst, src);
-}
-
-void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
-  assert_different_registers(src1, tmp);
-#ifdef _LP64
-  if (UseCompressedOops) {
-    bool did_push = false;
-    if (tmp == noreg) {
-      tmp = rax;
-      push(tmp);
-      did_push = true;
-      assert(!src2.uses(rsp), "can't push");
-    }
-    load_heap_oop(tmp, src2);
-    cmpptr(src1, tmp);
-    if (did_push)  pop(tmp);
-  } else
-#endif
-    cmpptr(src1, src2);
-}
-
-// Used for storing NULLs.
-void MacroAssembler::store_heap_oop_null(Address dst) {
-#ifdef _LP64
-  if (UseCompressedOops) {
-    movl(dst, (int32_t)NULL_WORD);
-  } else {
-    movslq(dst, (int32_t)NULL_WORD);
-  }
-#else
-  movl(dst, (int32_t)NULL_WORD);
-#endif
-}
-
-#ifdef _LP64
-void MacroAssembler::store_klass_gap(Register dst, Register src) {
-  if (UseCompressedKlassPointers) {
-    // Store to klass gap in destination
-    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
-  }
-}
-
-#ifdef ASSERT
-void MacroAssembler::verify_heapbase(const char* msg) {
-  assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  if (CheckCompressedOops) {
-    Label ok;
-    push(rscratch1); // cmpptr trashes rscratch1
-    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
-    jcc(Assembler::equal, ok);
-    STOP(msg);
-    bind(ok);
-    pop(rscratch1);
-  }
-}
-#endif
-
-// Algorithm must match oop.inline.hpp encode_heap_oop.
-void MacroAssembler::encode_heap_oop(Register r) {
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
-#endif
-  verify_oop(r, "broken oop in encode_heap_oop");
-  if (Universe::narrow_oop_base() == NULL) {
-    if (Universe::narrow_oop_shift() != 0) {
-      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-      shrq(r, LogMinObjAlignmentInBytes);
-    }
-    return;
-  }
-  testq(r, r);
-  cmovq(Assembler::equal, r, r12_heapbase);
-  subq(r, r12_heapbase);
-  shrq(r, LogMinObjAlignmentInBytes);
-}
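
In arithmetic terms the encoding above is narrow = (oop - base) >> shift, with the test/cmov pair first mapping a NULL oop to base so that NULL encodes to 0. A sketch with an assumed example base and shift (the real values come from Universe::narrow_oop_base/shift):

    #include <cstdint>
    #include <cstdio>

    uint32_t encode(uint64_t oop, uint64_t base, int shift) {
      if (oop == 0) oop = base;  // cmovq(equal, r, r12_heapbase)
      return (uint32_t)((oop - base) >> shift);
    }

    int main() {
      uint64_t base = 0x0000000700000000ULL;  // example heap base
      int shift = 3;                          // LogMinObjAlignmentInBytes
      printf("%u\n", encode(base + 0x50, base, shift));  // 10
      printf("%u\n", encode(0, base, shift));            // NULL -> 0
    }
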
-
-void MacroAssembler::encode_heap_oop_not_null(Register r) {
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
-  if (CheckCompressedOops) {
-    Label ok;
-    testq(r, r);
-    jcc(Assembler::notEqual, ok);
-    STOP("null oop passed to encode_heap_oop_not_null");
-    bind(ok);
-  }
-#endif
-  verify_oop(r, "broken oop in encode_heap_oop_not_null");
-  if (Universe::narrow_oop_base() != NULL) {
-    subq(r, r12_heapbase);
-  }
-  if (Universe::narrow_oop_shift() != 0) {
-    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-    shrq(r, LogMinObjAlignmentInBytes);
-  }
-}
-
-void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
-  if (CheckCompressedOops) {
-    Label ok;
-    testq(src, src);
-    jcc(Assembler::notEqual, ok);
-    STOP("null oop passed to encode_heap_oop_not_null2");
-    bind(ok);
-  }
-#endif
-  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
-  if (dst != src) {
-    movq(dst, src);
-  }
-  if (Universe::narrow_oop_base() != NULL) {
-    subq(dst, r12_heapbase);
-  }
-  if (Universe::narrow_oop_shift() != 0) {
-    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-    shrq(dst, LogMinObjAlignmentInBytes);
-  }
-}
-
-void  MacroAssembler::decode_heap_oop(Register r) {
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
-#endif
-  if (Universe::narrow_oop_base() == NULL) {
-    if (Universe::narrow_oop_shift() != 0) {
-      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-      shlq(r, LogMinObjAlignmentInBytes);
-    }
-  } else {
-    Label done;
-    shlq(r, LogMinObjAlignmentInBytes);
-    jccb(Assembler::equal, done);
-    addq(r, r12_heapbase);
-    bind(done);
-  }
-  verify_oop(r, "broken oop in decode_heap_oop");
-}
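
Decoding is the inverse: oop = (narrow << shift) + base, except that 0 must stay NULL, which is what the jccb(equal, done) after the shift implements (shifting zero leaves ZF set). The same arithmetic in scalar form, with the example base/shift from the encode sketch above:

    #include <cstdint>
    #include <cstdio>

    uint64_t decode(uint32_t narrow, uint64_t base, int shift) {
      uint64_t r = (uint64_t)narrow << shift;
      if (r == 0) return 0;  // NULL stays NULL
      return r + base;
    }

    int main() {
      uint64_t base = 0x0000000700000000ULL;
      printf("%#llx\n", (unsigned long long)decode(10, base, 3));  // base + 0x50
      printf("%#llx\n", (unsigned long long)decode(0, base, 3));   // 0
    }
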
-
-void  MacroAssembler::decode_heap_oop_not_null(Register r) {
-  // Note: it will change flags
-  assert (UseCompressedOops, "should only be used for compressed headers");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  // Cannot assert, unverified entry point counts instructions (see .ad file)
-  // vtableStubs also counts instructions in pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_oop_shift() != 0) {
-    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-    shlq(r, LogMinObjAlignmentInBytes);
-    if (Universe::narrow_oop_base() != NULL) {
-      addq(r, r12_heapbase);
-    }
-  } else {
-    assert (Universe::narrow_oop_base() == NULL, "sanity");
-  }
-}
-
-void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
-  // Note: it will change flags
-  assert (UseCompressedOops, "should only be used for compressed headers");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  // Cannot assert, unverified entry point counts instructions (see .ad file)
-  // vtableStubs also counts instructions in pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_oop_shift() != 0) {
-    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-    if (LogMinObjAlignmentInBytes == Address::times_8) {
-      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
-    } else {
-      if (dst != src) {
-        movq(dst, src);
-      }
-      shlq(dst, LogMinObjAlignmentInBytes);
-      if (Universe::narrow_oop_base() != NULL) {
-        addq(dst, r12_heapbase);
-      }
-    }
-  } else {
-    assert (Universe::narrow_oop_base() == NULL, "sanity");
-    if (dst != src) {
-      movq(dst, src);
-    }
-  }
-}
-
-void MacroAssembler::encode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?");
-#endif
-  if (Universe::narrow_klass_base() != NULL) {
-    subq(r, r12_heapbase);
-  }
-  if (Universe::narrow_klass_shift() != 0) {
-    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    shrq(r, LogKlassAlignmentInBytes);
-  }
-}
-
-void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?");
-#endif
-  if (dst != src) {
-    movq(dst, src);
-  }
-  if (Universe::narrow_klass_base() != NULL) {
-    subq(dst, r12_heapbase);
-  }
-  if (Universe::narrow_klass_shift() != 0) {
-    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    shrq(dst, LogKlassAlignmentInBytes);
-  }
-}
-
-void  MacroAssembler::decode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  // Note: it will change flags
-  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
-  // Cannot assert, unverified entry point counts instructions (see .ad file)
-  // vtableStubs also counts instructions in pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_klass_shift() != 0) {
-    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    shlq(r, LogKlassAlignmentInBytes);
-    if (Universe::narrow_klass_base() != NULL) {
-      addq(r, r12_heapbase);
-    }
-  } else {
-    assert (Universe::narrow_klass_base() == NULL, "sanity");
-  }
-}
-
-void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  // Note: it will change flags
-  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
-  // Cannot assert, unverified entry point counts instructions (see .ad file)
-  // vtableStubs also counts instructions in pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_klass_shift() != 0) {
-    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
-    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
-  } else {
-    assert (Universe::narrow_klass_base() == NULL, "sanity");
-    if (dst != src) {
-      movq(dst, src);
-    }
-  }
-}
-
-void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
-  assert (UseCompressedOops, "should only be used for compressed headers");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int oop_index = oop_recorder()->find_index(obj);
-  RelocationHolder rspec = oop_Relocation::spec(oop_index);
-  mov_narrow_oop(dst, oop_index, rspec);
-}
-
-void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
-  assert (UseCompressedOops, "should only be used for compressed headers");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int oop_index = oop_recorder()->find_index(obj);
-  RelocationHolder rspec = oop_Relocation::spec(oop_index);
-  mov_narrow_oop(dst, oop_index, rspec);
-}
-
-void  MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
-  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
-  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int klass_index = oop_recorder()->find_index(k);
-  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
-}
-
-void  MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
-  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
-  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int klass_index = oop_recorder()->find_index(k);
-  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
-}
-
-void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
-  assert (UseCompressedOops, "should only be used for compressed headers");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int oop_index = oop_recorder()->find_index(obj);
-  RelocationHolder rspec = oop_Relocation::spec(oop_index);
-  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
-}
-
-void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
-  assert (UseCompressedOops, "should only be used for compressed headers");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int oop_index = oop_recorder()->find_index(obj);
-  RelocationHolder rspec = oop_Relocation::spec(oop_index);
-  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
-}
-
-void  MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) {
-  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
-  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int klass_index = oop_recorder()->find_index(k);
-  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
-}
-
-void  MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
-  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
-  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  int klass_index = oop_recorder()->find_index(k);
-  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
-}
-
-void MacroAssembler::reinit_heapbase() {
-  if (UseCompressedOops || UseCompressedKlassPointers) {
-    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
-  }
-}
-#endif // _LP64
-
-
-// C2 compiled method's prolog code.
-void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
-
-  // WARNING: the initial instruction MUST be 5 bytes or longer so that
-  // NativeJump::patch_verified_entry will be able to patch out the entry
-  // code safely. The push to verify stack depth is ok at 5 bytes;
-  // the frame allocation can be either 3 or 6 bytes. So if we don't do a
-  // stack bang, we must use the 6-byte frame allocation even if
-  // we have no frame. :-(
-
-  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove word for return addr
-  framesize -= wordSize;
-
-  // Calls to C2R adapters often do not accept exceptional returns.
-  // We require that their callers bang for them.  Be careful, because
-  // some VM calls (such as call site linkage) can use several kilobytes of
-  // stack, but the stack safety zone should account for that.
-  // See bugs 4446381, 4468289, 4497237.
-  if (stack_bang) {
-    generate_stack_overflow_check(framesize);
-
-    // We always push rbp, so that on return to interpreter rbp, will be
-    // restored correctly and we can correct the stack.
-    push(rbp);
-    // Remove word for rbp
-    framesize -= wordSize;
-
-    // Create frame
-    if (framesize) {
-      subptr(rsp, framesize);
-    }
-  } else {
-    // Create frame (force generation of a 4 byte immediate value)
-    subptr_imm32(rsp, framesize);
-
-    // Save RBP register now.
-    framesize -= wordSize;
-    movptr(Address(rsp, framesize), rbp);
-  }
-
-  if (VerifyStackAtCalls) { // Magic cookie to verify stack depth
-    framesize -= wordSize;
-    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
-  }
-
-#ifndef _LP64
-  // If method sets FPU control word do it now
-  if (fp_mode_24b) {
-    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
-  }
-  if (UseSSE >= 2 && VerifyFPU) {
-    verify_FPU(0, "FPU stack must be clean on entry");
-  }
-#endif
-
-#ifdef ASSERT
-  if (VerifyStackAtCalls) {
-    Label L;
-    push(rax);
-    mov(rax, rsp);
-    andptr(rax, StackAlignmentInBytes-1);
-    cmpptr(rax, StackAlignmentInBytes-wordSize);
-    pop(rax);
-    jcc(Assembler::equal, L);
-    STOP("Stack is not properly aligned!");
-    bind(L);
-  }
-#endif
-
-}
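
The frame bookkeeping in verified_entry() peels implicit slots off the incoming framesize: one word for the return address, one for the saved rbp, and optionally one for the stack-check cookie; whatever remains is allocated with subptr. A sketch of just that accounting, with an 8-byte word assumed:

    #include <cstdio>

    int remaining_frame(int framesize, bool verify_stack) {
      const int wordSize = 8;                   // 64-bit assumption
      framesize -= wordSize;                    // return address
      framesize -= wordSize;                    // saved rbp
      if (verify_stack) framesize -= wordSize;  // 0xbadb100d cookie slot
      return framesize;                         // bytes for subptr(rsp, ...)
    }

    int main() {
      printf("%d\n", remaining_frame(48, false));  // 32
      printf("%d\n", remaining_frame(48, true));   // 24
    }
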
-
-
-// IndexOf for constant substrings with size >= 8 chars
-// which don't need to be loaded through stack.
-void MacroAssembler::string_indexofC8(Register str1, Register str2,
-                                      Register cnt1, Register cnt2,
-                                      int int_cnt2,  Register result,
-                                      XMMRegister vec, Register tmp) {
-  ShortBranchVerifier sbv(this);
-  assert(UseSSE42Intrinsics, "SSE4.2 is required");
-
-  // This method uses the pcmpestri instruction with bound registers
-  //   inputs:
-  //     xmm - substring
-  //     rax - substring length (elements count)
-  //     mem - scanned string
-  //     rdx - string length (elements count)
-  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
-  //   outputs:
-  //     rcx - matched index in string
-  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
-
-  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
-        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
-        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
-
-  // Note, inline_string_indexOf() generates checks:
-  // if (substr.count > string.count) return -1;
-  // if (substr.count == 0) return 0;
-  assert(int_cnt2 >= 8, "this code is used only for cnt2 >= 8 chars");
-
-  // Load substring.
-  movdqu(vec, Address(str2, 0));
-  movl(cnt2, int_cnt2);
-  movptr(result, str1); // string addr
-
-  if (int_cnt2 > 8) {
-    jmpb(SCAN_TO_SUBSTR);
-
-    // Reload substr for rescan; this code
-    // is executed only for large substrings (> 8 chars)
-    bind(RELOAD_SUBSTR);
-    movdqu(vec, Address(str2, 0));
-    negptr(cnt2); // Jumped here with negative cnt2, convert to positive
-
-    bind(RELOAD_STR);
-    // We came here after the beginning of the substring was
-    // matched but the rest of it was not, so we need to search
-    // again. Start from the next element after the previous match.
-
-    // cnt2 is the number of remaining substring elements and
-    // cnt1 is the number of remaining string elements when cmp failed.
-    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
-    subl(cnt1, cnt2);
-    addl(cnt1, int_cnt2);
-    movl(cnt2, int_cnt2); // Now restore cnt2
-
-    decrementl(cnt1);     // Shift to next element
-    cmpl(cnt1, cnt2);
-    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
-
-    addptr(result, 2);
-
-  } // (int_cnt2 > 8)
-
-  // Scan string for start of substr in 16-byte vectors
-  bind(SCAN_TO_SUBSTR);
-  pcmpestri(vec, Address(result, 0), 0x0d);
-  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
-  subl(cnt1, 8);
-  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
-  cmpl(cnt1, cnt2);
-  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
-  addptr(result, 16);
-  jmpb(SCAN_TO_SUBSTR);
-
-  // Found a potential substr
-  bind(FOUND_CANDIDATE);
-  // Matched whole vector if first element matched (tmp(rcx) == 0).
-  if (int_cnt2 == 8) {
-    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
-  } else { // int_cnt2 > 8
-    jccb(Assembler::overflow, FOUND_SUBSTR);
-  }
-  // After pcmpestri tmp(rcx) contains matched element index
-  // Compute start addr of substr
-  lea(result, Address(result, tmp, Address::times_2));
-
-  // Make sure string is still long enough
-  subl(cnt1, tmp);
-  cmpl(cnt1, cnt2);
-  if (int_cnt2 == 8) {
-    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
-  } else { // int_cnt2 > 8
-    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
-  }
-  // Left less than substring.
-
-  bind(RET_NOT_FOUND);
-  movl(result, -1);
-  jmpb(EXIT);
-
-  if (int_cnt2 > 8) {
-    // This code is optimized for the case where the whole substring
-    // matches whenever its head matches.
-    bind(MATCH_SUBSTR_HEAD);
-    pcmpestri(vec, Address(result, 0), 0x0d);
-    // Reload only the string if it does not match
-    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
-
-    Label CONT_SCAN_SUBSTR;
-    // Compare the rest of substring (> 8 chars).
-    bind(FOUND_SUBSTR);
-    // First 8 chars are already matched.
-    negptr(cnt2);
-    addptr(cnt2, 8);
-
-    bind(SCAN_SUBSTR);
-    subl(cnt1, 8);
-    cmpl(cnt2, -8); // Do not read beyond substring
-    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
-    // Back-up strings to avoid reading beyond substring:
-    // cnt1 = cnt1 - cnt2 + 8
-    addl(cnt1, cnt2); // cnt2 is negative
-    addl(cnt1, 8);
-    movl(cnt2, 8); negptr(cnt2);
-    bind(CONT_SCAN_SUBSTR);
-    if (int_cnt2 < (int)G) {
-      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
-      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
-    } else {
-      // calculate index in register to avoid integer overflow (int_cnt2*2)
-      movl(tmp, int_cnt2);
-      addptr(tmp, cnt2);
-      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
-      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
-    }
-    // Need to reload string pointers if we did not match the whole vector
-    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
-    addptr(cnt2, 8);
-    jcc(Assembler::negative, SCAN_SUBSTR);
-    // Fall through if found full substring
-
-  } // (int_cnt2 > 8)
-
-  bind(RET_FOUND);
-  // Found result if we matched full small substring.
-  // Compute substr offset
-  subptr(result, str1);
-  shrl(result, 1); // index
-  bind(EXIT);
-
-} // string_indexofC8
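
The pcmpestri immediate 0x0d used throughout this routine packs the comparison controls into one byte: bits [1:0] select unsigned 16-bit elements (Java chars) and bits [3:2] select the "equal ordered" aggregation, i.e. substring search. A one-liner to pull the fields apart:

    #include <cstdio>

    int main() {
      int imm8 = 0x0d;
      printf("elements=%d aggregation=%d\n",
             imm8 & 3, (imm8 >> 2) & 3);  // elements=1 aggregation=3
    }
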
-
-// Small strings are loaded through stack if they cross page boundary.
-void MacroAssembler::string_indexof(Register str1, Register str2,
-                                    Register cnt1, Register cnt2,
-                                    int int_cnt2,  Register result,
-                                    XMMRegister vec, Register tmp) {
-  ShortBranchVerifier sbv(this);
-  assert(UseSSE42Intrinsics, "SSE4.2 is required");
-  //
-  // int_cnt2 is the length of a small (< 8 chars) constant substring,
-  // or (-1) for a non-constant substring, in which case its length
-  // is in the cnt2 register.
-  //
-  // Note, inline_string_indexOf() generates checks:
-  // if (substr.count > string.count) return -1;
-  // if (substr.count == 0) return 0;
-  //
-  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "int_cnt2 must be -1 or in (0, 8)");
-
-  // This method uses the pcmpestri instruction with bound registers
-  //   inputs:
-  //     xmm - substring
-  //     rax - substring length (elements count)
-  //     mem - scanned string
-  //     rdx - string length (elements count)
-  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
-  //   outputs:
-  //     rcx - matched index in string
-  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
-
-  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
-        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
-        FOUND_CANDIDATE;
-
-  { //========================================================
-    // We don't know where these strings are located
-    // and we can't read beyond them. Load them through stack.
-    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
-
-    movptr(tmp, rsp); // save old SP
-
-    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
-      if (int_cnt2 == 1) {  // One char
-        load_unsigned_short(result, Address(str2, 0));
-        movdl(vec, result); // move 32 bits
-      } else if (int_cnt2 == 2) { // Two chars
-        movdl(vec, Address(str2, 0)); // move 32 bits
-      } else if (int_cnt2 == 4) { // Four chars
-        movq(vec, Address(str2, 0));  // move 64 bits
-      } else { // cnt2 = { 3, 5, 6, 7 }
-        // Array header size is 12 bytes in 32-bit VM
-        // + 6 bytes for 3 chars == 18 bytes,
-        // enough space to load vec and shift.
-        assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
-        movdqu(vec, Address(str2, (int_cnt2*2)-16));
-        psrldq(vec, 16-(int_cnt2*2));
-      }
-    } else { // not constant substring
-      cmpl(cnt2, 8);
-      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
-
-      // We can read beyond the string if str+16 does not cross a page boundary
-      // since heaps are aligned and mapped by pages.
-      assert(os::vm_page_size() < (int)G, "default page should be small");
-      movl(result, str2); // We need only low 32 bits
-      andl(result, (os::vm_page_size()-1));
-      cmpl(result, (os::vm_page_size()-16));
-      jccb(Assembler::belowEqual, CHECK_STR);
-
-      // Move small strings to the stack to allow loading 16 bytes into vec.
-      subptr(rsp, 16);
-      int stk_offset = wordSize-2;
-      push(cnt2);
-
-      bind(COPY_SUBSTR);
-      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
-      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
-      decrement(cnt2);
-      jccb(Assembler::notZero, COPY_SUBSTR);
-
-      pop(cnt2);
-      movptr(str2, rsp);  // New substring address
-    } // non constant
-
-    bind(CHECK_STR);
-    cmpl(cnt1, 8);
-    jccb(Assembler::aboveEqual, BIG_STRINGS);
-
-    // Check cross page boundary.
-    movl(result, str1); // We need only low 32 bits
-    andl(result, (os::vm_page_size()-1));
-    cmpl(result, (os::vm_page_size()-16));
-    jccb(Assembler::belowEqual, BIG_STRINGS);
-
-    subptr(rsp, 16);
-    int stk_offset = -2;
-    if (int_cnt2 < 0) { // not constant
-      push(cnt2);
-      stk_offset += wordSize;
-    }
-    movl(cnt2, cnt1);
-
-    bind(COPY_STR);
-    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
-    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
-    decrement(cnt2);
-    jccb(Assembler::notZero, COPY_STR);
-
-    if (int_cnt2 < 0) { // not constant
-      pop(cnt2);
-    }
-    movptr(str1, rsp);  // New string address
-
-    bind(BIG_STRINGS);
-    // Load substring.
-    if (int_cnt2 < 0) { // -1
-      movdqu(vec, Address(str2, 0));
-      push(cnt2);       // substr count
-      push(str2);       // substr addr
-      push(str1);       // string addr
-    } else {
-      // Small (< 8 chars) constant substrings are loaded already.
-      movl(cnt2, int_cnt2);
-    }
-    push(tmp);  // original SP
-
-  } // Finished loading
-
-  //========================================================
-  // Start search
-  //
-
-  movptr(result, str1); // string addr
-
-  if (int_cnt2  < 0) {  // Only for non constant substring
-    jmpb(SCAN_TO_SUBSTR);
-
-    // SP saved at sp+0
-    // String saved at sp+1*wordSize
-    // Substr saved at sp+2*wordSize
-    // Substr count saved at sp+3*wordSize
-
-    // Reload substr for rescan; this code
-    // is executed only for large substrings (> 8 chars)
-    bind(RELOAD_SUBSTR);
-    movptr(str2, Address(rsp, 2*wordSize));
-    movl(cnt2, Address(rsp, 3*wordSize));
-    movdqu(vec, Address(str2, 0));
-    // We came here after the beginning of the substring was
-    // matched but the rest of it was not, so we need to search
-    // again. Start from the next element after the previous match.
-    subptr(str1, result); // Restore counter
-    shrl(str1, 1);
-    addl(cnt1, str1);
-    decrementl(cnt1);   // Shift to next element
-    cmpl(cnt1, cnt2);
-    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
-
-    addptr(result, 2);
-  } // non constant
-
-  // Scan string for start of substr in 16-byte vectors
-  bind(SCAN_TO_SUBSTR);
-  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
-  pcmpestri(vec, Address(result, 0), 0x0d);
-  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
-  subl(cnt1, 8);
-  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
-  cmpl(cnt1, cnt2);
-  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
-  addptr(result, 16);
-
-  bind(ADJUST_STR);
-  cmpl(cnt1, 8); // Do not read beyond string
-  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
-  // Back-up string to avoid reading beyond string.
-  lea(result, Address(result, cnt1, Address::times_2, -16));
-  movl(cnt1, 8);
-  jmpb(SCAN_TO_SUBSTR);
-
-  // Found a potential substr
-  bind(FOUND_CANDIDATE);
-  // After pcmpestri tmp(rcx) contains matched element index
-
-  // Make sure string is still long enough
-  subl(cnt1, tmp);
-  cmpl(cnt1, cnt2);
-  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
-  // Left less than substring.
-
-  bind(RET_NOT_FOUND);
-  movl(result, -1);
-  jmpb(CLEANUP);
-
-  bind(FOUND_SUBSTR);
-  // Compute start addr of substr
-  lea(result, Address(result, tmp, Address::times_2));
-
-  if (int_cnt2 > 0) { // Constant substring
-    // Repeat search for small substring (< 8 chars)
-    // from new point without reloading substring.
-    // Have to check that we don't read beyond string.
-    cmpl(tmp, 8-int_cnt2);
-    jccb(Assembler::greater, ADJUST_STR);
-    // Fall through if matched whole substring.
-  } else { // non constant
-    assert(int_cnt2 == -1, "should be != 0");
-
-    addl(tmp, cnt2);
-    // Found result if we matched whole substring.
-    cmpl(tmp, 8);
-    jccb(Assembler::lessEqual, RET_FOUND);
-
-    // Repeat search for small substring (<= 8 chars)
-    // from new point 'str1' without reloading substring.
-    cmpl(cnt2, 8);
-    // Have to check that we don't read beyond string.
-    jccb(Assembler::lessEqual, ADJUST_STR);
-
-    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
-    // Compare the rest of substring (> 8 chars).
-    movptr(str1, result);
-
-    cmpl(tmp, cnt2);
-    // First 8 chars are already matched.
-    jccb(Assembler::equal, CHECK_NEXT);
-
-    bind(SCAN_SUBSTR);
-    pcmpestri(vec, Address(str1, 0), 0x0d);
-    // Need to reload string pointers if we did not match the whole vector
-    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
-
-    bind(CHECK_NEXT);
-    subl(cnt2, 8);
-    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
-    addptr(str1, 16);
-    addptr(str2, 16);
-    subl(cnt1, 8);
-    cmpl(cnt2, 8); // Do not read beyond substring
-    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
-    // Back-up strings to avoid reading beyond substring.
-    lea(str2, Address(str2, cnt2, Address::times_2, -16));
-    lea(str1, Address(str1, cnt2, Address::times_2, -16));
-    subl(cnt1, cnt2);
-    movl(cnt2, 8);
-    addl(cnt1, 8);
-    bind(CONT_SCAN_SUBSTR);
-    movdqu(vec, Address(str2, 0));
-    jmpb(SCAN_SUBSTR);
-
-    bind(RET_FOUND_LONG);
-    movptr(str1, Address(rsp, wordSize));
-  } // non constant
-
-  bind(RET_FOUND);
-  // Compute substr offset
-  subptr(result, str1);
-  shrl(result, 1); // index
-
-  bind(CLEANUP);
-  pop(rsp); // restore SP
-
-} // string_indexof
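
The cross-page checks above rely on heap pages being aligned: a 16-byte movdqu at addr is safe as long as addr's offset within its page is at most page_size - 16; otherwise the string is first copied to the stack. A scalar model of the test, with a 4K page assumed for illustration (os::vm_page_size() in HotSpot):

    #include <cstdint>
    #include <cstdio>

    bool safe_16byte_load(uint64_t addr) {
      const uint64_t page = 4096;
      return (addr & (page - 1)) <= page - 16;
    }

    int main() {
      printf("%d\n", safe_16byte_load(0x7000));  // 1: start of page
      printf("%d\n", safe_16byte_load(0x7FF0));  // 1: exactly fits
      printf("%d\n", safe_16byte_load(0x7FF8));  // 0: would cross a page
    }
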
-
-// Compare strings.
-void MacroAssembler::string_compare(Register str1, Register str2,
-                                    Register cnt1, Register cnt2, Register result,
-                                    XMMRegister vec1) {
-  ShortBranchVerifier sbv(this);
-  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
-
-  // Compute the minimum of the string lengths and the
-  // difference of the string lengths (stack).
-  // Do the conditional move stuff
-  movl(result, cnt1);
-  subl(cnt1, cnt2);
-  push(cnt1);
-  cmov32(Assembler::lessEqual, cnt2, result);
-
-  // Is the minimum length zero?
-  testl(cnt2, cnt2);
-  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
-  // Load first characters
-  load_unsigned_short(result, Address(str1, 0));
-  load_unsigned_short(cnt1, Address(str2, 0));
-
-  // Compare first characters
-  subl(result, cnt1);
-  jcc(Assembler::notZero,  POP_LABEL);
-  decrementl(cnt2);
-  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
-  {
-    // Check after comparing first character to see if strings are equivalent
-    Label LSkip2;
-    // Check if the strings start at same location
-    cmpptr(str1, str2);
-    jccb(Assembler::notEqual, LSkip2);
-
-    // Check if the length difference is zero (from stack)
-    cmpl(Address(rsp, 0), 0x0);
-    jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
-
-    // Strings might not be equivalent
-    bind(LSkip2);
-  }
-
-  Address::ScaleFactor scale = Address::times_2;
-  int stride = 8;
-
-  // Advance to next element
-  addptr(str1, 16/stride);
-  addptr(str2, 16/stride);
-
-  if (UseSSE42Intrinsics) {
-    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
-    int pcmpmask = 0x19;
-    // Setup to compare 16-byte vectors
-    movl(result, cnt2);
-    andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
-    jccb(Assembler::zero, COMPARE_TAIL);
-
-    lea(str1, Address(str1, result, scale));
-    lea(str2, Address(str2, result, scale));
-    negptr(result);
-
-    // pcmpestri
-    //   inputs:
-    //     vec1- substring
-    //     rax - negative string length (elements count)
-    //     mem - scanned string
-    //     rdx - string length (elements count)
-    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
-    //               + 00 (unsigned bytes) or  + 01 (unsigned shorts)
-    //   outputs:
-    //     rcx - first mismatched element index
-    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
-
-    bind(COMPARE_WIDE_VECTORS);
-    movdqu(vec1, Address(str1, result, scale));
-    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
-    // After pcmpestri cnt1(rcx) contains mismatched element index
-
-    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
-    addptr(result, stride);
-    subptr(cnt2, stride);
-    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
-
-    // compare wide vectors tail
-    testl(result, result);
-    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
-
-    movl(cnt2, stride);
-    movl(result, stride);
-    negptr(result);
-    movdqu(vec1, Address(str1, result, scale));
-    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
-    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
-
-    // Mismatched characters in the vectors
-    bind(VECTOR_NOT_EQUAL);
-    addptr(result, cnt1);
-    movptr(cnt2, result);
-    load_unsigned_short(result, Address(str1, cnt2, scale));
-    load_unsigned_short(cnt1, Address(str2, cnt2, scale));
-    subl(result, cnt1);
-    jmpb(POP_LABEL);
-
-    bind(COMPARE_TAIL); // limit is zero
-    movl(cnt2, result);
-    // Fallthru to tail compare
-  }
-
-  // Shift str2 and str1 to the end of the arrays, negate min
-  lea(str1, Address(str1, cnt2, scale, 0));
-  lea(str2, Address(str2, cnt2, scale, 0));
-  negptr(cnt2);
-
-  // Compare the rest of the elements
-  bind(WHILE_HEAD_LABEL);
-  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
-  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
-  subl(result, cnt1);
-  jccb(Assembler::notZero, POP_LABEL);
-  increment(cnt2);
-  jccb(Assembler::notZero, WHILE_HEAD_LABEL);
-
-  // Strings are equal up to min length.  Return the length difference.
-  bind(LENGTH_DIFF_LABEL);
-  pop(result);
-  jmpb(DONE_LABEL);
-
-  // Discard the stored length difference
-  bind(POP_LABEL);
-  pop(cnt1);
-
-  // That's it
-  bind(DONE_LABEL);
-}
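
Here the pcmpestri immediate is 0x19 rather than 0x0d: bits [1:0] = 01 again select unsigned 16-bit elements, bits [3:2] = 10 select "equal each" (element-wise compare), and bits [5:4] = 01 negate the result so that set bits mark mismatches. Decoded the same way as before:

    #include <cstdio>

    int main() {
      int imm8 = 0x19;
      printf("elements=%d aggregation=%d polarity=%d\n",
             imm8 & 3, (imm8 >> 2) & 3, (imm8 >> 4) & 3);
      // elements=1 aggregation=2 polarity=1 (negated)
    }
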
-
-// Compare char[] arrays aligned to 4 bytes or substrings.
-void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
-                                        Register limit, Register result, Register chr,
-                                        XMMRegister vec1, XMMRegister vec2) {
-  ShortBranchVerifier sbv(this);
-  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
-
-  int length_offset  = arrayOopDesc::length_offset_in_bytes();
-  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-  // Check the input args
-  cmpptr(ary1, ary2);
-  jcc(Assembler::equal, TRUE_LABEL);
-
-  if (is_array_equ) {
-    // Need additional checks for arrays_equals.
-    testptr(ary1, ary1);
-    jcc(Assembler::zero, FALSE_LABEL);
-    testptr(ary2, ary2);
-    jcc(Assembler::zero, FALSE_LABEL);
-
-    // Check the lengths
-    movl(limit, Address(ary1, length_offset));
-    cmpl(limit, Address(ary2, length_offset));
-    jcc(Assembler::notEqual, FALSE_LABEL);
-  }
-
-  // count == 0
-  testl(limit, limit);
-  jcc(Assembler::zero, TRUE_LABEL);
-
-  if (is_array_equ) {
-    // Load array address
-    lea(ary1, Address(ary1, base_offset));
-    lea(ary2, Address(ary2, base_offset));
-  }
-
-  shll(limit, 1);      // byte count != 0
-  movl(result, limit); // copy
-
-  if (UseSSE42Intrinsics) {
-    // With SSE4.2, use double quad vector compare
-    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
-
-    // Compare 16-byte vectors
-    andl(result, 0x0000000e);  //   tail count (in bytes)
-    andl(limit, 0xfffffff0);   // vector count (in bytes)
-    jccb(Assembler::zero, COMPARE_TAIL);
-
-    lea(ary1, Address(ary1, limit, Address::times_1));
-    lea(ary2, Address(ary2, limit, Address::times_1));
-    negptr(limit);
-
-    bind(COMPARE_WIDE_VECTORS);
-    movdqu(vec1, Address(ary1, limit, Address::times_1));
-    movdqu(vec2, Address(ary2, limit, Address::times_1));
-    pxor(vec1, vec2);
-
-    ptest(vec1, vec1);
-    jccb(Assembler::notZero, FALSE_LABEL);
-    addptr(limit, 16);
-    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
-
-    testl(result, result);
-    jccb(Assembler::zero, TRUE_LABEL);
-
-    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
-    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
-    pxor(vec1, vec2);
-
-    ptest(vec1, vec1);
-    jccb(Assembler::notZero, FALSE_LABEL);
-    jmpb(TRUE_LABEL);
-
-    bind(COMPARE_TAIL); // limit is zero
-    movl(limit, result);
-    // Fallthru to tail compare
-  }
-
-  // Compare 4-byte vectors
-  andl(limit, 0xfffffffc); // vector count (in bytes)
-  jccb(Assembler::zero, COMPARE_CHAR);
-
-  lea(ary1, Address(ary1, limit, Address::times_1));
-  lea(ary2, Address(ary2, limit, Address::times_1));
-  negptr(limit);
-
-  bind(COMPARE_VECTORS);
-  movl(chr, Address(ary1, limit, Address::times_1));
-  cmpl(chr, Address(ary2, limit, Address::times_1));
-  jccb(Assembler::notEqual, FALSE_LABEL);
-  addptr(limit, 4);
-  jcc(Assembler::notZero, COMPARE_VECTORS);
-
-  // Compare trailing char (final 2 bytes), if any
-  bind(COMPARE_CHAR);
-  testl(result, 0x2);   // tail  char
-  jccb(Assembler::zero, TRUE_LABEL);
-  load_unsigned_short(chr, Address(ary1, 0));
-  load_unsigned_short(limit, Address(ary2, 0));
-  cmpl(chr, limit);
-  jccb(Assembler::notEqual, FALSE_LABEL);
-
-  bind(TRUE_LABEL);
-  movl(result, 1);   // return true
-  jmpb(DONE);
-
-  bind(FALSE_LABEL);
-  xorl(result, result); // return false
-
-  // That's it
-  bind(DONE);
-}
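
The vector loop above tests 16-byte chunks for equality with pxor followed by ptest: the XOR of two equal chunks is all-zero, and ptest(vec1, vec1) sets ZF exactly when vec1 is all-zero. A scalar model of that trick:

    #include <cstdint>
    #include <cstdio>

    bool chunks_equal(const uint8_t* a, const uint8_t* b) {
      uint8_t acc = 0;
      for (int i = 0; i < 16; i++) acc |= (uint8_t)(a[i] ^ b[i]);
      return acc == 0;  // all-zero XOR <=> equal chunks
    }

    int main() {
      uint8_t x[16] = {1, 2, 3}, y[16] = {1, 2, 3}, z[16] = {1, 2, 4};
      printf("%d %d\n", chunks_equal(x, y), chunks_equal(x, z));  // 1 0
    }
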
-
-void MacroAssembler::generate_fill(BasicType t, bool aligned,
-                                   Register to, Register value, Register count,
-                                   Register rtmp, XMMRegister xtmp) {
-  ShortBranchVerifier sbv(this);
-  assert_different_registers(to, value, count, rtmp);
-  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
-  Label L_fill_2_bytes, L_fill_4_bytes;
-
-  int shift = -1;
-  switch (t) {
-    case T_BYTE:
-      shift = 2;
-      break;
-    case T_SHORT:
-      shift = 1;
-      break;
-    case T_INT:
-      shift = 0;
-      break;
-    default: ShouldNotReachHere();
-  }
-
-  if (t == T_BYTE) {
-    andl(value, 0xff);
-    movl(rtmp, value);
-    shll(rtmp, 8);
-    orl(value, rtmp);
-  }
-  if (t == T_SHORT) {
-    andl(value, 0xffff);
-  }
-  if (t == T_BYTE || t == T_SHORT) {
-    movl(rtmp, value);
-    shll(rtmp, 16);
-    orl(value, rtmp);
-  }
-
-  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
-  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
-  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
-    // align source address at a 4-byte address boundary
-    if (t == T_BYTE) {
-      // One-byte misalignment happens only for byte arrays
-      testptr(to, 1);
-      jccb(Assembler::zero, L_skip_align1);
-      movb(Address(to, 0), value);
-      increment(to);
-      decrement(count);
-      BIND(L_skip_align1);
-    }
-    // Two-byte misalignment happens only for byte and short (char) arrays
-    testptr(to, 2);
-    jccb(Assembler::zero, L_skip_align2);
-    movw(Address(to, 0), value);
-    addptr(to, 2);
-    subl(count, 1<<(shift-1));
-    BIND(L_skip_align2);
-  }
-  if (UseSSE < 2) {
-    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
-    // Fill 32-byte chunks
-    subl(count, 8 << shift);
-    jcc(Assembler::less, L_check_fill_8_bytes);
-    align(16);
-
-    BIND(L_fill_32_bytes_loop);
-
-    for (int i = 0; i < 32; i += 4) {
-      movl(Address(to, i), value);
-    }
-
-    addptr(to, 32);
-    subl(count, 8 << shift);
-    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
-    BIND(L_check_fill_8_bytes);
-    addl(count, 8 << shift);
-    jccb(Assembler::zero, L_exit);
-    jmpb(L_fill_8_bytes);
-
-    //
-    // length is too short, just fill qwords
-    //
-    BIND(L_fill_8_bytes_loop);
-    movl(Address(to, 0), value);
-    movl(Address(to, 4), value);
-    addptr(to, 8);
-    BIND(L_fill_8_bytes);
-    subl(count, 1 << (shift + 1));
-    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
-    // fall through to fill 4 bytes
-  } else {
-    Label L_fill_32_bytes;
-    if (!UseUnalignedLoadStores) {
-      // align to 8 bytes; we know we are 4-byte aligned to start
-      testptr(to, 4);
-      jccb(Assembler::zero, L_fill_32_bytes);
-      movl(Address(to, 0), value);
-      addptr(to, 4);
-      subl(count, 1<<shift);
-    }
-    BIND(L_fill_32_bytes);
-    {
-      assert( UseSSE >= 2, "supported cpu only" );
-      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
-      // Fill 32-byte chunks
-      movdl(xtmp, value);
-      pshufd(xtmp, xtmp, 0);
-
-      subl(count, 8 << shift);
-      jcc(Assembler::less, L_check_fill_8_bytes);
-      align(16);
-
-      BIND(L_fill_32_bytes_loop);
-
-      if (UseUnalignedLoadStores) {
-        movdqu(Address(to, 0), xtmp);
-        movdqu(Address(to, 16), xtmp);
-      } else {
-        movq(Address(to, 0), xtmp);
-        movq(Address(to, 8), xtmp);
-        movq(Address(to, 16), xtmp);
-        movq(Address(to, 24), xtmp);
-      }
-
-      addptr(to, 32);
-      subl(count, 8 << shift);
-      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
-      BIND(L_check_fill_8_bytes);
-      addl(count, 8 << shift);
-      jccb(Assembler::zero, L_exit);
-      jmpb(L_fill_8_bytes);
-
-      //
-      // length is too short, just fill qwords
-      //
-      BIND(L_fill_8_bytes_loop);
-      movq(Address(to, 0), xtmp);
-      addptr(to, 8);
-      BIND(L_fill_8_bytes);
-      subl(count, 1 << (shift + 1));
-      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
-    }
-  }
-  // fill trailing 4 bytes
-  BIND(L_fill_4_bytes);
-  testl(count, 1<<shift);
-  jccb(Assembler::zero, L_fill_2_bytes);
-  movl(Address(to, 0), value);
-  if (t == T_BYTE || t == T_SHORT) {
-    addptr(to, 4);
-    BIND(L_fill_2_bytes);
-    // fill trailing 2 bytes
-    testl(count, 1<<(shift-1));
-    jccb(Assembler::zero, L_fill_byte);
-    movw(Address(to, 0), value);
-    if (t == T_BYTE) {
-      addptr(to, 2);
-      BIND(L_fill_byte);
-      // fill trailing byte
-      testl(count, 1);
-      jccb(Assembler::zero, L_exit);
-      movb(Address(to, 0), value);
-    } else {
-      BIND(L_fill_byte);
-    }
-  } else {
-    BIND(L_fill_2_bytes);
-  }
-  BIND(L_exit);
-}
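
The shift-and-or sequence at the top of generate_fill() broadcasts a byte (or short) fill value across a 32-bit word before the wide stores. The same replication in scalar form:

    #include <cstdint>
    #include <cstdio>

    uint32_t broadcast_byte(uint32_t v) {
      v &= 0xff;
      v |= v << 8;   // 000000AB -> 0000ABAB
      v |= v << 16;  // 0000ABAB -> ABABABAB
      return v;
    }

    int main() {
      printf("%08x\n", broadcast_byte(0xAB));  // abababab
    }
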
-#undef BIND
-#undef BLOCK_COMMENT
-
-
-Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
-  switch (cond) {
-    // Note some conditions are synonyms for others
-    case Assembler::zero:         return Assembler::notZero;
-    case Assembler::notZero:      return Assembler::zero;
-    case Assembler::less:         return Assembler::greaterEqual;
-    case Assembler::lessEqual:    return Assembler::greater;
-    case Assembler::greater:      return Assembler::lessEqual;
-    case Assembler::greaterEqual: return Assembler::less;
-    case Assembler::below:        return Assembler::aboveEqual;
-    case Assembler::belowEqual:   return Assembler::above;
-    case Assembler::above:        return Assembler::belowEqual;
-    case Assembler::aboveEqual:   return Assembler::below;
-    case Assembler::overflow:     return Assembler::noOverflow;
-    case Assembler::noOverflow:   return Assembler::overflow;
-    case Assembler::negative:     return Assembler::positive;
-    case Assembler::positive:     return Assembler::negative;
-    case Assembler::parity:       return Assembler::noParity;
-    case Assembler::noParity:     return Assembler::parity;
-  }
-  ShouldNotReachHere(); return Assembler::overflow;
-}
-
-SkipIfEqual::SkipIfEqual(
-    MacroAssembler* masm, const bool* flag_addr, bool value) {
-  _masm = masm;
-  _masm->cmp8(ExternalAddress((address)flag_addr), value);
-  _masm->jcc(Assembler::equal, _label);
-}
-
-SkipIfEqual::~SkipIfEqual() {
-  _masm->bind(_label);
-}
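The SkipIfEqual pair above is a small RAII idiom: the constructor emits the cmp8/jcc, the destructor binds the label, so everything emitted in between is jumped over at run time when the flag equals the given value. A self-contained run-time analog of that shape, with hypothetical names and no code emission:

#include <cstdio>

// The constructor makes the decision the emitted compare-and-branch would
// make; the end of the scope corresponds to where ~SkipIfEqual binds _label.
class ScopedSkipDemo {
  bool _run;
 public:
  ScopedSkipDemo(const bool* flag_addr, bool skip_if_value)
    : _run(*flag_addr != skip_if_value) {}
  bool run() const { return _run; }
};

int main() {
  bool probes_enabled = false;
  {
    ScopedSkipDemo guard(&probes_enabled, /*skip_if_value=*/false);
    if (guard.run()) std::puts("guarded code runs");  // skipped in this case
  }  // guarded region ends here
  return 0;
}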
--- a/src/cpu/x86/vm/assembler_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,8 @@
 #ifndef CPU_X86_VM_ASSEMBLER_X86_HPP
 #define CPU_X86_VM_ASSEMBLER_X86_HPP
 
+#include "asm/register.hpp"
+
 class BiasedLockingCounters;
 
 // Contains all the definitions needed for x86 assembly code generation.
@@ -706,8 +708,6 @@
   void check_relocation(RelocationHolder const& rspec, int format);
   #endif
 
-  inline void emit_long64(jlong x);
-
   void emit_data(jint data, relocInfo::relocType    rtype, int format);
   void emit_data(jint data, RelocationHolder const& rspec, int format);
   void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
@@ -832,7 +832,8 @@
 
   // These do register sized moves/scans
   void rep_mov();
-  void rep_set();
+  void rep_stos();
+  void rep_stosb();
   void repne_scan();
 #ifdef _LP64
   void repne_scanl();
@@ -875,6 +876,17 @@
   void addss(XMMRegister dst, Address src);
   void addss(XMMRegister dst, XMMRegister src);
 
+  // AES instructions
+  void aesdec(XMMRegister dst, Address src);
+  void aesdec(XMMRegister dst, XMMRegister src);
+  void aesdeclast(XMMRegister dst, Address src);
+  void aesdeclast(XMMRegister dst, XMMRegister src);
+  void aesenc(XMMRegister dst, Address src);
+  void aesenc(XMMRegister dst, XMMRegister src);
+  void aesenclast(XMMRegister dst, Address src);
+  void aesenclast(XMMRegister dst, XMMRegister src);
+
+
   void andl(Address  dst, int32_t imm32);
   void andl(Register dst, int32_t imm32);
   void andl(Register dst, Address src);
@@ -905,7 +917,7 @@
 
   void cdqq();
 
-  void cld() { emit_byte(0xfc); }
+  void cld();
 
   void clflush(Address adr);
 
@@ -952,10 +964,7 @@
   void comiss(XMMRegister dst, XMMRegister src);
 
   // Identify processor type and features
-  void cpuid() {
-    emit_byte(0x0F);
-    emit_byte(0xA2);
-  }
+  void cpuid();
 
   // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
   void cvtsd2ss(XMMRegister dst, XMMRegister src);
@@ -1200,11 +1209,7 @@
 
   void leaq(Register dst, Address src);
 
-  void lfence() {
-    emit_byte(0x0F);
-    emit_byte(0xAE);
-    emit_byte(0xE8);
-  }
+  void lfence();
 
   void lock();
 
@@ -1390,6 +1395,10 @@
   // Pack with unsigned saturation
   void packuswb(XMMRegister dst, XMMRegister src);
   void packuswb(XMMRegister dst, Address src);
+  void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+
+  // Permutation of 64bit words
+  void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);
 
   // SSE4.2 string instructions
   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
@@ -1424,6 +1433,10 @@
   void prefetcht2(Address src);
   void prefetchw(Address src);
 
+  // Shuffle Bytes
+  void pshufb(XMMRegister dst, XMMRegister src);
+  void pshufb(XMMRegister dst, Address src);
+
   // Shuffle Packed Doublewords
   void pshufd(XMMRegister dst, XMMRegister src, int mode);
   void pshufd(XMMRegister dst, Address src,     int mode);
@@ -1435,9 +1448,12 @@
   // Shift Right by bytes Logical DoubleQuadword Immediate
   void psrldq(XMMRegister dst, int shift);
 
-  // Logical Compare Double Quadword
+  // Logical Compare 128bit
   void ptest(XMMRegister dst, XMMRegister src);
   void ptest(XMMRegister dst, Address src);
+  // Logical Compare 256bit
+  void vptest(XMMRegister dst, XMMRegister src);
+  void vptest(XMMRegister dst, Address src);
 
   // Interleave Low Bytes
   void punpcklbw(XMMRegister dst, XMMRegister src);
@@ -1508,7 +1524,7 @@
   void sqrtss(XMMRegister dst, Address src);
   void sqrtss(XMMRegister dst, XMMRegister src);
 
-  void std() { emit_byte(0xfd); }
+  void std();
 
   void stmxcsr( Address dst );
 
@@ -1565,11 +1581,7 @@
   void xchgq(Register dst, Register src);
 
   // Get Value of Extended Control Register
-  void xgetbv() {
-    emit_byte(0x0F);
-    emit_byte(0x01);
-    emit_byte(0xD0);
-  }
+  void xgetbv();
 
   void xorl(Register dst, int32_t imm32);
   void xorl(Register dst, Address src);
@@ -1749,6 +1761,9 @@
   void vextractf128h(Address dst, XMMRegister src);
   void vextracti128h(Address dst, XMMRegister src);
 
+  // duplicate 4-byte integer data from src into 8 locations in dest
+  void vpbroadcastd(XMMRegister dst, XMMRegister src);
+
   // AVX instruction which is used to clear upper 128 bits of YMM registers and
   // to avoid transaction penalty between AVX and SSE states. There is no
   // penalty if legacy SSE instructions are encoded using VEX prefix because
@@ -1766,1104 +1781,4 @@
 
 };
 
-
-// MacroAssembler extends Assembler by frequently used macros.
-//
-// Instructions for which a 'better' code sequence exists depending
-// on arguments should also go in here.
-
-class MacroAssembler: public Assembler {
-  friend class LIR_Assembler;
-  friend class Runtime1;      // as_Address()
-
- protected:
-
-  Address as_Address(AddressLiteral adr);
-  Address as_Address(ArrayAddress adr);
-
-  // Support for VM calls
-  //
-  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
-  // may customize this version by overriding it for its purposes (e.g., to save/restore
-  // additional registers when doing a VM call).
-#ifdef CC_INTERP
-  // c++ interpreter never wants to use interp_masm version of call_VM
-  #define VIRTUAL
-#else
-  #define VIRTUAL virtual
-#endif
-
-  VIRTUAL void call_VM_leaf_base(
-    address entry_point,               // the entry point
-    int     number_of_arguments        // the number of arguments to pop after the call
-  );
-
-  // This is the base routine called by the different versions of call_VM. The interpreter
-  // may customize this version by overriding it for its purposes (e.g., to save/restore
-  // additional registers when doing a VM call).
-  //
-  // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
-  // returns the register which contains the thread upon return. If a thread register has been
-  // specified, the return value will correspond to that register. If no last_java_sp is specified
-  // (noreg) then rsp will be used instead.
-  VIRTUAL void call_VM_base(           // returns the register containing the thread upon return
-    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
-    Register java_thread,              // the thread if computed before     ; use noreg otherwise
-    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
-    address  entry_point,              // the entry point
-    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
-    bool     check_exceptions          // whether to check for pending exceptions after return
-  );
-
-  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
-  // The implementation is only non-empty for the InterpreterMacroAssembler,
-  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
-  virtual void check_and_handle_popframe(Register java_thread);
-  virtual void check_and_handle_earlyret(Register java_thread);
-
-  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
-
-  // helpers for FPU flag access
-  // tmp is a temporary register, if none is available use noreg
-  void save_rax   (Register tmp);
-  void restore_rax(Register tmp);
-
- public:
-  MacroAssembler(CodeBuffer* code) : Assembler(code) {}
-
-  // Support for NULL-checks
-  //
-  // Generates code that causes a NULL OS exception if the content of reg is NULL.
-  // If the accessed location is M[reg + offset] and the offset is known, provide the
-  // offset. No explicit code generation is needed if the offset is within a certain
-  // range (0 <= offset <= page_size).
-
-  void null_check(Register reg, int offset = -1);
-  static bool needs_explicit_null_check(intptr_t offset);
-
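The comment above describes implicit null checks: an access M[reg + offset] with a small offset faults in the protected page at address 0 when reg is NULL, and the VM turns that fault into the exception, so no explicit test is emitted. A sketch of the offset criterion, assuming a 4K page (the real needs_explicit_null_check() also handles compressed-oop heap-base offsets on 64-bit):

#include <cstdint>

static const intptr_t kPageSizeSketch = 4096;  // assumption for illustration

// true  -> an explicit test must be emitted
// false -> the memory access itself acts as the null check
static bool needs_explicit_null_check_sketch(intptr_t offset) {
  return offset < 0 || offset >= kPageSizeSketch;
}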
-  // Required platform-specific helpers for Label::patch_instructions.
-  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
-  void pd_patch_instruction(address branch, address target);
-#ifndef PRODUCT
-  static void pd_print_patched_instruction(address branch);
-#endif
-
-  // The following 4 methods return the offset of the appropriate move instruction
-
-  // Support for fast byte/short loading with zero extension (depending on particular CPU)
-  int load_unsigned_byte(Register dst, Address src);
-  int load_unsigned_short(Register dst, Address src);
-
-  // Support for fast byte/short loading with sign extension (depending on particular CPU)
-  int load_signed_byte(Register dst, Address src);
-  int load_signed_short(Register dst, Address src);
-
-  // Support for sign-extension (hi:lo = extend_sign(lo))
-  void extend_sign(Register hi, Register lo);
-
-  // Load and store values by size and signed-ness
-  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
-  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
-
-  // Support for inc/dec with optimal instruction selection depending on value
-
-  void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
-  void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
-
-  void decrementl(Address dst, int value = 1);
-  void decrementl(Register reg, int value = 1);
-
-  void decrementq(Register reg, int value = 1);
-  void decrementq(Address dst, int value = 1);
-
-  void incrementl(Address dst, int value = 1);
-  void incrementl(Register reg, int value = 1);
-
-  void incrementq(Register reg, int value = 1);
-  void incrementq(Address dst, int value = 1);
-
-
-  // Support optimal SSE move instructions.
-  void movflt(XMMRegister dst, XMMRegister src) {
-    if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
-    else                       { movss (dst, src); return; }
-  }
-  void movflt(XMMRegister dst, Address src) { movss(dst, src); }
-  void movflt(XMMRegister dst, AddressLiteral src);
-  void movflt(Address dst, XMMRegister src) { movss(dst, src); }
-
-  void movdbl(XMMRegister dst, XMMRegister src) {
-    if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
-    else                       { movsd (dst, src); return; }
-  }
-
-  void movdbl(XMMRegister dst, AddressLiteral src);
-
-  void movdbl(XMMRegister dst, Address src) {
-    if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
-    else                         { movlpd(dst, src); return; }
-  }
-  void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
-
-  void incrementl(AddressLiteral dst);
-  void incrementl(ArrayAddress dst);
-
-  // Alignment
-  void align(int modulus);
-
-  // A 5 byte nop that is safe for patching (see patch_verified_entry)
-  void fat_nop();
-
-  // Stack frame creation/removal
-  void enter();
-  void leave();
-
-  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
-  // The pointer will be loaded into the thread register.
-  void get_thread(Register thread);
-
-
-  // Support for VM calls
-  //
-  // It is imperative that all calls into the VM are handled via the call_VM macros.
-  // They make sure that the stack linkage is setup correctly. call_VM's correspond
-  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
-
-
-  void call_VM(Register oop_result,
-               address entry_point,
-               bool check_exceptions = true);
-  void call_VM(Register oop_result,
-               address entry_point,
-               Register arg_1,
-               bool check_exceptions = true);
-  void call_VM(Register oop_result,
-               address entry_point,
-               Register arg_1, Register arg_2,
-               bool check_exceptions = true);
-  void call_VM(Register oop_result,
-               address entry_point,
-               Register arg_1, Register arg_2, Register arg_3,
-               bool check_exceptions = true);
-
-  // Overloadings with last_Java_sp
-  void call_VM(Register oop_result,
-               Register last_java_sp,
-               address entry_point,
-               int number_of_arguments = 0,
-               bool check_exceptions = true);
-  void call_VM(Register oop_result,
-               Register last_java_sp,
-               address entry_point,
-               Register arg_1,
-               bool check_exceptions = true);
-  void call_VM(Register oop_result,
-               Register last_java_sp,
-               address entry_point,
-               Register arg_1, Register arg_2,
-               bool check_exceptions = true);
-  void call_VM(Register oop_result,
-               Register last_java_sp,
-               address entry_point,
-               Register arg_1, Register arg_2, Register arg_3,
-               bool check_exceptions = true);
-
-  void get_vm_result  (Register oop_result, Register thread);
-  void get_vm_result_2(Register metadata_result, Register thread);
-
-  // These always tightly bind to MacroAssembler::call_VM_base
-  // bypassing the virtual implementation
-  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
-  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
-  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
-  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
-  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);
-
-  void call_VM_leaf(address entry_point,
-                    int number_of_arguments = 0);
-  void call_VM_leaf(address entry_point,
-                    Register arg_1);
-  void call_VM_leaf(address entry_point,
-                    Register arg_1, Register arg_2);
-  void call_VM_leaf(address entry_point,
-                    Register arg_1, Register arg_2, Register arg_3);
-
-  // These always tightly bind to MacroAssembler::call_VM_leaf_base
-  // bypassing the virtual implementation
-  void super_call_VM_leaf(address entry_point);
-  void super_call_VM_leaf(address entry_point, Register arg_1);
-  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
-  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
-  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
-
-  // last Java Frame (fills frame anchor)
-  void set_last_Java_frame(Register thread,
-                           Register last_java_sp,
-                           Register last_java_fp,
-                           address last_java_pc);
-
-  // thread in the default location (r15_thread on 64bit)
-  void set_last_Java_frame(Register last_java_sp,
-                           Register last_java_fp,
-                           address last_java_pc);
-
-  void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc);
-
-  // thread in the default location (r15_thread on 64bit)
-  void reset_last_Java_frame(bool clear_fp, bool clear_pc);
-
-  // Stores
-  void store_check(Register obj);                // store check for obj - register is destroyed afterwards
-  void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
-
-#ifndef SERIALGC
-
-  void g1_write_barrier_pre(Register obj,
-                            Register pre_val,
-                            Register thread,
-                            Register tmp,
-                            bool tosca_live,
-                            bool expand_call);
-
-  void g1_write_barrier_post(Register store_addr,
-                             Register new_val,
-                             Register thread,
-                             Register tmp,
-                             Register tmp2);
-
-#endif // SERIALGC
-
-  // split store_check(Register obj) to enhance instruction interleaving
-  void store_check_part_1(Register obj);
-  void store_check_part_2(Register obj);
-
-  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
-  void c2bool(Register x);
-
-  // C++ bool manipulation
-
-  void movbool(Register dst, Address src);
-  void movbool(Address dst, bool boolconst);
-  void movbool(Address dst, Register src);
-  void testbool(Register dst);
-
-  // oop manipulations
-  void load_klass(Register dst, Register src);
-  void store_klass(Register dst, Register src);
-
-  void load_heap_oop(Register dst, Address src);
-  void load_heap_oop_not_null(Register dst, Address src);
-  void store_heap_oop(Address dst, Register src);
-  void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg);
-
-  // Used for storing NULL. All other oop constants should be
-  // stored using routines that take a jobject.
-  void store_heap_oop_null(Address dst);
-
-  void load_prototype_header(Register dst, Register src);
-
-#ifdef _LP64
-  void store_klass_gap(Register dst, Register src);
-
-  // This dummy is to prevent a call to store_heap_oop from
-  // converting a zero (like NULL) into a Register by giving
-  // the compiler two choices it can't resolve
-
-  void store_heap_oop(Address dst, void* dummy);
-
-  void encode_heap_oop(Register r);
-  void decode_heap_oop(Register r);
-  void encode_heap_oop_not_null(Register r);
-  void decode_heap_oop_not_null(Register r);
-  void encode_heap_oop_not_null(Register dst, Register src);
-  void decode_heap_oop_not_null(Register dst, Register src);
-
-  void set_narrow_oop(Register dst, jobject obj);
-  void set_narrow_oop(Address dst, jobject obj);
-  void cmp_narrow_oop(Register dst, jobject obj);
-  void cmp_narrow_oop(Address dst, jobject obj);
-
-  void encode_klass_not_null(Register r);
-  void decode_klass_not_null(Register r);
-  void encode_klass_not_null(Register dst, Register src);
-  void decode_klass_not_null(Register dst, Register src);
-  void set_narrow_klass(Register dst, Klass* k);
-  void set_narrow_klass(Address dst, Klass* k);
-  void cmp_narrow_klass(Register dst, Klass* k);
-  void cmp_narrow_klass(Address dst, Klass* k);
-
-  // if heap base register is used - reinit it with the correct value
-  void reinit_heapbase();
-
-  DEBUG_ONLY(void verify_heapbase(const char* msg);)
-
-#endif // _LP64
-
-  // Int division/remainder for Java
-  // (as idivl, but checks for special case as described in JVM spec.)
-  // returns idivl instruction offset for implicit exception handling
-  int corrected_idivl(Register reg);
-
-  // Long division/remainder for Java
-  // (as idivq, but checks for special case as described in JVM spec.)
-  // returns idivq instruction offset for implicit exception handling
-  int corrected_idivq(Register reg);
-
-  void int3();
-
-  // Long operation macros for a 32bit cpu
-  // Long negation for Java
-  void lneg(Register hi, Register lo);
-
-  // Long multiplication for Java
-  // (destroys contents of eax, ebx, ecx and edx)
-  void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
-
-  // Long shifts for Java
-  // (semantics as described in JVM spec.)
-  void lshl(Register hi, Register lo);                               // hi:lo << (rcx & 0x3f)
-  void lshr(Register hi, Register lo, bool sign_extension = false);  // hi:lo >> (rcx & 0x3f)
-
-  // Long compare for Java
-  // (semantics as described in JVM spec.)
-  void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
-
-
-  // misc
-
-  // Sign extension
-  void sign_extend_short(Register reg);
-  void sign_extend_byte(Register reg);
-
-  // Division by power of 2, rounding towards 0
-  void division_with_shift(Register reg, int shift_value);
-
-  // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
-  //
-  // CF (corresponds to C0) if x < y
-  // PF (corresponds to C2) if unordered
-  // ZF (corresponds to C3) if x = y
-  //
-  // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
-  // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
-  void fcmp(Register tmp);
-  // Variant of the above which allows y to be further down the stack
-  // and which only pops x and y if specified. If pop_right is
-  // specified then pop_left must also be specified.
-  void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
-
-  // Floating-point comparison for Java
-  // Compares the top-most stack entries on the FPU stack and stores the result in dst.
-  // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
-  // (semantics as described in JVM spec.)
-  void fcmp2int(Register dst, bool unordered_is_less);
-  // Variant of the above which allows y to be further down the stack
-  // and which only pops x and y if specified. If pop_right is
-  // specified then pop_left must also be specified.
-  void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
-
-  // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
-  // tmp is a temporary register, if none is available use noreg
-  void fremr(Register tmp);
-
-
-  // same as fcmp2int, but using SSE2
-  void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
-  void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
-
-  // Inlined sin/cos generator for Java; must not use CPU instruction
-  // directly on Intel as it does not have high enough precision
-  // outside of the range [-pi/4, pi/4]. The extra argument indicates the
-  // number of FPU stack slots in use; all but the topmost will
-  // require saving if a slow case is necessary. Assumes argument is
-  // on FP TOS; result is on FP TOS.  No cpu registers are changed by
-  // this code.
-  void trigfunc(char trig, int num_fpu_regs_in_use = 1);
-
-  // branch to L if FPU flag C2 is set/not set
-  // tmp is a temporary register, if none is available use noreg
-  void jC2 (Register tmp, Label& L);
-  void jnC2(Register tmp, Label& L);
-
-  // Pop ST (ffree & fincstp combined)
-  void fpop();
-
-  // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
-  void push_fTOS();
-
-  // pops double TOS element from CPU stack and pushes on FPU stack
-  void pop_fTOS();
-
-  void empty_FPU_stack();
-
-  void push_IU_state();
-  void pop_IU_state();
-
-  void push_FPU_state();
-  void pop_FPU_state();
-
-  void push_CPU_state();
-  void pop_CPU_state();
-
-  // Round up to a power of two
-  void round_to(Register reg, int modulus);
-
-  // Callee saved registers handling
-  void push_callee_saved_registers();
-  void pop_callee_saved_registers();
-
-  // allocation
-  void eden_allocate(
-    Register obj,                      // result: pointer to object after successful allocation
-    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
-    int      con_size_in_bytes,        // object size in bytes if   known at compile time
-    Register t1,                       // temp register
-    Label&   slow_case                 // continuation point if fast allocation fails
-  );
-  void tlab_allocate(
-    Register obj,                      // result: pointer to object after successful allocation
-    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
-    int      con_size_in_bytes,        // object size in bytes if   known at compile time
-    Register t1,                       // temp register
-    Register t2,                       // temp register
-    Label&   slow_case                 // continuation point if fast allocation fails
-  );
-  Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
-  void incr_allocated_bytes(Register thread,
-                            Register var_size_in_bytes, int con_size_in_bytes,
-                            Register t1 = noreg);
-
-  // interface method calling
-  void lookup_interface_method(Register recv_klass,
-                               Register intf_klass,
-                               RegisterOrConstant itable_index,
-                               Register method_result,
-                               Register scan_temp,
-                               Label& no_such_interface);
-
-  // virtual method calling
-  void lookup_virtual_method(Register recv_klass,
-                             RegisterOrConstant vtable_index,
-                             Register method_result);
-
-  // Test sub_klass against super_klass, with fast and slow paths.
-
-  // The fast path produces a tri-state answer: yes / no / maybe-slow.
-  // One of the three labels can be NULL, meaning take the fall-through.
-  // If super_check_offset is -1, the value is loaded up from super_klass.
-  // No registers are killed, except temp_reg.
-  void check_klass_subtype_fast_path(Register sub_klass,
-                                     Register super_klass,
-                                     Register temp_reg,
-                                     Label* L_success,
-                                     Label* L_failure,
-                                     Label* L_slow_path,
-                RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
-
-  // The rest of the type check; must be wired to a corresponding fast path.
-  // It does not repeat the fast path logic, so don't use it standalone.
-  // The temp_reg and temp2_reg can be noreg, if no temps are available.
-  // Updates the sub's secondary super cache as necessary.
-  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
-  void check_klass_subtype_slow_path(Register sub_klass,
-                                     Register super_klass,
-                                     Register temp_reg,
-                                     Register temp2_reg,
-                                     Label* L_success,
-                                     Label* L_failure,
-                                     bool set_cond_codes = false);
-
-  // Simplified, combined version, good for typical uses.
-  // Falls through on failure.
-  void check_klass_subtype(Register sub_klass,
-                           Register super_klass,
-                           Register temp_reg,
-                           Label& L_success);
-
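The three routines above form one tri-state protocol: the fast path answers yes/no from cheap checks (cached super, superclass display) and otherwise falls through to the secondary-supers scan. A control-flow sketch only; the real code threads Labels rather than returning values:

enum class Answer { kYes, kNo, kMaybe };

// check_klass_subtype_fast_path yields the tri-state answer; kMaybe must be
// wired to the matching slow path, which also updates the secondary super
// cache on success.
template <typename Fast, typename Slow>
bool is_subtype_sketch(Fast fast_path, Slow slow_path) {
  switch (fast_path()) {
    case Answer::kYes: return true;
    case Answer::kNo:  return false;
    case Answer::kMaybe: break;
  }
  return slow_path();
}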
-  // method handles (JSR 292)
-  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
-
-  //----
-  void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
-
-  // Debugging
-
-  // only if +VerifyOops
-  // TODO: Make these macros with file and line like sparc version!
-  void verify_oop(Register reg, const char* s = "broken oop");
-  void verify_oop_addr(Address addr, const char * s = "broken oop addr");
-
-  // TODO: verify method and klass metadata (compare against vptr?)
-  void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
-  void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
-
-#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
-#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
-
-  // only if +VerifyFPU
-  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
-
-  // prints msg, dumps registers and stops execution
-  void stop(const char* msg);
-
-  // prints msg and continues
-  void warn(const char* msg);
-
-  // dumps registers and other state
-  void print_state();
-
-  static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
-  static void debug64(char* msg, int64_t pc, int64_t regs[]);
-  static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
-  static void print_state64(int64_t pc, int64_t regs[]);
-
-  void os_breakpoint();
-
-  void untested()                                { stop("untested"); }
-
-  void unimplemented(const char* what = "")      { char* b = new char[1024];  jio_snprintf(b, 1024, "unimplemented: %s", what);  stop(b); }
-
-  void should_not_reach_here()                   { stop("should not reach here"); }
-
-  void print_CPU_state();
-
-  // Stack overflow checking
-  void bang_stack_with_offset(int offset) {
-    // stack grows down, caller passes positive offset
-    assert(offset > 0, "must bang with negative offset");
-    movl(Address(rsp, (-offset)), rax);
-  }
-
-  // Writes to successive stack pages until the given offset is reached, to
-  // check for stack overflow + shadow pages. Also clobbers tmp.
-  void bang_stack_size(Register size, Register tmp);
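Stack banging works because touching one word per page is enough to make the OS extend the stack or raise the overflow fault up front, instead of deep inside the new frame. A safe, self-contained sketch over an explicit buffer, assuming 4K pages (the emitted code instead stores rax at negative offsets from rsp):

#include <cstddef>

// Touch one byte in every page of [buf, buf + size), highest address first,
// mirroring bang_stack_with_offset(offset) for offset = page, 2*page, ...
static void bang_pages_sketch(volatile char* buf, size_t size) {
  const size_t page = 4096;  // assumption for illustration
  for (size_t off = page; off <= size; off += page)
    buf[size - off] = 0;
}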
-
-  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
-                                                Register tmp,
-                                                int offset);
-
-  // Support for serializing memory accesses between threads
-  void serialize_memory(Register thread, Register tmp);
-
-  void verify_tlab();
-
-  // Biased locking support
-  // lock_reg and obj_reg must be loaded up with the appropriate values.
-  // swap_reg must be rax, and is killed.
-  // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
-  // be killed; if not supplied, push/pop will be used internally to
-  // allocate a temporary (inefficient, avoid if possible).
-  // Optional slow case is for implementations (interpreter and C1) which branch to
-  // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
-  // Returns offset of first potentially-faulting instruction for null
-  // check info (currently consumed only by C1). If
-  // swap_reg_contains_mark is true then returns -1 as it is assumed
-  // the calling code has already passed any potential faults.
-  int biased_locking_enter(Register lock_reg, Register obj_reg,
-                           Register swap_reg, Register tmp_reg,
-                           bool swap_reg_contains_mark,
-                           Label& done, Label* slow_case = NULL,
-                           BiasedLockingCounters* counters = NULL);
-  void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
-
-
-  Condition negate_condition(Condition cond);
-
-  // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit
-  // operands. In general the names are modified to avoid hiding the instruction in Assembler
-  // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
-  // here in MacroAssembler. The major exception to this rule is call
-
-  // Arithmetics
-
-
-  void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
-  void addptr(Address dst, Register src);
-
-  void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
-  void addptr(Register dst, int32_t src);
-  void addptr(Register dst, Register src);
-  void addptr(Register dst, RegisterOrConstant src) {
-    if (src.is_constant()) addptr(dst, (int) src.as_constant());
-    else                   addptr(dst,       src.as_register());
-  }
-
-  void andptr(Register dst, int32_t src);
-  void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
-
-  void cmp8(AddressLiteral src1, int imm);
-
-  // renamed to drag out the casting of address to int32_t/intptr_t
-  void cmp32(Register src1, int32_t imm);
-
-  void cmp32(AddressLiteral src1, int32_t imm);
-  // compare reg - mem, or reg - &mem
-  void cmp32(Register src1, AddressLiteral src2);
-
-  void cmp32(Register src1, Address src2);
-
-#ifndef _LP64
-  void cmpklass(Address dst, Metadata* obj);
-  void cmpklass(Register dst, Metadata* obj);
-  void cmpoop(Address dst, jobject obj);
-  void cmpoop(Register dst, jobject obj);
-#endif // _LP64
-
-  // NOTE src2 must be the lval. This is NOT a mem-mem compare
-  void cmpptr(Address src1, AddressLiteral src2);
-
-  void cmpptr(Register src1, AddressLiteral src2);
-
-  void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
-  void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
-  // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
-
-  void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
-  void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
-
-  // cmp64 to avoid hiding cmpq
-  void cmp64(Register src1, AddressLiteral src);
-
-  void cmpxchgptr(Register reg, Address adr);
-
-  void locked_cmpxchgptr(Register reg, AddressLiteral adr);
-
-
-  void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
-
-
-  void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
-
-  void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
-
-  void shlptr(Register dst, int32_t shift);
-  void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
-
-  void shrptr(Register dst, int32_t shift);
-  void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
-
-  void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
-  void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
-
-  void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
-
-  void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
-  void subptr(Register dst, int32_t src);
-  // Force generation of a 4 byte immediate value even if it fits into 8bit
-  void subptr_imm32(Register dst, int32_t src);
-  void subptr(Register dst, Register src);
-  void subptr(Register dst, RegisterOrConstant src) {
-    if (src.is_constant()) subptr(dst, (int) src.as_constant());
-    else                   subptr(dst,       src.as_register());
-  }
-
-  void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
-  void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
-
-  void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
-  void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
-
-  void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
-
-
-
-  // Helper functions for statistics gathering.
-  // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
-  void cond_inc32(Condition cond, AddressLiteral counter_addr);
-  // Unconditional atomic increment.
-  void atomic_incl(AddressLiteral counter_addr);
-
-  void lea(Register dst, AddressLiteral adr);
-  void lea(Address dst, AddressLiteral adr);
-  void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
-
-  void leal32(Register dst, Address src) { leal(dst, src); }
-
-  // Import other testl() methods from the parent class or else
-  // they will be hidden by the following overriding declaration.
-  using Assembler::testl;
-  void testl(Register dst, AddressLiteral src);
-
-  void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
-  void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
-  void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
-
-  void testptr(Register src, int32_t imm32) {  LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
-  void testptr(Register src1, Register src2);
-
-  void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
-  void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
-
-  // Calls
-
-  void call(Label& L, relocInfo::relocType rtype);
-  void call(Register entry);
-
-  // NOTE: this call transfers to the effective address of entry, NOT
-  // the address contained by entry, because that is more natural
-  // for jumps/calls.
-  void call(AddressLiteral entry);
-
-  // Emit the CompiledIC call idiom
-  void ic_call(address entry);
-
-  // Jumps
-
-  // NOTE: these jumps transfer to the effective address of dst, NOT
-  // the address contained by dst, because that is more natural
-  // for jumps/calls.
-  void jump(AddressLiteral dst);
-  void jump_cc(Condition cc, AddressLiteral dst);
-
-  // 32bit can do a case table jump in one instruction but we no longer allow the base
-  // to be installed in the Address class. This jump transfers to the address
-  // contained in the location described by entry (not the address of entry)
-  void jump(ArrayAddress entry);
-
-  // Floating
-
-  void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
-  void andpd(XMMRegister dst, AddressLiteral src);
-
-  void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
-  void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
-  void andps(XMMRegister dst, AddressLiteral src);
-
-  void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
-  void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
-  void comiss(XMMRegister dst, AddressLiteral src);
-
-  void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
-  void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
-  void comisd(XMMRegister dst, AddressLiteral src);
-
-  void fadd_s(Address src)        { Assembler::fadd_s(src); }
-  void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
-
-  void fldcw(Address src) { Assembler::fldcw(src); }
-  void fldcw(AddressLiteral src);
-
-  void fld_s(int index)   { Assembler::fld_s(index); }
-  void fld_s(Address src) { Assembler::fld_s(src); }
-  void fld_s(AddressLiteral src);
-
-  void fld_d(Address src) { Assembler::fld_d(src); }
-  void fld_d(AddressLiteral src);
-
-  void fld_x(Address src) { Assembler::fld_x(src); }
-  void fld_x(AddressLiteral src);
-
-  void fmul_s(Address src)        { Assembler::fmul_s(src); }
-  void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
-
-  void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
-  void ldmxcsr(AddressLiteral src);
-
-  // compute pow(x,y) and exp(x) with x86 instructions. These don't cover
-  // all corner cases, may produce NaN, and may require fallback to a
-  // runtime call.
-  void fast_pow();
-  void fast_exp();
-  void increase_precision();
-  void restore_precision();
-
-  // computes exp(x). Fallback to runtime call included.
-  void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); }
-  // computes pow(x,y). Fallback to runtime call included.
-  void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); }
-
-private:
-
-  // call runtime as a fallback for trig functions and pow/exp.
-  void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use);
-
-  // computes 2^(Ylog2X); Ylog2X in ST(0)
-  void pow_exp_core_encoding();
-
-  // computes pow(x,y) or exp(x). Fallback to runtime call included.
-  void pow_or_exp(bool is_exp, int num_fpu_regs_in_use);
-
-  // these are private because users should be doing movflt/movdbl
-
-  void movss(Address dst, XMMRegister src)     { Assembler::movss(dst, src); }
-  void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
-  void movss(XMMRegister dst, Address src)     { Assembler::movss(dst, src); }
-  void movss(XMMRegister dst, AddressLiteral src);
-
-  void movlpd(XMMRegister dst, Address src)    {Assembler::movlpd(dst, src); }
-  void movlpd(XMMRegister dst, AddressLiteral src);
-
-public:
-
-  void addsd(XMMRegister dst, XMMRegister src)    { Assembler::addsd(dst, src); }
-  void addsd(XMMRegister dst, Address src)        { Assembler::addsd(dst, src); }
-  void addsd(XMMRegister dst, AddressLiteral src);
-
-  void addss(XMMRegister dst, XMMRegister src)    { Assembler::addss(dst, src); }
-  void addss(XMMRegister dst, Address src)        { Assembler::addss(dst, src); }
-  void addss(XMMRegister dst, AddressLiteral src);
-
-  void divsd(XMMRegister dst, XMMRegister src)    { Assembler::divsd(dst, src); }
-  void divsd(XMMRegister dst, Address src)        { Assembler::divsd(dst, src); }
-  void divsd(XMMRegister dst, AddressLiteral src);
-
-  void divss(XMMRegister dst, XMMRegister src)    { Assembler::divss(dst, src); }
-  void divss(XMMRegister dst, Address src)        { Assembler::divss(dst, src); }
-  void divss(XMMRegister dst, AddressLiteral src);
-
-  void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
-  void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
-  void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
-  void movsd(XMMRegister dst, AddressLiteral src);
-
-  void mulsd(XMMRegister dst, XMMRegister src)    { Assembler::mulsd(dst, src); }
-  void mulsd(XMMRegister dst, Address src)        { Assembler::mulsd(dst, src); }
-  void mulsd(XMMRegister dst, AddressLiteral src);
-
-  void mulss(XMMRegister dst, XMMRegister src)    { Assembler::mulss(dst, src); }
-  void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
-  void mulss(XMMRegister dst, AddressLiteral src);
-
-  void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
-  void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
-  void sqrtsd(XMMRegister dst, AddressLiteral src);
-
-  void sqrtss(XMMRegister dst, XMMRegister src)    { Assembler::sqrtss(dst, src); }
-  void sqrtss(XMMRegister dst, Address src)        { Assembler::sqrtss(dst, src); }
-  void sqrtss(XMMRegister dst, AddressLiteral src);
-
-  void subsd(XMMRegister dst, XMMRegister src)    { Assembler::subsd(dst, src); }
-  void subsd(XMMRegister dst, Address src)        { Assembler::subsd(dst, src); }
-  void subsd(XMMRegister dst, AddressLiteral src);
-
-  void subss(XMMRegister dst, XMMRegister src)    { Assembler::subss(dst, src); }
-  void subss(XMMRegister dst, Address src)        { Assembler::subss(dst, src); }
-  void subss(XMMRegister dst, AddressLiteral src);
-
-  void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
-  void ucomiss(XMMRegister dst, Address src)     { Assembler::ucomiss(dst, src); }
-  void ucomiss(XMMRegister dst, AddressLiteral src);
-
-  void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
-  void ucomisd(XMMRegister dst, Address src)     { Assembler::ucomisd(dst, src); }
-  void ucomisd(XMMRegister dst, AddressLiteral src);
-
-  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
-  void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
-  void xorpd(XMMRegister dst, Address src)     { Assembler::xorpd(dst, src); }
-  void xorpd(XMMRegister dst, AddressLiteral src);
-
-  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
-  void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
-  void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
-  void xorps(XMMRegister dst, AddressLiteral src);
-
-  // AVX 3-operands instructions
-
-  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
-  void vaddsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddsd(dst, nds, src); }
-  void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
-  void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
-  void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandpd(dst, nds, src, vector256); }
-  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
-
-  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
-  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandps(dst, nds, src, vector256); }
-  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
-
-  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
-  void vdivsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivsd(dst, nds, src); }
-  void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
-  void vdivss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivss(dst, nds, src); }
-  void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
-  void vmulsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vmulsd(dst, nds, src); }
-  void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
-  void vmulss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vmulss(dst, nds, src); }
-  void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
-  void vsubsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubsd(dst, nds, src); }
-  void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
-  void vsubss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubss(dst, nds, src); }
-  void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  // AVX Vector instructions
-
-  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
-  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
-
-  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
-  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
-
-  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
-      Assembler::vpxor(dst, nds, src, vector256);
-    else
-      Assembler::vxorpd(dst, nds, src, vector256);
-  }
-  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
-      Assembler::vpxor(dst, nds, src, vector256);
-    else
-      Assembler::vxorpd(dst, nds, src, vector256);
-  }
-
-  // Move packed integer values from low 128 bit to high 128 bit in 256 bit vector.
-  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-    if (UseAVX > 1) // vinserti128h is available only in AVX2
-      Assembler::vinserti128h(dst, nds, src);
-    else
-      Assembler::vinsertf128h(dst, nds, src);
-  }
-
-  // Data
-
-  void cmov32( Condition cc, Register dst, Address  src);
-  void cmov32( Condition cc, Register dst, Register src);
-
-  void cmov(   Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
-
-  void cmovptr(Condition cc, Register dst, Address  src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
-  void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
-
-  void movoop(Register dst, jobject obj);
-  void movoop(Address dst, jobject obj);
-
-  void mov_metadata(Register dst, Metadata* obj);
-  void mov_metadata(Address dst, Metadata* obj);
-
-  void movptr(ArrayAddress dst, Register src);
-  // can this do an lea?
-  void movptr(Register dst, ArrayAddress src);
-
-  void movptr(Register dst, Address src);
-
-  void movptr(Register dst, AddressLiteral src);
-
-  void movptr(Register dst, intptr_t src);
-  void movptr(Register dst, Register src);
-  void movptr(Address dst, intptr_t src);
-
-  void movptr(Address dst, Register src);
-
-  void movptr(Register dst, RegisterOrConstant src) {
-    if (src.is_constant()) movptr(dst, src.as_constant());
-    else                   movptr(dst, src.as_register());
-  }
-
-#ifdef _LP64
-  // Generally the next two are only used for moving NULL
-  // Although there are situations in initializing the mark word where
-  // they could be used. They are dangerous.
-
-  // They only exist on LP64 so that int32_t and intptr_t are not the same
-  // and we have ambiguous declarations.
-
-  void movptr(Address dst, int32_t imm32);
-  void movptr(Register dst, int32_t imm32);
-#endif // _LP64
-
-  // to avoid hiding movl
-  void mov32(AddressLiteral dst, Register src);
-  void mov32(Register dst, AddressLiteral src);
-
-  // to avoid hiding movb
-  void movbyte(ArrayAddress dst, int src);
-
-  // Import other mov() methods from the parent class or else
-  // they will be hidden by the following overriding declaration.
-  using Assembler::movdl;
-  using Assembler::movq;
-  void movdl(XMMRegister dst, AddressLiteral src);
-  void movq(XMMRegister dst, AddressLiteral src);
-
-  // Can push value or effective address
-  void pushptr(AddressLiteral src);
-
-  void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
-  void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
-
-  void pushoop(jobject obj);
-  void pushklass(Metadata* obj);
-
-  // sign extend as needed: l (32-bit) to ptr-sized element
-  void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
-  void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
-
-  // C2 compiled method's prolog code.
-  void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b);
-
-  // IndexOf strings.
-  // Small strings are loaded through the stack if they cross a page boundary.
-  void string_indexof(Register str1, Register str2,
-                      Register cnt1, Register cnt2,
-                      int int_cnt2,  Register result,
-                      XMMRegister vec, Register tmp);
-
-  // IndexOf for constant substrings with size >= 8 elements
-  // which don't need to be loaded through stack.
-  void string_indexofC8(Register str1, Register str2,
-                      Register cnt1, Register cnt2,
-                      int int_cnt2,  Register result,
-                      XMMRegister vec, Register tmp);
-
-  // Smallest code: we don't need to load through stack,
-  // check string tail.
-
-  // Compare strings.
-  void string_compare(Register str1, Register str2,
-                      Register cnt1, Register cnt2, Register result,
-                      XMMRegister vec1);
-
-  // Compare char[] arrays.
-  void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
-                          Register limit, Register result, Register chr,
-                          XMMRegister vec1, XMMRegister vec2);
-
-  // Fill primitive arrays
-  void generate_fill(BasicType t, bool aligned,
-                     Register to, Register value, Register count,
-                     Register rtmp, XMMRegister xtmp);
-
-#undef VIRTUAL
-
-};
-
-/**
- * class SkipIfEqual:
- *
- * Instantiating this class will result in assembly code being output that will
- * jump around any code emitted between the creation of the instance and its
- * automatic destruction at the end of a scope block, depending on the value of
- * the flag passed to the constructor, which will be checked at run-time.
- */
-class SkipIfEqual {
- private:
-  MacroAssembler* _masm;
-  Label _label;
-
- public:
-   SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
-   ~SkipIfEqual();
-};
-
-#ifdef ASSERT
-inline bool AbstractAssembler::pd_check_instruction_mark() { return true; }
-#endif
-
 #endif // CPU_X86_VM_ASSEMBLER_X86_HPP
--- a/src/cpu/x86/vm/assembler_x86.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/assembler_x86.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -28,48 +28,6 @@
 #include "asm/assembler.inline.hpp"
 #include "asm/codeBuffer.hpp"
 #include "code/codeCache.hpp"
-#include "runtime/handles.inline.hpp"
-
-inline void MacroAssembler::pd_patch_instruction(address branch, address target) {
-  unsigned char op = branch[0];
-  assert(op == 0xE8 /* call */ ||
-         op == 0xE9 /* jmp */ ||
-         op == 0xEB /* short jmp */ ||
-         (op & 0xF0) == 0x70 /* short jcc */ ||
-         op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */,
-         "Invalid opcode at patch point");
-
-  if (op == 0xEB || (op & 0xF0) == 0x70) {
-    // short offset operators (jmp and jcc)
-    char* disp = (char*) &branch[1];
-    int imm8 = target - (address) &disp[1];
-    guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset");
-    *disp = imm8;
-  } else {
-    int* disp = (int*) &branch[(op == 0x0F)? 2: 1];
-    int imm32 = target - (address) &disp[1];
-    *disp = imm32;
-  }
-}
-
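The displacement math in the deleted pd_patch_instruction() above is the heart of branch patching: x86 rel8/rel32 displacements are measured from the first byte after the displacement field, hence target - (address)&disp[1]. A self-contained sketch of the rel32 case with a hypothetical helper name (the original simply writes through an int*):

#include <cstdint>

// disp_field points at the 4 displacement bytes inside an already-emitted
// jmp/jcc/call; the displacement is relative to the end of those 4 bytes.
static void patch_rel32_sketch(uint8_t* disp_field, const uint8_t* target) {
  int32_t imm32 = (int32_t)(target - (disp_field + 4));
  for (int i = 0; i < 4; ++i)                 // store little-endian
    disp_field[i] = (uint8_t)((uint32_t)imm32 >> (8 * i));
}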
-#ifndef PRODUCT
-inline void MacroAssembler::pd_print_patched_instruction(address branch) {
-  const char* s;
-  unsigned char op = branch[0];
-  if (op == 0xE8) {
-    s = "call";
-  } else if (op == 0xE9 || op == 0xEB) {
-    s = "jmp";
-  } else if ((op & 0xF0) == 0x70) {
-    s = "jcc";
-  } else if (op == 0x0F) {
-    s = "jcc";
-  } else {
-    s = "????";
-  }
-  tty->print("%s (unresolved)", s);
-}
-#endif // ndef PRODUCT
 
 #ifndef _LP64
 inline int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { return reg_enc; }
@@ -87,12 +45,6 @@
 
 inline void Assembler::prefix(Address adr, XMMRegister reg) {}
 inline void Assembler::prefixq(Address adr, XMMRegister reg) {}
-#else
-inline void Assembler::emit_long64(jlong x) {
-  *(jlong*) _code_pos = x;
-  _code_pos += sizeof(jlong);
-  code_section()->set_end(_code_pos);
-}
 #endif // _LP64
 
 #endif // CPU_X86_VM_ASSEMBLER_X86_INLINE_HPP
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -313,10 +313,10 @@
 #endif
   } else {
    // make a copy of the code which is going to be patched.
-    for ( int i = 0; i < _bytes_to_copy; i++) {
+    for (int i = 0; i < _bytes_to_copy; i++) {
       address ptr = (address)(_pc_start + i);
       int a_byte = (*ptr) & 0xFF;
-      __ a_byte (a_byte);
+      __ emit_int8(a_byte);
       *ptr = 0x90; // make the site look like a nop
     }
   }
@@ -363,11 +363,11 @@
   // emit the offsets needed to find the code to patch
   int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record;
 
-  __ a_byte(0xB8);
-  __ a_byte(0);
-  __ a_byte(being_initialized_entry_offset);
-  __ a_byte(bytes_to_skip);
-  __ a_byte(_bytes_to_copy);
+  __ emit_int8((unsigned char)0xB8);
+  __ emit_int8(0);
+  __ emit_int8(being_initialized_entry_offset);
+  __ emit_int8(bytes_to_skip);
+  __ emit_int8(_bytes_to_copy);
   address patch_info_pc = __ pc();
   assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
 
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "c1/c1_Compilation.hpp"
 #include "c1/c1_LIRAssembler.hpp"
 #include "c1/c1_MacroAssembler.hpp"
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/cppInterpreter.hpp"
 #include "interpreter/interpreter.hpp"
@@ -538,9 +538,9 @@
 
     // compute full expression stack limit
 
-    const Address size_of_stack    (rbx, Method::max_stack_offset());
     const int extra_stack = 0; //6815692//Method::extra_stack_words();
-    __ load_unsigned_short(rdx, size_of_stack);                           // get size of expression stack in words
+    __ movptr(rdx, Address(rbx, Method::const_offset()));
+    __ load_unsigned_short(rdx, Address(rdx, ConstMethod::max_stack_offset())); // get size of expression stack in words
     __ negptr(rdx);                                                       // so we can subtract in next step
     // Allocate expression stack
     __ lea(rsp, Address(rsp, rdx, Address::times_ptr, -extra_stack));
@@ -611,8 +611,6 @@
   // C++ interpreter only
   // rsi/r13 - previous interpreter state pointer
 
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
-
   // InterpreterRuntime::frequency_counter_overflow takes one argument
   // indicating if the counter overflow occurs at a backwards branch (non-NULL bcp).
   // The call returns the address of the verified entry point for the method or NULL
@@ -682,12 +680,12 @@
   const Address stack_size(thread, Thread::stack_size_offset());
 
   // locals + overhead, in bytes
-    const Address size_of_stack    (rbx, Method::max_stack_offset());
-    // Always give one monitor to allow us to start interp if sync method.
-    // Any additional monitors need a check when moving the expression stack
-    const int one_monitor = frame::interpreter_frame_monitor_size() * wordSize;
-    const int extra_stack = 0; //6815692//Method::extra_stack_entries();
-  __ load_unsigned_short(rax, size_of_stack);                           // get size of expression stack in words
+  // Always give one monitor to allow us to start interp if sync method.
+  // Any additional monitors need a check when moving the expression stack
+  const int one_monitor = frame::interpreter_frame_monitor_size() * wordSize;
+  const int extra_stack = 0; //6815692//Method::extra_stack_entries();
+  __ movptr(rax, Address(rbx, Method::const_offset()));
+  __ load_unsigned_short(rax, Address(rax, ConstMethod::max_stack_offset())); // get size of expression stack in words
   __ lea(rax, Address(noreg, rax, Interpreter::stackElementScale(), extra_stack + one_monitor));
   __ lea(rax, Address(rax, rdx, Interpreter::stackElementScale(), overhead_size));
 
@@ -977,15 +975,16 @@
   //      to save/restore.
   address entry_point = __ pc();
 
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
-  const Address size_of_locals    (rbx, Method::size_of_locals_offset());
+  const Address constMethod       (rbx, Method::const_offset());
   const Address invocation_counter(rbx, Method::invocation_counter_offset() + InvocationCounter::counter_offset());
   const Address access_flags      (rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rcx, ConstMethod::size_of_parameters_offset());
 
   // rsi/r13 == state/locals rdi == prevstate
   const Register locals = rdi;
 
   // get parameter size (always needed)
+  __ movptr(rcx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: Method*
@@ -994,6 +993,7 @@
   // for natives the size of locals is zero
 
   // compute beginning of parameters/locals
+
   __ lea(locals, Address(rsp, rcx, Address::times_ptr, -wordSize));
 
   // initialize fixed part of activation frame
@@ -1107,11 +1107,14 @@
   const Register method = rbx;
   const Register thread = LP64_ONLY(r15_thread) NOT_LP64(rdi);
   const Register t      = InterpreterRuntime::SignatureHandlerGenerator::temp();    // rcx|rscratch1
+  const Address constMethod       (method, Method::const_offset());
+  const Address size_of_parameters(t, ConstMethod::size_of_parameters_offset());
 
   // allocate space for parameters
   __ movptr(method, STATE(_method));
   __ verify_method_ptr(method);
-  __ load_unsigned_short(t, Address(method, Method::size_of_parameters_offset()));
+  __ movptr(t, constMethod);
+  __ load_unsigned_short(t, size_of_parameters);
   __ shll(t, 2);
 #ifdef _LP64
   __ subptr(rsp, t);
@@ -1700,15 +1703,17 @@
   // save sender sp
   __ push(rcx);
 
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
-  const Address size_of_locals    (rbx, Method::size_of_locals_offset());
+  const Address constMethod       (rbx, Method::const_offset());
   const Address access_flags      (rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rdx, ConstMethod::size_of_parameters_offset());
+  const Address size_of_locals    (rdx, ConstMethod::size_of_locals_offset());
 
   // const Address monitor_block_top (rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
   // const Address monitor_block_bot (rbp, frame::interpreter_frame_initial_sp_offset        * wordSize);
   // const Address monitor(rbp, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock));
 
   // get parameter size (always needed)
+  __ movptr(rdx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: Method*
@@ -1989,7 +1994,9 @@
   __ movptr(rbx, STATE(_result._to_call._callee));
 
   // callee left args on top of expression stack, remove them
-  __ load_unsigned_short(rcx, Address(rbx, Method::size_of_parameters_offset()));
+  __ movptr(rcx, constMethod);
+  __ load_unsigned_short(rcx, Address(rcx, ConstMethod::size_of_parameters_offset()));
+
   __ lea(rsp, Address(rsp, rcx, Address::times_ptr));
 
   __ movl(rcx, Address(rbx, Method::result_index_offset()));
@@ -2159,7 +2166,9 @@
   // Make it look like call_stub calling conventions
 
   // Get (potential) receiver
-  __ load_unsigned_short(rcx, size_of_parameters);                   // get size of parameters in words
+  // get size of parameters in words
+  __ movptr(rcx, constMethod);
+  __ load_unsigned_short(rcx, Address(rcx, ConstMethod::size_of_parameters_offset()));
 
   ExternalAddress recursive(CAST_FROM_FN_PTR(address, RecursiveInterpreterActivation));
   __ pushptr(recursive.addr());                                      // make it look good in the debugger
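+
+// The two-step load now used throughout this file: the method sizes
+// moved from Method to ConstMethod, so every read goes through
+// Method::const_offset() first. Roughly, as a C++ sketch (the accessor
+// names here are illustrative):
+//
+//   ConstMethod* cm = method->constMethod();     // Method::const_offset()
+//   int params     = cm->size_of_parameters();   // ConstMethod::size_of_parameters_offset()
+//   int max_stack  = cm->max_stack();            // ConstMethod::max_stack_offset()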
--- a/src/cpu/x86/vm/frame_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/frame_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -534,7 +534,7 @@
   Method* m = *interpreter_frame_method_addr();
 
   // validate the method we'd find in this potential sender
-  if (!Universe::heap()->is_valid_method(m)) return false;
+  if (!m->is_valid_method()) return false;
 
   // stack frames shouldn't be much larger than max_stack elements
 
--- a/src/cpu/x86/vm/frame_x86.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,8 @@
 #ifndef CPU_X86_VM_FRAME_X86_INLINE_HPP
 #define CPU_X86_VM_FRAME_X86_INLINE_HPP
 
+#include "code/codeCache.hpp"
+
 // Inline functions for Intel frames:
 
 // Constructors:
--- a/src/cpu/x86/vm/globals_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/globals_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -120,6 +120,9 @@
   product(bool, UseUnalignedLoadStores, false,                              \
           "Use SSE2 MOVDQU instruction for Arraycopy")                      \
                                                                             \
+  product(bool, UseFastStosb, false,                                        \
+          "Use fast-string operation for zeroing: rep stosb")               \
+                                                                            \
   /* assembler */                                                           \
   product(bool, Use486InstrsOnly, false,                                    \
           "Use 80486 Compliant instruction subset")                         \
--- a/src/cpu/x86/vm/icBuffer_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/icBuffer_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "code/icBuffer.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "interpreter/bytecodes.hpp"
--- a/src/cpu/x86/vm/icache_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/icache_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "runtime/icache.hpp"
 
 #define __ _masm->
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -36,18 +36,7 @@
 #include "runtime/basicLock.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/sharedRuntime.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 
 // Implementation of InterpreterMacroAssembler
--- a/src/cpu/x86/vm/interp_masm_x86_32.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/interp_masm_x86_32.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,8 +25,10 @@
 #ifndef CPU_X86_VM_INTERP_MASM_X86_32_HPP
 #define CPU_X86_VM_INTERP_MASM_X86_32_HPP
 
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "interpreter/invocationCounter.hpp"
+#include "runtime/frame.hpp"
 
 // This file specializes the assembler with interpreter-specific macros
 
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -36,18 +36,7 @@
 #include "runtime/basicLock.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/sharedRuntime.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 
 // Implementation of InterpreterMacroAssembler
--- a/src/cpu/x86/vm/interp_masm_x86_64.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/interp_masm_x86_64.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,8 +25,10 @@
 #ifndef CPU_X86_VM_INTERP_MASM_X86_64_HPP
 #define CPU_X86_VM_INTERP_MASM_X86_64_HPP
 
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "interpreter/invocationCounter.hpp"
+#include "runtime/frame.hpp"
 
 // This file specializes the assembler with interpreter-specific macros
 
--- a/src/cpu/x86/vm/interpreter_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/interpreter_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterGenerator.hpp"
--- a/src/cpu/x86/vm/interpreter_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/interpreter_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterGenerator.hpp"
--- a/src/cpu/x86/vm/jniFastGetField_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/jniFastGetField_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "memory/resourceArea.hpp"
 #include "prims/jniFastGetField.hpp"
 #include "prims/jvm_misc.hpp"
--- a/src/cpu/x86/vm/jniFastGetField_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/jniFastGetField_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "memory/resourceArea.hpp"
 #include "prims/jniFastGetField.hpp"
 #include "prims/jvm_misc.hpp"
--- a/src/cpu/x86/vm/jni_x86.h	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/jni_x86.h	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -38,14 +38,9 @@
 
   #define JNICALL
   typedef int jint;
-#if defined(_LP64) && !defined(__APPLE__)
+#if defined(_LP64)
   typedef long jlong;
 #else
-  /*
-   * On _LP64 __APPLE__ "long" and "long long" are both 64 bits,
-   * but we use the "long long" typedef to avoid complaints from
-   * the __APPLE__ compiler about fprintf formats.
-   */
   typedef long long jlong;
 #endif
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,6370 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/cardTableModRefBS.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#endif
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#define STOP(error) block_comment(error); stop(error)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+
+#ifdef ASSERT
+bool AbstractAssembler::pd_check_instruction_mark() { return true; }
+#endif
+
+static Assembler::Condition reverse[] = {
+    Assembler::noOverflow     /* overflow      = 0x0 */ ,
+    Assembler::overflow       /* noOverflow    = 0x1 */ ,
+    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
+    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
+    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
+    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
+    Assembler::above          /* belowEqual    = 0x6 */ ,
+    Assembler::belowEqual     /* above         = 0x7 */ ,
+    Assembler::positive       /* negative      = 0x8 */ ,
+    Assembler::negative       /* positive      = 0x9 */ ,
+    Assembler::noParity       /* parity        = 0xa */ ,
+    Assembler::parity         /* noParity      = 0xb */ ,
+    Assembler::greaterEqual   /* less          = 0xc */ ,
+    Assembler::less           /* greaterEqual  = 0xd */ ,
+    Assembler::greater        /* lessEqual     = 0xe */ ,
+    Assembler::lessEqual      /* greater       = 0xf */
+};
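+
+// reverse[] maps each condition code to its negation. A sketch of one
+// typical use: emulate cmov on CPUs without CMOV support by branching
+// over the move when the opposite condition holds:
+//
+//   Label L;
+//   jccb(reverse[cc], L);   // skip the move unless 'cc' holds
+//   movl(dst, src);
+//   bind(L);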
+
+
+// Implementation of MacroAssembler
+
+// First all the versions that have distinct versions depending on 32/64 bit
+// Unless the difference is trivial (1 line or so).
+
+#ifndef _LP64
+
+// 32bit versions
+
+Address MacroAssembler::as_Address(AddressLiteral adr) {
+  return Address(adr.target(), adr.rspec());
+}
+
+Address MacroAssembler::as_Address(ArrayAddress adr) {
+  return Address::make_array(adr);
+}
+
+int MacroAssembler::biased_locking_enter(Register lock_reg,
+                                         Register obj_reg,
+                                         Register swap_reg,
+                                         Register tmp_reg,
+                                         bool swap_reg_contains_mark,
+                                         Label& done,
+                                         Label* slow_case,
+                                         BiasedLockingCounters* counters) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert(swap_reg == rax, "swap_reg must be rax for cmpxchg");
+  assert_different_registers(lock_reg, obj_reg, swap_reg);
+
+  if (PrintBiasedLockingStatistics && counters == NULL)
+    counters = BiasedLocking::counters();
+
+  bool need_tmp_reg = false;
+  if (tmp_reg == noreg) {
+    need_tmp_reg = true;
+    tmp_reg = lock_reg;
+  } else {
+    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+  }
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
+  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
+  Address saved_mark_addr(lock_reg, 0);
+
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  // First check to see whether biasing is even enabled for this object
+  Label cas_label;
+  int null_check_offset = -1;
+  if (!swap_reg_contains_mark) {
+    null_check_offset = offset();
+    movl(swap_reg, mark_addr);
+  }
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  movl(tmp_reg, swap_reg);
+  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  jcc(Assembler::notEqual, cas_label);
+  // The bias pattern is present in the object's header. Need to check
+  // whether the bias owner and the epoch are both still current.
+  // Note that because there is no current thread register on x86 we
+  // need to store off the mark word we read out of the object to
+  // avoid reloading it and needing to recheck invariants below. This
+  // store is unfortunate but it makes the overall code shorter and
+  // simpler.
+  movl(saved_mark_addr, swap_reg);
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  get_thread(tmp_reg);
+  xorl(swap_reg, tmp_reg);
+  if (swap_reg_contains_mark) {
+    null_check_offset = offset();
+  }
+  movl(tmp_reg, klass_addr);
+  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
+  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
+  }
+  jcc(Assembler::equal, done);
+
+  Label try_revoke_bias;
+  Label try_rebias;
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
+  jcc(Assembler::notZero, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  testl(swap_reg, markOopDesc::epoch_mask_in_place);
+  jcc(Assembler::notZero, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  movl(swap_reg, saved_mark_addr);
+  andl(swap_reg,
+       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  get_thread(tmp_reg);
+  orl(tmp_reg, swap_reg);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  get_thread(tmp_reg);
+  movl(swap_reg, klass_addr);
+  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
+  movl(swap_reg, saved_mark_addr);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // If the biasing toward our thread failed, then another thread
+  // succeeded in biasing it toward itself and we need to revoke that
+  // bias. The revocation will occur in the runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  movl(swap_reg, saved_mark_addr);
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  movl(tmp_reg, klass_addr);
+  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // Fall through to the normal CAS-based lock, because no matter what
+  // the result of the above CAS, some thread must have succeeded in
+  // removing the bias bit from the object's header.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
+  }
+
+  bind(cas_label);
+
+  return null_check_offset;
+}
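+
+// The fast-path test above, as a C sketch: the lock is already biased
+// to this thread iff
+//
+//   ((mark ^ (thread | prototype_header)) & ~age_mask) == 0
+//
+// i.e. the mark word equals the thread pointer or'ed with the class's
+// prototype header, ignoring only the age bits. JavaThread alignment
+// guarantees the thread pointer and the low pattern bits are disjoint.
+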
+void MacroAssembler::call_VM_leaf_base(address entry_point,
+                                       int number_of_arguments) {
+  call(RuntimeAddress(entry_point));
+  increment(rsp, number_of_arguments * wordSize);
+}
+
+void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
+  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
+  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::cmpoop(Address src1, jobject obj) {
+  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::cmpoop(Register src1, jobject obj) {
+  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::extend_sign(Register hi, Register lo) {
+  // According to Intel Doc. AP-526, "Integer Divide", p.18.
+  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
+    cdql();
+  } else {
+    movl(hi, lo);
+    sarl(hi, 31);
+  }
+}
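+
+// Both paths compute hi = lo >> 31 (arithmetic shift, yielding 0 or
+// -1); cdq does exactly that in one instruction for rdx:rax. C sketch:
+//
+//   int32_t hi = lo >> 31;   // broadcast the sign bit of lo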
+
+void MacroAssembler::jC2(Register tmp, Label& L) {
+  // set parity bit if FPU flag C2 is set (via rax)
+  save_rax(tmp);
+  fwait(); fnstsw_ax();
+  sahf();
+  restore_rax(tmp);
+  // branch
+  jcc(Assembler::parity, L);
+}
+
+void MacroAssembler::jnC2(Register tmp, Label& L) {
+  // set parity bit if FPU flag C2 is set (via rax)
+  save_rax(tmp);
+  fwait(); fnstsw_ax();
+  sahf();
+  restore_rax(tmp);
+  // branch
+  jcc(Assembler::noParity, L);
+}
+
+// 32bit can do a case table jump in one instruction, but we no longer allow the base
+// to be installed in the Address class
+void MacroAssembler::jump(ArrayAddress entry) {
+  jmp(as_Address(entry));
+}
+
+// Note: y_lo will be destroyed
+void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
+  // Long compare for Java (semantics as described in JVM spec.)
+  Label high, low, done;
+
+  cmpl(x_hi, y_hi);
+  jcc(Assembler::less, low);
+  jcc(Assembler::greater, high);
+  // x_hi is the return register
+  xorl(x_hi, x_hi);
+  cmpl(x_lo, y_lo);
+  jcc(Assembler::below, low);
+  jcc(Assembler::equal, done);
+
+  bind(high);
+  xorl(x_hi, x_hi);
+  increment(x_hi);
+  jmp(done);
+
+  bind(low);
+  xorl(x_hi, x_hi);
+  decrementl(x_hi);
+
+  bind(done);
+}
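+
+// The Java lcmp semantics implemented above, as a C sketch (the result
+// ends up in x_hi):
+//
+//   int lcmp(int64_t x, int64_t y) {
+//     return x < y ? -1 : (x > y ? 1 : 0);
+//   }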
+
+void MacroAssembler::lea(Register dst, AddressLiteral src) {
+  mov_literal32(dst, (int32_t)src.target(), src.rspec());
+}
+
+void MacroAssembler::lea(Address dst, AddressLiteral adr) {
+  // leal(dst, as_Address(adr));
+  // see note in movl as to why we must use a move
+  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
+}
+
+void MacroAssembler::leave() {
+  mov(rsp, rbp);
+  pop(rbp);
+}
+
+void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
+  // Multiplication of two Java long values stored on the stack
+  // as illustrated below. Result is in rdx:rax.
+  //
+  // rsp ---> [  ??  ] \               \
+  //            ....    | y_rsp_offset  |
+  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
+  //          [ y_hi ]                  | (in bytes)
+  //            ....                    |
+  //          [ x_lo ]                 /
+  //          [ x_hi ]
+  //            ....
+  //
+  // Basic idea: lo(result) = lo(x_lo * y_lo)
+  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
+  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
+  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
+  Label quick;
+  // load x_hi, y_hi and check if quick
+  // multiplication is possible
+  movl(rbx, x_hi);
+  movl(rcx, y_hi);
+  movl(rax, rbx);
+  orl(rbx, rcx);                                 // rbx = 0 <=> x_hi = 0 and y_hi = 0
+  jcc(Assembler::zero, quick);                   // if rbx = 0 do quick multiply
+  // do full multiplication
+  // 1st step
+  mull(y_lo);                                    // x_hi * y_lo
+  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx
+  // 2nd step
+  movl(rax, x_lo);
+  mull(rcx);                                     // x_lo * y_hi
+  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx
+  // 3rd step
+  bind(quick);                                   // note: rbx = 0 if quick multiply!
+  movl(rax, x_lo);
+  mull(y_lo);                                    // x_lo * y_lo
+  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
+}
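+
+// The identity above in C; only the low 64 bits of the product are
+// kept, matching Java long multiplication (unsigned 32-bit arithmetic
+// wraps, which is exactly what is wanted here):
+//
+//   uint64_t lmul(uint32_t x_hi, uint32_t x_lo,
+//                 uint32_t y_hi, uint32_t y_lo) {
+//     uint64_t lo = (uint64_t)x_lo * y_lo;
+//     uint32_t hi = (uint32_t)(lo >> 32) + x_hi * y_lo + x_lo * y_hi;
+//     return ((uint64_t)hi << 32) | (uint32_t)lo;
+//   }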
+
+void MacroAssembler::lneg(Register hi, Register lo) {
+  negl(lo);
+  adcl(hi, 0);
+  negl(hi);
+}
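+
+// lneg computes -(hi:lo). C sketch: negate lo, then fold the borrow
+// into hi before negating it (negl sets CF exactly when lo != 0):
+//
+//   uint32_t borrow = (lo != 0);
+//   lo = -lo;
+//   hi = -(hi + borrow);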
+
+void MacroAssembler::lshl(Register hi, Register lo) {
+  // Java shift left long support (semantics as described in JVM spec., p.305)
+  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
+  // shift value is in rcx !
+  assert(hi != rcx, "must not use rcx");
+  assert(lo != rcx, "must not use rcx");
+  const Register s = rcx;                        // shift count
+  const int      n = BitsPerWord;
+  Label L;
+  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
+  cmpl(s, n);                                    // if (s < n)
+  jcc(Assembler::less, L);                       // else (s >= n)
+  movl(hi, lo);                                  // x := x << n
+  xorl(lo, lo);
+  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
+  bind(L);                                       // s (mod n) < n
+  shldl(hi, lo);                                 // x := x << s
+  shll(lo);
+}
+
+
+void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
+  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
+  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
+  assert(hi != rcx, "must not use rcx");
+  assert(lo != rcx, "must not use rcx");
+  const Register s = rcx;                        // shift count
+  const int      n = BitsPerWord;
+  Label L;
+  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
+  cmpl(s, n);                                    // if (s < n)
+  jcc(Assembler::less, L);                       // else (s >= n)
+  movl(lo, hi);                                  // x := x >> n
+  if (sign_extension) sarl(hi, 31);
+  else                xorl(hi, hi);
+  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
+  bind(L);                                       // s (mod n) < n
+  shrdl(lo, hi);                                 // x := x >> s
+  if (sign_extension) sarl(hi);
+  else                shrl(hi);
+}
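+
+// The decomposition behind lshl/lshr, as a C sketch of lshr: counts
+// >= 32 are handled by a register move (a shift by exactly 32) and
+// only the remainder is shifted; the explicit s -= 32 is not needed
+// in the assembly because x86 shifts use the count mod 32 anyway:
+//
+//   uint64_t lshr(uint32_t hi, uint32_t lo, unsigned s) {
+//     s &= 0x3f;
+//     if (s >= 32) { lo = hi; hi = 0; s -= 32; }  // move == shift by 32
+//     uint32_t new_lo = (s == 0) ? lo : ((lo >> s) | (hi << (32 - s)));
+//     return ((uint64_t)(hi >> s) << 32) | new_lo; // shrd + shr
+//   }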
+
+void MacroAssembler::movoop(Register dst, jobject obj) {
+  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::movoop(Address dst, jobject obj) {
+  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
+  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
+  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::movptr(Register dst, AddressLiteral src) {
+  if (src.is_lval()) {
+    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
+  } else {
+    movl(dst, as_Address(src));
+  }
+}
+
+void MacroAssembler::movptr(ArrayAddress dst, Register src) {
+  movl(as_Address(dst), src);
+}
+
+void MacroAssembler::movptr(Register dst, ArrayAddress src) {
+  movl(dst, as_Address(src));
+}
+
+// src should NEVER be a real pointer. Use AddressLiteral for true pointers
+void MacroAssembler::movptr(Address dst, intptr_t src) {
+  movl(dst, src);
+}
+
+
+void MacroAssembler::pop_callee_saved_registers() {
+  pop(rcx);
+  pop(rdx);
+  pop(rdi);
+  pop(rsi);
+}
+
+void MacroAssembler::pop_fTOS() {
+  fld_d(Address(rsp, 0));
+  addl(rsp, 2 * wordSize);
+}
+
+void MacroAssembler::push_callee_saved_registers() {
+  push(rsi);
+  push(rdi);
+  push(rdx);
+  push(rcx);
+}
+
+void MacroAssembler::push_fTOS() {
+  subl(rsp, 2 * wordSize);
+  fstp_d(Address(rsp, 0));
+}
+
+
+void MacroAssembler::pushoop(jobject obj) {
+  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::pushklass(Metadata* obj) {
+  push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::pushptr(AddressLiteral src) {
+  if (src.is_lval()) {
+    push_literal32((int32_t)src.target(), src.rspec());
+  } else {
+    pushl(as_Address(src));
+  }
+}
+
+void MacroAssembler::set_word_if_not_zero(Register dst) {
+  xorl(dst, dst);
+  set_byte_if_not_zero(dst);
+}
+
+static void pass_arg0(MacroAssembler* masm, Register arg) {
+  masm->push(arg);
+}
+
+static void pass_arg1(MacroAssembler* masm, Register arg) {
+  masm->push(arg);
+}
+
+static void pass_arg2(MacroAssembler* masm, Register arg) {
+  masm->push(arg);
+}
+
+static void pass_arg3(MacroAssembler* masm, Register arg) {
+  masm->push(arg);
+}
+
+#ifndef PRODUCT
+extern "C" void findpc(intptr_t x);
+#endif
+
+void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
+  // In order to get locks to work, we need to fake an in_VM state
+  JavaThread* thread = JavaThread::current();
+  JavaThreadState saved_state = thread->thread_state();
+  thread->set_thread_state(_thread_in_vm);
+  if (ShowMessageBoxOnError) {
+    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+      ttyLocker ttyl;
+      BytecodeCounter::print();
+    }
+    // To see where a verify_oop failed, get $ebx+40/X for this frame.
+    // This is the value of eip which points to where verify_oop will return.
+    if (os::message_box(msg, "Execution stopped, print registers?")) {
+      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
+      BREAKPOINT;
+    }
+  } else {
+    ttyLocker ttyl;
+    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
+  }
+  // Don't assert holding the ttyLock
+  assert(false, err_msg("DEBUG MESSAGE: %s", msg));
+  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
+}
+
+void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
+  ttyLocker ttyl;
+  FlagSetting fs(Debugging, true);
+  tty->print_cr("eip = 0x%08x", eip);
+#ifndef PRODUCT
+  if ((WizardMode || Verbose) && PrintMiscellaneous) {
+    tty->cr();
+    findpc(eip);
+    tty->cr();
+  }
+#endif
+#define PRINT_REG(rax) \
+  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
+  PRINT_REG(rax);
+  PRINT_REG(rbx);
+  PRINT_REG(rcx);
+  PRINT_REG(rdx);
+  PRINT_REG(rdi);
+  PRINT_REG(rsi);
+  PRINT_REG(rbp);
+  PRINT_REG(rsp);
+#undef PRINT_REG
+  // Print some words near top of stack.
+  int* dump_sp = (int*) rsp;
+  for (int col1 = 0; col1 < 8; col1++) {
+    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
+    os::print_location(tty, *dump_sp++);
+  }
+  for (int row = 0; row < 16; row++) {
+    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
+    for (int col = 0; col < 8; col++) {
+      tty->print(" 0x%08x", *dump_sp++);
+    }
+    tty->cr();
+  }
+  // Print some instructions around pc:
+  Disassembler::decode((address)eip-64, (address)eip);
+  tty->print_cr("--------");
+  Disassembler::decode((address)eip, (address)eip+32);
+}
+
+void MacroAssembler::stop(const char* msg) {
+  ExternalAddress message((address)msg);
+  // push address of message
+  pushptr(message.addr());
+  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
+  pusha();                                            // push registers
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
+  hlt();
+}
+
+void MacroAssembler::warn(const char* msg) {
+  push_CPU_state();
+
+  ExternalAddress message((address) msg);
+  // push address of message
+  pushptr(message.addr());
+
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
+  addl(rsp, wordSize);       // discard argument
+  pop_CPU_state();
+}
+
+void MacroAssembler::print_state() {
+  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
+  pusha();                                            // push registers
+
+  push_CPU_state();
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
+  pop_CPU_state();
+
+  popa();
+  addl(rsp, wordSize);
+}
+
+#else // _LP64
+
+// 64 bit versions
+
+Address MacroAssembler::as_Address(AddressLiteral adr) {
+  // amd64 always does this as a pc-rel
+  // we can be absolute or disp based on the instruction type
+  // jmp/call are displacements, others are absolute
+  assert(!adr.is_lval(), "must be rval");
+  assert(reachable(adr), "must be");
+  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
+
+}
+
+Address MacroAssembler::as_Address(ArrayAddress adr) {
+  AddressLiteral base = adr.base();
+  lea(rscratch1, base);
+  Address index = adr.index();
+  assert(index._disp == 0, "must not have disp"); // maybe it can?
+  Address array(rscratch1, index._index, index._scale, index._disp);
+  return array;
+}
+
+int MacroAssembler::biased_locking_enter(Register lock_reg,
+                                         Register obj_reg,
+                                         Register swap_reg,
+                                         Register tmp_reg,
+                                         bool swap_reg_contains_mark,
+                                         Label& done,
+                                         Label* slow_case,
+                                         BiasedLockingCounters* counters) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
+  assert(tmp_reg != noreg, "tmp_reg must be supplied");
+  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
+  Address saved_mark_addr(lock_reg, 0);
+
+  if (PrintBiasedLockingStatistics && counters == NULL)
+    counters = BiasedLocking::counters();
+
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  // First check to see whether biasing is even enabled for this object
+  Label cas_label;
+  int null_check_offset = -1;
+  if (!swap_reg_contains_mark) {
+    null_check_offset = offset();
+    movq(swap_reg, mark_addr);
+  }
+  movq(tmp_reg, swap_reg);
+  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
+  jcc(Assembler::notEqual, cas_label);
+  // The bias pattern is present in the object's header. Need to check
+  // whether the bias owner and the epoch are both still current.
+  load_prototype_header(tmp_reg, obj_reg);
+  orq(tmp_reg, r15_thread);
+  xorq(tmp_reg, swap_reg);
+  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
+  }
+  jcc(Assembler::equal, done);
+
+  Label try_revoke_bias;
+  Label try_rebias;
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  jcc(Assembler::notZero, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
+  jcc(Assembler::notZero, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  andq(swap_reg,
+       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+  movq(tmp_reg, swap_reg);
+  orq(tmp_reg, r15_thread);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgq(tmp_reg, Address(obj_reg, 0));
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  load_prototype_header(tmp_reg, obj_reg);
+  orq(tmp_reg, r15_thread);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgq(tmp_reg, Address(obj_reg, 0));
+  // If the biasing toward our thread failed, then another thread
+  // succeeded in biasing it toward itself and we need to revoke that
+  // bias. The revocation will occur in the runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  load_prototype_header(tmp_reg, obj_reg);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgq(tmp_reg, Address(obj_reg, 0));
+  // Fall through to the normal CAS-based lock, because no matter what
+  // the result of the above CAS, some thread must have succeeded in
+  // removing the bias bit from the object's header.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
+  }
+
+  bind(cas_label);
+
+  return null_check_offset;
+}
+
+void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
+  Label L, E;
+
+#ifdef _WIN64
+  // Windows always allocates space for its register args
+  assert(num_args <= 4, "only register arguments supported");
+  subq(rsp, frame::arg_reg_save_area_bytes);
+#endif
+
+  // Align stack if necessary
+  testl(rsp, 15);
+  jcc(Assembler::zero, L);
+
+  subq(rsp, 8);
+  {
+    call(RuntimeAddress(entry_point));
+  }
+  addq(rsp, 8);
+  jmp(E);
+
+  bind(L);
+  {
+    call(RuntimeAddress(entry_point));
+  }
+
+  bind(E);
+
+#ifdef _WIN64
+  // restore stack pointer
+  addq(rsp, frame::arg_reg_save_area_bytes);
+#endif
+
+}
+
+void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
+  assert(!src2.is_lval(), "should use cmpptr");
+
+  if (reachable(src2)) {
+    cmpq(src1, as_Address(src2));
+  } else {
+    lea(rscratch1, src2);
+    Assembler::cmpq(src1, Address(rscratch1, 0));
+  }
+}
+
+int MacroAssembler::corrected_idivq(Register reg) {
+  // Full implementation of Java ldiv and lrem; checks for special
+  // case as described in JVM spec., p.243 & p.271.  The function
+  // returns the (pc) offset of the idivl instruction - may be needed
+  // for implicit exceptions.
+  //
+  //         normal case                           special case
+  //
+  // input : rax: dividend                         min_long
+  //         reg: divisor   (may not be eax/edx)   -1
+  //
+  // output: rax: quotient  (= rax idiv reg)       min_long
+  //         rdx: remainder (= rax irem reg)       0
+  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
+  static const int64_t min_long = 0x8000000000000000;
+  Label normal_case, special_case;
+
+  // check for special case
+  cmp64(rax, ExternalAddress((address) &min_long));
+  jcc(Assembler::notEqual, normal_case);
+  xorl(rdx, rdx); // prepare rdx for possible special case (where
+                  // remainder = 0)
+  cmpq(reg, -1);
+  jcc(Assembler::equal, special_case);
+
+  // handle normal case
+  bind(normal_case);
+  cdqq();
+  int idivq_offset = offset();
+  idivq(reg);
+
+  // normal and special case exit
+  bind(special_case);
+
+  return idivq_offset;
+}
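+
+// The special case filtered above, as a C sketch: Java requires
+// Long.MIN_VALUE / -1 == Long.MIN_VALUE with remainder 0, while a raw
+// idivq would raise #DE on that overflow:
+//
+//   if (dividend == INT64_MIN && divisor == -1) {
+//     quotient = INT64_MIN; remainder = 0;
+//   } else {
+//     quotient = dividend / divisor; remainder = dividend % divisor;
+//   }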
+
+void MacroAssembler::decrementq(Register reg, int value) {
+  if (value == min_jint) { subq(reg, value); return; }
+  if (value <  0) { incrementq(reg, -value); return; }
+  if (value == 0) {                        ; return; }
+  if (value == 1 && UseIncDec) { decq(reg) ; return; }
+  /* else */      { subq(reg, value)       ; return; }
+}
+
+void MacroAssembler::decrementq(Address dst, int value) {
+  if (value == min_jint) { subq(dst, value); return; }
+  if (value <  0) { incrementq(dst, -value); return; }
+  if (value == 0) {                        ; return; }
+  if (value == 1 && UseIncDec) { decq(dst) ; return; }
+  /* else */      { subq(dst, value)       ; return; }
+}
+
+void MacroAssembler::incrementq(Register reg, int value) {
+  if (value == min_jint) { addq(reg, value); return; }
+  if (value <  0) { decrementq(reg, -value); return; }
+  if (value == 0) {                        ; return; }
+  if (value == 1 && UseIncDec) { incq(reg) ; return; }
+  /* else */      { addq(reg, value)       ; return; }
+}
+
+void MacroAssembler::incrementq(Address dst, int value) {
+  if (value == min_jint) { addq(dst, value); return; }
+  if (value <  0) { decrementq(dst, -value); return; }
+  if (value == 0) {                        ; return; }
+  if (value == 1 && UseIncDec) { incq(dst) ; return; }
+  /* else */      { addq(dst, value)       ; return; }
+}
+
+// 32bit can do a case table jump in one instruction, but we no longer allow the base
+// to be installed in the Address class
+void MacroAssembler::jump(ArrayAddress entry) {
+  lea(rscratch1, entry.base());
+  Address dispatch = entry.index();
+  assert(dispatch._base == noreg, "must be");
+  dispatch._base = rscratch1;
+  jmp(dispatch);
+}
+
+void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
+  ShouldNotReachHere(); // 64bit doesn't use two regs
+  cmpq(x_lo, y_lo);
+}
+
+void MacroAssembler::lea(Register dst, AddressLiteral src) {
+  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
+}
+
+void MacroAssembler::lea(Address dst, AddressLiteral adr) {
+  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
+  movptr(dst, rscratch1);
+}
+
+void MacroAssembler::leave() {
+  // %%% is this really better? Why not on 32bit too?
+  emit_int8((unsigned char)0xC9); // LEAVE
+}
+
+void MacroAssembler::lneg(Register hi, Register lo) {
+  ShouldNotReachHere(); // 64bit doesn't use two regs
+  negq(lo);
+}
+
+void MacroAssembler::movoop(Register dst, jobject obj) {
+  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::movoop(Address dst, jobject obj) {
+  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
+  movq(dst, rscratch1);
+}
+
+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
+  mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
+}
+
+void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
+  mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
+  movq(dst, rscratch1);
+}
+
+void MacroAssembler::movptr(Register dst, AddressLiteral src) {
+  if (src.is_lval()) {
+    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
+  } else {
+    if (reachable(src)) {
+      movq(dst, as_Address(src));
+    } else {
+      lea(rscratch1, src);
+      movq(dst, Address(rscratch1,0));
+    }
+  }
+}
+
+void MacroAssembler::movptr(ArrayAddress dst, Register src) {
+  movq(as_Address(dst), src);
+}
+
+void MacroAssembler::movptr(Register dst, ArrayAddress src) {
+  movq(dst, as_Address(src));
+}
+
+// src should NEVER be a real pointer. Use AddressLiteral for true pointers
+void MacroAssembler::movptr(Address dst, intptr_t src) {
+  mov64(rscratch1, src);
+  movq(dst, rscratch1);
+}
+
+// These are mostly for initializing NULL
+void MacroAssembler::movptr(Address dst, int32_t src) {
+  movslq(dst, src);
+}
+
+void MacroAssembler::movptr(Register dst, int32_t src) {
+  mov64(dst, (intptr_t)src);
+}
+
+void MacroAssembler::pushoop(jobject obj) {
+  movoop(rscratch1, obj);
+  push(rscratch1);
+}
+
+void MacroAssembler::pushklass(Metadata* obj) {
+  mov_metadata(rscratch1, obj);
+  push(rscratch1);
+}
+
+void MacroAssembler::pushptr(AddressLiteral src) {
+  lea(rscratch1, src);
+  if (src.is_lval()) {
+    push(rscratch1);
+  } else {
+    pushq(Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::reset_last_Java_frame(bool clear_fp,
+                                           bool clear_pc) {
+  // we must set sp to zero to clear frame
+  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
+  // must clear fp, so that compiled frames are not confused; it is
+  // possible that we need it only for debugging
+  if (clear_fp) {
+    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
+  }
+
+  if (clear_pc) {
+    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
+  }
+}
+
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
+                                         Register last_java_fp,
+                                         address  last_java_pc) {
+  // determine last_java_sp register
+  if (!last_java_sp->is_valid()) {
+    last_java_sp = rsp;
+  }
+
+  // last_java_fp is optional
+  if (last_java_fp->is_valid()) {
+    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
+           last_java_fp);
+  }
+
+  // last_java_pc is optional
+  if (last_java_pc != NULL) {
+    Address java_pc(r15_thread,
+                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
+    lea(rscratch1, InternalAddress(last_java_pc));
+    movptr(java_pc, rscratch1);
+  }
+
+  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
+}
+
+static void pass_arg0(MacroAssembler* masm, Register arg) {
+  if (c_rarg0 != arg ) {
+    masm->mov(c_rarg0, arg);
+  }
+}
+
+static void pass_arg1(MacroAssembler* masm, Register arg) {
+  if (c_rarg1 != arg ) {
+    masm->mov(c_rarg1, arg);
+  }
+}
+
+static void pass_arg2(MacroAssembler* masm, Register arg) {
+  if (c_rarg2 != arg ) {
+    masm->mov(c_rarg2, arg);
+  }
+}
+
+static void pass_arg3(MacroAssembler* masm, Register arg) {
+  if (c_rarg3 != arg ) {
+    masm->mov(c_rarg3, arg);
+  }
+}
+
+void MacroAssembler::stop(const char* msg) {
+  address rip = pc();
+  pusha(); // get regs on stack
+  lea(c_rarg0, ExternalAddress((address) msg));
+  lea(c_rarg1, InternalAddress(rip));
+  movq(c_rarg2, rsp); // pass pointer to regs array
+  andq(rsp, -16); // align stack as required by ABI
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
+  hlt();
+}
+
+void MacroAssembler::warn(const char* msg) {
+  push(rbp);
+  movq(rbp, rsp);
+  andq(rsp, -16);     // align stack as required by push_CPU_state and call
+  push_CPU_state();   // keeps alignment at 16 bytes
+  lea(c_rarg0, ExternalAddress((address) msg));
+  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
+  pop_CPU_state();
+  mov(rsp, rbp);
+  pop(rbp);
+}
+
+void MacroAssembler::print_state() {
+  address rip = pc();
+  pusha();            // get regs on stack
+  push(rbp);
+  movq(rbp, rsp);
+  andq(rsp, -16);     // align stack as required by push_CPU_state and call
+  push_CPU_state();   // keeps alignment at 16 bytes
+
+  lea(c_rarg0, InternalAddress(rip));
+  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
+  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);
+
+  pop_CPU_state();
+  mov(rsp, rbp);
+  pop(rbp);
+  popa();
+}
+
+#ifndef PRODUCT
+extern "C" void findpc(intptr_t x);
+#endif
+
+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
+  // In order to get locks to work, we need to fake an in_VM state
+  if (ShowMessageBoxOnError) {
+    JavaThread* thread = JavaThread::current();
+    JavaThreadState saved_state = thread->thread_state();
+    thread->set_thread_state(_thread_in_vm);
+#ifndef PRODUCT
+    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+      ttyLocker ttyl;
+      BytecodeCounter::print();
+    }
+#endif
+    // To see where a verify_oop failed, get $ebx+40/X for this frame.
+    // XXX correct this offset for amd64
+    // This is the value of eip which points to where verify_oop will return.
+    if (os::message_box(msg, "Execution stopped, print registers?")) {
+      print_state64(pc, regs);
+      BREAKPOINT;
+      assert(false, "start up GDB");
+    }
+    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
+  } else {
+    ttyLocker ttyl;
+    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
+                    msg);
+    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
+  }
+}
+
+void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
+  ttyLocker ttyl;
+  FlagSetting fs(Debugging, true);
+  tty->print_cr("rip = 0x%016lx", pc);
+#ifndef PRODUCT
+  tty->cr();
+  findpc(pc);
+  tty->cr();
+#endif
+#define PRINT_REG(rax, value) \
+  { tty->print("%s = ", #rax); os::print_location(tty, value); }
+  PRINT_REG(rax, regs[15]);
+  PRINT_REG(rbx, regs[12]);
+  PRINT_REG(rcx, regs[14]);
+  PRINT_REG(rdx, regs[13]);
+  PRINT_REG(rdi, regs[8]);
+  PRINT_REG(rsi, regs[9]);
+  PRINT_REG(rbp, regs[10]);
+  PRINT_REG(rsp, regs[11]);
+  PRINT_REG(r8 , regs[7]);
+  PRINT_REG(r9 , regs[6]);
+  PRINT_REG(r10, regs[5]);
+  PRINT_REG(r11, regs[4]);
+  PRINT_REG(r12, regs[3]);
+  PRINT_REG(r13, regs[2]);
+  PRINT_REG(r14, regs[1]);
+  PRINT_REG(r15, regs[0]);
+#undef PRINT_REG
+  // Print some words near the top of the stack.
+  int64_t* rsp = (int64_t*) regs[11];
+  int64_t* dump_sp = rsp;
+  for (int col1 = 0; col1 < 8; col1++) {
+    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
+    os::print_location(tty, *dump_sp++);
+  }
+  for (int row = 0; row < 25; row++) {
+    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
+    for (int col = 0; col < 4; col++) {
+      tty->print(" 0x%016lx", *dump_sp++);
+    }
+    tty->cr();
+  }
+  // Print some instructions around pc:
+  Disassembler::decode((address)pc-64, (address)pc);
+  tty->print_cr("--------");
+  Disassembler::decode((address)pc, (address)pc+32);
+}
+
+#endif // _LP64
+
+// Now versions that are common to 32/64 bit
+
+void MacroAssembler::addptr(Register dst, int32_t imm32) {
+  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
+}
+
+void MacroAssembler::addptr(Register dst, Register src) {
+  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
+}
+
+void MacroAssembler::addptr(Address dst, Register src) {
+  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
+}
+
+void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::addsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::addsd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    addss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    addss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::align(int modulus) {
+  if (offset() % modulus != 0) {
+    nop(modulus - (offset() % modulus));
+  }
+}
+
+void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-masking with aligned address.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::andpd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::andpd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-masking with aligned address.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::andps(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::andps(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::andptr(Register dst, int32_t imm32) {
+  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
+}
+
+void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
+  pushf();
+  if (os::is_MP())
+    lock();
+  incrementl(counter_addr);
+  popf();
+}
+
+// Writes to successive stack pages until the requested size is reached, to
+// check for stack overflow plus shadow pages.  This clobbers tmp.
+void MacroAssembler::bang_stack_size(Register size, Register tmp) {
+  movptr(tmp, rsp);
+  // Bang stack for total size given plus shadow page size.
+  // Bang one page at a time because large size can bang beyond yellow and
+  // red zones.
+  Label loop;
+  bind(loop);
+  movl(Address(tmp, (-os::vm_page_size())), size );
+  subptr(tmp, os::vm_page_size());
+  subl(size, os::vm_page_size());
+  jcc(Assembler::greater, loop);
+
+  // Bang down shadow pages too.
+  // The -1 because we already subtracted 1 page.
+  for (int i = 0; i< StackShadowPages-1; i++) {
+    // this could be any sized move but it can be a debugging crumb,
+    // so the bigger the better.
+    movptr(Address(tmp, (-i*os::vm_page_size())), size );
+  }
+}
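+
+// Illustrative sketch (not part of the VM): in plain C++ the banging above
+// behaves roughly as follows, where page_size stands for os::vm_page_size()
+// and sp for the value copied from rsp:
+//
+//   char* tmp = sp;
+//   long  remaining = size;
+//   do {
+//     *(volatile long*)(tmp - page_size) = remaining;  // touch ("bang") the page
+//     tmp       -= page_size;
+//     remaining -= page_size;
+//   } while (remaining > 0);
+//   for (int i = 0; i < StackShadowPages - 1; i++)     // then the shadow pages
+//     *(volatile long*)(tmp - i * page_size) = remaining;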
+
+void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+
+  // Check for biased locking unlock case, which is a no-op
+  // Note: we do not have to check the thread ID for two reasons.
+  // First, the interpreter checks for IllegalMonitorStateException at
+  // a higher level. Second, if the bias was revoked while we held the
+  // lock, the object could not be rebiased toward another thread, so
+  // the bias bit would be clear.
+  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
+  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
+  jcc(Assembler::equal, done);
+}
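+
+// Illustrative sketch (not part of the VM): the three instructions above
+// amount to the following test on the object's mark word (mask and pattern
+// taken from markOopDesc):
+//
+//   intptr_t mark = *(intptr_t*)((char*)obj + mark_offset_in_bytes);
+//   if ((mark & biased_lock_mask_in_place) == biased_lock_pattern) {
+//     goto done;  // lock is biased: unlock is a no-op
+//   }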
+
+void MacroAssembler::c2bool(Register x) {
+  // implements x == 0 ? 0 : 1
+  // note: must only look at least-significant byte of x
+  //       since C-style booleans are stored in one byte
+  //       only! (was bug)
+  andl(x, 0xFF);
+  setb(Assembler::notZero, x);
+}
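+
+// Illustrative sketch (not part of the VM): c2bool is simply
+//
+//   x = ((x & 0xFF) != 0) ? 1 : 0;  // normalize a C-style bool to 0 or 1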
+
+// Wouldn't be needed if the AddressLiteral version had a new name
+void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
+  Assembler::call(L, rtype);
+}
+
+void MacroAssembler::call(Register entry) {
+  Assembler::call(entry);
+}
+
+void MacroAssembler::call(AddressLiteral entry) {
+  if (reachable(entry)) {
+    Assembler::call_literal(entry.target(), entry.rspec());
+  } else {
+    lea(rscratch1, entry);
+    Assembler::call(rscratch1);
+  }
+}
+
+void MacroAssembler::ic_call(address entry) {
+  RelocationHolder rh = virtual_call_Relocation::spec(pc());
+  movptr(rax, (intptr_t)Universe::non_oop_word());
+  call(AddressLiteral(entry, rh));
+}
+
+// Implementation of call_VM versions
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             bool check_exceptions) {
+  Label C, E;
+  call(C, relocInfo::none);
+  jmp(E);
+
+  bind(C);
+  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
+  ret(0);
+
+  bind(E);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             bool check_exceptions) {
+  Label C, E;
+  call(C, relocInfo::none);
+  jmp(E);
+
+  bind(C);
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
+  ret(0);
+
+  bind(E);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             bool check_exceptions) {
+  Label C, E;
+  call(C, relocInfo::none);
+  jmp(E);
+
+  bind(C);
+
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
+  ret(0);
+
+  bind(E);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             Register arg_3,
+                             bool check_exceptions) {
+  Label C, E;
+  call(C, relocInfo::none);
+  jmp(E);
+
+  bind(C);
+
+  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
+  pass_arg3(this, arg_3);
+
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  pass_arg2(this, arg_2);
+
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
+  ret(0);
+
+  bind(E);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             int number_of_arguments,
+                             bool check_exceptions) {
+  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
+  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             bool check_exceptions) {
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             bool check_exceptions) {
+
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             Register arg_3,
+                             bool check_exceptions) {
+  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
+  pass_arg3(this, arg_3);
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
+}
+
+void MacroAssembler::super_call_VM(Register oop_result,
+                                   Register last_java_sp,
+                                   address entry_point,
+                                   int number_of_arguments,
+                                   bool check_exceptions) {
+  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
+  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
+}
+
+void MacroAssembler::super_call_VM(Register oop_result,
+                                   Register last_java_sp,
+                                   address entry_point,
+                                   Register arg_1,
+                                   bool check_exceptions) {
+  pass_arg1(this, arg_1);
+  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
+}
+
+void MacroAssembler::super_call_VM(Register oop_result,
+                                   Register last_java_sp,
+                                   address entry_point,
+                                   Register arg_1,
+                                   Register arg_2,
+                                   bool check_exceptions) {
+
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
+}
+
+void MacroAssembler::super_call_VM(Register oop_result,
+                                   Register last_java_sp,
+                                   address entry_point,
+                                   Register arg_1,
+                                   Register arg_2,
+                                   Register arg_3,
+                                   bool check_exceptions) {
+  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
+  pass_arg3(this, arg_3);
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
+}
+
+void MacroAssembler::call_VM_base(Register oop_result,
+                                  Register java_thread,
+                                  Register last_java_sp,
+                                  address  entry_point,
+                                  int      number_of_arguments,
+                                  bool     check_exceptions) {
+  // determine java_thread register
+  if (!java_thread->is_valid()) {
+#ifdef _LP64
+    java_thread = r15_thread;
+#else
+    java_thread = rdi;
+    get_thread(java_thread);
+#endif // LP64
+  }
+  // determine last_java_sp register
+  if (!last_java_sp->is_valid()) {
+    last_java_sp = rsp;
+  }
+  // debugging support
+  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
+  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
+#ifdef ASSERT
+  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
+  // r12 is the heapbase.
+  LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
+#endif // ASSERT
+
+  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
+  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
+
+  // push java thread (becomes first argument of C function)
+
+  NOT_LP64(push(java_thread); number_of_arguments++);
+  LP64_ONLY(mov(c_rarg0, r15_thread));
+
+  // set last Java frame before call
+  assert(last_java_sp != rbp, "can't use ebp/rbp");
+
+  // Only interpreter should have to set fp
+  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
+
+  // do the call, remove parameters
+  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
+
+  // restore the thread (cannot use the pushed argument since arguments
+  // may be overwritten by C code generated by an optimizing compiler);
+  // however, we can use the register value directly if it is callee saved.
+  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
+    // rdi & rsi (also r15) are callee saved -> nothing to do
+#ifdef ASSERT
+    guarantee(java_thread != rax, "change this code");
+    push(rax);
+    { Label L;
+      get_thread(rax);
+      cmpptr(java_thread, rax);
+      jcc(Assembler::equal, L);
+      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
+      bind(L);
+    }
+    pop(rax);
+#endif
+  } else {
+    get_thread(java_thread);
+  }
+  // reset last Java frame
+  // Only interpreter should have to clear fp
+  reset_last_Java_frame(java_thread, true, false);
+
+#ifndef CC_INTERP
+   // C++ interp handles this in the interpreter
+  check_and_handle_popframe(java_thread);
+  check_and_handle_earlyret(java_thread);
+#endif /* CC_INTERP */
+
+  if (check_exceptions) {
+    // check for pending exceptions (java_thread is set upon return)
+    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
+#ifndef _LP64
+    jump_cc(Assembler::notEqual,
+            RuntimeAddress(StubRoutines::forward_exception_entry()));
+#else
+    // This used to conditionally jump to forward_exception however it is
+    // possible if we relocate that the branch will not reach. So we must jump
+    // around so we can always reach
+
+    Label ok;
+    jcc(Assembler::equal, ok);
+    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+    bind(ok);
+#endif // LP64
+  }
+
+  // get oop result if there is one and reset the value in the thread
+  if (oop_result->is_valid()) {
+    get_vm_result(oop_result, java_thread);
+  }
+}
+
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+
+  // Calculate the value for last_Java_sp.
+  // This is somewhat subtle. call_VM does an intermediate call
+  // which places a return address on the stack just under the
+  // stack pointer as the user finished with it. This allows
+  // us to retrieve last_Java_pc from last_Java_sp[-1].
+  // On 32bit we then have to push additional args on the stack to accomplish
+  // the actual requested call. On 64bit call_VM can only use register args,
+  // so the only extra space is the return address that call_VM created.
+  // This hopefully explains the calculations here.
+
+#ifdef _LP64
+  // We've pushed one address, correct last_Java_sp
+  lea(rax, Address(rsp, wordSize));
+#else
+  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
+#endif // LP64
+
+  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
+
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
+  call_VM_leaf_base(entry_point, number_of_arguments);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
+  pass_arg0(this, arg_0);
+  call_VM_leaf(entry_point, 1);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  call_VM_leaf(entry_point, 2);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
+  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  pass_arg2(this, arg_2);
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  call_VM_leaf(entry_point, 3);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 1);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 2);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
+  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  pass_arg2(this, arg_2);
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 3);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
+  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
+  pass_arg3(this, arg_3);
+  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  pass_arg2(this, arg_2);
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 4);
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
+  movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
+  movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
+  verify_oop(oop_result, "broken oop in call_VM_base");
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
+  movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
+  movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD);
+}
+
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
+}
+
+void MacroAssembler::check_and_handle_popframe(Register java_thread) {
+}
+
+void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
+  if (reachable(src1)) {
+    cmpl(as_Address(src1), imm);
+  } else {
+    lea(rscratch1, src1);
+    cmpl(Address(rscratch1, 0), imm);
+  }
+}
+
+void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
+  assert(!src2.is_lval(), "use cmpptr");
+  if (reachable(src2)) {
+    cmpl(src1, as_Address(src2));
+  } else {
+    lea(rscratch1, src2);
+    cmpl(src1, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::cmp32(Register src1, int32_t imm) {
+  Assembler::cmpl(src1, imm);
+}
+
+void MacroAssembler::cmp32(Register src1, Address src2) {
+  Assembler::cmpl(src1, src2);
+}
+
+void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
+  ucomisd(opr1, opr2);
+
+  Label L;
+  if (unordered_is_less) {
+    movl(dst, -1);
+    jcc(Assembler::parity, L);
+    jcc(Assembler::below , L);
+    movl(dst, 0);
+    jcc(Assembler::equal , L);
+    increment(dst);
+  } else { // unordered is greater
+    movl(dst, 1);
+    jcc(Assembler::parity, L);
+    jcc(Assembler::above , L);
+    movl(dst, 0);
+    jcc(Assembler::equal , L);
+    decrementl(dst);
+  }
+  bind(L);
+}
+
+void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
+  ucomiss(opr1, opr2);
+
+  Label L;
+  if (unordered_is_less) {
+    movl(dst, -1);
+    jcc(Assembler::parity, L);
+    jcc(Assembler::below , L);
+    movl(dst, 0);
+    jcc(Assembler::equal , L);
+    increment(dst);
+  } else { // unordered is greater
+    movl(dst, 1);
+    jcc(Assembler::parity, L);
+    jcc(Assembler::above , L);
+    movl(dst, 0);
+    jcc(Assembler::equal , L);
+    decrementl(dst);
+  }
+  bind(L);
+}
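+
+// Illustrative sketch (not part of the VM): both compare-to-int helpers
+// above implement the following three-way compare, here for double:
+//
+//   int cmp2int(double a, double b, bool unordered_is_less) {
+//     if (a != a || b != b)                  // unordered: at least one NaN
+//       return unordered_is_less ? -1 : 1;
+//     return (a < b) ? -1 : ((a == b) ? 0 : 1);
+//   }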
+
+
+void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
+  if (reachable(src1)) {
+    cmpb(as_Address(src1), imm);
+  } else {
+    lea(rscratch1, src1);
+    cmpb(Address(rscratch1, 0), imm);
+  }
+}
+
+void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
+#ifdef _LP64
+  if (src2.is_lval()) {
+    movptr(rscratch1, src2);
+    Assembler::cmpq(src1, rscratch1);
+  } else if (reachable(src2)) {
+    cmpq(src1, as_Address(src2));
+  } else {
+    lea(rscratch1, src2);
+    Assembler::cmpq(src1, Address(rscratch1, 0));
+  }
+#else
+  if (src2.is_lval()) {
+    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
+  } else {
+    cmpl(src1, as_Address(src2));
+  }
+#endif // _LP64
+}
+
+void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
+  assert(src2.is_lval(), "not a mem-mem compare");
+#ifdef _LP64
+  // moves src2's literal address
+  movptr(rscratch1, src2);
+  Assembler::cmpq(src1, rscratch1);
+#else
+  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
+#endif // _LP64
+}
+
+void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
+  if (reachable(adr)) {
+    if (os::is_MP())
+      lock();
+    cmpxchgptr(reg, as_Address(adr));
+  } else {
+    lea(rscratch1, adr);
+    if (os::is_MP())
+      lock();
+    cmpxchgptr(reg, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
+  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
+}
+
+void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::comisd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::comisd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::comiss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::comiss(dst, Address(rscratch1, 0));
+  }
+}
+
+
+void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
+  Condition negated_cond = negate_condition(cond);
+  Label L;
+  jcc(negated_cond, L);
+  atomic_incl(counter_addr);
+  bind(L);
+}
+
+int MacroAssembler::corrected_idivl(Register reg) {
+  // Full implementation of Java idiv and irem; checks for
+  // special case as described in JVM spec., p.243 & p.271.
+  // The function returns the (pc) offset of the idivl
+  // instruction - may be needed for implicit exceptions.
+  //
+  //         normal case                           special case
+  //
+  // input : rax: dividend                          min_int
+  //         reg: divisor   (may not be rax/rdx)    -1
+  //
+  // output: rax: quotient  (= rax idiv reg)        min_int
+  //         rdx: remainder (= rax irem reg)        0
+  assert(reg != rax && reg != rdx, "reg cannot be the rax or rdx register");
+  const int min_int = 0x80000000;
+  Label normal_case, special_case;
+
+  // check for special case
+  cmpl(rax, min_int);
+  jcc(Assembler::notEqual, normal_case);
+  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
+  cmpl(reg, -1);
+  jcc(Assembler::equal, special_case);
+
+  // handle normal case
+  bind(normal_case);
+  cdql();
+  int idivl_offset = offset();
+  idivl(reg);
+
+  // normal and special case exit
+  bind(special_case);
+
+  return idivl_offset;
+}
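+
+// Illustrative sketch (not part of the VM): the special case exists because
+// min_int idiv -1 raises #DE on x86 even though the JVM defines a result;
+// in plain C++ (with <climits>) the intent is
+//
+//   void corrected_idiv(int dividend, int divisor, int* q, int* r) {
+//     if (dividend == INT_MIN && divisor == -1) {
+//       *q = INT_MIN; *r = 0;                // JVM-specified, must not trap
+//     } else {
+//       *q = dividend / divisor; *r = dividend % divisor;
+//     }
+//   }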
+
+
+
+void MacroAssembler::decrementl(Register reg, int value) {
+  if (value == min_jint) {subl(reg, value) ; return; }
+  if (value <  0) { incrementl(reg, -value); return; }
+  if (value == 0) {                        ; return; }
+  if (value == 1 && UseIncDec) { decl(reg) ; return; }
+  /* else */      { subl(reg, value)       ; return; }
+}
+
+void MacroAssembler::decrementl(Address dst, int value) {
+  if (value == min_jint) {subl(dst, value) ; return; }
+  if (value <  0) { incrementl(dst, -value); return; }
+  if (value == 0) {                        ; return; }
+  if (value == 1 && UseIncDec) { decl(dst) ; return; }
+  /* else */      { subl(dst, value)       ; return; }
+}
+
+void MacroAssembler::division_with_shift (Register reg, int shift_value) {
+  assert (shift_value > 0, "illegal shift value");
+  Label _is_positive;
+  testl (reg, reg);
+  jcc (Assembler::positive, _is_positive);
+  int offset = (1 << shift_value) - 1 ;
+
+  if (offset == 1) {
+    incrementl(reg);
+  } else {
+    addl(reg, offset);
+  }
+
+  bind (_is_positive);
+  sarl(reg, shift_value);
+}
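+
+// Illustrative sketch (not part of the VM): an arithmetic shift alone rounds
+// toward negative infinity, so negative values are biased first to get the
+// round-toward-zero semantics of Java division:
+//
+//   int div_pow2(int x, int shift) {        // x / (1 << shift)
+//     if (x < 0) x += (1 << shift) - 1;     // bias so the shift truncates
+//     return x >> shift;                    // e.g. div_pow2(-7, 2) == -1
+//   }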
+
+void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::divsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::divsd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::divss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::divss(dst, Address(rscratch1, 0));
+  }
+}
+
+// !defined(COMPILER2) is because of stupid core builds
+#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
+void MacroAssembler::empty_FPU_stack() {
+  if (VM_Version::supports_mmx()) {
+    emms();
+  } else {
+    for (int i = 8; i-- > 0; ) ffree(i);
+  }
+}
+#endif // !LP64 || C1 || !C2
+
+
+// Defines obj, preserves var_size_in_bytes
+void MacroAssembler::eden_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register t1,
+                                   Label& slow_case) {
+  assert(obj == rax, "obj must be in rax, for cmpxchg");
+  assert_different_registers(obj, var_size_in_bytes, t1);
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+    jmp(slow_case);
+  } else {
+    Register end = t1;
+    Label retry;
+    bind(retry);
+    ExternalAddress heap_top((address) Universe::heap()->top_addr());
+    movptr(obj, heap_top);
+    if (var_size_in_bytes == noreg) {
+      lea(end, Address(obj, con_size_in_bytes));
+    } else {
+      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
+    }
+    // if end < obj then we wrapped around => object too long => slow case
+    cmpptr(end, obj);
+    jcc(Assembler::below, slow_case);
+    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
+    jcc(Assembler::above, slow_case);
+    // Compare obj with the current top addr; if they are still equal, store end
+    // (the new top) at the top addr. Sets ZF if the compare succeeded, and clears
+    // it otherwise. Use lock prefix for atomicity on MPs.
+    locked_cmpxchgptr(end, heap_top);
+    jcc(Assembler::notEqual, retry);
+  }
+}
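+
+// Illustrative sketch (not part of the VM): the retry loop above is classic
+// bump-the-pointer allocation; with heap_top/heap_end standing for the
+// heap's top and end pointers and cas for a compare-and-swap, it reads
+//
+//   char* eden_allocate(size_t size) {
+//     for (;;) {
+//       char* obj = *heap_top;
+//       char* end = obj + size;
+//       if (end < obj || end > heap_end) return NULL;    // wrapped or full
+//       if (cas(heap_top, obj, end) == obj) return obj;  // won the race
+//     }                                                  // lost: retry
+//   }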
+
+void MacroAssembler::enter() {
+  push(rbp);
+  mov(rbp, rsp);
+}
+
+// A 5 byte nop that is safe for patching (see patch_verified_entry)
+void MacroAssembler::fat_nop() {
+  if (UseAddressNop) {
+    addr_nop_5();
+  } else {
+    emit_int8(0x26); // es:
+    emit_int8(0x2e); // cs:
+    emit_int8(0x64); // fs:
+    emit_int8(0x65); // gs:
+    emit_int8((unsigned char)0x90);
+  }
+}
+
+void MacroAssembler::fcmp(Register tmp) {
+  fcmp(tmp, 1, true, true);
+}
+
+void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
+  assert(!pop_right || pop_left, "usage error");
+  if (VM_Version::supports_cmov()) {
+    assert(tmp == noreg, "unneeded temp");
+    if (pop_left) {
+      fucomip(index);
+    } else {
+      fucomi(index);
+    }
+    if (pop_right) {
+      fpop();
+    }
+  } else {
+    assert(tmp != noreg, "need temp");
+    if (pop_left) {
+      if (pop_right) {
+        fcompp();
+      } else {
+        fcomp(index);
+      }
+    } else {
+      fcom(index);
+    }
+    // convert FPU condition into eflags condition via rax
+    save_rax(tmp);
+    fwait(); fnstsw_ax();
+    sahf();
+    restore_rax(tmp);
+  }
+  // condition codes set as follows:
+  //
+  // CF (corresponds to C0) if x < y
+  // PF (corresponds to C2) if unordered
+  // ZF (corresponds to C3) if x = y
+}
+
+void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
+  fcmp2int(dst, unordered_is_less, 1, true, true);
+}
+
+void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
+  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
+  Label L;
+  if (unordered_is_less) {
+    movl(dst, -1);
+    jcc(Assembler::parity, L);
+    jcc(Assembler::below , L);
+    movl(dst, 0);
+    jcc(Assembler::equal , L);
+    increment(dst);
+  } else { // unordered is greater
+    movl(dst, 1);
+    jcc(Assembler::parity, L);
+    jcc(Assembler::above , L);
+    movl(dst, 0);
+    jcc(Assembler::equal , L);
+    decrementl(dst);
+  }
+  bind(L);
+}
+
+void MacroAssembler::fld_d(AddressLiteral src) {
+  fld_d(as_Address(src));
+}
+
+void MacroAssembler::fld_s(AddressLiteral src) {
+  fld_s(as_Address(src));
+}
+
+void MacroAssembler::fld_x(AddressLiteral src) {
+  Assembler::fld_x(as_Address(src));
+}
+
+void MacroAssembler::fldcw(AddressLiteral src) {
+  Assembler::fldcw(as_Address(src));
+}
+
+void MacroAssembler::pow_exp_core_encoding() {
+  // kills rax, rcx, rdx
+  subptr(rsp,sizeof(jdouble));
+  // computes 2^X. Stack: X ...
+  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
+  // keep it on the thread's stack to compute 2^int(X) later
+  // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1
+  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
+  fld_s(0);                 // Stack: X X ...
+  frndint();                // Stack: int(X) X ...
+  fsuba(1);                 // Stack: int(X) X-int(X) ...
+  fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
+  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
+  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
+  faddp(1);                 // Stack: 2^(X-int(X))
+  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
+  // shift int(X)+1023 to exponent position.
+  // The exponent is limited to 11 bits: if int(X)+1023 does not fit in 11
+  // bits, set the result to NaN. 0x000 and 0x7FF are reserved exponent
+  // values, so detect them and set the result to NaN.
+  movl(rax,Address(rsp,0));
+  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
+  addl(rax, 1023);
+  movl(rdx,rax);
+  shll(rax,20);
+  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
+  addl(rdx,1);
+  // Check that 1 < int(X)+1023+1 < 2048
+  // in 3 steps:
+  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
+  // 2- (int(X)+1023+1)&-2048 != 0
+  // 3- (int(X)+1023+1)&-2048 != 1
+  // Do 2- first because addl just updated the flags.
+  cmov32(Assembler::equal,rax,rcx);
+  cmpl(rdx,1);
+  cmov32(Assembler::equal,rax,rcx);
+  testl(rdx,rcx);
+  cmov32(Assembler::notEqual,rax,rcx);
+  movl(Address(rsp,4),rax);
+  movl(Address(rsp,0),0);
+  fmul_d(Address(rsp,0));   // Stack: 2^X ...
+  addptr(rsp,sizeof(jdouble));
+}
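+
+// Illustrative sketch (not part of the VM): the 2^int(X) factor above is
+// built directly as an IEEE-754 bit pattern (biased exponent shifted into
+// place, zero mantissa), roughly:
+//
+//   double two_to_the(int n) {               // valid for -1022 <= n <= 1023
+//     int64_t bits = (int64_t)(n + 1023) << 52;
+//     double d;
+//     memcpy(&d, &bits, sizeof(d));          // from <cstring>
+//     return d;
+//   }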
+
+void MacroAssembler::increase_precision() {
+  subptr(rsp, BytesPerWord);
+  fnstcw(Address(rsp, 0));
+  movl(rax, Address(rsp, 0));
+  orl(rax, 0x300);
+  push(rax);
+  fldcw(Address(rsp, 0));
+  pop(rax);
+}
+
+void MacroAssembler::restore_precision() {
+  fldcw(Address(rsp, 0));
+  addptr(rsp, BytesPerWord);
+}
+
+void MacroAssembler::fast_pow() {
+  // computes X^Y = 2^(Y * log2(X))
+  // if fast computation is not possible, result is NaN. Requires
+  // fallback from user of this macro.
+  // increase precision for intermediate steps of the computation
+  increase_precision();
+  fyl2x();                 // Stack: (Y*log2(X)) ...
+  pow_exp_core_encoding(); // Stack: exp(X) ...
+  restore_precision();
+}
+
+void MacroAssembler::fast_exp() {
+  // computes exp(X) = 2^(X * log2(e))
+  // if fast computation is not possible, result is NaN. Requires
+  // fallback from user of this macro.
+  // increase precision for intermediate steps of the computation
+  increase_precision();
+  fldl2e();                // Stack: log2(e) X ...
+  fmulp(1);                // Stack: (X*log2(e)) ...
+  pow_exp_core_encoding(); // Stack: exp(X) ...
+  restore_precision();
+}
+
+void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
+  // kills rax, rcx, rdx
+  // pow and exp needs 2 extra registers on the fpu stack.
+  Label slow_case, done;
+  Register tmp = noreg;
+  if (!VM_Version::supports_cmov()) {
+    // fcmp needs a temporary, so preserve rdx
+    tmp = rdx;
+  }
+  Register tmp2 = rax;
+  Register tmp3 = rcx;
+
+  if (is_exp) {
+    // Stack: X
+    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
+    fast_exp();                 // Stack: exp(X) X
+    fcmp(tmp, 0, false, false); // Stack: exp(X) X
+    // exp(X) not equal to itself: exp(X) is NaN, go to slow case.
+    jcc(Assembler::parity, slow_case);
+    // get rid of duplicate argument. Stack: exp(X)
+    if (num_fpu_regs_in_use > 0) {
+      fxch();
+      fpop();
+    } else {
+      ffree(1);
+    }
+    jmp(done);
+  } else {
+    // Stack: X Y
+    Label x_negative, y_odd;
+
+    fldz();                     // Stack: 0 X Y
+    fcmp(tmp, 1, true, false);  // Stack: X Y
+    jcc(Assembler::above, x_negative);
+
+    // X >= 0
+
+    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+    fld_s(1);                   // Stack: X Y X Y
+    fast_pow();                 // Stack: X^Y X Y
+    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
+    // X^Y not equal to itself: X^Y is NaN, go to slow case.
+    jcc(Assembler::parity, slow_case);
+    // get rid of duplicate arguments. Stack: X^Y
+    if (num_fpu_regs_in_use > 0) {
+      fxch(); fpop();
+      fxch(); fpop();
+    } else {
+      ffree(2);
+      ffree(1);
+    }
+    jmp(done);
+
+    // X <= 0
+    bind(x_negative);
+
+    fld_s(1);                   // Stack: Y X Y
+    frndint();                  // Stack: int(Y) X Y
+    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
+    jcc(Assembler::notEqual, slow_case);
+
+    subptr(rsp, 8);
+
+    // For X^Y, when X < 0, Y has to be an integer and the final
+    // result depends on whether it's odd or even. We just checked
+    // that int(Y) == Y.  We move int(Y) to gp registers as a 64 bit
+    // integer to test its parity. If int(Y) is huge and doesn't fit
+    // in the 64 bit integer range, the integer indefinite value will
+    // end up in the gp registers. Huge numbers are all even, the
+    // integer indefinite number is even so it's fine.
+
+#ifdef ASSERT
+    // Let's check we don't end up with an integer indefinite number
+    // when not expected. First test for huge numbers: check whether
+    // int(Y)+1 == int(Y) which is true for very large numbers and
+    // those are all even. A 64 bit integer is guaranteed to not
+    // overflow for numbers where y+1 != y (when precision is set to
+    // double precision).
+    Label y_not_huge;
+
+    fld1();                     // Stack: 1 int(Y) X Y
+    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y
+
+#ifdef _LP64
+    // trip to memory to force the precision down from double extended
+    // precision
+    fstp_d(Address(rsp, 0));
+    fld_d(Address(rsp, 0));
+#endif
+
+    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
+#endif
+
+    // move int(Y) as 64 bit integer to thread's stack
+    fistp_d(Address(rsp,0));    // Stack: X Y
+
+#ifdef ASSERT
+    jcc(Assembler::notEqual, y_not_huge);
+
+    // Y is huge so we know it's even. It may not fit in a 64 bit
+    // integer and we don't want the debug code below to see the
+    // integer indefinite value so overwrite int(Y) on the thread's
+    // stack with 0.
+    movl(Address(rsp, 0), 0);
+    movl(Address(rsp, 4), 0);
+
+    bind(y_not_huge);
+#endif
+
+    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+    fld_s(1);                   // Stack: X Y X Y
+    fabs();                     // Stack: abs(X) Y X Y
+    fast_pow();                 // Stack: abs(X)^Y X Y
+    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
+    // abs(X)^Y not equal to itself: abs(X)^Y is NaN, go to slow case.
+
+    pop(tmp2);
+    NOT_LP64(pop(tmp3));
+    jcc(Assembler::parity, slow_case);
+
+#ifdef ASSERT
+    // Check that int(Y) is not integer indefinite value (int
+    // overflow). Shouldn't happen because for values that would
+    // overflow, 1+int(Y)==Y which was tested earlier.
+#ifndef _LP64
+    {
+      Label integer;
+      testl(tmp2, tmp2);
+      jcc(Assembler::notZero, integer);
+      cmpl(tmp3, 0x80000000);
+      jcc(Assembler::notZero, integer);
+      STOP("integer indefinite value shouldn't be seen here");
+      bind(integer);
+    }
+#else
+    {
+      Label integer;
+      mov(tmp3, tmp2); // preserve tmp2 for parity check below
+      shlq(tmp3, 1);
+      jcc(Assembler::carryClear, integer);
+      jcc(Assembler::notZero, integer);
+      STOP("integer indefinite value shouldn't be seen here");
+      bind(integer);
+    }
+#endif
+#endif
+
+    // get rid of duplicate arguments. Stack: X^Y
+    if (num_fpu_regs_in_use > 0) {
+      fxch(); fpop();
+      fxch(); fpop();
+    } else {
+      ffree(2);
+      ffree(1);
+    }
+
+    testl(tmp2, 1);
+    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
+    // X <= 0, Y odd: X^Y = -abs(X)^Y
+
+    fchs();                     // Stack: -abs(X)^Y Y
+    jmp(done);
+  }
+
+  // slow case: runtime call
+  bind(slow_case);
+
+  fpop();                       // pop incorrect result or int(Y)
+
+  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
+                      is_exp ? 1 : 2, num_fpu_regs_in_use);
+
+  // Come here with result in F-TOS
+  bind(done);
+}
+
+void MacroAssembler::fpop() {
+  ffree();
+  fincstp();
+}
+
+void MacroAssembler::fremr(Register tmp) {
+  save_rax(tmp);
+  { Label L;
+    bind(L);
+    fprem();
+    fwait(); fnstsw_ax();
+#ifdef _LP64
+    testl(rax, 0x400);
+    jcc(Assembler::notEqual, L);
+#else
+    sahf();
+    jcc(Assembler::parity, L);
+#endif // _LP64
+  }
+  restore_rax(tmp);
+  // Result is in ST0.
+  // Note: fxch & fpop to get rid of ST1
+  // (otherwise FPU stack could overflow eventually)
+  fxch(1);
+  fpop();
+}
+
+
+void MacroAssembler::incrementl(AddressLiteral dst) {
+  if (reachable(dst)) {
+    incrementl(as_Address(dst));
+  } else {
+    lea(rscratch1, dst);
+    incrementl(Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::incrementl(ArrayAddress dst) {
+  incrementl(as_Address(dst));
+}
+
+void MacroAssembler::incrementl(Register reg, int value) {
+  if (value == min_jint) {addl(reg, value) ; return; }
+  if (value <  0) { decrementl(reg, -value); return; }
+  if (value == 0) {                        ; return; }
+  if (value == 1 && UseIncDec) { incl(reg) ; return; }
+  /* else */      { addl(reg, value)       ; return; }
+}
+
+void MacroAssembler::incrementl(Address dst, int value) {
+  if (value == min_jint) {addl(dst, value) ; return; }
+  if (value <  0) { decrementl(dst, -value); return; }
+  if (value == 0) {                        ; return; }
+  if (value == 1 && UseIncDec) { incl(dst) ; return; }
+  /* else */      { addl(dst, value)       ; return; }
+}
+
+void MacroAssembler::jump(AddressLiteral dst) {
+  if (reachable(dst)) {
+    jmp_literal(dst.target(), dst.rspec());
+  } else {
+    lea(rscratch1, dst);
+    jmp(rscratch1);
+  }
+}
+
+void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
+  if (reachable(dst)) {
+    InstructionMark im(this);
+    relocate(dst.reloc());
+    const int short_size = 2;
+    const int long_size = 6;
+    int offs = (intptr_t)dst.target() - ((intptr_t)pc());
+    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
+      // 0111 tttn #8-bit disp
+      emit_int8(0x70 | cc);
+      emit_int8((offs - short_size) & 0xFF);
+    } else {
+      // 0000 1111 1000 tttn #32-bit disp
+      emit_int8(0x0F);
+      emit_int8((unsigned char)(0x80 | cc));
+      emit_int32(offs - long_size);
+    }
+  } else {
+#ifdef ASSERT
+    warning("reversing conditional branch");
+#endif /* ASSERT */
+    Label skip;
+    jccb(reverse[cc], skip);
+    lea(rscratch1, dst);
+    Assembler::jmp(rscratch1);
+    bind(skip);
+  }
+}
+
+void MacroAssembler::ldmxcsr(AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::ldmxcsr(as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::ldmxcsr(Address(rscratch1, 0));
+  }
+}
+
+int MacroAssembler::load_signed_byte(Register dst, Address src) {
+  int off;
+  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
+    off = offset();
+    movsbl(dst, src); // movsxb
+  } else {
+    off = load_unsigned_byte(dst, src);
+    shll(dst, 24);
+    sarl(dst, 24);
+  }
+  return off;
+}
+
+// Note: load_signed_short used to be called load_signed_word.
+// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
+// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
+// The term "word" in HotSpot means a 32- or 64-bit machine word.
+int MacroAssembler::load_signed_short(Register dst, Address src) {
+  int off;
+  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
+    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
+    // version but this is what 64bit has always done. This seems to imply
+    // that users are only using 32bits worth.
+    off = offset();
+    movswl(dst, src); // movsxw
+  } else {
+    off = load_unsigned_short(dst, src);
+    shll(dst, 16);
+    sarl(dst, 16);
+  }
+  return off;
+}
+
+int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
+  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
+  // and "3.9 Partial Register Penalties", p. 22).
+  int off;
+  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
+    off = offset();
+    movzbl(dst, src); // movzxb
+  } else {
+    xorl(dst, dst);
+    off = offset();
+    movb(dst, src);
+  }
+  return off;
+}
+
+// Note: load_unsigned_short used to be called load_unsigned_word.
+int MacroAssembler::load_unsigned_short(Register dst, Address src) {
+  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
+  // and "3.9 Partial Register Penalties", p. 22).
+  int off;
+  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
+    off = offset();
+    movzwl(dst, src); // movzxw
+  } else {
+    xorl(dst, dst);
+    off = offset();
+    movw(dst, src);
+  }
+  return off;
+}
+
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
+  switch (size_in_bytes) {
+#ifndef _LP64
+  case  8:
+    assert(dst2 != noreg, "second dest register required");
+    movl(dst,  src);
+    movl(dst2, src.plus_disp(BytesPerInt));
+    break;
+#else
+  case  8:  movq(dst, src); break;
+#endif
+  case  4:  movl(dst, src); break;
+  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
+  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
+  default:  ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
+  switch (size_in_bytes) {
+#ifndef _LP64
+  case  8:
+    assert(src2 != noreg, "second source register required");
+    movl(dst,                        src);
+    movl(dst.plus_disp(BytesPerInt), src2);
+    break;
+#else
+  case  8:  movq(dst, src); break;
+#endif
+  case  4:  movl(dst, src); break;
+  case  2:  movw(dst, src); break;
+  case  1:  movb(dst, src); break;
+  default:  ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::mov32(AddressLiteral dst, Register src) {
+  if (reachable(dst)) {
+    movl(as_Address(dst), src);
+  } else {
+    lea(rscratch1, dst);
+    movl(Address(rscratch1, 0), src);
+  }
+}
+
+void MacroAssembler::mov32(Register dst, AddressLiteral src) {
+  if (reachable(src)) {
+    movl(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    movl(dst, Address(rscratch1, 0));
+  }
+}
+
+// C++ bool manipulation
+
+void MacroAssembler::movbool(Register dst, Address src) {
+  if(sizeof(bool) == 1)
+    movb(dst, src);
+  else if(sizeof(bool) == 2)
+    movw(dst, src);
+  else if(sizeof(bool) == 4)
+    movl(dst, src);
+  else
+    // unsupported
+    ShouldNotReachHere();
+}
+
+void MacroAssembler::movbool(Address dst, bool boolconst) {
+  if(sizeof(bool) == 1)
+    movb(dst, (int) boolconst);
+  else if(sizeof(bool) == 2)
+    movw(dst, (int) boolconst);
+  else if(sizeof(bool) == 4)
+    movl(dst, (int) boolconst);
+  else
+    // unsupported
+    ShouldNotReachHere();
+}
+
+void MacroAssembler::movbool(Address dst, Register src) {
+  if(sizeof(bool) == 1)
+    movb(dst, src);
+  else if(sizeof(bool) == 2)
+    movw(dst, src);
+  else if(sizeof(bool) == 4)
+    movl(dst, src);
+  else
+    // unsupported
+    ShouldNotReachHere();
+}
+
+void MacroAssembler::movbyte(ArrayAddress dst, int src) {
+  movb(as_Address(dst), src);
+}
+
+void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    movdl(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    movdl(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    movq(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    movq(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    if (UseXmmLoadAndClearUpper) {
+      movsd (dst, as_Address(src));
+    } else {
+      movlpd(dst, as_Address(src));
+    }
+  } else {
+    lea(rscratch1, src);
+    if (UseXmmLoadAndClearUpper) {
+      movsd (dst, Address(rscratch1, 0));
+    } else {
+      movlpd(dst, Address(rscratch1, 0));
+    }
+  }
+}
+
+void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    movss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    movss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::movptr(Register dst, Register src) {
+  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
+}
+
+void MacroAssembler::movptr(Register dst, Address src) {
+  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
+}
+
+// src should NEVER be a real pointer. Use AddressLiteral for true pointers
+void MacroAssembler::movptr(Register dst, intptr_t src) {
+  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
+}
+
+void MacroAssembler::movptr(Address dst, Register src) {
+  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
+}
+
+void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::movdqu(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::movdqu(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::movsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::movsd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::movss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::movss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::mulsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::mulsd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::mulss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::mulss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::null_check(Register reg, int offset) {
+  if (needs_explicit_null_check(offset)) {
+    // provoke OS NULL exception if reg = NULL by
+    // accessing M[reg] w/o changing any (non-CC) registers
+    // NOTE: cmpl is plenty here to provoke a segv
+    cmpptr(rax, Address(reg, 0));
+    // Note: should probably use testl(rax, Address(reg, 0));
+    //       may be shorter code (however, this version of
+    //       testl needs to be implemented first)
+  } else {
+    // nothing to do, (later) access of M[reg + offset]
+    // will provoke OS NULL exception if reg = NULL
+  }
+}
+
+void MacroAssembler::os_breakpoint() {
+  // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
+  // (e.g., MSVC can't call ps() otherwise)
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
+}
+
+void MacroAssembler::pop_CPU_state() {
+  pop_FPU_state();
+  pop_IU_state();
+}
+
+void MacroAssembler::pop_FPU_state() {
+  NOT_LP64(frstor(Address(rsp, 0));)
+  LP64_ONLY(fxrstor(Address(rsp, 0));)
+  addptr(rsp, FPUStateSizeInWords * wordSize);
+}
+
+void MacroAssembler::pop_IU_state() {
+  popa();
+  LP64_ONLY(addq(rsp, 8));
+  popf();
+}
+
+// Save Integer and Float state
+// Warning: Stack must be 16 byte aligned (64bit)
+void MacroAssembler::push_CPU_state() {
+  push_IU_state();
+  push_FPU_state();
+}
+
+void MacroAssembler::push_FPU_state() {
+  subptr(rsp, FPUStateSizeInWords * wordSize);
+#ifndef _LP64
+  fnsave(Address(rsp, 0));
+  fwait();
+#else
+  fxsave(Address(rsp, 0));
+#endif // LP64
+}
+
+void MacroAssembler::push_IU_state() {
+  // Push flags first because pusha kills them
+  pushf();
+  // Make sure rsp stays 16-byte aligned
+  LP64_ONLY(subq(rsp, 8));
+  pusha();
+}
+
+void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
+  // determine java_thread register
+  if (!java_thread->is_valid()) {
+    java_thread = rdi;
+    get_thread(java_thread);
+  }
+  // we must set sp to zero to clear frame
+  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
+  if (clear_fp) {
+    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
+  }
+
+  if (clear_pc)
+    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
+
+}
+
+void MacroAssembler::restore_rax(Register tmp) {
+  if (tmp == noreg) pop(rax);
+  else if (tmp != rax) mov(rax, tmp);
+}
+
+void MacroAssembler::round_to(Register reg, int modulus) {
+  addptr(reg, modulus - 1);
+  andptr(reg, -modulus);
+}
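+
+// Illustrative sketch (not part of the VM): for a power-of-two modulus this
+// is the usual round-up mask trick:
+//
+//   x = (x + modulus - 1) & -modulus;  // e.g. (13 + 7) & -8 == 16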
+
+void MacroAssembler::save_rax(Register tmp) {
+  if (tmp == noreg) push(rax);
+  else if (tmp != rax) mov(tmp, rax);
+}
+
+// Write serialization page so VM thread can do a pseudo remote membar.
+// We use the current thread pointer to calculate a thread specific
+// offset to write to within the page. This minimizes bus traffic
+// due to cache line collision.
+void MacroAssembler::serialize_memory(Register thread, Register tmp) {
+  movl(tmp, thread);
+  shrl(tmp, os::get_serialize_page_shift_count());
+  andl(tmp, (os::vm_page_size() - sizeof(int)));
+
+  Address index(noreg, tmp, Address::times_1);
+  ExternalAddress page(os::get_memory_serialize_page());
+
+  // Size of store must match masking code above
+  movl(as_Address(ArrayAddress(page, index)), tmp);
+}
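+
+// Illustrative sketch (not part of the VM): the thread-specific offset and
+// store above are, in plain C++ (names stand in for the os:: accessors):
+//
+//   size_t offset = ((uintptr_t)thread >> serialize_page_shift_count)
+//                   & (vm_page_size - sizeof(int));   // int-sized slot in page
+//   *(volatile int*)(serialize_page + offset) = (int)offset;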
+
+// Calls to C land
+//
+// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
+// has to be reset to 0. This is required to allow proper stack traversal.
+void MacroAssembler::set_last_Java_frame(Register java_thread,
+                                         Register last_java_sp,
+                                         Register last_java_fp,
+                                         address  last_java_pc) {
+  // determine java_thread register
+  if (!java_thread->is_valid()) {
+    java_thread = rdi;
+    get_thread(java_thread);
+  }
+  // determine last_java_sp register
+  if (!last_java_sp->is_valid()) {
+    last_java_sp = rsp;
+  }
+
+  // last_java_fp is optional
+
+  if (last_java_fp->is_valid()) {
+    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
+  }
+
+  // last_java_pc is optional
+
+  if (last_java_pc != NULL) {
+    lea(Address(java_thread,
+                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
+        InternalAddress(last_java_pc));
+
+  }
+  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
+}
+
+void MacroAssembler::shlptr(Register dst, int imm8) {
+  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
+}
+
+void MacroAssembler::shrptr(Register dst, int imm8) {
+  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
+}
+
+void MacroAssembler::sign_extend_byte(Register reg) {
+  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
+    movsbl(reg, reg); // movsxb
+  } else {
+    shll(reg, 24);
+    sarl(reg, 24);
+  }
+}
+
+void MacroAssembler::sign_extend_short(Register reg) {
+  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
+    movswl(reg, reg); // movsxw
+  } else {
+    shll(reg, 16);
+    sarl(reg, 16);
+  }
+}
+
+void MacroAssembler::testl(Register dst, AddressLiteral src) {
+  assert(reachable(src), "Address should be reachable");
+  testl(dst, as_Address(src));
+}
+
+void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::sqrtsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::sqrtsd(dst, Address(rscratch1, 0));
+  }
+}
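+
+// The reachable()/lea pattern above recurs throughout this file: on 64-bit,
+// a memory operand can only be encoded RIP-relatively when the target lies
+// within +/-2GB of the code, so otherwise the absolute address is first
+// materialized in rscratch1. Schematically (comment only):
+//
+//   if (reachable(src))  op dst, [rip + disp32]     ; one instruction
+//   else                 lea rscratch1, src         ; 64-bit absolute
+//                        op dst, [rscratch1]        ; indirect operand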
+
+void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::sqrtss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::sqrtss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::subsd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::subsd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::subss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::subss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::ucomisd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::ucomisd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::ucomiss(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::ucomiss(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-bit flipping with aligned address.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::xorpd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::xorpd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-bit flipping with aligned address.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::xorps(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::xorps(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
+  // The address operand must be 16-byte aligned in SSE mode (e.g. a shuffle mask).
+  bool aligned_adr = (((intptr_t)src.target() & 15) == 0);
+  assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::pshufb(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::pshufb(dst, Address(rscratch1, 0));
+  }
+}
+
+// AVX 3-operands instructions
+
+void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vaddsd(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vaddsd(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vaddss(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vaddss(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+  if (reachable(src)) {
+    vandpd(dst, nds, as_Address(src), vector256);
+  } else {
+    lea(rscratch1, src);
+    vandpd(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+  if (reachable(src)) {
+    vandps(dst, nds, as_Address(src), vector256);
+  } else {
+    lea(rscratch1, src);
+    vandps(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vdivsd(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vdivsd(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vdivss(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vdivss(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vmulsd(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vmulsd(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vmulss(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vmulss(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vsubsd(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vsubsd(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (reachable(src)) {
+    vsubss(dst, nds, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vsubss(dst, nds, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+  if (reachable(src)) {
+    vxorpd(dst, nds, as_Address(src), vector256);
+  } else {
+    lea(rscratch1, src);
+    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+  if (reachable(src)) {
+    vxorps(dst, nds, as_Address(src), vector256);
+  } else {
+    lea(rscratch1, src);
+    vxorps(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void MacroAssembler::g1_write_barrier_pre(Register obj,
+                                          Register pre_val,
+                                          Register thread,
+                                          Register tmp,
+                                          bool tosca_live,
+                                          bool expand_call) {
+
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly to skip generating the check by
+  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+
+#ifdef _LP64
+  assert(thread == r15_thread, "must be");
+#endif // _LP64
+
+  Label done;
+  Label runtime;
+
+  assert(pre_val != noreg, "check this code");
+
+  if (obj != noreg) {
+    assert_different_registers(obj, pre_val, tmp);
+    assert(pre_val != rax, "check this code");
+  }
+
+  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+
+
+  // Is marking active?
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    cmpl(in_progress, 0);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    cmpb(in_progress, 0);
+  }
+  jcc(Assembler::equal, done);
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    load_heap_oop(pre_val, Address(obj, 0));
+  }
+
+  // Is the previous value null?
+  cmpptr(pre_val, (int32_t) NULL_WORD);
+  jcc(Assembler::equal, done);
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+
+  movptr(tmp, index);                   // tmp := *index_adr
+  cmpptr(tmp, 0);                       // tmp == 0?
+  jcc(Assembler::equal, runtime);       // If yes, goto runtime
+
+  subptr(tmp, wordSize);                // tmp := tmp - wordSize
+  movptr(index, tmp);                   // *index_adr := tmp
+  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  movptr(Address(tmp, 0), pre_val);
+  jmp(done);
+
+  bind(runtime);
+  // save the live input values
+  if(tosca_live) push(rax);
+
+  if (obj != noreg && obj != rax)
+    push(obj);
+
+  if (pre_val != rax)
+    push(pre_val);
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+  // that checks that *(ebp+frame::interpreter_frame_last_sp) == NULL.
+  //
+  // If we are generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then ebp might be pointing to
+  // the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have a full interpreter frame on the stack,
+  // expand_call should be passed true.
+
+  NOT_LP64( push(thread); )
+
+  if (expand_call) {
+    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
+    pass_arg1(this, thread);
+    pass_arg0(this, pre_val);
+    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
+  } else {
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+  }
+
+  NOT_LP64( pop(thread); )
+
+  // restore the live input values
+  if (pre_val != rax)
+    pop(pre_val);
+
+  if (obj != noreg && obj != rax)
+    pop(obj);
+
+  if(tosca_live) pop(rax);
+
+  bind(done);
+}
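+
+// The fast path above corresponds roughly to this C-style sketch of the
+// SATB (snapshot-at-the-beginning) enqueue; field names are simplified and
+// the index is really a byte offset (comment only):
+//
+//   if (!thread->satb_queue_active) return;      // marking not in progress
+//   oop pre_val = *field;                        // value being overwritten
+//   if (pre_val == NULL) return;                 // nothing to record
+//   if (thread->satb_index == 0)                 // thread buffer is full
+//     runtime_call(g1_wb_pre, pre_val, thread);
+//   else
+//     thread->satb_buf[--thread->satb_index] = pre_val;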
+
+void MacroAssembler::g1_write_barrier_post(Register store_addr,
+                                           Register new_val,
+                                           Register thread,
+                                           Register tmp,
+                                           Register tmp2) {
+#ifdef _LP64
+  assert(thread == r15_thread, "must be");
+#endif // _LP64
+
+  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  movptr(tmp, store_addr);
+  xorptr(tmp, new_val);
+  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
+  jcc(Assembler::equal, done);
+
+  // crosses regions, storing NULL?
+
+  cmpptr(new_val, (int32_t) NULL_WORD);
+  jcc(Assembler::equal, done);
+
+  // storing region crossing non-NULL, is card already dirty?
+
+  ExternalAddress cardtable((address) ct->byte_map_base);
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+#ifdef _LP64
+  const Register card_addr = tmp;
+
+  movq(card_addr, store_addr);
+  shrq(card_addr, CardTableModRefBS::card_shift);
+
+  lea(tmp2, cardtable);
+
+  // get the address of the card
+  addq(card_addr, tmp2);
+#else
+  const Register card_index = tmp;
+
+  movl(card_index, store_addr);
+  shrl(card_index, CardTableModRefBS::card_shift);
+
+  Address index(noreg, card_index, Address::times_1);
+  const Register card_addr = tmp;
+  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
+#endif
+  cmpb(Address(card_addr, 0), 0);
+  jcc(Assembler::equal, done);
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  movb(Address(card_addr, 0), 0);
+
+  cmpl(queue_index, 0);
+  jcc(Assembler::equal, runtime);
+  subl(queue_index, wordSize);
+  movptr(tmp2, buffer);
+#ifdef _LP64
+  movslq(rscratch1, queue_index);
+  addq(tmp2, rscratch1);
+  movq(Address(tmp2, 0), card_addr);
+#else
+  addl(tmp2, queue_index);
+  movl(Address(tmp2, 0), card_index);
+#endif
+  jmp(done);
+
+  bind(runtime);
+  // save the live input values
+  push(store_addr);
+  push(new_val);
+#ifdef _LP64
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
+#else
+  push(thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+  pop(thread);
+#endif
+  pop(new_val);
+  pop(store_addr);
+
+  bind(done);
+}
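+
+// Equivalent fast-path logic as a C-style sketch (comment only; the queue
+// index is really a byte offset, and 0 is the "dirty" card value here):
+//
+//   if (((store_addr ^ new_val) >> LogOfHRGrainBytes) == 0) return; // same region
+//   if (new_val == NULL) return;
+//   jbyte* card = byte_map_base + (store_addr >> card_shift);
+//   if (*card == 0) return;        // already dirty, already logged
+//   *card = 0;                     // dirty the card
+//   then enqueue card_addr on the thread's dirty card queue,
+//   calling g1_wb_post when the buffer is full (index == 0)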
+
+#endif // SERIALGC
+//////////////////////////////////////////////////////////////////////////////////
+
+
+void MacroAssembler::store_check(Register obj) {
+  // Does a store check for the oop in register obj. The content of
+  // register obj is destroyed afterwards.
+  store_check_part_1(obj);
+  store_check_part_2(obj);
+}
+
+void MacroAssembler::store_check(Register obj, Address dst) {
+  store_check(obj);
+}
+
+
+// split the store check operation so that other instructions can be scheduled in between
+void MacroAssembler::store_check_part_1(Register obj) {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+  shrptr(obj, CardTableModRefBS::card_shift);
+}
+
+void MacroAssembler::store_check_part_2(Register obj) {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+  // The calculation for byte_map_base is as follows:
+  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
+  // So this essentially converts an address to a displacement and
+  // it will never need to be relocated. On 64bit however the value may be too
+  // large for a 32bit displacement
+
+  intptr_t disp = (intptr_t) ct->byte_map_base;
+  if (is_simm32(disp)) {
+    Address cardtable(noreg, obj, Address::times_1, disp);
+    movb(cardtable, 0);
+  } else {
+    // By doing it as an ExternalAddress, disp could be converted to a rip-relative
+    // displacement and done in a single instruction given favorable mapping and
+    // a smarter version of as_Address. Worst case it is two instructions, which
+    // is no worse than loading disp into a register and using a simple
+    // Address() as above.
+    // We can't use ExternalAddress as the only style since if disp == 0 we'll
+    // assert, because NULL isn't acceptable in a reloc (see 6644928). In any
+    // case, for some mappings we'll get a single-instruction version.
+
+    ExternalAddress cardtable((address)disp);
+    Address index(noreg, obj, Address::times_1);
+    movb(as_Address(ArrayAddress(cardtable, index)), 0);
+  }
+}
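+
+// Worked example (comment only, values hypothetical): with 512-byte cards
+// (card_shift == 9) and byte_map_base == 0x7f0000000000, a store to address
+// 0xab1234 dirties
+//
+//   card = byte_map_base + (0xab1234 >> 9)
+//        = 0x7f0000000000 + 0x5589
+//
+// Since obj was already shifted in store_check_part_1, the code above only
+// adds the byte_map_base displacement and stores the dirty value 0.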
+
+void MacroAssembler::subptr(Register dst, int32_t imm32) {
+  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
+}
+
+// Force generation of a 4 byte immediate value even if it fits into 8 bits
+void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
+  LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
+}
+
+void MacroAssembler::subptr(Register dst, Register src) {
+  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
+}
+
+// C++ bool manipulation
+void MacroAssembler::testbool(Register dst) {
+  if(sizeof(bool) == 1)
+    testb(dst, 0xff);
+  else if(sizeof(bool) == 2) {
+    // testw implementation needed for two byte bools
+    ShouldNotReachHere();
+  } else if(sizeof(bool) == 4)
+    testl(dst, dst);
+  else
+    // unsupported
+    ShouldNotReachHere();
+}
+
+void MacroAssembler::testptr(Register dst, Register src) {
+  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
+}
+
+// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
+void MacroAssembler::tlab_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register t1,
+                                   Register t2,
+                                   Label& slow_case) {
+  assert_different_registers(obj, t1, t2);
+  assert_different_registers(obj, var_size_in_bytes, t1);
+  Register end = t2;
+  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
+
+  verify_tlab();
+
+  NOT_LP64(get_thread(thread));
+
+  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
+  if (var_size_in_bytes == noreg) {
+    lea(end, Address(obj, con_size_in_bytes));
+  } else {
+    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
+  }
+  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
+  jcc(Assembler::above, slow_case);
+
+  // update the tlab top pointer
+  movptr(Address(thread, JavaThread::tlab_top_offset()), end);
+
+  // recover var_size_in_bytes if necessary
+  if (var_size_in_bytes == end) {
+    subptr(var_size_in_bytes, obj);
+  }
+  verify_tlab();
+}
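+
+// The sequence above is a classic bump-the-pointer allocation; in C terms
+// (comment only):
+//
+//   HeapWord* obj = thread->tlab_top;
+//   HeapWord* end = obj + size;              // con_ or var_size_in_bytes
+//   if (end > thread->tlab_end) goto slow_case;
+//   thread->tlab_top = end;                  // no atomics needed: the TLAB
+//   return obj;                              // is private to this thread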
+
+// Preserves rbx, and rdx.
+Register MacroAssembler::tlab_refill(Label& retry,
+                                     Label& try_eden,
+                                     Label& slow_case) {
+  Register top = rax;
+  Register t1  = rcx;
+  Register t2  = rsi;
+  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
+  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
+  Label do_refill, discard_tlab;
+
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+    // No allocation in the shared eden.
+    jmp(slow_case);
+  }
+
+  NOT_LP64(get_thread(thread_reg));
+
+  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
+  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
+
+  // calculate amount of free space
+  subptr(t1, top);
+  shrptr(t1, LogHeapWordSize);
+
+  // Retain tlab and allocate object in shared space if
+  // the amount free in the tlab is too large to discard.
+  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
+  jcc(Assembler::lessEqual, discard_tlab);
+
+  // Retain
+  // %%% yuck as movptr...
+  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
+  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
+  if (TLABStats) {
+    // increment number of slow_allocations
+    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
+  }
+  jmp(try_eden);
+
+  bind(discard_tlab);
+  if (TLABStats) {
+    // increment number of refills
+    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
+    // accumulate wastage -- t1 is amount free in tlab
+    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
+  }
+
+  // if tlab is currently allocated (top or end != null) then
+  // fill [top, end + alignment_reserve) with array object
+  testptr(top, top);
+  jcc(Assembler::zero, do_refill);
+
+  // set up the mark word
+  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
+  // set the length to the remaining space
+  subptr(t1, typeArrayOopDesc::header_size(T_INT));
+  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
+  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
+  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
+  // set klass to intArrayKlass
+  // dubious reloc why not an oop reloc?
+  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
+  // store klass last.  concurrent GCs assume the length is valid if the
+  // klass field is not null.
+  store_klass(top, t1);
+
+  movptr(t1, top);
+  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
+  incr_allocated_bytes(thread_reg, t1, 0);
+
+  // refill the tlab with an eden allocation
+  bind(do_refill);
+  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
+  shlptr(t1, LogHeapWordSize);
+  // allocate new tlab, address returned in top
+  eden_allocate(top, t1, 0, t2, slow_case);
+
+  // Check that t1 was preserved in eden_allocate.
+#ifdef ASSERT
+  if (UseTLAB) {
+    Label ok;
+    Register tsize = rsi;
+    assert_different_registers(tsize, thread_reg, t1);
+    push(tsize);
+    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
+    shlptr(tsize, LogHeapWordSize);
+    cmpptr(t1, tsize);
+    jcc(Assembler::equal, ok);
+    STOP("assert(t1 != tlab size)");
+    should_not_reach_here();
+
+    bind(ok);
+    pop(tsize);
+  }
+#endif
+  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
+  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
+  addptr(top, t1);
+  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
+  verify_tlab();
+  jmp(retry);
+
+  return thread_reg; // for use by caller
+}
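+
+// Before discarding a TLAB, the code above retires it by overwriting the
+// unused tail [top, end + alignment_reserve) with a plausible int[] object
+// so that heap walkers still see a parsable heap. Roughly (comment only,
+// field names simplified):
+//
+//   filler->mark   = prototype_mark->copy_set_hash(0x2);  // valid mark word
+//   filler->length = remaining space, scaled to jint units;
+//   filler->klass  = Universe::intArrayKlassObj;  // stored last so that
+//                                                 // concurrent GC sees a
+//                                                 // valid length first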
+
+void MacroAssembler::incr_allocated_bytes(Register thread,
+                                          Register var_size_in_bytes,
+                                          int con_size_in_bytes,
+                                          Register t1) {
+  if (!thread->is_valid()) {
+#ifdef _LP64
+    thread = r15_thread;
+#else
+    assert(t1->is_valid(), "need temp reg");
+    thread = t1;
+    get_thread(thread);
+#endif
+  }
+
+#ifdef _LP64
+  if (var_size_in_bytes->is_valid()) {
+    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
+  } else {
+    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
+  }
+#else
+  if (var_size_in_bytes->is_valid()) {
+    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
+  } else {
+    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
+  }
+  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
+#endif
+}
+
+void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
+  pusha();
+
+  // if we are coming from c1, xmm registers may be live
+  int off = 0;
+  if (UseSSE == 1)  {
+    subptr(rsp, sizeof(jdouble)*8);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
+  } else if (UseSSE >= 2)  {
+#ifdef COMPILER2
+    if (MaxVectorSize > 16) {
+      assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+      // Save upper half of YMM registers
+      subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+      vextractf128h(Address(rsp,  0),xmm0);
+      vextractf128h(Address(rsp, 16),xmm1);
+      vextractf128h(Address(rsp, 32),xmm2);
+      vextractf128h(Address(rsp, 48),xmm3);
+      vextractf128h(Address(rsp, 64),xmm4);
+      vextractf128h(Address(rsp, 80),xmm5);
+      vextractf128h(Address(rsp, 96),xmm6);
+      vextractf128h(Address(rsp,112),xmm7);
+#ifdef _LP64
+      vextractf128h(Address(rsp,128),xmm8);
+      vextractf128h(Address(rsp,144),xmm9);
+      vextractf128h(Address(rsp,160),xmm10);
+      vextractf128h(Address(rsp,176),xmm11);
+      vextractf128h(Address(rsp,192),xmm12);
+      vextractf128h(Address(rsp,208),xmm13);
+      vextractf128h(Address(rsp,224),xmm14);
+      vextractf128h(Address(rsp,240),xmm15);
+#endif
+    }
+#endif
+    // Save whole 128bit (16 bytes) XMM registers
+    subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+    movdqu(Address(rsp,off++*16),xmm0);
+    movdqu(Address(rsp,off++*16),xmm1);
+    movdqu(Address(rsp,off++*16),xmm2);
+    movdqu(Address(rsp,off++*16),xmm3);
+    movdqu(Address(rsp,off++*16),xmm4);
+    movdqu(Address(rsp,off++*16),xmm5);
+    movdqu(Address(rsp,off++*16),xmm6);
+    movdqu(Address(rsp,off++*16),xmm7);
+#ifdef _LP64
+    movdqu(Address(rsp,off++*16),xmm8);
+    movdqu(Address(rsp,off++*16),xmm9);
+    movdqu(Address(rsp,off++*16),xmm10);
+    movdqu(Address(rsp,off++*16),xmm11);
+    movdqu(Address(rsp,off++*16),xmm12);
+    movdqu(Address(rsp,off++*16),xmm13);
+    movdqu(Address(rsp,off++*16),xmm14);
+    movdqu(Address(rsp,off++*16),xmm15);
+#endif
+  }
+
+  // Preserve registers across runtime call
+  int incoming_argument_and_return_value_offset = -1;
+  if (num_fpu_regs_in_use > 1) {
+    // Must preserve all other FPU regs (we could alternatively convert
+    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
+    // FPU state, but we cannot trust the C compiler)
+    NEEDS_CLEANUP;
+    // NOTE that in this case we also push the incoming argument(s) to
+    // the stack and restore it later; we also use this stack slot to
+    // hold the return value from dsin, dcos etc.
+    for (int i = 0; i < num_fpu_regs_in_use; i++) {
+      subptr(rsp, sizeof(jdouble));
+      fstp_d(Address(rsp, 0));
+    }
+    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
+    for (int i = nb_args-1; i >= 0; i--) {
+      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
+    }
+  }
+
+  subptr(rsp, nb_args*sizeof(jdouble));
+  for (int i = 0; i < nb_args; i++) {
+    fstp_d(Address(rsp, i*sizeof(jdouble)));
+  }
+
+#ifdef _LP64
+  if (nb_args > 0) {
+    movdbl(xmm0, Address(rsp, 0));
+  }
+  if (nb_args > 1) {
+    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
+  }
+  assert(nb_args <= 2, "unsupported number of args");
+#endif // _LP64
+
+  // NOTE: we must not use call_VM_leaf here because that requires a
+  // complete interpreter frame in debug mode -- same bug as 4387334
+  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
+  // do proper 64bit abi
+
+  NEEDS_CLEANUP;
+  // Need to add stack banging before this runtime call if it needs to
+  // be taken; however, there is no generic stack banging routine at
+  // the MacroAssembler level
+
+  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
+
+#ifdef _LP64
+  movsd(Address(rsp, 0), xmm0);
+  fld_d(Address(rsp, 0));
+#endif // _LP64
+  addptr(rsp, sizeof(jdouble) * nb_args);
+  if (num_fpu_regs_in_use > 1) {
+    // Must save return value to stack and then restore entire FPU
+    // stack except incoming arguments
+    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
+    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
+      fld_d(Address(rsp, 0));
+      addptr(rsp, sizeof(jdouble));
+    }
+    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
+    addptr(rsp, sizeof(jdouble) * nb_args);
+  }
+
+  off = 0;
+  if (UseSSE == 1)  {
+    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
+    addptr(rsp, sizeof(jdouble)*8);
+  } else if (UseSSE >= 2)  {
+    // Restore whole 128bit (16 bytes) XMM registers
+    movdqu(xmm0, Address(rsp,off++*16));
+    movdqu(xmm1, Address(rsp,off++*16));
+    movdqu(xmm2, Address(rsp,off++*16));
+    movdqu(xmm3, Address(rsp,off++*16));
+    movdqu(xmm4, Address(rsp,off++*16));
+    movdqu(xmm5, Address(rsp,off++*16));
+    movdqu(xmm6, Address(rsp,off++*16));
+    movdqu(xmm7, Address(rsp,off++*16));
+#ifdef _LP64
+    movdqu(xmm8, Address(rsp,off++*16));
+    movdqu(xmm9, Address(rsp,off++*16));
+    movdqu(xmm10, Address(rsp,off++*16));
+    movdqu(xmm11, Address(rsp,off++*16));
+    movdqu(xmm12, Address(rsp,off++*16));
+    movdqu(xmm13, Address(rsp,off++*16));
+    movdqu(xmm14, Address(rsp,off++*16));
+    movdqu(xmm15, Address(rsp,off++*16));
+#endif
+    addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+#ifdef COMPILER2
+    if (MaxVectorSize > 16) {
+      // Restore upper half of YMM registers.
+      vinsertf128h(xmm0, Address(rsp,  0));
+      vinsertf128h(xmm1, Address(rsp, 16));
+      vinsertf128h(xmm2, Address(rsp, 32));
+      vinsertf128h(xmm3, Address(rsp, 48));
+      vinsertf128h(xmm4, Address(rsp, 64));
+      vinsertf128h(xmm5, Address(rsp, 80));
+      vinsertf128h(xmm6, Address(rsp, 96));
+      vinsertf128h(xmm7, Address(rsp,112));
+#ifdef _LP64
+      vinsertf128h(xmm8, Address(rsp,128));
+      vinsertf128h(xmm9, Address(rsp,144));
+      vinsertf128h(xmm10, Address(rsp,160));
+      vinsertf128h(xmm11, Address(rsp,176));
+      vinsertf128h(xmm12, Address(rsp,192));
+      vinsertf128h(xmm13, Address(rsp,208));
+      vinsertf128h(xmm14, Address(rsp,224));
+      vinsertf128h(xmm15, Address(rsp,240));
+#endif
+      addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+    }
+#endif
+  }
+  popa();
+}
+
+static const double     pi_4 =  0.7853981633974483;
+
+void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
+  // A hand-coded argument reduction for values with |x| in (pi/4, pi/2)
+  // was attempted in this code; unfortunately it appears that the
+  // switch to 80-bit precision and back causes this to be
+  // unprofitable compared with simply performing a runtime call if
+  // the argument is out of the (-pi/4, pi/4) range.
+
+  Register tmp = noreg;
+  if (!VM_Version::supports_cmov()) {
+    // fcmp needs a temporary, so preserve rbx
+    tmp = rbx;
+    push(tmp);
+  }
+
+  Label slow_case, done;
+
+  ExternalAddress pi4_adr = (address)&pi_4;
+  if (reachable(pi4_adr)) {
+    // x ?<= pi/4
+    fld_d(pi4_adr);
+    fld_s(1);                // Stack:  X  PI/4  X
+    fabs();                  // Stack: |X| PI/4  X
+    fcmp(tmp);
+    jcc(Assembler::above, slow_case);
+
+    // fastest case: -pi/4 <= x <= pi/4
+    switch(trig) {
+    case 's':
+      fsin();
+      break;
+    case 'c':
+      fcos();
+      break;
+    case 't':
+      ftan();
+      break;
+    default:
+      assert(false, "bad intrinsic");
+      break;
+    }
+    jmp(done);
+  }
+
+  // slow case: runtime call
+  bind(slow_case);
+
+  switch(trig) {
+  case 's':
+    {
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
+    }
+    break;
+  case 'c':
+    {
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
+    }
+    break;
+  case 't':
+    {
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
+    }
+    break;
+  default:
+    assert(false, "bad intrinsic");
+    break;
+  }
+
+  // Come here with result in F-TOS
+  bind(done);
+
+  if (tmp != noreg) {
+    pop(tmp);
+  }
+}
+
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Label& L_no_such_interface) {
+  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int scan_step   = itableOffsetEntry::size() * wordSize;
+  int vte_size    = vtableEntry::size() * wordSize;
+  Address::ScaleFactor times_vte_scale = Address::times_ptr;
+  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+  movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
+
+  // %%% Could store the aligned, prescaled offset in the klassoop.
+  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
+  if (HeapWordsPerLong > 1) {
+    // Round up to align_object_offset boundary
+    // see code for InstanceKlass::start_of_itable!
+    round_to(scan_temp, BytesPerLong);
+  }
+
+  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
+
+  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+  //   if (scan->interface() == intf) {
+  //     result = (klass + scan->offset() + itable_index);
+  //   }
+  // }
+  Label search, found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {
+    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
+    cmpptr(intf_klass, method_result);
+
+    if (peel) {
+      jccb(Assembler::equal, found_method);
+    } else {
+      jccb(Assembler::notEqual, search);
+      // (invert the test to fall through to found_method...)
+    }
+
+    if (!peel)  break;
+
+    bind(search);
+
+    // Check that the previous entry is non-null.  A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    testptr(method_result, method_result);
+    jcc(Assembler::zero, L_no_such_interface);
+    addptr(scan_temp, scan_step);
+  }
+
+  bind(found_method);
+
+  // Got a hit.
+  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
+  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
+}
+
+
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  const int base = InstanceKlass::vtable_start_offset() * wordSize;
+  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
+  Address vtable_entry_addr(recv_klass,
+                            vtable_index, Address::times_ptr,
+                            base + vtableEntry::method_offset_in_bytes());
+  movptr(method_result, vtable_entry_addr);
+}
+
+
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Label& L_success) {
+  Label L_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
+  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
+  bind(L_failure);
+}
+
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                        RegisterOrConstant super_check_offset) {
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
+  Address super_check_offset_addr(super_klass, sco_offset);
+
+  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
+  // range of a jccb.  If this routine grows larger, reconsider at
+  // least some of these.
+#define local_jcc(assembler_cond, label)                                \
+  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
+  else                             jcc( assembler_cond, label) /*omit semi*/
+
+  // Hacked jmp, which may only be used just before L_fallthrough.
+#define final_jmp(label)                                                \
+  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
+  else                            jmp(label)                /*omit semi*/
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface.  Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmpptr(sub_klass, super_klass);
+  local_jcc(Assembler::equal, *L_success);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    // Positive movl does right thing on LP64.
+    movl(temp_reg, super_check_offset_addr);
+    super_check_offset = RegisterOrConstant(temp_reg);
+  }
+  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
+  cmpptr(super_klass, super_check_addr); // load displayed supertype
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  if (super_check_offset.is_register()) {
+    local_jcc(Assembler::equal, *L_success);
+    cmpl(super_check_offset.as_register(), sc_offset);
+    if (L_failure == &L_fallthrough) {
+      local_jcc(Assembler::equal, *L_slow_path);
+    } else {
+      local_jcc(Assembler::notEqual, *L_failure);
+      final_jmp(*L_slow_path);
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      local_jcc(Assembler::equal, *L_success);
+    } else {
+      local_jcc(Assembler::notEqual, *L_slow_path);
+      final_jmp(*L_success);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      local_jcc(Assembler::equal, *L_success);
+    } else {
+      local_jcc(Assembler::notEqual, *L_failure);
+      final_jmp(*L_success);
+    }
+  }
+
+  bind(L_fallthrough);
+
+#undef local_jcc
+#undef final_jmp
+}
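+
+// Shape of the fast path as a C-style sketch (comment only;
+// sco = super_check_offset):
+//
+//   if (sub == super) return success;               // trivial self-check
+//   if (*(Klass**)((char*)sub + sco) == super)
+//     return success;                               // displayed supertype hit
+//   if (sco != secondary_super_cache_offset)
+//     return failure;                               // a primary miss is final
+//   return slow_path;                               // must scan secondaries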
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   bool set_cond_codes) {
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  if (temp2_reg != noreg)
+    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
+#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr(     sub_klass, sc_offset);
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+  // The repne_scan instruction uses fixed registers, which we must spill.
+  // Don't worry too much about pre-existing connections with the input regs.
+
+  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
+  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
+
+  // Get super_klass value into rax (even if it was in rdi or rcx).
+  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
+  if (super_klass != rax || UseCompressedOops) {
+    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
+    mov(rax, super_klass);
+  }
+  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
+  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
+
+#ifndef PRODUCT
+  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+  ExternalAddress pst_counter_addr((address) pst_counter);
+  NOT_LP64(  incrementl(pst_counter_addr) );
+  LP64_ONLY( lea(rcx, pst_counter_addr) );
+  LP64_ONLY( incrementl(Address(rcx, 0)) );
+#endif //PRODUCT
+
+  // We will consult the secondary-super array.
+  movptr(rdi, secondary_supers_addr);
+  // Load the array length.  (Positive movl does right thing on LP64.)
+  movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
+  // Skip to start of data.
+  addptr(rdi, Array<Klass*>::base_offset_in_bytes());
+
+  // Scan RCX words at [RDI] for an occurrence of RAX.
+  // Set NZ/Z based on last compare.
+  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
+  // not change flags (only the repeated scas instruction sets them).
+  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
+
+  testptr(rax,rax); // Set Z = 0
+  repne_scan();
+
+  // Unspill the temp. registers:
+  if (pushed_rdi)  pop(rdi);
+  if (pushed_rcx)  pop(rcx);
+  if (pushed_rax)  pop(rax);
+
+  if (set_cond_codes) {
+    // Special hack for the AD files:  rdi is guaranteed non-zero.
+    assert(!pushed_rdi, "rdi must be left non-NULL");
+    // Also, the condition codes are properly set Z/NZ on succeed/failure.
+  }
+
+  if (L_failure == &L_fallthrough)
+        jccb(Assembler::notEqual, *L_failure);
+  else  jcc(Assembler::notEqual, *L_failure);
+
+  // Success.  Cache the super we found and proceed in triumph.
+  movptr(super_cache_addr, super_klass);
+
+  if (L_success != &L_fallthrough) {
+    jmp(*L_success);
+  }
+
+#undef IS_A_TEMP
+
+  bind(L_fallthrough);
+}
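+
+// In effect the scan above performs (comment only):
+//
+//   Array<Klass*>* ss = sub->secondary_supers;
+//   for (int i = 0; i < ss->length; i++) {
+//     if (ss->at(i) == super) {
+//       sub->secondary_super_cache = super;   // memoize for the fast path
+//       return success;
+//     }
+//   }
+//   return failure;
+//
+// expressed as a single repne scas, which is why rax/rcx/rdi are spilled.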
+
+
+void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
+  if (VM_Version::supports_cmov()) {
+    cmovl(cc, dst, src);
+  } else {
+    Label L;
+    jccb(negate_condition(cc), L);
+    movl(dst, src);
+    bind(L);
+  }
+}
+
+void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
+  if (VM_Version::supports_cmov()) {
+    cmovl(cc, dst, src);
+  } else {
+    Label L;
+    jccb(negate_condition(cc), L);
+    movl(dst, src);
+    bind(L);
+  }
+}
+
+void MacroAssembler::verify_oop(Register reg, const char* s) {
+  if (!VerifyOops) return;
+
+  // Pass register number to verify_oop_subroutine
+  char* b = new char[strlen(s) + 50];
+  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
+  BLOCK_COMMENT("verify_oop {");
+#ifdef _LP64
+  push(rscratch1);                    // save r10, trashed by movptr()
+#endif
+  push(rax);                          // save rax
+  push(reg);                          // pass register argument
+  ExternalAddress buffer((address) b);
+  // avoid using pushptr, as it modifies scratch registers
+  // and our contract is not to modify anything
+  movptr(rax, buffer.addr());
+  push(rax);
+  // call indirectly to solve generation ordering problem
+  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
+  call(rax);
+  // Caller pops the arguments (oop, message) and restores rax, r10
+  BLOCK_COMMENT("} verify_oop");
+}
+
+
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp,
+                                                      int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0)
+    return RegisterOrConstant(value + offset);
+
+  // load indirectly to solve generation ordering problem
+  movptr(tmp, ExternalAddress((address) delayed_value_addr));
+
+#ifdef ASSERT
+  { Label L;
+    testptr(tmp, tmp);
+    if (WizardMode) {
+      jcc(Assembler::notZero, L);
+      char* buf = new char[40];
+      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
+      STOP(buf);
+    } else {
+      jccb(Assembler::notZero, L);
+      hlt();
+    }
+    bind(L);
+  }
+#endif
+
+  if (offset != 0)
+    addptr(tmp, offset);
+
+  return RegisterOrConstant(tmp);
+}
+
+
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+                                         int extra_slot_offset) {
+  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+  int stackElementSize = Interpreter::stackElementSize;
+  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
+#ifdef ASSERT
+  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
+  assert(offset1 - offset == stackElementSize, "correct arithmetic");
+#endif
+  Register             scale_reg    = noreg;
+  Address::ScaleFactor scale_factor = Address::no_scale;
+  if (arg_slot.is_constant()) {
+    offset += arg_slot.as_constant() * stackElementSize;
+  } else {
+    scale_reg    = arg_slot.as_register();
+    scale_factor = Address::times(stackElementSize);
+  }
+  offset += wordSize;           // return PC is on stack
+  return Address(rsp, scale_reg, scale_factor, offset);
+}
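+
+// Example (comment only): on LP64 with Interpreter::stackElementSize == 8,
+// a constant arg_slot of 2 with extra_slot_offset 0 yields
+//
+//   offset = expr_offset_in_bytes(0) + 2 * 8 + wordSize  // return PC slot
+//
+// giving Address(rsp, offset); a register arg_slot instead produces
+// Address(rsp, slot_reg, Address::times_8, offset).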
+
+
+void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
+  if (!VerifyOops) return;
+
+  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
+  // Pass register number to verify_oop_subroutine
+  char* b = new char[strlen(s) + 50];
+  sprintf(b, "verify_oop_addr: %s", s);
+
+#ifdef _LP64
+  push(rscratch1);                    // save r10, trashed by movptr()
+#endif
+  push(rax);                          // save rax
+  // addr may contain rsp so we will have to adjust it based on the push
+  // we just did (and on 64 bit we do two pushes)
+  // NOTE: the 64-bit code previously did movq(addr, rax), which stores
+  // rax into addr, the reverse of what was intended.
+  if (addr.uses(rsp)) {
+    lea(rax, addr);
+    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
+  } else {
+    pushptr(addr);
+  }
+
+  ExternalAddress buffer((address) b);
+  // pass msg argument
+  // avoid using pushptr, as it modifies scratch registers
+  // and our contract is not to modify anything
+  movptr(rax, buffer.addr());
+  push(rax);
+
+  // call indirectly to solve generation ordering problem
+  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
+  call(rax);
+  // Caller pops the arguments (addr, message) and restores rax, r10.
+}
+
+void MacroAssembler::verify_tlab() {
+#ifdef ASSERT
+  if (UseTLAB && VerifyOops) {
+    Label next, ok;
+    Register t1 = rsi;
+    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
+
+    push(t1);
+    NOT_LP64(push(thread_reg));
+    NOT_LP64(get_thread(thread_reg));
+
+    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
+    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
+    jcc(Assembler::aboveEqual, next);
+    STOP("assert(top >= start)");
+    should_not_reach_here();
+
+    bind(next);
+    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
+    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
+    jcc(Assembler::aboveEqual, ok);
+    STOP("assert(top <= end)");
+    should_not_reach_here();
+
+    bind(ok);
+    NOT_LP64(pop(thread_reg));
+    pop(t1);
+  }
+#endif
+}
+
+class ControlWord {
+ public:
+  int32_t _value;
+
+  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
+  int  precision_control() const       { return  (_value >>  8) & 3      ; }
+  bool precision() const               { return ((_value >>  5) & 1) != 0; }
+  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
+  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
+  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
+  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
+  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
+
+  void print() const {
+    // rounding control
+    const char* rc;
+    switch (rounding_control()) {
+      case 0: rc = "round near"; break;
+      case 1: rc = "round down"; break;
+      case 2: rc = "round up  "; break;
+      case 3: rc = "chop      "; break;
+    };
+    // precision control
+    const char* pc;
+    switch (precision_control()) {
+      case 0: pc = "24 bits "; break;
+      case 1: pc = "reserved"; break;
+      case 2: pc = "53 bits "; break;
+      case 3: pc = "64 bits "; break;
+    };
+    // flags
+    char f[9];
+    f[0] = ' ';
+    f[1] = ' ';
+    f[2] = (precision   ()) ? 'P' : 'p';
+    f[3] = (underflow   ()) ? 'U' : 'u';
+    f[4] = (overflow    ()) ? 'O' : 'o';
+    f[5] = (zero_divide ()) ? 'Z' : 'z';
+    f[6] = (denormalized()) ? 'D' : 'd';
+    f[7] = (invalid     ()) ? 'I' : 'i';
+    f[8] = '\x0';
+    // output
+    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
+  }
+
+};
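+
+// Decoding example (comment only): the x87 power-on default control word
+// 0x037f (binary 0000 0011 0111 1111) prints as
+//
+//   rounding_control()  == 0  ->  "round near"
+//   precision_control() == 3  ->  "64 bits"
+//   mask bits 0-5 all set     ->  flags "  PUOZDI"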
+
+class StatusWord {
+ public:
+  int32_t _value;
+
+  bool busy() const                    { return ((_value >> 15) & 1) != 0; }
+  bool C3() const                      { return ((_value >> 14) & 1) != 0; }
+  bool C2() const                      { return ((_value >> 10) & 1) != 0; }
+  bool C1() const                      { return ((_value >>  9) & 1) != 0; }
+  bool C0() const                      { return ((_value >>  8) & 1) != 0; }
+  int  top() const                     { return  (_value >> 11) & 7      ; }
+  bool error_status() const            { return ((_value >>  7) & 1) != 0; }
+  bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
+  bool precision() const               { return ((_value >>  5) & 1) != 0; }
+  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
+  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
+  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
+  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
+  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
+
+  void print() const {
+    // condition codes
+    char c[5];
+    c[0] = (C3()) ? '3' : '-';
+    c[1] = (C2()) ? '2' : '-';
+    c[2] = (C1()) ? '1' : '-';
+    c[3] = (C0()) ? '0' : '-';
+    c[4] = '\x0';
+    // flags
+    char f[9];
+    f[0] = (error_status()) ? 'E' : '-';
+    f[1] = (stack_fault ()) ? 'S' : '-';
+    f[2] = (precision   ()) ? 'P' : '-';
+    f[3] = (underflow   ()) ? 'U' : '-';
+    f[4] = (overflow    ()) ? 'O' : '-';
+    f[5] = (zero_divide ()) ? 'Z' : '-';
+    f[6] = (denormalized()) ? 'D' : '-';
+    f[7] = (invalid     ()) ? 'I' : '-';
+    f[8] = '\x0';
+    // output
+    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
+  }
+
+};
+
+class TagWord {
+ public:
+  int32_t _value;
+
+  int tag_at(int i) const              { return (_value >> (i*2)) & 3; }
+
+  void print() const {
+    printf("%04x", _value & 0xFFFF);
+  }
+
+};
+
+class FPU_Register {
+ public:
+  int32_t _m0;
+  int32_t _m1;
+  int16_t _ex;
+
+  bool is_indefinite() const           {
+    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
+  }
+
+  void print() const {
+    char  sign = (_ex < 0) ? '-' : '+';
+    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
+    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
+  };
+
+};
+
+class FPU_State {
+ public:
+  enum {
+    register_size       = 10,
+    number_of_registers =  8,
+    register_mask       =  7
+  };
+
+  ControlWord  _control_word;
+  StatusWord   _status_word;
+  TagWord      _tag_word;
+  int32_t      _error_offset;
+  int32_t      _error_selector;
+  int32_t      _data_offset;
+  int32_t      _data_selector;
+  int8_t       _register[register_size * number_of_registers];
+
+  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
+  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
+
+  const char* tag_as_string(int tag) const {
+    switch (tag) {
+      case 0: return "valid";
+      case 1: return "zero";
+      case 2: return "special";
+      case 3: return "empty";
+    }
+    ShouldNotReachHere();
+    return NULL;
+  }
+
+  void print() const {
+    // print computation registers
+    { int t = _status_word.top();
+      for (int i = 0; i < number_of_registers; i++) {
+        int j = (i - t) & register_mask;
+        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
+        st(j)->print();
+        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
+      }
+    }
+    printf("\n");
+    // print control registers
+    printf("ctrl = "); _control_word.print(); printf("\n");
+    printf("stat = "); _status_word .print(); printf("\n");
+    printf("tags = "); _tag_word    .print(); printf("\n");
+  }
+
+};
+
+class Flag_Register {
+ public:
+  int32_t _value;
+
+  bool overflow() const                { return ((_value >> 11) & 1) != 0; }
+  bool direction() const               { return ((_value >> 10) & 1) != 0; }
+  bool sign() const                    { return ((_value >>  7) & 1) != 0; }
+  bool zero() const                    { return ((_value >>  6) & 1) != 0; }
+  bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
+  bool parity() const                  { return ((_value >>  2) & 1) != 0; }
+  bool carry() const                   { return ((_value >>  0) & 1) != 0; }
+
+  void print() const {
+    // flags
+    char f[8];
+    f[0] = (overflow       ()) ? 'O' : '-';
+    f[1] = (direction      ()) ? 'D' : '-';
+    f[2] = (sign           ()) ? 'S' : '-';
+    f[3] = (zero           ()) ? 'Z' : '-';
+    f[4] = (auxiliary_carry()) ? 'A' : '-';
+    f[5] = (parity         ()) ? 'P' : '-';
+    f[6] = (carry          ()) ? 'C' : '-';
+    f[7] = '\x0';
+    // output
+    printf("%08x  flags = %s", _value, f);
+  }
+
+};
+
+class IU_Register {
+ public:
+  int32_t _value;
+
+  void print() const {
+    printf("%08x  %11d", _value, _value);
+  }
+
+};
+
+class IU_State {
+ public:
+  Flag_Register _eflags;
+  IU_Register   _rdi;
+  IU_Register   _rsi;
+  IU_Register   _rbp;
+  IU_Register   _rsp;
+  IU_Register   _rbx;
+  IU_Register   _rdx;
+  IU_Register   _rcx;
+  IU_Register   _rax;
+
+  void print() const {
+    // computation registers
+    printf("rax,  = "); _rax.print(); printf("\n");
+    printf("rbx,  = "); _rbx.print(); printf("\n");
+    printf("rcx  = "); _rcx.print(); printf("\n");
+    printf("rdx  = "); _rdx.print(); printf("\n");
+    printf("rdi  = "); _rdi.print(); printf("\n");
+    printf("rsi  = "); _rsi.print(); printf("\n");
+    printf("rbp,  = "); _rbp.print(); printf("\n");
+    printf("rsp  = "); _rsp.print(); printf("\n");
+    printf("\n");
+    // control registers
+    printf("flgs = "); _eflags.print(); printf("\n");
+  }
+};
+
+
+class CPU_State {
+ public:
+  FPU_State _fpu_state;
+  IU_State  _iu_state;
+
+  void print() const {
+    printf("--------------------------------------------------\n");
+    _iu_state .print();
+    printf("\n");
+    _fpu_state.print();
+    printf("--------------------------------------------------\n");
+  }
+
+};
+
+
+static void _print_CPU_state(CPU_State* state) {
+  state->print();
+}
+
+
+void MacroAssembler::print_CPU_state() {
+  push_CPU_state();
+  push(rsp);                // pass CPU state
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
+  addptr(rsp, wordSize);       // discard argument
+  pop_CPU_state();
+}
+
+
+static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
+  static int counter = 0;
+  FPU_State* fs = &state->_fpu_state;
+  counter++;
+  // For leaf calls, only verify that the top few elements remain empty.
+  // We only need 1 empty at the top for C2 code.
+  if( stack_depth < 0 ) {
+    if( fs->tag_for_st(7) != 3 ) {
+      printf("FPR7 not empty\n");
+      state->print();
+      assert(false, "error");
+      return false;
+    }
+    return true;                // All other stack states do not matter
+  }
+
+  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
+         "bad FPU control word");
+
+  // compute stack depth
+  int i = 0;
+  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
+  int d = i;
+  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
+  // verify findings
+  if (i != FPU_State::number_of_registers) {
+    // stack not contiguous
+    printf("%s: stack not contiguous at ST%d\n", s, i);
+    state->print();
+    assert(false, "error");
+    return false;
+  }
+  // check if computed stack depth corresponds to expected stack depth
+  if (stack_depth < 0) {
+    // expected stack depth is -stack_depth or less
+    if (d > -stack_depth) {
+      // too many elements on the stack
+      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
+      state->print();
+      assert(false, "error");
+      return false;
+    }
+  } else {
+    // expected stack depth is stack_depth
+    if (d != stack_depth) {
+      // wrong stack depth
+      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
+      state->print();
+      assert(false, "error");
+      return false;
+    }
+  }
+  // everything is cool
+  return true;
+}
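+
+// Editorial sketch (illustrative, not part of the original change): the
+// stack-depth scan performed by _verify_FPU() above, on a plain tag array.
+// tags[i] is the tag of ST(i); tag 3 means "empty". Returns the depth, or
+// -1 if valid and empty entries are interleaved (stack not contiguous).
+static int fpu_stack_depth_sketch(const int tags[8]) {
+  int i = 0;
+  while (i < 8 && tags[i] != 3) i++;   // count valid entries from the top
+  int d = i;
+  while (i < 8 && tags[i] == 3) i++;   // everything below must be empty
+  return (i == 8) ? d : -1;            // -1: stack not contiguous
+}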
+
+
+void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
+  if (!VerifyFPU) return;
+  push_CPU_state();
+  push(rsp);                // pass CPU state
+  ExternalAddress msg((address) s);
+  // pass message string s
+  pushptr(msg.addr());
+  push(stack_depth);        // pass stack depth
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
+  addptr(rsp, 3 * wordSize);   // discard arguments
+  // check for error
+  { Label L;
+    testl(rax, rax);
+    jcc(Assembler::notZero, L);
+    int3();                  // break if error condition
+    bind(L);
+  }
+  pop_CPU_state();
+}
+
+void MacroAssembler::load_klass(Register dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedKlassPointers) {
+    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    decode_klass_not_null(dst);
+  } else
+#endif
+    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+}
+
+void MacroAssembler::load_prototype_header(Register dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedKlassPointers) {
+    assert (Universe::heap() != NULL, "java heap should be initialized");
+    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    if (Universe::narrow_klass_shift() != 0) {
+      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
+      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
+    } else {
+      movq(dst, Address(dst, Klass::prototype_header_offset()));
+    }
+  } else
+#endif
+  {
+    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    movptr(dst, Address(dst, Klass::prototype_header_offset()));
+  }
+}
+
+void MacroAssembler::store_klass(Register dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedKlassPointers) {
+    encode_klass_not_null(src);
+    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
+  } else
+#endif
+    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
+}
+
+void MacroAssembler::load_heap_oop(Register dst, Address src) {
+#ifdef _LP64
+  // FIXME: Must change all places where we try to load the klass.
+  if (UseCompressedOops) {
+    movl(dst, src);
+    decode_heap_oop(dst);
+  } else
+#endif
+    movptr(dst, src);
+}
+
+// Does not do verification; generates fixed-size code.
+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
+#ifdef _LP64
+  if (UseCompressedOops) {
+    movl(dst, src);
+    decode_heap_oop_not_null(dst);
+  } else
+#endif
+    movptr(dst, src);
+}
+
+void MacroAssembler::store_heap_oop(Address dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedOops) {
+    assert(!dst.uses(src), "not enough registers");
+    encode_heap_oop(src);
+    movl(dst, src);
+  } else
+#endif
+    movptr(dst, src);
+}
+
+void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
+  assert_different_registers(src1, tmp);
+#ifdef _LP64
+  if (UseCompressedOops) {
+    bool did_push = false;
+    if (tmp == noreg) {
+      tmp = rax;
+      push(tmp);
+      did_push = true;
+      assert(!src2.uses(rsp), "can't push");
+    }
+    load_heap_oop(tmp, src2);
+    cmpptr(src1, tmp);
+    if (did_push)  pop(tmp);
+  } else
+#endif
+    cmpptr(src1, src2);
+}
+
+// Used for storing NULLs.
+void MacroAssembler::store_heap_oop_null(Address dst) {
+#ifdef _LP64
+  if (UseCompressedOops) {
+    movl(dst, (int32_t)NULL_WORD);
+  } else {
+    movslq(dst, (int32_t)NULL_WORD);
+  }
+#else
+  movl(dst, (int32_t)NULL_WORD);
+#endif
+}
+
+#ifdef _LP64
+void MacroAssembler::store_klass_gap(Register dst, Register src) {
+  if (UseCompressedKlassPointers) {
+    // Store to klass gap in destination
+    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
+  }
+}
+
+#ifdef ASSERT
+void MacroAssembler::verify_heapbase(const char* msg) {
+  assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  if (CheckCompressedOops) {
+    Label ok;
+    push(rscratch1); // cmpptr trashes rscratch1
+    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
+    jcc(Assembler::equal, ok);
+    STOP(msg);
+    bind(ok);
+    pop(rscratch1);
+  }
+}
+#endif
+
+// Algorithm must match oop.inline.hpp encode_heap_oop.
+void MacroAssembler::encode_heap_oop(Register r) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+#endif
+  verify_oop(r, "broken oop in encode_heap_oop");
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      shrq(r, LogMinObjAlignmentInBytes);
+    }
+    return;
+  }
+  testq(r, r);
+  cmovq(Assembler::equal, r, r12_heapbase);
+  subq(r, r12_heapbase);
+  shrq(r, LogMinObjAlignmentInBytes);
+}
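+
+// Editorial sketch (illustrative, not part of the original change): the
+// arithmetic encode_heap_oop() emits, on plain integers. The cmov in the
+// generated code maps NULL to the heap base first, so NULL encodes to 0.
+// Fixed-width types are assumed to come from <stdint.h>.
+static inline uint32_t encode_heap_oop_sketch(uintptr_t addr, uintptr_t base, int shift) {
+  if (addr == 0) addr = base;          // cmovq: NULL -> base, so it encodes to 0
+  return (uint32_t)((addr - base) >> shift);
+}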
+
+void MacroAssembler::encode_heap_oop_not_null(Register r) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
+  if (CheckCompressedOops) {
+    Label ok;
+    testq(r, r);
+    jcc(Assembler::notEqual, ok);
+    STOP("null oop passed to encode_heap_oop_not_null");
+    bind(ok);
+  }
+#endif
+  verify_oop(r, "broken oop in encode_heap_oop_not_null");
+  if (Universe::narrow_oop_base() != NULL) {
+    subq(r, r12_heapbase);
+  }
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shrq(r, LogMinObjAlignmentInBytes);
+  }
+}
+
+void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
+  if (CheckCompressedOops) {
+    Label ok;
+    testq(src, src);
+    jcc(Assembler::notEqual, ok);
+    STOP("null oop passed to encode_heap_oop_not_null2");
+    bind(ok);
+  }
+#endif
+  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
+  if (dst != src) {
+    movq(dst, src);
+  }
+  if (Universe::narrow_oop_base() != NULL) {
+    subq(dst, r12_heapbase);
+  }
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shrq(dst, LogMinObjAlignmentInBytes);
+  }
+}
+
+void  MacroAssembler::decode_heap_oop(Register r) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+#endif
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      shlq(r, LogMinObjAlignmentInBytes);
+    }
+  } else {
+    Label done;
+    shlq(r, LogMinObjAlignmentInBytes);
+    jccb(Assembler::equal, done);
+    addq(r, r12_heapbase);
+    bind(done);
+  }
+  verify_oop(r, "broken oop in decode_heap_oop");
+}
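+
+// Editorial sketch (illustrative, not part of the original change): the
+// inverse mapping implemented by decode_heap_oop() above. A zero narrow
+// oop stays NULL (the jccb skips the base add); otherwise the result is
+// base + (narrow << shift), a single leaq when shift == 3.
+static inline uintptr_t decode_heap_oop_sketch(uint32_t narrow, uintptr_t base, int shift) {
+  if (narrow == 0) return 0;           // NULL stays NULL
+  return base + ((uintptr_t)narrow << shift);
+}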
+
+void  MacroAssembler::decode_heap_oop_not_null(Register r) {
+  // Note: it will change flags
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_oop_shift() != 0) {
+    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shlq(r, LogMinObjAlignmentInBytes);
+    if (Universe::narrow_oop_base() != NULL) {
+      addq(r, r12_heapbase);
+    }
+  } else {
+    assert (Universe::narrow_oop_base() == NULL, "sanity");
+  }
+}
+
+void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+  // Note: it will change flags
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_oop_shift() != 0) {
+    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    if (LogMinObjAlignmentInBytes == Address::times_8) {
+      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+    } else {
+      if (dst != src) {
+        movq(dst, src);
+      }
+      shlq(dst, LogMinObjAlignmentInBytes);
+      if (Universe::narrow_oop_base() != NULL) {
+        addq(dst, r12_heapbase);
+      }
+    }
+  } else {
+    assert (Universe::narrow_oop_base() == NULL, "sanity");
+    if (dst != src) {
+      movq(dst, src);
+    }
+  }
+}
+
+void MacroAssembler::encode_klass_not_null(Register r) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?");
+#endif
+  if (Universe::narrow_klass_base() != NULL) {
+    subq(r, r12_heapbase);
+  }
+  if (Universe::narrow_klass_shift() != 0) {
+    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    shrq(r, LogKlassAlignmentInBytes);
+  }
+}
+
+void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?");
+#endif
+  if (dst != src) {
+    movq(dst, src);
+  }
+  if (Universe::narrow_klass_base() != NULL) {
+    subq(dst, r12_heapbase);
+  }
+  if (Universe::narrow_klass_shift() != 0) {
+    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    shrq(dst, LogKlassAlignmentInBytes);
+  }
+}
+
+void  MacroAssembler::decode_klass_not_null(Register r) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+  // Note: it will change flags
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_klass_shift() != 0) {
+    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    shlq(r, LogKlassAlignmentInBytes);
+    if (Universe::narrow_klass_base() != NULL) {
+      addq(r, r12_heapbase);
+    }
+  } else {
+    assert (Universe::narrow_klass_base() == NULL, "sanity");
+  }
+}
+
+void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+  // Note: it will change flags
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_klass_shift() != 0) {
+    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
+    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+  } else {
+    assert (Universe::narrow_klass_base() == NULL, "sanity");
+    if (dst != src) {
+      movq(dst, src);
+    }
+  }
+}
+
+void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  mov_narrow_oop(dst, oop_index, rspec);
+}
+
+void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  mov_narrow_oop(dst, oop_index, rspec);
+}
+
+void  MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int klass_index = oop_recorder()->find_index(k);
+  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
+  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+}
+
+void  MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int klass_index = oop_recorder()->find_index(k);
+  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
+  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+}
+
+void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
+}
+
+void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
+}
+
+void  MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) {
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int klass_index = oop_recorder()->find_index(k);
+  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
+  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+}
+
+void  MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int klass_index = oop_recorder()->find_index(k);
+  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
+  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+}
+
+void MacroAssembler::reinit_heapbase() {
+  if (UseCompressedOops || UseCompressedKlassPointers) {
+    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
+  }
+}
+#endif // _LP64
+
+
+// C2 compiled method's prolog code.
+void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
+
+  // WARNING: Initial instruction MUST be 5 bytes or longer so that
+  // NativeJump::patch_verified_entry will be able to patch out the entry
+  // code safely. The push to verify stack depth is ok at 5 bytes;
+  // the frame allocation can be either 3 or 6 bytes. So if we don't do
+  // a stack bang then we must use the 6-byte frame allocation even if
+  // we have no frame. :-(
+
+  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+  // Remove word for return addr
+  framesize -= wordSize;
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers bang for them.  Be careful, because
+  // some VM calls (such as call site linkage) can use several kilobytes of
+  // stack, but the stack safety zone should account for that.
+  // See bugs 4446381, 4468289, 4497237.
+  if (stack_bang) {
+    generate_stack_overflow_check(framesize);
+
+    // We always push rbp so that, on return to the interpreter, rbp will be
+    // restored correctly and we can correct the stack.
+    push(rbp);
+    // Remove word for rbp
+    framesize -= wordSize;
+
+    // Create frame
+    if (framesize) {
+      subptr(rsp, framesize);
+    }
+  } else {
+    // Create frame (force generation of a 4 byte immediate value)
+    subptr_imm32(rsp, framesize);
+
+    // Save RBP register now.
+    framesize -= wordSize;
+    movptr(Address(rsp, framesize), rbp);
+  }
+
+  if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
+    framesize -= wordSize;
+    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
+  }
+
+#ifndef _LP64
+  // If method sets FPU control word do it now
+  if (fp_mode_24b) {
+    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+  }
+  if (UseSSE >= 2 && VerifyFPU) {
+    verify_FPU(0, "FPU stack must be clean on entry");
+  }
+#endif
+
+#ifdef ASSERT
+  if (VerifyStackAtCalls) {
+    Label L;
+    push(rax);
+    mov(rax, rsp);
+    andptr(rax, StackAlignmentInBytes-1);
+    cmpptr(rax, StackAlignmentInBytes-wordSize);
+    pop(rax);
+    jcc(Assembler::equal, L);
+    STOP("Stack is not properly aligned!");
+    bind(L);
+  }
+#endif
+
+}
+
+void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
+  // cnt - number of qwords (8-byte words).
+  // base - start address, qword aligned.
+  assert(base==rdi, "base register must be edi for rep stos");
+  assert(tmp==rax,   "tmp register must be eax for rep stos");
+  assert(cnt==rcx,   "cnt register must be ecx for rep stos");
+
+  xorptr(tmp, tmp);
+  if (UseFastStosb) {
+    shlptr(cnt,3); // convert to number of bytes
+    rep_stosb();
+  } else {
+    NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
+    rep_stos();
+  }
+}
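+
+// Editorial sketch (illustrative, not part of the original change): the
+// effect of clear_mem() in plain C++. Both the rep_stosb path and the
+// rep_stos path amount to zeroing cnt 8-byte words starting at base.
+// uint64_t/size_t are assumed to come from <stdint.h>/<stddef.h>.
+static void clear_mem_sketch(uint64_t* base, size_t cnt) {
+  for (size_t i = 0; i < cnt; i++) {
+    base[i] = 0;                       // cnt counts qwords, not bytes
+  }
+}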
+
+// IndexOf for constant substrings with size >= 8 chars,
+// which don't need to be loaded through the stack.
+void MacroAssembler::string_indexofC8(Register str1, Register str2,
+                                      Register cnt1, Register cnt2,
+                                      int int_cnt2,  Register result,
+                                      XMMRegister vec, Register tmp) {
+  ShortBranchVerifier sbv(this);
+  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+
+  // This method uses the pcmpestri instruction with bound registers
+  //   inputs:
+  //     xmm - substring
+  //     rax - substring length (elements count)
+  //     mem - scanned string
+  //     rdx - string length (elements count)
+  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+  //   outputs:
+  //     rcx - matched index in string
+  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+
+  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
+        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
+        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
+
+  // Note, inline_string_indexOf() generates checks:
+  // if (substr.count > string.count) return -1;
+  // if (substr.count == 0) return 0;
+  assert(int_cnt2 >= 8, "this code is used only for cnt2 >= 8 chars");
+
+  // Load substring.
+  movdqu(vec, Address(str2, 0));
+  movl(cnt2, int_cnt2);
+  movptr(result, str1); // string addr
+
+  if (int_cnt2 > 8) {
+    jmpb(SCAN_TO_SUBSTR);
+
+    // Reload substr for rescan; this code
+    // is executed only for large substrings (> 8 chars).
+    bind(RELOAD_SUBSTR);
+    movdqu(vec, Address(str2, 0));
+    negptr(cnt2); // Jumped here with negative cnt2, convert to positive
+
+    bind(RELOAD_STR);
+    // We came here after the beginning of the substring was
+    // matched but the rest of it was not, so we need to search
+    // again. Start from the next element after the previous match.
+
+    // cnt2 is the number of remaining substring elements and
+    // cnt1 is the number of remaining string elements when the cmp failed.
+    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
+    subl(cnt1, cnt2);
+    addl(cnt1, int_cnt2);
+    movl(cnt2, int_cnt2); // Now restore cnt2
+
+    decrementl(cnt1);     // Shift to next element
+    cmpl(cnt1, cnt2);
+    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
+
+    addptr(result, 2);
+
+  } // (int_cnt2 > 8)
+
+  // Scan string for start of substr in 16-byte vectors
+  bind(SCAN_TO_SUBSTR);
+  pcmpestri(vec, Address(result, 0), 0x0d);
+  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
+  subl(cnt1, 8);
+  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
+  cmpl(cnt1, cnt2);
+  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
+  addptr(result, 16);
+  jmpb(SCAN_TO_SUBSTR);
+
+  // Found a potential substr
+  bind(FOUND_CANDIDATE);
+  // Matched whole vector if first element matched (tmp(rcx) == 0).
+  if (int_cnt2 == 8) {
+    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
+  } else { // int_cnt2 > 8
+    jccb(Assembler::overflow, FOUND_SUBSTR);
+  }
+  // After pcmpestri tmp(rcx) contains matched element index
+  // Compute start addr of substr
+  lea(result, Address(result, tmp, Address::times_2));
+
+  // Make sure string is still long enough
+  subl(cnt1, tmp);
+  cmpl(cnt1, cnt2);
+  if (int_cnt2 == 8) {
+    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
+  } else { // int_cnt2 > 8
+    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
+  }
+  // Left less than substring.
+
+  bind(RET_NOT_FOUND);
+  movl(result, -1);
+  jmpb(EXIT);
+
+  if (int_cnt2 > 8) {
+    // This code is optimized for the case where the whole substring
+    // matches whenever its head matches.
+    bind(MATCH_SUBSTR_HEAD);
+    pcmpestri(vec, Address(result, 0), 0x0d);
+    // Reload only the string if it does not match
+    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
+
+    Label CONT_SCAN_SUBSTR;
+    // Compare the rest of substring (> 8 chars).
+    bind(FOUND_SUBSTR);
+    // First 8 chars are already matched.
+    negptr(cnt2);
+    addptr(cnt2, 8);
+
+    bind(SCAN_SUBSTR);
+    subl(cnt1, 8);
+    cmpl(cnt2, -8); // Do not read beyond substring
+    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
+    // Back up the strings to avoid reading beyond the substring:
+    // cnt1 = cnt1 - cnt2 + 8
+    addl(cnt1, cnt2); // cnt2 is negative
+    addl(cnt1, 8);
+    movl(cnt2, 8); negptr(cnt2);
+    bind(CONT_SCAN_SUBSTR);
+    if (int_cnt2 < (int)G) {
+      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
+      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
+    } else {
+      // calculate index in register to avoid integer overflow (int_cnt2*2)
+      movl(tmp, int_cnt2);
+      addptr(tmp, cnt2);
+      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
+      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
+    }
+    // Need to reload the string pointers if the whole vector did not match
+    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+    addptr(cnt2, 8);
+    jcc(Assembler::negative, SCAN_SUBSTR);
+    // Fall through if found full substring
+
+  } // (int_cnt2 > 8)
+
+  bind(RET_FOUND);
+  // Found result if we matched full small substring.
+  // Compute substr offset
+  subptr(result, str1);
+  shrl(result, 1); // index
+  bind(EXIT);
+
+} // string_indexofC8
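+
+// Editorial sketch (illustrative, not part of the original change): the
+// reference semantics the constant-substring search above implements,
+// over 16-bit char units. Returns the first index of 'sub' in 'str', or
+// -1. The caller has already ruled out cnt2 == 0 and cnt2 > cnt1 (see
+// the inline_string_indexOf note above). uint16_t as in <stdint.h>.
+static int string_indexof_sketch(const uint16_t* str, int cnt1,
+                                 const uint16_t* sub, int cnt2) {
+  for (int i = 0; i + cnt2 <= cnt1; i++) {
+    int j = 0;
+    while (j < cnt2 && str[i + j] == sub[j]) j++;
+    if (j == cnt2) return i;           // full match starting at offset i
+  }
+  return -1;                           // not found
+}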
+
+// Small strings are loaded through the stack if they cross a page boundary.
+void MacroAssembler::string_indexof(Register str1, Register str2,
+                                    Register cnt1, Register cnt2,
+                                    int int_cnt2,  Register result,
+                                    XMMRegister vec, Register tmp) {
+  ShortBranchVerifier sbv(this);
+  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+  //
+  // int_cnt2 is the length of a small (< 8 chars) constant substring,
+  // or (-1) for a non-constant substring, in which case its length
+  // is in the cnt2 register.
+  //
+  // Note, inline_string_indexOf() generates checks:
+  // if (substr.count > string.count) return -1;
+  // if (substr.count == 0) return 0;
+  //
+  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
+
+  // This method uses the pcmpestri instruction with bound registers
+  //   inputs:
+  //     xmm - substring
+  //     rax - substring length (elements count)
+  //     mem - scanned string
+  //     rdx - string length (elements count)
+  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+  //   outputs:
+  //     rcx - matched index in string
+  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+
+  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
+        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
+        FOUND_CANDIDATE;
+
+  { //========================================================
+    // We don't know where these strings are located
+    // and we can't read beyond them. Load them through the stack.
+    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
+
+    movptr(tmp, rsp); // save old SP
+
+    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
+      if (int_cnt2 == 1) {  // One char
+        load_unsigned_short(result, Address(str2, 0));
+        movdl(vec, result); // move 32 bits
+      } else if (int_cnt2 == 2) { // Two chars
+        movdl(vec, Address(str2, 0)); // move 32 bits
+      } else if (int_cnt2 == 4) { // Four chars
+        movq(vec, Address(str2, 0));  // move 64 bits
+      } else { // cnt2 = { 3, 5, 6, 7 }
+        // Array header size is 12 bytes in 32-bit VM
+        // + 6 bytes for 3 chars == 18 bytes,
+        // enough space to load vec and shift.
+        assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
+        movdqu(vec, Address(str2, (int_cnt2*2)-16));
+        psrldq(vec, 16-(int_cnt2*2));
+      }
+    } else { // not constant substring
+      cmpl(cnt2, 8);
+      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
+
+      // We can read beyond the string if str+16 does not cross a page
+      // boundary, since heaps are aligned and mapped by pages.
+      assert(os::vm_page_size() < (int)G, "default page should be small");
+      movl(result, str2); // We need only low 32 bits
+      andl(result, (os::vm_page_size()-1));
+      cmpl(result, (os::vm_page_size()-16));
+      jccb(Assembler::belowEqual, CHECK_STR);
+
+      // Move small strings to the stack to allow loading 16 bytes into vec.
+      subptr(rsp, 16);
+      int stk_offset = wordSize-2;
+      push(cnt2);
+
+      bind(COPY_SUBSTR);
+      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
+      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
+      decrement(cnt2);
+      jccb(Assembler::notZero, COPY_SUBSTR);
+
+      pop(cnt2);
+      movptr(str2, rsp);  // New substring address
+    } // non constant
+
+    bind(CHECK_STR);
+    cmpl(cnt1, 8);
+    jccb(Assembler::aboveEqual, BIG_STRINGS);
+
+    // Check cross page boundary.
+    movl(result, str1); // We need only low 32 bits
+    andl(result, (os::vm_page_size()-1));
+    cmpl(result, (os::vm_page_size()-16));
+    jccb(Assembler::belowEqual, BIG_STRINGS);
+
+    subptr(rsp, 16);
+    int stk_offset = -2;
+    if (int_cnt2 < 0) { // not constant
+      push(cnt2);
+      stk_offset += wordSize;
+    }
+    movl(cnt2, cnt1);
+
+    bind(COPY_STR);
+    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
+    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
+    decrement(cnt2);
+    jccb(Assembler::notZero, COPY_STR);
+
+    if (int_cnt2 < 0) { // not constant
+      pop(cnt2);
+    }
+    movptr(str1, rsp);  // New string address
+
+    bind(BIG_STRINGS);
+    // Load substring.
+    if (int_cnt2 < 0) { // -1
+      movdqu(vec, Address(str2, 0));
+      push(cnt2);       // substr count
+      push(str2);       // substr addr
+      push(str1);       // string addr
+    } else {
+      // Small (< 8 chars) constant substrings are loaded already.
+      movl(cnt2, int_cnt2);
+    }
+    push(tmp);  // original SP
+
+  } // Finished loading
+
+  //========================================================
+  // Start search
+  //
+
+  movptr(result, str1); // string addr
+
+  if (int_cnt2  < 0) {  // Only for non constant substring
+    jmpb(SCAN_TO_SUBSTR);
+
+    // SP saved at sp+0
+    // String saved at sp+1*wordSize
+    // Substr saved at sp+2*wordSize
+    // Substr count saved at sp+3*wordSize
+
+    // Reload substr for rescan; this code
+    // is executed only for large substrings (> 8 chars).
+    bind(RELOAD_SUBSTR);
+    movptr(str2, Address(rsp, 2*wordSize));
+    movl(cnt2, Address(rsp, 3*wordSize));
+    movdqu(vec, Address(str2, 0));
+    // We came here after the beginning of the substring was
+    // matched but the rest of it was not, so we need to search
+    // again. Start from the next element after the previous match.
+    subptr(str1, result); // Restore counter
+    shrl(str1, 1);
+    addl(cnt1, str1);
+    decrementl(cnt1);   // Shift to next element
+    cmpl(cnt1, cnt2);
+    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
+
+    addptr(result, 2);
+  } // non constant
+
+  // Scan string for start of substr in 16-byte vectors
+  bind(SCAN_TO_SUBSTR);
+  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+  pcmpestri(vec, Address(result, 0), 0x0d);
+  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
+  subl(cnt1, 8);
+  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
+  cmpl(cnt1, cnt2);
+  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
+  addptr(result, 16);
+
+  bind(ADJUST_STR);
+  cmpl(cnt1, 8); // Do not read beyond string
+  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
+  // Back up the string to avoid reading beyond it.
+  lea(result, Address(result, cnt1, Address::times_2, -16));
+  movl(cnt1, 8);
+  jmpb(SCAN_TO_SUBSTR);
+
+  // Found a potential substr
+  bind(FOUND_CANDIDATE);
+  // After pcmpestri tmp(rcx) contains matched element index
+
+  // Make sure string is still long enough
+  subl(cnt1, tmp);
+  cmpl(cnt1, cnt2);
+  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
+  // Left less than substring.
+
+  bind(RET_NOT_FOUND);
+  movl(result, -1);
+  jmpb(CLEANUP);
+
+  bind(FOUND_SUBSTR);
+  // Compute start addr of substr
+  lea(result, Address(result, tmp, Address::times_2));
+
+  if (int_cnt2 > 0) { // Constant substring
+    // Repeat search for small substring (< 8 chars)
+    // from new point without reloading substring.
+    // Have to check that we don't read beyond string.
+    cmpl(tmp, 8-int_cnt2);
+    jccb(Assembler::greater, ADJUST_STR);
+    // Fall through if matched whole substring.
+  } else { // non constant
+    assert(int_cnt2 == -1, "should be != 0");
+
+    addl(tmp, cnt2);
+    // Found result if we matched whole substring.
+    cmpl(tmp, 8);
+    jccb(Assembler::lessEqual, RET_FOUND);
+
+    // Repeat search for small substring (<= 8 chars)
+    // from new point 'str1' without reloading substring.
+    cmpl(cnt2, 8);
+    // Have to check that we don't read beyond string.
+    jccb(Assembler::lessEqual, ADJUST_STR);
+
+    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
+    // Compare the rest of substring (> 8 chars).
+    movptr(str1, result);
+
+    cmpl(tmp, cnt2);
+    // First 8 chars are already matched.
+    jccb(Assembler::equal, CHECK_NEXT);
+
+    bind(SCAN_SUBSTR);
+    pcmpestri(vec, Address(str1, 0), 0x0d);
+    // Need to reload the string pointers if the whole vector did not match
+    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+
+    bind(CHECK_NEXT);
+    subl(cnt2, 8);
+    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
+    addptr(str1, 16);
+    addptr(str2, 16);
+    subl(cnt1, 8);
+    cmpl(cnt2, 8); // Do not read beyond substring
+    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
+    // Back up the strings to avoid reading beyond the substring.
+    lea(str2, Address(str2, cnt2, Address::times_2, -16));
+    lea(str1, Address(str1, cnt2, Address::times_2, -16));
+    subl(cnt1, cnt2);
+    movl(cnt2, 8);
+    addl(cnt1, 8);
+    bind(CONT_SCAN_SUBSTR);
+    movdqu(vec, Address(str2, 0));
+    jmpb(SCAN_SUBSTR);
+
+    bind(RET_FOUND_LONG);
+    movptr(str1, Address(rsp, wordSize));
+  } // non constant
+
+  bind(RET_FOUND);
+  // Compute substr offset
+  subptr(result, str1);
+  shrl(result, 1); // index
+
+  bind(CLEANUP);
+  pop(rsp); // restore SP
+
+} // string_indexof
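+
+// Editorial sketch (illustrative, not part of the original change): the
+// page-crossing test used above to decide whether a 16-byte movdqu that
+// overreads a short string is safe. The read stays within the page when
+// the address's page offset leaves at least 16 bytes to the boundary.
+// uintptr_t as in <stdint.h>; page_size must be a power of two.
+static inline bool safe_to_overread_16_sketch(uintptr_t addr, uintptr_t page_size) {
+  return (addr & (page_size - 1)) <= page_size - 16;
+}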
+
+// Compare strings.
+void MacroAssembler::string_compare(Register str1, Register str2,
+                                    Register cnt1, Register cnt2, Register result,
+                                    XMMRegister vec1) {
+  ShortBranchVerifier sbv(this);
+  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
+
+  // Compute the minimum of the string lengths; the
+  // difference of the lengths is saved on the stack.
+  // Select the minimum with a conditional move.
+  movl(result, cnt1);
+  subl(cnt1, cnt2);
+  push(cnt1);
+  cmov32(Assembler::lessEqual, cnt2, result);
+
+  // Is the minimum length zero?
+  testl(cnt2, cnt2);
+  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
+
+  // Compare first characters
+  load_unsigned_short(result, Address(str1, 0));
+  load_unsigned_short(cnt1, Address(str2, 0));
+  subl(result, cnt1);
+  jcc(Assembler::notZero,  POP_LABEL);
+  cmpl(cnt2, 1);
+  jcc(Assembler::equal, LENGTH_DIFF_LABEL);
+
+  // Check if the strings start at the same location.
+  cmpptr(str1, str2);
+  jcc(Assembler::equal, LENGTH_DIFF_LABEL);
+
+  Address::ScaleFactor scale = Address::times_2;
+  int stride = 8;
+
+  if (UseAVX >= 2) {
+    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
+    Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
+    Label COMPARE_TAIL_LONG;
+    int pcmpmask = 0x19;
+
+    // Setup to compare 16-char (32-byte) vectors,
+    // starting from the first character again because it has an aligned address.
+    int stride2 = 16;
+    int adr_stride  = stride  << scale;
+    int adr_stride2 = stride2 << scale;
+
+    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
+    // rax and rdx are used by pcmpestri as element counters
+    movl(result, cnt2);
+    andl(cnt2, ~(stride2-1));   // cnt2 holds the vector count
+    jcc(Assembler::zero, COMPARE_TAIL_LONG);
+
+    // Fast path: compare the first two 8-char vectors.
+    bind(COMPARE_16_CHARS);
+    movdqu(vec1, Address(str1, 0));
+    pcmpestri(vec1, Address(str2, 0), pcmpmask);
+    jccb(Assembler::below, COMPARE_INDEX_CHAR);
+
+    movdqu(vec1, Address(str1, adr_stride));
+    pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
+    jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
+    addl(cnt1, stride);
+
+    // Compare the characters at index in cnt1
+    bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character
+    load_unsigned_short(result, Address(str1, cnt1, scale));
+    load_unsigned_short(cnt2, Address(str2, cnt1, scale));
+    subl(result, cnt2);
+    jmp(POP_LABEL);
+
+    // Setup the registers to start vector comparison loop
+    bind(COMPARE_WIDE_VECTORS);
+    lea(str1, Address(str1, result, scale));
+    lea(str2, Address(str2, result, scale));
+    subl(result, stride2);
+    subl(cnt2, stride2);
+    jccb(Assembler::zero, COMPARE_WIDE_TAIL);
+    negptr(result);
+
+    // In a loop, compare 16 chars (32 bytes) at once using (vpxor+vptest)
+    bind(COMPARE_WIDE_VECTORS_LOOP);
+    vmovdqu(vec1, Address(str1, result, scale));
+    vpxor(vec1, Address(str2, result, scale));
+    vptest(vec1, vec1);
+    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
+    addptr(result, stride2);
+    subl(cnt2, stride2);
+    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
+
+    // compare wide vectors tail
+    bind(COMPARE_WIDE_TAIL);
+    testptr(result, result);
+    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
+
+    movl(result, stride2);
+    movl(cnt2, result);
+    negptr(result);
+    jmpb(COMPARE_WIDE_VECTORS_LOOP);
+
+    // Identify the mismatching (higher or lower) 16 bytes in the 32-byte vectors.
+    bind(VECTOR_NOT_EQUAL);
+    lea(str1, Address(str1, result, scale));
+    lea(str2, Address(str2, result, scale));
+    jmp(COMPARE_16_CHARS);
+
+    // Compare tail chars, length between 1 and 15 chars
+    bind(COMPARE_TAIL_LONG);
+    movl(cnt2, result);
+    cmpl(cnt2, stride);
+    jccb(Assembler::less, COMPARE_SMALL_STR);
+
+    movdqu(vec1, Address(str1, 0));
+    pcmpestri(vec1, Address(str2, 0), pcmpmask);
+    jcc(Assembler::below, COMPARE_INDEX_CHAR);
+    subptr(cnt2, stride);
+    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
+    lea(str1, Address(str1, result, scale));
+    lea(str2, Address(str2, result, scale));
+    negptr(cnt2);
+    jmpb(WHILE_HEAD_LABEL);
+
+    bind(COMPARE_SMALL_STR);
+  } else if (UseSSE42Intrinsics) {
+    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
+    int pcmpmask = 0x19;
+    // Setup to compare 8-char (16-byte) vectors,
+    // starting from the first character again because it has an aligned address.
+    movl(result, cnt2);
+    andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
+    jccb(Assembler::zero, COMPARE_TAIL);
+
+    lea(str1, Address(str1, result, scale));
+    lea(str2, Address(str2, result, scale));
+    negptr(result);
+
+    // pcmpestri
+    //   inputs:
+    //     vec1 - substring
+    //     rax - negative string length (elements count)
+    //     mem - scanned string
+    //     rdx - string length (elements count)
+    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
+    //               + 00 (unsigned bytes) or  + 01 (unsigned shorts)
+    //   outputs:
+    //     rcx - first mismatched element index
+    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
+
+    bind(COMPARE_WIDE_VECTORS);
+    movdqu(vec1, Address(str1, result, scale));
+    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
+    // After pcmpestri cnt1(rcx) contains mismatched element index
+
+    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
+    addptr(result, stride);
+    subptr(cnt2, stride);
+    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+    // compare wide vectors tail
+    testptr(result, result);
+    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
+
+    movl(cnt2, stride);
+    movl(result, stride);
+    negptr(result);
+    movdqu(vec1, Address(str1, result, scale));
+    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
+    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
+
+    // Mismatched characters in the vectors
+    bind(VECTOR_NOT_EQUAL);
+    addptr(cnt1, result);
+    load_unsigned_short(result, Address(str1, cnt1, scale));
+    load_unsigned_short(cnt2, Address(str2, cnt1, scale));
+    subl(result, cnt2);
+    jmpb(POP_LABEL);
+
+    bind(COMPARE_TAIL); // limit is zero
+    movl(cnt2, result);
+    // Fallthru to tail compare
+  }
+  // Shift str2 and str1 to the end of the arrays, negate min
+  lea(str1, Address(str1, cnt2, scale));
+  lea(str2, Address(str2, cnt2, scale));
+  decrementl(cnt2);  // first character was compared already
+  negptr(cnt2);
+
+  // Compare the rest of the elements
+  bind(WHILE_HEAD_LABEL);
+  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
+  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
+  subl(result, cnt1);
+  jccb(Assembler::notZero, POP_LABEL);
+  increment(cnt2);
+  jccb(Assembler::notZero, WHILE_HEAD_LABEL);
+
+  // Strings are equal up to min length.  Return the length difference.
+  bind(LENGTH_DIFF_LABEL);
+  pop(result);
+  jmpb(DONE_LABEL);
+
+  // Discard the stored length difference
+  bind(POP_LABEL);
+  pop(cnt1);
+
+  // That's it
+  bind(DONE_LABEL);
+}
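+
+// Editorial sketch (illustrative, not part of the original change): the
+// reference semantics of string_compare() above. Characters are compared
+// up to the shorter length; an equal prefix is broken by the length
+// difference, matching String.compareTo. uint16_t as in <stdint.h>.
+static int string_compare_sketch(const uint16_t* s1, int cnt1,
+                                 const uint16_t* s2, int cnt2) {
+  int min = (cnt1 < cnt2) ? cnt1 : cnt2;
+  for (int i = 0; i < min; i++) {
+    int diff = (int)s1[i] - (int)s2[i];
+    if (diff != 0) return diff;        // first mismatching character decides
+  }
+  return cnt1 - cnt2;                  // equal prefix: length difference
+}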
+
+// Compare char[] arrays (aligned to 4 bytes) or substrings.
+void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+                                        Register limit, Register result, Register chr,
+                                        XMMRegister vec1, XMMRegister vec2) {
+  ShortBranchVerifier sbv(this);
+  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
+
+  int length_offset  = arrayOopDesc::length_offset_in_bytes();
+  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+
+  // Check the input args
+  cmpptr(ary1, ary2);
+  jcc(Assembler::equal, TRUE_LABEL);
+
+  if (is_array_equ) {
+    // Need additional checks for arrays_equals.
+    testptr(ary1, ary1);
+    jcc(Assembler::zero, FALSE_LABEL);
+    testptr(ary2, ary2);
+    jcc(Assembler::zero, FALSE_LABEL);
+
+    // Check the lengths
+    movl(limit, Address(ary1, length_offset));
+    cmpl(limit, Address(ary2, length_offset));
+    jcc(Assembler::notEqual, FALSE_LABEL);
+  }
+
+  // count == 0
+  testl(limit, limit);
+  jcc(Assembler::zero, TRUE_LABEL);
+
+  if (is_array_equ) {
+    // Load array address
+    lea(ary1, Address(ary1, base_offset));
+    lea(ary2, Address(ary2, base_offset));
+  }
+
+  shll(limit, 1);      // byte count != 0
+  movl(result, limit); // copy
+
+  if (UseAVX >= 2) {
+    // With AVX2, use 32-byte vector compare
+    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+
+    // Compare 32-byte vectors
+    andl(result, 0x0000001e);  //   tail count (in bytes)
+    andl(limit, 0xffffffe0);   // vector count (in bytes)
+    jccb(Assembler::zero, COMPARE_TAIL);
+
+    lea(ary1, Address(ary1, limit, Address::times_1));
+    lea(ary2, Address(ary2, limit, Address::times_1));
+    negptr(limit);
+
+    bind(COMPARE_WIDE_VECTORS);
+    vmovdqu(vec1, Address(ary1, limit, Address::times_1));
+    vmovdqu(vec2, Address(ary2, limit, Address::times_1));
+    vpxor(vec1, vec2);
+
+    vptest(vec1, vec1);
+    jccb(Assembler::notZero, FALSE_LABEL);
+    addptr(limit, 32);
+    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+    testl(result, result);
+    jccb(Assembler::zero, TRUE_LABEL);
+
+    vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
+    vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
+    vpxor(vec1, vec2);
+
+    vptest(vec1, vec1);
+    jccb(Assembler::notZero, FALSE_LABEL);
+    jmpb(TRUE_LABEL);
+
+    bind(COMPARE_TAIL); // limit is zero
+    movl(limit, result);
+    // Fallthru to tail compare
+  } else if (UseSSE42Intrinsics) {
+    // With SSE4.2, use double quad vector compare
+    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+
+    // Compare 16-byte vectors
+    andl(result, 0x0000000e);  //   tail count (in bytes)
+    andl(limit, 0xfffffff0);   // vector count (in bytes)
+    jccb(Assembler::zero, COMPARE_TAIL);
+
+    lea(ary1, Address(ary1, limit, Address::times_1));
+    lea(ary2, Address(ary2, limit, Address::times_1));
+    negptr(limit);
+
+    bind(COMPARE_WIDE_VECTORS);
+    movdqu(vec1, Address(ary1, limit, Address::times_1));
+    movdqu(vec2, Address(ary2, limit, Address::times_1));
+    pxor(vec1, vec2);
+
+    ptest(vec1, vec1);
+    jccb(Assembler::notZero, FALSE_LABEL);
+    addptr(limit, 16);
+    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+    testl(result, result);
+    jccb(Assembler::zero, TRUE_LABEL);
+
+    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
+    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
+    pxor(vec1, vec2);
+
+    ptest(vec1, vec1);
+    jccb(Assembler::notZero, FALSE_LABEL);
+    jmpb(TRUE_LABEL);
+
+    bind(COMPARE_TAIL); // limit is zero
+    movl(limit, result);
+    // Fallthru to tail compare
+  }
+
+  // Compare 4-byte vectors
+  andl(limit, 0xfffffffc); // vector count (in bytes)
+  jccb(Assembler::zero, COMPARE_CHAR);
+
+  lea(ary1, Address(ary1, limit, Address::times_1));
+  lea(ary2, Address(ary2, limit, Address::times_1));
+  negptr(limit);
+
+  bind(COMPARE_VECTORS);
+  movl(chr, Address(ary1, limit, Address::times_1));
+  cmpl(chr, Address(ary2, limit, Address::times_1));
+  jccb(Assembler::notEqual, FALSE_LABEL);
+  addptr(limit, 4);
+  jcc(Assembler::notZero, COMPARE_VECTORS);
+
+  // Compare trailing char (final 2 bytes), if any
+  bind(COMPARE_CHAR);
+  testl(result, 0x2);   // tail  char
+  jccb(Assembler::zero, TRUE_LABEL);
+  load_unsigned_short(chr, Address(ary1, 0));
+  load_unsigned_short(limit, Address(ary2, 0));
+  cmpl(chr, limit);
+  jccb(Assembler::notEqual, FALSE_LABEL);
+
+  bind(TRUE_LABEL);
+  movl(result, 1);   // return true
+  jmpb(DONE);
+
+  bind(FALSE_LABEL);
+  xorl(result, result); // return false
+
+  // That's it
+  bind(DONE);
+}
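+
+// Editorial sketch (illustrative, not part of the original change): the
+// scalar semantics of the vectorized comparison above, once the aliasing,
+// NULL and length checks have passed and both operands have 'len' chars.
+static bool char_arrays_equals_sketch(const uint16_t* a, const uint16_t* b, int len) {
+  for (int i = 0; i < len; i++) {
+    if (a[i] != b[i]) return false;    // any mismatch fails the whole compare
+  }
+  return true;
+}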
+
+void MacroAssembler::generate_fill(BasicType t, bool aligned,
+                                   Register to, Register value, Register count,
+                                   Register rtmp, XMMRegister xtmp) {
+  ShortBranchVerifier sbv(this);
+  assert_different_registers(to, value, count, rtmp);
+  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
+  Label L_fill_2_bytes, L_fill_4_bytes;
+
+  int shift = -1;
+  switch (t) {
+    case T_BYTE:
+      shift = 2;
+      break;
+    case T_SHORT:
+      shift = 1;
+      break;
+    case T_INT:
+      shift = 0;
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  if (t == T_BYTE) {
+    andl(value, 0xff);
+    movl(rtmp, value);
+    shll(rtmp, 8);
+    orl(value, rtmp);
+  }
+  if (t == T_SHORT) {
+    andl(value, 0xffff);
+  }
+  if (t == T_BYTE || t == T_SHORT) {
+    movl(rtmp, value);
+    shll(rtmp, 16);
+    orl(value, rtmp);
+  }
+
+  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
+  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
+  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
+    // align the target address at a 4-byte boundary
+    if (t == T_BYTE) {
+      // One byte misalignment happens only for byte arrays
+      testptr(to, 1);
+      jccb(Assembler::zero, L_skip_align1);
+      movb(Address(to, 0), value);
+      increment(to);
+      decrement(count);
+      BIND(L_skip_align1);
+    }
+    // Two-byte misalignment happens only for byte and short (char) arrays
+    testptr(to, 2);
+    jccb(Assembler::zero, L_skip_align2);
+    movw(Address(to, 0), value);
+    addptr(to, 2);
+    subl(count, 1<<(shift-1));
+    BIND(L_skip_align2);
+  }
+  if (UseSSE < 2) {
+    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
+    // Fill 32-byte chunks
+    subl(count, 8 << shift);
+    jcc(Assembler::less, L_check_fill_8_bytes);
+    align(16);
+
+    BIND(L_fill_32_bytes_loop);
+
+    for (int i = 0; i < 32; i += 4) {
+      movl(Address(to, i), value);
+    }
+
+    addptr(to, 32);
+    subl(count, 8 << shift);
+    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
+    BIND(L_check_fill_8_bytes);
+    addl(count, 8 << shift);
+    jccb(Assembler::zero, L_exit);
+    jmpb(L_fill_8_bytes);
+
+    //
+    // length is too short, just fill qwords
+    //
+    BIND(L_fill_8_bytes_loop);
+    movl(Address(to, 0), value);
+    movl(Address(to, 4), value);
+    addptr(to, 8);
+    BIND(L_fill_8_bytes);
+    subl(count, 1 << (shift + 1));
+    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
+    // fall through to fill 4 bytes
+  } else {
+    Label L_fill_32_bytes;
+    if (!UseUnalignedLoadStores) {
+      // align to 8 bytes; we know we are 4-byte aligned to start
+      testptr(to, 4);
+      jccb(Assembler::zero, L_fill_32_bytes);
+      movl(Address(to, 0), value);
+      addptr(to, 4);
+      subl(count, 1<<shift);
+    }
+    BIND(L_fill_32_bytes);
+    {
+      assert( UseSSE >= 2, "supported cpu only" );
+      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
+      movdl(xtmp, value);
+      if (UseAVX >= 2 && UseUnalignedLoadStores) {
+        // Fill 64-byte chunks
+        Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
+        vpbroadcastd(xtmp, xtmp);
+
+        subl(count, 16 << shift);
+        jcc(Assembler::less, L_check_fill_32_bytes);
+        align(16);
+
+        BIND(L_fill_64_bytes_loop);
+        vmovdqu(Address(to, 0), xtmp);
+        vmovdqu(Address(to, 32), xtmp);
+        addptr(to, 64);
+        subl(count, 16 << shift);
+        jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
+
+        BIND(L_check_fill_32_bytes);
+        addl(count, 8 << shift);
+        jccb(Assembler::less, L_check_fill_8_bytes);
+        vmovdqu(Address(to, 0), xtmp);
+        addptr(to, 32);
+        subl(count, 8 << shift);
+      } else {
+        // Fill 32-byte chunks
+        pshufd(xtmp, xtmp, 0);
+
+        subl(count, 8 << shift);
+        jcc(Assembler::less, L_check_fill_8_bytes);
+        align(16);
+
+        BIND(L_fill_32_bytes_loop);
+
+        if (UseUnalignedLoadStores) {
+          movdqu(Address(to, 0), xtmp);
+          movdqu(Address(to, 16), xtmp);
+        } else {
+          movq(Address(to, 0), xtmp);
+          movq(Address(to, 8), xtmp);
+          movq(Address(to, 16), xtmp);
+          movq(Address(to, 24), xtmp);
+        }
+
+        addptr(to, 32);
+        subl(count, 8 << shift);
+        jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
+      }
+      BIND(L_check_fill_8_bytes);
+      addl(count, 8 << shift);
+      jccb(Assembler::zero, L_exit);
+      jmpb(L_fill_8_bytes);
+
+      //
+      // length is too short, just fill qwords
+      //
+      BIND(L_fill_8_bytes_loop);
+      movq(Address(to, 0), xtmp);
+      addptr(to, 8);
+      BIND(L_fill_8_bytes);
+      subl(count, 1 << (shift + 1));
+      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
+    }
+  }
+  // fill trailing 4 bytes
+  BIND(L_fill_4_bytes);
+  testl(count, 1<<shift);
+  jccb(Assembler::zero, L_fill_2_bytes);
+  movl(Address(to, 0), value);
+  if (t == T_BYTE || t == T_SHORT) {
+    addptr(to, 4);
+    BIND(L_fill_2_bytes);
+    // fill trailing 2 bytes
+    testl(count, 1<<(shift-1));
+    jccb(Assembler::zero, L_fill_byte);
+    movw(Address(to, 0), value);
+    if (t == T_BYTE) {
+      addptr(to, 2);
+      BIND(L_fill_byte);
+      // fill trailing byte
+      testl(count, 1);
+      jccb(Assembler::zero, L_exit);
+      movb(Address(to, 0), value);
+    } else {
+      BIND(L_fill_byte);
+    }
+  } else {
+    BIND(L_fill_2_bytes);
+  }
+  BIND(L_exit);
+}
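+
+// Editorial sketch (illustrative, not part of the original change): the
+// value splatting done before the fill loops above. A byte is replicated
+// into all four bytes of a 32-bit word and a short into both halves, so
+// the wide stores write the same element pattern. element_size is 1, 2
+// or 4 for T_BYTE, T_SHORT and T_INT respectively; uint32_t as in <stdint.h>.
+static inline uint32_t splat_fill_value_sketch(uint32_t value, int element_size) {
+  if (element_size == 1) {             // T_BYTE: replicate into both bytes
+    value &= 0xff;
+    value |= value << 8;
+  }
+  if (element_size <= 2) {             // T_BYTE/T_SHORT: replicate into both halves
+    value &= 0xffff;
+    value |= value << 16;
+  }
+  return value;                        // T_INT passes through unchanged
+}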
+
+// encode char[] to byte[] in ISO_8859_1
+void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
+                                      XMMRegister tmp1Reg, XMMRegister tmp2Reg,
+                                      XMMRegister tmp3Reg, XMMRegister tmp4Reg,
+                                      Register tmp5, Register result) {
+  // rsi: src
+  // rdi: dst
+  // rdx: len
+  // rcx: tmp5
+  // rax: result
+  ShortBranchVerifier sbv(this);
+  assert_different_registers(src, dst, len, tmp5, result);
+  Label L_done, L_copy_1_char, L_copy_1_char_exit;
+
+  // set result
+  xorl(result, result);
+  // check for zero length
+  testl(len, len);
+  jcc(Assembler::zero, L_done);
+  movl(result, len);
+
+  // Setup pointers
+  lea(src, Address(src, len, Address::times_2)); // char[]
+  lea(dst, Address(dst, len, Address::times_1)); // byte[]
+  negptr(len);
+
+  if (UseSSE42Intrinsics || UseAVX >= 2) {
+    Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
+    Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;
+
+    if (UseAVX >= 2) {
+      Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
+      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
+      movdl(tmp1Reg, tmp5);
+      vpbroadcastd(tmp1Reg, tmp1Reg);
+      jmpb(L_chars_32_check);
+
+      bind(L_copy_32_chars);
+      vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
+      vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
+      vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
+      vptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
+      jccb(Assembler::notZero, L_copy_32_chars_exit);
+      vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
+      vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true);
+      vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);
+
+      bind(L_chars_32_check);
+      addptr(len, 32);
+      jccb(Assembler::lessEqual, L_copy_32_chars);
+
+      bind(L_copy_32_chars_exit);
+      subptr(len, 16);
+      jccb(Assembler::greater, L_copy_16_chars_exit);
+
+    } else if (UseSSE42Intrinsics) {
+      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
+      movdl(tmp1Reg, tmp5);
+      pshufd(tmp1Reg, tmp1Reg, 0);
+      jmpb(L_chars_16_check);
+    }
+
+    bind(L_copy_16_chars);
+    if (UseAVX >= 2) {
+      vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
+      vptest(tmp2Reg, tmp1Reg);
+      jccb(Assembler::notZero, L_copy_16_chars_exit);
+      vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
+      vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true);
+    } else {
+      if (UseAVX > 0) {
+        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
+        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
+        vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
+      } else {
+        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
+        por(tmp2Reg, tmp3Reg);
+        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
+        por(tmp2Reg, tmp4Reg);
+      }
+      ptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
+      jccb(Assembler::notZero, L_copy_16_chars_exit);
+      packuswb(tmp3Reg, tmp4Reg);
+    }
+    movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg);
+
+    bind(L_chars_16_check);
+    addptr(len, 16);
+    jccb(Assembler::lessEqual, L_copy_16_chars);
+
+    bind(L_copy_16_chars_exit);
+    subptr(len, 8);
+    jccb(Assembler::greater, L_copy_8_chars_exit);
+
+    bind(L_copy_8_chars);
+    movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
+    ptest(tmp3Reg, tmp1Reg);
+    jccb(Assembler::notZero, L_copy_8_chars_exit);
+    packuswb(tmp3Reg, tmp1Reg);
+    movq(Address(dst, len, Address::times_1, -8), tmp3Reg);
+    addptr(len, 8);
+    jccb(Assembler::lessEqual, L_copy_8_chars);
+
+    bind(L_copy_8_chars_exit);
+    subptr(len, 8);
+    jccb(Assembler::zero, L_done);
+  }
+
+  bind(L_copy_1_char);
+  load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
+  testl(tmp5, 0xff00);      // check if Unicode char
+  jccb(Assembler::notZero, L_copy_1_char_exit);
+  movb(Address(dst, len, Address::times_1, 0), tmp5);
+  addptr(len, 1);
+  jccb(Assembler::less, L_copy_1_char);
+
+  bind(L_copy_1_char_exit);
+  addptr(result, len); // len is the negative count of unprocessed elements
+  bind(L_done);
+}
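+
+// A scalar C++ sketch of what encode_iso_array computes (illustration only,
+// not part of the VM sources): copy 16-bit chars to 8-bit bytes until the
+// first non-Latin-1 char, returning the number of chars actually encoded.
+//
+//   int encode_iso_array(const jchar* src, jbyte* dst, int len) {
+//     for (int i = 0; i < len; i++) {
+//       if (src[i] & 0xff00) return i;  // Unicode char, stop here
+//       dst[i] = (jbyte) src[i];
+//     }
+//     return len;
+//   }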
+
+#undef BIND
+#undef BLOCK_COMMENT
+
+
+Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
+  switch (cond) {
+    // Note some conditions are synonyms for others
+    case Assembler::zero:         return Assembler::notZero;
+    case Assembler::notZero:      return Assembler::zero;
+    case Assembler::less:         return Assembler::greaterEqual;
+    case Assembler::lessEqual:    return Assembler::greater;
+    case Assembler::greater:      return Assembler::lessEqual;
+    case Assembler::greaterEqual: return Assembler::less;
+    case Assembler::below:        return Assembler::aboveEqual;
+    case Assembler::belowEqual:   return Assembler::above;
+    case Assembler::above:        return Assembler::belowEqual;
+    case Assembler::aboveEqual:   return Assembler::below;
+    case Assembler::overflow:     return Assembler::noOverflow;
+    case Assembler::noOverflow:   return Assembler::overflow;
+    case Assembler::negative:     return Assembler::positive;
+    case Assembler::positive:     return Assembler::negative;
+    case Assembler::parity:       return Assembler::noParity;
+    case Assembler::noParity:     return Assembler::parity;
+  }
+  ShouldNotReachHere(); return Assembler::overflow;
+}
+
+SkipIfEqual::SkipIfEqual(
+    MacroAssembler* masm, const bool* flag_addr, bool value) {
+  _masm = masm;
+  _masm->cmp8(ExternalAddress((address)flag_addr), value);
+  _masm->jcc(Assembler::equal, _label);
+}
+
+SkipIfEqual::~SkipIfEqual() {
+  _masm->bind(_label);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,1164 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_X86_VM_MACROASSEMBLER_X86_HPP
+#define CPU_X86_VM_MACROASSEMBLER_X86_HPP
+
+#include "asm/assembler.hpp"
+
+
+// MacroAssembler extends Assembler by frequently used macros.
+//
+// Instructions for which a 'better' code sequence exists depending
+// on arguments should also go in here.
+
+class MacroAssembler: public Assembler {
+  friend class LIR_Assembler;
+  friend class Runtime1;      // as_Address()
+
+ protected:
+
+  Address as_Address(AddressLiteral adr);
+  Address as_Address(ArrayAddress adr);
+
+  // Support for VM calls
+  //
+  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
+  // may customize this version by overriding it for its purposes (e.g., to save/restore
+  // additional registers when doing a VM call).
+#ifdef CC_INTERP
+  // c++ interpreter never wants to use interp_masm version of call_VM
+  #define VIRTUAL
+#else
+  #define VIRTUAL virtual
+#endif
+
+  VIRTUAL void call_VM_leaf_base(
+    address entry_point,               // the entry point
+    int     number_of_arguments        // the number of arguments to pop after the call
+  );
+
+  // This is the base routine called by the different versions of call_VM. The interpreter
+  // may customize this version by overriding it for its purposes (e.g., to save/restore
+  // additional registers when doing a VM call).
+  //
+  // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
+  // returns the register which contains the thread upon return. If a thread register has been
+  // specified, the return value will correspond to that register. If no last_java_sp is specified
+  // (noreg) then rsp will be used instead.
+  VIRTUAL void call_VM_base(           // returns the register containing the thread upon return
+    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
+    Register java_thread,              // the thread if computed before     ; use noreg otherwise
+    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
+    address  entry_point,              // the entry point
+    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
+    bool     check_exceptions          // whether to check for pending exceptions after return
+  );
+
+  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
+  // The implementation is only non-empty for the InterpreterMacroAssembler,
+  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
+  virtual void check_and_handle_popframe(Register java_thread);
+  virtual void check_and_handle_earlyret(Register java_thread);
+
+  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
+
+  // helpers for FPU flag access
+  // tmp is a temporary register, if none is available use noreg
+  void save_rax   (Register tmp);
+  void restore_rax(Register tmp);
+
+ public:
+  MacroAssembler(CodeBuffer* code) : Assembler(code) {}
+
+  // Support for NULL-checks
+  //
+  // Generates code that causes a NULL OS exception if the content of reg is NULL.
+  // If the accessed location is M[reg + offset] and the offset is known, provide the
+  // offset. No explicit code generation is needed if the offset is within a certain
+  // range (0 <= offset <= page_size).
+
+  void null_check(Register reg, int offset = -1);
+  static bool needs_explicit_null_check(intptr_t offset);
+
+  // Required platform-specific helpers for Label::patch_instructions.
+  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
+  void pd_patch_instruction(address branch, address target) {
+    unsigned char op = branch[0];
+    assert(op == 0xE8 /* call */ ||
+        op == 0xE9 /* jmp */ ||
+        op == 0xEB /* short jmp */ ||
+        (op & 0xF0) == 0x70 /* short jcc */ ||
+        op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */,
+        "Invalid opcode at patch point");
+
+    if (op == 0xEB || (op & 0xF0) == 0x70) {
+      // short offset operators (jmp and jcc)
+      char* disp = (char*) &branch[1];
+      int imm8 = target - (address) &disp[1];
+      guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset");
+      *disp = imm8;
+    } else {
+      int* disp = (int*) &branch[(op == 0x0F)? 2: 1];
+      int imm32 = target - (address) &disp[1];
+      *disp = imm32;
+    }
+  }
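+
+  // Worked example (illustration only): for a short jcc whose opcode byte is
+  // at address A and whose target is T, the displacement stored at A+1 is
+  // T - (A + 2), i.e. relative to the end of the 2-byte instruction. The
+  // long forms are analogous, with a 4-byte displacement.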
+
+  // The following 4 methods return the offset of the appropriate move instruction
+
+  // Support for fast byte/short loading with zero extension (depending on particular CPU)
+  int load_unsigned_byte(Register dst, Address src);
+  int load_unsigned_short(Register dst, Address src);
+
+  // Support for fast byte/short loading with sign extension (depending on particular CPU)
+  int load_signed_byte(Register dst, Address src);
+  int load_signed_short(Register dst, Address src);
+
+  // Support for sign-extension (hi:lo = extend_sign(lo))
+  void extend_sign(Register hi, Register lo);
+
+  // Load and store values by size and signed-ness
+  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
+  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
+
+  // Support for inc/dec with optimal instruction selection depending on value
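+  // (LP64_ONLY(x) expands to x on 64-bit builds and to nothing on 32-bit
+  // builds, and NOT_LP64 is the reverse, so each wrapper below selects the
+  // q- or l-form of the instruction at compile time.)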
+
+  void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
+  void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
+
+  void decrementl(Address dst, int value = 1);
+  void decrementl(Register reg, int value = 1);
+
+  void decrementq(Register reg, int value = 1);
+  void decrementq(Address dst, int value = 1);
+
+  void incrementl(Address dst, int value = 1);
+  void incrementl(Register reg, int value = 1);
+
+  void incrementq(Register reg, int value = 1);
+  void incrementq(Address dst, int value = 1);
+
+
+  // Support optimal SSE move instructions.
+  void movflt(XMMRegister dst, XMMRegister src) {
+    if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
+    else                       { movss (dst, src); return; }
+  }
+  void movflt(XMMRegister dst, Address src) { movss(dst, src); }
+  void movflt(XMMRegister dst, AddressLiteral src);
+  void movflt(Address dst, XMMRegister src) { movss(dst, src); }
+
+  void movdbl(XMMRegister dst, XMMRegister src) {
+    if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
+    else                       { movsd (dst, src); return; }
+  }
+
+  void movdbl(XMMRegister dst, AddressLiteral src);
+
+  void movdbl(XMMRegister dst, Address src) {
+    if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
+    else                         { movlpd(dst, src); return; }
+  }
+  void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
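+
+  // (UseXmmRegToRegMoveAll prefers the movaps/movapd forms for reg-to-reg
+  // moves above because they copy the whole register and so avoid the
+  // partial-register dependency that movss/movsd can create on some CPUs.)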
+
+  void incrementl(AddressLiteral dst);
+  void incrementl(ArrayAddress dst);
+
+  // Alignment
+  void align(int modulus);
+
+  // A 5 byte nop that is safe for patching (see patch_verified_entry)
+  void fat_nop();
+
+  // Stack frame creation/removal
+  void enter();
+  void leave();
+
+  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
+  // The pointer will be loaded into the thread register.
+  void get_thread(Register thread);
+
+
+  // Support for VM calls
+  //
+  // It is imperative that all calls into the VM are handled via the call_VM macros.
+  // They make sure that the stack linkage is set up correctly. call_VM's correspond
+  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
+
+
+  void call_VM(Register oop_result,
+               address entry_point,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1, Register arg_2,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1, Register arg_2, Register arg_3,
+               bool check_exceptions = true);
+
+  // Overloadings with last_Java_sp
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               int number_of_arguments = 0,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1, Register arg_2,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1, Register arg_2, Register arg_3,
+               bool check_exceptions = true);
+
+  void get_vm_result  (Register oop_result, Register thread);
+  void get_vm_result_2(Register metadata_result, Register thread);
+
+  // These always tightly bind to MacroAssembler::call_VM_base
+  // bypassing the virtual implementation
+  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
+  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
+  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
+  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
+  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);
+
+  void call_VM_leaf(address entry_point,
+                    int number_of_arguments = 0);
+  void call_VM_leaf(address entry_point,
+                    Register arg_1);
+  void call_VM_leaf(address entry_point,
+                    Register arg_1, Register arg_2);
+  void call_VM_leaf(address entry_point,
+                    Register arg_1, Register arg_2, Register arg_3);
+
+  // These always tightly bind to MacroAssembler::call_VM_leaf_base
+  // bypassing the virtual implementation
+  void super_call_VM_leaf(address entry_point);
+  void super_call_VM_leaf(address entry_point, Register arg_1);
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
+
+  // last Java Frame (fills frame anchor)
+  void set_last_Java_frame(Register thread,
+                           Register last_java_sp,
+                           Register last_java_fp,
+                           address last_java_pc);
+
+  // thread in the default location (r15_thread on 64bit)
+  void set_last_Java_frame(Register last_java_sp,
+                           Register last_java_fp,
+                           address last_java_pc);
+
+  void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc);
+
+  // thread in the default location (r15_thread on 64bit)
+  void reset_last_Java_frame(bool clear_fp, bool clear_pc);
+
+  // Stores
+  void store_check(Register obj);                // store check for obj - register is destroyed afterwards
+  void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
+
+#ifndef SERIALGC
+
+  void g1_write_barrier_pre(Register obj,
+                            Register pre_val,
+                            Register thread,
+                            Register tmp,
+                            bool tosca_live,
+                            bool expand_call);
+
+  void g1_write_barrier_post(Register store_addr,
+                             Register new_val,
+                             Register thread,
+                             Register tmp,
+                             Register tmp2);
+
+#endif // SERIALGC
+
+  // split store_check(Register obj) to enhance instruction interleaving
+  void store_check_part_1(Register obj);
+  void store_check_part_2(Register obj);
+
+  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
+  void c2bool(Register x);
+
+  // C++ bool manipulation
+
+  void movbool(Register dst, Address src);
+  void movbool(Address dst, bool boolconst);
+  void movbool(Address dst, Register src);
+  void testbool(Register dst);
+
+  // oop manipulations
+  void load_klass(Register dst, Register src);
+  void store_klass(Register dst, Register src);
+
+  void load_heap_oop(Register dst, Address src);
+  void load_heap_oop_not_null(Register dst, Address src);
+  void store_heap_oop(Address dst, Register src);
+  void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg);
+
+  // Used for storing NULL. All other oop constants should be
+  // stored using routines that take a jobject.
+  void store_heap_oop_null(Address dst);
+
+  void load_prototype_header(Register dst, Register src);
+
+#ifdef _LP64
+  void store_klass_gap(Register dst, Register src);
+
+  // This dummy is to prevent a call to store_heap_oop from
+  // converting a zero (like NULL) into a Register by giving
+  // the compiler two choices it can't resolve
+
+  void store_heap_oop(Address dst, void* dummy);
+
+  void encode_heap_oop(Register r);
+  void decode_heap_oop(Register r);
+  void encode_heap_oop_not_null(Register r);
+  void decode_heap_oop_not_null(Register r);
+  void encode_heap_oop_not_null(Register dst, Register src);
+  void decode_heap_oop_not_null(Register dst, Register src);
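+
+  // (Sketch of the encoding, assuming a non-zero heap base and shift:
+  //    encode: narrow = (oop - heap_base) >> LogMinObjAlignmentInBytes
+  //    decode: oop    = heap_base + ((uintptr_t)narrow << LogMinObjAlignmentInBytes)
+  //  The *_not_null variants may omit the null check when the value is known
+  //  to be non-null.)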
+
+  void set_narrow_oop(Register dst, jobject obj);
+  void set_narrow_oop(Address dst, jobject obj);
+  void cmp_narrow_oop(Register dst, jobject obj);
+  void cmp_narrow_oop(Address dst, jobject obj);
+
+  void encode_klass_not_null(Register r);
+  void decode_klass_not_null(Register r);
+  void encode_klass_not_null(Register dst, Register src);
+  void decode_klass_not_null(Register dst, Register src);
+  void set_narrow_klass(Register dst, Klass* k);
+  void set_narrow_klass(Address dst, Klass* k);
+  void cmp_narrow_klass(Register dst, Klass* k);
+  void cmp_narrow_klass(Address dst, Klass* k);
+
+  // if the heap base register is used, reinitialize it with the correct value
+  void reinit_heapbase();
+
+  DEBUG_ONLY(void verify_heapbase(const char* msg);)
+
+#endif // _LP64
+
+  // Int division/remainder for Java
+  // (as idivl, but checks for special case as described in JVM spec.)
+  // returns idivl instruction offset for implicit exception handling
+  int corrected_idivl(Register reg);
+
+  // Long division/remainder for Java
+  // (as idivq, but checks for special case as described in JVM spec.)
+  // returns idivq instruction offset for implicit exception handling
+  int corrected_idivq(Register reg);
+
+  void int3();
+
+  // Long operation macros for a 32bit cpu
+  // Long negation for Java
+  void lneg(Register hi, Register lo);
+
+  // Long multiplication for Java
+  // (destroys contents of eax, ebx, ecx and edx)
+  void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
+
+  // Long shifts for Java
+  // (semantics as described in JVM spec.)
+  void lshl(Register hi, Register lo);                               // hi:lo << (rcx & 0x3f)
+  void lshr(Register hi, Register lo, bool sign_extension = false);  // hi:lo >> (rcx & 0x3f)
+
+  // Long compare for Java
+  // (semantics as described in JVM spec.)
+  void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
+
+
+  // misc
+
+  // Sign extension
+  void sign_extend_short(Register reg);
+  void sign_extend_byte(Register reg);
+
+  // Division by power of 2, rounding towards 0
+  void division_with_shift(Register reg, int shift_value);
+
+  // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
+  //
+  // CF (corresponds to C0) if x < y
+  // PF (corresponds to C2) if unordered
+  // ZF (corresponds to C3) if x = y
+  //
+  // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
+  // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
+  void fcmp(Register tmp);
+  // Variant of the above which allows y to be further down the stack
+  // and which only pops x and y if specified. If pop_right is
+  // specified then pop_left must also be specified.
+  void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
+
+  // Floating-point comparison for Java
+  // Compares the top-most stack entries on the FPU stack and stores the result in dst.
+  // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
+  // (semantics as described in JVM spec.)
+  void fcmp2int(Register dst, bool unordered_is_less);
+  // Variant of the above which allows y to be further down the stack
+  // and which only pops x and y if specified. If pop_right is
+  // specified then pop_left must also be specified.
+  void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
+
+  // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
+  // tmp is a temporary register, if none is available use noreg
+  void fremr(Register tmp);
+
+
+  // same as fcmp2int, but using SSE2
+  void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
+  void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
+
+  // Inlined sin/cos generator for Java; must not use CPU instruction
+  // directly on Intel as it does not have high enough precision
+  // outside of the range [-pi/4, pi/4]. The extra argument indicates the
+  // number of FPU stack slots in use; all but the topmost will
+  // require saving if a slow case is necessary. Assumes argument is
+  // on FP TOS; result is on FP TOS.  No cpu registers are changed by
+  // this code.
+  void trigfunc(char trig, int num_fpu_regs_in_use = 1);
+
+  // branch to L if FPU flag C2 is set/not set
+  // tmp is a temporary register, if none is available use noreg
+  void jC2 (Register tmp, Label& L);
+  void jnC2(Register tmp, Label& L);
+
+  // Pop ST (ffree & fincstp combined)
+  void fpop();
+
+  // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
+  void push_fTOS();
+
+  // pops double TOS element from CPU stack and pushes on FPU stack
+  void pop_fTOS();
+
+  void empty_FPU_stack();
+
+  void push_IU_state();
+  void pop_IU_state();
+
+  void push_FPU_state();
+  void pop_FPU_state();
+
+  void push_CPU_state();
+  void pop_CPU_state();
+
+  // Round up to a multiple of modulus (which must be a power of two)
+  void round_to(Register reg, int modulus);
+
+  // Callee saved registers handling
+  void push_callee_saved_registers();
+  void pop_callee_saved_registers();
+
+  // allocation
+  void eden_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+  void tlab_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+  Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
+  void incr_allocated_bytes(Register thread,
+                            Register var_size_in_bytes, int con_size_in_bytes,
+                            Register t1 = noreg);
+
+  // interface method calling
+  void lookup_interface_method(Register recv_klass,
+                               Register intf_klass,
+                               RegisterOrConstant itable_index,
+                               Register method_result,
+                               Register scan_temp,
+                               Label& no_such_interface);
+
+  // virtual method calling
+  void lookup_virtual_method(Register recv_klass,
+                             RegisterOrConstant vtable_index,
+                             Register method_result);
+
+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except temp_reg.
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The temp_reg and temp2_reg can be noreg, if no temps are available.
+  // Updates the sub's secondary super cache as necessary.
+  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     bool set_cond_codes = false);
+
+  // Simplified, combined version, good for typical uses.
+  // Falls through on failure.
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Label& L_success);
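+
+  // Sketch of how the fast and slow paths are typically wired together
+  // (illustration only):
+  //   Label L_failure;
+  //   check_klass_subtype_fast_path(sub, super, temp, &L_success, &L_failure, NULL);
+  //   // the 'maybe' case falls through into the slow path
+  //   check_klass_subtype_slow_path(sub, super, temp, noreg, &L_success, NULL);
+  //   bind(L_failure);  // fall through on failure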
+
+  // method handles (JSR 292)
+  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+
+  //----
+  void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
+
+  // Debugging
+
+  // only if +VerifyOops
+  // TODO: Make these macros with file and line like sparc version!
+  void verify_oop(Register reg, const char* s = "broken oop");
+  void verify_oop_addr(Address addr, const char * s = "broken oop addr");
+
+  // TODO: verify method and klass metadata (compare against vptr?)
+  void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
+  void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
+
+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
+#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
+
+  // only if +VerifyFPU
+  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
+
+  // prints msg, dumps registers and stops execution
+  void stop(const char* msg);
+
+  // prints msg and continues
+  void warn(const char* msg);
+
+  // dumps registers and other state
+  void print_state();
+
+  static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
+  static void debug64(char* msg, int64_t pc, int64_t regs[]);
+  static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
+  static void print_state64(int64_t pc, int64_t regs[]);
+
+  void os_breakpoint();
+
+  void untested()                                { stop("untested"); }
+
+  void unimplemented(const char* what = "")      { char* b = new char[1024];  jio_snprintf(b, 1024, "unimplemented: %s", what);  stop(b); }
+
+  void should_not_reach_here()                   { stop("should not reach here"); }
+
+  void print_CPU_state();
+
+  // Stack overflow checking
+  void bang_stack_with_offset(int offset) {
+    // stack grows down, caller passes positive offset
+    assert(offset > 0, "must bang with positive offset");
+    movl(Address(rsp, (-offset)), rax);
+  }
+
+  // Writes to successive stack pages until the given offset is reached, to
+  // check for stack overflow + shadow pages. Also clobbers tmp.
+  void bang_stack_size(Register size, Register tmp);
+
+  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
+                                                Register tmp,
+                                                int offset);
+
+  // Support for serializing memory accesses between threads
+  void serialize_memory(Register thread, Register tmp);
+
+  void verify_tlab();
+
+  // Biased locking support
+  // lock_reg and obj_reg must be loaded up with the appropriate values.
+  // swap_reg must be rax, and is killed.
+  // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
+  // be killed; if not supplied, push/pop will be used internally to
+  // allocate a temporary (inefficient, avoid if possible).
+  // Optional slow case is for implementations (interpreter and C1) which branch to
+  // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
+  // Returns offset of first potentially-faulting instruction for null
+  // check info (currently consumed only by C1). If
+  // swap_reg_contains_mark is true then returns -1 as it is assumed
+  // the calling code has already passed any potential faults.
+  int biased_locking_enter(Register lock_reg, Register obj_reg,
+                           Register swap_reg, Register tmp_reg,
+                           bool swap_reg_contains_mark,
+                           Label& done, Label* slow_case = NULL,
+                           BiasedLockingCounters* counters = NULL);
+  void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
+
+
+  Condition negate_condition(Condition cond);
+
+  // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit
+  // operands. In general the names are modified to avoid hiding the instruction in Assembler
+  // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
+  // here in MacroAssembler. The major exception to this rule is call.
+
+  // Arithmetics
+
+
+  void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
+  void addptr(Address dst, Register src);
+
+  void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
+  void addptr(Register dst, int32_t src);
+  void addptr(Register dst, Register src);
+  void addptr(Register dst, RegisterOrConstant src) {
+    if (src.is_constant()) addptr(dst, (int) src.as_constant());
+    else                   addptr(dst,       src.as_register());
+  }
+
+  void andptr(Register dst, int32_t src);
+  void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
+
+  void cmp8(AddressLiteral src1, int imm);
+
+  // renamed to drag out the casting of address to int32_t/intptr_t
+  void cmp32(Register src1, int32_t imm);
+
+  void cmp32(AddressLiteral src1, int32_t imm);
+  // compare reg - mem, or reg - &mem
+  void cmp32(Register src1, AddressLiteral src2);
+
+  void cmp32(Register src1, Address src2);
+
+#ifndef _LP64
+  void cmpklass(Address dst, Metadata* obj);
+  void cmpklass(Register dst, Metadata* obj);
+  void cmpoop(Address dst, jobject obj);
+  void cmpoop(Register dst, jobject obj);
+#endif // _LP64
+
+  // NOTE src2 must be the lval. This is NOT a mem-mem compare
+  void cmpptr(Address src1, AddressLiteral src2);
+
+  void cmpptr(Register src1, AddressLiteral src2);
+
+  void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
+  void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
+  // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
+
+  void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
+  void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
+
+  // cmp64 to avoid hiding cmpq
+  void cmp64(Register src1, AddressLiteral src);
+
+  void cmpxchgptr(Register reg, Address adr);
+
+  void locked_cmpxchgptr(Register reg, AddressLiteral adr);
+
+
+  void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
+
+
+  void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
+
+  void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
+
+  void shlptr(Register dst, int32_t shift);
+  void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
+
+  void shrptr(Register dst, int32_t shift);
+  void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
+
+  void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
+  void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
+
+  void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
+
+  void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
+  void subptr(Register dst, int32_t src);
+  // Force generation of a 4 byte immediate value even if it fits into 8bit
+  void subptr_imm32(Register dst, int32_t src);
+  void subptr(Register dst, Register src);
+  void subptr(Register dst, RegisterOrConstant src) {
+    if (src.is_constant()) subptr(dst, (int) src.as_constant());
+    else                   subptr(dst,       src.as_register());
+  }
+
+  void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
+  void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
+
+  void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
+  void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
+
+  void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
+
+
+
+  // Helper functions for statistics gathering.
+  // Conditionally (atomically, on MPs) increments the counter at the passed address, preserving condition codes.
+  void cond_inc32(Condition cond, AddressLiteral counter_addr);
+  // Unconditional atomic increment.
+  void atomic_incl(AddressLiteral counter_addr);
+
+  void lea(Register dst, AddressLiteral adr);
+  void lea(Address dst, AddressLiteral adr);
+  void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
+
+  void leal32(Register dst, Address src) { leal(dst, src); }
+
+  // Import other testl() methods from the parent class or else
+  // they will be hidden by the following overriding declaration.
+  using Assembler::testl;
+  void testl(Register dst, AddressLiteral src);
+
+  void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
+  void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
+  void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
+
+  void testptr(Register src, int32_t imm32) {  LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
+  void testptr(Register src1, Register src2);
+
+  void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
+  void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
+
+  // Calls
+
+  void call(Label& L, relocInfo::relocType rtype);
+  void call(Register entry);
+
+  // NOTE: this call transfers to the effective address of entry NOT
+  // the address contained by entry. This is because it is more natural
+  // for jumps/calls.
+  void call(AddressLiteral entry);
+
+  // Emit the CompiledIC call idiom
+  void ic_call(address entry);
+
+  // Jumps
+
+  // NOTE: these jumps transfer to the effective address of dst NOT
+  // the address contained by dst. This is because it is more natural
+  // for jumps/calls.
+  void jump(AddressLiteral dst);
+  void jump_cc(Condition cc, AddressLiteral dst);
+
+  // 32bit can do a case table jump in one instruction but we no longer allow the base
+  // to be installed in the Address class. This jump transfers to the address
+  // contained in the location described by entry (not the address of entry).
+  void jump(ArrayAddress entry);
+
+  // Floating
+
+  void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
+  void andpd(XMMRegister dst, AddressLiteral src);
+
+  void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
+  void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
+  void andps(XMMRegister dst, AddressLiteral src);
+
+  void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
+  void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
+  void comiss(XMMRegister dst, AddressLiteral src);
+
+  void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
+  void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
+  void comisd(XMMRegister dst, AddressLiteral src);
+
+  void fadd_s(Address src)        { Assembler::fadd_s(src); }
+  void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
+
+  void fldcw(Address src) { Assembler::fldcw(src); }
+  void fldcw(AddressLiteral src);
+
+  void fld_s(int index)   { Assembler::fld_s(index); }
+  void fld_s(Address src) { Assembler::fld_s(src); }
+  void fld_s(AddressLiteral src);
+
+  void fld_d(Address src) { Assembler::fld_d(src); }
+  void fld_d(AddressLiteral src);
+
+  void fld_x(Address src) { Assembler::fld_x(src); }
+  void fld_x(AddressLiteral src);
+
+  void fmul_s(Address src)        { Assembler::fmul_s(src); }
+  void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
+
+  void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
+  void ldmxcsr(AddressLiteral src);
+
+  // compute pow(x,y) and exp(x) with x86 instructions. These do not cover
+  // all corner cases, may produce NaN, and may require a fallback to a
+  // runtime call.
+  void fast_pow();
+  void fast_exp();
+  void increase_precision();
+  void restore_precision();
+
+  // computes exp(x). Fallback to runtime call included.
+  void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); }
+  // computes pow(x,y). Fallback to runtime call included.
+  void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); }
+
+private:
+
+  // call runtime as a fallback for trig functions and pow/exp.
+  void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use);
+
+  // computes 2^(Ylog2X); Ylog2X in ST(0)
+  void pow_exp_core_encoding();
+
+  // computes pow(x,y) or exp(x). Fallback to runtime call included.
+  void pow_or_exp(bool is_exp, int num_fpu_regs_in_use);
+
+  // these are private because users should be doing movflt/movdbl
+
+  void movss(Address dst, XMMRegister src)     { Assembler::movss(dst, src); }
+  void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
+  void movss(XMMRegister dst, Address src)     { Assembler::movss(dst, src); }
+  void movss(XMMRegister dst, AddressLiteral src);
+
+  void movlpd(XMMRegister dst, Address src)    {Assembler::movlpd(dst, src); }
+  void movlpd(XMMRegister dst, AddressLiteral src);
+
+public:
+
+  void addsd(XMMRegister dst, XMMRegister src)    { Assembler::addsd(dst, src); }
+  void addsd(XMMRegister dst, Address src)        { Assembler::addsd(dst, src); }
+  void addsd(XMMRegister dst, AddressLiteral src);
+
+  void addss(XMMRegister dst, XMMRegister src)    { Assembler::addss(dst, src); }
+  void addss(XMMRegister dst, Address src)        { Assembler::addss(dst, src); }
+  void addss(XMMRegister dst, AddressLiteral src);
+
+  void divsd(XMMRegister dst, XMMRegister src)    { Assembler::divsd(dst, src); }
+  void divsd(XMMRegister dst, Address src)        { Assembler::divsd(dst, src); }
+  void divsd(XMMRegister dst, AddressLiteral src);
+
+  void divss(XMMRegister dst, XMMRegister src)    { Assembler::divss(dst, src); }
+  void divss(XMMRegister dst, Address src)        { Assembler::divss(dst, src); }
+  void divss(XMMRegister dst, AddressLiteral src);
+
+  // Move Unaligned Double Quadword
+  void movdqu(Address     dst, XMMRegister src)   { Assembler::movdqu(dst, src); }
+  void movdqu(XMMRegister dst, Address src)       { Assembler::movdqu(dst, src); }
+  void movdqu(XMMRegister dst, XMMRegister src)   { Assembler::movdqu(dst, src); }
+  void movdqu(XMMRegister dst, AddressLiteral src);
+
+  void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
+  void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
+  void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
+  void movsd(XMMRegister dst, AddressLiteral src);
+
+  void mulsd(XMMRegister dst, XMMRegister src)    { Assembler::mulsd(dst, src); }
+  void mulsd(XMMRegister dst, Address src)        { Assembler::mulsd(dst, src); }
+  void mulsd(XMMRegister dst, AddressLiteral src);
+
+  void mulss(XMMRegister dst, XMMRegister src)    { Assembler::mulss(dst, src); }
+  void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
+  void mulss(XMMRegister dst, AddressLiteral src);
+
+  void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
+  void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
+  void sqrtsd(XMMRegister dst, AddressLiteral src);
+
+  void sqrtss(XMMRegister dst, XMMRegister src)    { Assembler::sqrtss(dst, src); }
+  void sqrtss(XMMRegister dst, Address src)        { Assembler::sqrtss(dst, src); }
+  void sqrtss(XMMRegister dst, AddressLiteral src);
+
+  void subsd(XMMRegister dst, XMMRegister src)    { Assembler::subsd(dst, src); }
+  void subsd(XMMRegister dst, Address src)        { Assembler::subsd(dst, src); }
+  void subsd(XMMRegister dst, AddressLiteral src);
+
+  void subss(XMMRegister dst, XMMRegister src)    { Assembler::subss(dst, src); }
+  void subss(XMMRegister dst, Address src)        { Assembler::subss(dst, src); }
+  void subss(XMMRegister dst, AddressLiteral src);
+
+  void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
+  void ucomiss(XMMRegister dst, Address src)     { Assembler::ucomiss(dst, src); }
+  void ucomiss(XMMRegister dst, AddressLiteral src);
+
+  void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
+  void ucomisd(XMMRegister dst, Address src)     { Assembler::ucomisd(dst, src); }
+  void ucomisd(XMMRegister dst, AddressLiteral src);
+
+  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
+  void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
+  void xorpd(XMMRegister dst, Address src)     { Assembler::xorpd(dst, src); }
+  void xorpd(XMMRegister dst, AddressLiteral src);
+
+  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
+  void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
+  void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
+  void xorps(XMMRegister dst, AddressLiteral src);
+
+  // Shuffle Bytes
+  void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
+  void pshufb(XMMRegister dst, Address src)     { Assembler::pshufb(dst, src); }
+  void pshufb(XMMRegister dst, AddressLiteral src);
+  // AVX 3-operands instructions
+
+  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
+  void vaddsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddsd(dst, nds, src); }
+  void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
+  void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
+  void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandpd(dst, nds, src, vector256); }
+  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
+  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandps(dst, nds, src, vector256); }
+  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
+  void vdivsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivsd(dst, nds, src); }
+  void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
+  void vdivss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivss(dst, nds, src); }
+  void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
+  void vmulsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vmulsd(dst, nds, src); }
+  void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
+  void vmulss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vmulss(dst, nds, src); }
+  void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
+  void vsubsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubsd(dst, nds, src); }
+  void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
+  void vsubss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubss(dst, nds, src); }
+  void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  // AVX Vector instructions
+
+  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
+  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
+  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
+      Assembler::vpxor(dst, nds, src, vector256);
+    else
+      Assembler::vxorpd(dst, nds, src, vector256);
+  }
+  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
+      Assembler::vpxor(dst, nds, src, vector256);
+    else
+      Assembler::vxorpd(dst, nds, src, vector256);
+  }
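+
+  // (The vxorpd fallback is safe because xor is purely bitwise: the float
+  // and integer forms produce identical bits. Only AVX2 adds a 256-bit
+  // encoding of the integer vpxor.)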
+
+  // Simple version for AVX2 256bit vectors
+  void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
+  void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
+
+  // Move packed integer values from the low 128 bits to the high 128 bits of a 256-bit vector.
+  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+    if (UseAVX > 1) // vinserti128h is available only in AVX2
+      Assembler::vinserti128h(dst, nds, src);
+    else
+      Assembler::vinsertf128h(dst, nds, src);
+  }
+
+  // Data
+
+  void cmov32( Condition cc, Register dst, Address  src);
+  void cmov32( Condition cc, Register dst, Register src);
+
+  void cmov(   Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
+
+  void cmovptr(Condition cc, Register dst, Address  src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
+  void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
+
+  void movoop(Register dst, jobject obj);
+  void movoop(Address dst, jobject obj);
+
+  void mov_metadata(Register dst, Metadata* obj);
+  void mov_metadata(Address dst, Metadata* obj);
+
+  void movptr(ArrayAddress dst, Register src);
+  // can this do an lea?
+  void movptr(Register dst, ArrayAddress src);
+
+  void movptr(Register dst, Address src);
+
+  void movptr(Register dst, AddressLiteral src);
+
+  void movptr(Register dst, intptr_t src);
+  void movptr(Register dst, Register src);
+  void movptr(Address dst, intptr_t src);
+
+  void movptr(Address dst, Register src);
+
+  void movptr(Register dst, RegisterOrConstant src) {
+    if (src.is_constant()) movptr(dst, src.as_constant());
+    else                   movptr(dst, src.as_register());
+  }
+
+#ifdef _LP64
+  // Generally the next two are only used for moving NULL, although there
+  // are situations when initializing the mark word where they could be
+  // used. They are dangerous.
+
+  // They only exist on LP64, where int32_t and intptr_t are distinct
+  // types; on 32-bit the two declarations would collide.
+
+  void movptr(Address dst, int32_t imm32);
+  void movptr(Register dst, int32_t imm32);
+#endif // _LP64
+
+  // to avoid hiding movl
+  void mov32(AddressLiteral dst, Register src);
+  void mov32(Register dst, AddressLiteral src);
+
+  // to avoid hiding movb
+  void movbyte(ArrayAddress dst, int src);
+
+  // Import other mov() methods from the parent class or else
+  // they will be hidden by the following overriding declaration.
+  using Assembler::movdl;
+  using Assembler::movq;
+  void movdl(XMMRegister dst, AddressLiteral src);
+  void movq(XMMRegister dst, AddressLiteral src);
+
+  // Can push value or effective address
+  void pushptr(AddressLiteral src);
+
+  void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
+  void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
+
+  void pushoop(jobject obj);
+  void pushklass(Metadata* obj);
+
+  // sign-extend an l (32-bit) value to a ptr-sized element, as needed
+  void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
+  void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
+
+  // C2 compiled method's prolog code.
+  void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b);
+
+  // clear memory of size 'cnt' qwords, starting at 'base'.
+  void clear_mem(Register base, Register cnt, Register rtmp);
+
+  // IndexOf strings.
+  // Small strings are loaded through the stack if they cross a page boundary.
+  void string_indexof(Register str1, Register str2,
+                      Register cnt1, Register cnt2,
+                      int int_cnt2,  Register result,
+                      XMMRegister vec, Register tmp);
+
+  // IndexOf for constant substrings with size >= 8 elements
+  // which don't need to be loaded through stack.
+  void string_indexofC8(Register str1, Register str2,
+                      Register cnt1, Register cnt2,
+                      int int_cnt2,  Register result,
+                      XMMRegister vec, Register tmp);
+
+  // Smallest code: we don't need to load through the stack;
+  // check the string tail.
+
+  // Compare strings.
+  void string_compare(Register str1, Register str2,
+                      Register cnt1, Register cnt2, Register result,
+                      XMMRegister vec1);
+
+  // Compare char[] arrays.
+  void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+                          Register limit, Register result, Register chr,
+                          XMMRegister vec1, XMMRegister vec2);
+
+  // Fill primitive arrays
+  void generate_fill(BasicType t, bool aligned,
+                     Register to, Register value, Register count,
+                     Register rtmp, XMMRegister xtmp);
+
+  void encode_iso_array(Register src, Register dst, Register len,
+                        XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
+                        XMMRegister tmp4, Register tmp5, Register result);
+
+#undef VIRTUAL
+
+};
+
+/**
+ * class SkipIfEqual:
+ *
+ * Instantiating this class will result in assembly code being output that will
+ * jump around any code emitted between the creation of the instance and its
+ * automatic destruction at the end of a scope block, depending on the value of
+ * the flag passed to the constructor, which will be checked at run-time.
+ */
+class SkipIfEqual {
+ private:
+  MacroAssembler* _masm;
+  Label _label;
+
+ public:
+   SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
+   ~SkipIfEqual();
+};
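+
+// Usage sketch (the flag name is hypothetical, for illustration only):
+//   {
+//     SkipIfEqual skip(masm, &SomeBoolFlag, false);
+//     // code emitted here is jumped over at run-time when SomeBoolFlag == false
+//   }  // destructor binds the label; execution continues here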
+
+#endif // CPU_X86_VM_MACROASSEMBLER_X86_HPP
--- a/src/cpu/x86/vm/metaspaceShared_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/metaspaceShared_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/codeBuffer.hpp"
 #include "memory/metaspaceShared.hpp"
 
 // Generate the self-patching vtable method:
--- a/src/cpu/x86/vm/metaspaceShared_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/metaspaceShared_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/codeBuffer.hpp"
 #include "memory/metaspaceShared.hpp"
 
 // Generate the self-patching vtable method:
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
 #include "memory/allocation.inline.hpp"
@@ -168,8 +169,9 @@
 
   if (VerifyMethodHandles && !for_compiler_entry) {
     // make sure recv is already on stack
+    __ movptr(temp2, Address(method_temp, Method::const_offset()));
     __ load_sized_value(temp2,
-                        Address(method_temp, Method::size_of_parameters_offset()),
+                        Address(temp2, ConstMethod::size_of_parameters_offset()),
                         sizeof(u2), /*is_signed*/ false);
     // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
     Label L;
@@ -233,8 +235,9 @@
   int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
   assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
   if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+    __ movptr(rdx_argp, Address(rbx_method, Method::const_offset()));
     __ load_sized_value(rdx_argp,
-                        Address(rbx_method, Method::size_of_parameters_offset()),
+                        Address(rdx_argp, ConstMethod::size_of_parameters_offset()),
                         sizeof(u2), /*is_signed*/ false);
     // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
     rdx_first_arg_addr = __ argument_address(rdx_argp, -1);
--- a/src/cpu/x86/vm/nativeInst_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/nativeInst_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "memory/resourceArea.hpp"
 #include "nativeInst_x86.hpp"
 #include "oops/oop.inline.hpp"
--- a/src/cpu/x86/vm/relocInfo_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/relocInfo_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.inline.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "code/relocInfo.hpp"
 #include "nativeInst_x86.hpp"
 #include "oops/oop.inline.hpp"
--- a/src/cpu/x86/vm/runtime_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/runtime_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,12 +24,11 @@
 
 #include "precompiled.hpp"
 #ifdef COMPILER2
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "code/vmreg.hpp"
 #include "interpreter/interpreter.hpp"
-#include "nativeInst_x86.hpp"
 #include "opto/runtime.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/sharedRuntime.hpp"
--- a/src/cpu/x86/vm/runtime_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/runtime_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,12 +24,11 @@
 
 #include "precompiled.hpp"
 #ifdef COMPILER2
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "code/vmreg.hpp"
 #include "interpreter/interpreter.hpp"
-#include "nativeInst_x86.hpp"
 #include "opto/runtime.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/sharedRuntime.hpp"
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "code/debugInfoRec.hpp"
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
@@ -1936,7 +1936,7 @@
   if (method->is_static() && !is_critical_native) {
 
     //  load oop into a register
-    __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
+    __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror()));
 
     // Now handlize the static class mirror it's known not-null.
     __ movptr(Address(rsp, klass_offset), oop_handle_reg);
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "code/debugInfoRec.hpp"
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
@@ -2179,7 +2179,7 @@
   if (method->is_static() && !is_critical_native) {
 
     //  load oop into a register
-    __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
+    __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror()));
 
     // Now handlize the static class mirror it's known not-null.
     __ movptr(Address(rsp, klass_offset), oop_handle_reg);
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "interpreter/interpreter.hpp"
 #include "nativeInst_x86.hpp"
 #include "oops/instanceOop.hpp"
@@ -37,19 +37,8 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/top.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 #ifdef COMPILER2
 #include "opto/runtime.hpp"
 #endif
@@ -807,16 +796,22 @@
     __ align(OptoLoopAlignment);
   __ BIND(L_copy_64_bytes_loop);
 
-    if(UseUnalignedLoadStores) {
-      __ movdqu(xmm0, Address(from, 0));
-      __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
-      __ movdqu(xmm1, Address(from, 16));
-      __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
-      __ movdqu(xmm2, Address(from, 32));
-      __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
-      __ movdqu(xmm3, Address(from, 48));
-      __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
-
+    if (UseUnalignedLoadStores) {
+      if (UseAVX >= 2) {
+        __ vmovdqu(xmm0, Address(from,  0));
+        __ vmovdqu(Address(from, to_from, Address::times_1,  0), xmm0);
+        __ vmovdqu(xmm1, Address(from, 32));
+        __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
+      } else {
+        __ movdqu(xmm0, Address(from, 0));
+        __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
+        __ movdqu(xmm1, Address(from, 16));
+        __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
+        __ movdqu(xmm2, Address(from, 32));
+        __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
+        __ movdqu(xmm3, Address(from, 48));
+        __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
+      }
     } else {
       __ movq(xmm0, Address(from, 0));
       __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
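The rewritten loop above still moves 64 bytes per iteration, but on AVX2 hardware it uses two 32-byte vmovdqu load/store pairs instead of four 16-byte movdqu pairs, halving the instruction count. A hedged standalone sketch of the two strategies using compiler intrinsics (function names are illustrative; compile with -mavx2):

    #include <immintrin.h>
    #include <cstdint>

    // 64-byte unaligned copy, AVX2 flavor: two 32-byte moves (vmovdqu).
    static void copy64_avx2(const uint8_t* from, uint8_t* to) {
      __m256i lo = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
      __m256i hi = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from + 32));
      _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), lo);
      _mm256_storeu_si256(reinterpret_cast<__m256i*>(to + 32), hi);
    }

    // 64-byte unaligned copy, SSE flavor: four 16-byte moves (movdqu).
    static void copy64_sse(const uint8_t* from, uint8_t* to) {
      for (int off = 0; off < 64; off += 16) {
        __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(from + off));
        _mm_storeu_si128(reinterpret_cast<__m128i*>(to + off), v);
      }
    }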
@@ -2137,6 +2132,581 @@
     }
   }
 
+  // AES intrinsic stubs
+  enum {AESBlockSize = 16};
+
+  address generate_key_shuffle_mask() {
+    __ align(16);
+    StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
+    address start = __ pc();
+    __ emit_data(0x00010203, relocInfo::none, 0 );
+    __ emit_data(0x04050607, relocInfo::none, 0 );
+    __ emit_data(0x08090a0b, relocInfo::none, 0 );
+    __ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
+    return start;
+  }
+
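The four emitted words form a 16-byte pshufb mask; since emit_data stores each word little-endian, the mask bytes are {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}, i.e. a byte swap within every 32-bit lane. That converts the big-endian int words of the Java key schedule into the byte order the AES instructions expect. A hedged standalone check (requires SSSE3; the key words here are illustrative):

    #include <immintrin.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t key_words[4] = {0x00010203, 0x04050607,
                                     0x08090a0b, 0x0c0d0e0f};   // illustrative
      alignas(16) const uint8_t mask[16] = {3, 2, 1, 0, 7, 6, 5, 4,
                                            11, 10, 9, 8, 15, 14, 13, 12};
      __m128i k = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key_words));
      __m128i m = _mm_load_si128(reinterpret_cast<const __m128i*>(mask));
      __m128i shuffled = _mm_shuffle_epi8(k, m);            // SSSE3 pshufb
      uint8_t out[16];
      _mm_storeu_si128(reinterpret_cast<__m128i*>(out), shuffled);
      for (int i = 0; i < 16; i++) printf("%02x ", out[i]); // 00 01 02 03 ...
      printf("\n");
      return 0;
    }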
+  // Utility routine for loading a 128-bit key word in little-endian format;
+  // the shuffle mask may optionally be supplied in an XMM register.
+  void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    __ movdqu(xmmdst, Address(key, offset));
+    if (xmm_shuf_mask != NULL) {
+      __ pshufb(xmmdst, xmm_shuf_mask);
+    } else {
+      __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    }
+  }
+
+  // aesenc using the specified key+offset;
+  // the shuffle mask may optionally be supplied in an XMM register.
+  void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    load_key(xmmtmp, key, offset, xmm_shuf_mask);
+    __ aesenc(xmmdst, xmmtmp);
+  }
+
+  // aesdec using the specified key+offset;
+  // the shuffle mask may optionally be supplied in an XMM register.
+  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    load_key(xmmtmp, key, offset, xmm_shuf_mask);
+    __ aesdec(xmmdst, xmmtmp);
+  }
+
+
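Each aes_enc_key/aes_dec_key call above amounts to one AES round against a freshly loaded, byte-swapped round key. A hedged intrinsics equivalent of the encrypt case (compile with -maes -mssse3; names are illustrative):

    #include <immintrin.h>
    #include <cstdint>

    // One AES encryption round: load a 16-byte round key, byte-swap it into
    // little-endian word order (as the stub's pshufb does), then run aesenc
    // (SubBytes/ShiftRows/MixColumns/AddRoundKey) on the state.
    __m128i aes_round(__m128i state, const uint8_t* key, int offset,
                      __m128i shuf_mask) {
      __m128i rk = _mm_loadu_si128(
          reinterpret_cast<const __m128i*>(key + offset));
      rk = _mm_shuffle_epi8(rk, shuf_mask);
      return _mm_aesenc_si128(state, rk);
    }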
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_encryptBlock() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+    Label L_doLast;
+    address start = __ pc();
+
+    const Register from        = rdx;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register keylen      = rax;
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
+
+    __ enter();   // required for proper stackwalking of RuntimeStub frame
+    __ movptr(from, from_param);
+    __ movptr(key, key_param);
+
+    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
+    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
+    __ movptr(to, to_param);
+
+    // For encryption, the java expanded key ordering is just what we need
+
+    load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
+    __ pxor(xmm_result, xmm_temp1);
+
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
+
+    __ BIND(L_doLast);
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenclast(xmm_result, xmm_temp2);
+    __ movdqu(Address(to, 0), xmm_result);        // store the result
+    __ xorptr(rax, rax); // return 0
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
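The 44/52/60 comparisons select the number of rounds: the Java-expanded key holds Nr+1 four-int round keys, so 44 ints means AES-128 (10 rounds), 52 means AES-192 (12 rounds), and 60 means AES-256 (14 rounds). The same dispatch, as a sketch:

    // Hedged sketch of the dispatch above: the expanded key holds
    // (rounds + 1) round keys of four ints each, so its int length
    // identifies the key size.
    int aes_rounds_from_expanded_len(int keylen_ints) {
      switch (keylen_ints) {
        case 44: return 10;   // AES-128: 11 round keys
        case 52: return 12;   // AES-192: 13 round keys
        case 60: return 14;   // AES-256: 15 round keys
        default: return -1;   // not a valid AES expanded-key length
      }
    }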
+
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_decryptBlock() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+    Label L_doLast;
+    address start = __ pc();
+
+    const Register from        = rdx;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register keylen      = rax;
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ movptr(from, from_param);
+    __ movptr(key, key_param);
+
+    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
+    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_result, Address(from, 0));
+    __ movptr(to, to_param);
+
+    // for decryption the java expanded key ordering is rotated one position from what we want,
+    // so we start from 0x10 here and hit 0x00 last;
+    // we don't know whether the key is aligned, hence the load-execute form is not used
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ pxor  (xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
+
+    __ BIND(L_doLast);
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    // for decryption the aesdeclast operation is always on key+0x00
+    __ aesdeclast(xmm_result, xmm_temp3);
+    __ movdqu(Address(to, 0), xmm_result);  // store the result
+    __ xorptr(rax, rax); // return 0
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
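+  // Save or restore the save-on-entry (SOE, i.e. callee-saved) registers
+  // rsi/rdi/rbx that the CBC stubs below use as scratch.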
+  void handleSOERegisters(bool saving) {
+    const int saveFrameSizeInBytes = 4 * wordSize;
+    const Address saved_rbx     (rbp, -3 * wordSize);
+    const Address saved_rsi     (rbp, -2 * wordSize);
+    const Address saved_rdi     (rbp, -1 * wordSize);
+
+    if (saving) {
+      __ subptr(rsp, saveFrameSizeInBytes);
+      __ movptr(saved_rsi, rsi);
+      __ movptr(saved_rdi, rdi);
+      __ movptr(saved_rbx, rbx);
+    } else {
+      // restoring
+      __ movptr(rsi, saved_rsi);
+      __ movptr(rdi, saved_rdi);
+      __ movptr(rbx, saved_rbx);
+    }
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  address generate_cipherBlockChaining_encryptAESCrypt() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+    address start = __ pc();
+
+    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
+    const Register from        = rsi;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register rvec        = rdi;      // r byte array, initialized from the init-vector address
+                                           // and left holding the last encryption block
+    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
+    const Register pos         = rax;
+
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    // first 6 keys preloaded into xmm2-xmm7
+    const int XMM_REG_NUM_KEY_FIRST = 2;
+    const int XMM_REG_NUM_KEY_LAST  = 7;
+    const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    handleSOERegisters(true /*saving*/);
+
+    // load registers from incoming parameters
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+    const Address  rvec_param (rbp, 8+12);
+    const Address  len_param  (rbp, 8+16);
+    __ movptr(from , from_param);
+    __ movptr(to   , to_param);
+    __ movptr(key  , key_param);
+    __ movptr(rvec , rvec_param);
+    __ movptr(len_reg , len_param);
+
+    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    // load up xmm regs 2 thru 7 with keys 0-5
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+      offset += 0x10;
+    }
+
+    __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec
+
+    // now split into different paths depending on the keylen (number of ints in the AESCrypt.KLE array: 44=128-bit, 52=192-bit, 60=256-bit)
+    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    __ cmpl(rax, 44);
+    __ jcc(Assembler::notEqual, L_key_192_256);
+
+    // 128 bit code follows here
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_128);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0xa0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_128);
+
+    __ BIND(L_exit);
+    __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object
+
+    handleSOERegisters(false /*restoring*/);
+    __ movl(rax, 0);                             // return 0 (why?)
+    __ leave();                                  // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    __ cmpl(rax, 52);
+    __ jcc(Assembler::notEqual, L_key_256);
+
+    // 192-bit code follows here (could be changed to use more xmm registers)
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_192);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0xc0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_192);
+    __ jmp(L_exit);
+
+    __ BIND(L_key_256);
+    // 256-bit code follows here (could be changed to use more xmm registers)
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_256);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0xe0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_256);
+    __ jmp(L_exit);
+
+    return start;
+  }
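What the three keylen-specific loops above implement is plain CBC: each plaintext block is XORed with the previous ciphertext block (initially the r vector) before encryption, and the final ciphertext block is written back as the new r. A hedged scalar sketch, with encrypt_block standing in for the AESNI round sequence:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    using BlockFn = void (*)(const uint8_t* in, uint8_t* out);

    // len must be a multiple of the 16-byte block size, as the stub assumes.
    void cbc_encrypt(const uint8_t* src, uint8_t* dst, std::size_t len,
                     uint8_t rvec[16], BlockFn encrypt_block) {
      uint8_t chain[16];
      std::memcpy(chain, rvec, 16);               // xmm_result starts as r
      for (std::size_t pos = 0; pos < len; pos += 16) {
        uint8_t buf[16];
        for (int i = 0; i < 16; i++) buf[i] = src[pos + i] ^ chain[i];
        encrypt_block(buf, dst + pos);            // keylen-specific rounds
        std::memcpy(chain, dst + pos, 16);        // ciphertext feeds the chain
      }
      std::memcpy(rvec, chain, 16);               // final r written back once
    }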
+
+
+  // CBC AES Decryption.
+  // In the 32-bit stub, for lack of registers, we do not try to parallelize 4 blocks at a time.
+  //
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+
+  address generate_cipherBlockChaining_decryptAESCrypt() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+    address start = __ pc();
+
+    Label L_exit, L_key_192_256, L_key_256;
+    Label L_singleBlock_loopTop_128;
+    Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
+    const Register from        = rsi;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register rvec        = rdi;      // r byte array, initialized from the init-vector address
+                                           // and left holding the last ciphertext block
+    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
+    const Register pos         = rax;
+
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    // first 6 keys preloaded into xmm2-xmm7
+    const int XMM_REG_NUM_KEY_FIRST = 2;
+    const int XMM_REG_NUM_KEY_LAST  = 7;
+    const int FIRST_NON_REG_KEY_offset = 0x70;
+    const XMMRegister xmm_key_first   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    handleSOERegisters(true /*saving*/);
+
+    // load registers from incoming parameters
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+    const Address  rvec_param (rbp, 8+12);
+    const Address  len_param  (rbp, 8+16);
+    __ movptr(from , from_param);
+    __ movptr(to   , to_param);
+    __ movptr(key  , key_param);
+    __ movptr(rvec , rvec_param);
+    __ movptr(len_reg , len_param);
+
+    // the java expanded key ordering is rotated one position from what we want
+    // so we start from 0x10 here and hit 0x00 last
+    const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    // load up xmm regs 2 thru 7 with the first 6 keys
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+      offset += 0x10;
+    }
+
+    // inside here, use the rvec register to point to previous block cipher
+    // with which we xor at the end of each newly decrypted block
+    const Register  prev_block_cipher_ptr = rvec;
+
+    // now split into different paths depending on the keylen (number of ints in the AESCrypt.KLE array: 44=128-bit, 52=192-bit, 60=256-bit)
+    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    __ cmpl(rax, 44);
+    __ jcc(Assembler::notEqual, L_key_192_256);
+
+
+    // 128-bit code follows here
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_128);
+    __ cmpptr(len_reg, 0);           // any blocks left?
+    __ jcc(Assembler::equal, L_exit);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
+      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
+    __ aesdeclast(xmm_result, xmm_temp);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jmp(L_singleBlock_loopTop_128);
+
+
+    __ BIND(L_exit);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ movptr(rvec , rvec_param);                                     // restore this since used in loop
+    __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
+    handleSOERegisters(false /*restoring*/);
+    __ movl(rax, 0);                                                  // return 0 (why?)
+    __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    __ cmpl(rax, 52);
+    __ jcc(Assembler::notEqual, L_key_256);
+
+    // 192-bit code follows here (could be optimized to use parallelism)
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_192);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
+      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
+    __ aesdeclast(xmm_result, xmm_temp);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
+    __ jmp(L_exit);
+
+    __ BIND(L_key_256);
+    // 256-bit code follows here (could be optimized to use parallelism)
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_256);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
+      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
+    __ aesdeclast(xmm_result, xmm_temp);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
+    __ jmp(L_exit);
+
+    return start;
+  }
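Decryption chains the other way: each block is decrypted and then XORed with the previous ciphertext block, which the stub tracks with prev_block_cipher_ptr (a pointer into the source array) instead of spending a register on a copy. A hedged scalar sketch (assumes src and dst do not alias, which the pointer trick requires):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    using BlockFn = void (*)(const uint8_t* in, uint8_t* out);

    void cbc_decrypt(const uint8_t* src, uint8_t* dst, std::size_t len,
                     uint8_t rvec[16], BlockFn decrypt_block) {
      const uint8_t* prev = rvec;                 // IV first, then src blocks
      for (std::size_t pos = 0; pos < len; pos += 16) {
        uint8_t buf[16];
        decrypt_block(src + pos, buf);            // all rounds incl. aesdeclast
        for (int i = 0; i < 16; i++) dst[pos + i] = buf[i] ^ prev[i];
        prev = src + pos;                         // previous *ciphertext* block
      }
      if (prev != rvec) {
        std::memcpy(rvec, prev, 16);              // last ciphertext becomes r
      }
    }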
+
+
  public:
   // Information about frame layout at time of blocking runtime call.
   // Note that we only have to preserve callee-saved registers since
@@ -2332,6 +2902,16 @@
     generate_arraycopy_stubs();
 
     generate_math_stubs();
+
+    // don't bother generating these AES intrinsic stubs unless the global flag is set
+    if (UseAESIntrinsics) {
+      StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // might be needed by the others
+
+      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
+    }
   }
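The registrations above run only when UseAESIntrinsics is set, which in this changeset also requires UseAES and AES-NI hardware support. On builds carrying this change the stubs can be toggled from the command line, e.g.:

    java -XX:+UseAES -XX:+UseAESIntrinsics ...   # use the stub routines
    java -XX:-UseAESIntrinsics ...               # keep the pure-Java AES path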
 
 
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "interpreter/interpreter.hpp"
 #include "nativeInst_x86.hpp"
 #include "oops/instanceOop.hpp"
@@ -37,19 +37,8 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/top.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 #ifdef COMPILER2
 #include "opto/runtime.hpp"
 #endif
@@ -1297,23 +1286,54 @@
   //   end_to       - destination array end address
   //   qword_count  - 64-bits element count, negative
   //   to           - scratch
-  //   L_copy_32_bytes - entry label
+  //   L_copy_bytes - entry label
   //   L_copy_8_bytes  - exit  label
   //
-  void copy_32_bytes_forward(Register end_from, Register end_to,
+  void copy_bytes_forward(Register end_from, Register end_to,
                              Register qword_count, Register to,
-                             Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
+                             Label& L_copy_bytes, Label& L_copy_8_bytes) {
     DEBUG_ONLY(__ stop("enter at entry label, not here"));
     Label L_loop;
     __ align(OptoLoopAlignment);
-  __ BIND(L_loop);
-    if(UseUnalignedLoadStores) {
-      __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
-      __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
-      __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
-      __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
-
+    if (UseUnalignedLoadStores) {
+      Label L_end;
+      // Copy 64 bytes per iteration
+      __ BIND(L_loop);
+      if (UseAVX >= 2) {
+        __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
+        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+        __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
+        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
+      } else {
+        __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
+        __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+        __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
+        __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
+        __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
+        __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
+        __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
+        __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
+      }
+      __ BIND(L_copy_bytes);
+      __ addptr(qword_count, 8);
+      __ jcc(Assembler::lessEqual, L_loop);
+      __ subptr(qword_count, 4);  // sub(8) and add(4)
+      __ jccb(Assembler::greater, L_end);
+      // Copy trailing 32 bytes
+      if (UseAVX >= 2) {
+        __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
+        __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+      } else {
+        __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
+        __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+        __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
+        __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
+      }
+      __ addptr(qword_count, 4);
+      __ BIND(L_end);
     } else {
+      // Copy 32 bytes per iteration
+      __ BIND(L_loop);
       __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
       __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
       __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
@@ -1322,15 +1342,15 @@
       __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
       __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
       __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+
+      __ BIND(L_copy_bytes);
+      __ addptr(qword_count, 4);
+      __ jcc(Assembler::lessEqual, L_loop);
     }
-  __ BIND(L_copy_32_bytes);
-    __ addptr(qword_count, 4);
-    __ jcc(Assembler::lessEqual, L_loop);
     __ subptr(qword_count, 4);
     __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
   }
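copy_bytes_forward relies on a negative-index idiom: both pointers are advanced to the ends of the arrays, qword_count is negated, and indexed addressing [end + count*8] walks forward as the count climbs toward zero, so the addptr that advances the count also sets the flags the loop branch tests. A hedged C++ rendering of the idiom:

    #include <cstddef>
    #include <cstdint>

    // Point at the ends, negate the count, and walk the index up to zero.
    void copy_qwords_forward(const uint64_t* from, uint64_t* to,
                             std::ptrdiff_t count) {
      const uint64_t* end_from = from + count;
      uint64_t* end_to = to + count;
      for (std::ptrdiff_t i = -count; i != 0; i++) {  // i runs -count .. -1
        end_to[i] = end_from[i];
      }
    }

The tail bookkeeping works the same way. With, say, 6 qwords left, qword_count holds -6: the addptr of 8 yields 2 (positive, so the 64-byte loop exits), the subptr of 4 yields -2 (not greater than zero, so one trailing 32-byte chunk is copied), the addptr of 4 restores 2, and the final subptr/jcc hands the remaining 2 qwords to L_copy_8_bytes.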
 
-
   // Copy big chunks backward
   //
   // Inputs:
@@ -1338,23 +1358,55 @@
   //   dest         - destination array address
   //   qword_count  - 64-bits element count
   //   to           - scratch
-  //   L_copy_32_bytes - entry label
+  //   L_copy_bytes - entry label
   //   L_copy_8_bytes  - exit  label
   //
-  void copy_32_bytes_backward(Register from, Register dest,
+  void copy_bytes_backward(Register from, Register dest,
                               Register qword_count, Register to,
-                              Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
+                              Label& L_copy_bytes, Label& L_copy_8_bytes) {
     DEBUG_ONLY(__ stop("enter at entry label, not here"));
     Label L_loop;
     __ align(OptoLoopAlignment);
-  __ BIND(L_loop);
-    if(UseUnalignedLoadStores) {
-      __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
-      __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
-      __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
-      __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
-
+    if (UseUnalignedLoadStores) {
+      Label L_end;
+      // Copy 64 bytes per iteration
+      __ BIND(L_loop);
+      if (UseAVX >= 2) {
+        __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
+        __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
+        __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
+        __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
+      } else {
+        __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
+        __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
+        __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
+        __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
+        __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
+        __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
+        __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
+        __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
+      }
+      __ BIND(L_copy_bytes);
+      __ subptr(qword_count, 8);
+      __ jcc(Assembler::greaterEqual, L_loop);
+
+      __ addptr(qword_count, 4);  // add(8) and sub(4)
+      __ jccb(Assembler::less, L_end);
+      // Copy trailing 32 bytes
+      if (UseAVX >= 2) {
+        __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
+        __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
+      } else {
+        __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
+        __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
+        __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
+        __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
+      }
+      __ subptr(qword_count, 4);
+      __ BIND(L_end);
     } else {
+      // Copy 32 bytes per iteration
+      __ BIND(L_loop);
       __ movq(to, Address(from, qword_count, Address::times_8, 24));
       __ movq(Address(dest, qword_count, Address::times_8, 24), to);
       __ movq(to, Address(from, qword_count, Address::times_8, 16));
@@ -1363,10 +1415,11 @@
       __ movq(Address(dest, qword_count, Address::times_8,  8), to);
       __ movq(to, Address(from, qword_count, Address::times_8,  0));
       __ movq(Address(dest, qword_count, Address::times_8,  0), to);
+
+      __ BIND(L_copy_bytes);
+      __ subptr(qword_count, 4);
+      __ jcc(Assembler::greaterEqual, L_loop);
     }
-  __ BIND(L_copy_32_bytes);
-    __ subptr(qword_count, 4);
-    __ jcc(Assembler::greaterEqual, L_loop);
     __ addptr(qword_count, 4);
     __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
   }
@@ -1396,7 +1449,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
+    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
     Label L_copy_byte, L_exit;
     const Register from        = rdi;  // source array address
     const Register to          = rsi;  // destination array address
@@ -1428,7 +1481,7 @@
     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
     __ negptr(qword_count); // make the count negative
-    __ jmp(L_copy_32_bytes);
+    __ jmp(L_copy_bytes);
 
     // Copy trailing qwords
   __ BIND(L_copy_8_bytes);
@@ -1471,8 +1524,8 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in 32-bytes chunks
-    copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+    // Copy in multi-byte chunks
+    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
     __ jmp(L_copy_4_bytes);
 
     return start;
@@ -1499,7 +1552,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
+    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
     const Register from        = rdi;  // source array address
     const Register to          = rsi;  // destination array address
     const Register count       = rdx;  // elements count
@@ -1542,10 +1595,10 @@
     // Check for and copy trailing dword
   __ BIND(L_copy_4_bytes);
     __ testl(byte_count, 4);
-    __ jcc(Assembler::zero, L_copy_32_bytes);
+    __ jcc(Assembler::zero, L_copy_bytes);
     __ movl(rax, Address(from, qword_count, Address::times_8));
     __ movl(Address(to, qword_count, Address::times_8), rax);
-    __ jmp(L_copy_32_bytes);
+    __ jmp(L_copy_bytes);
 
     // Copy trailing qwords
   __ BIND(L_copy_8_bytes);
@@ -1560,8 +1613,8 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in 32-bytes chunks
-    copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+    // Copy in multi-byte chunks
+    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
 
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
@@ -1596,7 +1649,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
+    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
     const Register from        = rdi;  // source array address
     const Register to          = rsi;  // destination array address
     const Register count       = rdx;  // elements count
@@ -1627,7 +1680,7 @@
     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
     __ negptr(qword_count);
-    __ jmp(L_copy_32_bytes);
+    __ jmp(L_copy_bytes);
 
     // Copy trailing qwords
   __ BIND(L_copy_8_bytes);
@@ -1663,8 +1716,8 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in 32-bytes chunks
-    copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+    // Copy in multi-byte chunks
+    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
     __ jmp(L_copy_4_bytes);
 
     return start;
@@ -1711,7 +1764,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes;
+    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes;
     const Register from        = rdi;  // source array address
     const Register to          = rsi;  // destination array address
     const Register count       = rdx;  // elements count
@@ -1746,10 +1799,10 @@
     // Check for and copy trailing dword
   __ BIND(L_copy_4_bytes);
     __ testl(word_count, 2);
-    __ jcc(Assembler::zero, L_copy_32_bytes);
+    __ jcc(Assembler::zero, L_copy_bytes);
     __ movl(rax, Address(from, qword_count, Address::times_8));
     __ movl(Address(to, qword_count, Address::times_8), rax);
-    __ jmp(L_copy_32_bytes);
+    __ jmp(L_copy_bytes);
 
     // Copy trailing qwords
   __ BIND(L_copy_8_bytes);
@@ -1764,8 +1817,8 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in 32-bytes chunks
-    copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+    // Copy in multi-byte chunks
+    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
 
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
@@ -1801,7 +1854,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
+    Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
     const Register from        = rdi;  // source array address
     const Register to          = rsi;  // destination array address
     const Register count       = rdx;  // elements count
@@ -1837,7 +1890,7 @@
     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
     __ negptr(qword_count);
-    __ jmp(L_copy_32_bytes);
+    __ jmp(L_copy_bytes);
 
     // Copy trailing qwords
   __ BIND(L_copy_8_bytes);
@@ -1864,8 +1917,8 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy 32-bytes chunks
-    copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+    // Copy in multi-byte chunks
+    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
     __ jmp(L_copy_4_bytes);
 
     return start;
@@ -1893,7 +1946,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
+    Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
     const Register from        = rdi;  // source array address
     const Register to          = rsi;  // destination array address
     const Register count       = rdx;  // elements count
@@ -1927,10 +1980,10 @@
 
     // Check for and copy trailing dword
     __ testl(dword_count, 1);
-    __ jcc(Assembler::zero, L_copy_32_bytes);
+    __ jcc(Assembler::zero, L_copy_bytes);
     __ movl(rax, Address(from, dword_count, Address::times_4, -4));
     __ movl(Address(to, dword_count, Address::times_4, -4), rax);
-    __ jmp(L_copy_32_bytes);
+    __ jmp(L_copy_bytes);
 
     // Copy trailing qwords
   __ BIND(L_copy_8_bytes);
@@ -1948,8 +2001,8 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in 32-bytes chunks
-    copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+    // Copy in multi-byte chunks
+    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
 
    __ bind(L_exit);
      if (is_oop) {
@@ -1987,7 +2040,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
+    Label L_copy_bytes, L_copy_8_bytes, L_exit;
     const Register from        = rdi;  // source array address
     const Register to          = rsi;  // destination array address
     const Register qword_count = rdx;  // elements count
@@ -2019,7 +2072,7 @@
     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
     __ negptr(qword_count);
-    __ jmp(L_copy_32_bytes);
+    __ jmp(L_copy_bytes);
 
     // Copy trailing qwords
   __ BIND(L_copy_8_bytes);
@@ -2038,8 +2091,8 @@
       __ ret(0);
     }
 
-    // Copy 64-byte chunks
-    copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+    // Copy in multi-byte chunks
+    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
 
     if (is_oop) {
     __ BIND(L_exit);
@@ -2076,7 +2129,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
+    Label L_copy_bytes, L_copy_8_bytes, L_exit;
     const Register from        = rdi;  // source array address
     const Register to          = rsi;  // destination array address
     const Register qword_count = rdx;  // elements count
@@ -2102,7 +2155,7 @@
       gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
     }
 
-    __ jmp(L_copy_32_bytes);
+    __ jmp(L_copy_bytes);
 
     // Copy trailing qwords
   __ BIND(L_copy_8_bytes);
@@ -2121,8 +2174,8 @@
       __ ret(0);
     }
 
-    // Copy in 32-bytes chunks
-    copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+    // Copy in multi-byte chunks
+    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
 
     if (is_oop) {
     __ BIND(L_exit);
@@ -2941,6 +2994,596 @@
     }
   }
 
+  // AES intrinsic stubs
+  enum {AESBlockSize = 16};
+
+  address generate_key_shuffle_mask() {
+    __ align(16);
+    StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
+    address start = __ pc();
+    __ emit_data64( 0x0405060700010203, relocInfo::none );
+    __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none );
+    return start;
+  }
+
+  // Utility routine for loading a 128-bit key word in little-endian format;
+  // the shuffle mask may optionally be supplied in an XMM register.
+  void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    __ movdqu(xmmdst, Address(key, offset));
+    if (xmm_shuf_mask != NULL) {
+      __ pshufb(xmmdst, xmm_shuf_mask);
+    } else {
+      __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    }
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_encryptBlock() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+    Label L_doLast;
+    address start = __ pc();
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register keylen      = rax;
+
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    // On win64 xmm6-xmm15 must be preserved so don't use them.
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
+    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
+
+    // For encryption, the java expanded key ordering is just what we need;
+    // we don't know whether the key is aligned, hence the load-execute form is not used
+
+    load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
+    __ pxor(xmm_result, xmm_temp1);
+
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
+
+    __ BIND(L_doLast);
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenclast(xmm_result, xmm_temp2);
+    __ movdqu(Address(to, 0), xmm_result);        // store the result
+    __ xorptr(rax, rax); // return 0
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_decryptBlock() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+    Label L_doLast;
+    address start = __ pc();
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register keylen      = rax;
+
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    // On win64 xmm6-xmm15 must be preserved so don't use them.
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
+    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_result, Address(from, 0));
+
+    // for decryption the java expanded key ordering is rotated one position from what we want,
+    // so we start from 0x10 here and hit 0x00 last;
+    // we don't know whether the key is aligned, hence the load-execute form is not used
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ pxor  (xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
+
+    __ BIND(L_doLast);
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    // for decryption the aesdeclast operation is always on key+0x00
+    __ aesdeclast(xmm_result, xmm_temp3);
+    __ movdqu(Address(to, 0), xmm_result);  // store the result
+    __ xorptr(rax, rax); // return 0
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  address generate_cipherBlockChaining_encryptAESCrypt() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+    address start = __ pc();
+
+    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register rvec        = c_rarg3;  // r byte array, initialized from the init vector address
+                                           // and left holding the result of the last encryption block
+#ifndef _WIN64
+    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
+#else
+    const Address  len_mem(rsp, 6 * wordSize);  // length is on stack on Win64
+    const Register len_reg     = r10;      // pick the first volatile windows register
+#endif
+    const Register pos         = rax;
+
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    // keys 0-10 preloaded into xmm2-xmm12
+    const int XMM_REG_NUM_KEY_FIRST = 2;
+    const int XMM_REG_NUM_KEY_LAST  = 15;
+    const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+    const XMMRegister xmm_key10  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10);
+    const XMMRegister xmm_key11  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11);
+    const XMMRegister xmm_key12  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12);
+    const XMMRegister xmm_key13  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+    // on win64, fill len_reg from stack position
+    __ movl(len_reg, len_mem);
+    // save the xmm registers which must be preserved (xmm6-xmm15)
+    __ subptr(rsp, -rsp_after_call_off * wordSize);
+    for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+      __ movdqu(xmm_save(i), as_XMMRegister(i));
+    }
+#endif
+
+    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    // load up xmm regs xmm2 through xmm12 with keys 0x00 - 0xa0
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) {
+      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+      offset += 0x10;
+    }
+    __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec
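+    // CBC encryption computes C_i = E_K(P_i xor C_{i-1}) with C_0 = r vector;
+    // xmm_result carries the chaining value from one loop iteration to the next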
+
+    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array: 44=128, 52=192, 60=256)
+    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    __ cmpl(rax, 44);
+    __ jcc(Assembler::notEqual, L_key_192_256);
+
+    // 128 bit code follows here
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+
+    __ BIND(L_loopTop_128);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
+    __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    __ aesenclast(xmm_result, xmm_key10);
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_128);
+
+    __ BIND(L_exit);
+    __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object
+
+#ifdef _WIN64
+    // restore xmm regs belonging to calling function
+    for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+      __ movdqu(as_XMMRegister(i), xmm_save(i));
+    }
+#endif
+    __ movl(rax, 0); // return 0 (the return value is currently unused by the caller)
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask);
+    __ cmpl(rax, 52);
+    __ jcc(Assembler::notEqual, L_key_256);
+
+    // 192-bit code follows here (could be changed to use more xmm registers)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+
+    __ BIND(L_loopTop_192);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
+    __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    __ aesenclast(xmm_result, xmm_key12);
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_192);
+    __ jmp(L_exit);
+
+    __ BIND(L_key_256);
+    // 256-bit code follows here (could be changed to use more xmm registers)
+    load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask);
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+
+    __ BIND(L_loopTop_256);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
+    __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    load_key(xmm_temp, key, 0xe0);
+    __ aesenclast(xmm_result, xmm_temp);
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_256);
+    __ jmp(L_exit);
+
+    return start;
+  }
+
+
+
+  // This is a version of CBC/AES Decrypt which does 4 blocks at a time in a
+  // loop to hide instruction latency
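+  // (possible because CBC decryption is P_i = D_K(C_i) xor C_{i-1}: each output
+  //  depends only on ciphertext already in memory, unlike CBC encryption which
+  //  is inherently serial)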
+  //
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+
+  address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
+    assert(UseAES, "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+    address start = __ pc();
+
+    Label L_exit, L_key_192_256, L_key_256;
+    Label L_singleBlock_loopTop_128, L_multiBlock_loopTop_128;
+    Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register rvec        = c_rarg3;  // r byte array, initialized from the init vector address
+                                           // and left holding the cipher of the last input block
+#ifndef _WIN64
+    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
+#else
+    const Address  len_mem(rsp, 6 * wordSize);  // length is on stack on Win64
+    const Register len_reg     = r10;      // pick the first volatile windows register
+#endif
+    const Register pos         = rax;
+
+    // keys 0-10 preloaded into xmm5-xmm15
+    const int XMM_REG_NUM_KEY_FIRST = 5;
+    const int XMM_REG_NUM_KEY_LAST  = 15;
+    const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+    const XMMRegister xmm_key_last  = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+    // on win64, fill len_reg from stack position
+    __ movl(len_reg, len_mem);
+    // save the xmm registers which must be preserved (xmm6-xmm15)
+    __ subptr(rsp, -rsp_after_call_off * wordSize);
+    for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+      __ movdqu(xmm_save(i), as_XMMRegister(i));
+    }
+#endif
+    // the Java expanded key ordering is rotated one position from what we want,
+    // so we start from 0x10 here and hit 0x00 last
+    const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    // load up xmm regs 5 through 15 with keys 0x10 - 0xa0, then 0x00 last
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) {
+      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+      offset += 0x10;
+    }
+    load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask);
+
+    const XMMRegister xmm_prev_block_cipher = xmm1;  // holds cipher of previous block
+
+    // registers holding the four results in the parallelized loop
+    const XMMRegister xmm_result0 = xmm0;
+    const XMMRegister xmm_result1 = xmm2;
+    const XMMRegister xmm_result2 = xmm3;
+    const XMMRegister xmm_result3 = xmm4;
+
+    __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00));   // initialize with initial rvec
+
+    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array: 44=128, 52=192, 60=256)
+    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    __ cmpl(rax, 44);
+    __ jcc(Assembler::notEqual, L_key_192_256);
+
+
+    // 128-bit code follows here, parallelized
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_multiBlock_loopTop_128);
+    __ cmpptr(len_reg, 4*AESBlockSize);           // see if at least 4 blocks left
+    __ jcc(Assembler::less, L_singleBlock_loopTop_128);
+
+    __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0*AESBlockSize));   // get next 4 blocks into xmmresult registers
+    __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1*AESBlockSize));
+    __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2*AESBlockSize));
+    __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3*AESBlockSize));
+
+#define DoFour(opc, src_reg)                    \
+    __ opc(xmm_result0, src_reg);               \
+    __ opc(xmm_result1, src_reg);               \
+    __ opc(xmm_result2, src_reg);               \
+    __ opc(xmm_result3, src_reg);
+
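+    // each DoFour issues the same round instruction for all four in-flight
+    // blocks; the ops are data-independent, so their latencies overlap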
+    DoFour(pxor, xmm_key_first);
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+      DoFour(aesdec, as_XMMRegister(rnum));
+    }
+    DoFour(aesdeclast, xmm_key_last);
+    // for each result, xor with the r vector of previous cipher block
+    __ pxor(xmm_result0, xmm_prev_block_cipher);
+    __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0*AESBlockSize));
+    __ pxor(xmm_result1, xmm_prev_block_cipher);
+    __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1*AESBlockSize));
+    __ pxor(xmm_result2, xmm_prev_block_cipher);
+    __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2*AESBlockSize));
+    __ pxor(xmm_result3, xmm_prev_block_cipher);
+    __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3*AESBlockSize));   // this will carry over to next set of blocks
+
+    __ movdqu(Address(to, pos, Address::times_1, 0*AESBlockSize), xmm_result0);     // store 4 results into the next 64 bytes of output
+    __ movdqu(Address(to, pos, Address::times_1, 1*AESBlockSize), xmm_result1);
+    __ movdqu(Address(to, pos, Address::times_1, 2*AESBlockSize), xmm_result2);
+    __ movdqu(Address(to, pos, Address::times_1, 3*AESBlockSize), xmm_result3);
+
+    __ addptr(pos, 4*AESBlockSize);
+    __ subptr(len_reg, 4*AESBlockSize);
+    __ jmp(L_multiBlock_loopTop_128);
+
+    // registers used in the non-parallelized loops
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_prev_block_cipher_save = xmm2;
+    const XMMRegister xmm_key11 = xmm3;
+    const XMMRegister xmm_key12 = xmm4;
+    const XMMRegister xmm_temp  = xmm4;
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_128);
+    __ cmpptr(len_reg, 0);           // any blocks left?
+    __ jcc(Assembler::equal, L_exit);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ movdqa(xmm_prev_block_cipher_save, xmm_result);              // save for next r vector
+    __ pxor  (xmm_result, xmm_key_first);               // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    __ aesdeclast(xmm_result, xmm_key_last);
+    __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);              // set up next r vector with cipher input from this block
+
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jmp(L_singleBlock_loopTop_128);
+
+
+    __ BIND(L_exit);
+    __ movdqu(Address(rvec, 0), xmm_prev_block_cipher);     // final value of r stored in rvec of CipherBlockChaining object
+#ifdef _WIN64
+    // restore regs belonging to calling function
+    for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+      __ movdqu(as_XMMRegister(i), xmm_save(i));
+    }
+#endif
+    __ movl(rax, 0); // return 0 (the return value is currently unused by the caller)
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    load_key(xmm_key11, key, 0xb0);
+    __ cmpl(rax, 52);
+    __ jcc(Assembler::notEqual, L_key_256);
+
+    // 192-bit code follows here (could be optimized to use parallelism)
+    load_key(xmm_key12, key, 0xc0);     // 192-bit key goes up to c0
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+
+    __ BIND(L_singleBlock_loopTop_192);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ movdqa(xmm_prev_block_cipher_save, xmm_result);              // save for next r vector
+    __ pxor  (xmm_result, xmm_key_first);               // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    __ aesdec(xmm_result, xmm_key11);
+    __ aesdec(xmm_result, xmm_key12);
+    __ aesdeclast(xmm_result, xmm_key_last);                    // xmm15 always came from key+0
+    __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);  // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);  // set up next r vector with cipher input from this block
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
+    __ jmp(L_exit);
+
+    __ BIND(L_key_256);
+    // 256-bit code follows here (could be optimized to use parallelism)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+
+    __ BIND(L_singleBlock_loopTop_256);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+    __ movdqa(xmm_prev_block_cipher_save, xmm_result);              // save for next r vector
+    __ pxor  (xmm_result, xmm_key_first);               // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    __ aesdec(xmm_result, xmm_key11);
+    load_key(xmm_temp, key, 0xc0);
+    __ aesdec(xmm_result, xmm_temp);
+    load_key(xmm_temp, key, 0xd0);
+    __ aesdec(xmm_result, xmm_temp);
+    load_key(xmm_temp, key, 0xe0);     // 256-bit key goes up to e0
+    __ aesdec(xmm_result, xmm_temp);
+    __ aesdeclast(xmm_result, xmm_key_last);          // xmm15 came from key+0
+    __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);  // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);  // set up next r vector with cipher input from this block
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
+    __ jmp(L_exit);
+
+    return start;
+  }
+
+
+
 #undef __
 #define __ masm->
 
@@ -3135,6 +3778,16 @@
     generate_arraycopy_stubs();
 
     generate_math_stubs();
+
+    // don't bother generating these AES intrinsic stubs unless the global flag is set
+    if (UseAESIntrinsics) {
+      StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // needed by the others
+
+      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
+    }
   }
 
  public:
--- a/src/cpu/x86/vm/stubRoutines_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/stubRoutines_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,21 +26,11 @@
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/stubRoutines.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 // Implementation of the platform-specific part of StubRoutines - for
 // a description of how to extend it, see the stubRoutines.hpp file.
 
 address StubRoutines::x86::_verify_mxcsr_entry         = NULL;
 address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL;
+address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/src/cpu/x86/vm/stubRoutines_x86_32.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/stubRoutines_x86_32.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -41,10 +41,14 @@
  private:
   static address _verify_mxcsr_entry;
   static address _verify_fpu_cntrl_wrd_entry;
+  // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
+  static address _key_shuffle_mask_addr;
 
  public:
   static address verify_mxcsr_entry()                        { return _verify_mxcsr_entry; }
   static address verify_fpu_cntrl_wrd_entry()                { return _verify_fpu_cntrl_wrd_entry; }
+  static address key_shuffle_mask_addr()                     { return _key_shuffle_mask_addr; }
+
 };
 
   static bool    returns_to_call_stub(address return_pc)     { return return_pc == _call_stub_return_address; }
--- a/src/cpu/x86/vm/stubRoutines_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,18 +26,7 @@
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/stubRoutines.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 // Implementation of the platform-specific part of StubRoutines - for
 // a description of how to extend it, see the stubRoutines.hpp file.
@@ -56,3 +45,4 @@
 address StubRoutines::x86::_double_sign_mask = NULL;
 address StubRoutines::x86::_double_sign_flip = NULL;
 address StubRoutines::x86::_mxcsr_std = NULL;
+address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -54,6 +54,8 @@
   static address _double_sign_mask;
   static address _double_sign_flip;
   static address _mxcsr_std;
+  // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
+  static address _key_shuffle_mask_addr;
 
  public:
 
@@ -116,6 +118,9 @@
   {
     return _mxcsr_std;
   }
+
+  static address key_shuffle_mask_addr()                     { return _key_shuffle_mask_addr; }
+
 };
 
 #endif // CPU_X86_VM_STUBROUTINES_X86_64_HPP
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterGenerator.hpp"
@@ -424,8 +424,6 @@
   // C++ interpreter only
   // rsi - previous interpreter state pointer
 
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
-
   // InterpreterRuntime::frequency_counter_overflow takes one argument
   // indicating if the counter overflow occurs at a backwards branch (non-NULL bcp).
   // The call returns the address of the verified entry point for the method or NULL
@@ -868,12 +866,13 @@
   // rsi: previous interpreter state (C++ interpreter) must preserve
   address entry_point = __ pc();
 
-
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
+  const Address constMethod       (rbx, Method::const_offset());
   const Address invocation_counter(rbx, Method::invocation_counter_offset() + InvocationCounter::counter_offset());
   const Address access_flags      (rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rcx, ConstMethod::size_of_parameters_offset());
 
   // get parameter size (always needed)
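+  // size_of_parameters has moved from Method to ConstMethod, so fetch the
+  // ConstMethod* first and read the count from there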
+  __ movptr(rcx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // native calls don't need the stack size check since they have no expression stack
@@ -988,7 +987,9 @@
 
   // allocate space for parameters
   __ get_method(method);
-  __ load_unsigned_short(t, Address(method, Method::size_of_parameters_offset()));
+  __ movptr(t, Address(method, Method::const_offset()));
+  __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset()));
+
   __ shlptr(t, Interpreter::logStackElementSize);
   __ addptr(t, 2*wordSize);     // allocate two more slots for JNIEnv and possible mirror
   __ subptr(rsp, t);
@@ -1297,13 +1298,14 @@
   // rsi: sender sp
   address entry_point = __ pc();
 
-
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
-  const Address size_of_locals    (rbx, Method::size_of_locals_offset());
+  const Address constMethod       (rbx, Method::const_offset());
   const Address invocation_counter(rbx, Method::invocation_counter_offset() + InvocationCounter::counter_offset());
   const Address access_flags      (rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rdx, ConstMethod::size_of_parameters_offset());
+  const Address size_of_locals    (rdx, ConstMethod::size_of_locals_offset());
 
   // get parameter size (always needed)
+  __ movptr(rdx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx,: Method*
@@ -1734,7 +1736,8 @@
 
     // Compute size of arguments for saving when returning to deoptimized caller
     __ get_method(rax);
-    __ load_unsigned_short(rax, Address(rax, in_bytes(Method::size_of_parameters_offset())));
+    __ movptr(rax, Address(rax, Method::const_offset()));
+    __ load_unsigned_short(rax, Address(rax, ConstMethod::size_of_parameters_offset()));
     __ shlptr(rax, Interpreter::logStackElementSize);
     __ restore_locals();
     __ subptr(rdi, rax);
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterGenerator.hpp"
@@ -369,9 +369,6 @@
   // Everything as it was on entry
   // rdx is not restored. Doesn't appear to really be set.
 
-  const Address size_of_parameters(rbx,
-                                   Method::size_of_parameters_offset());
-
   // InterpreterRuntime::frequency_counter_overflow takes two
   // arguments, the first (thread) is passed by call_VM, the second
   // indicates if the counter overflow occurs at a backwards branch
@@ -844,14 +841,17 @@
 
   address entry_point = __ pc();
 
-  const Address size_of_parameters(rbx, Method::
-                                        size_of_parameters_offset());
+  const Address constMethod       (rbx, Method::const_offset());
   const Address invocation_counter(rbx, Method::
                                         invocation_counter_offset() +
                                         InvocationCounter::counter_offset());
   const Address access_flags      (rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rcx, ConstMethod::
+                                        size_of_parameters_offset());
+
 
   // get parameter size (always needed)
+  __ movptr(rcx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // native calls don't need the stack size check since they have no
@@ -967,9 +967,8 @@
 
   // allocate space for parameters
   __ get_method(method);
-  __ load_unsigned_short(t,
-                         Address(method,
-                                 Method::size_of_parameters_offset()));
+  __ movptr(t, Address(method, Method::const_offset()));
+  __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset()));
   __ shll(t, Interpreter::logStackElementSize);
 
   __ subptr(rsp, t);
@@ -1302,15 +1301,18 @@
   // r13: sender sp
   address entry_point = __ pc();
 
-  const Address size_of_parameters(rbx,
-                                   Method::size_of_parameters_offset());
-  const Address size_of_locals(rbx, Method::size_of_locals_offset());
+  const Address constMethod(rbx, Method::const_offset());
   const Address invocation_counter(rbx,
                                    Method::invocation_counter_offset() +
                                    InvocationCounter::counter_offset());
   const Address access_flags(rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rdx,
+                                   ConstMethod::size_of_parameters_offset());
+  const Address size_of_locals(rdx, ConstMethod::size_of_locals_offset());
+
 
   // get parameter size (always needed)
+  __ movptr(rdx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: Method*
@@ -1752,7 +1754,8 @@
     // Compute size of arguments for saving when returning to
     // deoptimized caller
     __ get_method(rax);
-    __ load_unsigned_short(rax, Address(rax, in_bytes(Method::
+    __ movptr(rax, Address(rax, Method::const_offset()));
+    __ load_unsigned_short(rax, Address(rax, in_bytes(ConstMethod::
                                                 size_of_parameters_offset())));
     __ shll(rax, Interpreter::logStackElementSize);
     __ restore_locals(); // XXX do we need this?
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
 #include "interpreter/templateTable.hpp"
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
 #include "interpreter/templateTable.hpp"
--- a/src/cpu/x86/vm/vmStructs_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/vmStructs_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,7 +29,7 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry)            \
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)            \
                                                                                                                                      \
   /******************************/                                                                                                   \
   /* JavaCallWrapper            */                                                                                                   \
@@ -37,31 +37,14 @@
   /******************************/                                                                                                   \
   /* JavaFrameAnchor            */                                                                                                   \
   /******************************/                                                                                                   \
-  volatile_nonstatic_field(JavaFrameAnchor,     _last_Java_fp,                                    intptr_t*)                              \
-                                                                                                                                     \
+  volatile_nonstatic_field(JavaFrameAnchor,     _last_Java_fp,                                    intptr_t*)
 
-  /* NOTE that we do not use the last_entry() macro here; it is used  */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_STRUCTS_OS_CPU macro (and must  */
-  /* be present there)                                                */
 
 
-#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry)                               \
-
-  /* NOTE that we do not use the last_entry() macro here; it is used  */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_TYPES_OS_CPU macro (and must    */
-  /* be present there)                                                */
-
-
-#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry)                                                              \
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
 
-  /* NOTE that we do not use the last_entry() macro here; it is used        */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must  */
-  /* be present there)                                                      */
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
-#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry)                                                              \
-
-  /* NOTE that we do not use the last_entry() macro here; it is used         */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must  */
-  /* be present there)                                                       */
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // CPU_X86_VM_VMSTRUCTS_X86_HPP
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "runtime/java.hpp"
 #include "runtime/stubCodeGenerator.hpp"
@@ -419,13 +420,16 @@
   if (UseAVX < 1)
     _cpuFeatures &= ~CPU_AVX;
 
+  if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
+    _cpuFeatures &= ~CPU_AES;
+
   if (logical_processors_per_package() == 1) {
     // HT processor could be installed on a system which doesn't support HT.
     _cpuFeatures &= ~CPU_HT;
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -441,6 +445,8 @@
                (supports_popcnt() ? ", popcnt" : ""),
                (supports_avx()    ? ", avx" : ""),
                (supports_avx2()   ? ", avx2" : ""),
+               (supports_aes()    ? ", aes" : ""),
+               (supports_erms()   ? ", erms" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
                (supports_lzcnt()   ? ", lzcnt": ""),
@@ -472,6 +478,29 @@
   if (!supports_avx ()) // Drop to 0 if no AVX  support
     UseAVX = 0;
 
+  // Use AES instructions if available.
+  if (supports_aes()) {
+    if (FLAG_IS_DEFAULT(UseAES)) {
+      UseAES = true;
+    }
+  } else if (UseAES) {
+    if (!FLAG_IS_DEFAULT(UseAES))
+      warning("AES instructions not available on this CPU");
+    FLAG_SET_DEFAULT(UseAES, false);
+  }
+
+  // The AES intrinsic stubs require AES instruction support (of course)
+  // but also require SSE3 mode for the instructions they use.
+  if (UseAES && (UseSSE > 2)) {
+    if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+      UseAESIntrinsics = true;
+    }
+  } else if (UseAESIntrinsics) {
+    if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
+      warning("AES intrinsics not available on this CPU");
+    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+  }
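+  // e.g. (illustrative): -XX:+UseAESIntrinsics on a CPU without AES-NI now
+  // prints the warning above and clears the flag, rather than installing
+  // stubs the hardware cannot execute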
+
 #ifdef COMPILER2
   if (UseFPUForSpilling) {
     if (UseSSE < 2) {
@@ -632,6 +661,14 @@
       }
     }
   }
+#if defined(COMPILER2) && defined(_ALLBSD_SOURCE)
+    if (MaxVectorSize > 16) {
+      // Limit vector size to 16 bytes on BSD until it correctly restores
+      // the upper 128 bits of YMM registers on return from a signal
+      // handler.
+      FLAG_SET_DEFAULT(MaxVectorSize, 16);
+    }
+#endif // COMPILER2
 
   // Use population count instruction if available.
   if (supports_popcnt()) {
@@ -643,6 +680,16 @@
     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
   }
 
+  // Use fast-string operations if available.
+  if (supports_erms()) {
+    if (FLAG_IS_DEFAULT(UseFastStosb)) {
+      UseFastStosb = true;
+    }
+  } else if (UseFastStosb) {
+    warning("fast-string operations are not available on this CPU");
+    FLAG_SET_DEFAULT(UseFastStosb, false);
+  }
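+  // UseFastStosb gates the new rep_fast_stosb ClearArray forms in x86_32.ad
+  // and x86_64.ad; without ERMS the original rep stos path is kept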
+
 #ifdef COMPILER2
   if (FLAG_IS_DEFAULT(AlignVector)) {
     // Modern processors allow misaligned memory operations for vectors.
@@ -706,6 +753,10 @@
   PrefetchFieldsAhead         = prefetch_fields_ahead();
 #endif
 
+  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
+     (cache_line_size > ContendedPaddingWidth))
+     ContendedPaddingWidth = cache_line_size;
+
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
     tty->print_cr("Logical CPUs per core: %u",
@@ -714,6 +765,9 @@
     if (UseAVX > 0) {
       tty->print("  UseAVX=%d",UseAVX);
     }
+    if (UseAES) {
+      tty->print("  UseAES=1");
+    }
     tty->cr();
     tty->print("Allocation");
     if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
@@ -749,6 +803,9 @@
     if (PrefetchFieldsAhead > 0) {
       tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead);
     }
+    if (ContendedPaddingWidth > 0) {
+      tty->print_cr("ContendedPaddingWidth %d", ContendedPaddingWidth);
+    }
   }
 #endif // !PRODUCT
 }
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -78,7 +78,9 @@
                sse4_2   : 1,
                         : 2,
                popcnt   : 1,
-                        : 3,
+                        : 1,
+               aes      : 1,  // AES-NI (CPUID.1:ECX bit 25)
+                        : 1,
                osxsave  : 1,
                avx      : 1,
                         : 3;
@@ -202,7 +204,8 @@
                    avx2 : 1,
                         : 2,
                    bmi2 : 1,
-                        : 23;
+                   erms : 1,  // enhanced rep movsb/stosb (CPUID.7:EBX bit 9)
+                        : 22;
     } bits;
   };
 
@@ -244,7 +247,9 @@
     CPU_TSC    = (1 << 15),
     CPU_TSCINV = (1 << 16),
     CPU_AVX    = (1 << 17),
-    CPU_AVX2   = (1 << 18)
+    CPU_AVX2   = (1 << 18),
+    CPU_AES    = (1 << 19), // AES-NI instructions
+    CPU_ERMS   = (1 << 20) // enhanced 'rep movsb/stosb' instructions
   } cpuFeatureFlags;
 
   enum {
@@ -420,6 +425,10 @@
       result |= CPU_TSC;
     if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
       result |= CPU_TSCINV;
+    if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
+      result |= CPU_AES;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
+      result |= CPU_ERMS;
 
     // AMD features.
     if (is_amd()) {
@@ -484,7 +493,7 @@
     return (_cpuid_info.std_max_function >= 0xB) &&
            // eax[4:0] | ebx[0:15] == 0 indicates invalid topology level.
            // Some cpus have max cpuid >= 0xB but do not support processor topology.
-           ((_cpuid_info.tpl_cpuidB0_eax & 0x1f | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0);
+           (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0);
   }
 
   static uint cores_per_cpu()  {
@@ -544,6 +553,8 @@
   static bool supports_avx()      { return (_cpuFeatures & CPU_AVX) != 0; }
   static bool supports_avx2()     { return (_cpuFeatures & CPU_AVX2) != 0; }
   static bool supports_tsc()      { return (_cpuFeatures & CPU_TSC)    != 0; }
+  static bool supports_aes()      { return (_cpuFeatures & CPU_AES) != 0; }
+  static bool supports_erms()     { return (_cpuFeatures & CPU_ERMS) != 0; }
 
   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
--- a/src/cpu/x86/vm/vtableStubs_x86_32.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/vtableStubs_x86_32.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "code/vtableStubs.hpp"
 #include "interp_masm_x86_32.hpp"
 #include "memory/resourceArea.hpp"
--- a/src/cpu/x86/vm/vtableStubs_x86_64.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/vtableStubs_x86_64.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "code/vtableStubs.hpp"
 #include "interp_masm_x86_64.hpp"
 #include "memory/resourceArea.hpp"
--- a/src/cpu/x86/vm/x86.ad	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/x86.ad	Sun Feb 03 22:43:57 2013 +0100
@@ -4102,9 +4102,158 @@
 
 // ----------------------- LogicalRightShift -----------------------------------
 
-// Shorts/Chars vector logical right shift produces incorrect Java result
+// Shorts vector logical right shift produces an incorrect Java result
 // for negative data because Java code converts the short value into an int with
-// sign extension before a shift.
+// sign extension before a shift. But char vectors are fine since chars are
+// unsigned values.
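+//
+// e.g. for short s = (short)0x8000, Java computes s >>> 1 as
+// ((int)s) >>> 1 = 0x7fffc000 (low 16 bits 0xc000), while a packed psrlw
+// would produce 0x4000; for char the zero-extended value makes psrlw correct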
+
+instruct vsrl2S(vecS dst, vecS shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_imm(vecS dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S(vecD dst, vecS shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S(vecX dst, vecS shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
 
 // Integers vector logical right shift
 instruct vsrl2I(vecD dst, vecS shift) %{
--- a/src/cpu/x86/vm/x86_32.ad	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/x86_32.ad	Sun Feb 03 22:43:57 2013 +0100
@@ -11572,15 +11572,28 @@
 // =======================================================================
 // fast clearing of an array
 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
+  predicate(!UseFastStosb);
   match(Set dummy (ClearArray cnt base));
   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
-  format %{ "SHL    ECX,1\t# Convert doublewords to words\n\t"
-            "XOR    EAX,EAX\n\t"
+  format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
+            "SHL    ECX,1\t# Convert doublewords to words\n\t"
             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
-  opcode(0,0x4);
-  ins_encode( Opcode(0xD1), RegOpc(ECX),
-              OpcRegReg(0x33,EAX,EAX),
-              Opcode(0xF3), Opcode(0xAB) );
+  ins_encode %{ 
+    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
+  predicate(UseFastStosb);
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
+  format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
+            "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
+            "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
+  ins_encode %{ 
+    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11674,6 +11687,23 @@
   ins_pipe( pipe_slow );
 %}
 
+// encode char[] to byte[] in ISO_8859_1
+instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
+                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
+                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
+  match(Set result (EncodeISOArray src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
+
+  format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
+  ins_encode %{
+    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
+                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
+                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+
 //----------Control Flow Instructions------------------------------------------
 // Signed compare Instructions
 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
--- a/src/cpu/x86/vm/x86_64.ad	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/x86/vm/x86_64.ad	Sun Feb 03 22:43:57 2013 +0100
@@ -10374,16 +10374,33 @@
 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
                   rFlagsReg cr)
 %{
+  predicate(!UseFastStosb);
   match(Set dummy (ClearArray cnt base));
   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
 
-  format %{ "xorl    rax, rax\t# ClearArray:\n\t"
-            "rep stosq\t# Store rax to *rdi++ while rcx--" %}
-  ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
-             Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
+  format %{ "xorq    rax, rax\t# ClearArray:\n\t"
+            "rep     stosq\t# Store rax to *rdi++ while rcx--" %}
+  ins_encode %{ 
+    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
+instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
+                        rFlagsReg cr)
+%{
+  predicate(UseFastStosb);
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
+  format %{ "xorq    rax, rax\t# ClearArray:\n\t"
+            "shlq    rcx,3\t# Convert doublewords to bytes\n\t"
+            "rep     stosb\t# Store rax to *rdi++ while rcx--" %}
+  ins_encode %{ 
+    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, regD tmp1, rFlagsReg cr)
 %{
@@ -10478,6 +10495,23 @@
   ins_pipe( pipe_slow );
 %}
 
+// encode char[] to byte[] in ISO_8859_1
+instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
+                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
+                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
+  match(Set result (EncodeISOArray src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
+
+  format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
+  ins_encode %{
+    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
+                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
+                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+
 //----------Control Flow Instructions------------------------------------------
 // Signed compare Instructions
 
--- a/src/cpu/zero/vm/assembler_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/assembler_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -46,19 +46,19 @@
   return 0;
 }
 
+#ifdef ASSERT
+bool AbstractAssembler::pd_check_instruction_mark() {
+  ShouldNotCallThis();
+}
+#endif
+
 void Assembler::pd_patch_instruction(address branch, address target) {
   ShouldNotCallThis();
 }
 
-#ifndef PRODUCT
-void Assembler::pd_print_patched_instruction(address branch) {
-  ShouldNotCallThis();
-}
-#endif // PRODUCT
-
 void MacroAssembler::align(int modulus) {
   while (offset() % modulus != 0)
-    emit_byte(AbstractAssembler::code_fill_byte());
+    emit_int8(AbstractAssembler::code_fill_byte());
 }
 
 void MacroAssembler::bang_stack_with_offset(int offset) {
@@ -66,8 +66,7 @@
 }
 
 void MacroAssembler::advance(int bytes) {
-  _code_pos += bytes;
-  sync();
+  code_section()->set_end(code_section()->end() + bytes);
 }
 
 RegisterOrConstant MacroAssembler::delayed_value_impl(
@@ -80,6 +79,11 @@
   emit_address((address) obj);
 }
 
+void MacroAssembler::store_Metadata(Metadata* md) {
+  code_section()->relocate(pc(), metadata_Relocation::spec_for_immediate());
+  emit_address((address) md);
+}
+
 static void should_not_call() {
   report_should_not_call(__FILE__, __LINE__);
 }
--- a/src/cpu/zero/vm/assembler_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/assembler_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -37,9 +37,6 @@
 
  public:
   void pd_patch_instruction(address branch, address target);
-#ifndef PRODUCT
-  static void pd_print_patched_instruction(address branch);
-#endif // PRODUCT
 };
 
 class MacroAssembler : public Assembler {
@@ -55,14 +52,9 @@
  public:
   void advance(int bytes);
   void store_oop(jobject obj);
+  void store_Metadata(Metadata* obj);
 };
 
-#ifdef ASSERT
-inline bool AbstractAssembler::pd_check_instruction_mark() {
-  ShouldNotCallThis();
-}
-#endif
-
 address ShouldNotCallThisStub();
 address ShouldNotCallThisEntry();
 
--- a/src/cpu/zero/vm/cppInterpreterGenerator_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/cppInterpreterGenerator_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,12 +31,17 @@
     return _masm;
   }
 
- protected:
-  address generate_entry(address entry_point) {
-    ZeroEntry *entry = (ZeroEntry *) assembler()->pc();
-    assembler()->advance(sizeof(ZeroEntry));
+ public:
+  static address generate_entry_impl(MacroAssembler* masm, address entry_point) {
+    ZeroEntry *entry = (ZeroEntry *) masm->pc();
+    masm->advance(sizeof(ZeroEntry));
     entry->set_entry_point(entry_point);
     return (address) entry;
   }
 
+ protected:
+  address generate_entry(address entry_point) {
+    return generate_entry_impl(assembler(), entry_point);
+  }
+
 #endif // CPU_ZERO_VM_CPPINTERPRETERGENERATOR_ZERO_HPP
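On Zero an interpreter entry is data, not machine code: generate_entry_impl above reserves sizeof(ZeroEntry) bytes in the code buffer and plants a C++ function pointer in them. A minimal sketch of the shape being written, assuming the layout of the real ZeroEntry in entry_zero.hpp (names and fields here are approximations):

    typedef unsigned char* address;

    class ZeroEntrySketch {
     private:
      address _entry_point;                        // the C++ routine to enter
     public:
      void set_entry_point(address entry) { _entry_point = entry; }
      address entry_point() const         { return _entry_point; }
    };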
--- a/src/cpu/zero/vm/cppInterpreter_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -180,25 +180,6 @@
         method, istate->osr_entry(), istate->osr_buf(), THREAD);
       return;
     }
-    else if (istate->msg() == BytecodeInterpreter::call_method_handle) {
-      oop method_handle = istate->callee();
-
-      // Trim back the stack to put the parameters at the top
-      stack->set_sp(istate->stack() + 1);
-
-      // Make the call
-      process_method_handle(method_handle, THREAD);
-      fixup_after_potential_safepoint();
-
-      // Convert the result
-      istate->set_stack(stack->sp() - 1);
-
-      // Restore the stack
-      stack->set_sp(istate->stack_limit() + 1);
-
-      // Resume the interpreter
-      istate->set_msg(BytecodeInterpreter::method_resume);
-    }
     else {
       ShouldNotReachHere();
     }
@@ -535,35 +516,35 @@
   if (entry->is_volatile()) {
     switch (entry->flag_state()) {
     case ctos:
-      SET_LOCALS_INT(object->char_field_acquire(entry->f2()), 0);
+      SET_LOCALS_INT(object->char_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case btos:
-      SET_LOCALS_INT(object->byte_field_acquire(entry->f2()), 0);
+      SET_LOCALS_INT(object->byte_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case stos:
-      SET_LOCALS_INT(object->short_field_acquire(entry->f2()), 0);
+      SET_LOCALS_INT(object->short_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case itos:
-      SET_LOCALS_INT(object->int_field_acquire(entry->f2()), 0);
+      SET_LOCALS_INT(object->int_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case ltos:
-      SET_LOCALS_LONG(object->long_field_acquire(entry->f2()), 0);
+      SET_LOCALS_LONG(object->long_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case ftos:
-      SET_LOCALS_FLOAT(object->float_field_acquire(entry->f2()), 0);
+      SET_LOCALS_FLOAT(object->float_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case dtos:
-      SET_LOCALS_DOUBLE(object->double_field_acquire(entry->f2()), 0);
+      SET_LOCALS_DOUBLE(object->double_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case atos:
-      SET_LOCALS_OBJECT(object->obj_field_acquire(entry->f2()), 0);
+      SET_LOCALS_OBJECT(object->obj_field_acquire(entry->f2_as_index()), 0);
       break;
 
     default:
@@ -573,35 +554,35 @@
   else {
     switch (entry->flag_state()) {
     case ctos:
-      SET_LOCALS_INT(object->char_field(entry->f2()), 0);
+      SET_LOCALS_INT(object->char_field(entry->f2_as_index()), 0);
       break;
 
     case btos:
-      SET_LOCALS_INT(object->byte_field(entry->f2()), 0);
+      SET_LOCALS_INT(object->byte_field(entry->f2_as_index()), 0);
       break;
 
     case stos:
-      SET_LOCALS_INT(object->short_field(entry->f2()), 0);
+      SET_LOCALS_INT(object->short_field(entry->f2_as_index()), 0);
       break;
 
     case itos:
-      SET_LOCALS_INT(object->int_field(entry->f2()), 0);
+      SET_LOCALS_INT(object->int_field(entry->f2_as_index()), 0);
       break;
 
     case ltos:
-      SET_LOCALS_LONG(object->long_field(entry->f2()), 0);
+      SET_LOCALS_LONG(object->long_field(entry->f2_as_index()), 0);
       break;
 
     case ftos:
-      SET_LOCALS_FLOAT(object->float_field(entry->f2()), 0);
+      SET_LOCALS_FLOAT(object->float_field(entry->f2_as_index()), 0);
       break;
 
     case dtos:
-      SET_LOCALS_DOUBLE(object->double_field(entry->f2()), 0);
+      SET_LOCALS_DOUBLE(object->double_field(entry->f2_as_index()), 0);
       break;
 
     case atos:
-      SET_LOCALS_OBJECT(object->obj_field(entry->f2()), 0);
+      SET_LOCALS_OBJECT(object->obj_field(entry->f2_as_index()), 0);
       break;
 
     default:
@@ -629,516 +610,6 @@
   return 0;
 }
 
-int CppInterpreter::method_handle_entry(Method* method,
-                                        intptr_t UNUSED, TRAPS) {
-  JavaThread *thread = (JavaThread *) THREAD;
-  ZeroStack *stack = thread->zero_stack();
-  int argument_slots = method->size_of_parameters();
-  int result_slots = type2size[result_type_of(method)];
-  intptr_t *vmslots = stack->sp();
-  intptr_t *unwind_sp = vmslots + argument_slots;
-
-  // Find the MethodType
-  address p = (address) method;
-  for (jint* pc = method->method_type_offsets_chain(); (*pc) != -1; pc++) {
-    p = *(address*)(p + (*pc));
-  }
-  oop method_type = (oop) p;
-
-  // The MethodHandle is in the slot after the arguments
-  int num_vmslots = argument_slots - 1;
-  oop method_handle = VMSLOTS_OBJECT(num_vmslots);
-
-  // InvokeGeneric requires some extra shuffling
-  oop mhtype = java_lang_invoke_MethodHandle::type(method_handle);
-  bool is_exact = mhtype == method_type;
-  if (!is_exact) {
-    if (true || // FIXME
-        method->intrinsic_id() == vmIntrinsics::_invokeExact) {
-      CALL_VM_NOCHECK_NOFIX(
-        SharedRuntime::throw_WrongMethodTypeException(
-          thread, method_type, mhtype));
-      // NB all oops trashed!
-      assert(HAS_PENDING_EXCEPTION, "should do");
-      stack->set_sp(unwind_sp);
-      return 0;
-    }
-    assert(method->intrinsic_id() == vmIntrinsics::_invokeGeneric, "should be");
-
-    // Load up an adapter from the calling type
-    // NB the x86 code for this (in methodHandles_x86.cpp, search for
-    // "genericInvoker") is really really odd.  I'm hoping it's trying
-    // to accommodate odd VM/class library combinations I can ignore.
-    oop adapter = NULL; //FIXME: load the adapter from the CP cache
-    if (adapter == NULL) {
-      CALL_VM_NOCHECK_NOFIX(
-        SharedRuntime::throw_WrongMethodTypeException(
-          thread, method_type, mhtype));
-      // NB all oops trashed!
-      assert(HAS_PENDING_EXCEPTION, "should do");
-      stack->set_sp(unwind_sp);
-      return 0;
-    }
-
-    // Adapters are shared among form-families of method-type.  The
-    // type being called is passed as a trusted first argument so that
-    // the adapter knows the actual types of its arguments and return
-    // values.
-    insert_vmslots(num_vmslots + 1, 1, THREAD);
-    if (HAS_PENDING_EXCEPTION) {
-      // NB all oops trashed!
-      stack->set_sp(unwind_sp);
-      return 0;
-    }
-
-    vmslots = stack->sp();
-    num_vmslots++;
-    SET_VMSLOTS_OBJECT(method_type, num_vmslots);
-
-    method_handle = adapter;
-  }
-
-  // Start processing
-  process_method_handle(method_handle, THREAD);
-  if (HAS_PENDING_EXCEPTION)
-    result_slots = 0;
-
-  // If this is an invokeExact then the eventual callee will not
-  // have unwound the method handle argument so we have to do it.
-  // If a result is being returned then it will be above the method
-  // handle argument we're unwinding.
-  if (is_exact) {
-    intptr_t result[2];
-    for (int i = 0; i < result_slots; i++)
-      result[i] = stack->pop();
-    stack->pop();
-    for (int i = result_slots - 1; i >= 0; i--)
-      stack->push(result[i]);
-  }
-
-  // Check
-  assert(stack->sp() == unwind_sp - result_slots, "should be");
-
-  // No deoptimized frames on the stack
-  return 0;
-}
-
-void CppInterpreter::process_method_handle(oop method_handle, TRAPS) {
-  JavaThread *thread = (JavaThread *) THREAD;
-  ZeroStack *stack = thread->zero_stack();
-  intptr_t *vmslots = stack->sp();
-
-  bool direct_to_method = false;
-  BasicType src_rtype = T_ILLEGAL;
-  BasicType dst_rtype = T_ILLEGAL;
-
-  MethodHandleEntry *entry =
-    java_lang_invoke_MethodHandle::vmentry(method_handle);
-  MethodHandles::EntryKind entry_kind =
-    (MethodHandles::EntryKind) (((intptr_t) entry) & 0xffffffff);
-
-  Method* method = NULL;
-  switch (entry_kind) {
-  case MethodHandles::_invokestatic_mh:
-    direct_to_method = true;
-    break;
-
-  case MethodHandles::_invokespecial_mh:
-  case MethodHandles::_invokevirtual_mh:
-  case MethodHandles::_invokeinterface_mh:
-    {
-      oop receiver =
-        VMSLOTS_OBJECT(
-          java_lang_invoke_MethodHandle::vmslots(method_handle) - 1);
-      if (receiver == NULL) {
-          stack->set_sp(calculate_unwind_sp(stack, method_handle));
-          CALL_VM_NOCHECK_NOFIX(
-            throw_exception(
-              thread, vmSymbols::java_lang_NullPointerException()));
-          // NB all oops trashed!
-          assert(HAS_PENDING_EXCEPTION, "should do");
-          return;
-      }
-      if (entry_kind != MethodHandles::_invokespecial_mh) {
-        intptr_t index = java_lang_invoke_DirectMethodHandle::vmindex(method_handle);
-        InstanceKlass* rcvrKlass =
-          (InstanceKlass *) receiver->klass();
-        if (entry_kind == MethodHandles::_invokevirtual_mh) {
-          method = (Method*) rcvrKlass->start_of_vtable()[index];
-        }
-        else {
-          oop iclass = java_lang_invoke_MethodHandle::next_target(method_handle);
-          itableOffsetEntry* ki =
-            (itableOffsetEntry *) rcvrKlass->start_of_itable();
-          int i, length = rcvrKlass->itable_length();
-          for (i = 0; i < length; i++, ki++ ) {
-            if (ki->interface_klass() == iclass)
-              break;
-          }
-          if (i == length) {
-            stack->set_sp(calculate_unwind_sp(stack, method_handle));
-            CALL_VM_NOCHECK_NOFIX(
-              throw_exception(
-                thread, vmSymbols::java_lang_IncompatibleClassChangeError()));
-            // NB all oops trashed!
-            assert(HAS_PENDING_EXCEPTION, "should do");
-            return;
-          }
-          itableMethodEntry* im = ki->first_method_entry(receiver->klass());
-          method = im[index].method();
-          if (method == NULL) {
-            stack->set_sp(calculate_unwind_sp(stack, method_handle));
-            CALL_VM_NOCHECK_NOFIX(
-              throw_exception(
-                thread, vmSymbols::java_lang_AbstractMethodError()));
-            // NB all oops trashed!
-            assert(HAS_PENDING_EXCEPTION, "should do");
-            return;
-          }
-        }
-      }
-    }
-    direct_to_method = true;
-    break;
-
-  case MethodHandles::_bound_ref_direct_mh:
-  case MethodHandles::_bound_int_direct_mh:
-  case MethodHandles::_bound_long_direct_mh:
-    direct_to_method = true;
-    // fall through
-  case MethodHandles::_bound_ref_mh:
-  case MethodHandles::_bound_int_mh:
-  case MethodHandles::_bound_long_mh:
-    {
-      BasicType arg_type  = T_ILLEGAL;
-      int       arg_mask  = -1;
-      int       arg_slots = -1;
-      MethodHandles::get_ek_bound_mh_info(
-        entry_kind, arg_type, arg_mask, arg_slots);
-      int arg_slot =
-        java_lang_invoke_BoundMethodHandle::vmargslot(method_handle);
-
-      // Create the new slot(s)
-      intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
-      insert_vmslots(arg_slot, arg_slots, THREAD);
-      if (HAS_PENDING_EXCEPTION) {
-        // all oops trashed
-        stack->set_sp(unwind_sp);
-        return;
-      }
-      vmslots = stack->sp();
-
-      // Store bound argument into new stack slot
-      oop arg = java_lang_invoke_BoundMethodHandle::argument(method_handle);
-      if (arg_type == T_OBJECT) {
-        assert(arg_slots == 1, "should be");
-        SET_VMSLOTS_OBJECT(arg, arg_slot);
-      }
-      else {
-        jvalue arg_value;
-        arg_type = java_lang_boxing_object::get_value(arg, &arg_value);
-        switch (arg_type) {
-        case T_BOOLEAN:
-          SET_VMSLOTS_INT(arg_value.z, arg_slot);
-          break;
-        case T_CHAR:
-          SET_VMSLOTS_INT(arg_value.c, arg_slot);
-          break;
-        case T_BYTE:
-          SET_VMSLOTS_INT(arg_value.b, arg_slot);
-          break;
-        case T_SHORT:
-          SET_VMSLOTS_INT(arg_value.s, arg_slot);
-          break;
-        case T_INT:
-          SET_VMSLOTS_INT(arg_value.i, arg_slot);
-          break;
-        case T_FLOAT:
-          SET_VMSLOTS_FLOAT(arg_value.f, arg_slot);
-          break;
-        case T_LONG:
-          SET_VMSLOTS_LONG(arg_value.j, arg_slot + 1);
-          break;
-        case T_DOUBLE:
-          SET_VMSLOTS_DOUBLE(arg_value.d, arg_slot + 1);
-          break;
-        default:
-          tty->print_cr("unhandled type %s", type2name(arg_type));
-          ShouldNotReachHere();
-        }
-      }
-    }
-    break;
-
-  case MethodHandles::_adapter_retype_only:
-  case MethodHandles::_adapter_retype_raw:
-    src_rtype = result_type_of_handle(
-      java_lang_invoke_MethodHandle::next_target(method_handle));
-    dst_rtype = result_type_of_handle(method_handle);
-    break;
-
-  case MethodHandles::_adapter_check_cast:
-    {
-      int arg_slot =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      oop arg = VMSLOTS_OBJECT(arg_slot);
-      if (arg != NULL) {
-        Klass* objKlassOop = arg->klass();
-        Klass* klassOf = java_lang_Class::as_Klass(
-          java_lang_invoke_AdapterMethodHandle::argument(method_handle));
-
-        if (objKlassOop != klassOf &&
-            !objKlassOop->is_subtype_of(klassOf)) {
-          ResourceMark rm(THREAD);
-          const char* objName = Klass::cast(objKlassOop)->external_name();
-          const char* klassName = Klass::cast(klassOf)->external_name();
-          char* message = SharedRuntime::generate_class_cast_message(
-            objName, klassName);
-
-          stack->set_sp(calculate_unwind_sp(stack, method_handle));
-          CALL_VM_NOCHECK_NOFIX(
-            throw_exception(
-              thread, vmSymbols::java_lang_ClassCastException(), message));
-          // NB all oops trashed!
-          assert(HAS_PENDING_EXCEPTION, "should do");
-          return;
-        }
-      }
-    }
-    break;
-
-  case MethodHandles::_adapter_dup_args:
-    {
-      int arg_slot =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      int conv =
-        java_lang_invoke_AdapterMethodHandle::conversion(method_handle);
-      int num_slots = -MethodHandles::adapter_conversion_stack_move(conv);
-      assert(num_slots > 0, "should be");
-
-      // Create the new slot(s)
-      intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
-      stack->overflow_check(num_slots, THREAD);
-      if (HAS_PENDING_EXCEPTION) {
-        // all oops trashed
-        stack->set_sp(unwind_sp);
-        return;
-      }
-
-      // Duplicate the arguments
-      for (int i = num_slots - 1; i >= 0; i--)
-        stack->push(*VMSLOTS_SLOT(arg_slot + i));
-
-      vmslots = stack->sp(); // unused, but let the compiler figure that out
-    }
-    break;
-
-  case MethodHandles::_adapter_drop_args:
-    {
-      int arg_slot =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      int conv =
-        java_lang_invoke_AdapterMethodHandle::conversion(method_handle);
-      int num_slots = MethodHandles::adapter_conversion_stack_move(conv);
-      assert(num_slots > 0, "should be");
-
-      remove_vmslots(arg_slot, num_slots, THREAD); // doesn't trap
-      vmslots = stack->sp(); // unused, but let the compiler figure that out
-    }
-    break;
-
-  case MethodHandles::_adapter_opt_swap_1:
-  case MethodHandles::_adapter_opt_swap_2:
-  case MethodHandles::_adapter_opt_rot_1_up:
-  case MethodHandles::_adapter_opt_rot_1_down:
-  case MethodHandles::_adapter_opt_rot_2_up:
-  case MethodHandles::_adapter_opt_rot_2_down:
-    {
-      int arg1 =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      int conv =
-        java_lang_invoke_AdapterMethodHandle::conversion(method_handle);
-      int arg2 = MethodHandles::adapter_conversion_vminfo(conv);
-
-      int swap_bytes = 0, rotate = 0;
-      MethodHandles::get_ek_adapter_opt_swap_rot_info(
-        entry_kind, swap_bytes, rotate);
-      int swap_slots = swap_bytes >> LogBytesPerWord;
-
-      intptr_t tmp;
-      switch (rotate) {
-      case 0: // swap
-        for (int i = 0; i < swap_slots; i++) {
-          tmp = *VMSLOTS_SLOT(arg1 + i);
-          SET_VMSLOTS_SLOT(VMSLOTS_SLOT(arg2 + i), arg1 + i);
-          SET_VMSLOTS_SLOT(&tmp, arg2 + i);
-        }
-        break;
-
-      case 1: // up
-        assert(arg1 - swap_slots > arg2, "should be");
-
-        tmp = *VMSLOTS_SLOT(arg1);
-        for (int i = arg1 - swap_slots; i >= arg2; i--)
-          SET_VMSLOTS_SLOT(VMSLOTS_SLOT(i), i + swap_slots);
-        SET_VMSLOTS_SLOT(&tmp, arg2);
-
-        break;
-
-      case -1: // down
-        assert(arg2 - swap_slots > arg1, "should be");
-
-        tmp = *VMSLOTS_SLOT(arg1);
-        for (int i = arg1 + swap_slots; i <= arg2; i++)
-          SET_VMSLOTS_SLOT(VMSLOTS_SLOT(i), i - swap_slots);
-        SET_VMSLOTS_SLOT(&tmp, arg2);
-        break;
-
-      default:
-        ShouldNotReachHere();
-      }
-    }
-    break;
-
-  case MethodHandles::_adapter_opt_i2l:
-    {
-      int arg_slot =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      int arg = VMSLOTS_INT(arg_slot);
-      intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
-      insert_vmslots(arg_slot, 1, THREAD);
-      if (HAS_PENDING_EXCEPTION) {
-        // all oops trashed
-        stack->set_sp(unwind_sp);
-        return;
-      }
-      vmslots = stack->sp();
-      arg_slot++;
-      SET_VMSLOTS_LONG(arg, arg_slot);
-    }
-    break;
-
-  case MethodHandles::_adapter_opt_unboxi:
-  case MethodHandles::_adapter_opt_unboxl:
-    {
-      int arg_slot =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      oop arg = VMSLOTS_OBJECT(arg_slot);
-      jvalue arg_value;
-      if (arg == NULL) {
-        // queue a nullpointer exception for the caller
-        stack->set_sp(calculate_unwind_sp(stack, method_handle));
-        CALL_VM_NOCHECK_NOFIX(
-          throw_exception(
-            thread, vmSymbols::java_lang_NullPointerException()));
-        // NB all oops trashed!
-        assert(HAS_PENDING_EXCEPTION, "should do");
-        return;
-      }
-      BasicType arg_type = java_lang_boxing_object::get_value(arg, &arg_value);
-      if (arg_type == T_LONG || arg_type == T_DOUBLE) {
-        intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
-        insert_vmslots(arg_slot, 1, THREAD);
-        if (HAS_PENDING_EXCEPTION) {
-          // all oops trashed
-          stack->set_sp(unwind_sp);
-          return;
-        }
-        vmslots = stack->sp();
-        arg_slot++;
-      }
-      switch (arg_type) {
-      case T_BOOLEAN:
-        SET_VMSLOTS_INT(arg_value.z, arg_slot);
-        break;
-      case T_CHAR:
-        SET_VMSLOTS_INT(arg_value.c, arg_slot);
-        break;
-      case T_BYTE:
-        SET_VMSLOTS_INT(arg_value.b, arg_slot);
-        break;
-      case T_SHORT:
-        SET_VMSLOTS_INT(arg_value.s, arg_slot);
-        break;
-      case T_INT:
-        SET_VMSLOTS_INT(arg_value.i, arg_slot);
-        break;
-      case T_FLOAT:
-        SET_VMSLOTS_FLOAT(arg_value.f, arg_slot);
-        break;
-      case T_LONG:
-        SET_VMSLOTS_LONG(arg_value.j, arg_slot);
-        break;
-      case T_DOUBLE:
-        SET_VMSLOTS_DOUBLE(arg_value.d, arg_slot);
-        break;
-      default:
-        tty->print_cr("unhandled type %s", type2name(arg_type));
-        ShouldNotReachHere();
-      }
-    }
-    break;
-
-  default:
-    tty->print_cr("unhandled entry_kind %s",
-                  MethodHandles::entry_name(entry_kind));
-    ShouldNotReachHere();
-  }
-
-  // Continue along the chain
-  if (direct_to_method) {
-    if (method == NULL) {
-      method =
-        (Method*) java_lang_invoke_MethodHandle::vmtarget(method_handle);
-    }
-    address entry_point = method->from_interpreted_entry();
-    Interpreter::invoke_method(method, entry_point, THREAD);
-  }
-  else {
-    process_method_handle(
-      java_lang_invoke_MethodHandle::next_target(method_handle), THREAD);
-  }
-  // NB all oops now trashed
-
-  // Adapt the result type, if necessary
-  if (src_rtype != dst_rtype && !HAS_PENDING_EXCEPTION) {
-    switch (dst_rtype) {
-    case T_VOID:
-      for (int i = 0; i < type2size[src_rtype]; i++)
-        stack->pop();
-      return;
-
-    case T_INT:
-      switch (src_rtype) {
-      case T_VOID:
-        stack->overflow_check(1, CHECK);
-        stack->push(0);
-        return;
-
-      case T_BOOLEAN:
-      case T_CHAR:
-      case T_BYTE:
-      case T_SHORT:
-        return;
-      }
-      // INT results sometimes need narrowing
-    case T_BOOLEAN:
-    case T_CHAR:
-    case T_BYTE:
-    case T_SHORT:
-      switch (src_rtype) {
-      case T_INT:
-        return;
-      }
-    }
-
-    tty->print_cr("unhandled conversion:");
-    tty->print_cr("src_rtype = %s", type2name(src_rtype));
-    tty->print_cr("dst_rtype = %s", type2name(dst_rtype));
-    ShouldNotReachHere();
-  }
-}
-
 // The new slots will be inserted before slot insert_before.
 // Slots < insert_before will have the same slot number after the insert.
 // Slots >= insert_before will become old_slot + num_slots.
@@ -1380,10 +851,6 @@
     entry_point = ((InterpreterGenerator*) this)->generate_abstract_entry();
     break;
 
-  case Interpreter::method_handle:
-    entry_point = ((InterpreterGenerator*) this)->generate_method_handle_entry();
-    break;
-
   case Interpreter::java_lang_math_sin:
   case Interpreter::java_lang_math_cos:
   case Interpreter::java_lang_math_tan:
@@ -1391,6 +858,8 @@
   case Interpreter::java_lang_math_log:
   case Interpreter::java_lang_math_log10:
   case Interpreter::java_lang_math_sqrt:
+  case Interpreter::java_lang_math_pow:
+  case Interpreter::java_lang_math_exp:
     entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind);
     break;
 
@@ -1546,11 +1015,7 @@
 // Helper for figuring out if frames are interpreter frames
 
 bool CppInterpreter::contains(address pc) {
-#ifdef PRODUCT
-  ShouldNotCallThis();
-#else
   return false; // make frame::print_value_on work
-#endif // !PRODUCT
 }
 
 // Result handlers and convertors
--- a/src/cpu/zero/vm/cppInterpreter_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/cppInterpreter_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -36,7 +36,6 @@
   static int native_entry(Method* method, intptr_t UNUSED, TRAPS);
   static int accessor_entry(Method* method, intptr_t UNUSED, TRAPS);
   static int empty_entry(Method* method, intptr_t UNUSED, TRAPS);
-  static int method_handle_entry(Method* method, intptr_t UNUSED, TRAPS);
 
  public:
   // Main loop of normal_entry
@@ -44,7 +43,6 @@
 
  private:
   // Helpers for method_handle_entry
-  static void process_method_handle(oop method_handle, TRAPS);
   static void insert_vmslots(int insert_before, int num_slots, TRAPS);
   static void remove_vmslots(int first_slot, int num_slots, TRAPS);
   static BasicType result_type_of_handle(oop method_handle);
--- a/src/cpu/zero/vm/frame_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/frame_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -98,10 +98,20 @@
 #endif // CC_INTERP
 
 void frame::patch_pc(Thread* thread, address pc) {
-  // We borrow this call to set the thread pointer in the interpreter
-  // state; the hook to set up deoptimized frames isn't supplied it.
-  assert(pc == NULL, "should be");
-  get_interpreterState()->set_thread((JavaThread *) thread);
+
+  if (pc != NULL) {
+    _cb = CodeCache::find_blob(pc);
+    SharkFrame* sharkframe = zeroframe()->as_shark_frame();
+    sharkframe->set_pc(pc);
+    _pc = pc;
+    _deopt_state = is_deoptimized;
+
+  } else {
+    // We borrow this call to set the thread pointer in the interpreter
+    // state; no hook is otherwise supplied for setting up deoptimized frames.
+    assert(pc == NULL, "should be");
+    get_interpreterState()->set_thread((JavaThread *) thread);
+  }
 }
 
 bool frame::safe_for_sender(JavaThread *thread) {
@@ -351,7 +361,7 @@
   switch (offset) {
   case pc_off:
     strncpy(fieldbuf, "pc", buflen);
-    if (method()->is_oop()) {
+    if (method()->is_method()) {
       nmethod *code = method()->code();
       if (code && code->pc_desc_at(pc())) {
         SimpleScopeDesc ssd(code, pc());
@@ -367,7 +377,7 @@
 
   case method_off:
     strncpy(fieldbuf, "method", buflen);
-    if (method()->is_oop()) {
+    if (method()->is_method()) {
       method()->name_and_sig_as_C_string(valuebuf, buflen);
     }
     return;
@@ -378,7 +388,7 @@
   }
 
   // Variable part
-  if (method()->is_oop()) {
+  if (method()->is_method()) {
     identify_vp_word(frame_index, addr_of_word(offset),
                      addr_of_word(header_words + 1),
                      unextended_sp() + method()->max_stack(),
@@ -430,4 +440,3 @@
   // unused... but returns fp() to minimize changes introduced by 7087445
   return fp();
 }
-
--- a/src/cpu/zero/vm/frame_zero.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/frame_zero.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -36,6 +36,8 @@
   _deopt_state = unknown;
 }
 
+inline address  frame::sender_pc()           const { ShouldNotCallThis();  }
+
 inline frame::frame(ZeroFrame* zf, intptr_t* sp) {
   _zeroframe = zf;
   _sp = sp;
@@ -43,27 +45,36 @@
   case ZeroFrame::ENTRY_FRAME:
     _pc = StubRoutines::call_stub_return_pc();
     _cb = NULL;
+    _deopt_state = not_deoptimized;
     break;
 
   case ZeroFrame::INTERPRETER_FRAME:
     _pc = NULL;
     _cb = NULL;
+    _deopt_state = not_deoptimized;
     break;
 
-  case ZeroFrame::SHARK_FRAME:
+  case ZeroFrame::SHARK_FRAME: {
     _pc = zero_sharkframe()->pc();
     _cb = CodeCache::find_blob_unsafe(pc());
+    address original_pc = nmethod::get_deopt_original_pc(this);
+    if (original_pc != NULL) {
+      _pc = original_pc;
+      _deopt_state = is_deoptimized;
+    } else {
+      _deopt_state = not_deoptimized;
+    }
     break;
-
+  }
   case ZeroFrame::FAKE_STUB_FRAME:
     _pc = NULL;
     _cb = NULL;
+    _deopt_state = not_deoptimized;
     break;
 
   default:
     ShouldNotReachHere();
   }
-  _deopt_state = not_deoptimized;
 }
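The SHARK_FRAME arm now distinguishes live from deoptimized frames. A hedged paraphrase of the discriminator it leans on (the real helper is nmethod::get_deopt_original_pc; this restates its contract, not its code):

    // If the pc recorded in the frame points into the nmethod's deopt
    // handler, return the original pc that frame::patch_pc saved and mark
    // the frame is_deoptimized; otherwise return NULL and leave it
    // not_deoptimized.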
 
 // Accessors
--- a/src/cpu/zero/vm/globals_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/globals_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -52,11 +52,7 @@
 define_pd_global(bool,  RewriteBytecodes,     true);
 define_pd_global(bool,  RewriteFrequentPairs, true);
 
-#ifdef _ALLBSD_SOURCE
 define_pd_global(bool,  UseMembar,            true);
-#else
-define_pd_global(bool,  UseMembar,            false);
-#endif
 
 // GC Ergo Flags
 define_pd_global(intx, CMSYoungGenPerWorker, 16*M);  // default max size of CMS young gen, per GC worker thread
--- a/src/cpu/zero/vm/icBuffer_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/icBuffer_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -40,7 +40,7 @@
 }
 
 void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin,
-                                                Metadata* cached_oop,
+                                                void* cached_oop,
                                                 address entry_point) {
   // NB ic_stub_code_size() must return the size of the code we generate
   ShouldNotCallThis();
@@ -51,7 +51,6 @@
   ShouldNotCallThis();
 }
 
-Metadata* InlineCacheBuffer::ic_buffer_cached_oop(address code_begin) {
-  // NB ic_stub_code_size() must return the size of the code we generate
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
   ShouldNotCallThis();
 }
--- a/src/cpu/zero/vm/interp_masm_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/interp_masm_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -37,11 +37,6 @@
 #include "runtime/basicLock.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/sharedRuntime.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 // This file is intentionally empty
--- a/src/cpu/zero/vm/methodHandles_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/methodHandles_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,26 +24,159 @@
  */
 
 #include "precompiled.hpp"
+#include "interpreter/interpreterGenerator.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/allocation.inline.hpp"
 #include "prims/methodHandles.hpp"
 
-int MethodHandles::adapter_conversion_ops_supported_mask() {
-  return ((1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
-         //|(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS) //BUG!
-         );
-  // FIXME: MethodHandlesTest gets a crash if we enable OP_SPREAD_ARGS.
+void MethodHandles::invoke_target(Method* method, TRAPS) {
+
+  JavaThread *thread = (JavaThread *) THREAD;
+  ZeroStack *stack = thread->zero_stack();
+  InterpreterFrame *frame = thread->top_zero_frame()->as_interpreter_frame();
+  interpreterState istate = frame->interpreter_state();
+
+  // Trim back the stack to put the parameters at the top
+  stack->set_sp(istate->stack() + 1);
+
+  Interpreter::invoke_method(method, method->from_interpreted_entry(), THREAD);
+
+  // Convert the result
+  istate->set_stack(stack->sp() - 1);
+
+}
+
+oop MethodHandles::popFromStack(TRAPS) {
+
+  JavaThread *thread = (JavaThread *) THREAD;
+  InterpreterFrame *frame = thread->top_zero_frame()->as_interpreter_frame();
+  interpreterState istate = frame->interpreter_state();
+  intptr_t* topOfStack = istate->stack();
+
+  oop top = STACK_OBJECT(-1);
+  MORE_STACK(-1);
+  istate->set_stack(topOfStack);
+
+  return top;
+
+}
+
+int MethodHandles::method_handle_entry_invokeBasic(Method* method, intptr_t UNUSED, TRAPS) {
+
+  JavaThread *thread = (JavaThread *) THREAD;
+  InterpreterFrame *frame = thread->top_zero_frame()->as_interpreter_frame();
+  interpreterState istate = frame->interpreter_state();
+  intptr_t* topOfStack = istate->stack();
+
+  // 'this' is a MethodHandle. We resolve the target method by accessing this.form.vmentry.vmtarget.
+  int numArgs = method->size_of_parameters();
+  oop lform1 = java_lang_invoke_MethodHandle::form(STACK_OBJECT(-numArgs)); // this.form
+  oop vmEntry1 = java_lang_invoke_LambdaForm::vmentry(lform1);
+  Method* vmtarget = (Method*) java_lang_invoke_MemberName::vmtarget(vmEntry1);
+
+  invoke_target(vmtarget, THREAD);
+
+  // No deoptimized frames on the stack
+  return 0;
+}
+
+int MethodHandles::method_handle_entry_linkToStaticOrSpecial(Method* method, intptr_t UNUSED, TRAPS) {
+
+  // Pop appendix argument from stack. This is a MemberName which we resolve to the
+  // target method.
+  oop vmentry = popFromStack(THREAD);
+
+  Method* vmtarget = (Method*) java_lang_invoke_MemberName::vmtarget(vmentry);
+
+  invoke_target(vmtarget, THREAD);
+
+  return 0;
 }
 
-void MethodHandles::generate_method_handle_stub(MacroAssembler*          masm,
-                                                MethodHandles::EntryKind ek) {
-  init_entry(ek, (MethodHandleEntry *) ek);
+int MethodHandles::method_handle_entry_linkToInterface(Method* method, intptr_t UNUSED, TRAPS) {
+  JavaThread *thread = (JavaThread *) THREAD;
+  InterpreterFrame *frame = thread->top_zero_frame()->as_interpreter_frame();
+  interpreterState istate = frame->interpreter_state();
+
+  // Pop appendix argument from stack. This is a MemberName which we resolve to the
+  // target method.
+  oop vmentry = popFromStack(THREAD);
+  intptr_t* topOfStack = istate->stack();
+
+  // Resolve target method by looking up in the receiver object's itable.
+  Klass* clazz = java_lang_Class::as_Klass(java_lang_invoke_MemberName::clazz(vmentry));
+  intptr_t vmindex = java_lang_invoke_MemberName::vmindex(vmentry);
+  Method* target = (Method*) java_lang_invoke_MemberName::vmtarget(vmentry);
+
+  int numArgs = target->size_of_parameters();
+  oop recv = STACK_OBJECT(-numArgs);
+
+  InstanceKlass* klass_part = InstanceKlass::cast(recv->klass());
+  itableOffsetEntry* ki = (itableOffsetEntry*) klass_part->start_of_itable();
+  int i;
+  for ( i = 0 ; i < klass_part->itable_length() ; i++, ki++ ) {
+    if (ki->interface_klass() == clazz) break;
+  }
+
+  itableMethodEntry* im = ki->first_method_entry(recv->klass());
+  Method* vmtarget = im[vmindex].method();
+
+  invoke_target(vmtarget, THREAD);
+
+  return 0;
 }
+
+int MethodHandles::method_handle_entry_linkToVirtual(Method* method, intptr_t UNUSED, TRAPS) {
+  JavaThread *thread = (JavaThread *) THREAD;
+
+  InterpreterFrame *frame = thread->top_zero_frame()->as_interpreter_frame();
+  interpreterState istate = frame->interpreter_state();
+
+  // Pop appendix argument from stack. This is a MemberName which we resolve to the
+  // target method.
+  oop vmentry = popFromStack(THREAD);
+  intptr_t* topOfStack = istate->stack();
+
+  // Resolve target method by looking up in the receiver object's vtable.
+  intptr_t vmindex = java_lang_invoke_MemberName::vmindex(vmentry);
+  Method* target = (Method*) java_lang_invoke_MemberName::vmtarget(vmentry);
+  int numArgs = target->size_of_parameters();
+  oop recv = STACK_OBJECT(-numArgs);
+  Klass* clazz = recv->klass();
+  Klass* klass_part = InstanceKlass::cast(clazz);
+  klassVtable* vtable = klass_part->vtable();
+  Method* vmtarget = vtable->method_at(vmindex);
+
+  invoke_target(vmtarget, THREAD);
+
+  return 0;
+}
+
+int MethodHandles::method_handle_entry_invalid(Method* method, intptr_t UNUSED, TRAPS) {
+  ShouldNotReachHere();
+  return 0;
+}
+
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* masm,
+                                                                vmIntrinsics::ID iid) {
+  switch (iid) {
+  case vmIntrinsics::_invokeGeneric:
+  case vmIntrinsics::_compiledLambdaForm:
+    // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
+    // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
+    // They all allow an appendix argument.
+    return InterpreterGenerator::generate_entry_impl(masm, (address) MethodHandles::method_handle_entry_invalid);
+  case vmIntrinsics::_invokeBasic:
+    return InterpreterGenerator::generate_entry_impl(masm, (address) MethodHandles::method_handle_entry_invokeBasic);
+  case vmIntrinsics::_linkToStatic:
+  case vmIntrinsics::_linkToSpecial:
+    return InterpreterGenerator::generate_entry_impl(masm, (address) MethodHandles::method_handle_entry_linkToStaticOrSpecial);
+  case vmIntrinsics::_linkToInterface:
+    return InterpreterGenerator::generate_entry_impl(masm, (address) MethodHandles::method_handle_entry_linkToInterface);
+  case vmIntrinsics::_linkToVirtual:
+    return InterpreterGenerator::generate_entry_impl(masm, (address) MethodHandles::method_handle_entry_linkToVirtual);
+  default:
+    ShouldNotReachHere();
+    return NULL;
+  }
+}
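All four linkTo* entries above share one calling shape: JSR 292 linkage pushes a trusted MemberName appendix after the declared arguments, and the entry pops it before dispatching. A hedged sketch of the Zero expression stack as those entries see it (annotation only; offsets mirror the STACK_OBJECT(-n) accesses in the code):

    //   [-1]             MemberName appendix  -- removed by popFromStack()
    //   [-2] .. [-n-1]   the n parameter slots, receiver deepest
    //
    // After the pop, linkToVirtual/linkToInterface find the receiver at
    // STACK_OBJECT(-numArgs), since target->size_of_parameters() counts
    // the receiver. invokeBasic pops nothing; its MethodHandle 'this' is
    // simply the deepest parameter slot.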
--- a/src/cpu/zero/vm/methodHandles_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/methodHandles_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,6 +26,14 @@
 
 // Adapters
 enum /* platform_dependent_constants */ {
-  adapter_code_size = 0
+  adapter_code_size = sizeof(ZeroEntry) * (Interpreter::method_handle_invoke_LAST - Interpreter::method_handle_invoke_FIRST + 1)
 };
 
+private:
+  static oop popFromStack(TRAPS);
+  static void invoke_target(Method* method, TRAPS);
+  static int method_handle_entry_invokeBasic(Method* method, intptr_t UNUSED, TRAPS);
+  static int method_handle_entry_linkToStaticOrSpecial(Method* method, intptr_t UNUSED, TRAPS);
+  static int method_handle_entry_linkToVirtual(Method* method, intptr_t UNUSED, TRAPS);
+  static int method_handle_entry_linkToInterface(Method* method, intptr_t UNUSED, TRAPS);
+  static int method_handle_entry_invalid(Method* method, intptr_t UNUSED, TRAPS);
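The adapter buffer now has to hold one ZeroEntry per method-handle-invoke kind instead of zero bytes. As a hedged worked example of the arithmetic (both numbers are illustrative, not the real constants): with 7 kinds between method_handle_invoke_FIRST and _LAST and a 16-byte ZeroEntry, adapter_code_size works out to 7 * 16 = 112 bytes, one slot per generate_entry_impl call made by generate_method_handle_interpreter_entry.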
--- a/src/cpu/zero/vm/register_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/register_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -114,5 +114,8 @@
 };
 
 CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
+#ifndef DONT_USE_REGISTER_DEFINES
+#define noreg ((Register)(noreg_RegisterEnumValue))
+#endif
 
 #endif // CPU_ZERO_VM_REGISTER_ZERO_HPP
--- a/src/cpu/zero/vm/relocInfo_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/relocInfo_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -77,3 +77,7 @@
                                                        CodeBuffer*       dst) {
   ShouldNotCallThis();
 }
+
+void metadata_Relocation::pd_fix_value(address x) {
+  ShouldNotCallThis();
+}
--- a/src/cpu/zero/vm/sharedRuntime_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/sharedRuntime_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -35,6 +35,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/vframeArray.hpp"
 #include "vmreg_zero.inline.hpp"
+
 #ifdef COMPILER1
 #include "c1/c1_Runtime1.hpp"
 #endif
@@ -47,6 +48,12 @@
 #endif
 
 
+
+static address zero_null_code_stub() {
+  address start = ShouldNotCallThisStub();
+  return start;
+}
+
 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                            VMRegPair *regs,
                                            int total_args_passed,
@@ -63,16 +70,14 @@
                         AdapterFingerPrint *fingerprint) {
   return AdapterHandlerLibrary::new_entry(
     fingerprint,
-    ShouldNotCallThisStub(),
-    ShouldNotCallThisStub(),
-    ShouldNotCallThisStub());
+    CAST_FROM_FN_PTR(address,zero_null_code_stub),
+    CAST_FROM_FN_PTR(address,zero_null_code_stub),
+    CAST_FROM_FN_PTR(address,zero_null_code_stub));
 }
 
 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
                                                 methodHandle method,
                                                 int compile_id,
-                                                int total_args_passed,
-                                                int max_arg,
                                                 BasicType *sig_bt,
                                                 VMRegPair *regs,
                                                 BasicType ret_type) {
@@ -96,19 +101,20 @@
   ShouldNotCallThis();
 }
 
+JRT_LEAF(void, zero_stub())
+  ShouldNotCallThis();
+JRT_END
+
 static RuntimeStub* generate_empty_runtime_stub(const char* name) {
-  CodeBuffer buffer(name, 0, 0);
-  return RuntimeStub::new_runtime_stub(name, &buffer, 0, 0, NULL, false);
+  return CAST_FROM_FN_PTR(RuntimeStub*,zero_stub);
 }
 
 static SafepointBlob* generate_empty_safepoint_blob() {
-  CodeBuffer buffer("handler_blob", 0, 0);
-  return SafepointBlob::create(&buffer, NULL, 0);
+  return CAST_FROM_FN_PTR(SafepointBlob*,zero_stub);
 }
 
 static DeoptimizationBlob* generate_empty_deopt_blob() {
-  CodeBuffer buffer("handler_blob", 0, 0);
-  return DeoptimizationBlob::create(&buffer, NULL, 0, 0, 0, 0);
+  return CAST_FROM_FN_PTR(DeoptimizationBlob*,zero_stub);
 }
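Zero never executes generated blobs, so the three generators above now just alias zero_stub, a JRT_LEAF that trips ShouldNotCallThis() if control ever arrives. CAST_FROM_FN_PTR is HotSpot's stock pointer-punning macro from utilities/globalDefinitions.hpp; it is approximately (a sketch from memory, not a verbatim quote):

    // Forces a function pointer through an integral word into an arbitrary
    // pointer type, sidestepping the function/object pointer cast warning.
    #define CAST_FROM_FN_PTR(new_type, func_ptr) \
        ((new_type)((address_word)(func_ptr)))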
 
 
@@ -116,7 +122,7 @@
   _deopt_blob = generate_empty_deopt_blob();
 }
 
-SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
   return generate_empty_safepoint_blob();
 }
 
@@ -124,6 +130,7 @@
   return generate_empty_runtime_stub("resolve_blob");
 }
 
+
 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                          VMRegPair *regs,
                                          int total_args_passed) {
--- a/src/cpu/zero/vm/sharkFrame_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/sharkFrame_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -68,6 +68,10 @@
     return (address) value_of_word(pc_off);
   }
 
+  void set_pc(address pc) const {
+    *((address*) addr_of_word(pc_off)) = pc;
+  }
+
   intptr_t* unextended_sp() const {
     return (intptr_t *) value_of_word(unextended_sp_off);
   }
--- a/src/cpu/zero/vm/stubGenerator_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/stubGenerator_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -38,14 +38,9 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "stack_zero.inline.hpp"
 #include "utilities/top.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 #ifdef COMPILER2
 #include "opto/runtime.hpp"
 #endif
--- a/src/cpu/zero/vm/stubRoutines_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/stubRoutines_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,9 +27,4 @@
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/stubRoutines.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
--- a/src/cpu/zero/vm/vmStructs_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/cpu/zero/vm/vmStructs_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,28 +30,12 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
-
-  /* NOTE that we do not use the last_entry() macro here; it is used  */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_STRUCTS_OS_CPU macro (and must  */
-  /* be present there)                                                */
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
 
-#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
-
-  /* NOTE that we do not use the last_entry() macro here; it is used  */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_TYPES_OS_CPU macro (and must    */
-  /* be present there)                                                */
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
 
-#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-
-  /* NOTE that we do not use the last_entry() macro here; it is used        */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must  */
-  /* be present there)                                                      */
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
-#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-
-  /* NOTE that we do not use the last_entry() macro here; it is used         */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must  */
-  /* be present there)                                                       */
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // CPU_ZERO_VM_VMSTRUCTS_ZERO_HPP
--- a/src/os/bsd/vm/attachListener_bsd.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/attachListener_bsd.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -342,7 +342,6 @@
 
     // get the credentials of the peer and check the effective uid/guid
     // - check with jeff on this.
-#ifdef _ALLBSD_SOURCE
     uid_t puid;
     gid_t pgid;
     if (::getpeereid(s, &puid, &pgid) != 0) {
@@ -350,17 +349,6 @@
       RESTARTABLE(::close(s), res);
       continue;
     }
-#else
-    struct ucred cred_info;
-    socklen_t optlen = sizeof(cred_info);
-    if (::getsockopt(s, SOL_SOCKET, SO_PEERCRED, (void*)&cred_info, &optlen) == -1) {
-      int res;
-      RESTARTABLE(::close(s), res);
-      continue;
-    }
-    uid_t puid = cred_info.uid;
-    gid_t pgid = cred_info.gid;
-#endif
     uid_t euid = geteuid();
     gid_t egid = getegid();
 
--- a/src/os/bsd/vm/mutex_bsd.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/mutex_bsd.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,7 +26,7 @@
 #include "mutex_bsd.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/mutex.hpp"
-#include "thread_bsd.inline.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/events.hpp"
 
 // put OS-includes here
--- a/src/os/bsd/vm/mutex_bsd.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/mutex_bsd.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,7 +27,7 @@
 
 #include "os_bsd.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
-#include "thread_bsd.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 
 // Reconciliation History
--- a/src/os/bsd/vm/osThread_bsd.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/osThread_bsd.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,29 +23,10 @@
  */
 
 // no precompiled headers
-#include "runtime/atomic.hpp"
-#include "runtime/handles.inline.hpp"
 #include "runtime/mutexLocker.hpp"
-#include "runtime/os.hpp"
 #include "runtime/osThread.hpp"
-#include "runtime/safepoint.hpp"
-#include "runtime/vmThread.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_zero
-# include "assembler_zero.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_arm
-# include "assembler_arm.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_ppc
-# include "assembler_ppc.inline.hpp"
-#endif
 
+#include <signal.h>
 
 void OSThread::pd_initialize() {
   assert(this != NULL, "check");
--- a/src/os/bsd/vm/osThread_bsd.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/osThread_bsd.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -39,18 +39,12 @@
 
  private:
 
-#ifdef _ALLBSD_SOURCE
-
 #ifdef __APPLE__
   typedef thread_t thread_id_t;
 #else
   typedef pthread_t thread_id_t;
 #endif
 
-#else
-  typedef pid_t thread_id_t;
-#endif
-
   // _pthread_id is the pthread id, which is used by library calls
   // (e.g. pthread_kill).
   pthread_t _pthread_id;
--- a/src/os/bsd/vm/os_bsd.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/os_bsd.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
 #include "compiler/compileBroker.hpp"
+#include "compiler/disassembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "jvm_bsd.h"
 #include "memory/allocation.inline.hpp"
@@ -52,36 +53,16 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/statSampler.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
 #include "runtime/timer.hpp"
 #include "services/attachListener.hpp"
 #include "services/runtimeService.hpp"
-#include "thread_bsd.inline.hpp"
 #include "utilities/decoder.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/events.hpp"
 #include "utilities/growableArray.hpp"
 #include "utilities/vmError.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-# include "nativeInst_x86.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-# include "nativeInst_sparc.hpp"
-#endif
-#ifdef TARGET_ARCH_zero
-# include "assembler_zero.inline.hpp"
-# include "nativeInst_zero.hpp"
-#endif
-#ifdef TARGET_ARCH_arm
-# include "assembler_arm.inline.hpp"
-# include "nativeInst_arm.hpp"
-#endif
-#ifdef TARGET_ARCH_ppc
-# include "assembler_ppc.inline.hpp"
-# include "nativeInst_ppc.hpp"
-#endif
 
 // put OS-includes here
 # include <sys/types.h>
@@ -108,14 +89,8 @@
 # include <semaphore.h>
 # include <fcntl.h>
 # include <string.h>
-#ifdef _ALLBSD_SOURCE
 # include <sys/param.h>
 # include <sys/sysctl.h>
-#else
-# include <syscall.h>
-# include <sys/sysinfo.h>
-# include <gnu/libc-version.h>
-#endif
 # include <sys/ipc.h>
 # include <sys/shm.h>
 #ifndef __APPLE__
@@ -150,25 +125,10 @@
 // global variables
 julong os::Bsd::_physical_memory = 0;
 
-#ifndef _ALLBSD_SOURCE
-address   os::Bsd::_initial_thread_stack_bottom = NULL;
-uintptr_t os::Bsd::_initial_thread_stack_size   = 0;
-#endif
 
 int (*os::Bsd::_clock_gettime)(clockid_t, struct timespec *) = NULL;
-#ifndef _ALLBSD_SOURCE
-int (*os::Bsd::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;
-Mutex* os::Bsd::_createThread_lock = NULL;
-#endif
 pthread_t os::Bsd::_main_thread;
 int os::Bsd::_page_size = -1;
-#ifndef _ALLBSD_SOURCE
-bool os::Bsd::_is_floating_stack = false;
-bool os::Bsd::_is_NPTL = false;
-bool os::Bsd::_supports_fast_thread_cpu_time = false;
-const char * os::Bsd::_glibc_version = NULL;
-const char * os::Bsd::_libpthread_version = NULL;
-#endif
 
 static jlong initial_time_count=0;
 
@@ -176,7 +136,7 @@
 
 // For diagnostics to print a message once. see run_periodic_checks
 static sigset_t check_signal_done;
-static bool check_signals = true;;
+static bool check_signals = true;
 
 static pid_t _initial_pid = 0;
 
@@ -198,16 +158,8 @@
 }
 
 julong os::Bsd::available_memory() {
-#ifdef _ALLBSD_SOURCE
   // XXXBSD: this is just a stopgap implementation
   return physical_memory() >> 2;
-#else
-  // values in struct sysinfo are "unsigned long"
-  struct sysinfo si;
-  sysinfo(&si);
-
-  return (julong)si.freeram * si.mem_unit;
-#endif
 }
 
 julong os::physical_memory() {
@@ -255,22 +207,6 @@
 }
 
 
-#ifndef _ALLBSD_SOURCE
-#ifndef SYS_gettid
-// i386: 224, ia64: 1105, amd64: 186, sparc 143
-#ifdef __ia64__
-#define SYS_gettid 1105
-#elif __i386__
-#define SYS_gettid 224
-#elif __amd64__
-#define SYS_gettid 186
-#elif __sparc__
-#define SYS_gettid 143
-#else
-#error define gettid for the arch
-#endif
-#endif
-#endif
 
 // Cpu architecture string
 #if   defined(ZERO)
@@ -302,63 +238,37 @@
 #define COMPILER_VARIANT "client"
 #endif
 
-#ifndef _ALLBSD_SOURCE
-// pid_t gettid()
-//
-// Returns the kernel thread id of the currently running thread. Kernel
-// thread id is used to access /proc.
-//
-// (Note that getpid() on BsdThreads returns kernel thread id too; but
-// on NPTL, it returns the same pid for all threads, as required by POSIX.)
-//
-pid_t os::Bsd::gettid() {
-  int rslt = syscall(SYS_gettid);
-  if (rslt == -1) {
-     // old kernel, no NPTL support
-     return getpid();
-  } else {
-     return (pid_t)rslt;
-  }
-}
-
-// Most versions of bsd have a bug where the number of processors are
-// determined by looking at the /proc file system.  In a chroot environment,
-// the system call returns 1.  This causes the VM to act as if it is
-// a single processor and elide locking (see is_MP() call).
-static bool unsafe_chroot_detected = false;
-static const char *unstable_chroot_error = "/proc file system not found.\n"
-                     "Java may be unstable running multithreaded in a chroot "
-                     "environment on Bsd when /proc filesystem is not mounted.";
-#endif
-
-#ifdef _ALLBSD_SOURCE
+
 void os::Bsd::initialize_system_info() {
   int mib[2];
   size_t len;
   int cpu_val;
-  u_long mem_val;
+  julong mem_val;
 
   /* get processors count via hw.ncpus sysctl */
   mib[0] = CTL_HW;
   mib[1] = HW_NCPU;
   len = sizeof(cpu_val);
   if (sysctl(mib, 2, &cpu_val, &len, NULL, 0) != -1 && cpu_val >= 1) {
+       assert(len == sizeof(cpu_val), "unexpected data size");
        set_processor_count(cpu_val);
   }
   else {
        set_processor_count(1);   // fallback
   }
 
-  /* get physical memory via hw.usermem sysctl (hw.usermem is used
-   * instead of hw.physmem because we need size of allocatable memory
+  /* get physical memory via hw.memsize sysctl (hw.memsize is used
+   * since it returns a 64-bit value)
    */
   mib[0] = CTL_HW;
-  mib[1] = HW_USERMEM;
+  mib[1] = HW_MEMSIZE;
   len = sizeof(mem_val);
-  if (sysctl(mib, 2, &mem_val, &len, NULL, 0) != -1)
+  if (sysctl(mib, 2, &mem_val, &len, NULL, 0) != -1) {
+       assert(len == sizeof(mem_val), "unexpected data size");
        _physical_memory = mem_val;
-  else
+  } else {
        _physical_memory = 256*1024*1024;       // fallback (XXXBSD?)
+  }
 
 #ifdef __OpenBSD__
   {
@@ -370,24 +280,6 @@
   }
 #endif
 }
-#else
-void os::Bsd::initialize_system_info() {
-  set_processor_count(sysconf(_SC_NPROCESSORS_CONF));
-  if (processor_count() == 1) {
-    pid_t pid = os::Bsd::gettid();
-    char fname[32];
-    jio_snprintf(fname, sizeof(fname), "/proc/%d", pid);
-    FILE *fp = fopen(fname, "r");
-    if (fp == NULL) {
-      unsafe_chroot_detected = true;
-    } else {
-      fclose(fp);
-    }
-  }
-  _physical_memory = (julong)sysconf(_SC_PHYS_PAGES) * (julong)sysconf(_SC_PAGESIZE);
-  assert(processor_count() > 0, "bsd error");
-}
-#endif
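The surviving path queries sysctl directly; the switch from hw.usermem to hw.memsize matters because HW_USERMEM reports an unsigned long (32 bits on ILP32 builds) while HW_MEMSIZE is defined as a 64-bit quantity. A standalone sketch of the same query, assuming a macOS host where HW_MEMSIZE is available:

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    int main(void) {
      int mib[2] = { CTL_HW, HW_MEMSIZE };
      uint64_t mem = 0;
      size_t len = sizeof(mem);
      // Mirrors os::Bsd::initialize_system_info(), including the len check.
      if (sysctl(mib, 2, &mem, &len, NULL, 0) == 0 && len == sizeof(mem)) {
        printf("physical memory: %llu bytes\n", (unsigned long long) mem);
      }
      return 0;
    }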
 
 #ifdef __APPLE__
 static const char *get_home() {
@@ -409,12 +301,12 @@
 
   // The next steps are taken in the product version:
   //
-  // Obtain the JAVA_HOME value from the location of libjvm[_g].so.
+  // Obtain the JAVA_HOME value from the location of libjvm.so.
   // This library should be located at:
-  // <JAVA_HOME>/jre/lib/<arch>/{client|server}/libjvm[_g].so.
+  // <JAVA_HOME>/jre/lib/<arch>/{client|server}/libjvm.so.
   //
   // If "/jre/lib/" appears at the right place in the path, then we
-  // assume libjvm[_g].so is installed in a JDK and we use this path.
+  // assume libjvm.so is installed in a JDK and we use this path.
   //
   // Otherwise exit with message: "Could not create the Java virtual machine."
   //
@@ -424,9 +316,9 @@
   // instead of exit check for $JAVA_HOME environment variable.
   //
   // If it is defined and we are able to locate $JAVA_HOME/jre/lib/<arch>,
-  // then we append a fake suffix "hotspot/libjvm[_g].so" to this path so
-  // it looks like libjvm[_g].so is installed there
-  // <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm[_g].so.
+  // then we append a fake suffix "hotspot/libjvm.so" to this path so
+  // it looks like libjvm.so is installed there
+  // <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm.so.
   //
   // Otherwise exit.
   //
@@ -744,171 +636,6 @@
   }
 }
 
-#ifndef _ALLBSD_SOURCE
-//////////////////////////////////////////////////////////////////////////////
-// detecting pthread library
-
-void os::Bsd::libpthread_init() {
-  // Save glibc and pthread version strings. Note that _CS_GNU_LIBC_VERSION
-  // and _CS_GNU_LIBPTHREAD_VERSION are supported in glibc >= 2.3.2. Use a
-  // generic name for earlier versions.
-  // Define macros here so we can build HotSpot on old systems.
-# ifndef _CS_GNU_LIBC_VERSION
-# define _CS_GNU_LIBC_VERSION 2
-# endif
-# ifndef _CS_GNU_LIBPTHREAD_VERSION
-# define _CS_GNU_LIBPTHREAD_VERSION 3
-# endif
-
-  size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0);
-  if (n > 0) {
-     char *str = (char *)malloc(n);
-     confstr(_CS_GNU_LIBC_VERSION, str, n);
-     os::Bsd::set_glibc_version(str);
-  } else {
-     // _CS_GNU_LIBC_VERSION is not supported, try gnu_get_libc_version()
-     static char _gnu_libc_version[32];
-     jio_snprintf(_gnu_libc_version, sizeof(_gnu_libc_version),
-              "glibc %s %s", gnu_get_libc_version(), gnu_get_libc_release());
-     os::Bsd::set_glibc_version(_gnu_libc_version);
-  }
-
-  n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
-  if (n > 0) {
-     char *str = (char *)malloc(n);
-     confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n);
-     // Vanilla RH-9 (glibc 2.3.2) has a bug that confstr() always tells
-     // us "NPTL-0.29" even we are running with BsdThreads. Check if this
-     // is the case. BsdThreads has a hard limit on max number of threads.
-     // So sysconf(_SC_THREAD_THREADS_MAX) will return a positive value.
-     // On the other hand, NPTL does not have such a limit, sysconf()
-     // will return -1 and errno is not changed. Check if it is really NPTL.
-     if (strcmp(os::Bsd::glibc_version(), "glibc 2.3.2") == 0 &&
-         strstr(str, "NPTL") &&
-         sysconf(_SC_THREAD_THREADS_MAX) > 0) {
-       free(str);
-       os::Bsd::set_libpthread_version("bsdthreads");
-     } else {
-       os::Bsd::set_libpthread_version(str);
-     }
-  } else {
-    // glibc before 2.3.2 only has BsdThreads.
-    os::Bsd::set_libpthread_version("bsdthreads");
-  }
-
-  if (strstr(libpthread_version(), "NPTL")) {
-     os::Bsd::set_is_NPTL();
-  } else {
-     os::Bsd::set_is_BsdThreads();
-  }
-
-  // BsdThreads have two flavors: floating-stack mode, which allows variable
-  // stack size; and fixed-stack mode. NPTL is always floating-stack.
-  if (os::Bsd::is_NPTL() || os::Bsd::supports_variable_stack_size()) {
-     os::Bsd::set_is_floating_stack();
-  }
-}
-
-/////////////////////////////////////////////////////////////////////////////
-// thread stack
-
-// Force Bsd kernel to expand current thread stack. If "bottom" is close
-// to the stack guard, caller should block all signals.
-//
-// MAP_GROWSDOWN:
-//   A special mmap() flag that is used to implement thread stacks. It tells
-//   kernel that the memory region should extend downwards when needed. This
-//   allows early versions of BsdThreads to only mmap the first few pages
-//   when creating a new thread. Bsd kernel will automatically expand thread
-//   stack as needed (on page faults).
-//
-//   However, because the memory region of a MAP_GROWSDOWN stack can grow on
-//   demand, if a page fault happens outside an already mapped MAP_GROWSDOWN
-//   region, it's hard to tell if the fault is due to a legitimate stack
-//   access or because of reading/writing non-existent memory (e.g. buffer
-//   overrun). As a rule, if the fault happens below current stack pointer,
-//   Bsd kernel does not expand stack, instead a SIGSEGV is sent to the
-//   application (see Bsd kernel fault.c).
-//
-//   This Bsd feature can cause SIGSEGV when VM bangs thread stack for
-//   stack overflow detection.
-//
-//   Newer version of BsdThreads (since glibc-2.2, or, RH-7.x) and NPTL do
-//   not use this flag. However, the stack of initial thread is not created
-//   by pthread, it is still MAP_GROWSDOWN. Also it's possible (though
-//   unlikely) that user code can create a thread with MAP_GROWSDOWN stack
-//   and then attach the thread to JVM.
-//
-// To get around the problem and allow stack banging on Bsd, we need to
-// manually expand thread stack after receiving the SIGSEGV.
-//
-// There are two ways to expand thread stack to address "bottom", we used
-// both of them in JVM before 1.5:
-//   1. adjust stack pointer first so that it is below "bottom", and then
-//      touch "bottom"
-//   2. mmap() the page in question
-//
-// Now that the alternate signal stack is gone, it's harder to use 2. For instance,
-// if current sp is already near the lower end of page 101, and we need to
-// call mmap() to map page 100, it is possible that part of the mmap() frame
-// will be placed in page 100. When page 100 is mapped, it is zero-filled.
-// That will destroy the mmap() frame and cause VM to crash.
-//
-// The following code works by adjusting sp first, then accessing the "bottom"
-// page to force a page fault. Bsd kernel will then automatically expand the
-// stack mapping.
-//
-// _expand_stack_to() assumes its frame size is less than page size, which
-// should always be true if the function is not inlined.
-
-#if __GNUC__ < 3    // gcc 2.x does not support noinline attribute
-#define NOINLINE
-#else
-#define NOINLINE __attribute__ ((noinline))
-#endif
-
-static void _expand_stack_to(address bottom) NOINLINE;
-
-static void _expand_stack_to(address bottom) {
-  address sp;
-  size_t size;
-  volatile char *p;
-
-  // Adjust bottom to point to the largest address within the same page, it
-  // gives us a one-page buffer if alloca() allocates slightly more memory.
-  bottom = (address)align_size_down((uintptr_t)bottom, os::Bsd::page_size());
-  bottom += os::Bsd::page_size() - 1;
-
-  // sp might be slightly above current stack pointer; if that's the case, we
-  // will alloca() a little more space than necessary, which is OK. Don't use
-  // os::current_stack_pointer(), as its result can be slightly below current
-  // stack pointer, causing us to not alloca enough to reach "bottom".
-  sp = (address)&sp;
-
-  if (sp > bottom) {
-    size = sp - bottom;
-    p = (volatile char *)alloca(size);
-    assert(p != NULL && p <= (volatile char *)bottom, "alloca problem?");
-    p[0] = '\0';
-  }
-}
-
-bool os::Bsd::manually_expand_stack(JavaThread * t, address addr) {
-  assert(t!=NULL, "just checking");
-  assert(t->osthread()->expanding_stack(), "expand should be set");
-  assert(t->stack_base() != NULL, "stack_base was not initialized");
-
-  if (addr <  t->stack_base() && addr >= t->stack_yellow_zone_base()) {
-    sigset_t mask_all, old_sigset;
-    sigfillset(&mask_all);
-    pthread_sigmask(SIG_SETMASK, &mask_all, &old_sigset);
-    _expand_stack_to(addr);
-    pthread_sigmask(SIG_SETMASK, &old_sigset, NULL);
-    return true;
-  }
-  return false;
-}
-#endif
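
The comment block above describes the adjust-sp-then-touch trick in the abstract. As a rough
standalone illustration of the same idea (page size hardcoded to 4K here, unlike the
os::Bsd::page_size() call in the real code; this is a sketch, not HotSpot code):

    // Sketch of "adjust sp, then touch": move the stack pointer below the
    // target address with alloca(), then fault the page in with a store.
    #include <alloca.h>   // <stdlib.h> on the BSDs
    #include <stdint.h>

    static void touch_down_to(char* bottom) {
      const uintptr_t page = 4096;
      // Last byte of bottom's page: a one-page cushion in case alloca()
      // hands back slightly more than requested.
      uintptr_t target = ((uintptr_t)bottom & ~(page - 1)) + page - 1;
      char* sp = (char*)&target;            // approximate stack pointer
      if (sp > (char*)target) {
        volatile char* p = (volatile char*)alloca(sp - (char*)target);
        p[0] = '\0';                        // touch: forces the kernel to map it
      }
    }

As with _expand_stack_to() above, this only works if the function's own frame is smaller
than a page, hence the noinline treatment.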
 
 //////////////////////////////////////////////////////////////////////////////
 // create new thread
@@ -917,43 +644,7 @@
 
 // check if it's safe to start a new thread
 static bool _thread_safety_check(Thread* thread) {
-#ifdef _ALLBSD_SOURCE
-    return true;
-#else
-  if (os::Bsd::is_BsdThreads() && !os::Bsd::is_floating_stack()) {
-    // Fixed stack BsdThreads (SuSE Bsd/x86, and some versions of Redhat)
-    //   Heap is mmap'ed at lower end of memory space. Thread stacks are
-    //   allocated (MAP_FIXED) from high address space. Every thread stack
-    //   occupies a fixed size slot (usually 2Mbytes, but user can change
-    //   it to other values if they rebuild BsdThreads).
-    //
-    // Problem with MAP_FIXED is that mmap() can still succeed even when part of
-    // the memory region has already been mmap'ed. That means if we have too
-    // many threads and/or very large heap, eventually thread stack will
-    // collide with heap.
-    //
-    // Here we try to prevent heap/stack collision by comparing current
-    // stack bottom with the highest address that has been mmap'ed by JVM
-    // plus a safety margin for memory maps created by native code.
-    //
-    // This feature can be disabled by setting ThreadSafetyMargin to 0
-    //
-    if (ThreadSafetyMargin > 0) {
-      address stack_bottom = os::current_stack_base() - os::current_stack_size();
-
-      // not safe if our stack extends below the safety margin
-      return stack_bottom - ThreadSafetyMargin >= highest_vm_reserved_address();
-    } else {
-      return true;
-    }
-  } else {
-    // Floating stack BsdThreads or NPTL:
-    //   Unlike fixed stack BsdThreads, thread stacks are not MAP_FIXED. When
-    //   there's not enough space left, pthread_create() will fail. If we come
-    //   here, that means enough space has been reserved for stack.
-    return true;
-  }
-#endif
+  return true;
 }
 
 #ifdef __APPLE__
@@ -991,7 +682,6 @@
     return NULL;
   }
 
-#ifdef _ALLBSD_SOURCE
 #ifdef __APPLE__
   // thread_id is mach thread on macos
   osthread->set_thread_id(::mach_thread_self());
@@ -999,17 +689,6 @@
   // thread_id is pthread_id on BSD
   osthread->set_thread_id(::pthread_self());
 #endif
-#else
-  // thread_id is kernel thread id (similar to Solaris LWP id)
-  osthread->set_thread_id(os::Bsd::gettid());
-
-  if (UseNUMA) {
-    int lgrp_id = os::numa_get_group_id();
-    if (lgrp_id != -1) {
-      thread->set_lgrp_id(lgrp_id);
-    }
-  }
-#endif
   // initialize signal mask for this thread
   os::Bsd::hotspot_sigmask(thread);
 
@@ -1099,23 +778,9 @@
     // let pthread_create() pick the default value.
   }
 
-#ifndef _ALLBSD_SOURCE
-  // glibc guard page
-  pthread_attr_setguardsize(&attr, os::Bsd::default_guard_size(thr_type));
-#endif
-
   ThreadState state;
 
   {
-
-#ifndef _ALLBSD_SOURCE
-    // Serialize thread creation if we are running with fixed stack BsdThreads
-    bool lock = os::Bsd::is_BsdThreads() && !os::Bsd::is_floating_stack();
-    if (lock) {
-      os::Bsd::createThread_lock()->lock_without_safepoint_check();
-    }
-#endif
-
     pthread_t tid;
     int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread);
 
@@ -1128,9 +793,6 @@
       // Need to clean up stuff we've allocated so far
       thread->set_osthread(NULL);
       delete osthread;
-#ifndef _ALLBSD_SOURCE
-      if (lock) os::Bsd::createThread_lock()->unlock();
-#endif
       return false;
     }
 
@@ -1146,11 +808,6 @@
       }
     }
 
-#ifndef _ALLBSD_SOURCE
-    if (lock) {
-      os::Bsd::createThread_lock()->unlock();
-    }
-#endif
   }
 
   // Aborted due to thread limit being reached
@@ -1188,15 +845,11 @@
   }
 
   // Store pthread info into the OSThread
-#ifdef _ALLBSD_SOURCE
 #ifdef __APPLE__
   osthread->set_thread_id(::mach_thread_self());
 #else
   osthread->set_thread_id(::pthread_self());
 #endif
-#else
-  osthread->set_thread_id(os::Bsd::gettid());
-#endif
   osthread->set_pthread_id(::pthread_self());
 
   // initialize floating point control register
@@ -1207,35 +860,6 @@
 
   thread->set_osthread(osthread);
 
-#ifndef _ALLBSD_SOURCE
-  if (UseNUMA) {
-    int lgrp_id = os::numa_get_group_id();
-    if (lgrp_id != -1) {
-      thread->set_lgrp_id(lgrp_id);
-    }
-  }
-
-  if (os::Bsd::is_initial_thread()) {
-    // If current thread is initial thread, its stack is mapped on demand,
-    // see notes about MAP_GROWSDOWN. Here we try to force kernel to map
-    // the entire stack region to avoid SEGV in stack banging.
-    // It is also useful to get around the heap-stack-gap problem on SuSE
-    // kernel (see 4821821 for details). We first expand stack to the top
-    // of yellow zone, then enable stack yellow zone (order is significant,
-    // enabling yellow zone first will crash JVM on SuSE Bsd), so there
-    // is no gap between the last two virtual memory regions.
-
-    JavaThread *jt = (JavaThread *)thread;
-    address addr = jt->stack_yellow_zone_base();
-    assert(addr != NULL, "initialization problem?");
-    assert(jt->stack_available(addr) > 0, "stack guard should not be enabled");
-
-    osthread->set_expanding_stack();
-    os::Bsd::manually_expand_stack(jt, addr);
-    osthread->clear_expanding_stack();
-  }
-#endif
-
   // initialize signal mask for this thread
   // and save the caller's signal mask
   os::Bsd::hotspot_sigmask(thread);
@@ -1290,247 +914,6 @@
   return ThreadLocalStorage::thread();
 }
 
-//////////////////////////////////////////////////////////////////////////////
-// initial thread
-
-#ifndef _ALLBSD_SOURCE
-// Check if current thread is the initial thread, similar to Solaris thr_main.
-bool os::Bsd::is_initial_thread(void) {
-  char dummy;
-  // If called before init complete, thread stack bottom will be null.
-  // Can be called if fatal error occurs before initialization.
-  if (initial_thread_stack_bottom() == NULL) return false;
-  assert(initial_thread_stack_bottom() != NULL &&
-         initial_thread_stack_size()   != 0,
-         "os::init did not locate initial thread's stack region");
-  if ((address)&dummy >= initial_thread_stack_bottom() &&
-      (address)&dummy < initial_thread_stack_bottom() + initial_thread_stack_size())
-       return true;
-  else return false;
-}
-
-// Find the virtual memory area that contains addr
-static bool find_vma(address addr, address* vma_low, address* vma_high) {
-  FILE *fp = fopen("/proc/self/maps", "r");
-  if (fp) {
-    address low, high;
-    while (!feof(fp)) {
-      if (fscanf(fp, "%p-%p", &low, &high) == 2) {
-        if (low <= addr && addr < high) {
-           if (vma_low)  *vma_low  = low;
-           if (vma_high) *vma_high = high;
-           fclose (fp);
-           return true;
-        }
-      }
-      for (;;) {
-        int ch = fgetc(fp);
-        if (ch == EOF || ch == (int)'\n') break;
-      }
-    }
-    fclose(fp);
-  }
-  return false;
-}
-
-// Locate initial thread stack. This special handling of initial thread stack
-// is needed because pthread_getattr_np() on most (all?) Bsd distros returns
-// a bogus value for the initial thread.
-void os::Bsd::capture_initial_stack(size_t max_size) {
-  // stack size is the easy part, get it from RLIMIT_STACK
-  size_t stack_size;
-  struct rlimit rlim;
-  getrlimit(RLIMIT_STACK, &rlim);
-  stack_size = rlim.rlim_cur;
-
-  // 6308388: a bug in ld.so will relocate its own .data section to the
-  //   lower end of primordial stack; reduce ulimit -s value a little bit
-  //   so we won't install guard page on ld.so's data section.
-  stack_size -= 2 * page_size();
-
-  // 4441425: avoid crash with "unlimited" stack size on SuSE 7.1 or Redhat
-  //   7.1, in both cases we will get 2G in return value.
-  // 4466587: glibc 2.2.x compiled w/o "--enable-kernel=2.4.0" (RH 7.0,
-  //   SuSE 7.2, Debian) can not handle alternate signal stack correctly
-  //   for initial thread if its stack size exceeds 6M. Cap it at 2M,
-  //   in case other parts in glibc still assumes 2M max stack size.
-  // FIXME: alt signal stack is gone, maybe we can relax this constraint?
-#ifndef IA64
-  if (stack_size > 2 * K * K) stack_size = 2 * K * K;
-#else
-  // Problem still exists RH7.2 (IA64 anyway) but 2MB is a little small
-  if (stack_size > 4 * K * K) stack_size = 4 * K * K;
-#endif
-
-  // Try to figure out where the stack base (top) is. This is harder.
-  //
-  // When an application is started, glibc saves the initial stack pointer in
-  // a global variable "__libc_stack_end", which is then used by system
-  // libraries. __libc_stack_end should be pretty close to stack top. The
-  // variable is available since the very early days. However, because it is
-  // a private interface, it could disappear in the future.
-  //
-  // Bsd kernel saves start_stack information in /proc/<pid>/stat. Similar
-  // to __libc_stack_end, it is very close to stack top, but isn't the real
-  // stack top. Note that /proc may not exist if VM is running as a chroot
-  // program, so reading /proc/<pid>/stat could fail. Also the contents of
-  // /proc/<pid>/stat could change in the future (though unlikely).
-  //
-  // We try __libc_stack_end first. If that doesn't work, look for
-  // /proc/<pid>/stat. If neither of them works, we use current stack pointer
-  // as a hint, which should work well in most cases.
-
-  uintptr_t stack_start;
-
-  // try __libc_stack_end first
-  uintptr_t *p = (uintptr_t *)dlsym(RTLD_DEFAULT, "__libc_stack_end");
-  if (p && *p) {
-    stack_start = *p;
-  } else {
-    // see if we can get the start_stack field from /proc/self/stat
-    FILE *fp;
-    int pid;
-    char state;
-    int ppid;
-    int pgrp;
-    int session;
-    int nr;
-    int tpgrp;
-    unsigned long flags;
-    unsigned long minflt;
-    unsigned long cminflt;
-    unsigned long majflt;
-    unsigned long cmajflt;
-    unsigned long utime;
-    unsigned long stime;
-    long cutime;
-    long cstime;
-    long prio;
-    long nice;
-    long junk;
-    long it_real;
-    uintptr_t start;
-    uintptr_t vsize;
-    intptr_t rss;
-    uintptr_t rsslim;
-    uintptr_t scodes;
-    uintptr_t ecode;
-    int i;
-
-    // Figure what the primordial thread stack base is. Code is inspired
-    // by email from Hans Boehm. /proc/self/stat begins with current pid,
-    // followed by command name surrounded by parentheses, state, etc.
-    char stat[2048];
-    int statlen;
-
-    fp = fopen("/proc/self/stat", "r");
-    if (fp) {
-      statlen = fread(stat, 1, 2047, fp);
-      stat[statlen] = '\0';
-      fclose(fp);
-
-      // Skip pid and the command string. Note that we could be dealing with
-      // weird command names, e.g. user could decide to rename java launcher
-      // to "java 1.4.2 :)", then the stat file would look like
-      //                1234 (java 1.4.2 :)) R ... ...
-      // We don't really need to know the command string, just find the last
-      // occurrence of ")" and then start parsing from there. See bug 4726580.
-      char * s = strrchr(stat, ')');
-
-      i = 0;
-      if (s) {
-        // Skip blank chars
-        do s++; while (isspace(*s));
-
-#define _UFM UINTX_FORMAT
-#define _DFM INTX_FORMAT
-
-        /*                                     1   1   1   1   1   1   1   1   1   1   2   2    2    2    2    2    2    2    2 */
-        /*              3  4  5  6  7  8   9   0   1   2   3   4   5   6   7   8   9   0   1    2    3    4    5    6    7    8 */
-        i = sscanf(s, "%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld " _UFM _UFM _DFM _UFM _UFM _UFM _UFM,
-             &state,          /* 3  %c  */
-             &ppid,           /* 4  %d  */
-             &pgrp,           /* 5  %d  */
-             &session,        /* 6  %d  */
-             &nr,             /* 7  %d  */
-             &tpgrp,          /* 8  %d  */
-             &flags,          /* 9  %lu  */
-             &minflt,         /* 10 %lu  */
-             &cminflt,        /* 11 %lu  */
-             &majflt,         /* 12 %lu  */
-             &cmajflt,        /* 13 %lu  */
-             &utime,          /* 14 %lu  */
-             &stime,          /* 15 %lu  */
-             &cutime,         /* 16 %ld  */
-             &cstime,         /* 17 %ld  */
-             &prio,           /* 18 %ld  */
-             &nice,           /* 19 %ld  */
-             &junk,           /* 20 %ld  */
-             &it_real,        /* 21 %ld  */
-             &start,          /* 22 UINTX_FORMAT */
-             &vsize,          /* 23 UINTX_FORMAT */
-             &rss,            /* 24 INTX_FORMAT  */
-             &rsslim,         /* 25 UINTX_FORMAT */
-             &scodes,         /* 26 UINTX_FORMAT */
-             &ecode,          /* 27 UINTX_FORMAT */
-             &stack_start);   /* 28 UINTX_FORMAT */
-      }
-
-#undef _UFM
-#undef _DFM
-
-      if (i != 28 - 2) {
-         assert(false, "Bad conversion from /proc/self/stat");
-         // product mode - assume we are the initial thread, good luck in the
-         // embedded case.
-         warning("Can't detect initial thread stack location - bad conversion");
-         stack_start = (uintptr_t) &rlim;
-      }
-    } else {
-      // For some reason we can't open /proc/self/stat (for example, running on
-      // FreeBSD with a Bsd emulator, or inside chroot); this should work for
-      // most cases, so don't abort:
-      warning("Can't detect initial thread stack location - no /proc/self/stat");
-      stack_start = (uintptr_t) &rlim;
-    }
-  }
-
-  // Now we have a pointer (stack_start) very close to the stack top, the
-  // next thing to do is to figure out the exact location of stack top. We
-  // can find out the virtual memory area that contains stack_start by
-  // reading /proc/self/maps, it should be the last vma in /proc/self/maps,
-  // and its upper limit is the real stack top. (again, this would fail if
-  // running inside chroot, because /proc may not exist.)
-
-  uintptr_t stack_top;
-  address low, high;
-  if (find_vma((address)stack_start, &low, &high)) {
-    // success, "high" is the true stack top. (ignore "low", because initial
-    // thread stack grows on demand, its real bottom is high - RLIMIT_STACK.)
-    stack_top = (uintptr_t)high;
-  } else {
-    // failed, likely because /proc/self/maps does not exist
-    warning("Can't detect initial thread stack location - find_vma failed");
-    // best effort: stack_start is normally within a few pages below the real
-    // stack top, use it as stack top, and reduce stack size so we won't put
-    // guard page outside stack.
-    stack_top = stack_start;
-    stack_size -= 16 * page_size();
-  }
-
-  // stack_top could be partially down the page so align it
-  stack_top = align_size_up(stack_top, page_size());
-
-  if (max_size && stack_size > max_size) {
-     _initial_thread_stack_size = max_size;
-  } else {
-     _initial_thread_stack_size = stack_size;
-  }
-
-  _initial_thread_stack_size = align_size_down(_initial_thread_stack_size, page_size());
-  _initial_thread_stack_bottom = (address)stack_top - _initial_thread_stack_size;
-}
-#endif
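
The skip-to-the-last-')' scan that capture_initial_stack() uses is worth seeing in
isolation. A minimal sketch that reads just the first two fields after the command name
rather than all 28:

    // Minimal sketch of the "resume after the last ')'" trick above; a
    // command renamed to "java 1.4.2 :)" would otherwise break sscanf.
    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    static int read_state_and_ppid(char* state, int* ppid) {
      char stat[2048];
      FILE* fp = fopen("/proc/self/stat", "r");
      if (fp == NULL) return -1;
      size_t n = fread(stat, 1, sizeof(stat) - 1, fp);
      fclose(fp);
      stat[n] = '\0';
      char* s = strrchr(stat, ')');         // last ')' ends the command name
      if (s == NULL) return -1;
      do s++; while (isspace(*s));          // skip the separating blanks
      return sscanf(s, "%c %d", state, ppid) == 2 ? 0 : -1;
    }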
 
 ////////////////////////////////////////////////////////////////////////////////
 // time support
@@ -1576,7 +959,7 @@
 void os::Bsd::clock_init() {
         // XXXDARWIN: Investigate replacement monotonic clock
 }
-#elif defined(_ALLBSD_SOURCE)
+#else
 void os::Bsd::clock_init() {
   struct timespec res;
   struct timespec tp;
@@ -1586,86 +969,8 @@
     _clock_gettime = ::clock_gettime;
   }
 }
-#else
-void os::Bsd::clock_init() {
-  // we do dlopen's in this particular order due to a bug in the bsd
-  // dynamic loader (see 6348968) that leads to a crash on exit
-  void* handle = dlopen("librt.so.1", RTLD_LAZY);
-  if (handle == NULL) {
-    handle = dlopen("librt.so", RTLD_LAZY);
-  }
-
-  if (handle) {
-    int (*clock_getres_func)(clockid_t, struct timespec*) =
-           (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_getres");
-    int (*clock_gettime_func)(clockid_t, struct timespec*) =
-           (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_gettime");
-    if (clock_getres_func && clock_gettime_func) {
-      // See if monotonic clock is supported by the kernel. Note that some
-      // early implementations simply return kernel jiffies (updated every
-      // 1/100 or 1/1000 second). It would be bad to use such a low res clock
-      // for nano time (though the monotonic property is still nice to have).
-      // It's fixed in newer kernels, however clock_getres() still returns
-      // 1/HZ. We check if clock_getres() works, but will ignore its reported
-      // resolution for now. Hopefully as people move to new kernels, this
-      // won't be a problem.
-      struct timespec res;
-      struct timespec tp;
-      if (clock_getres_func (CLOCK_MONOTONIC, &res) == 0 &&
-          clock_gettime_func(CLOCK_MONOTONIC, &tp)  == 0) {
-        // yes, monotonic clock is supported
-        _clock_gettime = clock_gettime_func;
-      } else {
-        // close librt if there is no monotonic clock
-        dlclose(handle);
-      }
-    }
-  }
-}
 #endif
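
The CLOCK_MONOTONIC probe that survives above is self-contained: the clock is considered
usable only if both calls succeed. The same check as a freestanding sketch (minus the
_clock_gettime bookkeeping; may need -lrt on older systems):

    #include <time.h>

    static bool monotonic_clock_available() {
      struct timespec res;
      struct timespec tp;
      return clock_getres(CLOCK_MONOTONIC, &res) == 0 &&
             clock_gettime(CLOCK_MONOTONIC, &tp) == 0;
    }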
 
-#ifndef _ALLBSD_SOURCE
-#ifndef SYS_clock_getres
-
-#if defined(IA32) || defined(AMD64)
-#define SYS_clock_getres IA32_ONLY(266)  AMD64_ONLY(229)
-#define sys_clock_getres(x,y)  ::syscall(SYS_clock_getres, x, y)
-#else
-#warning "SYS_clock_getres not defined for this platform, disabling fast_thread_cpu_time"
-#define sys_clock_getres(x,y)  -1
-#endif
-
-#else
-#define sys_clock_getres(x,y)  ::syscall(SYS_clock_getres, x, y)
-#endif
-
-void os::Bsd::fast_thread_clock_init() {
-  if (!UseBsdPosixThreadCPUClocks) {
-    return;
-  }
-  clockid_t clockid;
-  struct timespec tp;
-  int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) =
-      (int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid");
-
-  // Switch to using fast clocks for thread cpu time if
-  // the sys_clock_getres() returns 0 error code.
-  // Note, that some kernels may support the current thread
-  // clock (CLOCK_THREAD_CPUTIME_ID) but not the clocks
-  // returned by the pthread_getcpuclockid().
-  // If the fast Posix clocks are supported then the sys_clock_getres()
-  // must return at least tp.tv_sec == 0 which means a resolution
-  // better than 1 sec. This is extra check for reliability.
-
-  if(pthread_getcpuclockid_func &&
-     pthread_getcpuclockid_func(_main_thread, &clockid) == 0 &&
-     sys_clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) {
-
-    _supports_fast_thread_cpu_time = true;
-    _pthread_getcpuclockid = pthread_getcpuclockid_func;
-  }
-}
-#endif
 
 jlong os::javaTimeNanos() {
   if (Bsd::supports_monotonic_clock()) {
@@ -1877,19 +1182,20 @@
   return os::stat(filename, &statbuf) == 0;
 }
 
-void os::dll_build_name(char* buffer, size_t buflen,
+bool os::dll_build_name(char* buffer, size_t buflen,
                         const char* pname, const char* fname) {
+  bool retval = false;
   // Copied from libhpi
   const size_t pnamelen = pname ? strlen(pname) : 0;
 
-  // Quietly truncate on buffer overflow.  Should be an error.
+  // Return error on buffer overflow.
   if (pnamelen + strlen(fname) + strlen(JNI_LIB_PREFIX) + strlen(JNI_LIB_SUFFIX) + 2 > buflen) {
-      *buffer = '\0';
-      return;
+    return retval;
   }
 
   if (pnamelen == 0) {
     snprintf(buffer, buflen, JNI_LIB_PREFIX "%s" JNI_LIB_SUFFIX, fname);
+    retval = true;
   } else if (strchr(pname, *os::path_separator()) != NULL) {
     int n;
     char** pelements = split_path(pname, &n);
@@ -1901,6 +1207,7 @@
       snprintf(buffer, buflen, "%s/" JNI_LIB_PREFIX "%s" JNI_LIB_SUFFIX,
           pelements[i], fname);
       if (file_exists(buffer)) {
+        retval = true;
         break;
       }
     }
@@ -1915,14 +1222,16 @@
     }
   } else {
     snprintf(buffer, buflen, "%s/" JNI_LIB_PREFIX "%s" JNI_LIB_SUFFIX, pname, fname);
+    retval = true;
   }
+  return retval;
 }
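
With the new bool return, callers can detect truncation instead of receiving a silently
emptied buffer. A hypothetical call site (path and library name are illustrative, not taken
from this changeset):

    char buf[MAXPATHLEN];
    if (!os::dll_build_name(buf, sizeof(buf), "/usr/lib/jvm", "verify")) {
      // name did not fit, or no candidate exists on the search path:
      // report the failure instead of dlopen()ing an empty string
    }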
 
 const char* os::get_current_directory(char *buf, int buflen) {
   return getcwd(buf, buflen);
 }
 
-// check if addr is inside libjvm[_g].so
+// check if addr is inside libjvm.so
 bool os::address_is_in_vm(address addr) {
   static address libjvm_base_addr;
   Dl_info dlinfo;
@@ -1978,7 +1287,6 @@
   return false;
 }
 
-#ifdef _ALLBSD_SOURCE
 // ported from solaris version
 bool os::dll_address_to_library_name(address addr, char* buf,
                                      int buflen, int* offset) {
@@ -1994,86 +1302,10 @@
      return false;
   }
 }
-#else
-struct _address_to_library_name {
-  address addr;          // input : memory address
-  size_t  buflen;        //         size of fname
-  char*   fname;         // output: library name
-  address base;          //         library base addr
-};
-
-static int address_to_library_name_callback(struct dl_phdr_info *info,
-                                            size_t size, void *data) {
-  int i;
-  bool found = false;
-  address libbase = NULL;
-  struct _address_to_library_name * d = (struct _address_to_library_name *)data;
-
-  // iterate through all loadable segments
-  for (i = 0; i < info->dlpi_phnum; i++) {
-    address segbase = (address)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr);
-    if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-      // base address of a library is the lowest address of its loaded
-      // segments.
-      if (libbase == NULL || libbase > segbase) {
-        libbase = segbase;
-      }
-      // see if 'addr' is within current segment
-      if (segbase <= d->addr &&
-          d->addr < segbase + info->dlpi_phdr[i].p_memsz) {
-        found = true;
-      }
-    }
-  }
-
-  // dlpi_name is NULL or empty if the ELF file is executable, return 0
-  // so dll_address_to_library_name() can fall through to use dladdr() which
-  // can figure out executable name from argv[0].
-  if (found && info->dlpi_name && info->dlpi_name[0]) {
-    d->base = libbase;
-    if (d->fname) {
-      jio_snprintf(d->fname, d->buflen, "%s", info->dlpi_name);
-    }
-    return 1;
-  }
-  return 0;
-}
-
-bool os::dll_address_to_library_name(address addr, char* buf,
-                                     int buflen, int* offset) {
-  Dl_info dlinfo;
-  struct _address_to_library_name data;
-
-  // There is a bug in old glibc dladdr() implementation that it could resolve
-  // to wrong library name if the .so file has a base address != NULL. Here
-  // we iterate through the program headers of all loaded libraries to find
-  // out which library 'addr' really belongs to. This workaround can be
-  // removed once the minimum requirement for glibc is moved to 2.3.x.
-  data.addr = addr;
-  data.fname = buf;
-  data.buflen = buflen;
-  data.base = NULL;
-  int rslt = dl_iterate_phdr(address_to_library_name_callback, (void *)&data);
-
-  if (rslt) {
-     // buf already contains library name
-     if (offset) *offset = addr - data.base;
-     return true;
-  } else if (dladdr((void*)addr, &dlinfo)){
-     if (buf) jio_snprintf(buf, buflen, "%s", dlinfo.dli_fname);
-     if (offset) *offset = addr - (address)dlinfo.dli_fbase;
-     return true;
-  } else {
-     if (buf) buf[0] = '\0';
-     if (offset) *offset = -1;
-     return false;
-  }
-}
-#endif
-
-  // Loads .dll/.so and
-  // in case of error it checks if .dll/.so was built for the
-  // same architecture as Hotspot is running on
+
+// Loads a .dll/.so and, in case of error, checks whether the .dll/.so
+// was built for the same architecture as HotSpot is running on.
 
 #ifdef __APPLE__
 void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
@@ -2292,7 +1524,6 @@
 
 void os::print_dll_info(outputStream *st) {
    st->print_cr("Dynamic libraries:");
-#ifdef _ALLBSD_SOURCE
 #ifdef RTLD_DI_LINKMAP
     Dl_info dli;
     void *handle;
@@ -2336,16 +1567,6 @@
 #else
    st->print_cr("Error: Cannot print dynamic libraries.");
 #endif
-#else
-   char fname[32];
-   pid_t pid = os::Bsd::gettid();
-
-   jio_snprintf(fname, sizeof(fname), "/proc/%d/maps", pid);
-
-   if (!_print_ascii_file(fname, st)) {
-     st->print("Can not get library information for pid = %d\n", pid);
-   }
-#endif
 }
 
 void os::print_os_info_brief(outputStream* st) {
@@ -2374,22 +1595,10 @@
   st->print("Memory:");
   st->print(" %dk page", os::vm_page_size()>>10);
 
-#ifndef _ALLBSD_SOURCE
-  // values in struct sysinfo are "unsigned long"
-  struct sysinfo si;
-  sysinfo(&si);
-#endif
-
   st->print(", physical " UINT64_FORMAT "k",
             os::physical_memory() >> 10);
   st->print("(" UINT64_FORMAT "k free)",
             os::available_memory() >> 10);
-#ifndef _ALLBSD_SOURCE
-  st->print(", swap " UINT64_FORMAT "k",
-            ((jlong)si.totalswap * si.mem_unit) >> 10);
-  st->print("(" UINT64_FORMAT "k free)",
-            ((jlong)si.freeswap * si.mem_unit) >> 10);
-#endif
   st->cr();
 
   // meminfo
@@ -2483,7 +1692,7 @@
 
 static char saved_jvm_path[MAXPATHLEN] = {0};
 
-// Find the full path to the current module, libjvm or libjvm_g
+// Find the full path to the current module, libjvm
 void os::jvm_path(char *buf, jint buflen) {
   // Error checking.
   if (buflen < MAXPATHLEN) {
@@ -2526,10 +1735,9 @@
         char* jrelib_p;
         int len;
 
-        // Check the current module name "libjvm" or "libjvm_g".
+        // Check the current module name "libjvm"
         p = strrchr(buf, '/');
         assert(strstr(p, "/libjvm") == p, "invalid library name");
-        p = strstr(p, "_g") ? "_g" : "";
 
         rp = realpath(java_home_var, buf);
         if (rp == NULL)
@@ -2558,11 +1766,9 @@
         // to complete the path to JVM being overridden.  Otherwise fallback
         // to the path to the current library.
         if (0 == access(buf, F_OK)) {
-          // Use current module name "libjvm[_g]" instead of
-          // "libjvm"debug_only("_g")"" since for fastdebug version
-          // we should have "libjvm" but debug_only("_g") adds "_g"!
+          // Use current module name "libjvm"
           len = strlen(buf);
-          snprintf(buf + len, buflen-len, "/libjvm%s%s", p, JNI_LIB_SUFFIX);
+          snprintf(buf + len, buflen-len, "/libjvm%s", JNI_LIB_SUFFIX);
         } else {
           // Fall back to path of current library
           rp = realpath(dli_fname, buf);
@@ -2786,42 +1992,13 @@
 #endif
 }
 
-#ifndef _ALLBSD_SOURCE
-// Define MAP_HUGETLB here so we can build HotSpot on old systems.
-#ifndef MAP_HUGETLB
-#define MAP_HUGETLB 0x40000
-#endif
-
-// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
-#ifndef MADV_HUGEPAGE
-#define MADV_HUGEPAGE 14
-#endif
-#endif
 
 bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint,
                        bool exec) {
-#ifndef _ALLBSD_SOURCE
-  if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
-    int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
-    uintptr_t res =
-      (uintptr_t) ::mmap(addr, size, prot,
-                         MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
-                         -1, 0);
-    return res != (uintptr_t) MAP_FAILED;
-  }
-#endif
-
   return commit_memory(addr, size, exec);
 }
 
 void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
-#ifndef _ALLBSD_SOURCE
-  if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
-    // We don't check the return value: madvise(MADV_HUGEPAGE) may not
-    // be supported or the memory may already be backed by huge pages.
-    ::madvise(addr, bytes, MADV_HUGEPAGE);
-  }
-#endif
 }
 
 void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) {
@@ -2860,111 +2037,6 @@
   return end;
 }
 
-#ifndef _ALLBSD_SOURCE
-// Something to do with the numa-aware allocator needs these symbols
-extern "C" JNIEXPORT void numa_warn(int number, char *where, ...) { }
-extern "C" JNIEXPORT void numa_error(char *where) { }
-extern "C" JNIEXPORT int fork1() { return fork(); }
-
-
-// If we are running with libnuma version > 2, then we should
-// be trying to use symbols with versions 1.1
-// If we are running with earlier version, which did not have symbol versions,
-// we should use the base version.
-void* os::Bsd::libnuma_dlsym(void* handle, const char *name) {
-  void *f = dlvsym(handle, name, "libnuma_1.1");
-  if (f == NULL) {
-    f = dlsym(handle, name);
-  }
-  return f;
-}
-
-bool os::Bsd::libnuma_init() {
-  // sched_getcpu() should be in libc.
-  set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
-                                  dlsym(RTLD_DEFAULT, "sched_getcpu")));
-
-  if (sched_getcpu() != -1) { // Does it work?
-    void *handle = dlopen("libnuma.so.1", RTLD_LAZY);
-    if (handle != NULL) {
-      set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
-                                           libnuma_dlsym(handle, "numa_node_to_cpus")));
-      set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
-                                       libnuma_dlsym(handle, "numa_max_node")));
-      set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
-                                        libnuma_dlsym(handle, "numa_available")));
-      set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
-                                            libnuma_dlsym(handle, "numa_tonode_memory")));
-      set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t,
-                                            libnuma_dlsym(handle, "numa_interleave_memory")));
-
-
-      if (numa_available() != -1) {
-        set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
-        // Create a cpu -> node mapping
-        _cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray<int>(0, true);
-        rebuild_cpu_to_node_map();
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-// rebuild_cpu_to_node_map() constructs a table mapping cpud id to node id.
-// The table is later used in get_node_by_cpu().
-void os::Bsd::rebuild_cpu_to_node_map() {
-  const size_t NCPUS = 32768; // Since the buffer size computation is very obscure
-                              // in libnuma (possible values are starting from 16,
-                              // and continuing up with every other power of 2, but less
-                              // than the maximum number of CPUs supported by kernel), and
-                              // is subject to change (in libnuma version 2 the requirements
-                              // are more reasonable) we'll just hardcode the number they use
-                              // in the library.
-  const size_t BitsPerCLong = sizeof(long) * CHAR_BIT;
-
-  size_t cpu_num = os::active_processor_count();
-  size_t cpu_map_size = NCPUS / BitsPerCLong;
-  size_t cpu_map_valid_size =
-    MIN2((cpu_num + BitsPerCLong - 1) / BitsPerCLong, cpu_map_size);
-
-  cpu_to_node()->clear();
-  cpu_to_node()->at_grow(cpu_num - 1);
-  size_t node_num = numa_get_groups_num();
-
-  unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size);
-  for (size_t i = 0; i < node_num; i++) {
-    if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
-      for (size_t j = 0; j < cpu_map_valid_size; j++) {
-        if (cpu_map[j] != 0) {
-          for (size_t k = 0; k < BitsPerCLong; k++) {
-            if (cpu_map[j] & (1UL << k)) {
-              cpu_to_node()->at_put(j * BitsPerCLong + k, i);
-            }
-          }
-        }
-      }
-    }
-  }
-  FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
-}
-
-int os::Bsd::get_node_by_cpu(int cpu_id) {
-  if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
-    return cpu_to_node()->at(cpu_id);
-  }
-  return -1;
-}
-
-GrowableArray<int>* os::Bsd::_cpu_to_node;
-os::Bsd::sched_getcpu_func_t os::Bsd::_sched_getcpu;
-os::Bsd::numa_node_to_cpus_func_t os::Bsd::_numa_node_to_cpus;
-os::Bsd::numa_max_node_func_t os::Bsd::_numa_max_node;
-os::Bsd::numa_available_func_t os::Bsd::_numa_available;
-os::Bsd::numa_tonode_memory_func_t os::Bsd::_numa_tonode_memory;
-os::Bsd::numa_interleave_memory_func_t os::Bsd::_numa_interleave_memory;
-unsigned long* os::Bsd::_numa_all_nodes;
-#endif
 
 bool os::pd_uncommit_memory(char* addr, size_t size) {
 #ifdef __OpenBSD__
@@ -3084,42 +2156,7 @@
 }
 
 bool os::Bsd::hugetlbfs_sanity_check(bool warn, size_t page_size) {
-  bool result = false;
-#ifndef _ALLBSD_SOURCE
-  void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
-                  MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
-                  -1, 0);
-
-  if (p != (void *) -1) {
-    // We don't know if this really is a huge page or not.
-    FILE *fp = fopen("/proc/self/maps", "r");
-    if (fp) {
-      while (!feof(fp)) {
-        char chars[257];
-        long x = 0;
-        if (fgets(chars, sizeof(chars), fp)) {
-          if (sscanf(chars, "%lx-%*x", &x) == 1
-              && x == (long)p) {
-            if (strstr (chars, "hugepage")) {
-              result = true;
-              break;
-            }
-          }
-        }
-      }
-      fclose(fp);
-    }
-    munmap (p, page_size);
-    if (result)
-      return true;
-  }
-
-  if (warn) {
-    warning("HugeTLBFS is not supported by the operating system.");
-  }
-#endif
-
-  return result;
+  return false;
 }
 
 /*
@@ -3164,92 +2201,8 @@
 static size_t _large_page_size = 0;
 
 void os::large_page_init() {
-#ifndef _ALLBSD_SOURCE
-  if (!UseLargePages) {
-    UseHugeTLBFS = false;
-    UseSHM = false;
-    return;
-  }
-
-  if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
-    // If UseLargePages is specified on the command line try both methods,
-    // if it's default, then try only HugeTLBFS.
-    if (FLAG_IS_DEFAULT(UseLargePages)) {
-      UseHugeTLBFS = true;
-    } else {
-      UseHugeTLBFS = UseSHM = true;
-    }
-  }
-
-  if (LargePageSizeInBytes) {
-    _large_page_size = LargePageSizeInBytes;
-  } else {
-    // large_page_size on Bsd is used to round up heap size. x86 uses either
-    // 2M or 4M page, depending on whether PAE (Physical Address Extensions)
-    // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use
-    // page as large as 256M.
-    //
-    // Here we try to figure out page size by parsing /proc/meminfo and looking
-    // for a line with the following format:
-    //    Hugepagesize:     2048 kB
-    //
-    // If we can't determine the value (e.g. /proc is not mounted, or the text
-    // format has been changed), we'll use the largest page size supported by
-    // the processor.
-
-#ifndef ZERO
-    _large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M)
-                       ARM_ONLY(2 * M) PPC_ONLY(4 * M);
-#endif // ZERO
-
-    FILE *fp = fopen("/proc/meminfo", "r");
-    if (fp) {
-      while (!feof(fp)) {
-        int x = 0;
-        char buf[16];
-        if (fscanf(fp, "Hugepagesize: %d", &x) == 1) {
-          if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) {
-            _large_page_size = x * K;
-            break;
-          }
-        } else {
-          // skip to next line
-          for (;;) {
-            int ch = fgetc(fp);
-            if (ch == EOF || ch == (int)'\n') break;
-          }
-        }
-      }
-      fclose(fp);
-    }
-  }
-
-  // print a warning if any large page related flag is specified on command line
-  bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
-
-  const size_t default_page_size = (size_t)Bsd::page_size();
-  if (_large_page_size > default_page_size) {
-    _page_sizes[0] = _large_page_size;
-    _page_sizes[1] = default_page_size;
-    _page_sizes[2] = 0;
-  }
-  UseHugeTLBFS = UseHugeTLBFS &&
-                 Bsd::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
-
-  if (UseHugeTLBFS)
-    UseSHM = false;
-
-  UseLargePages = UseHugeTLBFS || UseSHM;
-
-  set_coredump_filter();
-#endif
 }
 
-#ifndef _ALLBSD_SOURCE
-#ifndef SHM_HUGETLB
-#define SHM_HUGETLB 04000
-#endif
-#endif
 
 char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
   // "exec" is passed in but not used.  Creating the shared image for
@@ -3267,11 +2220,7 @@
 
   // Create a large shared memory region to attach to based on size.
   // Currently, size is the total size of the heap
-#ifndef _ALLBSD_SOURCE
-  int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
-#else
   int shmid = shmget(key, bytes, IPC_CREAT|SHM_R|SHM_W);
-#endif
   if (shmid == -1) {
      // Possible reasons for shmget failure:
      // 1. shmmax is too small for Java heap.
@@ -3558,7 +2507,7 @@
 // this reason, the code should not be used as default (ThreadPriorityPolicy=0).
 // It is only used when ThreadPriorityPolicy=1 and requires root privilege.
 
-#if defined(_ALLBSD_SOURCE) && !defined(__APPLE__)
+#if !defined(__APPLE__)
 int os::java_to_os_priority[CriticalPriority + 1] = {
   19,              // 0 Entry should never be used
 
@@ -3578,7 +2527,7 @@
 
   31               // 11 CriticalPriority
 };
-#elif defined(__APPLE__)
+#else
 /* Using Mach high-level priority assignments */
 int os::java_to_os_priority[CriticalPriority + 1] = {
    0,              // 0 Entry should never be used (MINPRI_USER)
@@ -3599,26 +2548,6 @@
 
   36               // 11 CriticalPriority
 };
-#else
-int os::java_to_os_priority[CriticalPriority + 1] = {
-  19,              // 0 Entry should never be used
-
-   4,              // 1 MinPriority
-   3,              // 2
-   2,              // 3
-
-   1,              // 4
-   0,              // 5 NormPriority
-  -1,              // 6
-
-  -2,              // 7
-  -3,              // 8
-  -4,              // 9 NearMaxPriority
-
-  -5,              // 10 MaxPriority
-
-  -5               // 11 CriticalPriority
-};
 #endif
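
Both tables map the eleven Java priority slots onto a platform-specific range: nice values
on the generic BSDs, Mach priorities on OS X. A sketch of how such a table is consumed when
ThreadPriorityPolicy=1 (setpriority() stands in for the per-platform call here; the
__APPLE__ build really goes through Mach APIs):

    #include <sys/resource.h>

    // Illustrative only: translate a Java priority slot (1..10, plus the
    // CriticalPriority slot) to its OS value and apply it to a thread.
    static int set_native_priority_sketch(id_t tid, int java_priority) {
      int os_prio = os::java_to_os_priority[java_priority];
      return setpriority(PRIO_PROCESS, tid, os_prio);
    }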
 
 static int prio_init() {
@@ -4179,22 +3108,6 @@
   }
 }
 
-#ifndef _ALLBSD_SOURCE
-// This is the fastest way to get thread cpu time on Bsd.
-// Returns cpu time (user+sys) for any thread, not only for current.
-// POSIX compliant clocks are implemented in the kernels 2.6.16+.
-// It might work on 2.6.10+ with a special kernel/glibc patch.
-// For reference, please, see IEEE Std 1003.1-2004:
-//   http://www.unix.org/single_unix_specification
-
-jlong os::Bsd::fast_thread_cpu_time(clockid_t clockid) {
-  struct timespec tp;
-  int rc = os::Bsd::clock_gettime(clockid, &tp);
-  assert(rc == 0, "clock_gettime is expected to return 0 code");
-
-  return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
-}
-#endif
 
 /////
 // glibc on Bsd platform uses non-documented flag
@@ -4458,10 +3371,6 @@
 // this is called _after_ the global arguments have been parsed
 jint os::init_2(void)
 {
-#ifndef _ALLBSD_SOURCE
-  Bsd::fast_thread_clock_init();
-#endif
-
   // Allocate a single page and mark it as readable for safepoint polling
   address polling_page = (address) ::mmap(NULL, Bsd::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
   guarantee( polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page" );
@@ -4518,48 +3427,6 @@
   JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes,
         vm_page_size()));
 
-#ifndef _ALLBSD_SOURCE
-  Bsd::capture_initial_stack(JavaThread::stack_size_at_create());
-
-  Bsd::libpthread_init();
-  if (PrintMiscellaneous && (Verbose || WizardMode)) {
-     tty->print_cr("[HotSpot is running with %s, %s(%s)]\n",
-          Bsd::glibc_version(), Bsd::libpthread_version(),
-          Bsd::is_floating_stack() ? "floating stack" : "fixed stack");
-  }
-
-  if (UseNUMA) {
-    if (!Bsd::libnuma_init()) {
-      UseNUMA = false;
-    } else {
-      if ((Bsd::numa_max_node() < 1)) {
-        // There's only one node(they start from 0), disable NUMA.
-        UseNUMA = false;
-      }
-    }
-    // With SHM large pages we cannot uncommit a page, so there's no way
-    // we can make the adaptive lgrp chunk resizing work. If the user specified
-    // both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and
-    // disable adaptive resizing.
-    if (UseNUMA && UseLargePages && UseSHM) {
-      if (!FLAG_IS_DEFAULT(UseNUMA)) {
-        if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseSHM)) {
-          UseLargePages = false;
-        } else {
-          warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing");
-          UseAdaptiveSizePolicy = false;
-          UseAdaptiveNUMAChunkSizing = false;
-        }
-      } else {
-        UseNUMA = false;
-      }
-    }
-    if (!UseNUMA && ForceNUMA) {
-      UseNUMA = true;
-    }
-  }
-#endif
-
   if (MaxFDLimit) {
     // set the number of file descriptors to max. print out error
     // if getrlimit/setrlimit fails but continue regardless.
@@ -4586,11 +3453,6 @@
     }
   }
 
-#ifndef _ALLBSD_SOURCE
-  // Initialize lock used to serialize thread creation (see os::create_thread)
-  Bsd::set_createThread_lock(new Mutex(Mutex::leaf, "createThread_lock", false));
-#endif
-
   // at-exit methods are called in the reverse order of their registration.
   // atexit functions are called on return from main or as a result of a
   // call to exit(3C). There can be only 32 of these functions registered
@@ -4641,15 +3503,7 @@
 };
 
 int os::active_processor_count() {
-#ifdef _ALLBSD_SOURCE
   return _processor_count;
-#else
-  // Bsd doesn't yet have a (official) notion of processor sets,
-  // so just return the number of online processors.
-  int online_cpus = ::sysconf(_SC_NPROCESSORS_ONLN);
-  assert(online_cpus > 0 && online_cpus <= processor_count(), "sanity check");
-  return online_cpus;
-#endif
 }
 
 void os::set_native_thread_name(const char *name) {
@@ -4703,25 +3557,7 @@
 
 int os::Bsd::safe_cond_timedwait(pthread_cond_t *_cond, pthread_mutex_t *_mutex, const struct timespec *_abstime)
 {
-#ifdef _ALLBSD_SOURCE
   return pthread_cond_timedwait(_cond, _mutex, _abstime);
-#else
-   if (is_NPTL()) {
-      return pthread_cond_timedwait(_cond, _mutex, _abstime);
-   } else {
-#ifndef IA64
-      // 6292965: BsdThreads pthread_cond_timedwait() resets FPU control
-      // word back to default 64bit precision if condvar is signaled. Java
-      // wants 53bit precision.  Save and restore current value.
-      int fpu = get_fpu_control_word();
-#endif // IA64
-      int status = pthread_cond_timedwait(_cond, _mutex, _abstime);
-#ifndef IA64
-      set_fpu_control_word(fpu);
-#endif // IA64
-      return status;
-   }
-#endif
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -5041,20 +3877,6 @@
   return munmap(addr, bytes) == 0;
 }
 
-#ifndef _ALLBSD_SOURCE
-static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time);
-
-static clockid_t thread_cpu_clockid(Thread* thread) {
-  pthread_t tid = thread->osthread()->pthread_id();
-  clockid_t clockid;
-
-  // Get thread clockid
-  int rc = os::Bsd::pthread_getcpuclockid(tid, &clockid);
-  assert(rc == 0, "pthread_getcpuclockid is expected to return 0 code");
-  return clockid;
-}
-#endif
-
 // current_thread_cpu_time(bool) and thread_cpu_time(Thread*, bool)
 // are used by JVM M&M and JVMTI to get user+sys or user CPU time
 // of a thread.
@@ -5065,36 +3887,15 @@
 jlong os::current_thread_cpu_time() {
 #ifdef __APPLE__
   return os::thread_cpu_time(Thread::current(), true /* user + sys */);
-#elif !defined(_ALLBSD_SOURCE)
-  if (os::Bsd::supports_fast_thread_cpu_time()) {
-    return os::Bsd::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
-  } else {
-    // return user + sys since the cost is the same
-    return slow_thread_cpu_time(Thread::current(), true /* user + sys */);
-  }
 #endif
 }
 
 jlong os::thread_cpu_time(Thread* thread) {
-#ifndef _ALLBSD_SOURCE
-  // consistent with what current_thread_cpu_time() returns
-  if (os::Bsd::supports_fast_thread_cpu_time()) {
-    return os::Bsd::fast_thread_cpu_time(thread_cpu_clockid(thread));
-  } else {
-    return slow_thread_cpu_time(thread, true /* user + sys */);
-  }
-#endif
 }
 
 jlong os::current_thread_cpu_time(bool user_sys_cpu_time) {
 #ifdef __APPLE__
   return os::thread_cpu_time(Thread::current(), user_sys_cpu_time);
-#elif !defined(_ALLBSD_SOURCE)
-  if (user_sys_cpu_time && os::Bsd::supports_fast_thread_cpu_time()) {
-    return os::Bsd::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
-  } else {
-    return slow_thread_cpu_time(Thread::current(), user_sys_cpu_time);
-  }
 #endif
 }
 
@@ -5118,106 +3919,9 @@
   } else {
     return ((jlong)tinfo.user_time.seconds * 1000000000) + ((jlong)tinfo.user_time.microseconds * (jlong)1000);
   }
-#elif !defined(_ALLBSD_SOURCE)
-  if (user_sys_cpu_time && os::Bsd::supports_fast_thread_cpu_time()) {
-    return os::Bsd::fast_thread_cpu_time(thread_cpu_clockid(thread));
-  } else {
-    return slow_thread_cpu_time(thread, user_sys_cpu_time);
-  }
 #endif
 }
 
-#ifndef _ALLBSD_SOURCE
-//
-//  -1 on error.
-//
-
-static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
-  static bool proc_pid_cpu_avail = true;
-  static bool proc_task_unchecked = true;
-  static const char *proc_stat_path = "/proc/%d/stat";
-  pid_t  tid = thread->osthread()->thread_id();
-  int i;
-  char *s;
-  char stat[2048];
-  int statlen;
-  char proc_name[64];
-  int count;
-  long sys_time, user_time;
-  char string[64];
-  char cdummy;
-  int idummy;
-  long ldummy;
-  FILE *fp;
-
-  // We first try accessing /proc/<pid>/cpu since this is faster to
-  // process.  If this file is not present (bsd kernels 2.5 and above)
-  // then we open /proc/<pid>/stat.
-  if ( proc_pid_cpu_avail ) {
-    sprintf(proc_name, "/proc/%d/cpu", tid);
-    fp =  fopen(proc_name, "r");
-    if ( fp != NULL ) {
-      count = fscanf( fp, "%s %lu %lu\n", string, &user_time, &sys_time);
-      fclose(fp);
-      if ( count != 3 ) return -1;
-
-      if (user_sys_cpu_time) {
-        return ((jlong)sys_time + (jlong)user_time) * (1000000000 / clock_tics_per_sec);
-      } else {
-        return (jlong)user_time * (1000000000 / clock_tics_per_sec);
-      }
-    }
-    else proc_pid_cpu_avail = false;
-  }
-
-  // The /proc/<tid>/stat aggregates per-process usage on
-  // new Bsd kernels 2.6+ where NPTL is supported.
-  // The /proc/self/task/<tid>/stat still has the per-thread usage.
-  // See bug 6328462.
-  // There can be no directory /proc/self/task on kernels 2.4 with NPTL
-  // and possibly in some other cases, so we check its availability.
-  if (proc_task_unchecked && os::Bsd::is_NPTL()) {
-    // This is executed only once
-    proc_task_unchecked = false;
-    fp = fopen("/proc/self/task", "r");
-    if (fp != NULL) {
-      proc_stat_path = "/proc/self/task/%d/stat";
-      fclose(fp);
-    }
-  }
-
-  sprintf(proc_name, proc_stat_path, tid);
-  fp = fopen(proc_name, "r");
-  if ( fp == NULL ) return -1;
-  statlen = fread(stat, 1, 2047, fp);
-  stat[statlen] = '\0';
-  fclose(fp);
-
-  // Skip pid and the command string. Note that we could be dealing with
-  // weird command names, e.g. user could decide to rename java launcher
-  // to "java 1.4.2 :)", then the stat file would look like
-  //                1234 (java 1.4.2 :)) R ... ...
-  // We don't really need to know the command string, just find the last
-  // occurrence of ")" and then start parsing from there. See bug 4726580.
-  s = strrchr(stat, ')');
-  i = 0;
-  if (s == NULL ) return -1;
-
-  // Skip blank chars
-  do s++; while (isspace(*s));
-
-  count = sscanf(s,"%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
-                 &cdummy, &idummy, &idummy, &idummy, &idummy, &idummy,
-                 &ldummy, &ldummy, &ldummy, &ldummy, &ldummy,
-                 &user_time, &sys_time);
-  if ( count != 13 ) return -1;
-  if (user_sys_cpu_time) {
-    return ((jlong)sys_time + (jlong)user_time) * (1000000000 / clock_tics_per_sec);
-  } else {
-    return (jlong)user_time * (1000000000 / clock_tics_per_sec);
-  }
-}
-#endif
 
 void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
   info_ptr->max_value = ALL_64_BITS;       // will not wrap in less than 64 bits
@@ -5236,10 +3940,8 @@
 bool os::is_thread_cpu_time_supported() {
 #ifdef __APPLE__
   return true;
-#elif defined(_ALLBSD_SOURCE)
+#else
   return false;
-#else
-  return true;
 #endif
 }
 
@@ -5392,11 +4094,12 @@
      }
      -- _nParked ;
 
-    // In theory we could move the ST of 0 into _Event past the unlock(),
-    // but then we'd need a MEMBAR after the ST.
     _Event = 0 ;
      status = pthread_mutex_unlock(_mutex);
      assert_status(status == 0, status, "mutex_unlock");
+    // Paranoia to ensure our locked and lock-free paths interact
+    // correctly with each other.
+    OrderAccess::fence();
   }
   guarantee (_Event >= 0, "invariant") ;
 }
@@ -5459,40 +4162,44 @@
   status = pthread_mutex_unlock(_mutex);
   assert_status(status == 0, status, "mutex_unlock");
   assert (_nParked == 0, "invariant") ;
+  // Paranoia to ensure our locked and lock-free paths interact
+  // correctly with each other.
+  OrderAccess::fence();
   return ret;
 }
 
 void os::PlatformEvent::unpark() {
-  int v, AnyWaiters ;
-  for (;;) {
-      v = _Event ;
-      if (v > 0) {
-         // The LD of _Event could have reordered or be satisfied
-         // by a read-aside from this processor's write buffer.
-         // To avoid problems execute a barrier and then
-         // ratify the value.
-         OrderAccess::fence() ;
-         if (_Event == v) return ;
-         continue ;
-      }
-      if (Atomic::cmpxchg (v+1, &_Event, v) == v) break ;
+  // Transitions for _Event:
+  //    0 :=> 1
+  //    1 :=> 1
+  //   -1 :=> either 0 or 1; must signal target thread
+  //          That is, we can safely transition _Event from -1 to either
+  //          0 or 1. Forcing 1 is slightly more efficient for back-to-back
+  //          unpark() calls.
+  // See also: "Semaphores in Plan 9" by Mullender & Cox
+  //
+  // Note: Forcing a transition from "-1" to "1" on an unpark() means
+  // that it will take two back-to-back park() calls for the owning
+  // thread to block. This has the benefit of forcing a spurious return
+  // from the first park() call after an unpark() call which will help
+  // shake out uses of park() and unpark() without condition variables.
+
+  if (Atomic::xchg(1, &_Event) >= 0) return;
+
+  // Wait for the thread associated with the event to vacate
+  int status = pthread_mutex_lock(_mutex);
+  assert_status(status == 0, status, "mutex_lock");
+  int AnyWaiters = _nParked;
+  assert(AnyWaiters == 0 || AnyWaiters == 1, "invariant");
+  if (AnyWaiters != 0 && WorkAroundNPTLTimedWaitHang) {
+    AnyWaiters = 0;
+    pthread_cond_signal(_cond);
   }
-  if (v < 0) {
-     // Wait for the thread associated with the event to vacate
-     int status = pthread_mutex_lock(_mutex);
-     assert_status(status == 0, status, "mutex_lock");
-     AnyWaiters = _nParked ;
-     assert (AnyWaiters == 0 || AnyWaiters == 1, "invariant") ;
-     if (AnyWaiters != 0 && WorkAroundNPTLTimedWaitHang) {
-        AnyWaiters = 0 ;
-        pthread_cond_signal (_cond);
-     }
-     status = pthread_mutex_unlock(_mutex);
-     assert_status(status == 0, status, "mutex_unlock");
-     if (AnyWaiters != 0) {
-        status = pthread_cond_signal(_cond);
-        assert_status(status == 0, status, "cond_signal");
-     }
+  status = pthread_mutex_unlock(_mutex);
+  assert_status(status == 0, status, "mutex_unlock");
+  if (AnyWaiters != 0) {
+    status = pthread_cond_signal(_cond);
+    assert_status(status == 0, status, "cond_signal");
   }
 
   // Note that we signal() _after dropping the lock for "immortal" Events.
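
Stripped of timeouts, assertions, and the WorkAroundNPTLTimedWaitHang handling, the
protocol the transition comment documents reduces to the sketch below. EventSketch is
illustrative, not the real PlatformEvent; _Event is 1 when a permit is pending, 0 when
neutral, and -1 while a thread is parked:

    #include <pthread.h>

    struct EventSketch {
      volatile int    _Event;
      pthread_mutex_t _mutex[1];
      pthread_cond_t  _cond[1];
    };

    void sketch_park(EventSketch* e) {
      int v;
      do { v = e->_Event; } while (Atomic::cmpxchg(v - 1, &e->_Event, v) != v);
      if (v > 0) return;                    // 1 -> 0: consumed a permit
      pthread_mutex_lock(e->_mutex);        // 0 -> -1: we must block
      while (e->_Event < 0) pthread_cond_wait(e->_cond, e->_mutex);
      e->_Event = 0;
      pthread_mutex_unlock(e->_mutex);
      OrderAccess::fence();                 // pair with the lock-free paths
    }

    void sketch_unpark(EventSketch* e) {
      if (Atomic::xchg(1, &e->_Event) >= 0) return;  // no thread was parked
      pthread_mutex_lock(e->_mutex);        // -1 -> 1: wake the single waiter
      pthread_cond_signal(e->_cond);
      pthread_mutex_unlock(e->_mutex);
    }

Note how the atomic decrement in sketch_park() closes the race with a concurrent unpark():
if unpark() publishes 1 between the load and the cmpxchg, the cmpxchg fails, the loop
re-reads 1, and the permit is consumed instead of being overwritten.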
@@ -5578,13 +4285,14 @@
 }
 
 void Parker::park(bool isAbsolute, jlong time) {
+  // Ideally we'd do something useful while spinning, such
+  // as calling unpackTime().
+
   // Optional fast-path check:
   // Return immediately if a permit is available.
-  if (_counter > 0) {
-      _counter = 0 ;
-      OrderAccess::fence();
-      return ;
-  }
+  // We depend on Atomic::xchg() having full barrier semantics
+  // since we are doing a lock-free update to _counter.
+  if (Atomic::xchg(0, &_counter) > 0) return;
 
   Thread* thread = Thread::current();
   assert(thread->is_Java_thread(), "Must be JavaThread");
@@ -5625,6 +4333,8 @@
     _counter = 0;
     status = pthread_mutex_unlock(_mutex);
     assert (status == 0, "invariant") ;
+    // Paranoia to ensure our locked and lock-free paths interact
+    // correctly with each other and Java-level accesses.
     OrderAccess::fence();
     return;
   }
@@ -5661,12 +4371,14 @@
   _counter = 0 ;
   status = pthread_mutex_unlock(_mutex) ;
   assert_status(status == 0, status, "invariant") ;
+  // Paranoia to ensure our locked and lock-free paths interact
+  // correctly with each other and Java-level accesses.
+  OrderAccess::fence();
+
   // If externally suspended while waiting, re-suspend
   if (jt->handle_special_suspend_equivalent_condition()) {
     jt->java_suspend_self();
   }
-
-  OrderAccess::fence();
 }
 
 void Parker::unpark() {
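
The _Event protocol described in the unpark() rewrite above can be modelled outside HotSpot. Below is a minimal sketch using C++11 primitives in place of Atomic::xchg and the pthread mutex/condvar pair; the park() side follows the CAS-decrement scheme of the full source (not shown in this hunk), and the wait predicate stands in for the _nParked bookkeeping. Illustrative only, not HotSpot code:

    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    // Model of the _Event states: 0 = neutral, 1 = permit available,
    // -1 = a thread is parked and must be signalled.
    class EventModel {
      std::atomic<int> event_{0};
      std::mutex mu_;
      std::condition_variable cv_;
     public:
      void park() {
        int v = event_.load();
        // 1 -> 0 consumes the permit; 0 -> -1 announces that we park.
        while (!event_.compare_exchange_weak(v, v - 1)) { /* v reloaded */ }
        if (v == 1) return;                  // fast path: permit was available
        std::unique_lock<std::mutex> lk(mu_);
        cv_.wait(lk, [this] { return event_.load() >= 0; });  // predicate tolerates a lost signal
        event_.store(0);                     // consume the permit left by unpark()
      }
      void unpark() {
        // 0 :=> 1, 1 :=> 1, -1 :=> 1 plus a wakeup for the parked thread.
        if (event_.exchange(1) >= 0) return; // nobody was parked
        std::lock_guard<std::mutex> lk(mu_); // serialize with the parker
        cv_.notify_one();
      }
    };
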
--- a/src/os/bsd/vm/os_bsd.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/os_bsd.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -56,19 +56,6 @@
   static int sigflags[MAXSIGNUM];
 
   static int (*_clock_gettime)(clockid_t, struct timespec *);
-#ifndef _ALLBSD_SOURCE
-  static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *);
-
-  static address   _initial_thread_stack_bottom;
-  static uintptr_t _initial_thread_stack_size;
-
-  static const char *_glibc_version;
-  static const char *_libpthread_version;
-
-  static bool _is_floating_stack;
-  static bool _is_NPTL;
-  static bool _supports_fast_thread_cpu_time;
-#endif
 
   static GrowableArray<int>* _cpu_to_node;
 
@@ -76,28 +63,14 @@
 
   static julong _physical_memory;
   static pthread_t _main_thread;
-#ifndef _ALLBSD_SOURCE
-  static Mutex* _createThread_lock;
-#endif
   static int _page_size;
 
   static julong available_memory();
   static julong physical_memory() { return _physical_memory; }
   static void initialize_system_info();
 
-#ifndef _ALLBSD_SOURCE
-  static void set_glibc_version(const char *s)      { _glibc_version = s; }
-  static void set_libpthread_version(const char *s) { _libpthread_version = s; }
-#endif
-
   static bool supports_variable_stack_size();
 
-#ifndef _ALLBSD_SOURCE
-  static void set_is_NPTL()                   { _is_NPTL = true;  }
-  static void set_is_BsdThreads()           { _is_NPTL = false; }
-  static void set_is_floating_stack()         { _is_floating_stack = true; }
-#endif
-
   static void rebuild_cpu_to_node_map();
   static GrowableArray<int>* cpu_to_node()    { return _cpu_to_node; }
 
@@ -106,25 +79,10 @@
  public:
 
   static void init_thread_fpu_state();
-#ifndef _ALLBSD_SOURCE
-  static int  get_fpu_control_word();
-  static void set_fpu_control_word(int fpu_control);
-#endif
   static pthread_t main_thread(void)                                { return _main_thread; }
 
-#ifndef _ALLBSD_SOURCE
-  // returns kernel thread id (similar to LWP id on Solaris), which can be
-  // used to access /proc
-  static pid_t gettid();
-  static void set_createThread_lock(Mutex* lk)                      { _createThread_lock = lk; }
-  static Mutex* createThread_lock(void)                             { return _createThread_lock; }
-#endif
   static void hotspot_sigmask(Thread* thread);
 
-#ifndef _ALLBSD_SOURCE
-  static address   initial_thread_stack_bottom(void)                { return _initial_thread_stack_bottom; }
-  static uintptr_t initial_thread_stack_size(void)                  { return _initial_thread_stack_size; }
-#endif
   static bool is_initial_thread(void);
 
   static int page_size(void)                                        { return _page_size; }
@@ -161,23 +119,6 @@
   static struct sigaction *get_chained_signal_action(int sig);
   static bool chained_handler(int sig, siginfo_t* siginfo, void* context);
 
-#ifndef _ALLBSD_SOURCE
-  // GNU libc and libpthread version strings
-  static const char *glibc_version()          { return _glibc_version; }
-  static const char *libpthread_version()     { return _libpthread_version; }
-
-  // NPTL or BsdThreads?
-  static bool is_BsdThreads()               { return !_is_NPTL; }
-  static bool is_NPTL()                       { return _is_NPTL;  }
-
-  // NPTL is always floating stack. BsdThreads could be using floating
-  // stack or fixed stack.
-  static bool is_floating_stack()             { return _is_floating_stack; }
-
-  static void libpthread_init();
-  static bool libnuma_init();
-  static void* libnuma_dlsym(void* handle, const char* name);
-#endif
   // Minimum stack size a thread can be created with (allowing
   // the VM to completely create the thread and enter user code)
   static size_t min_stack_allowed;
@@ -186,22 +127,9 @@
   static size_t default_stack_size(os::ThreadType thr_type);
   static size_t default_guard_size(os::ThreadType thr_type);
 
-#ifndef _ALLBSD_SOURCE
-  static void capture_initial_stack(size_t max_size);
-
-  // Stack overflow handling
-  static bool manually_expand_stack(JavaThread * t, address addr);
-  static int max_register_window_saves_before_flushing();
-#endif
-
   // Real-time clock functions
   static void clock_init(void);
 
-#ifndef _ALLBSD_SOURCE
-  // fast POSIX clocks support
-  static void fast_thread_clock_init(void);
-#endif
-
   static inline bool supports_monotonic_clock() {
     return _clock_gettime != NULL;
   }
@@ -210,18 +138,6 @@
     return _clock_gettime ? _clock_gettime(clock_id, tp) : -1;
   }
 
-#ifndef _ALLBSD_SOURCE
-  static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) {
-    return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1;
-  }
-
-  static bool supports_fast_thread_cpu_time() {
-    return _supports_fast_thread_cpu_time;
-  }
-
-  static jlong fast_thread_cpu_time(clockid_t clockid);
-#endif
-
   // Stack repair handling
 
   // none present
--- a/src/os/bsd/vm/os_bsd.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/os_bsd.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,13 +26,13 @@
 #define OS_BSD_VM_OS_BSD_INLINE_HPP
 
 #include "runtime/atomic.hpp"
+#include "runtime/atomic.inline.hpp"
 #include "runtime/os.hpp"
+
 #ifdef TARGET_OS_ARCH_bsd_x86
-# include "atomic_bsd_x86.inline.hpp"
 # include "orderAccess_bsd_x86.inline.hpp"
 #endif
 #ifdef TARGET_OS_ARCH_bsd_zero
-# include "atomic_bsd_zero.inline.hpp"
 # include "orderAccess_bsd_zero.inline.hpp"
 #endif
 
@@ -59,14 +59,6 @@
   return ":";
 }
 
-inline const char* os::jlong_format_specifier() {
-  return "%lld";
-}
-
-inline const char* os::julong_format_specifier() {
-  return "%llu";
-}
-
 // File names are case-sensitive on windows only
 inline int os::file_name_strcmp(const char* s1, const char* s2) {
   return strcmp(s1, s2);
--- a/src/os/bsd/vm/perfMemory_bsd.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/perfMemory_bsd.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,6 +30,7 @@
 #include "os_bsd.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/perfMemory.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/exceptions.hpp"
 
 // put OS-includes here
@@ -753,6 +754,10 @@
   // clear the shared memory region
   (void)::memset((void*) mapAddress, 0, size);
 
+  // This does not go through the OS API, so the reservation has to be recorded here.
+  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+  MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
   return mapAddress;
 }
 
@@ -912,6 +917,10 @@
               "Could not map PerfMemory");
   }
 
+  // This does not go through the OS API, so the reservation has to be recorded here.
+  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+  MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
   *addr = mapAddress;
   *sizep = size;
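
The pattern added above generalizes: any mapping created directly with mmap(2) bypasses os::reserve_memory() and is therefore invisible to native memory tracking unless reported explicitly. A hedged sketch (the helper name is invented; MAP_ANON is the BSD spelling of an anonymous mapping):

    #include <sys/mman.h>
    #include "services/memTracker.hpp"   // as added by this changeset

    // Sketch only: report a raw mmap to NMT, exactly as the hunks
    // above do for the PerfMemory region.
    static char* map_and_record(size_t size) {
      char* addr = (char*) ::mmap(NULL, size, PROT_READ | PROT_WRITE,
                                  MAP_PRIVATE | MAP_ANON, -1, 0);
      if (addr == MAP_FAILED) return NULL;
      MemTracker::record_virtual_memory_reserve((address) addr, size, CURRENT_PC);
      MemTracker::record_virtual_memory_type((address) addr, mtInternal);
      return addr;
    }
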
 
--- a/src/os/bsd/vm/threadCritical_bsd.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/threadCritical_bsd.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
-#include "thread_bsd.inline.hpp"
 
 // put OS-includes here
 # include <pthread.h>
--- a/src/os/bsd/vm/thread_bsd.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/bsd/vm/thread_bsd.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,10 @@
 #ifndef OS_BSD_VM_THREAD_BSD_INLINE_HPP
 #define OS_BSD_VM_THREAD_BSD_INLINE_HPP
 
+#ifndef SHARE_VM_RUNTIME_THREAD_INLINE_HPP_SCOPE
+#error "This file should only be included from thread.inline.hpp"
+#endif
+
 #include "runtime/atomic.hpp"
 #include "runtime/prefetch.hpp"
 #include "runtime/thread.hpp"
--- a/src/os/linux/vm/mutex_linux.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/linux/vm/mutex_linux.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,7 +26,7 @@
 #include "mutex_linux.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/mutex.hpp"
-#include "thread_linux.inline.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/events.hpp"
 
 // put OS-includes here
--- a/src/os/linux/vm/mutex_linux.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/linux/vm/mutex_linux.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,7 +27,7 @@
 
 #include "os_linux.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
-#include "thread_linux.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 
 // Reconciliation History
--- a/src/os/linux/vm/osThread_linux.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/linux/vm/osThread_linux.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,29 +23,10 @@
  */
 
 // no precompiled headers
-#include "runtime/atomic.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/mutexLocker.hpp"
-#include "runtime/os.hpp"
+#include "runtime/mutex.hpp"
 #include "runtime/osThread.hpp"
-#include "runtime/safepoint.hpp"
-#include "runtime/vmThread.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_zero
-# include "assembler_zero.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_arm
-# include "assembler_arm.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_ppc
-# include "assembler_ppc.inline.hpp"
-#endif
 
+#include <signal.h>
 
 void OSThread::pd_initialize() {
   assert(this != NULL, "check");
--- a/src/os/linux/vm/os_linux.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/linux/vm/os_linux.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
 #include "compiler/compileBroker.hpp"
+#include "compiler/disassembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "jvm_linux.h"
 #include "memory/allocation.inline.hpp"
@@ -52,36 +53,16 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/statSampler.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
 #include "runtime/timer.hpp"
 #include "services/attachListener.hpp"
 #include "services/runtimeService.hpp"
-#include "thread_linux.inline.hpp"
 #include "utilities/decoder.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/events.hpp"
 #include "utilities/growableArray.hpp"
 #include "utilities/vmError.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-# include "nativeInst_x86.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-# include "nativeInst_sparc.hpp"
-#endif
-#ifdef TARGET_ARCH_zero
-# include "assembler_zero.inline.hpp"
-# include "nativeInst_zero.hpp"
-#endif
-#ifdef TARGET_ARCH_arm
-# include "assembler_arm.inline.hpp"
-# include "nativeInst_arm.hpp"
-#endif
-#ifdef TARGET_ARCH_ppc
-# include "assembler_ppc.inline.hpp"
-# include "nativeInst_ppc.hpp"
-#endif
 
 // put OS-includes here
 # include <sys/types.h>
@@ -340,12 +321,12 @@
 
   // The next steps are taken in the product version:
   //
-  // Obtain the JAVA_HOME value from the location of libjvm[_g].so.
+  // Obtain the JAVA_HOME value from the location of libjvm.so.
   // This library should be located at:
-  // <JAVA_HOME>/jre/lib/<arch>/{client|server}/libjvm[_g].so.
+  // <JAVA_HOME>/jre/lib/<arch>/{client|server}/libjvm.so.
   //
   // If "/jre/lib/" appears at the right place in the path, then we
-  // assume libjvm[_g].so is installed in a JDK and we use this path.
+  // assume libjvm.so is installed in a JDK and we use this path.
   //
   // Otherwise exit with message: "Could not create the Java virtual machine."
   //
@@ -355,9 +336,9 @@
   // instead of exit check for $JAVA_HOME environment variable.
   //
   // If it is defined and we are able to locate $JAVA_HOME/jre/lib/<arch>,
-  // then we append a fake suffix "hotspot/libjvm[_g].so" to this path so
-  // it looks like libjvm[_g].so is installed there
-  // <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm[_g].so.
+  // then we append a fake suffix "hotspot/libjvm.so" to this path so
+  // it looks like libjvm.so is installed there
+  // <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm.so.
   //
   // Otherwise exit.
   //
@@ -1650,19 +1631,20 @@
   return os::stat(filename, &statbuf) == 0;
 }
 
-void os::dll_build_name(char* buffer, size_t buflen,
+bool os::dll_build_name(char* buffer, size_t buflen,
                         const char* pname, const char* fname) {
+  bool retval = false;
   // Copied from libhpi
   const size_t pnamelen = pname ? strlen(pname) : 0;
 
-  // Quietly truncate on buffer overflow.  Should be an error.
+  // Return error on buffer overflow.
   if (pnamelen + strlen(fname) + 10 > (size_t) buflen) {
-      *buffer = '\0';
-      return;
+    return retval;
   }
 
   if (pnamelen == 0) {
     snprintf(buffer, buflen, "lib%s.so", fname);
+    retval = true;
   } else if (strchr(pname, *os::path_separator()) != NULL) {
     int n;
     char** pelements = split_path(pname, &n);
@@ -1673,6 +1655,7 @@
       }
       snprintf(buffer, buflen, "%s/lib%s.so", pelements[i], fname);
       if (file_exists(buffer)) {
+        retval = true;
         break;
       }
     }
@@ -1687,14 +1670,16 @@
     }
   } else {
     snprintf(buffer, buflen, "%s/lib%s.so", pname, fname);
-  }
+    retval = true;
+  }
+  return retval;
 }
 
 const char* os::get_current_directory(char *buf, int buflen) {
   return getcwd(buf, buflen);
 }
 
-// check if addr is inside libjvm[_g].so
+// check if addr is inside libjvm.so
 bool os::address_is_in_vm(address addr) {
   static address libjvm_base_addr;
   Dl_info dlinfo;
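
With the new bool return, a caller of dll_build_name() can distinguish an unbuildable name from a silently emptied buffer. A hedged caller sketch (the helper name and its parameters are invented; os::dll_load() is the existing loader entry point):

    // Sketch: returns NULL when no library name fits the buffer or no
    // candidate exists on the search path, instead of dlopen'ing "".
    static void* load_optional_lib(const char* search_path, const char* base_name) {
      char buffer[MAXPATHLEN];
      char ebuf[1024];
      if (!os::dll_build_name(buffer, sizeof(buffer), search_path, base_name)) {
        return NULL;
      }
      return os::dll_load(buffer, ebuf, sizeof(ebuf));
    }
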
@@ -2195,7 +2180,7 @@
 
 static char saved_jvm_path[MAXPATHLEN] = {0};
 
-// Find the full path to the current module, libjvm.so or libjvm_g.so
+// Find the full path to the current module, libjvm.so
 void os::jvm_path(char *buf, jint buflen) {
   // Error checking.
   if (buflen < MAXPATHLEN) {
@@ -2238,10 +2223,9 @@
         char* jrelib_p;
         int len;
 
-        // Check the current module name "libjvm.so" or "libjvm_g.so".
+        // Check the current module name "libjvm.so".
         p = strrchr(buf, '/');
         assert(strstr(p, "/libjvm") == p, "invalid library name");
-        p = strstr(p, "_g") ? "_g" : "";
 
         rp = realpath(java_home_var, buf);
         if (rp == NULL)
@@ -2257,11 +2241,9 @@
         }
 
         if (0 == access(buf, F_OK)) {
-          // Use current module name "libjvm[_g].so" instead of
-          // "libjvm"debug_only("_g")".so" since for fastdebug version
-          // we should have "libjvm.so" but debug_only("_g") adds "_g"!
+          // Use current module name "libjvm.so"
           len = strlen(buf);
-          snprintf(buf + len, buflen-len, "/hotspot/libjvm%s.so", p);
+          snprintf(buf + len, buflen-len, "/hotspot/libjvm.so");
         } else {
           // Go back to path of .so
           rp = realpath(dli_fname, buf);
@@ -5019,11 +5001,12 @@
      }
      -- _nParked ;
 
-    // In theory we could move the ST of 0 into _Event past the unlock(),
-    // but then we'd need a MEMBAR after the ST.
     _Event = 0 ;
      status = pthread_mutex_unlock(_mutex);
      assert_status(status == 0, status, "mutex_unlock");
+    // Paranoia to ensure our locked and lock-free paths interact
+    // correctly with each other.
+    OrderAccess::fence();
   }
   guarantee (_Event >= 0, "invariant") ;
 }
@@ -5086,40 +5069,44 @@
   status = pthread_mutex_unlock(_mutex);
   assert_status(status == 0, status, "mutex_unlock");
   assert (_nParked == 0, "invariant") ;
+  // Paranoia to ensure our locked and lock-free paths interact
+  // correctly with each other.
+  OrderAccess::fence();
   return ret;
 }
 
 void os::PlatformEvent::unpark() {
-  int v, AnyWaiters ;
-  for (;;) {
-      v = _Event ;
-      if (v > 0) {
-         // The LD of _Event could have reordered or be satisfied
-         // by a read-aside from this processor's write buffer.
-         // To avoid problems execute a barrier and then
-         // ratify the value.
-         OrderAccess::fence() ;
-         if (_Event == v) return ;
-         continue ;
-      }
-      if (Atomic::cmpxchg (v+1, &_Event, v) == v) break ;
-  }
-  if (v < 0) {
-     // Wait for the thread associated with the event to vacate
-     int status = pthread_mutex_lock(_mutex);
-     assert_status(status == 0, status, "mutex_lock");
-     AnyWaiters = _nParked ;
-     assert (AnyWaiters == 0 || AnyWaiters == 1, "invariant") ;
-     if (AnyWaiters != 0 && WorkAroundNPTLTimedWaitHang) {
-        AnyWaiters = 0 ;
-        pthread_cond_signal (_cond);
-     }
-     status = pthread_mutex_unlock(_mutex);
-     assert_status(status == 0, status, "mutex_unlock");
-     if (AnyWaiters != 0) {
-        status = pthread_cond_signal(_cond);
-        assert_status(status == 0, status, "cond_signal");
-     }
+  // Transitions for _Event:
+  //    0 :=> 1
+  //    1 :=> 1
+  //   -1 :=> either 0 or 1; must signal target thread
+  //          That is, we can safely transition _Event from -1 to either
+  //          0 or 1. Forcing 1 is slightly more efficient for back-to-back
+  //          unpark() calls.
+  // See also: "Semaphores in Plan 9" by Mullender & Cox
+  //
+  // Note: Forcing a transition from "-1" to "1" on an unpark() means
+  // that it will take two back-to-back park() calls for the owning
+  // thread to block. This has the benefit of forcing a spurious return
+  // from the first park() call after an unpark() call which will help
+  // shake out uses of park() and unpark() without condition variables.
+
+  if (Atomic::xchg(1, &_Event) >= 0) return;
+
+  // Wait for the thread associated with the event to vacate
+  int status = pthread_mutex_lock(_mutex);
+  assert_status(status == 0, status, "mutex_lock");
+  int AnyWaiters = _nParked;
+  assert(AnyWaiters == 0 || AnyWaiters == 1, "invariant");
+  if (AnyWaiters != 0 && WorkAroundNPTLTimedWaitHang) {
+    AnyWaiters = 0;
+    pthread_cond_signal(_cond);
+  }
+  status = pthread_mutex_unlock(_mutex);
+  assert_status(status == 0, status, "mutex_unlock");
+  if (AnyWaiters != 0) {
+    status = pthread_cond_signal(_cond);
+    assert_status(status == 0, status, "cond_signal");
   }
 
   // Note that we signal() _after dropping the lock for "immortal" Events.
@@ -5205,13 +5192,14 @@
 }
 
 void Parker::park(bool isAbsolute, jlong time) {
+  // Ideally we'd do something useful while spinning, such
+  // as calling unpackTime().
+
   // Optional fast-path check:
   // Return immediately if a permit is available.
-  if (_counter > 0) {
-      _counter = 0 ;
-      OrderAccess::fence();
-      return ;
-  }
+  // We depend on Atomic::xchg() having full barrier semantics
+  // since we are doing a lock-free update to _counter.
+  if (Atomic::xchg(0, &_counter) > 0) return;
 
   Thread* thread = Thread::current();
   assert(thread->is_Java_thread(), "Must be JavaThread");
@@ -5252,6 +5240,8 @@
     _counter = 0;
     status = pthread_mutex_unlock(_mutex);
     assert (status == 0, "invariant") ;
+    // Paranoia to ensure our locked and lock-free paths interact
+    // correctly with each other and Java-level accesses.
     OrderAccess::fence();
     return;
   }
@@ -5288,12 +5278,14 @@
   _counter = 0 ;
   status = pthread_mutex_unlock(_mutex) ;
   assert_status(status == 0, status, "invariant") ;
+  // Paranoia to ensure our locked and lock-free paths interact
+  // correctly with each other and Java-level accesses.
+  OrderAccess::fence();
+
   // If externally suspended while waiting, re-suspend
   if (jt->handle_special_suspend_equivalent_condition()) {
     jt->java_suspend_self();
   }
-
-  OrderAccess::fence();
 }
 
 void Parker::unpark() {
--- a/src/os/linux/vm/os_linux.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/linux/vm/os_linux.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,25 +26,22 @@
 #define OS_LINUX_VM_OS_LINUX_INLINE_HPP
 
 #include "runtime/atomic.hpp"
+#include "runtime/atomic.inline.hpp"
 #include "runtime/os.hpp"
+
 #ifdef TARGET_OS_ARCH_linux_x86
-# include "atomic_linux_x86.inline.hpp"
 # include "orderAccess_linux_x86.inline.hpp"
 #endif
 #ifdef TARGET_OS_ARCH_linux_sparc
-# include "atomic_linux_sparc.inline.hpp"
 # include "orderAccess_linux_sparc.inline.hpp"
 #endif
 #ifdef TARGET_OS_ARCH_linux_zero
-# include "atomic_linux_zero.inline.hpp"
 # include "orderAccess_linux_zero.inline.hpp"
 #endif
 #ifdef TARGET_OS_ARCH_linux_arm
-# include "atomic_linux_arm.inline.hpp"
 # include "orderAccess_linux_arm.inline.hpp"
 #endif
 #ifdef TARGET_OS_ARCH_linux_ppc
-# include "atomic_linux_ppc.inline.hpp"
 # include "orderAccess_linux_ppc.inline.hpp"
 #endif
 
@@ -71,14 +68,6 @@
   return ":";
 }
 
-inline const char* os::jlong_format_specifier() {
-  return "%lld";
-}
-
-inline const char* os::julong_format_specifier() {
-  return "%llu";
-}
-
 // File names are case-sensitive on windows only
 inline int os::file_name_strcmp(const char* s1, const char* s2) {
   return strcmp(s1, s2);
--- a/src/os/linux/vm/perfMemory_linux.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/linux/vm/perfMemory_linux.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,6 +30,7 @@
 #include "os_linux.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/perfMemory.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/exceptions.hpp"
 
 // put OS-includes here
@@ -753,6 +754,10 @@
   // clear the shared memory region
   (void)::memset((void*) mapAddress, 0, size);
 
+  // This does not go through the OS API, so the reservation has to be recorded here.
+  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+  MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
   return mapAddress;
 }
 
@@ -912,6 +917,10 @@
               "Could not map PerfMemory");
   }
 
+  // This does not go through the OS API, so the reservation has to be recorded here.
+  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+  MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
   *addr = mapAddress;
   *sizep = size;
 
--- a/src/os/linux/vm/threadCritical_linux.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/linux/vm/threadCritical_linux.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
-#include "thread_linux.inline.hpp"
 
 // put OS-includes here
 # include <pthread.h>
--- a/src/os/linux/vm/thread_linux.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/linux/vm/thread_linux.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,10 @@
 #ifndef OS_LINUX_VM_THREAD_LINUX_INLINE_HPP
 #define OS_LINUX_VM_THREAD_LINUX_INLINE_HPP
 
+#ifndef SHARE_VM_RUNTIME_THREAD_INLINE_HPP_SCOPE
+#error "This file should only be included from thread.inline.hpp"
+#endif
+
 #include "runtime/atomic.hpp"
 #include "runtime/prefetch.hpp"
 #include "runtime/thread.hpp"
--- a/src/os/posix/launcher/java_md.c	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/posix/launcher/java_md.c	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1876,11 +1876,6 @@
     }
 }
 
-const char *
-jlong_format_specifier() {
-    return "%lld";
-}
-
 /*
  * Block current thread and continue execution in a new thread
  */
--- a/src/os/posix/launcher/java_md.h	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/posix/launcher/java_md.h	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -64,6 +64,12 @@
 #define Counter2Micros(counts)    (1)
 #endif /* HAVE_GETHRTIME */
 
+#ifdef _LP64
+#define JLONG_FORMAT "%ld"
+#else
+#define JLONG_FORMAT "%lld"
+#endif
+
 /*
  * Function prototypes.
  */
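
Replacing the run-time jlong_format_specifier() call with the JLONG_FORMAT macro lets format strings be assembled by compile-time literal concatenation. A sketch (jlong is the launcher's 64-bit integer typedef):

    #include <stdio.h>

    // "%ld" on LP64, "%lld" on ILP32, "%I64d" on Windows.
    static void print_elapsed(jlong micros) {
      printf("elapsed: " JLONG_FORMAT " us\n", micros);
    }
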
--- a/src/os/posix/vm/os_posix.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/posix/vm/os_posix.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -93,6 +93,47 @@
   return;
 }
 
+// Multiple threads can race in this code and remap over each other with MAP_FIXED,
+// so on POSIX we unmap the sections at the start and at the end of the mapped chunk
+// rather than unmapping and remapping the whole chunk to get the requested alignment.
+char* os::reserve_memory_aligned(size_t size, size_t alignment) {
+  assert((alignment & (os::vm_allocation_granularity() - 1)) == 0,
+      "Alignment must be a multiple of allocation granularity (page size)");
+  assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned");
+
+  size_t extra_size = size + alignment;
+  assert(extra_size >= size, "overflow, size is too large to allow alignment");
+
+  char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
+
+  if (extra_base == NULL) {
+    return NULL;
+  }
+
+  // Do manual alignment
+  char* aligned_base = (char*) align_size_up((uintptr_t) extra_base, alignment);
+
+  // [  |                                       |  ]
+  // ^ extra_base
+  //    ^ extra_base + begin_offset == aligned_base
+  //     extra_base + begin_offset + size       ^
+  //                       extra_base + extra_size ^
+  // |<>| == begin_offset
+  //                              end_offset == |<>|
+  size_t begin_offset = aligned_base - extra_base;
+  size_t end_offset = (extra_base + extra_size) - (aligned_base + size);
+
+  if (begin_offset > 0) {
+      os::release_memory(extra_base, begin_offset);
+  }
+
+  if (end_offset > 0) {
+      os::release_memory(extra_base + begin_offset + size, end_offset);
+  }
+
+  return aligned_base;
+}
+
 void os::Posix::print_load_average(outputStream* st) {
   st->print("load average:");
   double loadavg[3];
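
A worked example of the trimming arithmetic above, with hypothetical numbers: size = 4*M, alignment = 1*M, so extra_size = 5*M, and the two released slices add up to exactly one alignment unit (align_size_up and M come from globalDefinitions.hpp):

    static void alignment_example() {
      char*  extra_base   = (char*) 0x7f8000380000;  // pretend reserve_memory() result
      size_t size         = 4*M;
      size_t alignment    = 1*M;
      size_t extra_size   = size + alignment;        // 5*M reserved in total
      char*  aligned_base = (char*) align_size_up((uintptr_t) extra_base, alignment);
      // aligned_base == 0x7f8000400000
      size_t begin_offset = aligned_base - extra_base;                        // 512K slice
      size_t end_offset   = (extra_base + extra_size) - (aligned_base + size); // 512K slice
      // os::release_memory() trims both slices, leaving exactly
      // [aligned_base, aligned_base + 4*M) reserved and 1*M-aligned.
      (void) begin_offset; (void) end_offset;  // silence unused warnings in this sketch
    }
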
--- a/src/os/solaris/vm/mutex_solaris.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/solaris/vm/mutex_solaris.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,7 +26,7 @@
 #include "mutex_solaris.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/mutex.hpp"
-#include "thread_solaris.inline.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/events.hpp"
 
 // Solaris-specific include, therefore not in includeDB_*
--- a/src/os/solaris/vm/mutex_solaris.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/solaris/vm/mutex_solaris.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,6 +27,6 @@
 
 #include "os_solaris.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
-#include "thread_solaris.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 #endif // OS_SOLARIS_VM_MUTEX_SOLARIS_INLINE_HPP
--- a/src/os/solaris/vm/osThread_solaris.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/solaris/vm/osThread_solaris.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,14 +30,8 @@
 #include "runtime/osThread.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/vmThread.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-#endif
 
-# include <signal.h>
+#include <signal.h>
 
  // ***************************************************************
  // Platform dependent initialization and cleanup
--- a/src/os/solaris/vm/os_solaris.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/solaris/vm/os_solaris.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
 #include "compiler/compileBroker.hpp"
+#include "compiler/disassembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "jvm_solaris.h"
 #include "memory/allocation.inline.hpp"
@@ -52,24 +53,17 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/statSampler.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
 #include "runtime/timer.hpp"
 #include "services/attachListener.hpp"
+#include "services/memTracker.hpp"
 #include "services/runtimeService.hpp"
-#include "thread_solaris.inline.hpp"
 #include "utilities/decoder.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/events.hpp"
 #include "utilities/growableArray.hpp"
 #include "utilities/vmError.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-# include "nativeInst_x86.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-# include "nativeInst_sparc.hpp"
-#endif
 
 // put OS-includes here
 # include <dlfcn.h>
@@ -740,12 +734,12 @@
 
   // The next steps are taken in the product version:
   //
-  // Obtain the JAVA_HOME value from the location of libjvm[_g].so.
+  // Obtain the JAVA_HOME value from the location of libjvm.so.
   // This library should be located at:
-  // <JAVA_HOME>/jre/lib/<arch>/{client|server}/libjvm[_g].so.
+  // <JAVA_HOME>/jre/lib/<arch>/{client|server}/libjvm.so.
   //
   // If "/jre/lib/" appears at the right place in the path, then we
-  // assume libjvm[_g].so is installed in a JDK and we use this path.
+  // assume libjvm.so is installed in a JDK and we use this path.
   //
   // Otherwise exit with message: "Could not create the Java virtual machine."
   //
@@ -755,9 +749,9 @@
   // instead of exit check for $JAVA_HOME environment variable.
   //
   // If it is defined and we are able to locate $JAVA_HOME/jre/lib/<arch>,
-  // then we append a fake suffix "hotspot/libjvm[_g].so" to this path so
-  // it looks like libjvm[_g].so is installed there
-  // <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm[_g].so.
+  // then we append a fake suffix "hotspot/libjvm.so" to this path so
+  // it looks like libjvm.so is installed there
+  // <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm.so.
   //
   // Otherwise exit.
   //
@@ -1893,18 +1887,19 @@
   return os::stat(filename, &statbuf) == 0;
 }
 
-void os::dll_build_name(char* buffer, size_t buflen,
+bool os::dll_build_name(char* buffer, size_t buflen,
                         const char* pname, const char* fname) {
+  bool retval = false;
   const size_t pnamelen = pname ? strlen(pname) : 0;
 
-  // Quietly truncate on buffer overflow.  Should be an error.
+  // Return error on buffer overflow.
   if (pnamelen + strlen(fname) + 10 > (size_t) buflen) {
-    *buffer = '\0';
-    return;
+    return retval;
   }
 
   if (pnamelen == 0) {
     snprintf(buffer, buflen, "lib%s.so", fname);
+    retval = true;
   } else if (strchr(pname, *os::path_separator()) != NULL) {
     int n;
     char** pelements = split_path(pname, &n);
@@ -1915,6 +1910,7 @@
       }
       snprintf(buffer, buflen, "%s/lib%s.so", pelements[i], fname);
       if (file_exists(buffer)) {
+        retval = true;
         break;
       }
     }
@@ -1929,14 +1925,16 @@
     }
   } else {
     snprintf(buffer, buflen, "%s/lib%s.so", pname, fname);
-  }
+    retval = true;
+  }
+  return retval;
 }
 
 const char* os::get_current_directory(char *buf, int buflen) {
   return getcwd(buf, buflen);
 }
 
-// check if addr is inside libjvm[_g].so
+// check if addr is inside libjvm.so
 bool os::address_is_in_vm(address addr) {
   static address libjvm_base_addr;
   Dl_info dlinfo;
@@ -2476,7 +2474,7 @@
 
 static char saved_jvm_path[MAXPATHLEN] = { 0 };
 
-// Find the full path to the current module, libjvm.so or libjvm_g.so
+// Find the full path to the current module, libjvm.so
 void os::jvm_path(char *buf, jint buflen) {
   // Error checking.
   if (buflen < MAXPATHLEN) {
@@ -2524,10 +2522,9 @@
           strcpy(cpu_arch, "amd64");
         }
 #endif
-        // Check the current module name "libjvm.so" or "libjvm_g.so".
+        // Check the current module name "libjvm.so".
         p = strrchr(buf, '/');
         assert(strstr(p, "/libjvm") == p, "invalid library name");
-        p = strstr(p, "_g") ? "_g" : "";
 
         realpath(java_home_var, buf);
         // determine if this is a legacy image or modules image
@@ -2540,11 +2537,9 @@
         }
 
         if (0 == access(buf, F_OK)) {
-          // Use current module name "libjvm[_g].so" instead of
-          // "libjvm"debug_only("_g")".so" since for fastdebug version
-          // we should have "libjvm.so" but debug_only("_g") adds "_g"!
+          // Use current module name "libjvm.so"
           len = strlen(buf);
-          snprintf(buf + len, buflen-len, "/hotspot/libjvm%s.so", p);
+          snprintf(buf + len, buflen-len, "/hotspot/libjvm.so");
         } else {
           // Go back to path of .so
           realpath((char *)dlinfo.dli_fname, buf);
@@ -3072,11 +3067,12 @@
   // Since snv_84, Solaris attempts to honor the address hint - see 5003415.
   // Give it a try, if the kernel honors the hint we can return immediately.
   char* addr = Solaris::anon_mmap(requested_addr, bytes, 0, false);
+
   volatile int err = errno;
   if (addr == requested_addr) {
     return addr;
   } else if (addr != NULL) {
-    unmap_memory(addr, bytes);
+    pd_unmap_memory(addr, bytes);
   }
 
   if (PrintMiscellaneous && Verbose) {
@@ -6018,6 +6014,9 @@
      _Event = 0 ;
      status = os::Solaris::mutex_unlock(_mutex);
      assert_status(status == 0, status, "mutex_unlock");
+    // Paranoia to ensure our locked and lock-free paths interact
+    // correctly with each other.
+    OrderAccess::fence();
   }
 }
 
@@ -6059,51 +6058,43 @@
   _Event = 0 ;
   status = os::Solaris::mutex_unlock(_mutex);
   assert_status(status == 0, status, "mutex_unlock");
+  // Paranoia to ensure our locked and lock-free paths interact
+  // correctly with each other.
+  OrderAccess::fence();
   return ret;
 }
 
 void os::PlatformEvent::unpark() {
-  int v, AnyWaiters;
-
-  // Increment _Event.
-  // Another acceptable implementation would be to simply swap 1
-  // into _Event:
-  //   if (Swap (&_Event, 1) < 0) {
-  //      mutex_lock (_mutex) ; AnyWaiters = nParked; mutex_unlock (_mutex) ;
-  //      if (AnyWaiters) cond_signal (_cond) ;
-  //   }
-
-  for (;;) {
-    v = _Event ;
-    if (v > 0) {
-       // The LD of _Event could have reordered or be satisfied
-       // by a read-aside from this processor's write buffer.
-       // To avoid problems execute a barrier and then
-       // ratify the value.  A degenerate CAS() would also work.
-       // Viz., CAS (v+0, &_Event, v) == v).
-       OrderAccess::fence() ;
-       if (_Event == v) return ;
-       continue ;
-    }
-    if (Atomic::cmpxchg (v+1, &_Event, v) == v) break ;
-  }
+  // Transitions for _Event:
+  //    0 :=> 1
+  //    1 :=> 1
+  //   -1 :=> either 0 or 1; must signal target thread
+  //          That is, we can safely transition _Event from -1 to either
+  //          0 or 1. Forcing 1 is slightly more efficient for back-to-back
+  //          unpark() calls.
+  // See also: "Semaphores in Plan 9" by Mullender & Cox
+  //
+  // Note: Forcing a transition from "-1" to "1" on an unpark() means
+  // that it will take two back-to-back park() calls for the owning
+  // thread to block. This has the benefit of forcing a spurious return
+  // from the first park() call after an unpark() call which will help
+  // shake out uses of park() and unpark() without condition variables.
+
+  if (Atomic::xchg(1, &_Event) >= 0) return;
 
   // If the thread associated with the event was parked, wake it.
-  if (v < 0) {
-     int status ;
-     // Wait for the thread assoc with the PlatformEvent to vacate.
-     status = os::Solaris::mutex_lock(_mutex);
-     assert_status(status == 0, status, "mutex_lock");
-     AnyWaiters = _nParked ;
-     status = os::Solaris::mutex_unlock(_mutex);
-     assert_status(status == 0, status, "mutex_unlock");
-     guarantee (AnyWaiters == 0 || AnyWaiters == 1, "invariant") ;
-     if (AnyWaiters != 0) {
-       // We intentional signal *after* dropping the lock
-       // to avoid a common class of futile wakeups.
-       status = os::Solaris::cond_signal(_cond);
-       assert_status(status == 0, status, "cond_signal");
-     }
+  // Wait for the thread associated with the PlatformEvent to vacate.
+  int status = os::Solaris::mutex_lock(_mutex);
+  assert_status(status == 0, status, "mutex_lock");
+  int AnyWaiters = _nParked;
+  status = os::Solaris::mutex_unlock(_mutex);
+  assert_status(status == 0, status, "mutex_unlock");
+  guarantee(AnyWaiters == 0 || AnyWaiters == 1, "invariant");
+  if (AnyWaiters != 0) {
+    // We intentionally signal *after* dropping the lock
+    // to avoid a common class of futile wakeups.
+    status = os::Solaris::cond_signal(_cond);
+    assert_status(status == 0, status, "cond_signal");
   }
 }
 
@@ -6181,14 +6172,14 @@
 }
 
 void Parker::park(bool isAbsolute, jlong time) {
+  // Ideally we'd do something useful while spinning, such
+  // as calling unpackTime().
 
   // Optional fast-path check:
   // Return immediately if a permit is available.
-  if (_counter > 0) {
-      _counter = 0 ;
-      OrderAccess::fence();
-      return ;
-  }
+  // We depend on Atomic::xchg() having full barrier semantics
+  // since we are doing a lock-free update to _counter.
+  if (Atomic::xchg(0, &_counter) > 0) return;
 
   // Optional fast-exit: Check interrupt before trying to wait
   Thread* thread = Thread::current();
@@ -6230,6 +6221,8 @@
     _counter = 0;
     status = os::Solaris::mutex_unlock(_mutex);
     assert (status == 0, "invariant") ;
+    // Paranoia to ensure our locked and lock-free paths interact
+    // correctly with each other and Java-level accesses.
     OrderAccess::fence();
     return;
   }
@@ -6271,12 +6264,14 @@
   _counter = 0 ;
   status = os::Solaris::mutex_unlock(_mutex);
   assert_status(status == 0, status, "mutex_unlock") ;
+  // Paranoia to ensure our locked and lock-free paths interact
+  // correctly with each other and Java-level accesses.
+  OrderAccess::fence();
 
   // If externally suspended while waiting, re-suspend
   if (jt->handle_special_suspend_equivalent_condition()) {
     jt->java_suspend_self();
   }
-  OrderAccess::fence();
 }
 
 void Parker::unpark() {
--- a/src/os/solaris/vm/os_solaris.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/solaris/vm/os_solaris.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,13 +26,13 @@
 #define OS_SOLARIS_VM_OS_SOLARIS_INLINE_HPP
 
 #include "runtime/atomic.hpp"
+#include "runtime/atomic.inline.hpp"
 #include "runtime/os.hpp"
+
 #ifdef TARGET_OS_ARCH_solaris_x86
-# include "atomic_solaris_x86.inline.hpp"
 # include "orderAccess_solaris_x86.inline.hpp"
 #endif
 #ifdef TARGET_OS_ARCH_solaris_sparc
-# include "atomic_solaris_sparc.inline.hpp"
 # include "orderAccess_solaris_sparc.inline.hpp"
 #endif
 
@@ -50,9 +50,6 @@
 inline const char* os::line_separator() { return "\n"; }
 inline const char* os::path_separator() { return ":"; }
 
-inline const char* os::jlong_format_specifier()   { return "%lld"; }
-inline const char* os::julong_format_specifier()  { return "%llu"; }
-
 // File names are case-sensitive on windows only
 inline int os::file_name_strcmp(const char* s1, const char* s2) {
   return strcmp(s1, s2);
--- a/src/os/solaris/vm/perfMemory_solaris.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/solaris/vm/perfMemory_solaris.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,6 +30,7 @@
 #include "os_solaris.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/perfMemory.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/exceptions.hpp"
 
 // put OS-includes here
@@ -768,6 +769,10 @@
   // clear the shared memory region
   (void)::memset((void*) mapAddress, 0, size);
 
+  // This does not go through the OS API, so the reservation has to be recorded here.
+  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+  MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
   return mapAddress;
 }
 
@@ -927,6 +932,10 @@
               "Could not map PerfMemory");
   }
 
+  // This does not go through the OS API, so the reservation has to be recorded here.
+  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+  MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
   *addr = mapAddress;
   *sizep = size;
 
--- a/src/os/solaris/vm/threadCritical_solaris.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/solaris/vm/threadCritical_solaris.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
-#include "thread_solaris.inline.hpp"
 
 // OS-includes here
 #include <thread.h>
--- a/src/os/solaris/vm/thread_solaris.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/solaris/vm/thread_solaris.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,10 @@
 #ifndef OS_SOLARIS_VM_THREAD_SOLARIS_INLINE_HPP
 #define OS_SOLARIS_VM_THREAD_SOLARIS_INLINE_HPP
 
+#ifndef SHARE_VM_RUNTIME_THREAD_INLINE_HPP_SCOPE
+#error "This file should only be included from thread.inline.hpp"
+#endif
+
 #include "runtime/atomic.hpp"
 #include "runtime/prefetch.hpp"
 #include "runtime/thread.hpp"
--- a/src/os/windows/launcher/java_md.c	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/launcher/java_md.c	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1323,11 +1323,6 @@
     }
 }
 
-const char *
-jlong_format_specifier() {
-    return "%I64d";
-}
-
 /*
  * Block current thread and continue execution in a new thread
  */
--- a/src/os/windows/launcher/java_md.h	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/launcher/java_md.h	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -69,6 +69,8 @@
 extern int _main(int argc, char **argv);
 #endif
 
+#define JLONG_FORMAT "%I64d"
+
 /*
  * Function prototypes.
  */
--- a/src/os/windows/vm/decoder_windows.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/vm/decoder_windows.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -49,7 +49,7 @@
     pfn_SymSetOptions _pfnSymSetOptions = (pfn_SymSetOptions)::GetProcAddress(handle, "SymSetOptions");
     pfn_SymInitialize _pfnSymInitialize = (pfn_SymInitialize)::GetProcAddress(handle, "SymInitialize");
     _pfnSymGetSymFromAddr64 = (pfn_SymGetSymFromAddr64)::GetProcAddress(handle, "SymGetSymFromAddr64");
-    _pfnUndecorateSymbolName = (pfn_UndecorateSymbolName)GetProcAddress(handle, "UnDecorateSymbolName");
+    _pfnUndecorateSymbolName = (pfn_UndecorateSymbolName)::GetProcAddress(handle, "UnDecorateSymbolName");
 
     if (_pfnSymSetOptions == NULL || _pfnSymInitialize == NULL || _pfnSymGetSymFromAddr64 == NULL) {
       _pfnSymGetSymFromAddr64 = NULL;
@@ -60,8 +60,9 @@
       return;
     }
 
-    _pfnSymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS);
-    if (!_pfnSymInitialize(GetCurrentProcess(), NULL, TRUE)) {
+    HANDLE hProcess = ::GetCurrentProcess();
+    _pfnSymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_EXACT_SYMBOLS);
+    if (!_pfnSymInitialize(hProcess, NULL, TRUE)) {
       _pfnSymGetSymFromAddr64 = NULL;
       _pfnUndecorateSymbolName = NULL;
       ::FreeLibrary(handle);
@@ -70,6 +71,77 @@
       return;
     }
 
+    // set pdb search paths
+    pfn_SymSetSearchPath  _pfn_SymSetSearchPath =
+      (pfn_SymSetSearchPath)::GetProcAddress(handle, "SymSetSearchPath");
+    pfn_SymGetSearchPath  _pfn_SymGetSearchPath =
+      (pfn_SymGetSearchPath)::GetProcAddress(handle, "SymGetSearchPath");
+    if (_pfn_SymSetSearchPath != NULL && _pfn_SymGetSearchPath != NULL) {
+      char paths[MAX_PATH];
+      int  len = sizeof(paths);
+      if (!_pfn_SymGetSearchPath(hProcess, paths, len)) {
+        paths[0] = '\0';
+      } else {
+        // available space remaining in the path buffer
+        len -= (int)strlen(paths);
+      }
+
+      char tmp_path[MAX_PATH];
+      DWORD dwSize;
+      HMODULE hJVM = ::GetModuleHandle("jvm.dll");
+      tmp_path[0] = '\0';
+      // append the path where jvm.dll is located
+      if (hJVM != NULL && (dwSize = ::GetModuleFileName(hJVM, tmp_path, sizeof(tmp_path))) > 0) {
+        while (dwSize > 0 && tmp_path[dwSize] != '\\') {
+          dwSize --;
+        }
+
+        tmp_path[dwSize] = '\0';
+
+        if (dwSize > 0 && len > (int)dwSize + 1) {
+          strncat(paths, os::path_separator(), 1);
+          strncat(paths, tmp_path, dwSize);
+          len -= dwSize + 1;
+        }
+      }
+
+      // append $JRE/bin. Arguments::get_java_home actually returns the
+      // $JRE path.
+      char *p = Arguments::get_java_home();
+      assert(p != NULL, "empty java home");
+      size_t java_home_len = strlen(p);
+      if (len > (int)java_home_len + 5) {
+        strncat(paths, os::path_separator(), 1);
+        strncat(paths, p, java_home_len);
+        strncat(paths, "\\bin", 4);
+        len -= (int)(java_home_len + 5);
+      }
+
+      // append $JDK/bin path if it exists
+      assert(java_home_len < MAX_PATH, "Invalid path length");
+      // assume $JRE is under $JDK, construct $JDK/bin path and
+      // see if it exists or not
+      if (strncmp(&p[java_home_len - 3], "jre", 3) == 0) {
+        strncpy(tmp_path, p, java_home_len - 3);
+        tmp_path[java_home_len - 3] = '\0';
+        strncat(tmp_path, "bin", 3);
+
+        // if the directory exists
+        DWORD dwAttrib = GetFileAttributes(tmp_path);
+        if (dwAttrib != INVALID_FILE_ATTRIBUTES &&
+            (dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) {
+          // tmp_path should have the same length as java_home_len, since we only
+          // replaced 'jre' with 'bin'
+          if (len > (int)java_home_len + 1) {
+            strncat(paths, os::path_separator(), 1);
+            strncat(paths, tmp_path, java_home_len);
+          }
+        }
+      }
+
+      _pfn_SymSetSearchPath(hProcess, paths);
+    }
+
      // find out if jvm.dll contains private symbols, by decoding
      // current function and comparing the result
      address addr = (address)Decoder::demangle;
--- a/src/os/windows/vm/decoder_windows.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/vm/decoder_windows.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -35,6 +35,8 @@
 typedef BOOL  (WINAPI *pfn_SymInitialize)(HANDLE, PCTSTR, BOOL);
 typedef BOOL  (WINAPI *pfn_SymGetSymFromAddr64)(HANDLE, DWORD64, PDWORD64, PIMAGEHLP_SYMBOL64);
 typedef DWORD (WINAPI *pfn_UndecorateSymbolName)(const char*, char*, DWORD, DWORD);
+typedef BOOL  (WINAPI *pfn_SymSetSearchPath)(HANDLE, PCTSTR);
+typedef BOOL  (WINAPI *pfn_SymGetSearchPath)(HANDLE, PTSTR, int);
 
 class WindowsDecoder : public AbstractDecoder {
 
--- a/src/os/windows/vm/mutex_windows.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/vm/mutex_windows.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,7 +26,7 @@
 #include "mutex_windows.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/mutex.hpp"
-#include "thread_windows.inline.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/events.hpp"
 
 // put OS-includes here
--- a/src/os/windows/vm/mutex_windows.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/vm/mutex_windows.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,6 +27,6 @@
 
 #include "os_windows.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
-#include "thread_windows.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 #endif // OS_WINDOWS_VM_MUTEX_WINDOWS_INLINE_HPP
--- a/src/os/windows/vm/osThread_windows.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/vm/osThread_windows.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,9 +30,6 @@
 #include "runtime/osThread.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/vmThread.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-#endif
 
 void OSThread::pd_initialize() {
   set_thread_handle(NULL);
--- a/src/os/windows/vm/os_windows.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/vm/os_windows.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,7 +22,7 @@
  *
  */
 
-// Must be at least Windows 2000 or XP to use VectoredExceptions and IsDebuggerPresent
+// Must be at least Windows 2000 or XP to use IsDebuggerPresent
 #define _WIN32_WINNT 0x500
 
 // no precompiled headers
@@ -32,6 +32,7 @@
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
 #include "compiler/compileBroker.hpp"
+#include "compiler/disassembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "jvm_windows.h"
 #include "memory/allocation.inline.hpp"
@@ -55,20 +56,16 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/statSampler.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
 #include "runtime/timer.hpp"
 #include "services/attachListener.hpp"
 #include "services/runtimeService.hpp"
-#include "thread_windows.inline.hpp"
 #include "utilities/decoder.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/events.hpp"
 #include "utilities/growableArray.hpp"
 #include "utilities/vmError.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-# include "nativeInst_x86.hpp"
-#endif
 
 #ifdef _DEBUG
 #include <crtdbg.h>
@@ -110,10 +107,6 @@
 static FILETIME process_user_time;
 static FILETIME process_kernel_time;
 
-#ifdef _WIN64
-PVOID  topLevelVectoredExceptionHandler = NULL;
-#endif
-
 #ifdef _M_IA64
 #define __CPU__ ia64
 #elif _M_AMD64
@@ -136,12 +129,6 @@
     case DLL_PROCESS_DETACH:
       if(ForceTimeHighResolution)
         timeEndPeriod(1L);
-#ifdef _WIN64
-      if (topLevelVectoredExceptionHandler != NULL) {
-        RemoveVectoredExceptionHandler(topLevelVectoredExceptionHandler);
-        topLevelVectoredExceptionHandler = NULL;
-      }
-#endif
       break;
     default:
       break;
@@ -195,7 +182,7 @@
 
       if (!getenv("_ALT_JAVA_HOME_DIR", home_dir, MAX_PATH)) {
           os::jvm_path(home_dir, sizeof(home_dir));
-          // Found the full path to jvm[_g].dll.
+          // Found the full path to jvm.dll.
           // Now cut the path to <java_home>/jre if we can.
           *(strrchr(home_dir, '\\')) = '\0';  /* get rid of \jvm.dll */
           pslash = strrchr(home_dir, '\\');
@@ -408,20 +395,14 @@
   }
 
 
-  if (UseVectoredExceptions) {
-    // If we are using vectored exception we don't need to set a SEH
-    thread->run();
-  }
-  else {
-    // Install a win32 structured exception handler around every thread created
-    // by VM, so VM can genrate error dump when an exception occurred in non-
-    // Java thread (e.g. VM thread).
-    __try {
-       thread->run();
-    } __except(topLevelExceptionFilter(
-               (_EXCEPTION_POINTERS*)_exception_info())) {
-        // Nothing to do.
-    }
+  // Install a win32 structured exception handler around every thread created
+  // by the VM, so the VM can generate an error dump when an exception occurs in a non-
+  // Java thread (e.g. VM thread).
+  __try {
+     thread->run();
+  } __except(topLevelExceptionFilter(
+             (_EXCEPTION_POINTERS*)_exception_info())) {
+      // Nothing to do.
   }
 
   // One less thread is executing
@@ -1148,21 +1129,23 @@
   return GetFileAttributes(filename) != INVALID_FILE_ATTRIBUTES;
 }
 
-void os::dll_build_name(char *buffer, size_t buflen,
+bool os::dll_build_name(char *buffer, size_t buflen,
                         const char* pname, const char* fname) {
+  bool retval = false;
   const size_t pnamelen = pname ? strlen(pname) : 0;
   const char c = (pnamelen > 0) ? pname[pnamelen-1] : 0;
 
-  // Quietly truncates on buffer overflow. Should be an error.
+  // Return error on buffer overflow.
   if (pnamelen + strlen(fname) + 10 > buflen) {
-    *buffer = '\0';
-    return;
+    return retval;
   }
 
   if (pnamelen == 0) {
     jio_snprintf(buffer, buflen, "%s.dll", fname);
+    retval = true;
   } else if (c == ':' || c == '\\') {
     jio_snprintf(buffer, buflen, "%s%s.dll", pname, fname);
+    retval = true;
   } else if (strchr(pname, *os::path_separator()) != NULL) {
     int n;
     char** pelements = split_path(pname, &n);
@@ -1180,6 +1163,7 @@
         jio_snprintf(buffer, buflen, "%s\\%s.dll", path, fname);
       }
       if (file_exists(buffer)) {
+        retval = true;
         break;
       }
     }
@@ -1194,7 +1178,9 @@
     }
   } else {
     jio_snprintf(buffer, buflen, "%s\\%s.dll", pname, fname);
-  }
+    retval = true;
+  }
+  return retval;
 }
 
 // Needs to be in os specific directory because windows requires another
@@ -1729,7 +1715,7 @@
 
 static char saved_jvm_path[MAX_PATH] = {0};
 
-// Find the full path to the current module, jvm.dll or jvm_g.dll
+// Find the full path to the current module, jvm.dll
 void os::jvm_path(char *buf, jint buflen) {
   // Error checking.
   if (buflen < MAX_PATH) {
@@ -1888,8 +1874,22 @@
       }
       return TRUE;
       break;
+    case CTRL_LOGOFF_EVENT: {
+      // Don't terminate JVM if it is running in a non-interactive session,
+      // such as a service process.
+      USEROBJECTFLAGS flags;
+      HANDLE handle = GetProcessWindowStation();
+      if (handle != NULL &&
+          GetUserObjectInformation(handle, UOI_FLAGS, &flags,
+            sizeof( USEROBJECTFLAGS), NULL)) {
+        // If it is a non-interactive session, let the next handler deal
+        // with it.
+        if ((flags.dwFlags & WSF_VISIBLE) == 0) {
+          return FALSE;
+        }
+      }
+    }
     case CTRL_CLOSE_EVENT:
-    case CTRL_LOGOFF_EVENT:
     case CTRL_SHUTDOWN_EVENT:
       os::signal_raise(SIGTERM);
       return TRUE;
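
[Note: the new CTRL_LOGOFF_EVENT branch ignores logoff notifications when the process has no
visible window station, so a JVM running as a service survives user logoff while still
honoring close and shutdown events. The WSF_VISIBLE test in isolation, using only documented
Win32 calls (the function name is illustrative):

    #include <windows.h>

    // True if this process is attached to an interactive (visible) window station.
    static bool is_interactive_session() {
      USEROBJECTFLAGS flags;
      HWINSTA station = GetProcessWindowStation();
      if (station != NULL &&
          GetUserObjectInformation(station, UOI_FLAGS, &flags,
                                   sizeof(flags), NULL)) {
        return (flags.dwFlags & WSF_VISIBLE) != 0;
      }
      return true;  // on failure, conservatively assume interactive
    }
]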
@@ -2489,16 +2489,6 @@
       }
 #endif
 
-#ifdef _WIN64
-      // Windows will sometimes generate an access violation
-      // when we call malloc.  Since we use VectoredExceptions
-      // on 64 bit platforms, we see this exception.  We must
-      // pass this exception on so Windows can recover.
-      // We check to see if the pc of the fault is in NTDLL.DLL
-      // if so, we pass control on to Windows for handling.
-      if (UseVectoredExceptions && _addr_in_ntdll(pc)) return EXCEPTION_CONTINUE_SEARCH;
-#endif
-
       // Stack overflow or null pointer exception in native code.
       report_error(t, exception_code, pc, exceptionInfo->ExceptionRecord,
                    exceptionInfo->ContextRecord);
@@ -2527,30 +2517,8 @@
   }
 
   if (exception_code != EXCEPTION_BREAKPOINT) {
-#ifndef _WIN64
     report_error(t, exception_code, pc, exceptionInfo->ExceptionRecord,
                  exceptionInfo->ContextRecord);
-#else
-    // Itanium Windows uses a VectoredExceptionHandler
-    // Which means that C++ programatic exception handlers (try/except)
-    // will get here.  Continue the search for the right except block if
-    // the exception code is not a fatal code.
-    switch ( exception_code ) {
-      case EXCEPTION_ACCESS_VIOLATION:
-      case EXCEPTION_STACK_OVERFLOW:
-      case EXCEPTION_ILLEGAL_INSTRUCTION:
-      case EXCEPTION_ILLEGAL_INSTRUCTION_2:
-      case EXCEPTION_INT_OVERFLOW:
-      case EXCEPTION_INT_DIVIDE_BY_ZERO:
-      case EXCEPTION_UNCAUGHT_CXX_EXCEPTION:
-      {  report_error(t, exception_code, pc, exceptionInfo->ExceptionRecord,
-                       exceptionInfo->ContextRecord);
-      }
-        break;
-      default:
-        break;
-    }
-#endif
   }
   return EXCEPTION_CONTINUE_SEARCH;
 }
@@ -2941,6 +2909,36 @@
   }
 }
 
+// Multiple threads can race in this code, but unlike POSIX-like OSes, Windows cannot
+// unmap small sections of reserved virtual space to obtain the requested alignment.
+// Windows prevents multiple threads from remapping over each other, so this loop is thread-safe.
+char* os::reserve_memory_aligned(size_t size, size_t alignment) {
+  assert((alignment & (os::vm_allocation_granularity() - 1)) == 0,
+      "Alignment must be a multiple of allocation granularity (page size)");
+  assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned");
+
+  size_t extra_size = size + alignment;
+  assert(extra_size >= size, "overflow, size is too large to allow alignment");
+
+  char* aligned_base = NULL;
+
+  do {
+    char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
+    if (extra_base == NULL) {
+      return NULL;
+    }
+    // Do manual alignment
+    aligned_base = (char*) align_size_up((uintptr_t) extra_base, alignment);
+
+    os::release_memory(extra_base, extra_size);
+
+    aligned_base = os::reserve_memory(size, aligned_base);
+
+  } while (aligned_base == NULL);
+
+  return aligned_base;
+}
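
[Note: because Windows can only release an entire reservation, the strategy above is to
over-reserve by `alignment`, compute an aligned base inside the block, release the whole
block, and immediately re-reserve at that base, looping if another thread claims the range
in between. The same idea with raw Win32 calls, as an illustration only (the function above
properly stays on the os:: layer):

    #include <windows.h>
    #include <cstdint>

    static char* reserve_aligned(size_t size, size_t alignment) {
      for (;;) {
        // Over-reserve so an aligned base is guaranteed to exist inside.
        char* raw = (char*) VirtualAlloc(NULL, size + alignment,
                                         MEM_RESERVE, PAGE_NOACCESS);
        if (raw == NULL) return NULL;
        char* aligned = (char*) (((uintptr_t) raw + alignment - 1)
                                 & ~(uintptr_t)(alignment - 1));
        VirtualFree(raw, 0, MEM_RELEASE);   // must release the whole block
        char* base = (char*) VirtualAlloc(aligned, size,
                                          MEM_RESERVE, PAGE_NOACCESS);
        if (base != NULL) return base;      // lost the race? loop and retry
      }
    }
]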
+
 char* os::pd_reserve_memory(size_t bytes, char* addr, size_t alignment_hint) {
   assert((size_t)addr % os::vm_allocation_granularity() == 0,
          "reserve alignment");
@@ -2962,7 +2960,7 @@
     }
     if( Verbose && PrintMiscellaneous ) {
       reserveTimer.stop();
-      tty->print_cr("reserve_memory of %Ix bytes took %ld ms (%ld ticks)", bytes,
+      tty->print_cr("reserve_memory of %Ix bytes took " JLONG_FORMAT " ms (" JLONG_FORMAT " ticks)", bytes,
                     reserveTimer.milliseconds(), reserveTimer.ticks());
     }
   }
@@ -3706,18 +3704,6 @@
 
   // Setup Windows Exceptions
 
-  // On Itanium systems, Structured Exception Handling does not
-  // work since stack frames must be walkable by the OS.  Since
-  // much of our code is dynamically generated, and we do not have
-  // proper unwind .xdata sections, the system simply exits
-  // rather than delivering the exception.  To work around
-  // this we use VectorExceptions instead.
-#ifdef _WIN64
-  if (UseVectoredExceptions) {
-    topLevelVectoredExceptionHandler = AddVectoredExceptionHandler( 1, topLevelExceptionFilter);
-  }
-#endif
-
   // for debugging float code generation bugs
   if (ForceFloatExceptions) {
 #ifndef  _WIN64
@@ -4333,7 +4319,7 @@
   if (hFile == NULL) {
     if (PrintMiscellaneous && Verbose) {
       DWORD err = GetLastError();
-      tty->print_cr("CreateFile() failed: GetLastError->%ld.");
+      tty->print_cr("CreateFile() failed: GetLastError->%ld.", err);
     }
     return NULL;
   }
@@ -4383,7 +4369,7 @@
     if (hMap == NULL) {
       if (PrintMiscellaneous && Verbose) {
         DWORD err = GetLastError();
-        tty->print_cr("CreateFileMapping() failed: GetLastError->%ld.");
+        tty->print_cr("CreateFileMapping() failed: GetLastError->%ld.", err);
       }
       CloseHandle(hFile);
       return NULL;
@@ -4593,6 +4579,7 @@
     }
     v = _Event ;
     _Event = 0 ;
+    // see comment at end of os::PlatformEvent::park() below:
     OrderAccess::fence() ;
     // If we encounter a nearly simultaneous timeout expiry and unpark()
     // we return OS_OK indicating we awoke via unpark().
@@ -4630,25 +4617,25 @@
 
 void os::PlatformEvent::unpark() {
   guarantee (_ParkHandle != NULL, "Invariant") ;
-  int v ;
-  for (;;) {
-      v = _Event ;      // Increment _Event if it's < 1.
-      if (v > 0) {
-         // If it's already signaled just return.
-         // The LD of _Event could have reordered or be satisfied
-         // by a read-aside from this processor's write buffer.
-         // To avoid problems execute a barrier and then
-         // ratify the value.  A degenerate CAS() would also work.
-         // Viz., CAS (v+0, &_Event, v) == v).
-         OrderAccess::fence() ;
-         if (_Event == v) return ;
-         continue ;
-      }
-      if (Atomic::cmpxchg (v+1, &_Event, v) == v) break ;
-  }
-  if (v < 0) {
-     ::SetEvent (_ParkHandle) ;
-  }
+
+  // Transitions for _Event:
+  //    0 :=> 1
+  //    1 :=> 1
+  //   -1 :=> either 0 or 1; must signal target thread
+  //          That is, we can safely transition _Event from -1 to either
+  //          0 or 1. Forcing 1 is slightly more efficient for back-to-back
+  //          unpark() calls.
+  // See also: "Semaphores in Plan 9" by Mullender & Cox
+  //
+  // Note: Forcing a transition from "-1" to "1" on an unpark() means
+  // that it will take two back-to-back park() calls for the owning
+  // thread to block. This has the benefit of forcing a spurious return
+  // from the first park() call after an unpark() call which will help
+  // shake out uses of park() and unpark() without condition variables.
+
+  if (Atomic::xchg(1, &_Event) >= 0) return;
+
+  ::SetEvent(_ParkHandle);
 }
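
[Note: the rewritten unpark() collapses the old CAS loop into a single atomic exchange over
the tri-state _Event (-1: a thread is parked, 0: neutral, 1: signaled); only an observed -1
requires signaling the kernel event. A sketch of the protocol using std::atomic, with the
kernel calls left as comments; this models the transitions, it is not the VM's
PlatformEvent:

    #include <atomic>

    struct EventSketch {
      std::atomic<int> state{0};  // -1: a thread is parked, 0: neutral, 1: signaled

      void unpark() {
        // 0 :=> 1, 1 :=> 1, -1 :=> 1; only an old value of -1 means a
        // thread is blocked and the kernel event must be signaled.
        if (state.exchange(1) >= 0) return;
        // ::SetEvent(park_handle);  // wake the parked thread
      }

      void park() {
        // Consume a pending permit, or advertise that we are about to block.
        if (state.exchange(-1) == 1) { state.store(0); return; }
        // ::WaitForSingleObject(park_handle, INFINITE);
        state.store(0);  // back to neutral after waking
      }
    };
]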
 
 
--- a/src/os/windows/vm/os_windows.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/vm/os_windows.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,9 +26,10 @@
 #define OS_WINDOWS_VM_OS_WINDOWS_INLINE_HPP
 
 #include "runtime/atomic.hpp"
+#include "runtime/atomic.inline.hpp"
 #include "runtime/os.hpp"
+
 #ifdef TARGET_OS_ARCH_windows_x86
-# include "atomic_windows_x86.inline.hpp"
 # include "orderAccess_windows_x86.inline.hpp"
 #endif
 
@@ -37,9 +38,6 @@
 inline const char* os::path_separator()                { return ";"; }
 inline const char* os::dll_file_extension()            { return ".dll"; }
 
-inline const char* os::jlong_format_specifier()        { return "%I64d"; }
-inline const char* os::julong_format_specifier()       { return "%I64u"; }
-
 inline const int os::default_file_open_flags() { return O_BINARY | O_NOINHERIT;}
 
 // File names are case-insensitive on windows only
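
[Note: the removal of jlong_format_specifier()/julong_format_specifier() here pairs with the
JLONG_FORMAT hunk in os_windows.cpp above: per-OS format specifiers looked up at run time
give way to compile-time macros spliced into format strings by literal concatenation.
Plausible definitions, inferred from the removed Windows specifiers (the real macros live in
the shared globalDefinitions headers, not shown in this diff):

    #ifdef _WIN32
    # define JLONG_FORMAT "%I64d"   // was os::jlong_format_specifier() on Windows
    #else
    # define JLONG_FORMAT "%lld"
    #endif

    // Adjacent string literals concatenate at compile time:
    //   tty->print_cr("took " JLONG_FORMAT " ms", timer.milliseconds());
]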
--- a/src/os/windows/vm/perfMemory_windows.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/vm/perfMemory_windows.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,6 +30,7 @@
 #include "os_windows.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/perfMemory.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/exceptions.hpp"
 
 #include <windows.h>
@@ -1496,6 +1497,10 @@
   // clear the shared memory region
   (void)memset(mapAddress, '\0', size);
 
+  // The mapping does not go through the os:: API, so the reservation must be recorded here.
+  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+  MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
   return (char*) mapAddress;
 }
 
@@ -1672,6 +1677,11 @@
               "Could not map PerfMemory");
   }
 
+  // The mapping does not go through the os:: API, so the reservation must be recorded here.
+  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+  MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
   *addrp = (char*)mapAddress;
   *sizep = size;
 
@@ -1824,6 +1834,8 @@
   }
 
   remove_file_mapping(addr);
+  // The unmapping does not go through the os:: API, so the release must be recorded here.
+  MemTracker::record_virtual_memory_release((address)addr, bytes);
 }
 
 char* PerfMemory::backing_store_filename() {
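
[Note: all three PerfMemory hunks apply the same rule: memory mapped or unmapped directly
with Win32 calls, bypassing the os:: layer, must be reported to native memory tracking by
hand, and every recorded reserve needs a matching recorded release. Schematically, with the
MemTracker calls exactly as used above and two illustrative helpers (HotSpot-internal
headers assumed; this is not the file's actual structure):

    #include <windows.h>

    char* map_perf_region(HANDLE hMap, size_t size) {
      char* addr = (char*) MapViewOfFile(hMap, FILE_MAP_ALL_ACCESS, 0, 0, size);
      if (addr != NULL) {
        // Direct Win32 mapping: tell NMT about the reservation explicitly.
        MemTracker::record_virtual_memory_reserve((address)addr, size, CURRENT_PC);
        MemTracker::record_virtual_memory_type((address)addr, mtInternal);
      }
      return addr;
    }

    void unmap_perf_region(char* addr, size_t bytes) {
      UnmapViewOfFile(addr);
      // ... and about the release, so NMT's books stay balanced.
      MemTracker::record_virtual_memory_release((address)addr, bytes);
    }
]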
--- a/src/os/windows/vm/threadCritical_windows.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/vm/threadCritical_windows.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
-#include "thread_windows.inline.hpp"
 
 // OS-includes here
 # include <windows.h>
--- a/src/os/windows/vm/thread_windows.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os/windows/vm/thread_windows.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,10 @@
 #ifndef OS_WINDOWS_VM_THREAD_WINDOWS_INLINE_HPP
 #define OS_WINDOWS_VM_THREAD_WINDOWS_INLINE_HPP
 
+#ifndef SHARE_VM_RUNTIME_THREAD_INLINE_HPP_SCOPE
+#error "This file should only be included from thread.inline.hpp"
+#endif
+
 #include "runtime/atomic.hpp"
 #include "runtime/prefetch.hpp"
 #include "runtime/thread.hpp"
--- a/src/os_cpu/bsd_x86/vm/assembler_bsd_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_x86/vm/assembler_bsd_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/threadLocalStorage.hpp"
 
--- a/src/os_cpu/bsd_x86/vm/bytes_bsd_x86.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_x86/vm/bytes_bsd_x86.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,10 +25,6 @@
 #ifndef OS_CPU_BSD_X86_VM_BYTES_BSD_X86_INLINE_HPP
 #define OS_CPU_BSD_X86_VM_BYTES_BSD_X86_INLINE_HPP
 
-#ifndef _ALLBSD_SOURCE
-#include <byteswap.h>
-#endif
-
 #ifdef __APPLE__
 #include <libkern/OSByteOrder.h>
 #endif
--- a/src/os_cpu/bsd_x86/vm/globals_bsd_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_x86/vm/globals_bsd_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -48,7 +48,5 @@
 
 // Used on 64 bit platforms for UseCompressedOops base address or CDS
 define_pd_global(uintx, HeapBaseMinAddress,      2*G);
-// Only used on 64 bit Windows platforms
-define_pd_global(bool, UseVectoredExceptions,    false);
 
 #endif // OS_CPU_BSD_X86_VM_GLOBALS_BSD_X86_HPP
--- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 // no precompiled headers
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "classfile/classLoader.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
@@ -33,7 +33,6 @@
 #include "jvm_bsd.h"
 #include "memory/allocation.inline.hpp"
 #include "mutex_bsd.inline.hpp"
-#include "nativeInst_x86.hpp"
 #include "os_share_bsd.hpp"
 #include "prims/jniFastGetField.hpp"
 #include "prims/jvm.h"
@@ -48,8 +47,8 @@
 #include "runtime/osThread.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
-#include "thread_bsd.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
 
@@ -76,7 +75,7 @@
 # include <ucontext.h>
 #endif
 
-#if defined(_ALLBSD_SOURCE) && !defined(__APPLE__) && !defined(__NetBSD__)
+#if !defined(__APPLE__) && !defined(__NetBSD__)
 # include <pthread_np.h>
 #endif
 
@@ -489,23 +488,6 @@
           // to handle_unexpected_exception way down below.
           thread->disable_stack_red_zone();
           tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
-#ifndef _ALLBSD_SOURCE
-        } else {
-          // Accessing stack address below sp may cause SEGV if current
-          // thread has MAP_GROWSDOWN stack. This should only happen when
-          // current thread was created by user code with MAP_GROWSDOWN flag
-          // and then attached to VM. See notes in os_bsd.cpp.
-          if (thread->osthread()->expanding_stack() == 0) {
-             thread->osthread()->set_expanding_stack();
-             if (os::Bsd::manually_expand_stack(thread, addr)) {
-               thread->osthread()->clear_expanding_stack();
-               return 1;
-             }
-             thread->osthread()->clear_expanding_stack();
-          } else {
-             fatal("recursive segv. expanding stack.");
-          }
-#endif
         }
       }
     }
@@ -744,61 +726,21 @@
   ShouldNotReachHere();
 }
 
-#ifdef _ALLBSD_SOURCE
 // From solaris_i486.s ported to bsd_i486.s
 extern "C" void fixcw();
-#endif
 
 void os::Bsd::init_thread_fpu_state(void) {
 #ifndef AMD64
-# ifdef _ALLBSD_SOURCE
   // Set fpu to 53 bit precision. This happens too early to use a stub.
   fixcw();
-# else
-  // set fpu to 53 bit precision
-  set_fpu_control_word(0x27f);
-# endif
 #endif // !AMD64
 }
 
-#ifndef _ALLBSD_SOURCE
-int os::Bsd::get_fpu_control_word(void) {
-#ifdef AMD64
-  return 0;
-#else
-  int fpu_control;
-  _FPU_GETCW(fpu_control);
-  return fpu_control & 0xffff;
-#endif // AMD64
-}
-
-void os::Bsd::set_fpu_control_word(int fpu_control) {
-#ifndef AMD64
-  _FPU_SETCW(fpu_control);
-#endif // !AMD64
-}
-#endif
 
 // Check that the bsd kernel version is 2.4 or higher since earlier
 // versions do not support SSE without patches.
 bool os::supports_sse() {
-#if defined(AMD64) || defined(_ALLBSD_SOURCE)
   return true;
-#else
-  struct utsname uts;
-  if( uname(&uts) != 0 ) return false; // uname fails?
-  char *minor_string;
-  int major = strtol(uts.release,&minor_string,10);
-  int minor = strtol(minor_string+1,NULL,10);
-  bool result = (major > 2 || (major==2 && minor >= 4));
-#ifndef PRODUCT
-  if (PrintMiscellaneous && Verbose) {
-    tty->print("OS version is %d.%d, which %s support SSE/SSE2\n",
-               major,minor, result ? "DOES" : "does NOT");
-  }
-#endif
-  return result;
-#endif // AMD64
 }
 
 bool os::is_allocatable(size_t bytes) {
@@ -836,46 +778,7 @@
 #define GET_GS() ({int gs; __asm__ volatile("movw %%gs, %w0":"=q"(gs)); gs&0xffff;})
 #endif
 
-#ifdef _ALLBSD_SOURCE
 bool os::Bsd::supports_variable_stack_size() { return true; }
-#else
-// Test if pthread library can support variable thread stack size. BsdThreads
-// in fixed stack mode allocates 2M fixed slot for each thread. BsdThreads
-// in floating stack mode and NPTL support variable stack size.
-bool os::Bsd::supports_variable_stack_size() {
-  if (os::Bsd::is_NPTL()) {
-     // NPTL, yes
-     return true;
-
-  } else {
-    // Note: We can't control default stack size when creating a thread.
-    // If we use non-default stack size (pthread_attr_setstacksize), both
-    // floating stack and non-floating stack BsdThreads will return the
-    // same value. This makes it impossible to implement this function by
-    // detecting thread stack size directly.
-    //
-    // An alternative approach is to check %gs. Fixed-stack BsdThreads
-    // do not use %gs, so its value is 0. Floating-stack BsdThreads use
-    // %gs (either as LDT selector or GDT selector, depending on kernel)
-    // to access thread specific data.
-    //
-    // Note that %gs is a reserved glibc register since early 2001, so
-    // applications are not allowed to change its value (Ulrich Drepper from
-    // Redhat confirmed that all known offenders have been modified to use
-    // either %fs or TSD). In the worst case scenario, when VM is embedded in
-    // a native application that plays with %gs, we might see non-zero %gs
-    // even BsdThreads is running in fixed stack mode. As the result, we'll
-    // return true and skip _thread_safety_check(), so we may not be able to
-    // detect stack-heap collisions. But otherwise it's harmless.
-    //
-#ifdef __GNUC__
-    return (GET_GS() != 0);
-#else
-    return false;
-#endif
-  }
-}
-#endif
 #endif // AMD64
 
 // return default stack size for thr_type
@@ -943,7 +846,7 @@
 
   *bottom = (address)((char *)ss.ss_sp - ss.ss_size);
   *size   = ss.ss_size;
-#elif defined(_ALLBSD_SOURCE)
+#else
   pthread_attr_t attr;
 
   int rslt = pthread_attr_init(&attr);
@@ -963,33 +866,6 @@
   }
 
   pthread_attr_destroy(&attr);
-#else
-  if (os::Bsd::is_initial_thread()) {
-     // initial thread needs special handling because pthread_getattr_np()
-     // may return bogus value.
-     *bottom = os::Bsd::initial_thread_stack_bottom();
-     *size   = os::Bsd::initial_thread_stack_size();
-  } else {
-     pthread_attr_t attr;
-
-     int rslt = pthread_getattr_np(pthread_self(), &attr);
-
-     // JVM needs to know exact stack location, abort if it fails
-     if (rslt != 0) {
-       if (rslt == ENOMEM) {
-         vm_exit_out_of_memory(0, "pthread_getattr_np");
-       } else {
-         fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt));
-       }
-     }
-
-     if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) {
-         fatal("Can not locate current stack attributes!");
-     }
-
-     pthread_attr_destroy(&attr);
-
-  }
 #endif
   assert(os::current_stack_pointer() >= *bottom &&
          os::current_stack_pointer() < *bottom + *size, "just checking");
--- a/src/os_cpu/bsd_x86/vm/threadLS_bsd_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_x86/vm/threadLS_bsd_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#include "thread_bsd.inline.hpp"
 
 // Map stack pointer (%esp) to thread pointer for faster TLS access
 //
--- a/src/os_cpu/bsd_x86/vm/thread_bsd_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_x86/vm/thread_bsd_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,7 +24,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/frame.inline.hpp"
-#include "thread_bsd.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 // For Forte Analyzer AsyncGetCallTrace profiling support - thread is
 // currently interrupted by SIGPROF
--- a/src/os_cpu/bsd_x86/vm/vmStructs_bsd_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_x86/vm/vmStructs_bsd_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,37 +29,26 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
                                                                                                                                      \
   /******************************/                                                                                                   \
   /* Threads (NOTE: incomplete) */                                                                                                   \
   /******************************/                                                                                                   \
   nonstatic_field(OSThread,                      _thread_id,                                      OSThread::thread_id_t)             \
-  nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)                         \
-  /* This must be the last entry, and must be present */                                                                             \
-  last_entry()
+  nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)
 
 
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
                                                                           \
   /**********************/                                                \
   /* Posix Thread IDs   */                                                \
   /**********************/                                                \
                                                                           \
   declare_unsigned_integer_type(OSThread::thread_id_t)                    \
-  declare_unsigned_integer_type(pthread_t)                                \
-                                                                          \
-  /* This must be the last entry, and must be present */                  \
-  last_entry()
+  declare_unsigned_integer_type(pthread_t)
 
-#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
-#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // OS_CPU_BSD_X86_VM_VMSTRUCTS_BSD_X86_HPP
--- a/src/os_cpu/bsd_zero/vm/globals_bsd_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_zero/vm/globals_bsd_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -41,7 +41,6 @@
 define_pd_global(intx,  CompilerThreadStackSize, 0);
 define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
 
-define_pd_global(bool,  UseVectoredExceptions,   false);
 // Used on 64 bit platforms for UseCompressedOops base address or CDS
 define_pd_global(uintx, HeapBaseMinAddress,      2*G);
 
--- a/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  *
  */
 
-#if defined(_ALLBSD_SOURCE) && !defined(__APPLE__) && !defined(__NetBSD__)
+#if !defined(__APPLE__) && !defined(__NetBSD__)
 #include <pthread.h>
 # include <pthread_np.h> /* For pthread_attr_get_np */
 #endif
@@ -54,8 +54,8 @@
 #include "runtime/osThread.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
-#include "thread_bsd.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
 
@@ -178,26 +178,6 @@
           thread->disable_stack_red_zone();
           ShouldNotCallThis();
         }
-#ifndef _ALLBSD_SOURCE
-        else {
-          // Accessing stack address below sp may cause SEGV if
-          // current thread has MAP_GROWSDOWN stack. This should
-          // only happen when current thread was created by user
-          // code with MAP_GROWSDOWN flag and then attached to VM.
-          // See notes in os_bsd.cpp.
-          if (thread->osthread()->expanding_stack() == 0) {
-            thread->osthread()->set_expanding_stack();
-            if (os::Bsd::manually_expand_stack(thread, addr)) {
-              thread->osthread()->clear_expanding_stack();
-              return true;
-            }
-            thread->osthread()->clear_expanding_stack();
-          }
-          else {
-            fatal("recursive segv. expanding stack.");
-          }
-        }
-#endif
       }
     }
 
@@ -266,16 +246,6 @@
   // Nothing to do
 }
 
-#ifndef _ALLBSD_SOURCE
-int os::Bsd::get_fpu_control_word() {
-  ShouldNotCallThis();
-}
-
-void os::Bsd::set_fpu_control_word(int fpu) {
-  ShouldNotCallThis();
-}
-#endif
-
 bool os::is_allocatable(size_t bytes) {
 #ifdef _LP64
   return true;
@@ -339,7 +309,7 @@
   stack_top = (address) ss.ss_sp;
   stack_bytes  = ss.ss_size;
   stack_bottom = stack_top - stack_bytes;
-#elif defined(_ALLBSD_SOURCE)
+#else
   pthread_attr_t attr;
 
   int rslt = pthread_attr_init(&attr);
@@ -362,67 +332,6 @@
   pthread_attr_destroy(&attr);
 
   stack_top = stack_bottom + stack_bytes;
-#else /* Linux */
-  pthread_attr_t attr;
-  int res = pthread_getattr_np(pthread_self(), &attr);
-  if (res != 0) {
-    if (res == ENOMEM) {
-      vm_exit_out_of_memory(0, "pthread_getattr_np");
-    }
-    else {
-      fatal(err_msg("pthread_getattr_np failed with errno = " INT32_FORMAT,
-            res));
-    }
-  }
-
-  res = pthread_attr_getstack(&attr, (void **) &stack_bottom, &stack_bytes);
-  if (res != 0) {
-    fatal(err_msg("pthread_attr_getstack failed with errno = " INT32_FORMAT,
-          res));
-  }
-  stack_top = stack_bottom + stack_bytes;
-
-  // The block of memory returned by pthread_attr_getstack() includes
-  // guard pages where present.  We need to trim these off.
-  size_t page_bytes = os::Bsd::page_size();
-  assert(((intptr_t) stack_bottom & (page_bytes - 1)) == 0, "unaligned stack");
-
-  size_t guard_bytes;
-  res = pthread_attr_getguardsize(&attr, &guard_bytes);
-  if (res != 0) {
-    fatal(err_msg(
-        "pthread_attr_getguardsize failed with errno = " INT32_FORMAT, res));
-  }
-  int guard_pages = align_size_up(guard_bytes, page_bytes) / page_bytes;
-  assert(guard_bytes == guard_pages * page_bytes, "unaligned guard");
-
-#ifdef IA64
-  // IA64 has two stacks sharing the same area of memory, a normal
-  // stack growing downwards and a register stack growing upwards.
-  // Guard pages, if present, are in the centre.  This code splits
-  // the stack in two even without guard pages, though in theory
-  // there's nothing to stop us allocating more to the normal stack
-  // or more to the register stack if one or the other were found
-  // to grow faster.
-  int total_pages = align_size_down(stack_bytes, page_bytes) / page_bytes;
-  stack_bottom += (total_pages - guard_pages) / 2 * page_bytes;
-#endif // IA64
-
-  stack_bottom += guard_bytes;
-
-  pthread_attr_destroy(&attr);
-
-  // The initial thread has a growable stack, and the size reported
-  // by pthread_attr_getstack is the maximum size it could possibly
-  // be given what currently mapped.  This can be huge, so we cap it.
-  if (os::Bsd::is_initial_thread()) {
-    stack_bytes = stack_top - stack_bottom;
-
-    if (stack_bytes > JavaThread::stack_size_at_create())
-      stack_bytes = JavaThread::stack_size_at_create();
-
-    stack_bottom = stack_top - stack_bytes;
-  }
 #endif
 
   assert(os::current_stack_pointer() >= stack_bottom, "should do");
--- a/src/os_cpu/bsd_zero/vm/threadLS_bsd_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_zero/vm/threadLS_bsd_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,8 +24,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#include "thread_bsd.inline.hpp"
 
 void ThreadLocalStorage::generate_code_for_get_thread() {
   // nothing to do
--- a/src/os_cpu/bsd_zero/vm/thread_bsd_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_zero/vm/thread_bsd_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,7 +25,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/frame.inline.hpp"
-#include "thread_bsd.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 void JavaThread::cache_global_variables() {
   // nothing to do
--- a/src/os_cpu/bsd_zero/vm/vmStructs_bsd_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/bsd_zero/vm/vmStructs_bsd_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,21 +30,13 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
 
 
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
 
-#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
-#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // OS_CPU_BSD_ZERO_VM_VMSTRUCTS_BSD_ZERO_HPP
--- a/src/os_cpu/linux_sparc/vm/assembler_linux_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_sparc/vm/assembler_linux_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "runtime/os.hpp"
 #include "runtime/threadLocalStorage.hpp"
 
--- a/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -35,7 +35,5 @@
 
 // Used on 64 bit platforms for UseCompressedOops base address or CDS
 define_pd_global(uintx, HeapBaseMinAddress,      CONST64(4)*G);
-// Only used on 64 bit Windows platforms
-define_pd_global(bool, UseVectoredExceptions, false);
 
 #endif // OS_CPU_LINUX_SPARC_VM_GLOBALS_LINUX_SPARC_HPP
--- a/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 // no precompiled headers
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "classfile/classLoader.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
@@ -48,8 +48,8 @@
 #include "runtime/osThread.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
-#include "thread_linux.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
 
--- a/src/os_cpu/linux_sparc/vm/threadLS_linux_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_sparc/vm/threadLS_linux_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#include "thread_linux.inline.hpp"
 
 void ThreadLocalStorage::generate_code_for_get_thread() {
 }
--- a/src/os_cpu/linux_sparc/vm/thread_linux_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_sparc/vm/thread_linux_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,7 +24,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/frame.inline.hpp"
-#include "thread_linux.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 // For Forte Analyzer AsyncGetCallTrace profiling support - thread is
 // currently interrupted by SIGPROF
--- a/src/os_cpu/linux_sparc/vm/vmStructs_linux_sparc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_sparc/vm/vmStructs_linux_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,7 +29,7 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
                                                                                                                                      \
   /******************************/                                                                                                   \
   /* Threads (NOTE: incomplete) */                                                                                                   \
@@ -37,38 +37,27 @@
                                                                                                                                      \
   nonstatic_field(JavaThread,                  _base_of_stack_pointer,                        intptr_t*)                             \
   nonstatic_field(OSThread,                    _thread_id,                                    OSThread::thread_id_t)                 \
-  nonstatic_field(OSThread,                    _pthread_id,                                   pthread_t)                             \
-  /* This must be the last entry, and must be present */                                                                             \
-  last_entry()
+  nonstatic_field(OSThread,                    _pthread_id,                                   pthread_t)
 
 
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
                                                                           \
   /**********************/                                                \
   /* POSIX Thread IDs */                                                  \
   /**********************/                                                \
                                                                           \
   declare_integer_type(OSThread::thread_id_t)                             \
-  declare_unsigned_integer_type(pthread_t)                                \
-                                                                          \
-  /* This must be the last entry, and must be present */                  \
-  last_entry()
+  declare_unsigned_integer_type(pthread_t)
 
 
-#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
                                                                         \
   /************************/                                            \
   /* JavaThread constants */                                            \
   /************************/                                            \
                                                                         \
-  declare_constant(JavaFrameAnchor::flushed)                            \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+  declare_constant(JavaFrameAnchor::flushed)
 
-#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // OS_CPU_LINUX_SPARC_VM_VMSTRUCTS_LINUX_SPARC_HPP
--- a/src/os_cpu/linux_x86/vm/assembler_linux_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_x86/vm/assembler_linux_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/threadLocalStorage.hpp"
 
--- a/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -46,7 +46,5 @@
 
 // Used on 64 bit platforms for UseCompressedOops base address or CDS
 define_pd_global(uintx,HeapBaseMinAddress,       2*G);
-// Only used on 64 bit Windows platforms
-define_pd_global(bool, UseVectoredExceptions,    false);
 
 #endif // OS_CPU_LINUX_X86_VM_GLOBALS_LINUX_X86_HPP
--- a/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 // no precompiled headers
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "classfile/classLoader.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
@@ -33,7 +33,6 @@
 #include "jvm_linux.h"
 #include "memory/allocation.inline.hpp"
 #include "mutex_linux.inline.hpp"
-#include "nativeInst_x86.hpp"
 #include "os_share_linux.hpp"
 #include "prims/jniFastGetField.hpp"
 #include "prims/jvm.h"
@@ -48,8 +47,8 @@
 #include "runtime/osThread.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
-#include "thread_linux.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
 
--- a/src/os_cpu/linux_x86/vm/threadLS_linux_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_x86/vm/threadLS_linux_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#include "thread_linux.inline.hpp"
 
 // Map stack pointer (%esp) to thread pointer for faster TLS access
 //
--- a/src/os_cpu/linux_x86/vm/thread_linux_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_x86/vm/thread_linux_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,7 +24,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/frame.inline.hpp"
-#include "thread_linux.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 // For Forte Analyzer AsyncGetCallTrace profiling support - thread is
 // currently interrupted by SIGPROF
--- a/src/os_cpu/linux_x86/vm/vmStructs_linux_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_x86/vm/vmStructs_linux_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,37 +29,26 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
                                                                                                                                      \
   /******************************/                                                                                                   \
   /* Threads (NOTE: incomplete) */                                                                                                   \
   /******************************/                                                                                                   \
   nonstatic_field(OSThread,                      _thread_id,                                      OSThread::thread_id_t)             \
-  nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)                         \
-  /* This must be the last entry, and must be present */                                                                             \
-  last_entry()
+  nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)
 
 
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
                                                                           \
   /**********************/                                                \
   /* Posix Thread IDs   */                                                \
   /**********************/                                                \
                                                                           \
   declare_integer_type(OSThread::thread_id_t)                             \
-  declare_unsigned_integer_type(pthread_t)                                \
-                                                                          \
-  /* This must be the last entry, and must be present */                  \
-  last_entry()
+  declare_unsigned_integer_type(pthread_t)
 
-#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
-#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // OS_CPU_LINUX_X86_VM_VMSTRUCTS_LINUX_X86_HPP
--- a/src/os_cpu/linux_zero/vm/globals_linux_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_zero/vm/globals_linux_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -41,7 +41,6 @@
 define_pd_global(intx,  CompilerThreadStackSize, 0);
 define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
 
-define_pd_global(bool,  UseVectoredExceptions,   false);
 // Used on 64 bit platforms for UseCompressedOops base address or CDS
 define_pd_global(uintx, HeapBaseMinAddress,      2*G);
 
--- a/src/os_cpu/linux_zero/vm/os_linux_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_zero/vm/os_linux_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -49,8 +49,8 @@
 #include "runtime/osThread.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
-#include "thread_linux.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
 
--- a/src/os_cpu/linux_zero/vm/threadLS_linux_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_zero/vm/threadLS_linux_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,8 +24,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#include "thread_linux.inline.hpp"
 
 void ThreadLocalStorage::generate_code_for_get_thread() {
   // nothing to do
--- a/src/os_cpu/linux_zero/vm/thread_linux_zero.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_zero/vm/thread_linux_zero.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,7 +25,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/frame.inline.hpp"
-#include "thread_linux.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 void JavaThread::cache_global_variables() {
   // nothing to do
--- a/src/os_cpu/linux_zero/vm/vmStructs_linux_zero.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/linux_zero/vm/vmStructs_linux_zero.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,21 +30,12 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
 
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
 
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
-#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
-
-#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // OS_CPU_LINUX_ZERO_VM_VMSTRUCTS_LINUX_ZERO_HPP
--- a/src/os_cpu/solaris_sparc/vm/assembler_solaris_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_sparc/vm/assembler_solaris_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,7 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/threadLocalStorage.hpp"
 
--- a/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -39,8 +39,6 @@
 #else
 define_pd_global(uintx, HeapBaseMinAddress,      2*G);
 #endif
-// Only used on 64 bit Windows platforms
-define_pd_global(bool, UseVectoredExceptions,    false);
 
 
 
--- a/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 // no precompiled headers
-#include "assembler_sparc.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "classfile/classLoader.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
@@ -48,8 +48,8 @@
 #include "runtime/osThread.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
-#include "thread_solaris.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
 
--- a/src/os_cpu/solaris_sparc/vm/threadLS_solaris_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_sparc/vm/threadLS_solaris_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#include "thread_solaris.inline.hpp"
 
 // Provides an entry point we can link against and
 // a buffer we can emit code into. The buffer is
--- a/src/os_cpu/solaris_sparc/vm/thread_solaris_sparc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_sparc/vm/thread_solaris_sparc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,7 +24,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/frame.inline.hpp"
-#include "thread_solaris.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 // For Forte Analyzer AsyncGetCallTrace profiling support - thread is
 // currently interrupted by SIGPROF
--- a/src/os_cpu/solaris_sparc/vm/vmStructs_solaris_sparc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_sparc/vm/vmStructs_solaris_sparc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,44 +29,32 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
                                                                                                                                      \
   /******************************/                                                                                                   \
   /* Threads (NOTE: incomplete) */                                                                                                   \
   /******************************/                                                                                                   \
                                                                                                                                      \
   nonstatic_field(JavaThread,                  _base_of_stack_pointer,                        intptr_t*)                             \
-  nonstatic_field(OSThread,                    _thread_id,                                    OSThread::thread_id_t)                 \
-  /* This must be the last entry, and must be present */                                                                             \
-  last_entry()
+  nonstatic_field(OSThread,                    _thread_id,                                    OSThread::thread_id_t)
 
-
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
                                                                           \
   /**********************/                                                \
   /* Solaris Thread IDs */                                                \
   /**********************/                                                \
                                                                           \
-  declare_unsigned_integer_type(OSThread::thread_id_t)                    \
-                                                                          \
-  /* This must be the last entry, and must be present */                  \
-  last_entry()
+  declare_unsigned_integer_type(OSThread::thread_id_t)
 
 
-#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
                                                                         \
   /************************/                                            \
   /* JavaThread constants */                                            \
   /************************/                                            \
                                                                         \
-  declare_constant(JavaFrameAnchor::flushed)                            \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+  declare_constant(JavaFrameAnchor::flushed)
 
-#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // OS_CPU_SOLARIS_SPARC_VM_VMSTRUCTS_SOLARIS_SPARC_HPP
--- a/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/threadLocalStorage.hpp"
 
@@ -116,7 +116,7 @@
   ThreadLocalStorage::pd_tlsAccessMode tlsMode = ThreadLocalStorage::pd_getTlsAccessMode ();
   if (tlsMode == ThreadLocalStorage::pd_tlsAccessIndirect) {            // T1
      // Use thread as a temporary: mov r, gs:[0]; mov r, [r+tlsOffset]
-     emit_byte (segment);
+     emit_int8 (segment);
      // ExternalAddress doesn't work because it can't take NULL
      AddressLiteral null(0, relocInfo::none);
      movptr (thread, null);
@@ -125,7 +125,7 @@
   } else
   if (tlsMode == ThreadLocalStorage::pd_tlsAccessDirect) {              // T2
      // mov r, gs:[tlsOffset]
-     emit_byte (segment);
+     emit_int8 (segment);
      AddressLiteral tls_off((address)ThreadLocalStorage::pd_getTlsOffset(), relocInfo::none);
      movptr (thread, tls_off);
      return ;
--- a/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -45,7 +45,5 @@
 
 // Used on 64 bit platforms for UseCompressedOops base address or CDS
 define_pd_global(uintx,HeapBaseMinAddress,       256*M);
-// Only used on 64 bit Windows platforms
-define_pd_global(bool, UseVectoredExceptions,    false);
 
 #endif // OS_CPU_SOLARIS_X86_VM_GLOBALS_SOLARIS_X86_HPP
--- a/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,6 +27,7 @@
 
 #include "runtime/atomic.hpp"
 #include "runtime/orderAccess.hpp"
+#include "runtime/os.hpp"
 #include "vm_version_x86.hpp"
 
 // Implementation of class OrderAccess.
--- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 // no precompiled headers
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "classfile/classLoader.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
@@ -33,7 +33,6 @@
 #include "jvm_solaris.h"
 #include "memory/allocation.inline.hpp"
 #include "mutex_solaris.inline.hpp"
-#include "nativeInst_x86.hpp"
 #include "os_share_solaris.hpp"
 #include "prims/jniFastGetField.hpp"
 #include "prims/jvm.h"
@@ -48,8 +47,8 @@
 #include "runtime/osThread.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
-#include "thread_solaris.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
 
--- a/src/os_cpu/solaris_x86/vm/threadLS_solaris_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_x86/vm/threadLS_solaris_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#include "thread_solaris.inline.hpp"
 
 #ifdef AMD64
 extern "C" Thread*  fs_load(ptrdiff_t tlsOffset);
--- a/src/os_cpu/solaris_x86/vm/thread_solaris_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_x86/vm/thread_solaris_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,7 +24,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/frame.inline.hpp"
-#include "thread_solaris.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 // For Forte Analyzer AsyncGetCallTrace profiling support - thread is
 // currently interrupted by SIGPROF
--- a/src/os_cpu/solaris_x86/vm/vmStructs_solaris_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/solaris_x86/vm/vmStructs_solaris_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,36 +29,24 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
                                                                                                                                      \
   /******************************/                                                                                                   \
   /* Threads (NOTE: incomplete) */                                                                                                   \
   /******************************/                                                                                                   \
                                                                                                                                      \
-  nonstatic_field(OSThread,                      _thread_id,                                    OSThread::thread_id_t)               \
-                                                                                                                                     \
-  /* This must be the last entry, and must be present */                                                                             \
-  last_entry()
+  nonstatic_field(OSThread,                      _thread_id,                                    OSThread::thread_id_t)
 
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
                                                                           \
   /**********************/                                                \
   /* Solaris Thread IDs */                                                \
   /**********************/                                                \
                                                                           \
-  declare_unsigned_integer_type(OSThread::thread_id_t)                    \
-                                                                          \
-  /* This must be the last entry, and must be present */                  \
-  last_entry()
+  declare_unsigned_integer_type(OSThread::thread_id_t)
 
-#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
-#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // OS_CPU_SOLARIS_X86_VM_VMSTRUCTS_SOLARIS_X86_HPP
--- a/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,14 +23,14 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/threadLocalStorage.hpp"
 
 
 void MacroAssembler::int3() {
-  emit_byte(0xCC);
+  emit_int8((unsigned char)0xCC);
 }
 
 #ifndef _LP64
--- a/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -47,7 +47,5 @@
 
 // Used on 64 bit platforms for UseCompressedOops base address or CDS
 define_pd_global(uintx, HeapBaseMinAddress,      2*G);
-// Only used on 64 bit Windows platforms
-define_pd_global(bool, UseVectoredExceptions,    false);
 
 #endif // OS_CPU_WINDOWS_X86_VM_GLOBALS_WINDOWS_X86_HPP
--- a/src/os_cpu/windows_x86/vm/os_windows_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/windows_x86/vm/os_windows_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,7 @@
  */
 
 // no precompiled headers
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
 #include "classfile/classLoader.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
@@ -48,8 +48,8 @@
 #include "runtime/osThread.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
-#include "thread_windows.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
 
@@ -175,9 +175,6 @@
   PRUNTIME_FUNCTION prt;
   PUNWIND_INFO_EH_ONLY punwind;
 
-  // If we are using Vectored Exceptions we don't need this registration
-  if (UseVectoredExceptions) return true;
-
   BufferBlob* blob = BufferBlob::create("CodeCache Exception Handler", sizeof(DynamicCodeData));
   CodeBuffer cb(blob);
   MacroAssembler* masm = new MacroAssembler(&cb);
--- a/src/os_cpu/windows_x86/vm/threadLS_windows_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/windows_x86/vm/threadLS_windows_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#include "thread_windows.inline.hpp"
 
 // Provides an entry point we can link against and
 // a buffer we can emit code into. The buffer is
--- a/src/os_cpu/windows_x86/vm/thread_windows_x86.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/windows_x86/vm/thread_windows_x86.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,7 +24,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/frame.inline.hpp"
-#include "thread_windows.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 // For Forte Analyzer AsyncGetCallTrace profiling support - thread is
 // currently interrupted by SIGPROF
--- a/src/os_cpu/windows_x86/vm/vmStructs_windows_x86.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/os_cpu/windows_x86/vm/vmStructs_windows_x86.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,32 +29,21 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
                                                                                                                                      \
   /******************************/                                                                                                   \
   /* Threads (NOTE: incomplete) */                                                                                                   \
   /******************************/                                                                                                   \
                                                                                                                                      \
   nonstatic_field(OSThread,                    _thread_id,                                    OSThread::thread_id_t)                 \
-  unchecked_nonstatic_field(OSThread,          _thread_handle,                                sizeof(HANDLE)) /* NOTE: no type */    \
-                                                                                                                                     \
-  /* This must be the last entry, and must be present */                                                                             \
-  last_entry()
+  unchecked_nonstatic_field(OSThread,          _thread_handle,                                sizeof(HANDLE)) /* NOTE: no type */
 
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
                                                                           \
-  declare_unsigned_integer_type(OSThread::thread_id_t)                    \
-  /* This must be the last entry, and must be present */                  \
-  last_entry()
+  declare_unsigned_integer_type(OSThread::thread_id_t)
 
-#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
-#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
-                                                                        \
-  /* This must be the last entry, and must be present */                \
-  last_entry()
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
 #endif // OS_CPU_WINDOWS_X86_VM_VMSTRUCTS_WINDOWS_X86_HPP
--- a/src/share/tools/LogCompilation/README	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/LogCompilation/README	Sun Feb 03 22:43:57 2013 +0100
@@ -13,6 +13,6 @@
 
 More information about the LogCompilation output can be found at 
 
-http://wikis.sun.com/display/HotSpotInternals/LogCompilation+overview
-http://wikis.sun.com/display/HotSpotInternals/PrintCompilation
-http://wikis.sun.com/display/HotSpotInternals/LogCompilation+tool
+https://wikis.oracle.com/display/HotSpotInternals/LogCompilation+overview
+https://wikis.oracle.com/display/HotSpotInternals/PrintCompilation
+https://wikis.oracle.com/display/HotSpotInternals/LogCompilation+tool
--- a/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java	Sun Feb 03 22:43:57 2013 +0100
@@ -38,6 +38,7 @@
     private String reason;
     private List<CallSite> calls;
     private int endNodes;
+    private int endLiveNodes;
     private double timeStamp;
 
     CallSite() {
@@ -106,7 +107,7 @@
             }
         }
         if (getEndNodes() > 0) {
-          stream.printf(" (end time: %6.4f nodes: %d)", getTimeStamp(), getEndNodes());
+            stream.printf(" (end time: %6.4f nodes: %d live: %d)", getTimeStamp(), getEndNodes(), getEndLiveNodes());
         }
         stream.println("");
         if (getReceiver() != null) {
@@ -195,6 +196,14 @@
         return endNodes;
     }
 
+    void setEndLiveNodes(int n) {
+        endLiveNodes = n;
+    }
+
+    public int getEndLiveNodes() {
+        return endLiveNodes;
+    }
+
     void setTimeStamp(double time) {
         timeStamp = time;
     }
--- a/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java	Sun Feb 03 22:43:57 2013 +0100
@@ -37,13 +37,13 @@
 public class LogCompilation extends DefaultHandler implements ErrorHandler, Constants {
 
     public static void usage(int exitcode) {
-        System.out.println("Usage: LogCompilation [ -v ] [ -c ] [ -s ] [ -e | -N ] file1 ...");
+        System.out.println("Usage: LogCompilation [ -v ] [ -c ] [ -s ] [ -e | -n ] file1 ...");
         System.out.println("  -c:   clean up malformed 1.5 xml");
         System.out.println("  -i:   print inlining decisions");
         System.out.println("  -S:   print compilation statistics");
         System.out.println("  -s:   sort events by start time");
         System.out.println("  -e:   sort events by elapsed time");
-        System.out.println("  -N:   sort events by name and start");
+        System.out.println("  -n:   sort events by name and start");
         System.exit(exitcode);
     }
 
@@ -137,7 +137,11 @@
                         v2 = Integer.valueOf(0);
                     }
                     phaseNodes.put(phase.getName(), Integer.valueOf(v2.intValue() + phase.getNodes()));
-                    out.printf("\t%s %6.4f %d %d\n", phase.getName(), phase.getElapsedTime(), phase.getStartNodes(), phase.getNodes());
+                    /* Print phase name, elapsed time, nodes at the start of the phase,
+                       nodes created in the phase, live nodes at the start of the phase,
+                       live nodes added in the phase.
+                    */
+                    out.printf("\t%s %6.4f %d %d %d %d\n", phase.getName(), phase.getElapsedTime(), phase.getStartNodes(), phase.getNodes(), phase.getStartLiveNodes(), phase.getLiveNodes());
                 }
             } else if (e instanceof MakeNotEntrantEvent) {
                 MakeNotEntrantEvent mne = (MakeNotEntrantEvent) e;
--- a/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java	Sun Feb 03 22:43:57 2013 +0100
@@ -224,7 +224,6 @@
         throw new InternalError("can't find " + name);
     }
     int indent = 0;
-    String compile_id;
 
     String type(String id) {
         String result = types.get(id);
@@ -268,12 +267,18 @@
         if (qname.equals("phase")) {
             Phase p = new Phase(search(atts, "name"),
                     Double.parseDouble(search(atts, "stamp")),
-                    Integer.parseInt(search(atts, "nodes")));
+                    Integer.parseInt(search(atts, "nodes", "0")),
+                    Integer.parseInt(search(atts, "live")));
             phaseStack.push(p);
         } else if (qname.equals("phase_done")) {
             Phase p = phaseStack.pop();
-            p.setEndNodes(Integer.parseInt(search(atts, "nodes")));
+            if (! p.getId().equals(search(atts, "name"))) {
+                System.out.println("phase: " + p.getId());
+                throw new InternalError("phase name mismatch");
+            }
             p.setEnd(Double.parseDouble(search(atts, "stamp")));
+            p.setEndNodes(Integer.parseInt(search(atts, "nodes", "0")));
+            p.setEndLiveNodes(Integer.parseInt(search(atts, "live")));
             compile.getPhases().add(p);
         } else if (qname.equals("task")) {
             compile = new Compilation(Integer.parseInt(search(atts, "compile_id", "-1")));
@@ -317,13 +322,16 @@
             m.setName(search(atts, "name"));
             m.setReturnType(type(search(atts, "return")));
             m.setArguments(search(atts, "arguments", "void"));
-            m.setBytes(search(atts, "bytes"));
-            m.setIICount(search(atts, "iicount"));
-            m.setFlags(search(atts, "flags"));
+
+            if (search(atts, "unloaded", "0").equals("0")) {
+                m.setBytes(search(atts, "bytes"));
+                m.setIICount(search(atts, "iicount"));
+                m.setFlags(search(atts, "flags"));
+            }
             methods.put(id, m);
         } else if (qname.equals("call")) {
             site = new CallSite(bci, method(search(atts, "method")));
-            site.setCount(Integer.parseInt(search(atts, "count")));
+            site.setCount(Integer.parseInt(search(atts, "count", "0")));
             String receiver = atts.getValue("receiver");
             if (receiver != null) {
                 site.setReceiver(type(receiver));
@@ -406,6 +414,7 @@
         } else if (qname.equals("parse_done")) {
             CallSite call = scopes.pop();
             call.setEndNodes(Integer.parseInt(search(atts, "nodes", "1")));
+            call.setEndLiveNodes(Integer.parseInt(search(atts, "live", "1")));
             call.setTimeStamp(Double.parseDouble(search(atts, "stamp")));
             scopes.push(call);
         }
--- a/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java	Sun Feb 03 22:43:57 2013 +0100
@@ -30,10 +30,13 @@
 
     private final int startNodes;
     private int endNodes;
+    private final int startLiveNodes;
+    private int endLiveNodes;
 
-    Phase(String n, double s, int nodes) {
+    Phase(String n, double s, int nodes, int live) {
         super(s, n);
         startNodes = nodes;
+        startLiveNodes = live;
     }
 
     int getNodes() {
@@ -55,6 +58,22 @@
     public int getEndNodes() {
         return endNodes;
     }
+    /* Number of live nodes added by the phase */
+    int getLiveNodes() {
+        return getEndLiveNodes() - getStartLiveNodes();
+    }
+
+    void setEndLiveNodes(int n) {
+        endLiveNodes = n;
+    }
+
+    public int getStartLiveNodes() {
+        return startLiveNodes;
+    }
+
+    public int getEndLiveNodes() {
+        return endLiveNodes;
+    }
 
     @Override
     public void print(PrintStream stream) {
--- a/src/share/tools/ProjectCreator/ProjectCreator.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/ProjectCreator/ProjectCreator.java	Sun Feb 03 22:43:57 2013 +0100
@@ -36,7 +36,7 @@
             + "into .dsp file, substituting for path given in "
             + "-sourceBase. Example: HotSpotWorkSpace>");
       System.err.println("  -dllLoc <path to directory in which to put "
-            + "jvm.dll and jvm_g.dll; no trailing slash>");
+            + "jvm.dll; no trailing slash>");
       System.err.println("  If any of the above are specified, "
             + "they must all be.");
       System.err.println("  Additional, optional arguments, which can be "
--- a/src/share/tools/hsdis/hsdis-demo.c	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/hsdis/hsdis-demo.c	Sun Feb 03 22:43:57 2013 +0100
@@ -85,9 +85,11 @@
 
 #include "dlfcn.h"
 
-#define DECODE_INSTRUCTIONS_NAME "decode_instructions_virtual"
+#define DECODE_INSTRUCTIONS_VIRTUAL_NAME "decode_instructions_virtual"
+#define DECODE_INSTRUCTIONS_NAME "decode_instructions"
 #define HSDIS_NAME               "hsdis"
 static void* decode_instructions_pv = 0;
+static void* decode_instructions_sv = 0;
 static const char* hsdis_path[] = {
   HSDIS_NAME"-"LIBARCH LIB_EXT,
   "./" HSDIS_NAME"-"LIBARCH LIB_EXT,
@@ -101,11 +103,12 @@
   void* dllib = NULL;
   const char* *next_in_path = hsdis_path;
   while (1) {
-    decode_instructions_pv = dlsym(dllib, DECODE_INSTRUCTIONS_NAME);
-    if (decode_instructions_pv != NULL)
+    decode_instructions_pv = dlsym(dllib, DECODE_INSTRUCTIONS_VIRTUAL_NAME);
+    decode_instructions_sv = dlsym(dllib, DECODE_INSTRUCTIONS_NAME);
+    if (decode_instructions_pv != NULL || decode_instructions_sv != NULL)
       return NULL;
     if (dllib != NULL)
-      return "plugin does not defined "DECODE_INSTRUCTIONS_NAME;
+      return "plugin does not defined "DECODE_INSTRUCTIONS_VIRTUAL_NAME" and "DECODE_INSTRUCTIONS_NAME;
     for (dllib = NULL; dllib == NULL; ) {
       const char* next_lib = (*next_in_path++);
       if (next_lib == NULL)
@@ -213,20 +216,44 @@
     printf("%s: %s\n", err, dlerror());
     exit(1);
   }
-  printf("Decoding from %p to %p...\n", from, to);
-  decode_instructions_ftype decode_instructions
-    = (decode_instructions_ftype) decode_instructions_pv;
+  decode_func_vtype decode_instructions_v
+    = (decode_func_vtype) decode_instructions_pv;
+  decode_func_stype decode_instructions_s
+    = (decode_func_stype) decode_instructions_sv;
   void* res;
-  if (raw && xml) {
-    res = (*decode_instructions)(from, to, (unsigned char*)from, to - from, simple_handle_event, stdout, NULL, stdout, options);
-  } else if (raw) {
-    res = (*decode_instructions)(from, to, (unsigned char*)from, to - from, simple_handle_event, stdout, NULL, stdout, options);
-  } else {
-    res = (*decode_instructions)(from, to, (unsigned char*)from, to - from,
-                                 handle_event, (void*) event_cookie,
-                                 fprintf_callback, stdout,
-                                 options);
+  if (decode_instructions_pv != NULL) {
+    printf("\nDecoding from %p to %p...with %s\n", from, to, DECODE_INSTRUCTIONS_VIRTUAL_NAME);
+    if (raw) {
+      res = (*decode_instructions_v)(from, to,
+                                     (unsigned char*)from, to - from,
+                                     simple_handle_event, stdout,
+                                     NULL, stdout,
+                                     options, 0);
+    } else {
+      res = (*decode_instructions_v)(from, to,
+                                     (unsigned char*)from, to - from,
+                                     handle_event, (void*) event_cookie,
+                                     fprintf_callback, stdout,
+                                     options, 0);
+    }
+    if (res != (void*)to)
+      printf("*** Result was %p!\n", res);
   }
-  if (res != (void*)to)
-    printf("*** Result was %p!\n", res);
+  void* sres;
+  if (decode_instructions_sv != NULL) {
+    printf("\nDecoding from %p to %p...with old decode_instructions\n", from, to, DECODE_INSTRUCTIONS_NAME);
+    if (raw) {
+      sres = (*decode_instructions_s)(from, to,
+                                      simple_handle_event, stdout,
+                                      NULL, stdout,
+                                      options);
+    } else {
+      sres = (*decode_instructions_s)(from, to,
+                                      handle_event, (void*) event_cookie,
+                                      fprintf_callback, stdout,
+                                      options);
+    }
+    if (sres != (void *)to)
+      printf("*** Result of decode_instructions %p!\n", sres);
+  }
 }
--- a/src/share/tools/hsdis/hsdis.c	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/hsdis/hsdis.c	Sun Feb 03 22:43:57 2013 +0100
@@ -99,7 +99,7 @@
                             unsigned char* buffer, uintptr_t length,
                             event_callback_t  event_callback_arg,  void* event_stream_arg,
                             printf_callback_t printf_callback_arg, void* printf_stream_arg,
-                            const char* options) {
+                            const char* options, int newline) {
   struct hsdis_app_data app_data;
   memset(&app_data, 0, sizeof(app_data));
   app_data.start_va    = start_va;
@@ -110,7 +110,7 @@
   app_data.event_stream    = event_stream_arg;
   app_data.printf_callback = printf_callback_arg;
   app_data.printf_stream   = printf_stream_arg;
-  app_data.do_newline = false;
+  app_data.do_newline = (newline != 0);
 
   return decode(&app_data, options);
 }
@@ -132,7 +132,7 @@
                              event_stream_arg,
                              printf_callback_arg,
                              printf_stream_arg,
-                             options);
+                             options, false);
 }
 
 static void* decode(struct hsdis_app_data* app_data, const char* options) {
@@ -173,7 +173,7 @@
       if (!app_data->losing) {
         const char* insn_close = format_insn_close("/insn", &app_data->dinfo,
                                                    buf, sizeof(buf));
-        (*event_callback)(event_stream, insn_close, (void*) p) != NULL;
+        (*event_callback)(event_stream, insn_close, (void*) p);
 
         if (app_data->do_newline) {
           /* follow each complete insn by a nice newline */
@@ -182,13 +182,14 @@
       }
     }
 
-    (*event_callback)(event_stream, "/insns", (void*) p);
+    if (app_data->losing) (*event_callback)(event_stream, "/insns", (void*) p);
     return (void*) p;
   }
 }
 
 /* take the address of the function, for luck, and also test the typedef: */
-const decode_instructions_ftype decode_instructions_address = &decode_instructions_virtual;
+const decode_func_vtype decode_func_virtual_address = &decode_instructions_virtual;
+const decode_func_stype decode_func_address = &decode_instructions;
 
 static const char* format_insn_close(const char* close,
                                      disassemble_info* dinfo,
--- a/src/share/tools/hsdis/hsdis.h	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/hsdis/hsdis.h	Sun Feb 03 22:43:57 2013 +0100
@@ -47,6 +47,9 @@
    where tag is a simple identifier, signifying (as in XML) an element start,
    element end, and standalone element.  (To render as XML, add angle brackets.)
 */
+#ifndef SHARED_TOOLS_HSDIS_H
+#define SHARED_TOOLS_HSDIS_H
+
 extern
 #ifdef DLL_EXPORT
   DLL_EXPORT
@@ -57,16 +60,37 @@
                                   void* event_stream,
                                   int (*printf_callback)(void*, const char*, ...),
                                   void* printf_stream,
-                                  const char* options);
+                                  const char* options,
+                                  int newline /* bool: print a newline after each instruction */);
+
+/* This is the compatibility interface for older versions of HotSpot */
+extern
+#ifdef DLL_ENTRY
+  DLL_ENTRY
+#endif
+void* decode_instructions(void* start_pv, void* end_pv,
+                    void* (*event_callback)(void*, const char*, void*),
+                    void* event_stream,
+                    int   (*printf_callback)(void*, const char*, ...),
+                    void* printf_stream,
+                    const char* options);
 
 /* convenience typedefs */
 
 typedef void* (*decode_instructions_event_callback_ftype)  (void*, const char*, void*);
 typedef int   (*decode_instructions_printf_callback_ftype) (void*, const char*, ...);
-typedef void* (*decode_instructions_ftype) (uintptr_t start_va, uintptr_t end_va,
-                                            unsigned char* buffer, uintptr_t length,
-                                            decode_instructions_event_callback_ftype event_callback,
-                                            void* event_stream,
-                                            decode_instructions_printf_callback_ftype printf_callback,
-                                            void* printf_stream,
-                                            const char* options);
+typedef void* (*decode_func_vtype) (uintptr_t start_va, uintptr_t end_va,
+                                    unsigned char* buffer, uintptr_t length,
+                                    decode_instructions_event_callback_ftype event_callback,
+                                    void* event_stream,
+                                    decode_instructions_printf_callback_ftype printf_callback,
+                                    void* printf_stream,
+                                    const char* options,
+                                    int newline);
+typedef void* (*decode_func_stype) (void* start_pv, void* end_pv,
+                                    decode_instructions_event_callback_ftype event_callback,
+                                    void* event_stream,
+                                    decode_instructions_printf_callback_ftype printf_callback,
+                                    void* printf_stream,
+                                    const char* options);
+#endif /* SHARED_TOOLS_HSDIS_H */
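
With the header now declaring both entry points, a client is expected to probe for decode_instructions_virtual first and fall back to the legacy decode_instructions, as hsdis-demo.c does above. A minimal sketch, assuming a POSIX dlopen() environment and an already-opened plugin handle; the callback bodies and the disassemble() wrapper are placeholders, not part of hsdis:

  // hsdis_client_sketch.cpp -- illustrative client of the two entry points.
  #include <dlfcn.h>
  #include <stdarg.h>
  #include <stdint.h>
  #include <stdio.h>
  #include "hsdis.h"

  static void* event_cb(void* stream, const char* tag, void* arg) {
    return NULL;  // ignore element events in this sketch
  }

  static int printf_cb(void* stream, const char* fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    int n = vfprintf((FILE*) stream, fmt, ap);
    va_end(ap);
    return n;
  }

  // Disassemble len bytes at buf, pretending they live at virtual address va.
  int disassemble(void* lib, unsigned char* buf, uintptr_t len, uintptr_t va) {
    decode_func_vtype dv =
        (decode_func_vtype) dlsym(lib, "decode_instructions_virtual");
    if (dv != NULL) {
      // Trailing 1 is the new 'newline' flag: newline after each instruction.
      (*dv)(va, va + len, buf, len, event_cb, NULL, printf_cb, stdout, "", 1);
      return 0;
    }
    decode_func_stype ds =
        (decode_func_stype) dlsym(lib, "decode_instructions");
    if (ds != NULL) {
      (*ds)(buf, buf + len, event_cb, NULL, printf_cb, stdout, "");
      return 0;
    }
    return -1;  // plugin defines neither entry point
  }
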
--- a/src/share/tools/launcher/java.c	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/launcher/java.c	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -808,7 +808,7 @@
 static int
 parse_stack_size(const char *s, jlong *result) {
   jlong n = 0;
-  int args_read = sscanf(s, jlong_format_specifier(), &n);
+  int args_read = sscanf(s, JLONG_FORMAT, &n);
   if (args_read != 1) {
     return 0;
   }
--- a/src/share/tools/launcher/java.h	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/tools/launcher/java.h	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -86,7 +86,6 @@
 jboolean RemovableMachineDependentOption(char * option);
 void PrintMachineDependentOptions();
 
-const char *jlong_format_specifier();
 /*
  * Block current thread and continue execution in new thread
  */
--- a/src/share/vm/adlc/formssel.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/adlc/formssel.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -862,8 +862,10 @@
       ( strcmp(_matrule->_rChild->_opType,"AryEq"     )==0 ||
         strcmp(_matrule->_rChild->_opType,"StrComp"   )==0 ||
         strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 ||
-        strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 )) {
+        strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 ||
+        strcmp(_matrule->_rChild->_opType,"EncodeISOArray")==0)) {
         // String.(compareTo/equals/indexOf) and Arrays.equals
+        // and sun.nio.cs.ISO_8859_1$Encoder.encodeISOArray
        // take 1 control and 1 memory edge.
     return 2;
   }
--- a/src/share/vm/adlc/main.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/adlc/main.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -212,7 +212,7 @@
   AD.addInclude(AD._CPP_file, "adfiles", get_basename(AD._VM_file._name));
   AD.addInclude(AD._CPP_file, "adfiles", get_basename(AD._HPP_file._name));
   AD.addInclude(AD._CPP_file, "memory/allocation.inline.hpp");
-  AD.addInclude(AD._CPP_file, "asm/assembler.hpp");
+  AD.addInclude(AD._CPP_file, "asm/macroAssembler.inline.hpp");
   AD.addInclude(AD._CPP_file, "code/vmreg.hpp");
   AD.addInclude(AD._CPP_file, "gc_interface/collectedHeap.inline.hpp");
   AD.addInclude(AD._CPP_file, "oops/compiledICHolder.hpp");
@@ -231,17 +231,14 @@
   AD.addInclude(AD._CPP_file, "runtime/stubRoutines.hpp");
   AD.addInclude(AD._CPP_file, "utilities/growableArray.hpp");
 #ifdef TARGET_ARCH_x86
-  AD.addInclude(AD._CPP_file, "assembler_x86.inline.hpp");
   AD.addInclude(AD._CPP_file, "nativeInst_x86.hpp");
   AD.addInclude(AD._CPP_file, "vmreg_x86.inline.hpp");
 #endif
 #ifdef TARGET_ARCH_sparc
-  AD.addInclude(AD._CPP_file, "assembler_sparc.inline.hpp");
   AD.addInclude(AD._CPP_file, "nativeInst_sparc.hpp");
   AD.addInclude(AD._CPP_file, "vmreg_sparc.inline.hpp");
 #endif
 #ifdef TARGET_ARCH_arm
-  AD.addInclude(AD._CPP_file, "assembler_arm.inline.hpp");
   AD.addInclude(AD._CPP_file, "nativeInst_arm.hpp");
   AD.addInclude(AD._CPP_file, "vmreg_arm.inline.hpp");
 #endif
--- a/src/share/vm/asm/assembler.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/asm/assembler.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,26 +23,13 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "asm/codeBuffer.hpp"
+#include "runtime/atomic.hpp"
+#include "runtime/atomic.inline.hpp"
 #include "runtime/icache.hpp"
 #include "runtime/os.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_zero
-# include "assembler_zero.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_arm
-# include "assembler_arm.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_ppc
-# include "assembler_ppc.inline.hpp"
-#endif
 
 
 // Implementation of AbstractAssembler
@@ -56,16 +43,13 @@
   if (code == NULL)  return;
   CodeSection* cs = code->insts();
   cs->clear_mark();   // new assembler kills old mark
-  _code_section = cs;
-  _code_begin  = cs->start();
-  _code_limit  = cs->limit();
-  _code_pos    = cs->end();
-  _oop_recorder= code->oop_recorder();
-  DEBUG_ONLY( _short_branch_delta = 0; )
-  if (_code_begin == NULL)  {
+  if (cs->start() == NULL)  {
     vm_exit_out_of_memory(0, err_msg("CodeCache: no room for %s",
                                      code->name()));
   }
+  _code_section = cs;
+  _oop_recorder= code->oop_recorder();
+  DEBUG_ONLY( _short_branch_delta = 0; )
 }
 
 void AbstractAssembler::set_code_section(CodeSection* cs) {
@@ -73,9 +57,6 @@
   assert(cs->is_allocated(), "need to pre-allocate this section");
   cs->clear_mark();  // new assembly into this section kills old mark
   _code_section = cs;
-  _code_begin  = cs->start();
-  _code_limit  = cs->limit();
-  _code_pos    = cs->end();
 }
 
 // Inform CodeBuffer that incoming code and relocation will be for stubs
@@ -83,7 +64,6 @@
   CodeBuffer*  cb = code();
   CodeSection* cs = cb->stubs();
   assert(_code_section == cb->insts(), "not in insts?");
-  sync();
   if (cs->maybe_expand_to_ensure_remaining(required_space)
       && cb->blob() == NULL) {
     return NULL;
@@ -96,7 +76,6 @@
 // Should not be called if start_a_stub() returned NULL
 void AbstractAssembler::end_a_stub() {
   assert(_code_section == code()->stubs(), "not in stubs?");
-  sync();
   set_code_section(code()->insts());
 }
 
@@ -104,8 +83,7 @@
 address AbstractAssembler::start_a_const(int required_space, int required_align) {
   CodeBuffer*  cb = code();
   CodeSection* cs = cb->consts();
-  assert(_code_section == cb->insts(), "not in insts?");
-  sync();
+  assert(_code_section == cb->insts() || _code_section == cb->stubs(), "not in insts/stubs?");
   address end = cs->end();
   int pad = -(intptr_t)end & (required_align-1);
   if (cs->maybe_expand_to_ensure_remaining(pad + required_space)) {
@@ -121,50 +99,16 @@
 }
 
 // Inform CodeBuffer that incoming code and relocation will be code
-// Should not be called if start_a_const() returned NULL
-void AbstractAssembler::end_a_const() {
+// in section cs (insts or stubs).
+void AbstractAssembler::end_a_const(CodeSection* cs) {
   assert(_code_section == code()->consts(), "not in consts?");
-  sync();
-  set_code_section(code()->insts());
-}
-
-
-void AbstractAssembler::flush() {
-  sync();
-  ICache::invalidate_range(addr_at(0), offset());
-}
-
-
-void AbstractAssembler::a_byte(int x) {
-  emit_byte(x);
+  set_code_section(cs);
 }
 
-
-void AbstractAssembler::a_long(jint x) {
-  emit_long(x);
+void AbstractAssembler::flush() {
+  ICache::invalidate_range(addr_at(0), offset());
 }
 
-// Labels refer to positions in the (to be) generated code.  There are bound
-// and unbound
-//
-// Bound labels refer to known positions in the already generated code.
-// offset() is the position the label refers to.
-//
-// Unbound labels refer to unknown positions in the code to be generated; it
-// may contain a list of unresolved displacements that refer to it
-#ifndef PRODUCT
-void AbstractAssembler::print(Label& L) {
-  if (L.is_bound()) {
-    tty->print_cr("bound label to %d|%d", L.loc_pos(), L.loc_sect());
-  } else if (L.is_unbound()) {
-    L.print_instructions((MacroAssembler*)this);
-  } else {
-    tty->print_cr("label in inconsistent state (loc = %d)", L.loc());
-  }
-}
-#endif // PRODUCT
-
-
 void AbstractAssembler::bind(Label& L) {
   if (L.is_bound()) {
     // Assembler can bind a label more than once to the same place.
@@ -367,28 +311,3 @@
 #endif
   return offset < 0 || os::vm_page_size() <= offset;
 }
-
-#ifndef PRODUCT
-void Label::print_instructions(MacroAssembler* masm) const {
-  CodeBuffer* cb = masm->code();
-  for (int i = 0; i < _patch_index; ++i) {
-    int branch_loc;
-    if (i >= PatchCacheSize) {
-      branch_loc = _patch_overflow->at(i - PatchCacheSize);
-    } else {
-      branch_loc = _patches[i];
-    }
-    int branch_pos  = CodeBuffer::locator_pos(branch_loc);
-    int branch_sect = CodeBuffer::locator_sect(branch_loc);
-    address branch = cb->locator_address(branch_loc);
-    tty->print_cr("unbound label");
-    tty->print("@ %d|%d ", branch_pos, branch_sect);
-    if (branch_sect == CodeBuffer::SECT_CONSTS) {
-      tty->print_cr(PTR_FORMAT, *(address*)branch);
-      continue;
-    }
-    masm->pd_print_patched_instruction(branch);
-    tty->cr();
-  }
-}
-#endif // ndef PRODUCT
--- a/src/share/vm/asm/assembler.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/asm/assembler.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,12 +25,14 @@
 #ifndef SHARE_VM_ASM_ASSEMBLER_HPP
 #define SHARE_VM_ASM_ASSEMBLER_HPP
 
+#include "asm/codeBuffer.hpp"
 #include "code/oopRecorder.hpp"
 #include "code/relocInfo.hpp"
 #include "memory/allocation.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/growableArray.hpp"
 #include "utilities/top.hpp"
+
 #ifdef TARGET_ARCH_x86
 # include "register_x86.hpp"
 # include "vm_version_x86.hpp"
@@ -54,7 +56,6 @@
 
 // This file contains platform-independent assembler declarations.
 
-class CodeBuffer;
 class MacroAssembler;
 class AbstractAssembler;
 class Label;
@@ -122,7 +123,7 @@
     assert(_loc == -1, "already bound");
     _loc = loc;
   }
-  void bind_loc(int pos, int sect);  // = bind_loc(locator(pos, sect))
+  void bind_loc(int pos, int sect) { bind_loc(CodeBuffer::locator(pos, sect)); }
 
 #ifndef PRODUCT
   // Iterates over all unresolved instructions for printing
@@ -137,8 +138,8 @@
     assert(_loc >= 0, "unbound label");
     return _loc;
   }
-  int loc_pos() const;   // == locator_pos(loc())
-  int loc_sect() const;  // == locator_sect(loc())
+  int loc_pos()  const { return CodeBuffer::locator_pos(loc()); }
+  int loc_sect() const { return CodeBuffer::locator_sect(loc()); }
 
   bool is_bound() const    { return _loc >=  0; }
   bool is_unbound() const  { return _loc == -1 && _patch_index > 0; }
@@ -201,27 +202,20 @@
 
  protected:
   CodeSection* _code_section;          // section within the code buffer
-  address      _code_begin;            // first byte of code buffer
-  address      _code_limit;            // first byte after code buffer
-  address      _code_pos;              // current code generation position
   OopRecorder* _oop_recorder;          // support for relocInfo::oop_type
 
   // Code emission & accessing
-  address addr_at(int pos) const       { return _code_begin + pos; }
+  address addr_at(int pos) const { return code_section()->start() + pos; }
+
 
   // This routine is called when a label is used for an address.
   // Labels and displacements truck in offsets, but target must return a PC.
-  address target(Label& L);            // return _code_section->target(L)
+  address target(Label& L)             { return code_section()->target(L, pc()); }
 
   bool is8bit(int x) const             { return -0x80 <= x && x < 0x80; }
   bool isByte(int x) const             { return 0 <= x && x < 0x100; }
   bool isShiftCount(int x) const       { return 0 <= x && x < 32; }
 
-  void emit_byte(int x);  // emit a single byte
-  void emit_word(int x);  // emit a 16-bit word (not a wordSize word!)
-  void emit_long(jint x); // emit a 32-bit word (not a longSize word!)
-  void emit_address(address x); // emit an address (not a longSize word!)
-
   // Instruction boundaries (required when emitting relocatable values).
   class InstructionMark: public StackObj {
    private:
@@ -237,10 +231,10 @@
     }
   };
   friend class InstructionMark;
-  #ifdef ASSERT
+#ifdef ASSERT
   // Make it return true on platforms which need to verify
   // instruction boundaries for some operations.
-  inline static bool pd_check_instruction_mark();
+  static bool pd_check_instruction_mark();
 
   // Add delta to short branch distance to verify that it still fit into imm8.
   int _short_branch_delta;
@@ -262,28 +256,31 @@
       _assm->clear_short_branch_delta();
     }
   };
-  #else
+#else
   // Dummy in product.
   class ShortBranchVerifier: public StackObj {
    public:
     ShortBranchVerifier(AbstractAssembler* assm) {}
   };
-  #endif
-
-  // Label functions
-  void print(Label& L);
+#endif
 
  public:
 
   // Creation
   AbstractAssembler(CodeBuffer* code);
 
-  // save end pointer back to code buf.
-  void sync();
-
   // ensure buf contains all code (call this before using/copying the code)
   void flush();
 
+  void emit_int8(   int8_t  x) { code_section()->emit_int8(   x); }
+  void emit_int16(  int16_t x) { code_section()->emit_int16(  x); }
+  void emit_int32(  int32_t x) { code_section()->emit_int32(  x); }
+  void emit_int64(  int64_t x) { code_section()->emit_int64(  x); }
+
+  void emit_float(  jfloat  x) { code_section()->emit_float(  x); }
+  void emit_double( jdouble x) { code_section()->emit_double( x); }
+  void emit_address(address x) { code_section()->emit_address(x); }
+
   // min and max values for signed immediate ranges
   static int min_simm(int nbits) { return -(intptr_t(1) << (nbits - 1))    ; }
   static int max_simm(int nbits) { return  (intptr_t(1) << (nbits - 1)) - 1; }
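
For context on the unchanged simm helpers shown above: min_simm/max_simm give the closed range of an nbits-wide signed immediate, e.g. [-128, 127] for 8 bits and [-4096, 4095] for SPARC's simm13. A standalone check mirroring the two functions (not HotSpot code):

  // simm_sketch.cpp -- worked example of the signed-immediate range helpers.
  #include <cassert>
  #include <cstdio>

  static long min_simm(int nbits) { return -(1L << (nbits - 1)); }
  static long max_simm(int nbits) { return  (1L << (nbits - 1)) - 1; }

  int main() {
    assert(min_simm(8)  == -128  && max_simm(8)  == 127);   // 8-bit imm8
    assert(min_simm(13) == -4096 && max_simm(13) == 4095);  // SPARC simm13
    std::printf("simm13 range: [%ld, %ld]\n", min_simm(13), max_simm(13));
    return 0;
  }
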
@@ -308,26 +305,29 @@
   static bool is_simm32(intptr_t x) { return is_simm(x, 32); }
 
   // Accessors
-  CodeBuffer*   code() const;          // _code_section->outer()
   CodeSection*  code_section() const   { return _code_section; }
-  int           sect() const;          // return _code_section->index()
-  address       pc() const             { return _code_pos; }
-  int           offset() const         { return _code_pos - _code_begin; }
-  int           locator() const;       // CodeBuffer::locator(offset(), sect())
+  CodeBuffer*   code()         const   { return code_section()->outer(); }
+  int           sect()         const   { return code_section()->index(); }
+  address       pc()           const   { return code_section()->end();   }
+  int           offset()       const   { return code_section()->size();  }
+  int           locator()      const   { return CodeBuffer::locator(offset(), sect()); }
+
   OopRecorder*  oop_recorder() const   { return _oop_recorder; }
   void      set_oop_recorder(OopRecorder* r) { _oop_recorder = r; }
 
-  address  inst_mark() const;
-  void set_inst_mark();
-  void clear_inst_mark();
+  address       inst_mark() const { return code_section()->mark();       }
+  void      set_inst_mark()       {        code_section()->set_mark();   }
+  void    clear_inst_mark()       {        code_section()->clear_mark(); }
 
   // Constants in code
-  void a_byte(int x);
-  void a_long(jint x);
-  void relocate(RelocationHolder const& rspec, int format = 0);
+  void relocate(RelocationHolder const& rspec, int format = 0) {
+    assert(!pd_check_instruction_mark()
+        || inst_mark() == NULL || inst_mark() == code_section()->end(),
+        "call relocate() between instructions");
+    code_section()->relocate(code_section()->end(), rspec, format);
+  }
   void relocate(   relocInfo::relocType rtype, int format = 0) {
-    if (rtype != relocInfo::none)
-      relocate(Relocation::spec_simple(rtype), format);
+    code_section()->relocate(code_section()->end(), rtype, format);
   }
 
   static int code_fill_byte();         // used to pad out odd-sized code buffers
@@ -348,52 +348,55 @@
   void       end_a_stub();
   // Ditto for constants.
   address    start_a_const(int required_space, int required_align = sizeof(double));
-  void       end_a_const();
+  void       end_a_const(CodeSection* cs);  // Pass the code section to continue in (insts or stubs).
 
   // constants support
+  //
+  // We must remember the code section (insts or stubs) in c1
+  // so we can reset to the proper section in end_a_const().
   address long_constant(jlong c) {
+    CodeSection* c1 = _code_section;
     address ptr = start_a_const(sizeof(c), sizeof(c));
     if (ptr != NULL) {
-      *(jlong*)ptr = c;
-      _code_pos = ptr + sizeof(c);
-      end_a_const();
+      emit_int64(c);
+      end_a_const(c1);
     }
     return ptr;
   }
   address double_constant(jdouble c) {
+    CodeSection* c1 = _code_section;
     address ptr = start_a_const(sizeof(c), sizeof(c));
     if (ptr != NULL) {
-      *(jdouble*)ptr = c;
-      _code_pos = ptr + sizeof(c);
-      end_a_const();
+      emit_double(c);
+      end_a_const(c1);
     }
     return ptr;
   }
   address float_constant(jfloat c) {
+    CodeSection* c1 = _code_section;
     address ptr = start_a_const(sizeof(c), sizeof(c));
     if (ptr != NULL) {
-      *(jfloat*)ptr = c;
-      _code_pos = ptr + sizeof(c);
-      end_a_const();
+      emit_float(c);
+      end_a_const(c1);
     }
     return ptr;
   }
   address address_constant(address c) {
+    CodeSection* c1 = _code_section;
     address ptr = start_a_const(sizeof(c), sizeof(c));
     if (ptr != NULL) {
-      *(address*)ptr = c;
-      _code_pos = ptr + sizeof(c);
-      end_a_const();
+      emit_address(c);
+      end_a_const(c1);
     }
     return ptr;
   }
   address address_constant(address c, RelocationHolder const& rspec) {
+    CodeSection* c1 = _code_section;
     address ptr = start_a_const(sizeof(c), sizeof(c));
     if (ptr != NULL) {
       relocate(rspec);
-      *(address*)ptr = c;
-      _code_pos = ptr + sizeof(c);
-      end_a_const();
+      emit_address(c);
+      end_a_const(c1);
     }
     return ptr;
   }
@@ -429,15 +432,6 @@
    */
   void pd_patch_instruction(address branch, address target);
 
-#ifndef PRODUCT
-  /**
-   * Platform-dependent method of printing an instruction that needs to be
-   * patched.
-   *
-   * @param branch the instruction to be patched in the buffer.
-   */
-  static void pd_print_patched_instruction(address branch);
-#endif // PRODUCT
 };
 
 #ifdef TARGET_ARCH_x86
--- a/src/share/vm/asm/assembler.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/asm/assembler.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,92 +26,21 @@
 #define SHARE_VM_ASM_ASSEMBLER_INLINE_HPP
 
 #include "asm/assembler.hpp"
-#include "asm/codeBuffer.hpp"
-#include "compiler/disassembler.hpp"
-#include "runtime/threadLocalStorage.hpp"
 
-inline void AbstractAssembler::sync() {
-  CodeSection* cs = code_section();
-  guarantee(cs->start() == _code_begin, "must not shift code buffer");
-  cs->set_end(_code_pos);
-}
-
-inline void AbstractAssembler::emit_byte(int x) {
-  assert(isByte(x), "not a byte");
-  *(unsigned char*)_code_pos = (unsigned char)x;
-  _code_pos += sizeof(unsigned char);
-  sync();
-}
-
-
-inline void AbstractAssembler::emit_word(int x) {
-  *(short*)_code_pos = (short)x;
-  _code_pos += sizeof(short);
-  sync();
-}
-
-
-inline void AbstractAssembler::emit_long(jint x) {
-  *(jint*)_code_pos = x;
-  _code_pos += sizeof(jint);
-  sync();
-}
-
-inline void AbstractAssembler::emit_address(address x) {
-  *(address*)_code_pos = x;
-  _code_pos += sizeof(address);
-  sync();
-}
-
-inline address AbstractAssembler::inst_mark() const {
-  return code_section()->mark();
-}
-
-
-inline void AbstractAssembler::set_inst_mark() {
-  code_section()->set_mark();
-}
-
-
-inline void AbstractAssembler::clear_inst_mark() {
-  code_section()->clear_mark();
-}
-
-
-inline void AbstractAssembler::relocate(RelocationHolder const& rspec, int format) {
-  assert(!pd_check_instruction_mark()
-         || inst_mark() == NULL || inst_mark() == _code_pos,
-         "call relocate() between instructions");
-  code_section()->relocate(_code_pos, rspec, format);
-}
-
-
-inline CodeBuffer* AbstractAssembler::code() const {
-  return code_section()->outer();
-}
-
-inline int AbstractAssembler::sect() const {
-  return code_section()->index();
-}
-
-inline int AbstractAssembler::locator() const {
-  return CodeBuffer::locator(offset(), sect());
-}
-
-inline address AbstractAssembler::target(Label& L) {
-  return code_section()->target(L, pc());
-}
-
-inline int Label::loc_pos() const {
-  return CodeBuffer::locator_pos(loc());
-}
-
-inline int Label::loc_sect() const {
-  return CodeBuffer::locator_sect(loc());
-}
-
-inline void Label::bind_loc(int pos, int sect) {
-  bind_loc(CodeBuffer::locator(pos, sect));
-}
+#ifdef TARGET_ARCH_x86
+# include "assembler_x86.inline.hpp"
+#endif
+#ifdef TARGET_ARCH_sparc
+# include "assembler_sparc.inline.hpp"
+#endif
+#ifdef TARGET_ARCH_zero
+# include "assembler_zero.inline.hpp"
+#endif
+#ifdef TARGET_ARCH_arm
+# include "assembler_arm.inline.hpp"
+#endif
+#ifdef TARGET_ARCH_ppc
+# include "assembler_ppc.inline.hpp"
+#endif
 
 #endif // SHARE_VM_ASM_ASSEMBLER_INLINE_HPP
--- a/src/share/vm/asm/codeBuffer.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/asm/codeBuffer.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -254,6 +254,10 @@
   return start + locator_pos(locator);
 }
 
+bool CodeBuffer::is_backward_branch(Label& L) {
+  return L.is_bound() && insts_end() <= locator_address(L.loc());
+}
+
 address CodeBuffer::decode_begin() {
   address begin = _insts.start();
   if (_decode_begin != NULL && _decode_begin > begin)
@@ -492,6 +496,14 @@
   dest->verify_section_allocation();
 }
 
+// Append an oop reference that keeps the class alive.
+static void append_oop_references(GrowableArray<oop>* oops, Klass* k) {
+  oop cl = k->klass_holder();
+  if (cl != NULL && !oops->contains(cl)) {
+    oops->append(cl);
+  }
+}
+
 void CodeBuffer::finalize_oop_references(methodHandle mh) {
   No_Safepoint_Verifier nsv;
 
@@ -509,7 +521,6 @@
         if (md->metadata_is_immediate()) {
           Metadata* m = md->metadata_value();
           if (oop_recorder()->is_real(m)) {
-            oop o = NULL;
             if (m->is_methodData()) {
               m = ((MethodData*)m)->method();
             }
@@ -517,16 +528,13 @@
               m = ((Method*)m)->method_holder();
             }
             if (m->is_klass()) {
-              o = ((Klass*)m)->class_loader();
+              append_oop_references(&oops, (Klass*)m);
             } else {
               // XXX This will currently occur for MDO which don't
               // have a backpointer.  This has to be fixed later.
               m->print();
               ShouldNotReachHere();
             }
-            if (o != NULL && oops.find(o) == -1) {
-              oops.append(o);
-            }
           }
         }
       }
@@ -537,7 +545,6 @@
     for (int i = 0; i < oop_recorder()->metadata_count(); i++) {
       Metadata* m = oop_recorder()->metadata_at(i);
       if (oop_recorder()->is_real(m)) {
-        oop o = NULL;
         if (m->is_methodData()) {
           m = ((MethodData*)m)->method();
         }
@@ -545,24 +552,18 @@
           m = ((Method*)m)->method_holder();
         }
         if (m->is_klass()) {
-          o = ((Klass*)m)->class_loader();
+          append_oop_references(&oops, (Klass*)m);
         } else {
           m->print();
           ShouldNotReachHere();
         }
-        if (o != NULL && oops.find(o) == -1) {
-          oops.append(o);
-        }
       }
     }
 
   }
 
   // Add the class loader of Method* for the nmethod itself
-  oop cl = mh->method_holder()->class_loader();
-  if (cl != NULL) {
-    oops.append(cl);
-  }
+  append_oop_references(&oops, mh->method_holder());
 
   // Add any oops that we've found
   Thread* thread = Thread::current();
@@ -749,7 +750,18 @@
 
     // Make the new code copy use the old copy's relocations:
     dest_cs->initialize_locs_from(cs);
+  }
 
+  // Do relocation after all sections are copied.
+  // This is necessary if the code uses constants in stubs, which are
+  // relocated when the corresponding instruction in the code (e.g., a
+  // call) is relocated. Stubs are placed behind the main code
+  // section, so that section has to be copied before relocating.
+  for (int n = (int) SECT_FIRST; n < (int)SECT_LIMIT; n++) {
+    // pull code out of each section
+    const CodeSection* cs = code_section(n);
+    if (cs->is_empty()) continue;  // skip trivial section
+    CodeSection* dest_cs = dest->code_section(n);
     { // Repair the pc relative information in the code after the move
       RelocIterator iter(dest_cs);
       while (iter.next()) {
@@ -758,7 +770,7 @@
     }
   }
 
-  if (dest->blob() == NULL) {
+  if (dest->blob() == NULL && dest_filled != NULL) {
     // Destination is a final resting place, not just another buffer.
     // Normalize uninitialized bytes in the final padding.
     Copy::fill_to_bytes(dest_filled, dest_end - dest_filled,
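
A minimal sketch of the copy-then-relocate ordering that the comment in the hunk above describes; Section and relocate here are illustrative stand-ins for CodeSection and RelocIterator, not HotSpot types:

    #include <cstring>
    #include <cstddef>

    struct Section { const char* src; char* dst; std::size_t len; };

    // Stand-in for one relocation pass over a section; a real relocation
    // may patch bytes in a *different* section (e.g. a constant in a stub).
    static void relocate(Section* sects, int n, int i) { (void)sects; (void)n; (void)i; }

    void copy_then_relocate(Section* sects, int n) {
      for (int i = 0; i < n; i++)      // pass 1: copy every section first
        std::memcpy(sects[i].dst, sects[i].src, sects[i].len);
      for (int i = 0; i < n; i++)      // pass 2: only now run relocations,
        relocate(sects, n, i);         // which may read or patch other sections
    }
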
--- a/src/share/vm/asm/codeBuffer.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/asm/codeBuffer.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,17 +25,15 @@
 #ifndef SHARE_VM_ASM_CODEBUFFER_HPP
 #define SHARE_VM_ASM_CODEBUFFER_HPP
 
-#include "asm/assembler.hpp"
 #include "code/oopRecorder.hpp"
 #include "code/relocInfo.hpp"
 
-class  CodeComments;
-class  AbstractAssembler;
-class  MacroAssembler;
-class  PhaseCFG;
-class  Compile;
-class  BufferBlob;
-class  CodeBuffer;
+class CodeComments;
+class PhaseCFG;
+class Compile;
+class BufferBlob;
+class CodeBuffer;
+class Label;
 
 class CodeOffsets: public StackObj {
 public:
@@ -194,10 +192,14 @@
   }
 
   // Code emission
-  void emit_int8 (int8_t  x) { *((int8_t*)  end()) = x; set_end(end() + 1); }
-  void emit_int16(int16_t x) { *((int16_t*) end()) = x; set_end(end() + 2); }
-  void emit_int32(int32_t x) { *((int32_t*) end()) = x; set_end(end() + 4); }
-  void emit_int64(int64_t x) { *((int64_t*) end()) = x; set_end(end() + 8); }
+  void emit_int8 ( int8_t  x)  { *((int8_t*)  end()) = x; set_end(end() + sizeof(int8_t)); }
+  void emit_int16( int16_t x)  { *((int16_t*) end()) = x; set_end(end() + sizeof(int16_t)); }
+  void emit_int32( int32_t x)  { *((int32_t*) end()) = x; set_end(end() + sizeof(int32_t)); }
+  void emit_int64( int64_t x)  { *((int64_t*) end()) = x; set_end(end() + sizeof(int64_t)); }
+
+  void emit_float( jfloat  x)  { *((jfloat*)  end()) = x; set_end(end() + sizeof(jfloat)); }
+  void emit_double(jdouble x)  { *((jdouble*) end()) = x; set_end(end() + sizeof(jdouble)); }
+  void emit_address(address x) { *((address*) end()) = x; set_end(end() + sizeof(address)); }
 
   // Share a scratch buffer for relocinfo.  (Hacky; saves a resource allocation.)
   void initialize_shared_locs(relocInfo* buf, int length);
@@ -451,6 +453,9 @@
   int        locator(address addr) const;
   address    locator_address(int locator) const;
 
+  // Heuristic for pre-packing the taken/not-taken bit of a predicted branch.
+  bool is_backward_branch(Label& L);
+
   // Properties
   const char* name() const                  { return _name; }
   CodeBuffer* before_expand() const         { return _before_expand; }
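
The emit_* rewrite above swaps hard-coded byte counts for sizeof, which is what lets emit_float/emit_double/emit_address be added without per-type constants. A toy analog of the pattern (ToySection is hypothetical, not a HotSpot type):

    #include <cstring>

    struct ToySection {
      unsigned char* _end;                  // current emission point, as in end()
      template <typename T> void emit(T x) {
        std::memcpy(_end, &x, sizeof(T));   // store the value at the end pointer
        _end += sizeof(T);                  // advance by sizeof(T), as above
      }
    };
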
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/asm/macroAssembler.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_ASM_MACROASSEMBLER_HPP
+#define SHARE_VM_ASM_MACROASSEMBLER_HPP
+
+#include "asm/assembler.hpp"
+
+#ifdef TARGET_ARCH_x86
+# include "macroAssembler_x86.hpp"
+#endif
+#ifdef TARGET_ARCH_sparc
+# include "macroAssembler_sparc.hpp"
+#endif
+#ifdef TARGET_ARCH_zero
+# include "assembler_zero.hpp"
+#endif
+#ifdef TARGET_ARCH_arm
+# include "assembler_arm.hpp"
+#endif
+#ifdef TARGET_ARCH_ppc
+# include "assembler_ppc.hpp"
+#endif
+
+#endif // SHARE_VM_ASM_MACROASSEMBLER_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/asm/macroAssembler.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP
+#define SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP
+
+#include "asm/macroAssembler.hpp"
+
+#ifdef TARGET_ARCH_x86
+// no macroAssembler_x86.inline.hpp
+#endif
+#ifdef TARGET_ARCH_sparc
+# include "macroAssembler_sparc.inline.hpp"
+#endif
+#ifdef TARGET_ARCH_zero
+# include "assembler_zero.inline.hpp"
+#endif
+#ifdef TARGET_ARCH_arm
+# include "assembler_arm.inline.hpp"
+#endif
+#ifdef TARGET_ARCH_ppc
+# include "assembler_ppc.inline.hpp"
+#endif
+
+#endif // SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP
--- a/src/share/vm/asm/register.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/asm/register.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -93,6 +93,21 @@
 #define REGISTER_DEFINITION(type, name) \
 const type name = ((type)name##_##type##EnumValue)
 
+#ifdef TARGET_ARCH_x86
+# include "register_x86.hpp"
+#endif
+#ifdef TARGET_ARCH_sparc
+# include "register_sparc.hpp"
+#endif
+#ifdef TARGET_ARCH_zero
+# include "register_zero.hpp"
+#endif
+#ifdef TARGET_ARCH_arm
+# include "register_arm.hpp"
+#endif
+#ifdef TARGET_ARCH_ppc
+# include "register_ppc.hpp"
+#endif
 
 
 // Debugging support
--- a/src/share/vm/c1/c1_Compilation.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/c1/c1_Compilation.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -129,7 +129,15 @@
   CHECK_BAILOUT();
 
   // setup ir
+  CompileLog* log = this->log();
+  if (log != NULL) {
+    log->begin_head("parse method='%d' ",
+                    log->identify(_method));
+    log->stamp();
+    log->end_head();
+  }
   _hir = new IR(this, method(), osr_bci());
+  if (log)  log->done("parse");
   if (!_hir->is_valid()) {
     bailout("invalid parsing");
     return;
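
With -XX:+LogCompilation, the begin_head/stamp/end_head and done calls added above bracket C1 IR construction in the compilation log, roughly as follows (element attributes and values illustrative):

    <parse method='712' stamp='0.517'>
    <parse_done stamp='0.519'/>
    </parse>
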
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1836,7 +1836,7 @@
   // check if we could do inlining
   if (!PatchALot && Inline && klass->is_loaded() &&
       (klass->is_initialized() || klass->is_interface() && target->holder()->is_initialized())
-      && target->will_link(klass, callee_holder, code)) {
+      && target->is_loaded()) {
     // callee is known => check if we have static binding
     assert(target->is_loaded(), "callee must be known");
     if (code == Bytecodes::_invokestatic  ||
@@ -1844,17 +1844,12 @@
         code == Bytecodes::_invokevirtual && target->is_final_method() ||
         code == Bytecodes::_invokedynamic) {
       ciMethod* inline_target = (cha_monomorphic_target != NULL) ? cha_monomorphic_target : target;
-      bool success = false;
-      if (target->is_method_handle_intrinsic()) {
-        // method handle invokes
-        success = try_method_handle_inline(target);
-      } else {
-        // static binding => check if callee is ok
-        success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
-      }
+      // static binding => check if callee is ok
+      bool success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
+
       CHECK_BAILOUT();
-
       clear_inline_bailout();
+
       if (success) {
         // Register dependence if JVMTI has either breakpoint
         // setting or hotswapping of methods capabilities since they may
@@ -3201,6 +3196,11 @@
     return false;
   }
 
+  // method handle invokes
+  if (callee->is_method_handle_intrinsic()) {
+    return try_method_handle_inline(callee);
+  }
+
   // handle intrinsics
   if (callee->intrinsic_id() != vmIntrinsics::_none) {
     if (try_inline_intrinsics(callee)) {
@@ -3223,7 +3223,12 @@
   }
   if (try_inline_full(callee, holder_known, bc, receiver))
     return true;
-  print_inlining(callee, _inline_bailout_msg, /*success*/ false);
+
+  // The entire compilation could fail during the try_inline_full call.
+  // In that case, printing the inlining decision info is useless.
+  if (!bailed_out())
+    print_inlining(callee, _inline_bailout_msg, /*success*/ false);
+
   return false;
 }
 
@@ -3442,6 +3447,11 @@
       preserves_state = true;
       break;
 
+    case vmIntrinsics::_loadFence :
+    case vmIntrinsics::_storeFence:
+    case vmIntrinsics::_fullFence :
+      break;
+
     default                       : return false; // do not inline
   }
   // create intrinsic node
@@ -3748,7 +3758,8 @@
   push_scope(callee, cont);
 
   // the BlockListBuilder for the callee could have bailed out
-  CHECK_BAILOUT_(false);
+  if (bailed_out())
+      return false;
 
   // Temporarily set up bytecode stream so we can append instructions
   // (only using the bci of this stream)
@@ -3814,7 +3825,8 @@
   iterate_all_blocks(callee_start_block == NULL);
 
   // If we bailed out during parsing, return immediately (this is bad news)
-  if (bailed_out()) return false;
+  if (bailed_out())
+      return false;
 
   // iterate_all_blocks theoretically traverses in random order; in
   // practice, we have only traversed the continuation if we are
@@ -3823,9 +3835,6 @@
          !continuation()->is_set(BlockBegin::was_visited_flag),
          "continuation should not have been parsed yet if we created it");
 
-  // If we bailed out during parsing, return immediately (this is bad news)
-  CHECK_BAILOUT_(false);
-
   // At this point we are almost ready to return and resume parsing of
   // the caller back in the GraphBuilder. The only thing we want to do
   // first is an optimization: during parsing of the callee we
@@ -3885,10 +3894,14 @@
       ValueType* type = state()->stack_at(args_base)->type();
       if (type->is_constant()) {
         ciMethod* target = type->as_ObjectType()->constant_value()->as_method_handle()->get_vmtarget();
-        guarantee(!target->is_method_handle_intrinsic(), "should not happen");  // XXX remove
-        Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
-        if (try_inline(target, /*holder_known*/ true, bc)) {
-          return true;
+        // We don't do CHA here so only inline static and statically bindable methods.
+        if (target->is_static() || target->can_be_statically_bound()) {
+          Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
+          if (try_inline(target, /*holder_known*/ true, bc)) {
+            return true;
+          }
+        } else {
+          print_inlining(target, "not static or statically bindable", /*success*/ false);
         }
       } else {
         print_inlining(callee, "receiver not constant", /*success*/ false);
@@ -3941,9 +3954,14 @@
             }
             j += t->size();  // long and double take two slots
           }
-          Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
-          if (try_inline(target, /*holder_known*/ true, bc)) {
-            return true;
+          // We don't do CHA here so only inline static and statically bindable methods.
+          if (target->is_static() || target->can_be_statically_bound()) {
+            Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
+            if (try_inline(target, /*holder_known*/ true, bc)) {
+              return true;
+            }
+          } else {
+            print_inlining(target, "not static or statically bindable", /*success*/ false);
           }
         }
       } else {
@@ -4157,7 +4175,10 @@
       else
         log->inline_success("receiver is statically known");
     } else {
-      log->inline_fail(msg);
+      if (msg != NULL)
+        log->inline_fail(msg);
+      else
+        log->inline_fail("reason unknown");
     }
   }
 
--- a/src/share/vm/c1/c1_InstructionPrinter.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/c1/c1_InstructionPrinter.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -360,7 +360,7 @@
   ValueType* t = x->type();
   switch (t->tag()) {
     case intTag    : output()->print("%d"  , t->as_IntConstant   ()->value());    break;
-    case longTag   : output()->print(os::jlong_format_specifier(), t->as_LongConstant()->value()); output()->print("L"); break;
+    case longTag   : output()->print(JLONG_FORMAT, t->as_LongConstant()->value()); output()->print("L"); break;
     case floatTag  : output()->print("%g"  , t->as_FloatConstant ()->value());    break;
     case doubleTag : output()->print("%gD" , t->as_DoubleConstant()->value());    break;
     case objectTag : print_object(x);                                        break;
--- a/src/share/vm/c1/c1_LIR.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/c1/c1_LIR.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1563,7 +1563,7 @@
   switch (type()) {
     case T_ADDRESS:out->print("address:%d",as_jint());          break;
     case T_INT:    out->print("int:%d",   as_jint());           break;
-    case T_LONG:   out->print("lng:%lld", as_jlong());          break;
+    case T_LONG:   out->print("lng:" JLONG_FORMAT, as_jlong()); break;
     case T_FLOAT:  out->print("flt:%f",   as_jfloat());         break;
     case T_DOUBLE: out->print("dbl:%f",   as_jdouble());        break;
     case T_OBJECT: out->print("obj:0x%x", as_jobject());        break;
--- a/src/share/vm/c1/c1_LIR.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/c1/c1_LIR.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -2259,7 +2259,7 @@
   typedef enum { inputMode, firstMode = inputMode, tempMode, outputMode, numModes, invalidMode = -1 } OprMode;
 
   enum {
-    maxNumberOfOperands = 16,
+    maxNumberOfOperands = 20,
     maxNumberOfInfos = 4
   };
 
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -2977,6 +2977,16 @@
     do_CompareAndSwap(x, longType);
     break;
 
+  case vmIntrinsics::_loadFence :
+    if (os::is_MP()) __ membar_acquire();
+    break;
+  case vmIntrinsics::_storeFence:
+    if (os::is_MP()) __ membar_release();
+    break;
+  case vmIntrinsics::_fullFence :
+    if (os::is_MP()) __ membar();
+    break;
+
   case vmIntrinsics::_Reference_get:
     do_Reference_get(x);
     break;
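
Rough C++11 analogs of the three fence intrinsics wired to LIR membars above; these std::atomic fences only illustrate the intended semantics, they are not the HotSpot implementation:

    #include <atomic>

    inline void load_fence()  { std::atomic_thread_fence(std::memory_order_acquire); }  // _loadFence
    inline void store_fence() { std::atomic_thread_fence(std::memory_order_release); }  // _storeFence
    inline void full_fence()  { std::atomic_thread_fence(std::memory_order_seq_cst); }  // _fullFence

The os::is_MP() guard reflects that a uniprocessor needs no hardware fence at all.
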
--- a/src/share/vm/c1/c1_MacroAssembler.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/c1/c1_MacroAssembler.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,22 +25,8 @@
 #ifndef SHARE_VM_C1_C1_MACROASSEMBLER_HPP
 #define SHARE_VM_C1_C1_MACROASSEMBLER_HPP
 
-#include "asm/assembler.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_zero
-# include "assembler_zero.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_arm
-# include "assembler_arm.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_ppc
-# include "assembler_ppc.inline.hpp"
-#endif
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 
 class CodeEmitInfo;
 
--- a/src/share/vm/c1/c1_Runtime1.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -374,7 +374,7 @@
 
 JRT_ENTRY(void, Runtime1::throw_array_store_exception(JavaThread* thread, oopDesc* obj))
   ResourceMark rm(thread);
-  const char* klass_name = Klass::cast(obj->klass())->external_name();
+  const char* klass_name = obj->klass()->external_name();
   SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_ArrayStoreException(), klass_name);
 JRT_END
 
@@ -631,7 +631,7 @@
   NOT_PRODUCT(_throw_class_cast_exception_count++;)
   ResourceMark rm(thread);
   char* message = SharedRuntime::generate_class_cast_message(
-    thread, Klass::cast(object->klass())->external_name());
+    thread, object->klass()->external_name());
   SharedRuntime::throw_and_post_jvmti_exception(
     thread, vmSymbols::java_lang_ClassCastException(), message);
 JRT_END
@@ -876,7 +876,7 @@
       case Bytecodes::_anewarray:
         { Bytecode_anewarray anew(caller_method(), caller_method->bcp_from(bci));
           Klass* ek = caller_method->constants()->klass_at(anew.index(), CHECK);
-          k = Klass::cast(ek)->array_klass(CHECK);
+          k = ek->array_klass(CHECK);
         }
         break;
       case Bytecodes::_ldc:
@@ -1236,7 +1236,7 @@
   } else {
     Klass* bound = ObjArrayKlass::cast(dst->klass())->element_klass();
     Klass* stype = ObjArrayKlass::cast(src->klass())->element_klass();
-    if (stype == bound || Klass::cast(stype)->is_subtype_of(bound)) {
+    if (stype == bound || stype->is_subtype_of(bound)) {
       // Elements are guaranteed to be subtypes, so no check necessary
       bs->write_ref_array_pre(dst_addr, length);
       Copy::conjoint_oops_atomic(src_addr, dst_addr, length);
--- a/src/share/vm/c1/c1_globals.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/c1/c1_globals.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -147,7 +147,7 @@
           "Inline methods containing exception handlers "                   \
           "(NOTE: does not work with current backend)")                     \
                                                                             \
-  develop(bool, InlineSynchronizedMethods, true,                            \
+  product(bool, InlineSynchronizedMethods, true,                            \
           "Inline synchronized methods")                                    \
                                                                             \
   develop(bool, InlineNIOCheckIndex, true,                                  \
--- a/src/share/vm/ci/bcEscapeAnalyzer.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/bcEscapeAnalyzer.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -282,7 +282,7 @@
   ciMethod* inline_target = NULL;
   if (target->is_loaded() && klass->is_loaded()
       && (klass->is_initialized() || klass->is_interface() && target->holder()->is_initialized())
-      && target->will_link(klass, callee_holder, code)) {
+      && target->is_loaded()) {
     if (code == Bytecodes::_invokestatic
         || code == Bytecodes::_invokespecial
         || code == Bytecodes::_invokevirtual && target->is_final_method()) {
--- a/src/share/vm/ci/ciClassList.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciClassList.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -106,6 +106,7 @@
 friend class ciArray;                  \
 friend class ciObjArray;               \
 friend class ciMetadata;               \
+friend class ciReplay;                 \
 friend class ciTypeArray;              \
 friend class ciType;                   \
 friend class ciReturnAddress;          \
--- a/src/share/vm/ci/ciEnv.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciEnv.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,6 +30,7 @@
 #include "ci/ciInstanceKlass.hpp"
 #include "ci/ciMethod.hpp"
 #include "ci/ciNullObject.hpp"
+#include "ci/ciReplay.hpp"
 #include "ci/ciUtilities.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
@@ -426,7 +427,7 @@
     for (int i = cpool->length() - 1; i >= 1; i--) {
       if (cpool->tag_at(i).is_klass()) {
         Klass* kls = cpool->resolved_klass_at(i);
-        if (Klass::cast(kls)->name() == sym) {
+        if (kls->name() == sym) {
           found_klass = KlassHandle(THREAD, kls);
           break;
         }
@@ -768,10 +769,15 @@
       Method* m = lookup_method(accessor->get_instanceKlass(), lookup, name_sym, sig_sym, bc);
       if (m != NULL &&
           (bc == Bytecodes::_invokestatic
-           ?  InstanceKlass::cast(m->method_holder())->is_not_initialized()
-           : !InstanceKlass::cast(m->method_holder())->is_loaded())) {
+           ?  m->method_holder()->is_not_initialized()
+           : !m->method_holder()->is_loaded())) {
         m = NULL;
       }
+#ifdef ASSERT
+      if (m != NULL && ReplayCompiles && !ciReplay::is_loaded(m)) {
+        m = NULL;
+      }
+#endif
       if (m != NULL) {
         // We found the method.
         return get_method(m);
@@ -1056,7 +1062,7 @@
                         method_name,
                         entry_bci);
         }
-        InstanceKlass::cast(method->method_holder())->add_osr_nmethod(nm);
+        method->method_holder()->add_osr_nmethod(nm);
 
       }
     }
@@ -1144,3 +1150,43 @@
   // If memory is low, we stop compiling methods.
   record_method_not_compilable("out of memory");
 }
+
+fileStream* ciEnv::_replay_data_stream = NULL;
+
+void ciEnv::dump_replay_data() {
+  VM_ENTRY_MARK;
+  MutexLocker ml(Compile_lock);
+  if (_replay_data_stream == NULL) {
+    _replay_data_stream = new (ResourceObj::C_HEAP, mtCompiler) fileStream(ReplayDataFile);
+    if (_replay_data_stream == NULL) {
+      fatal(err_msg("Can't open %s for replay data", ReplayDataFile));
+    }
+  }
+  dump_replay_data(_replay_data_stream);
+}
+
+
+void ciEnv::dump_replay_data(outputStream* out) {
+  ASSERT_IN_VM;
+
+#if INCLUDE_JVMTI
+  out->print_cr("JvmtiExport can_access_local_variables %d",     _jvmti_can_access_local_variables);
+  out->print_cr("JvmtiExport can_hotswap_or_post_breakpoint %d", _jvmti_can_hotswap_or_post_breakpoint);
+  out->print_cr("JvmtiExport can_post_on_exceptions %d",         _jvmti_can_post_on_exceptions);
+#endif // INCLUDE_JVMTI
+
+  GrowableArray<ciMetadata*>* objects = _factory->get_ci_metadata();
+  out->print_cr("# %d ciObject found", objects->length());
+  for (int i = 0; i < objects->length(); i++) {
+    objects->at(i)->dump_replay_data(out);
+  }
+  Method* method = task()->method();
+  int entry_bci = task()->osr_bci();
+  // Klass holder = method->method_holder();
+  out->print_cr("compile %s %s %s %d",
+                method->klass_name()->as_quoted_ascii(),
+                method->name()->as_quoted_ascii(),
+                method->signature()->as_quoted_ascii(),
+                entry_bci);
+  out->flush();
+}
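
Putting the pieces together, an abridged replay file produced by dump_replay_data could look like this (record values hypothetical; the per-metadata ciInstanceKlass/ciMethod/ciMethodData lines are elided):

    JvmtiExport can_access_local_variables 0
    JvmtiExport can_hotswap_or_post_breakpoint 0
    JvmtiExport can_post_on_exceptions 0
    # 2 ciObject found
    compile java/lang/String hashCode ()I -1

An entry_bci of -1 (InvocationEntryBci) denotes a normal, non-OSR compilation.
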
--- a/src/share/vm/ci/ciEnv.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciEnv.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -46,6 +46,8 @@
   friend class CompileBroker;
   friend class Dependencies;  // for get_object, during logging
 
+  static fileStream* _replay_data_stream;
+
 private:
   Arena*           _arena;       // Alias for _ciEnv_arena except in init_shared_objects()
   Arena            _ciEnv_arena;
@@ -448,6 +450,13 @@
 
   // RedefineClasses support
   void metadata_do(void f(Metadata*)) { _factory->metadata_do(f); }
+
+  // Dump the compilation replay data for this ciEnv to
+  // ReplayDataFile, creating the file if needed.
+  void  dump_replay_data();
+
+  // Dump the compilation replay data for the ciEnv to the stream.
+  void dump_replay_data(outputStream* out);
 };
 
 #endif // SHARE_VM_CI_CIENV_HPP
--- a/src/share/vm/ci/ciField.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciField.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -366,10 +366,12 @@
 // ------------------------------------------------------------------
 // ciField::print
 void ciField::print() {
-  tty->print("<ciField ");
+  tty->print("<ciField name=");
   _holder->print_name();
   tty->print(".");
   _name->print_symbol();
+  tty->print(" signature=");
+  _signature->print_symbol();
   tty->print(" offset=%d type=", _offset);
   if (_type != NULL) _type->print_name();
   else               tty->print("(reference)");
--- a/src/share/vm/ci/ciInstanceKlass.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciInstanceKlass.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -561,3 +561,114 @@
   }
   return impl;
 }
+
+// Utility class for printing of the contents of the static fields for
+// use by compilation replay.  It only prints out the information that
+// could be consumed by the compiler, so for primitive types it prints
+// out the actual value.  For Strings it's the actual string value.
+// For array types it's the first-level array size, since that's the
+// only value which is statically unchangeable.  For all other reference
+// types it simply prints out the dynamic type.
+
+class StaticFinalFieldPrinter : public FieldClosure {
+  outputStream* _out;
+  const char*   _holder;
+ public:
+  StaticFinalFieldPrinter(outputStream* out, const char* holder) :
+    _out(out),
+    _holder(holder) {
+  }
+  void do_field(fieldDescriptor* fd) {
+    if (fd->is_final() && !fd->has_initial_value()) {
+      oop mirror = fd->field_holder()->java_mirror();
+      _out->print("staticfield %s %s %s ", _holder, fd->name()->as_quoted_ascii(), fd->signature()->as_quoted_ascii());
+      switch (fd->field_type()) {
+        case T_BYTE:    _out->print_cr("%d", mirror->byte_field(fd->offset()));   break;
+        case T_BOOLEAN: _out->print_cr("%d", mirror->bool_field(fd->offset()));   break;
+        case T_SHORT:   _out->print_cr("%d", mirror->short_field(fd->offset()));  break;
+        case T_CHAR:    _out->print_cr("%d", mirror->char_field(fd->offset()));   break;
+        case T_INT:     _out->print_cr("%d", mirror->int_field(fd->offset()));    break;
+        case T_LONG:    _out->print_cr(INT64_FORMAT, mirror->long_field(fd->offset()));   break;
+        case T_FLOAT: {
+          float f = mirror->float_field(fd->offset());
+          _out->print_cr("%d", *(int*)&f);
+          break;
+        }
+        case T_DOUBLE: {
+          double d = mirror->double_field(fd->offset());
+          _out->print_cr(INT64_FORMAT, *(jlong*)&d);
+          break;
+        }
+        case T_ARRAY: {
+          oop value =  mirror->obj_field_acquire(fd->offset());
+          if (value == NULL) {
+            _out->print_cr("null");
+          } else {
+            typeArrayOop ta = (typeArrayOop)value;
+            _out->print("%d", ta->length());
+            if (value->is_objArray()) {
+              objArrayOop oa = (objArrayOop)value;
+              const char* klass_name  = value->klass()->name()->as_quoted_ascii();
+              _out->print(" %s", klass_name);
+            }
+            _out->cr();
+          }
+          break;
+        }
+        case T_OBJECT: {
+          oop value =  mirror->obj_field_acquire(fd->offset());
+          if (value == NULL) {
+            _out->print_cr("null");
+          } else if (value->is_instance()) {
+            if (value->is_a(SystemDictionary::String_klass())) {
+              _out->print("\"");
+              _out->print_raw(java_lang_String::as_quoted_ascii(value));
+              _out->print_cr("\"");
+            } else {
+              const char* klass_name  = value->klass()->name()->as_quoted_ascii();
+              _out->print_cr(klass_name);
+            }
+          } else {
+            ShouldNotReachHere();
+          }
+          break;
+        }
+        default:
+          ShouldNotReachHere();
+        }
+    }
+  }
+};
+
+
+void ciInstanceKlass::dump_replay_data(outputStream* out) {
+  ASSERT_IN_VM;
+  InstanceKlass* ik = get_instanceKlass();
+  ConstantPool*  cp = ik->constants();
+
+  // Try to record related loaded classes
+  Klass* sub = ik->subklass();
+  while (sub != NULL) {
+    if (sub->oop_is_instance()) {
+      out->print_cr("instanceKlass %s", sub->name()->as_quoted_ascii());
+    }
+    sub = sub->next_sibling();
+  }
+
+  // Dump out the state of the constant pool tags.  During replay the
+  // tags will be validated for things which shouldn't change and
+  // classes will be resolved if the tags indicate that they were
+  // resolved at compile time.
+  out->print("ciInstanceKlass %s %d %d %d", ik->name()->as_quoted_ascii(),
+             is_linked(), is_initialized(), cp->length());
+  for (int index = 1; index < cp->length(); index++) {
+    out->print(" %d", cp->tags()->at(index));
+  }
+  out->cr();
+  if (is_initialized()) {
+    //  Dump out the static final fields in case the compilation relies
+    //  on their value for correct replay.
+    StaticFinalFieldPrinter sffp(out, ik->name()->as_quoted_ascii());
+    ik->do_local_static_fields(&sffp);
+  }
+}
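
For a small initialized class, the code above would emit records along these lines (class names, tag values, and field all hypothetical):

    instanceKlass Foo$Sub
    ciInstanceKlass Foo 1 1 5 7 100 12 1
    staticfield Foo NAME Ljava/lang/String; "foo"

where 5 is the constant pool length, the four trailing numbers are the tags of entries 1..4, and string-typed static finals are printed as quoted values.
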
--- a/src/share/vm/ci/ciInstanceKlass.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciInstanceKlass.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -230,6 +230,9 @@
   // What kind of ciObject is this?
   bool is_instance_klass() const { return true; }
   bool is_java_klass() const     { return true; }
+
+  // Dump the current state of this klass for compilation replay.
+  virtual void dump_replay_data(outputStream* out);
 };
 
 #endif // SHARE_VM_CI_CIINSTANCEKLASS_HPP
--- a/src/share/vm/ci/ciMetadata.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciMetadata.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -61,6 +61,7 @@
   virtual bool is_array_klass() const       { return false; }
   virtual bool is_obj_array_klass() const   { return false; }
   virtual bool is_type_array_klass() const  { return false; }
+  virtual void dump_replay_data(outputStream* st) { /* do nothing */ }
 
   ciMethod*                as_method() {
     assert(is_method(), "bad cast");
--- a/src/share/vm/ci/ciMethod.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciMethod.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,6 +31,7 @@
 #include "ci/ciMethodData.hpp"
 #include "ci/ciStreams.hpp"
 #include "ci/ciSymbol.hpp"
+#include "ci/ciReplay.hpp"
 #include "ci/ciUtilities.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "compiler/abstractCompiler.hpp"
@@ -105,7 +106,7 @@
     CHECK_UNHANDLED_OOPS_ONLY(Thread::current()->clear_unhandled_oops());
   }
 
-  if (InstanceKlass::cast(h_m()->method_holder())->is_linked()) {
+  if (h_m()->method_holder()->is_linked()) {
     _can_be_statically_bound = h_m()->can_be_statically_bound();
   } else {
     // Have to use a conservative value in this case.
@@ -139,6 +140,12 @@
   }
   if (_interpreter_invocation_count == 0)
     _interpreter_invocation_count = 1;
+  _instructions_size = -1;
+#ifdef ASSERT
+  if (ReplayCompiles) {
+    ciReplay::initialize(this);
+  }
+#endif
 }
 
 
@@ -161,7 +168,8 @@
 #if defined(COMPILER2) || defined(SHARK)
   ,
   _flow(                   NULL),
-  _bcea(                   NULL)
+  _bcea(                   NULL),
+  _instructions_size(-1)
 #endif // COMPILER2 || SHARK
 {
   // Usually holder and accessor are the same type but in some cases
@@ -188,7 +196,7 @@
 
   // Revert any breakpoint bytecodes in ci's copy
   if (me->number_of_breakpoints() > 0) {
-    BreakpointInfo* bp = InstanceKlass::cast(me->method_holder())->breakpoints();
+    BreakpointInfo* bp = me->method_holder()->breakpoints();
     for (; bp != NULL; bp = bp->next()) {
       if (bp->match(me)) {
         code_at_put(bp->bci(), bp->orig_bytecode());
@@ -734,6 +742,24 @@
 }
 
 // ------------------------------------------------------------------
+// ciMethod::get_field_at_bci
+ciField* ciMethod::get_field_at_bci(int bci, bool &will_link) {
+  ciBytecodeStream iter(this);
+  iter.reset_to_bci(bci);
+  iter.next();
+  return iter.get_field(will_link);
+}
+
+// ------------------------------------------------------------------
+// ciMethod::get_method_at_bci
+ciMethod* ciMethod::get_method_at_bci(int bci, bool &will_link, ciSignature* *declared_signature) {
+  ciBytecodeStream iter(this);
+  iter.reset_to_bci(bci);
+  iter.next();
+  return iter.get_method(will_link, declared_signature);
+}
+
+// ------------------------------------------------------------------
 // Adjust a CounterData count to be commensurate with
 // interpreter_invocation_count.  If the MDO exists for
 // only 25% of the time the method exists, then the
@@ -868,25 +894,6 @@
 }
 
 // ------------------------------------------------------------------
-// ciMethod::will_link
-//
-// Will this method link in a specific calling context?
-bool ciMethod::will_link(ciKlass* accessing_klass,
-                         ciKlass* declared_method_holder,
-                         Bytecodes::Code bc) {
-  if (!is_loaded()) {
-    // Method lookup failed.
-    return false;
-  }
-
-  // The link checks have been front-loaded into the get_method
-  // call.  This method (ciMethod::will_link()) will be removed
-  // in the future.
-
-  return true;
-}
-
-// ------------------------------------------------------------------
 // ciMethod::should_exclude
 //
 // Should this method be excluded from compilation?
@@ -1000,8 +1007,7 @@
 // ------------------------------------------------------------------
 // ciMethod::has_compiled_code
 bool ciMethod::has_compiled_code() {
-  VM_ENTRY_MARK;
-  return get_Method()->code() != NULL;
+  return instructions_size() > 0;
 }
 
 int ciMethod::comp_level() {
@@ -1039,14 +1045,18 @@
 // junk like exception handler, stubs, and constant table, which are
 // not highly relevant to an inlined method.  So we use the more
 // specific accessor nmethod::insts_size.
-int ciMethod::instructions_size(int comp_level) {
-  GUARDED_VM_ENTRY(
-    nmethod* code = get_Method()->code();
-    if (code != NULL && (comp_level == CompLevel_any || comp_level == code->comp_level())) {
-      return code->insts_end() - code->verified_entry_point();
-    }
-    return 0;
-  )
+int ciMethod::instructions_size() {
+  if (_instructions_size == -1) {
+    GUARDED_VM_ENTRY(
+                     nmethod* code = get_Method()->code();
+                     if (code != NULL && (code->comp_level() == CompLevel_full_optimization)) {
+                       _instructions_size = code->insts_end() - code->verified_entry_point();
+                     } else {
+                       _instructions_size = 0;
+                     }
+                     );
+  }
+  return _instructions_size;
 }
 
 // ------------------------------------------------------------------
@@ -1166,6 +1176,20 @@
 
 #undef FETCH_FLAG_FROM_VM
 
+void ciMethod::dump_replay_data(outputStream* st) {
+  ASSERT_IN_VM;
+  Method* method = get_Method();
+  Klass*  holder = method->method_holder();
+  st->print_cr("ciMethod %s %s %s %d %d %d %d %d",
+               holder->name()->as_quoted_ascii(),
+               method->name()->as_quoted_ascii(),
+               method->signature()->as_quoted_ascii(),
+               method->invocation_counter()->raw_counter(),
+               method->backedge_counter()->raw_counter(),
+               interpreter_invocation_count(),
+               interpreter_throwout_count(),
+               _instructions_size);
+}
 
 // ------------------------------------------------------------------
 // ciMethod::print_codes
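
Each such record is a single line in the replay file, e.g. (counter values hypothetical):

    ciMethod java/lang/String hashCode ()I 1410 2048 528 0 142

with the fields being invocation counter, backedge counter, interpreter invocation count, interpreter throwout count, and instructions size, in that order.
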
--- a/src/share/vm/ci/ciMethod.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciMethod.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -51,6 +51,7 @@
   friend class ciExceptionHandlerStream;
   friend class ciBytecodeStream;
   friend class ciMethodHandle;
+  friend class ciReplay;
 
  private:
   // General method information.
@@ -69,6 +70,7 @@
   int _handler_count;
   int _interpreter_invocation_count;
   int _interpreter_throwout_count;
+  int _instructions_size;
 
   bool _uses_monitors;
   bool _balanced_monitors;
@@ -224,6 +226,9 @@
   ciCallProfile call_profile_at_bci(int bci);
   int           interpreter_call_site_count(int bci);
 
+  ciField*      get_field_at_bci( int bci, bool &will_link);
+  ciMethod*     get_method_at_bci(int bci, bool &will_link, ciSignature* *declared_signature);
+
   // Given a certain calling environment, find the monomorphic target
   // for the call.  Return NULL if the call is not monomorphic in
   // its calling environment.
@@ -239,9 +244,6 @@
   int resolve_vtable_index(ciKlass* caller, ciKlass* receiver);
 
   // Compilation directives
-  bool will_link(ciKlass* accessing_klass,
-                 ciKlass* declared_method_holder,
-                 Bytecodes::Code bc);
   bool should_exclude();
   bool should_inline();
   bool should_not_inline();
@@ -252,7 +254,6 @@
   bool can_be_osr_compiled(int entry_bci);
   void set_not_compilable();
   bool has_compiled_code();
-  int  instructions_size(int comp_level = CompLevel_any);
   void log_nmethod_identity(xmlStream* log);
   bool is_not_reached(int bci);
   bool was_executed_more_than(int times);
@@ -260,6 +261,7 @@
   bool is_klass_loaded(int refinfo_index, bool must_be_resolved) const;
   bool check_call(int refinfo_index, bool is_static) const;
   bool ensure_method_data();  // make sure it exists in the VM also
+  int instructions_size();
   int scale_count(int count, float prof_factor = 1.);  // make MDO count commensurate with IIC
 
   // JSR 292 support
@@ -291,6 +293,7 @@
   bool is_accessor    () const;
   bool is_initializer () const;
   bool can_be_statically_bound() const           { return _can_be_statically_bound; }
+  void dump_replay_data(outputStream* st);
 
   // Print the bytecodes of this method.
   void print_codes_on(outputStream* st);
--- a/src/share/vm/ci/ciMethodData.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciMethodData.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "ci/ciMetadata.hpp"
 #include "ci/ciMethodData.hpp"
+#include "ci/ciReplay.hpp"
 #include "ci/ciUtilities.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
@@ -115,6 +116,11 @@
   _arg_local = mdo->arg_local();
   _arg_stack = mdo->arg_stack();
   _arg_returned  = mdo->arg_returned();
+#ifndef PRODUCT
+  if (ReplayCompiles) {
+    ciReplay::initialize(this);
+  }
+#endif
 }
 
 void ciReceiverTypeData::translate_receiver_data_from(ProfileData* data) {
@@ -366,6 +372,79 @@
   ciMetadata::print_impl(st);
 }
 
+void ciMethodData::dump_replay_data(outputStream* out) {
+  ASSERT_IN_VM;
+  MethodData* mdo = get_MethodData();
+  Method* method = mdo->method();
+  Klass* holder = method->method_holder();
+  out->print("ciMethodData %s %s %s %d %d",
+             holder->name()->as_quoted_ascii(),
+             method->name()->as_quoted_ascii(),
+             method->signature()->as_quoted_ascii(),
+             _state,
+             current_mileage());
+
+  // dump the contents of the MDO header as raw data
+  unsigned char* orig = (unsigned char*)&_orig;
+  int length = sizeof(_orig);
+  out->print(" orig %d", length);
+  for (int i = 0; i < length; i++) {
+    out->print(" %d", orig[i]);
+  }
+
+  // dump the MDO data as raw data
+  int elements = data_size() / sizeof(intptr_t);
+  out->print(" data %d", elements);
+  for (int i = 0; i < elements; i++) {
+    // We could use INTPTR_FORMAT here, but that's zero-padded, which
+    // makes comparing it with the SA version of this output harder.
+#ifdef _LP64
+    out->print(" 0x%" FORMAT64_MODIFIER "x", data()[i]);
+#else
+    out->print(" 0x%x", data()[i]);
+#endif
+  }
+
+  // The MDO contained oop references as ciObjects, so scan for those
+  // and emit pairs of offset and klass name so that they can be
+  // reconstructed at runtime.  The first round counts the number of
+  // oop references and the second actually emits them.
+  int count = 0;
+  for (int round = 0; round < 2; round++) {
+    if (round == 1) out->print(" oops %d", count);
+    ProfileData* pdata = first_data();
+    for ( ; is_valid(pdata); pdata = next_data(pdata)) {
+      if (pdata->is_ReceiverTypeData()) {
+        ciReceiverTypeData* vdata = (ciReceiverTypeData*)pdata;
+        for (uint i = 0; i < vdata->row_limit(); i++) {
+          ciKlass* k = vdata->receiver(i);
+          if (k != NULL) {
+            if (round == 0) {
+              count++;
+            } else {
+              out->print(" %d %s", dp_to_di(vdata->dp() + in_bytes(vdata->receiver_offset(i))) / sizeof(intptr_t), k->name()->as_quoted_ascii());
+            }
+          }
+        }
+      } else if (pdata->is_VirtualCallData()) {
+        ciVirtualCallData* vdata = (ciVirtualCallData*)pdata;
+        for (uint i = 0; i < vdata->row_limit(); i++) {
+          ciKlass* k = vdata->receiver(i);
+          if (k != NULL) {
+            if (round == 0) {
+              count++;
+            } else {
+              out->print(" %d %s", dp_to_di(vdata->dp() + in_bytes(vdata->receiver_offset(i))) / sizeof(intptr_t), k->name()->as_quoted_ascii());
+            }
+          }
+        }
+      }
+    }
+  }
+  out->cr();
+}
+
 #ifndef PRODUCT
 void ciMethodData::print() {
   print_data_on(tty);
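
A schematic record matching this printer (values hypothetical, with the orig and data sections shortened for readability):

    ciMethodData java/lang/String hashCode ()I 2 5000 orig 2 0 0 data 2 0x0 0x20002 oops 1 8 java/lang/String

i.e. state, current mileage, the raw MDO header bytes, the raw profile words, and one (offset, klass) pair for a recorded receiver type.
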
--- a/src/share/vm/ci/ciMethodData.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciMethodData.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -144,6 +144,7 @@
 
 class ciMethodData : public ciMetadata {
   CI_PACKAGE_ACCESS
+  friend class ciReplay;
 
 private:
   // Size in bytes
@@ -320,6 +321,7 @@
   void print();
   void print_data_on(outputStream* st);
 #endif
+  void dump_replay_data(outputStream* out);
 };
 
 #endif // SHARE_VM_CI_CIMETHODDATA_HPP
--- a/src/share/vm/ci/ciObject.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciObject.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -131,6 +131,7 @@
   // Is this a type or value which has no associated class?
   // It is true of primitive types and null objects.
   virtual bool is_classless() const         { return false; }
+  virtual void dump_replay_data(outputStream* st) { /* do nothing */ }
 
   // Note: some ciObjects refer to oops which have yet to be created.
   // We refer to these as "unloaded".  Specifically, there are
--- a/src/share/vm/ci/ciObjectFactory.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciObjectFactory.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -137,6 +137,7 @@
 
   ciReturnAddress* get_return_address(int bci);
 
+  GrowableArray<ciMetadata*>* get_ci_metadata() const { return _ci_metadata; }
   // RedefineClasses support
   void metadata_do(void f(Metadata*));
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/ci/ciReplay.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,942 @@
+/* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "ci/ciMethodData.hpp"
+#include "ci/ciReplay.hpp"
+#include "ci/ciUtilities.hpp"
+#include "compiler/compileBroker.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/oopFactory.hpp"
+#include "memory/resourceArea.hpp"
+#include "utilities/copy.hpp"
+
+#ifndef PRODUCT
+
+// ciReplay
+
+typedef struct _ciMethodDataRecord {
+  const char* klass;
+  const char* method;
+  const char* signature;
+  int state;
+  int current_mileage;
+  intptr_t* data;
+  int data_length;
+  char* orig_data;
+  int orig_data_length;
+  int oops_length;
+  jobject* oops_handles;
+  int* oops_offsets;
+} ciMethodDataRecord;
+
+typedef struct _ciMethodRecord {
+  const char* klass;
+  const char* method;
+  const char* signature;
+  int instructions_size;
+  int interpreter_invocation_count;
+  int interpreter_throwout_count;
+  int invocation_counter;
+  int backedge_counter;
+} ciMethodRecord;
+
+class CompileReplay;
+static CompileReplay* replay_state;
+
+class CompileReplay : public StackObj {
+ private:
+  FILE*   stream;
+  Thread* thread;
+  Handle  protection_domain;
+  Handle  loader;
+
+  GrowableArray<ciMethodRecord*>     ci_method_records;
+  GrowableArray<ciMethodDataRecord*> ci_method_data_records;
+
+  const char* _error_message;
+
+  char* bufptr;
+  char* buffer;
+  int   buffer_length;
+  int   buffer_end;
+  int   line_no;
+
+ public:
+  CompileReplay(const char* filename, TRAPS) {
+    thread = THREAD;
+    loader = Handle(thread, SystemDictionary::java_system_loader());
+    stream = fopen(filename, "rt");
+    if (stream == NULL) {
+      fprintf(stderr, "Can't open replay file %s\n", filename);
+    }
+    buffer_length = 32;
+    buffer = NEW_RESOURCE_ARRAY(char, buffer_length);
+    _error_message = NULL;
+
+    test();
+  }
+
+  ~CompileReplay() {
+    if (stream != NULL) fclose(stream);
+  }
+
+  void test() {
+    strcpy(buffer, "1 2 foo 4 bar 0x9 \"this is it\"");
+    bufptr = buffer;
+    assert(parse_int("test") == 1, "what");
+    assert(parse_int("test") == 2, "what");
+    assert(strcmp(parse_string(), "foo") == 0, "what");
+    assert(parse_int("test") == 4, "what");
+    assert(strcmp(parse_string(), "bar") == 0, "what");
+    assert(parse_intptr_t("test") == 9, "what");
+    assert(strcmp(parse_quoted_string(), "this is it") == 0, "what");
+  }
+
+  bool had_error() {
+    return _error_message != NULL || thread->has_pending_exception();
+  }
+
+  bool can_replay() {
+    return !(stream == NULL || had_error());
+  }
+
+  void report_error(const char* msg) {
+    _error_message = msg;
+    // Restore the buffer contents for error reporting
+    for (int i = 0; i < buffer_end; i++) {
+      if (buffer[i] == '\0') buffer[i] = ' ';
+    }
+  }
+
+  int parse_int(const char* label) {
+    if (had_error()) {
+      return 0;
+    }
+
+    int v = 0;
+    int read;
+    if (sscanf(bufptr, "%i%n", &v, &read) != 1) {
+      report_error(label);
+    } else {
+      bufptr += read;
+    }
+    return v;
+  }
+
+  intptr_t parse_intptr_t(const char* label) {
+    if (had_error()) {
+      return 0;
+    }
+
+    intptr_t v = 0;
+    int read;
+    if (sscanf(bufptr, INTPTR_FORMAT "%n", &v, &read) != 1) {
+      report_error(label);
+    } else {
+      bufptr += read;
+    }
+    return v;
+  }
+
+  void skip_ws() {
+    // Skip any leading whitespace
+    while (*bufptr == ' ' || *bufptr == '\t') {
+      bufptr++;
+    }
+  }
+
+
+  char* scan_and_terminate(char delim) {
+    char* str = bufptr;
+    while (*bufptr != delim && *bufptr != '\0') {
+      bufptr++;
+    }
+    if (*bufptr != '\0') {
+      *bufptr++ = '\0';
+    }
+    if (bufptr == str) {
+      // nothing here
+      return NULL;
+    }
+    return str;
+  }
+
+  char* parse_string() {
+    if (had_error()) return NULL;
+
+    skip_ws();
+    return scan_and_terminate(' ');
+  }
+
+  char* parse_quoted_string() {
+    if (had_error()) return NULL;
+
+    skip_ws();
+
+    if (*bufptr == '"') {
+      bufptr++;
+      return scan_and_terminate('"');
+    } else {
+      return scan_and_terminate(' ');
+    }
+  }
+
+  const char* parse_escaped_string() {
+    char* result = parse_quoted_string();
+    if (result != NULL) {
+      unescape_string(result);
+    }
+    return result;
+  }
+
+  // Look for the tag 'tag' followed by an integer count.
+  bool parse_tag_and_count(const char* tag, int& length) {
+    const char* t = parse_string();
+    if (t == NULL) {
+      return false;
+    }
+
+    if (strcmp(tag, t) != 0) {
+      report_error(tag);
+      return false;
+    }
+    length = parse_int("parse_tag_and_count");
+    return !had_error();
+  }
+
+  // Parse a sequence of raw data encoded as bytes and return the
+  // resulting data.
+  char* parse_data(const char* tag, int& length) {
+    if (!parse_tag_and_count(tag, length)) {
+      return NULL;
+    }
+
+    char * result = NEW_RESOURCE_ARRAY(char, length);
+    for (int i = 0; i < length; i++) {
+      int val = parse_int("data");
+      result[i] = val;
+    }
+    return result;
+  }
+
+  // Parse a standard chunk of data emitted as:
+  //   'tag' <length> # # ...
+  // Where each # is an intptr_t item
+  intptr_t* parse_intptr_data(const char* tag, int& length) {
+    if (!parse_tag_and_count(tag, length)) {
+      return NULL;
+    }
+
+    intptr_t* result = NEW_RESOURCE_ARRAY(intptr_t, length);
+    for (int i = 0; i < length; i++) {
+      skip_ws();
+      intptr_t val = parse_intptr_t("data");
+      result[i] = val;
+    }
+    return result;
+  }
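+
+  // For example (hypothetical values), given the input
+  //   data 3 0x0 0x10 0x2a
+  // parse_intptr_data("data", n) returns {0x0, 0x10, 0x2a} and sets n to 3.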
+
+  // Parse a possibly quoted version of a symbol name into a Symbol*
+  Symbol* parse_symbol(TRAPS) {
+    const char* str = parse_escaped_string();
+    if (str != NULL) {
+      Symbol* sym = SymbolTable::lookup(str, (int)strlen(str), CHECK_NULL);
+      return sym;
+    }
+    return NULL;
+  }
+
+  // Parse a valid klass name and look it up
+  Klass* parse_klass(TRAPS) {
+    const char* str = parse_escaped_string();
+    Symbol* klass_name = SymbolTable::lookup(str, (int)strlen(str), CHECK_NULL);
+    if (klass_name != NULL) {
+      Klass* k = SystemDictionary::resolve_or_fail(klass_name, loader, protection_domain, true, THREAD);
+      if (HAS_PENDING_EXCEPTION) {
+        oop throwable = PENDING_EXCEPTION;
+        java_lang_Throwable::print(throwable, tty);
+        tty->cr();
+        report_error(str);
+        return NULL;
+      }
+      return k;
+    }
+    return NULL;
+  }
+
+  // Lookup a klass
+  Klass* resolve_klass(const char* klass, TRAPS) {
+    Symbol* klass_name = SymbolTable::lookup(klass, (int)strlen(klass), CHECK_NULL);
+    return SystemDictionary::resolve_or_fail(klass_name, loader, protection_domain, true, CHECK_NULL);
+  }
+
+  // Parse the standard tuple of <klass> <name> <signature>
+  Method* parse_method(TRAPS) {
+    InstanceKlass* k = (InstanceKlass*)parse_klass(CHECK_NULL);
+    Symbol* method_name = parse_symbol(CHECK_NULL);
+    Symbol* method_signature = parse_symbol(CHECK_NULL);
+    Method* m = k->find_method(method_name, method_signature);
+    if (m == NULL) {
+      report_error("can't find method");
+    }
+    return m;
+  }
+
+  // Process each line of the replay file executing each command until
+  // the file ends.
+  void process(TRAPS) {
+    line_no = 1;
+    int pos = 0;
+    int c = getc(stream);
+    while(c != EOF) {
+      if (pos + 1 >= buffer_length) {
+        int newl = buffer_length * 2;
+        char* newb = NEW_RESOURCE_ARRAY(char, newl);
+        memcpy(newb, buffer, pos);
+        buffer = newb;
+        buffer_length = newl;
+      }
+      if (c == '\n') {
+        // null terminate it, reset the pointer and process the line
+        buffer[pos] = '\0';
+        buffer_end = pos++;
+        bufptr = buffer;
+        process_command(CHECK);
+        if (had_error()) {
+          tty->print_cr("Error while parsing line %d: %s\n", line_no, _error_message);
+          tty->print_cr("%s", buffer);
+          assert(false, "error");
+          return;
+        }
+        pos = 0;
+        buffer_end = 0;
+        line_no++;
+      } else if (c == '\r') {
+        // skip CR
+      } else {
+        buffer[pos++] = c;
+      }
+      c = getc(stream);
+    }
+  }
+
+  void process_command(TRAPS) {
+    char* cmd = parse_string();
+    if (cmd == NULL) {
+      return;
+    }
+    if (strcmp("#", cmd) == 0) {
+      // ignore
+    } else if (strcmp("compile", cmd) == 0) {
+      process_compile(CHECK);
+    } else if (strcmp("ciMethod", cmd) == 0) {
+      process_ciMethod(CHECK);
+    } else if (strcmp("ciMethodData", cmd) == 0) {
+      process_ciMethodData(CHECK);
+    } else if (strcmp("staticfield", cmd) == 0) {
+      process_staticfield(CHECK);
+    } else if (strcmp("ciInstanceKlass", cmd) == 0) {
+      process_ciInstanceKlass(CHECK);
+    } else if (strcmp("instanceKlass", cmd) == 0) {
+      process_instanceKlass(CHECK);
+#if INCLUDE_JVMTI
+    } else if (strcmp("JvmtiExport", cmd) == 0) {
+      process_JvmtiExport(CHECK);
+#endif // INCLUDE_JVMTI
+    } else {
+      report_error("unknown command");
+    }
+  }
+
+  // compile <klass> <name> <signature> <entry_bci>
+  void process_compile(TRAPS) {
+    // methodHandle method;
+    Method* method = parse_method(CHECK);
+    int entry_bci = parse_int("entry_bci");
+    Klass* k = method->method_holder();
+    ((InstanceKlass*)k)->initialize(THREAD);
+    if (HAS_PENDING_EXCEPTION) {
+      oop throwable = PENDING_EXCEPTION;
+      java_lang_Throwable::print(throwable, tty);
+      tty->cr();
+      if (ReplayIgnoreInitErrors) {
+        CLEAR_PENDING_EXCEPTION;
+        ((InstanceKlass*)k)->set_init_state(InstanceKlass::fully_initialized);
+      } else {
+        return;
+      }
+    }
+    // Make sure the existence of a prior compile doesn't stop this one
+    nmethod* nm = (entry_bci != InvocationEntryBci) ? method->lookup_osr_nmethod_for(entry_bci, CompLevel_full_optimization, true) : method->code();
+    if (nm != NULL) {
+      nm->make_not_entrant();
+    }
+    replay_state = this;
+    CompileBroker::compile_method(method, entry_bci, CompLevel_full_optimization,
+                                  methodHandle(), 0, "replay", THREAD);
+    replay_state = NULL;
+    reset();
+  }
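+
+  // For example, the replay line "compile java/lang/String hashCode ()I -1"
+  // (hypothetical) recompiles String.hashCode at CompLevel_full_optimization;
+  // an entry_bci of -1 is InvocationEntryBci, i.e. a normal (non-OSR) compile.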
+
+  // ciMethod <klass> <name> <signature> <invocation_counter> <backedge_counter> <interpreter_invocation_count> <interpreter_throwout_count> <instructions_size>
+  //
+  //
+  void process_ciMethod(TRAPS) {
+    Method* method = parse_method(CHECK);
+    ciMethodRecord* rec = new_ciMethod(method);
+    rec->invocation_counter = parse_int("invocation_counter");
+    rec->backedge_counter = parse_int("backedge_counter");
+    rec->interpreter_invocation_count = parse_int("interpreter_invocation_count");
+    rec->interpreter_throwout_count = parse_int("interpreter_throwout_count");
+    rec->instructions_size = parse_int("instructions_size");
+  }
+
+  // ciMethodData <klass> <name> <signature> <state> <current mileage> orig <length> # # ... data <length> # # ... oops <length> <offset> <klass> ...
+  void process_ciMethodData(TRAPS) {
+    Method* method = parse_method(CHECK);
+    /* just copied from Method, to build interpreter method data */
+    if (InstanceRefKlass::owns_pending_list_lock((JavaThread*)THREAD)) {
+      return;
+    }
+    // Method::build_interpreter_method_data(method, CHECK);
+    {
+      // Grab a lock here to prevent multiple
+      // MethodData*s from being created.
+      MutexLocker ml(MethodData_lock, THREAD);
+      if (method->method_data() == NULL) {
+        ClassLoaderData* loader_data = method->method_holder()->class_loader_data();
+        MethodData* method_data = MethodData::allocate(loader_data, method, CHECK);
+        method->set_method_data(method_data);
+      }
+    }
+
+    // collect and record all the needed information for later
+    ciMethodDataRecord* rec = new_ciMethodData(method);
+    rec->state = parse_int("state");
+    rec->current_mileage = parse_int("current_mileage");
+
+    rec->orig_data = parse_data("orig", rec->orig_data_length);
+    if (rec->orig_data == NULL) {
+      return;
+    }
+    rec->data = parse_intptr_data("data", rec->data_length);
+    if (rec->data == NULL) {
+      return;
+    }
+    if (!parse_tag_and_count("oops", rec->oops_length)) {
+      return;
+    }
+    rec->oops_handles = NEW_RESOURCE_ARRAY(jobject, rec->oops_length);
+    rec->oops_offsets = NEW_RESOURCE_ARRAY(int, rec->oops_length);
+    for (int i = 0; i < rec->oops_length; i++) {
+      int offset = parse_int("offset");
+      if (had_error()) {
+        return;
+      }
+      Klass* k = parse_klass(CHECK);
+      rec->oops_offsets[i] = offset;
+      rec->oops_handles[i] = (jobject)(new KlassHandle(THREAD, k));
+    }
+  }
+
+  // instanceKlass <name>
+  //
+  // Loads the klass 'name'.  This can be used to create
+  // particular class loading environments.
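+  //
+  // Example (illustrative): instanceKlass java/util/HashMap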
+  void process_instanceKlass(TRAPS) {
+    // just load the referenced class
+    Klass* k = parse_klass(CHECK);
+  }
+
+  // ciInstanceKlass <name> <is_linked> <is_initialized> <length> tag # # # ...
+  //
+  // Load the klass 'name' and link or initialize it.  Verify that the
+  // constant pool is the same length as 'length' and make sure the
+  // constant pool tags are in the same state.
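+  //
+  // Example (illustrative; the trailing integers are constant pool tag values):
+  //   ciInstanceKlass java/util/HashMap 1 1 402 1 7 ...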
+  void process_ciInstanceKlass(TRAPS) {
+    InstanceKlass* k = (InstanceKlass *)parse_klass(CHECK);
+    int is_linked = parse_int("is_linked");
+    int is_initialized = parse_int("is_initialized");
+    int length = parse_int("length");
+    if (is_initialized) {
+      k->initialize(THREAD);
+      if (HAS_PENDING_EXCEPTION) {
+        oop throwable = PENDING_EXCEPTION;
+        java_lang_Throwable::print(throwable, tty);
+        tty->cr();
+        if (ReplayIgnoreInitErrors) {
+          CLEAR_PENDING_EXCEPTION;
+          k->set_init_state(InstanceKlass::fully_initialized);
+        } else {
+          return;
+        }
+      }
+    } else if (is_linked) {
+      k->link_class(CHECK);
+    }
+    ConstantPool* cp = k->constants();
+    if (length != cp->length()) {
+      report_error("constant pool length mismatch: wrong class files?");
+      return;
+    }
+
+    int parsed_two_word = 0;
+    for (int i = 1; i < length; i++) {
+      int tag = parse_int("tag");
+      if (had_error()) {
+        return;
+      }
+      switch (cp->tag_at(i).value()) {
+        case JVM_CONSTANT_UnresolvedClass: {
+          if (tag == JVM_CONSTANT_Class) {
+            tty->print_cr("Resolving klass %s at %d", cp->unresolved_klass_at(i)->as_utf8(), i);
+            Klass* k = cp->klass_at(i, CHECK);
+          }
+          break;
+        }
+        case JVM_CONSTANT_Long:
+        case JVM_CONSTANT_Double:
+          parsed_two_word = i + 1;
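+          // fall through: long/double entries must still match the recorded tag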
+
+        case JVM_CONSTANT_ClassIndex:
+        case JVM_CONSTANT_StringIndex:
+        case JVM_CONSTANT_String:
+        case JVM_CONSTANT_UnresolvedClassInError:
+        case JVM_CONSTANT_Fieldref:
+        case JVM_CONSTANT_Methodref:
+        case JVM_CONSTANT_InterfaceMethodref:
+        case JVM_CONSTANT_NameAndType:
+        case JVM_CONSTANT_Utf8:
+        case JVM_CONSTANT_Integer:
+        case JVM_CONSTANT_Float:
+          if (tag != cp->tag_at(i).value()) {
+            report_error("tag mismatch: wrong class files?");
+            return;
+          }
+          break;
+
+        case JVM_CONSTANT_Class:
+          if (tag == JVM_CONSTANT_UnresolvedClass) {
+            tty->print_cr("Warning: entry was unresolved in the replay data");
+          } else if (tag != JVM_CONSTANT_Class) {
+            report_error("Unexpected tag");
+            return;
+          }
+          break;
+
+        case 0:
+          if (parsed_two_word == i) continue;
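+          // otherwise fall through: a zero tag is only valid in the unused
+          // second slot of a long/double entry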
+
+        default:
+          ShouldNotReachHere();
+          break;
+      }
+
+    }
+  }
+
+  // Initialize a class and fill in the value for a static field.
+  // This is useful when the compile was dependent on the value of
+  // static fields but it's impossible to properly rerun the static
+  // initializer.
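+  //
+  // Example (illustrative; 'pkg/MyClass' and 'CACHE_SIZE' are hypothetical):
+  //   staticfield pkg/MyClass CACHE_SIZE I 1024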
+  void process_staticfield(TRAPS) {
+    InstanceKlass* k = (InstanceKlass *)parse_klass(CHECK);
+
+    if (ReplaySuppressInitializers == 0 ||
+        (ReplaySuppressInitializers == 2 && k->class_loader() == NULL)) {
+      return;
+    }
+
+    assert(k->is_initialized(), "must be");
+
+    const char* field_name = parse_escaped_string();
+    const char* field_signature = parse_string();
+    fieldDescriptor fd;
+    Symbol* name = SymbolTable::lookup(field_name, (int)strlen(field_name), CHECK);
+    Symbol* sig = SymbolTable::lookup(field_signature, (int)strlen(field_signature), CHECK);
+    if (!k->find_local_field(name, sig, &fd) ||
+        !fd.is_static() ||
+        fd.has_initial_value()) {
+      report_error(field_name);
+      return;
+    }
+
+    oop java_mirror = k->java_mirror();
+    if (field_signature[0] == '[') {
+      int length = parse_int("array length");
+      oop value = NULL;
+
+      if (field_signature[1] == '[') {
+        // multi dimensional array
+        ArrayKlass* kelem = (ArrayKlass *)parse_klass(CHECK);
+        int rank = 0;
+        while (field_signature[rank] == '[') {
+          rank++;
+        }
+        int* dims = NEW_RESOURCE_ARRAY(int, rank);
+        dims[0] = length;
+        for (int i = 1; i < rank; i++) {
+          dims[i] = 1; // These aren't relevant to the compiler
+        }
+        value = kelem->multi_allocate(rank, dims, CHECK);
+      } else {
+        if (strcmp(field_signature, "[B") == 0) {
+          value = oopFactory::new_byteArray(length, CHECK);
+        } else if (strcmp(field_signature, "[Z") == 0) {
+          value = oopFactory::new_boolArray(length, CHECK);
+        } else if (strcmp(field_signature, "[C") == 0) {
+          value = oopFactory::new_charArray(length, CHECK);
+        } else if (strcmp(field_signature, "[S") == 0) {
+          value = oopFactory::new_shortArray(length, CHECK);
+        } else if (strcmp(field_signature, "[F") == 0) {
+          value = oopFactory::new_singleArray(length, CHECK);
+        } else if (strcmp(field_signature, "[D") == 0) {
+          value = oopFactory::new_doubleArray(length, CHECK);
+        } else if (strcmp(field_signature, "[I") == 0) {
+          value = oopFactory::new_intArray(length, CHECK);
+        } else if (strcmp(field_signature, "[J") == 0) {
+          value = oopFactory::new_longArray(length, CHECK);
+        } else if (field_signature[0] == '[' && field_signature[1] == 'L') {
+          KlassHandle kelem = resolve_klass(field_signature + 1, CHECK);
+          value = oopFactory::new_objArray(kelem(), length, CHECK);
+        } else {
+          report_error("unhandled array staticfield");
+        }
+      }
+      java_mirror->obj_field_put(fd.offset(), value);
+    } else {
+      const char* string_value = parse_escaped_string();
+      if (strcmp(field_signature, "I") == 0) {
+        int value = atoi(string_value);
+        java_mirror->int_field_put(fd.offset(), value);
+      } else if (strcmp(field_signature, "B") == 0) {
+        int value = atoi(string_value);
+        java_mirror->byte_field_put(fd.offset(), value);
+      } else if (strcmp(field_signature, "C") == 0) {
+        int value = atoi(string_value);
+        java_mirror->char_field_put(fd.offset(), value);
+      } else if (strcmp(field_signature, "S") == 0) {
+        int value = atoi(string_value);
+        java_mirror->short_field_put(fd.offset(), value);
+      } else if (strcmp(field_signature, "Z") == 0) {
+        int value = atoi(string_value);
+        java_mirror->bool_field_put(fd.offset(), value);
+      } else if (strcmp(field_signature, "J") == 0) {
+        jlong value;
+        if (sscanf(string_value, JLONG_FORMAT, &value) != 1) {
+          fprintf(stderr, "Error parsing long: %s\n", string_value);
+          return;
+        }
+        java_mirror->long_field_put(fd.offset(), value);
+      } else if (strcmp(field_signature, "F") == 0) {
+        float value = atof(string_value);
+        java_mirror->float_field_put(fd.offset(), value);
+      } else if (strcmp(field_signature, "D") == 0) {
+        double value = atof(string_value);
+        java_mirror->double_field_put(fd.offset(), value);
+      } else if (strcmp(field_signature, "Ljava/lang/String;") == 0) {
+        Handle value = java_lang_String::create_from_str(string_value, CHECK);
+        java_mirror->obj_field_put(fd.offset(), value());
+      } else if (field_signature[0] == 'L') {
+        Symbol* klass_name = SymbolTable::lookup(field_signature, (int)strlen(field_signature), CHECK);
+        KlassHandle kelem = resolve_klass(field_signature, CHECK);
+        oop value = ((InstanceKlass*)kelem())->allocate_instance(CHECK);
+        java_mirror->obj_field_put(fd.offset(), value);
+      } else {
+        report_error("unhandled staticfield");
+      }
+    }
+  }
+
+#if INCLUDE_JVMTI
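+  // JvmtiExport <field> <0|1>
+  //
+  // Example (illustrative): JvmtiExport can_post_on_exceptions 1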
+  void process_JvmtiExport(TRAPS) {
+    const char* field = parse_string();
+    bool value = parse_int("JvmtiExport flag") != 0;
+    if (strcmp(field, "can_access_local_variables") == 0) {
+      JvmtiExport::set_can_access_local_variables(value);
+    } else if (strcmp(field, "can_hotswap_or_post_breakpoint") == 0) {
+      JvmtiExport::set_can_hotswap_or_post_breakpoint(value);
+    } else if (strcmp(field, "can_post_on_exceptions") == 0) {
+      JvmtiExport::set_can_post_on_exceptions(value);
+    } else {
+      report_error("Unrecognized JvmtiExport directive");
+    }
+  }
+#endif // INCLUDE_JVMTI
+
+  // Create and initialize a record for a ciMethod
+  ciMethodRecord* new_ciMethod(Method* method) {
+    ciMethodRecord* rec = NEW_RESOURCE_OBJ(ciMethodRecord);
+    rec->klass =  method->method_holder()->name()->as_utf8();
+    rec->method = method->name()->as_utf8();
+    rec->signature = method->signature()->as_utf8();
+    ci_method_records.append(rec);
+    return rec;
+  }
+
+  // Lookup data for a ciMethod
+  ciMethodRecord* find_ciMethodRecord(Method* method) {
+    const char* klass_name =  method->method_holder()->name()->as_utf8();
+    const char* method_name = method->name()->as_utf8();
+    const char* signature = method->signature()->as_utf8();
+    for (int i = 0; i < ci_method_records.length(); i++) {
+      ciMethodRecord* rec = ci_method_records.at(i);
+      if (strcmp(rec->klass, klass_name) == 0 &&
+          strcmp(rec->method, method_name) == 0 &&
+          strcmp(rec->signature, signature) == 0) {
+        return rec;
+      }
+    }
+    return NULL;
+  }
+
+  // Create and initialize a record for a ciMethodData
+  ciMethodDataRecord* new_ciMethodData(Method* method) {
+    ciMethodDataRecord* rec = NEW_RESOURCE_OBJ(ciMethodDataRecord);
+    rec->klass =  method->method_holder()->name()->as_utf8();
+    rec->method = method->name()->as_utf8();
+    rec->signature = method->signature()->as_utf8();
+    ci_method_data_records.append(rec);
+    return rec;
+  }
+
+  // Lookup data for a ciMethodData
+  ciMethodDataRecord* find_ciMethodDataRecord(Method* method) {
+    const char* klass_name =  method->method_holder()->name()->as_utf8();
+    const char* method_name = method->name()->as_utf8();
+    const char* signature = method->signature()->as_utf8();
+    for (int i = 0; i < ci_method_data_records.length(); i++) {
+      ciMethodDataRecord* rec = ci_method_data_records.at(i);
+      if (strcmp(rec->klass, klass_name) == 0 &&
+          strcmp(rec->method, method_name) == 0 &&
+          strcmp(rec->signature, signature) == 0) {
+        return rec;
+      }
+    }
+    return NULL;
+  }
+
+  const char* error_message() {
+    return _error_message;
+  }
+
+  void reset() {
+    _error_message = NULL;
+    ci_method_records.clear();
+    ci_method_data_records.clear();
+  }
+
+  // Take an ASCII string containing \u#### escapes and convert it to
+  // UTF-8 in place (e.g. "\u0041" becomes "A").
+  static void unescape_string(char* value) {
+    char* from = value;
+    char* to = value;
+    while (*from != '\0') {
+      if (*from != '\\') {
+        *to++ = *from++;  // copy ordinary characters through
+      } else {
+        switch (from[1]) {
+          case 'u': {
+            from += 2;
+            jchar value = 0;
+            for (int i = 0; i < 4; i++) {
+              char c = *from++;
+              switch (c) {
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                  value = (value << 4) + c - '0';
+                  break;
+                case 'a': case 'b': case 'c':
+                case 'd': case 'e': case 'f':
+                  value = (value << 4) + 10 + c - 'a';
+                  break;
+                case 'A': case 'B': case 'C':
+                case 'D': case 'E': case 'F':
+                  value = (value << 4) + 10 + c - 'A';
+                  break;
+                default:
+                  ShouldNotReachHere();
+              }
+            }
+            UNICODE::convert_to_utf8(&value, 1, to);
+            to++;
+            break;
+          }
+          case 't': *to++ = '\t'; from += 2; break;
+          case 'n': *to++ = '\n'; from += 2; break;
+          case 'r': *to++ = '\r'; from += 2; break;
+          case 'f': *to++ = '\f'; from += 2; break;
+          default:
+            ShouldNotReachHere();
+        }
+      }
+    }
+    *to = '\0';
+  }
+};
+
+void ciReplay::replay(TRAPS) {
+  int exit_code = replay_impl(THREAD);
+
+  Threads::destroy_vm();
+
+  vm_exit(exit_code);
+}
+
+int ciReplay::replay_impl(TRAPS) {
+  HandleMark hm;
+  ResourceMark rm;
+  // Make sure we don't run with background compilation
+  BackgroundCompilation = false;
+
+  if (ReplaySuppressInitializers > 2) {
+    // ReplaySuppressInitializers > 2 means that we want to allow
+    // normal VM bootstrap but once we get into the replay itself
+    // don't allow any initializers to be run.
+    ReplaySuppressInitializers = 1;
+  }
+
+  // Load and parse the replay data
+  CompileReplay rp(ReplayDataFile, THREAD);
+  int exit_code = 0;
+  if (rp.can_replay()) {
+    rp.process(THREAD);
+  } else {
+    exit_code = 1;
+    return exit_code;
+  }
+
+  if (HAS_PENDING_EXCEPTION) {
+    oop throwable = PENDING_EXCEPTION;
+    CLEAR_PENDING_EXCEPTION;
+    java_lang_Throwable::print(throwable, tty);
+    tty->cr();
+    java_lang_Throwable::print_stack_trace(throwable, tty);
+    tty->cr();
+    exit_code = 2;
+  }
+
+  if (rp.had_error()) {
+    tty->print_cr("Failed on %s", rp.error_message());
+    exit_code = 1;
+  }
+  return exit_code;
+}
+
+
+void ciReplay::initialize(ciMethodData* m) {
+  if (replay_state == NULL) {
+    return;
+  }
+
+  ASSERT_IN_VM;
+  ResourceMark rm;
+
+  Method* method = m->get_MethodData()->method();
+  ciMethodDataRecord* rec = replay_state->find_ciMethodDataRecord(method);
+  if (rec == NULL) {
+    // This indicates a mismatch between the original environment and
+    // the replay environment, though it's not always enough to
+    // interfere with reproducing a bug.
+    tty->print_cr("Warning: requesting ciMethodData record for method with no data: ");
+    method->print_name(tty);
+    tty->cr();
+  } else {
+    m->_state = rec->state;
+    m->_current_mileage = rec->current_mileage;
+    if (rec->data_length != 0) {
+      assert(m->_data_size == rec->data_length * (int)sizeof(rec->data[0]), "must agree");
+
+      // Write the correct ciObjects back into the profile data
+      ciEnv* env = ciEnv::current();
+      for (int i = 0; i < rec->oops_length; i++) {
+        KlassHandle *h = (KlassHandle *)rec->oops_handles[i];
+        *(ciMetadata**)(rec->data + rec->oops_offsets[i]) =
+          env->get_metadata((*h)());
+      }
+      // Copy the updated profile data into place as intptr_ts
+#ifdef _LP64
+      Copy::conjoint_jlongs_atomic((jlong *)rec->data, (jlong *)m->_data, rec->data_length);
+#else
+      Copy::conjoint_jints_atomic((jint *)rec->data, (jint *)m->_data, rec->data_length);
+#endif
+    }
+
+    // copy in the original header
+    Copy::conjoint_jbytes(rec->orig_data, (char*)&m->_orig, rec->orig_data_length);
+  }
+}
+
+
+bool ciReplay::should_not_inline(ciMethod* method) {
+  if (replay_state == NULL) {
+    return false;
+  }
+
+  VM_ENTRY_MARK;
+  // A ciMethod without a record shouldn't be inlined.
+  return replay_state->find_ciMethodRecord(method->get_Method()) == NULL;
+}
+
+
+void ciReplay::initialize(ciMethod* m) {
+  if (replay_state == NULL) {
+    return;
+  }
+
+  ASSERT_IN_VM;
+  ResourceMark rm;
+
+  Method* method = m->get_Method();
+  ciMethodRecord* rec = replay_state->find_ciMethodRecord(method);
+  if (rec == NULL) {
+    // This indicates a mismatch between the original environment and
+    // the replay environment, though it's not always enough to
+    // interfere with reproducing a bug.
+    tty->print_cr("Warning: requesting ciMethod record for method with no data: ");
+    method->print_name(tty);
+    tty->cr();
+  } else {
+    // m->_instructions_size = rec->instructions_size;
+    m->_instructions_size = -1;
+    m->_interpreter_invocation_count = rec->interpreter_invocation_count;
+    m->_interpreter_throwout_count = rec->interpreter_throwout_count;
+    method->invocation_counter()->_counter = rec->invocation_counter;
+    method->backedge_counter()->_counter = rec->backedge_counter;
+  }
+}
+
+bool ciReplay::is_loaded(Method* method) {
+  if (replay_state == NULL) {
+    return true;
+  }
+
+  ASSERT_IN_VM;
+  ResourceMark rm;
+
+  ciMethodRecord* rec = replay_state->find_ciMethodRecord(method);
+  return rec != NULL;
+}
+#endif // PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/ci/ciReplay.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_CI_CIREPLAY_HPP
+#define SHARE_VM_CI_CIREPLAY_HPP
+
+#include "ci/ciMethod.hpp"
+
+// ciReplay
+
+class ciReplay {
+  CI_PACKAGE_ACCESS
+
+#ifndef PRODUCT
+ private:
+  static int replay_impl(TRAPS);
+
+ public:
+  static void replay(TRAPS);
+
+  // These are used by the CI to fill in the cached data from the
+  // replay file when replaying compiles.
+  static void initialize(ciMethodData* method);
+  static void initialize(ciMethod* method);
+
+  static bool is_loaded(Method* method);
+  static bool is_loaded(Klass* klass);
+
+  static bool should_not_inline(ciMethod* method);
+
+#endif
+};
+
+#endif // SHARE_VM_CI_CIREPLAY_HPP
--- a/src/share/vm/ci/ciSignature.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciSignature.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -57,12 +57,14 @@
   ciSymbol* as_symbol() const                    { return _symbol; }
   ciKlass*  accessing_klass() const              { return _accessing_klass; }
 
-  ciType* return_type() const;
-  ciType* type_at(int index) const;
+  ciType*   return_type() const;
+  ciType*   type_at(int index) const;
 
   int       size() const                         { return _size; }
   int       count() const                        { return _count; }
 
+  int       arg_size_for_bc(Bytecodes::Code bc)  { return size() + (Bytecodes::has_receiver(bc) ? 1 : 0); }
+
   bool equals(ciSignature* that);
 
   void print_signature();
--- a/src/share/vm/ci/ciSymbol.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciSymbol.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -63,6 +63,11 @@
   return s->as_utf8();
 }
 
+// The text of the symbol as a null-terminated ASCII string, with non-printable characters quoted as \u####.
+const char* ciSymbol::as_quoted_ascii() {
+  GUARDED_VM_QUICK_ENTRY(return get_symbol()->as_quoted_ascii();)
+}
+
 // ------------------------------------------------------------------
 // ciSymbol::base
 const jbyte* ciSymbol::base() {
--- a/src/share/vm/ci/ciSymbol.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciSymbol.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -73,6 +73,9 @@
   const char* as_utf8();
   int         utf8_length();
 
+  // The text of the symbol as ascii with all non-printable characters quoted as \u####
+  const char* as_quoted_ascii();
+
   // Return the i-th utf8 byte, where i < utf8_length
   int         byte_at(int i);
 
--- a/src/share/vm/ci/ciType.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciType.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -45,7 +45,7 @@
 }
 
 ciType::ciType(KlassHandle k) : ciMetadata(k()) {
-  _basic_type = Klass::cast(k())->oop_is_array() ? T_ARRAY : T_OBJECT;
+  _basic_type = k()->oop_is_array() ? T_ARRAY : T_OBJECT;
 }
 
 
@@ -60,6 +60,19 @@
 }
 
 // ------------------------------------------------------------------
+// ciType::name
+//
+// Return the name of this type
+const char* ciType::name() {
+  if (is_primitive_type()) {
+    return type2name(basic_type());
+  } else {
+    assert(is_klass(), "must be");
+    return as_klass()->name()->as_utf8();
+  }
+}
+
+// ------------------------------------------------------------------
 // ciType::print_impl
 //
 // Implementation of the print method.
@@ -73,7 +86,8 @@
 //
 // Print the name of this type
 void ciType::print_name_on(outputStream* st) {
-  st->print(type2name(basic_type()));
+  ResourceMark rm;
+  st->print("%s", name());
 }
 
 
--- a/src/share/vm/ci/ciType.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciType.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -77,6 +77,7 @@
   bool is_type() const                      { return true; }
   bool is_classless() const                 { return is_primitive_type(); }
 
+  const char* name();
   virtual void print_name_on(outputStream* st);
   void print_name() {
     print_name_on(tty);
--- a/src/share/vm/ci/ciUtilities.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/ci/ciUtilities.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -80,6 +80,9 @@
 #define GUARDED_VM_ENTRY(action)            \
   {if (IS_IN_VM) { action } else { VM_ENTRY_MARK; { action }}}
 
+#define GUARDED_VM_QUICK_ENTRY(action)      \
+  {if (IS_IN_VM) { action } else { VM_QUICK_ENTRY_MARK; { action }}}
+
 // Redefine this later.
 #define KILL_COMPILE_ON_FATAL_(result)           \
   THREAD);                                       \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/classfile/bytecodeAssembler.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "classfile/bytecodeAssembler.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "memory/oopFactory.hpp"
+#include "oops/constantPool.hpp"
+
+#ifdef TARGET_ARCH_x86
+# include "bytes_x86.hpp"
+#endif
+#ifdef TARGET_ARCH_sparc
+# include "bytes_sparc.hpp"
+#endif
+#ifdef TARGET_ARCH_zero
+# include "bytes_zero.hpp"
+#endif
+#ifdef TARGET_ARCH_arm
+# include "bytes_arm.hpp"
+#endif
+#ifdef TARGET_ARCH_ppc
+# include "bytes_ppc.hpp"
+#endif
+
+u2 BytecodeConstantPool::find_or_add(BytecodeCPEntry const& bcpe) {
+  u2 index;
+  u2* probe = _indices.get(bcpe);
+  if (probe == NULL) {
+    index = _entries.length();
+    _entries.append(bcpe);
+    _indices.put(bcpe, index);
+  } else {
+    index = *probe;
+  }
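+  // New entries logically follow the original pool, so bias the index by its length.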
+  return index + _orig->length();
+}
+
+ConstantPool* BytecodeConstantPool::create_constant_pool(TRAPS) const {
+  if (_entries.length() == 0) {
+    return _orig;
+  }
+
+  ConstantPool* cp = ConstantPool::allocate(
+      _orig->pool_holder()->class_loader_data(),
+      _orig->length() + _entries.length(), CHECK_NULL);
+
+  cp->set_pool_holder(_orig->pool_holder());
+  _orig->copy_cp_to(1, _orig->length() - 1, cp, 1, CHECK_NULL);
+
+  for (int i = 0; i < _entries.length(); ++i) {
+    BytecodeCPEntry entry = _entries.at(i);
+    int idx = i + _orig->length();
+    switch (entry._tag) {
+      case BytecodeCPEntry::UTF8:
+        cp->symbol_at_put(idx, entry._u.utf8);
+        entry._u.utf8->increment_refcount();
+        break;
+      case BytecodeCPEntry::KLASS:
+        cp->unresolved_klass_at_put(
+            idx, cp->symbol_at(entry._u.klass));
+        break;
+      case BytecodeCPEntry::STRING:
+        cp->unresolved_string_at_put(
+            idx, cp->symbol_at(entry._u.string));
+        break;
+      case BytecodeCPEntry::NAME_AND_TYPE:
+        cp->name_and_type_at_put(idx,
+            entry._u.name_and_type.name_index,
+            entry._u.name_and_type.type_index);
+        break;
+      case BytecodeCPEntry::METHODREF:
+        cp->method_at_put(idx,
+            entry._u.methodref.class_index,
+            entry._u.methodref.name_and_type_index);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  }
+  return cp;
+}
+
+void BytecodeAssembler::append(u1 imm_u1) {
+  _code->append(imm_u1);
+}
+
+void BytecodeAssembler::append(u2 imm_u2) {
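+  // Reserve two bytes, then overwrite them in Java (big-endian) byte order.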
+  _code->append(0);
+  _code->append(0);
+  Bytes::put_Java_u2(_code->adr_at(_code->length() - 2), imm_u2);
+}
+
+void BytecodeAssembler::append(u4 imm_u4) {
+  _code->append(0);
+  _code->append(0);
+  _code->append(0);
+  _code->append(0);
+  Bytes::put_Java_u4(_code->adr_at(_code->length() - 4), imm_u4);
+}
+
+void BytecodeAssembler::xload(u4 index, u1 onebyteop, u1 twobyteop) {
+  if (index < 4) {
+    _code->append(onebyteop + index);
+  } else {
+    _code->append(twobyteop);
+    _code->append((u2)index);
+  }
+}
+
+void BytecodeAssembler::dup() {
+  _code->append(Bytecodes::_dup);
+}
+
+void BytecodeAssembler::_new(Symbol* sym) {
+  u2 cpool_index = _cp->klass(sym);
+  _code->append(Bytecodes::_new);
+  append(cpool_index);
+}
+
+void BytecodeAssembler::load_string(Symbol* sym) {
+  u2 cpool_index = _cp->string(sym);
+  if (cpool_index < 0x100) {
+    ldc(cpool_index);
+  } else {
+    ldc_w(cpool_index);
+  }
+}
+
+void BytecodeAssembler::ldc(u1 index) {
+  _code->append(Bytecodes::_ldc);
+  append(index);
+}
+
+void BytecodeAssembler::ldc_w(u2 index) {
+  _code->append(Bytecodes::_ldc_w);
+  append(index);
+}
+
+void BytecodeAssembler::athrow() {
+  _code->append(Bytecodes::_athrow);
+}
+
+void BytecodeAssembler::iload(u4 index) {
+  xload(index, Bytecodes::_iload_0, Bytecodes::_iload);
+}
+
+void BytecodeAssembler::lload(u4 index) {
+  xload(index, Bytecodes::_lload_0, Bytecodes::_lload);
+}
+
+void BytecodeAssembler::fload(u4 index) {
+  xload(index, Bytecodes::_fload_0, Bytecodes::_fload);
+}
+
+void BytecodeAssembler::dload(u4 index) {
+  xload(index, Bytecodes::_dload_0, Bytecodes::_dload);
+}
+
+void BytecodeAssembler::aload(u4 index) {
+  xload(index, Bytecodes::_aload_0, Bytecodes::_aload);
+}
+
+void BytecodeAssembler::load(BasicType bt, u4 index) {
+  switch (bt) {
+    case T_BOOLEAN:
+    case T_CHAR:
+    case T_BYTE:
+    case T_SHORT:
+    case T_INT:     iload(index); break;
+    case T_FLOAT:   fload(index); break;
+    case T_DOUBLE:  dload(index); break;
+    case T_LONG:    lload(index); break;
+    case T_OBJECT:
+    case T_ARRAY:   aload(index); break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void BytecodeAssembler::checkcast(Symbol* sym) {
+  u2 cpool_index = _cp->klass(sym);
+  _code->append(Bytecodes::_checkcast);
+  append(cpool_index);
+}
+
+void BytecodeAssembler::invokespecial(Method* method) {
+  invokespecial(method->klass_name(), method->name(), method->signature());
+}
+
+void BytecodeAssembler::invokespecial(Symbol* klss, Symbol* name, Symbol* sig) {
+  u2 methodref_index = _cp->methodref(klss, name, sig);
+  _code->append(Bytecodes::_invokespecial);
+  append(methodref_index);
+}
+
+void BytecodeAssembler::invokevirtual(Method* method) {
+  invokevirtual(method->klass_name(), method->name(), method->signature());
+}
+
+void BytecodeAssembler::invokevirtual(Symbol* klss, Symbol* name, Symbol* sig) {
+  u2 methodref_index = _cp->methodref(klss, name, sig);
+  _code->append(Bytecodes::_invokevirtual);
+  append(methodref_index);
+}
+
+void BytecodeAssembler::ireturn() {
+  _code->append(Bytecodes::_ireturn);
+}
+
+void BytecodeAssembler::lreturn() {
+  _code->append(Bytecodes::_lreturn);
+}
+
+void BytecodeAssembler::freturn() {
+  _code->append(Bytecodes::_freturn);
+}
+
+void BytecodeAssembler::dreturn() {
+  _code->append(Bytecodes::_dreturn);
+}
+
+void BytecodeAssembler::areturn() {
+  _code->append(Bytecodes::_areturn);
+}
+
+void BytecodeAssembler::_return() {
+  _code->append(Bytecodes::_return);
+}
+
+void BytecodeAssembler::_return(BasicType bt) {
+  switch (bt) {
+    case T_BOOLEAN:
+    case T_CHAR:
+    case T_BYTE:
+    case T_SHORT:
+    case T_INT:     ireturn(); break;
+    case T_FLOAT:   freturn(); break;
+    case T_DOUBLE:  dreturn(); break;
+    case T_LONG:    lreturn(); break;
+    case T_OBJECT:
+    case T_ARRAY:   areturn(); break;
+    case T_VOID:    _return(); break;
+    default:
+      ShouldNotReachHere();
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/classfile/bytecodeAssembler.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_CLASSFILE_BYTECODEASSEMBLER_HPP
+#define SHARE_VM_CLASSFILE_BYTECODEASSEMBLER_HPP
+
+#include "memory/allocation.hpp"
+#include "oops/method.hpp"
+#include "oops/symbol.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/growableArray.hpp"
+#include "utilities/resourceHash.hpp"
+
+
+/**
+ * Bytecode Assembler
+ *
+ * These classes are used to synthesize code for creating new methods from
+ * within the VM.  This is only a partial implementation of an assembler;
+ * only the bytecodes that are needed by clients are implemented at this time.
+ * This is used during default method analysis to create overpass methods
+ * and add them to a class during parsing.  Other uses (such as creating
+ * bridges) may come later.  Any missing bytecodes can be implemented on an
+ * as-needed basis.
+ */
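+
+// A minimal usage sketch (illustrative only; mirrors the overpass-method use
+// case and assumes 'pool' is the ConstantPool of the class being extended):
+//
+//   BytecodeBuffer buffer;
+//   BytecodeConstantPool bcp(pool);
+//   BytecodeAssembler assem(&buffer, &bcp);
+//   assem._new(vmSymbols::java_lang_AbstractMethodError());
+//   assem.dup();
+//   assem.invokespecial(vmSymbols::java_lang_AbstractMethodError(),
+//                       vmSymbols::object_initializer_name(),
+//                       vmSymbols::void_method_signature());
+//   assem.athrow();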
+
+class BytecodeBuffer : public GrowableArray<u1> {
+ public:
+  BytecodeBuffer() : GrowableArray<u1>(20) {}
+};
+
+// Entries in a yet-to-be-created constant pool.  Limited types for now.
+class BytecodeCPEntry VALUE_OBJ_CLASS_SPEC {
+ public:
+  enum tag {
+    ERROR_TAG,
+    UTF8,
+    KLASS,
+    STRING,
+    NAME_AND_TYPE,
+    METHODREF
+  };
+
+  u1 _tag;
+  union {
+    Symbol* utf8;
+    u2 klass;
+    u2 string;
+    struct {
+      u2 name_index;
+      u2 type_index;
+    } name_and_type;
+    struct {
+      u2 class_index;
+      u2 name_and_type_index;
+    } methodref;
+    uintptr_t hash;
+  } _u;
+
+  BytecodeCPEntry() : _tag(ERROR_TAG) { _u.hash = 0; }
+  BytecodeCPEntry(u1 tag) : _tag(tag) { _u.hash = 0; }
+
+  static BytecodeCPEntry utf8(Symbol* symbol) {
+    BytecodeCPEntry bcpe(UTF8);
+    bcpe._u.utf8 = symbol;
+    return bcpe;
+  }
+
+  static BytecodeCPEntry klass(u2 index) {
+    BytecodeCPEntry bcpe(KLASS);
+    bcpe._u.klass = index;
+    return bcpe;
+  }
+
+  static BytecodeCPEntry string(u2 index) {
+    BytecodeCPEntry bcpe(STRING);
+    bcpe._u.string = index;
+    return bcpe;
+  }
+
+  static BytecodeCPEntry name_and_type(u2 name, u2 type) {
+    BytecodeCPEntry bcpe(NAME_AND_TYPE);
+    bcpe._u.name_and_type.name_index = name;
+    bcpe._u.name_and_type.type_index = type;
+    return bcpe;
+  }
+
+  static BytecodeCPEntry methodref(u2 class_index, u2 nat) {
+    BytecodeCPEntry bcpe(METHODREF);
+    bcpe._u.methodref.class_index = class_index;
+    bcpe._u.methodref.name_and_type_index = nat;
+    return bcpe;
+  }
+
+  static bool equals(BytecodeCPEntry const& e0, BytecodeCPEntry const& e1) {
+    return e0._tag == e1._tag && e0._u.hash == e1._u.hash;
+  }
+
+  static unsigned hash(BytecodeCPEntry const& e0) {
+    return (unsigned)(e0._tag ^ e0._u.hash);
+  }
+};
+
+class BytecodeConstantPool : ResourceObj {
+ private:
+  typedef ResourceHashtable<BytecodeCPEntry, u2,
+      &BytecodeCPEntry::hash, &BytecodeCPEntry::equals> IndexHash;
+
+  ConstantPool* _orig;
+  GrowableArray<BytecodeCPEntry> _entries;
+  IndexHash _indices;
+
+  u2 find_or_add(BytecodeCPEntry const& bcpe);
+
+ public:
+
+  BytecodeConstantPool(ConstantPool* orig) : _orig(orig) {}
+
+  BytecodeCPEntry const& at(u2 index) const { return _entries.at(index); }
+
+  InstanceKlass* pool_holder() const {
+    return InstanceKlass::cast(_orig->pool_holder());
+  }
+
+  u2 utf8(Symbol* sym) {
+    return find_or_add(BytecodeCPEntry::utf8(sym));
+  }
+
+  u2 klass(Symbol* class_name) {
+    return find_or_add(BytecodeCPEntry::klass(utf8(class_name)));
+  }
+
+  u2 string(Symbol* str) {
+    return find_or_add(BytecodeCPEntry::string(utf8(str)));
+  }
+
+  u2 name_and_type(Symbol* name, Symbol* sig) {
+    return find_or_add(BytecodeCPEntry::name_and_type(utf8(name), utf8(sig)));
+  }
+
+  u2 methodref(Symbol* class_name, Symbol* name, Symbol* sig) {
+    return find_or_add(BytecodeCPEntry::methodref(
+        klass(class_name), name_and_type(name, sig)));
+  }
+
+  ConstantPool* create_constant_pool(TRAPS) const;
+};
+
+// Partial bytecode assembler - only what we need for creating
+// overpass methods for default methods is implemented
+class BytecodeAssembler : StackObj {
+ private:
+  BytecodeBuffer* _code;
+  BytecodeConstantPool* _cp;
+
+  void append(u1 imm_u1);
+  void append(u2 imm_u2);
+  void append(u4 imm_u4);
+
+  void xload(u4 index, u1 quick, u1 twobyte);
+
+ public:
+  BytecodeAssembler(BytecodeBuffer* buffer, BytecodeConstantPool* cp)
+    : _code(buffer), _cp(cp) {}
+
+  void aload(u4 index);
+  void areturn();
+  void athrow();
+  void checkcast(Symbol* sym);
+  void dload(u4 index);
+  void dreturn();
+  void dup();
+  void fload(u4 index);
+  void freturn();
+  void iload(u4 index);
+  void invokespecial(Method* method);
+  void invokespecial(Symbol* cls, Symbol* name, Symbol* sig);
+  void invokevirtual(Method* method);
+  void invokevirtual(Symbol* cls, Symbol* name, Symbol* sig);
+  void ireturn();
+  void ldc(u1 index);
+  void ldc_w(u2 index);
+  void lload(u4 index);
+  void lreturn();
+  void _new(Symbol* sym);
+  void _return();
+
+  void load_string(Symbol* sym);
+  void load(BasicType bt, u4 index);
+  void _return(BasicType bt);
+};
+
+#endif // SHARE_VM_CLASSFILE_BYTECODEASSEMBLER_HPP
--- a/src/share/vm/classfile/classFileParser.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/classFileParser.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,8 @@
 #include "classfile/classLoader.hpp"
 #include "classfile/classLoaderData.hpp"
 #include "classfile/classLoaderData.inline.hpp"
+#include "classfile/defaultMethods.hpp"
+#include "classfile/genericSignatures.hpp"
 #include "classfile/javaClasses.hpp"
 #include "classfile/symbolTable.hpp"
 #include "classfile/systemDictionary.hpp"
@@ -57,6 +59,7 @@
 #include "services/classLoadingService.hpp"
 #include "services/threadService.hpp"
 #include "utilities/array.hpp"
+#include "utilities/globalDefinitions.hpp"
 
 // We generally try to create the oops directly when parsing, rather than
 // allocating temporary data structures and copying the bytes twice. A
@@ -84,6 +87,9 @@
 // - to check NameAndType_info signatures more aggressively
 #define JAVA_7_VERSION                    51
 
+// Extension method support.
+#define JAVA_8_VERSION                    52
+
 
 void ClassFileParser::parse_constant_pool_entries(ClassLoaderData* loader_data, constantPoolHandle cp, int length, TRAPS) {
   // Use a local copy of ClassFileStream. It helps the C++ compiler to optimize
@@ -785,6 +791,7 @@
                                                  ClassLoaderData* loader_data,
                                                  Handle protection_domain,
                                                  Symbol* class_name,
+                                                 bool* has_default_methods,
                                                  TRAPS) {
   ClassFileStream* cfs = stream();
   assert(length > 0, "only called for length>0");
@@ -818,9 +825,12 @@
       interf = KlassHandle(THREAD, k);
     }
 
-    if (!Klass::cast(interf())->is_interface()) {
+    if (!interf()->is_interface()) {
       THROW_MSG_(vmSymbols::java_lang_IncompatibleClassChangeError(), "Implementing class", NULL);
     }
+    if (InstanceKlass::cast(interf())->has_default_methods()) {
+      *has_default_methods = true;
+    }
     interfaces->at_put(index, interf());
   }
 
@@ -897,6 +907,7 @@
                                              bool* is_synthetic_addr,
                                              u2* generic_signature_index_addr,
                                              AnnotationArray** field_annotations,
+                                             AnnotationArray** field_type_annotations,
                                              ClassFileParser::FieldAnnotationCollector* parsed_annotations,
                                              TRAPS) {
   ClassFileStream* cfs = stream();
@@ -908,6 +919,10 @@
   int runtime_visible_annotations_length = 0;
   u1* runtime_invisible_annotations = NULL;
   int runtime_invisible_annotations_length = 0;
+  u1* runtime_visible_type_annotations = NULL;
+  int runtime_visible_type_annotations_length = 0;
+  u1* runtime_invisible_type_annotations = NULL;
+  int runtime_invisible_type_annotations_length = 0;
   while (attributes_count--) {
     cfs->guarantee_more(6, CHECK);  // attribute_name_index, attribute_length
     u2 attribute_name_index = cfs->get_u2_fast();
@@ -956,12 +971,28 @@
         runtime_visible_annotations_length = attribute_length;
         runtime_visible_annotations = cfs->get_u1_buffer();
         assert(runtime_visible_annotations != NULL, "null visible annotations");
+        parse_annotations(loader_data,
+                          runtime_visible_annotations,
+                          runtime_visible_annotations_length,
+                          cp,
+                          parsed_annotations,
+                          CHECK);
         cfs->skip_u1(runtime_visible_annotations_length, CHECK);
       } else if (PreserveAllAnnotations && attribute_name == vmSymbols::tag_runtime_invisible_annotations()) {
         runtime_invisible_annotations_length = attribute_length;
         runtime_invisible_annotations = cfs->get_u1_buffer();
         assert(runtime_invisible_annotations != NULL, "null invisible annotations");
         cfs->skip_u1(runtime_invisible_annotations_length, CHECK);
+      } else if (attribute_name == vmSymbols::tag_runtime_visible_type_annotations()) {
+        runtime_visible_type_annotations_length = attribute_length;
+        runtime_visible_type_annotations = cfs->get_u1_buffer();
+        assert(runtime_visible_type_annotations != NULL, "null visible type annotations");
+        cfs->skip_u1(runtime_visible_type_annotations_length, CHECK);
+      } else if (PreserveAllAnnotations && attribute_name == vmSymbols::tag_runtime_invisible_type_annotations()) {
+        runtime_invisible_type_annotations_length = attribute_length;
+        runtime_invisible_type_annotations = cfs->get_u1_buffer();
+        assert(runtime_invisible_type_annotations != NULL, "null invisible type annotations");
+        cfs->skip_u1(runtime_invisible_type_annotations_length, CHECK);
       } else {
         cfs->skip_u1(attribute_length, CHECK);  // Skip unknown attributes
       }
@@ -979,6 +1010,12 @@
                                             runtime_invisible_annotations,
                                             runtime_invisible_annotations_length,
                                             CHECK);
+  *field_type_annotations = assemble_annotations(loader_data,
+                                            runtime_visible_type_annotations,
+                                            runtime_visible_type_annotations_length,
+                                            runtime_invisible_type_annotations,
+                                            runtime_invisible_type_annotations_length,
+                                            CHECK);
   return;
 }
 
@@ -1075,6 +1112,7 @@
                                          bool is_interface,
                                          FieldAllocationCount *fac,
                                          Array<AnnotationArray*>** fields_annotations,
+                                         Array<AnnotationArray*>** fields_type_annotations,
                                          u2* java_fields_count_ptr, TRAPS) {
   ClassFileStream* cfs = stream();
   cfs->guarantee_more(2, CHECK_NULL);  // length
@@ -1110,6 +1148,7 @@
              THREAD, u2, total_fields * (FieldInfo::field_slots + 1));
 
   AnnotationArray* field_annotations = NULL;
+  AnnotationArray* field_type_annotations = NULL;
   // The generic signature slots start after all other fields' data.
   int generic_signature_slot = total_fields * FieldInfo::field_slots;
   int num_generic_signature = 0;
@@ -1151,7 +1190,7 @@
                              cp, attributes_count, is_static, signature_index,
                              &constantvalue_index, &is_synthetic,
                              &generic_signature_index, &field_annotations,
-                             &parsed_annotations,
+                             &field_type_annotations, &parsed_annotations,
                              CHECK_NULL);
       if (field_annotations != NULL) {
         if (*fields_annotations == NULL) {
@@ -1161,6 +1200,14 @@
         }
         (*fields_annotations)->at_put(n, field_annotations);
       }
+      if (field_type_annotations != NULL) {
+        if (*fields_type_annotations == NULL) {
+          *fields_type_annotations = MetadataFactory::new_array<AnnotationArray*>(
+                                                  loader_data, length, NULL,
+                                                  CHECK_NULL);
+        }
+        (*fields_type_annotations)->at_put(n, field_type_annotations);
+      }
       if (is_synthetic) {
         access_flags.set_is_synthetic();
       }
@@ -1176,19 +1223,16 @@
     field->initialize(access_flags.as_short(),
                       name_index,
                       signature_index,
-                      constantvalue_index,
-                      0);
-    if (parsed_annotations.has_any_annotations())
-      parsed_annotations.apply_to(field);
-
+                      constantvalue_index);
     BasicType type = cp->basic_type_for_signature_at(signature_index);
 
     // Remember how many oops we encountered and compute allocation type
     FieldAllocationType atype = fac->update(is_static, type);
-
-    // The correct offset is computed later (all oop fields will be located together)
-    // We temporarily store the allocation type in the offset field
-    field->set_offset(atype);
+    field->set_allocation_type(atype);
+
+    // After field is initialized with type, we can augment it with aux info
+    if (parsed_annotations.has_any_annotations())
+      parsed_annotations.apply_to(field);
   }
 
   int index = length;
@@ -1219,17 +1263,13 @@
       field->initialize(JVM_ACC_FIELD_INTERNAL,
                         injected[n].name_index,
                         injected[n].signature_index,
-                        0,
                         0);
 
       BasicType type = FieldType::basic_type(injected[n].signature());
 
       // Remember how many oops we encountered and compute allocation type
       FieldAllocationType atype = fac->update(false, type);
-
-      // The correct offset is computed later (all oop fields will be located together)
-      // We temporarily store the allocation type in the offset field
-      field->set_offset(atype);
+      field->set_allocation_type(atype);
       index++;
     }
   }
@@ -1695,7 +1735,8 @@
 }
 
 // Sift through annotations, looking for those significant to the VM:
-void ClassFileParser::parse_annotations(u1* buffer, int limit,
+void ClassFileParser::parse_annotations(ClassLoaderData* loader_data,
+                                        u1* buffer, int limit,
                                         constantPoolHandle cp,
                                         ClassFileParser::AnnotationCollector* coll,
                                         TRAPS) {
@@ -1712,9 +1753,12 @@
       e_type_off = 7,   // utf8 such as 'Ljava/lang/annotation/RetentionPolicy;'
       e_con_off = 9,    // utf8 payload, such as 'SOURCE', 'CLASS', 'RUNTIME'
       e_size = 11,     // end of 'e' annotation
-    c_tag_val = 'c',
-      c_con_off = 7,    // utf8 payload, such as 'I' or 'Ljava/lang/String;'
+    c_tag_val = 'c',    // payload is type
+      c_con_off = 7,    // utf8 payload, such as 'I'
       c_size = 9,       // end of 'c' annotation
+    s_tag_val = 's',    // payload is String
+      s_con_off = 7,    // utf8 payload, such as 'Ljava/lang/String;'
+      s_size = 9,
     min_size = 6        // smallest possible size (zero members)
   };
   while ((--nann) >= 0 && (index-2 + min_size <= limit)) {
@@ -1733,57 +1777,63 @@
     }
 
     // Here is where parsing particular annotations will take place.
-    AnnotationCollector::ID id = coll->annotation_index(aname);
+    AnnotationCollector::ID id = coll->annotation_index(loader_data, aname);
     if (id == AnnotationCollector::_unknown)  continue;
     coll->set_annotation(id);
-    // If there are no values, just set the bit and move on:
-    if (count == 0)   continue;
-
-    // For the record, here is how annotation payloads can be collected.
-    // Suppose we want to capture @Retention.value.  Here is how:
-    //if (id == AnnotationCollector::_class_Retention) {
-    //  Symbol* payload = NULL;
-    //  if (count == 1
-    //      && e_size == (index0 - index)  // match size
-    //      && e_tag_val == *(abase + tag_off)
-    //      && (check_symbol_at(cp, Bytes::get_Java_u2(abase + e_type_off))
-    //          == vmSymbols::RetentionPolicy_signature())
-    //      && member == vmSymbols::value_name()) {
-    //    payload = check_symbol_at(cp, Bytes::get_Java_u2(abase + e_con_off));
-    //  }
-    //  check_property(payload != NULL,
-    //                 "Invalid @Retention annotation at offset %u in class file %s",
-    //                 index0, CHECK);
-    //  if (payload != NULL) {
-    //      payload->increment_refcount();
-    //      coll->_class_RetentionPolicy = payload;
-    //  }
-    //}
+
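+    // For @sun.misc.Contended, a one-element 's' (String) payload names the
+    // contention group; any other shape falls back to the default group (0).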
+    if (id == AnnotationCollector::_sun_misc_Contended) {
+      if (count == 1
+          && s_size == (index - index0)  // match size
+          && s_tag_val == *(abase + tag_off)
+          && member == vmSymbols::value_name()) {
+        u2 group_index = Bytes::get_Java_u2(abase + s_con_off);
+        coll->set_contended_group(group_index);
+      } else {
+        coll->set_contended_group(0); // default contended group
+      }
+      coll->set_contended(true);
+    } else {
+      coll->set_contended(false);
+    }
   }
 }
 
-ClassFileParser::AnnotationCollector::ID ClassFileParser::AnnotationCollector::annotation_index(Symbol* name) {
+ClassFileParser::AnnotationCollector::ID
+ClassFileParser::AnnotationCollector::annotation_index(ClassLoaderData* loader_data,
+                                                                Symbol* name) {
   vmSymbols::SID sid = vmSymbols::find_sid(name);
+  // Privileged code can use all annotations.  Other code silently drops some.
+  bool privileged = loader_data->is_the_null_class_loader_data() ||
+                    loader_data->is_anonymous();
   switch (sid) {
   case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_ForceInline_signature):
     if (_location != _in_method)  break;  // only allow for methods
+    if (!privileged)              break;  // only allow in privileged code
     return _method_ForceInline;
   case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_DontInline_signature):
     if (_location != _in_method)  break;  // only allow for methods
+    if (!privileged)              break;  // only allow in privileged code
     return _method_DontInline;
   case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_LambdaForm_Compiled_signature):
     if (_location != _in_method)  break;  // only allow for methods
+    if (!privileged)              break;  // only allow in privileged code
     return _method_LambdaForm_Compiled;
   case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_LambdaForm_Hidden_signature):
     if (_location != _in_method)  break;  // only allow for methods
+    if (!privileged)              break;  // only allow in privileged code
     return _method_LambdaForm_Hidden;
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(sun_misc_Contended_signature):
+    if (_location != _in_field && _location != _in_class)          break;  // only allow for fields and classes
+    if (!EnableContended || (RestrictContended && !privileged))    break;  // honor privileges
+    return _sun_misc_Contended;
   default: break;
   }
   return AnnotationCollector::_unknown;
 }
 
 void ClassFileParser::FieldAnnotationCollector::apply_to(FieldInfo* f) {
-  fatal("no field annotations yet");
+  if (is_contended())
+    f->set_contended_group(contended_group());
 }
 
 void ClassFileParser::MethodAnnotationCollector::apply_to(methodHandle m) {
@@ -1798,7 +1848,7 @@
 }
 
 void ClassFileParser::ClassAnnotationCollector::apply_to(instanceKlassHandle k) {
-  fatal("no class annotations yet");
+  k->set_is_contended(is_contended());
 }
 
 
@@ -1822,6 +1872,7 @@
                                            AnnotationArray** method_annotations,
                                            AnnotationArray** method_parameter_annotations,
                                            AnnotationArray** method_default_annotations,
+                                           AnnotationArray** method_type_annotations,
                                            TRAPS) {
   ClassFileStream* cfs = stream();
   methodHandle nullHandle;
@@ -1894,6 +1945,8 @@
   u2** localvariable_table_start;
   u2* localvariable_type_table_length;
   u2** localvariable_type_table_start;
+  u2 method_parameters_length = 0;
+  u1* method_parameters_data = NULL;
   bool parsed_code_attribute = false;
   bool parsed_checked_exceptions_attribute = false;
   bool parsed_stackmap_attribute = false;
@@ -1909,6 +1962,10 @@
   int runtime_visible_parameter_annotations_length = 0;
   u1* runtime_invisible_parameter_annotations = NULL;
   int runtime_invisible_parameter_annotations_length = 0;
+  u1* runtime_visible_type_annotations = NULL;
+  int runtime_visible_type_annotations_length = 0;
+  u1* runtime_invisible_type_annotations = NULL;
+  int runtime_invisible_type_annotations_length = 0;
   u1* annotation_default = NULL;
   int annotation_default_length = 0;
 
@@ -1928,7 +1985,8 @@
     if (method_attribute_name == vmSymbols::tag_code()) {
       // Parse Code attribute
       if (_need_verify) {
-        guarantee_property(!access_flags.is_native() && !access_flags.is_abstract(),
+        guarantee_property(
+            !access_flags.is_native() && !access_flags.is_abstract(),
                         "Code attribute in native or abstract methods in class file %s",
                          CHECK_(nullHandle));
       }
@@ -2098,6 +2156,26 @@
             parse_checked_exceptions(&checked_exceptions_length,
                                      method_attribute_length,
                                      cp, CHECK_(nullHandle));
+    } else if (method_attribute_name == vmSymbols::tag_method_parameters()) {
+      method_parameters_length = cfs->get_u1_fast();
+      // Track the actual size (note: this is written for clarity; a
+      // decent compiler will CSE and constant-fold this into a single
+      // expression)
+      u2 actual_size = 1;
+      method_parameters_data = cfs->get_u1_buffer();
+      actual_size += 2 * method_parameters_length;
+      cfs->skip_u2_fast(method_parameters_length);
+      actual_size += 4 * method_parameters_length;
+      cfs->skip_u4_fast(method_parameters_length);
+      // Enforce attribute length
+      if (method_attribute_length != actual_size) {
+        classfile_parse_error(
+          "Invalid MethodParameters method attribute length %u in class file %s",
+          method_attribute_length, CHECK_(nullHandle));
+      }
+      // ignore this attribute if it cannot be reflected
+      if (!SystemDictionary::Parameter_klass_loaded())
+        method_parameters_length = 0;
     } else if (method_attribute_name == vmSymbols::tag_synthetic()) {
       if (method_attribute_length != 0) {
         classfile_parse_error(
@@ -2125,7 +2203,10 @@
         runtime_visible_annotations_length = method_attribute_length;
         runtime_visible_annotations = cfs->get_u1_buffer();
         assert(runtime_visible_annotations != NULL, "null visible annotations");
-        parse_annotations(runtime_visible_annotations, runtime_visible_annotations_length, cp, &parsed_annotations, CHECK_(nullHandle));
+        parse_annotations(loader_data,
+            runtime_visible_annotations,
+            runtime_visible_annotations_length, cp, &parsed_annotations,
+            CHECK_(nullHandle));
         cfs->skip_u1(runtime_visible_annotations_length, CHECK_(nullHandle));
       } else if (PreserveAllAnnotations && method_attribute_name == vmSymbols::tag_runtime_invisible_annotations()) {
         runtime_invisible_annotations_length = method_attribute_length;
@@ -2147,6 +2228,17 @@
         annotation_default = cfs->get_u1_buffer();
         assert(annotation_default != NULL, "null annotation default");
         cfs->skip_u1(annotation_default_length, CHECK_(nullHandle));
+      } else if (method_attribute_name == vmSymbols::tag_runtime_visible_type_annotations()) {
+        runtime_visible_type_annotations_length = method_attribute_length;
+        runtime_visible_type_annotations = cfs->get_u1_buffer();
+        assert(runtime_visible_type_annotations != NULL, "null visible type annotations");
+        // No need for the VM to parse Type annotations
+        cfs->skip_u1(runtime_visible_type_annotations_length, CHECK_(nullHandle));
+      } else if (PreserveAllAnnotations && method_attribute_name == vmSymbols::tag_runtime_invisible_type_annotations()) {
+        runtime_invisible_type_annotations_length = method_attribute_length;
+        runtime_invisible_type_annotations = cfs->get_u1_buffer();
+        assert(runtime_invisible_type_annotations != NULL, "null invisible type annotations");
+        cfs->skip_u1(runtime_invisible_type_annotations_length, CHECK_(nullHandle));
       } else {
         // Skip unknown attributes
         cfs->skip_u1(method_attribute_length, CHECK_(nullHandle));
@@ -2169,12 +2261,11 @@
   }
 
   // All sizing information for a Method* is finally available, now create it
-  Method* m = Method::allocate(loader_data, code_length, access_flags,
-                               linenumber_table_length,
-                               total_lvt_length,
-                               exception_table_length,
-                               checked_exceptions_length,
-                               CHECK_(nullHandle));
+  Method* m = Method::allocate(
+      loader_data, code_length, access_flags, linenumber_table_length,
+      total_lvt_length, exception_table_length, checked_exceptions_length,
+      method_parameters_length, generic_signature_index,
+      ConstMethod::NORMAL, CHECK_(nullHandle));
 
   ClassLoadingService::add_class_method_size(m->size()*HeapWordSize);
 
@@ -2182,7 +2273,6 @@
   m->set_constants(cp());
   m->set_name_index(name_index);
   m->set_signature_index(signature_index);
-  m->set_generic_signature_index(generic_signature_index);
 #ifdef CC_INTERP
   // hmm is there a gc issue here??
   ResultTypeFinder rtf(cp->symbol_at(signature_index));
@@ -2204,7 +2294,6 @@
   // Fill in code attribute information
   m->set_max_stack(max_stack);
   m->set_max_locals(max_locals);
-
   m->constMethod()->set_stackmap_data(stackmap_data);
 
   // Copy byte codes
@@ -2224,6 +2313,21 @@
                              exception_table_start, size);
   }
 
+  // Copy method parameters
+  if (method_parameters_length > 0) {
+    MethodParametersElement* elem = m->constMethod()->method_parameters_start();
+    for(int i = 0; i < method_parameters_length; i++) {
+      elem[i].name_cp_index =
+        Bytes::get_Java_u2(method_parameters_data);
+      method_parameters_data += 2;
+      u4 flags = Bytes::get_Java_u4(method_parameters_data);
+      // Storing flags as a u4 caused an alignment fault on SPARC; use two u2 halves
+      elem[i].flags_lo = extract_low_short_from_int(flags);
+      elem[i].flags_hi = extract_high_short_from_int(flags);
+      method_parameters_data += 4;
+    }
+  }
+
   // Copy checked exceptions
   if (checked_exceptions_length > 0) {
     int size = checked_exceptions_length * sizeof(CheckedExceptionElement) / sizeof(u2);
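The flags_lo/flags_hi split above works around strict-alignment targets such as SPARC: the MethodParametersElement entries are only 2-byte aligned, so accessing flags as a u4 there can fault. A hedged sketch of the same split-and-recombine idea (names illustrative only):

    // Sketch: keep a 32-bit flags word as two 16-bit halves so each half needs
    // only 2-byte alignment; recombine on read.
    typedef unsigned short u2_t;
    typedef unsigned int   u4_t;

    static u2_t low_short (u4_t x)           { return (u2_t)(x & 0xFFFF); }
    static u2_t high_short(u4_t x)           { return (u2_t)(x >> 16); }
    static u4_t combine   (u2_t hi, u2_t lo) { return ((u4_t)hi << 16) | (u4_t)lo; }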
@@ -2325,6 +2429,12 @@
                                                      NULL,
                                                      0,
                                                      CHECK_(nullHandle));
+  *method_type_annotations = assemble_annotations(loader_data,
+                                                  runtime_visible_type_annotations,
+                                                  runtime_visible_type_annotations_length,
+                                                  runtime_invisible_type_annotations,
+                                                  runtime_invisible_type_annotations_length,
+                                                  CHECK_(nullHandle));
 
   if (name == vmSymbols::finalize_method_name() &&
       signature == vmSymbols::void_method_signature()) {
@@ -2356,11 +2466,14 @@
                                                Array<AnnotationArray*>** methods_annotations,
                                                Array<AnnotationArray*>** methods_parameter_annotations,
                                                Array<AnnotationArray*>** methods_default_annotations,
+                                               Array<AnnotationArray*>** methods_type_annotations,
+                                               bool* has_default_methods,
                                                TRAPS) {
   ClassFileStream* cfs = stream();
   AnnotationArray* method_annotations = NULL;
   AnnotationArray* method_parameter_annotations = NULL;
   AnnotationArray* method_default_annotations = NULL;
+  AnnotationArray* method_type_annotations = NULL;
   cfs->guarantee_more(2, CHECK_NULL);  // length
   u2 length = cfs->get_u2_fast();
   if (length == 0) {
@@ -2377,28 +2490,51 @@
                                          &method_annotations,
                                          &method_parameter_annotations,
                                          &method_default_annotations,
+                                         &method_type_annotations,
                                          CHECK_NULL);
 
       if (method->is_final()) {
         *has_final_method = true;
       }
+      if (is_interface && !method->is_abstract() && !method->is_static()) {
+        // default method
+        *has_default_methods = true;
+      }
       methods->at_put(index, method());
-      if (*methods_annotations == NULL) {
-        *methods_annotations =
-             MetadataFactory::new_array<AnnotationArray*>(loader_data, length, NULL, CHECK_NULL);
+
+      if (method_annotations != NULL) {
+        if (*methods_annotations == NULL) {
+          *methods_annotations =
+              MetadataFactory::new_array<AnnotationArray*>(loader_data, length, NULL, CHECK_NULL);
+        }
+        (*methods_annotations)->at_put(index, method_annotations);
       }
-      (*methods_annotations)->at_put(index, method_annotations);
-      if (*methods_parameter_annotations == NULL) {
-        *methods_parameter_annotations =
-            MetadataFactory::new_array<AnnotationArray*>(loader_data, length, NULL, CHECK_NULL);
+
+      if (method_parameter_annotations != NULL) {
+        if (*methods_parameter_annotations == NULL) {
+          *methods_parameter_annotations =
+              MetadataFactory::new_array<AnnotationArray*>(loader_data, length, NULL, CHECK_NULL);
+        }
+        (*methods_parameter_annotations)->at_put(index, method_parameter_annotations);
       }
-      (*methods_parameter_annotations)->at_put(index, method_parameter_annotations);
-      if (*methods_default_annotations == NULL) {
-        *methods_default_annotations =
-            MetadataFactory::new_array<AnnotationArray*>(loader_data, length, NULL, CHECK_NULL);
+
+      if (method_default_annotations != NULL) {
+        if (*methods_default_annotations == NULL) {
+          *methods_default_annotations =
+              MetadataFactory::new_array<AnnotationArray*>(loader_data, length, NULL, CHECK_NULL);
+        }
+        (*methods_default_annotations)->at_put(index, method_default_annotations);
       }
-      (*methods_default_annotations)->at_put(index, method_default_annotations);
+
+      if (method_type_annotations != NULL) {
+        if (*methods_type_annotations == NULL) {
+          *methods_type_annotations =
+              MetadataFactory::new_array<AnnotationArray*>(loader_data, length, NULL, CHECK_NULL);
+        }
+        (*methods_type_annotations)->at_put(index, method_type_annotations);
+      }
     }
+
     if (_need_verify && length > 1) {
       // Check duplicated methods
       ResourceMark rm(THREAD);
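Note how the rewritten loop allocates each Array&lt;AnnotationArray*&gt; side table lazily, only once the first method actually carrying that kind of annotation shows up; classes without annotations no longer pay for four empty arrays. The same pattern in a hedged, generic sketch (std::vector standing in for the metaspace Array):

    // Sketch: allocate the side table on first use; NULL entries stay implicit.
    #include <cstddef>
    #include <vector>

    template <typename T>
    static void put_lazy(std::vector<T*>*& table, size_t length, size_t index, T* value) {
      if (value == NULL) return;               // nothing to record; table stays unallocated
      if (table == NULL) table = new std::vector<T*>(length, (T*)NULL);
      (*table)[index] = value;
    }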
@@ -2432,6 +2568,7 @@
                                           Array<AnnotationArray*>* methods_annotations,
                                           Array<AnnotationArray*>* methods_parameter_annotations,
                                           Array<AnnotationArray*>* methods_default_annotations,
+                                          Array<AnnotationArray*>* methods_type_annotations,
                                               TRAPS) {
   int length = methods->length();
   // If JVMTI original method ordering or sharing is enabled we have to
@@ -2450,7 +2587,8 @@
   // Note that the ordering is not alphabetical, see Symbol::fast_compare
   Method::sort_methods(methods, methods_annotations,
                        methods_parameter_annotations,
-                       methods_default_annotations);
+                       methods_default_annotations,
+                       methods_type_annotations);
 
   // If JVMTI original method ordering or sharing is enabled construct int
   // array remembering the original ordering
@@ -2715,6 +2853,10 @@
   int runtime_visible_annotations_length = 0;
   u1* runtime_invisible_annotations = NULL;
   int runtime_invisible_annotations_length = 0;
+  u1* runtime_visible_type_annotations = NULL;
+  int runtime_visible_type_annotations_length = 0;
+  u1* runtime_invisible_type_annotations = NULL;
+  int runtime_invisible_type_annotations_length = 0;
   u1* inner_classes_attribute_start = NULL;
   u4  inner_classes_attribute_length = 0;
   u2  enclosing_method_class_index = 0;
@@ -2782,7 +2924,8 @@
         runtime_visible_annotations_length = attribute_length;
         runtime_visible_annotations = cfs->get_u1_buffer();
         assert(runtime_visible_annotations != NULL, "null visible annotations");
-        parse_annotations(runtime_visible_annotations,
+        parse_annotations(loader_data,
+                          runtime_visible_annotations,
                           runtime_visible_annotations_length,
                           cp,
                           parsed_annotations,
@@ -2821,6 +2964,17 @@
           classfile_parse_error("Multiple BootstrapMethods attributes in class file %s", CHECK);
         parsed_bootstrap_methods_attribute = true;
         parse_classfile_bootstrap_methods_attribute(loader_data, cp, attribute_length, CHECK);
+      } else if (tag == vmSymbols::tag_runtime_visible_type_annotations()) {
+        runtime_visible_type_annotations_length = attribute_length;
+        runtime_visible_type_annotations = cfs->get_u1_buffer();
+        assert(runtime_visible_type_annotations != NULL, "null visible type annotations");
+        // No need for the VM to parse Type annotations
+        cfs->skip_u1(runtime_visible_type_annotations_length, CHECK);
+      } else if (PreserveAllAnnotations && tag == vmSymbols::tag_runtime_invisible_type_annotations()) {
+        runtime_invisible_type_annotations_length = attribute_length;
+        runtime_invisible_type_annotations = cfs->get_u1_buffer();
+        assert(runtime_invisible_type_annotations != NULL, "null invisible type annotations");
+        cfs->skip_u1(runtime_invisible_type_annotations_length, CHECK);
       } else {
         // Unknown attribute
         cfs->skip_u1(attribute_length, CHECK);
@@ -2837,6 +2991,13 @@
                                                       runtime_invisible_annotations_length,
                                                       CHECK);
   set_class_annotations(annotations);
+  AnnotationArray* type_annotations = assemble_annotations(loader_data,
+                                                           runtime_visible_type_annotations,
+                                                           runtime_visible_type_annotations_length,
+                                                           runtime_invisible_type_annotations,
+                                                           runtime_invisible_type_annotations_length,
+                                                           CHECK);
+  set_class_type_annotations(type_annotations);
 
   if (parsed_innerclasses_attribute || parsed_enclosingmethod_attribute) {
     u2 num_of_classes = parse_classfile_inner_classes_attribute(
@@ -2907,14 +3068,43 @@
 }
 
 
+#ifndef PRODUCT
+static void parseAndPrintGenericSignatures(
+    instanceKlassHandle this_klass, TRAPS) {
+  assert(ParseAllGenericSignatures == true, "Shouldn't call otherwise");
+  ResourceMark rm;
+
+  if (this_klass->generic_signature() != NULL) {
+    using namespace generic;
+    ClassDescriptor* spec = ClassDescriptor::parse_generic_signature(this_klass(), CHECK);
+
+    tty->print_cr("Parsing %s", this_klass->generic_signature()->as_C_string());
+    spec->print_on(tty);
+
+    for (int i = 0; i < this_klass->methods()->length(); ++i) {
+      Method* m = this_klass->methods()->at(i);
+      MethodDescriptor* method_spec = MethodDescriptor::parse_generic_signature(m, spec);
+      Symbol* sig = m->generic_signature();
+      if (sig == NULL) {
+        sig = m->signature();
+      }
+      tty->print_cr("Parsing %s", sig->as_C_string());
+      method_spec->print_on(tty);
+    }
+  }
+}
+#endif // ndef PRODUCT
+
+
 instanceKlassHandle ClassFileParser::parseClassFile(Symbol* name,
-                                                    Handle class_loader,
+                                                    ClassLoaderData* loader_data,
                                                     Handle protection_domain,
                                                     KlassHandle host_klass,
                                                     GrowableArray<Handle>* cp_patches,
                                                     TempNewSymbol& parsed_name,
                                                     bool verify,
                                                     TRAPS) {
+
   // When a retransformable agent is attached, JVMTI caches the
   // class bytes that existed before the first retransformation.
   // If RedefineClasses() was used before the retransformable
@@ -2922,7 +3112,9 @@
   // original class bytes.
   unsigned char *cached_class_file_bytes = NULL;
   jint cached_class_file_length;
-  ClassLoaderData* loader_data = ClassLoaderData::class_loader_data(class_loader());
+  Handle class_loader(THREAD, loader_data->class_loader());
+  bool has_default_methods = false;
+  ResourceMark rm(THREAD);
 
   ClassFileStream* cfs = stream();
   // Timing
@@ -2961,7 +3153,7 @@
     unsigned char* ptr = cfs->buffer();
     unsigned char* end_ptr = cfs->buffer() + cfs->length();
 
-    JvmtiExport::post_class_file_load_hook(name, class_loader, protection_domain,
+    JvmtiExport::post_class_file_load_hook(name, class_loader(), protection_domain,
                                            &ptr, &end_ptr,
                                            &cached_class_file_bytes,
                                            &cached_class_file_length);
@@ -3138,25 +3330,29 @@
     if (itfs_len == 0) {
       local_interfaces = Universe::the_empty_klass_array();
     } else {
-      local_interfaces = parse_interfaces(cp, itfs_len, loader_data, protection_domain, _class_name, CHECK_(nullHandle));
+      local_interfaces = parse_interfaces(
+          cp, itfs_len, loader_data, protection_domain, _class_name,
+          &has_default_methods, CHECK_(nullHandle));
     }
 
     u2 java_fields_count = 0;
     // Fields (offsets are filled in later)
     FieldAllocationCount fac;
     Array<AnnotationArray*>* fields_annotations = NULL;
+    Array<AnnotationArray*>* fields_type_annotations = NULL;
     Array<u2>* fields = parse_fields(loader_data, class_name, cp, access_flags.is_interface(), &fac, &fields_annotations,
+                                          &fields_type_annotations,
                                           &java_fields_count,
                                           CHECK_(nullHandle));
     // Methods
     bool has_final_method = false;
     AccessFlags promoted_flags;
     promoted_flags.set_flags(0);
-    // These need to be oop pointers because they are allocated lazily
-    // inside parse_methods inside a nested HandleMark
+
     Array<AnnotationArray*>* methods_annotations = NULL;
     Array<AnnotationArray*>* methods_parameter_annotations = NULL;
     Array<AnnotationArray*>* methods_default_annotations = NULL;
+    Array<AnnotationArray*>* methods_type_annotations = NULL;
     Array<Method*>* methods = parse_methods(loader_data,
                                             cp, access_flags.is_interface(),
                                             &promoted_flags,
@@ -3164,6 +3360,8 @@
                                             &methods_annotations,
                                             &methods_parameter_annotations,
                                             &methods_default_annotations,
+                                            &methods_type_annotations,
+                                            &has_default_methods,
                                             CHECK_(nullHandle));
 
     // Additional attributes
@@ -3193,6 +3391,11 @@
       super_klass = instanceKlassHandle(THREAD, kh());
     }
     if (super_klass.not_null()) {
+
+      if (super_klass->has_default_methods()) {
+        has_default_methods = true;
+      }
+
       if (super_klass->is_interface()) {
         ResourceMark rm(THREAD);
         Exceptions::fthrow(
@@ -3219,6 +3422,7 @@
                                                methods_annotations,
                                                methods_parameter_annotations,
                                                methods_default_annotations,
+                                               methods_type_annotations,
                                                CHECK_(nullHandle));
 
     // promote flags from parse_methods() to the klass' flags
@@ -3229,31 +3433,31 @@
     int itable_size = 0;
     int num_miranda_methods = 0;
 
-    klassVtable::compute_vtable_size_and_num_mirandas(vtable_size,
-                                                      num_miranda_methods,
-                                                      super_klass(),
-                                                      methods,
-                                                      access_flags,
-                                                      class_loader,
-                                                      class_name,
-                                                      local_interfaces,
+    GrowableArray<Method*> all_mirandas(20);
+
+    klassVtable::compute_vtable_size_and_num_mirandas(
+        &vtable_size, &num_miranda_methods, &all_mirandas, super_klass(), methods,
+        access_flags, class_loader, class_name, local_interfaces,
                                                       CHECK_(nullHandle));
 
     // Size of Java itable (in words)
     itable_size = access_flags.is_interface() ? 0 : klassItable::compute_itable_size(transitive_interfaces);
 
+    // Get the padding width from the ContendedPaddingWidth option.
+    // TODO: ask the VM about the specific CPU we are running on
+    int pad_size = ContendedPaddingWidth;
+
     // Field size and offset computation
     int nonstatic_field_size = super_klass() == NULL ? 0 : super_klass->nonstatic_field_size();
 #ifndef PRODUCT
     int orig_nonstatic_field_size = 0;
 #endif
-    int static_field_size = 0;
     int next_static_oop_offset;
     int next_static_double_offset;
     int next_static_word_offset;
     int next_static_short_offset;
     int next_static_byte_offset;
-    int next_static_type_offset;
+    int next_static_padded_offset;
     int next_nonstatic_oop_offset;
     int next_nonstatic_double_offset;
     int next_nonstatic_word_offset;
@@ -3263,11 +3467,36 @@
     int first_nonstatic_oop_offset;
     int first_nonstatic_field_offset;
     int next_nonstatic_field_offset;
+    int next_nonstatic_padded_offset;
+
+    // Count the contended fields by type.
+    int static_contended_count = 0;
+    int nonstatic_contended_count = 0;
+    FieldAllocationCount fac_contended;
+    for (AllFieldStream fs(fields, cp); !fs.done(); fs.next()) {
+      FieldAllocationType atype = (FieldAllocationType) fs.allocation_type();
+      if (fs.is_contended()) {
+        fac_contended.count[atype]++;
+        if (fs.access_flags().is_static()) {
+          static_contended_count++;
+        } else {
+          nonstatic_contended_count++;
+        }
+      }
+    }
+    int contended_count = static_contended_count + nonstatic_contended_count;
+
 
     // Calculate the starting byte offsets
     next_static_oop_offset      = InstanceMirrorKlass::offset_of_static_fields();
+
+    // the class itself is contended; pad before all the fields
+    if (parsed_annotations.is_contended()) {
+      next_static_oop_offset += pad_size;
+    }
+
     next_static_double_offset   = next_static_oop_offset +
-                                  (fac.count[STATIC_OOP] * heapOopSize);
+                                  ((fac.count[STATIC_OOP] - fac_contended.count[STATIC_OOP]) * heapOopSize);
     if ( fac.count[STATIC_DOUBLE] &&
          (Universe::field_type_should_be_aligned(T_DOUBLE) ||
           Universe::field_type_should_be_aligned(T_LONG)) ) {
@@ -3275,25 +3504,29 @@
     }
 
     next_static_word_offset     = next_static_double_offset +
-                                  (fac.count[STATIC_DOUBLE] * BytesPerLong);
+                                  ((fac.count[STATIC_DOUBLE] - fac_contended.count[STATIC_DOUBLE]) * BytesPerLong);
     next_static_short_offset    = next_static_word_offset +
-                                  (fac.count[STATIC_WORD] * BytesPerInt);
+                                  ((fac.count[STATIC_WORD]   - fac_contended.count[STATIC_WORD]) * BytesPerInt);
     next_static_byte_offset     = next_static_short_offset +
-                                  (fac.count[STATIC_SHORT] * BytesPerShort);
-    next_static_type_offset     = align_size_up((next_static_byte_offset +
-                                  fac.count[STATIC_BYTE] ), wordSize );
-    static_field_size           = (next_static_type_offset -
-                                  next_static_oop_offset) / wordSize;
+                                  ((fac.count[STATIC_SHORT]  - fac_contended.count[STATIC_SHORT]) * BytesPerShort);
+    next_static_padded_offset   = next_static_byte_offset +
+                                  ((fac.count[STATIC_BYTE]   - fac_contended.count[STATIC_BYTE]) * 1);
 
     first_nonstatic_field_offset = instanceOopDesc::base_offset_in_bytes() +
                                    nonstatic_field_size * heapOopSize;
+
+    // the class itself is contended; pad before all the fields
+    if (parsed_annotations.is_contended()) {
+      first_nonstatic_field_offset += pad_size;
+    }
+
     next_nonstatic_field_offset = first_nonstatic_field_offset;
 
-    unsigned int nonstatic_double_count = fac.count[NONSTATIC_DOUBLE];
-    unsigned int nonstatic_word_count   = fac.count[NONSTATIC_WORD];
-    unsigned int nonstatic_short_count  = fac.count[NONSTATIC_SHORT];
-    unsigned int nonstatic_byte_count   = fac.count[NONSTATIC_BYTE];
-    unsigned int nonstatic_oop_count    = fac.count[NONSTATIC_OOP];
+    unsigned int nonstatic_double_count = fac.count[NONSTATIC_DOUBLE] - fac_contended.count[NONSTATIC_DOUBLE];
+    unsigned int nonstatic_word_count   = fac.count[NONSTATIC_WORD]   - fac_contended.count[NONSTATIC_WORD];
+    unsigned int nonstatic_short_count  = fac.count[NONSTATIC_SHORT]  - fac_contended.count[NONSTATIC_SHORT];
+    unsigned int nonstatic_byte_count   = fac.count[NONSTATIC_BYTE]   - fac_contended.count[NONSTATIC_BYTE];
+    unsigned int nonstatic_oop_count    = fac.count[NONSTATIC_OOP]    - fac_contended.count[NONSTATIC_OOP];
 
     bool super_has_nonstatic_fields =
             (super_klass() != NULL && super_klass->has_nonstatic_fields());
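Each static section start above is the previous start plus the byte size of the previous type's non-contended fields; the contended statics are subtracted out and laid down later, from next_static_padded_offset onward. A hedged worked example, assuming heapOopSize == 8 and an 8-aligned base == InstanceMirrorKlass::offset_of_static_fields():

    // Class with 2 static oops, 1 long, 3 ints, 0 shorts, 2 bytes, none @Contended:
    //   next_static_double_offset = base + 2*8        = base + 16
    //   next_static_word_offset   = base + 16 + 1*8   = base + 24
    //   next_static_short_offset  = base + 24 + 3*4   = base + 36
    //   next_static_byte_offset   = base + 36 + 0*2   = base + 36
    //   next_static_padded_offset = base + 36 + 2*1   = base + 38
    // Any @Contended statics are then placed from next_static_padded_offset on.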
@@ -3366,12 +3599,12 @@
     }
 
     if( allocation_style == 0 ) {
-      // Fields order: oops, longs/doubles, ints, shorts/chars, bytes
+      // Fields order: oops, longs/doubles, ints, shorts/chars, bytes, padded fields
       next_nonstatic_oop_offset    = next_nonstatic_field_offset;
       next_nonstatic_double_offset = next_nonstatic_oop_offset +
                                       (nonstatic_oop_count * heapOopSize);
     } else if( allocation_style == 1 ) {
-      // Fields order: longs/doubles, ints, shorts/chars, bytes, oops
+      // Fields order: longs/doubles, ints, shorts/chars, bytes, oops, padded fields
       next_nonstatic_double_offset = next_nonstatic_field_offset;
     } else if( allocation_style == 2 ) {
       // Fields allocation: oops fields in super and sub classes are together.
@@ -3450,27 +3683,33 @@
                                   (nonstatic_word_count * BytesPerInt);
     next_nonstatic_byte_offset  = next_nonstatic_short_offset +
                                   (nonstatic_short_count * BytesPerShort);
-
-    int notaligned_offset;
-    if( allocation_style == 0 ) {
-      notaligned_offset = next_nonstatic_byte_offset + nonstatic_byte_count;
-    } else { // allocation_style == 1
-      next_nonstatic_oop_offset = next_nonstatic_byte_offset + nonstatic_byte_count;
+    next_nonstatic_padded_offset = next_nonstatic_byte_offset +
+                                  nonstatic_byte_count;
+
+    // with allocation style 1, the oops go in front of the padded region
+    if( allocation_style == 1 ) {
+      next_nonstatic_oop_offset = next_nonstatic_padded_offset;
       if( nonstatic_oop_count > 0 ) {
         next_nonstatic_oop_offset = align_size_up(next_nonstatic_oop_offset, heapOopSize);
       }
-      notaligned_offset = next_nonstatic_oop_offset + (nonstatic_oop_count * heapOopSize);
+      next_nonstatic_padded_offset = next_nonstatic_oop_offset + (nonstatic_oop_count * heapOopSize);
     }
-    next_nonstatic_type_offset = align_size_up(notaligned_offset, heapOopSize );
-    nonstatic_field_size = nonstatic_field_size + ((next_nonstatic_type_offset
-                                   - first_nonstatic_field_offset)/heapOopSize);
 
     // Iterate over fields again and compute correct offsets.
     // The field allocation type was temporarily stored in the offset slot.
     // oop fields are located before non-oop fields (static and non-static).
     for (AllFieldStream fs(fields, cp); !fs.done(); fs.next()) {
+
+      // skip already laid out fields
+      if (fs.is_offset_set()) continue;
+
+      // contended fields are handled below
+      if (fs.is_contended()) continue;
+
       int real_offset;
-      FieldAllocationType atype = (FieldAllocationType) fs.offset();
+      FieldAllocationType atype = (FieldAllocationType) fs.allocation_type();
+
+      // pack the rest of the fields
       switch (atype) {
         case STATIC_OOP:
           real_offset = next_static_oop_offset;
@@ -3559,13 +3798,225 @@
       fs.set_offset(real_offset);
     }
 
+
+    // Handle the contended cases.
+    //
+    // Each contended field must not share a cache line with another contended field.
+    // In the absence of alignment information, we pessimistically separate the
+    // fields with full-width padding.
+    //
+    // Additionally, this must not break the alignment of the fields themselves,
+    // so we round the alignment up for each field.
+    if (contended_count > 0) {
+
+      // if there is at least one contended field, we need to have pre-padding for them
+      if (nonstatic_contended_count > 0) {
+        next_nonstatic_padded_offset += pad_size;
+      }
+
+      // collect all contended groups
+      BitMap bm(cp->size());
+      for (AllFieldStream fs(fields, cp); !fs.done(); fs.next()) {
+        // skip already laid out fields
+        if (fs.is_offset_set()) continue;
+
+        if (fs.is_contended()) {
+          bm.set_bit(fs.contended_group());
+        }
+      }
+
+      int current_group = -1;
+      while ((current_group = (int)bm.get_next_one_offset(current_group + 1)) != (int)bm.size()) {
+
+        for (AllFieldStream fs(fields, cp); !fs.done(); fs.next()) {
+
+          // skip already laid out fields
+          if (fs.is_offset_set()) continue;
+
+          // skip non-contended fields and fields from different group
+          if (!fs.is_contended() || (fs.contended_group() != current_group)) continue;
+
+          // handle statics below
+          if (fs.access_flags().is_static()) continue;
+
+          int real_offset;
+          FieldAllocationType atype = (FieldAllocationType) fs.allocation_type();
+
+          switch (atype) {
+            case NONSTATIC_BYTE:
+              next_nonstatic_padded_offset = align_size_up(next_nonstatic_padded_offset, 1);
+              real_offset = next_nonstatic_padded_offset;
+              next_nonstatic_padded_offset += 1;
+              break;
+
+            case NONSTATIC_SHORT:
+              next_nonstatic_padded_offset = align_size_up(next_nonstatic_padded_offset, BytesPerShort);
+              real_offset = next_nonstatic_padded_offset;
+              next_nonstatic_padded_offset += BytesPerShort;
+              break;
+
+            case NONSTATIC_WORD:
+              next_nonstatic_padded_offset = align_size_up(next_nonstatic_padded_offset, BytesPerInt);
+              real_offset = next_nonstatic_padded_offset;
+              next_nonstatic_padded_offset += BytesPerInt;
+              break;
+
+            case NONSTATIC_DOUBLE:
+              next_nonstatic_padded_offset = align_size_up(next_nonstatic_padded_offset, BytesPerLong);
+              real_offset = next_nonstatic_padded_offset;
+              next_nonstatic_padded_offset += BytesPerLong;
+              break;
+
+            case NONSTATIC_OOP:
+              next_nonstatic_padded_offset = align_size_up(next_nonstatic_padded_offset, heapOopSize);
+              real_offset = next_nonstatic_padded_offset;
+              next_nonstatic_padded_offset += heapOopSize;
+
+              // Create new oop map
+              nonstatic_oop_offsets[nonstatic_oop_map_count] = real_offset;
+              nonstatic_oop_counts [nonstatic_oop_map_count] = 1;
+              nonstatic_oop_map_count += 1;
+              if( first_nonstatic_oop_offset == 0 ) { // Undefined
+                first_nonstatic_oop_offset = real_offset;
+              }
+              break;
+
+            default:
+              ShouldNotReachHere();
+          }
+
+          if (fs.contended_group() == 0) {
+            // A contended group defines an equivalence class over the fields:
+            // fields within the same contended group are not padded apart from
+            // one another. The only exception is the default group (0), which
+            // defines no such equivalence and so needs padding between its members.
+            next_nonstatic_padded_offset += pad_size;
+          }
+
+          fs.set_offset(real_offset);
+        } // for
+
+        // Start laying out the next group.
+        // Note that this will effectively pad the last group in the back;
+        // this is expected to alleviate memory contention effects for
+        // subclass fields and/or adjacent objects.
+        // If this was the default group, the padding is already in place.
+        if (current_group != 0) {
+          next_nonstatic_padded_offset += pad_size;
+        }
+      }
+
+      // handle static fields
+
+      // if there is at least one contended field, we need to have pre-padding for them
+      if (static_contended_count > 0) {
+        next_static_padded_offset += pad_size;
+      }
+
+      current_group = -1;
+      while ((current_group = (int)bm.get_next_one_offset(current_group + 1)) != (int)bm.size()) {
+
+        for (AllFieldStream fs(fields, cp); !fs.done(); fs.next()) {
+
+          // skip already laid out fields
+          if (fs.is_offset_set()) continue;
+
+          // skip non-contended fields and fields from different group
+          if (!fs.is_contended() || (fs.contended_group() != current_group)) continue;
+
+          // non-statics already handled above
+          if (!fs.access_flags().is_static()) continue;
+
+          int real_offset;
+          FieldAllocationType atype = (FieldAllocationType) fs.allocation_type();
+
+          switch (atype) {
+
+            case STATIC_BYTE:
+              next_static_padded_offset = align_size_up(next_static_padded_offset, 1);
+              real_offset = next_static_padded_offset;
+              next_static_padded_offset += 1;
+              break;
+
+            case STATIC_SHORT:
+              next_static_padded_offset = align_size_up(next_static_padded_offset, BytesPerShort);
+              real_offset = next_static_padded_offset;
+              next_static_padded_offset += BytesPerShort;
+              break;
+
+            case STATIC_WORD:
+              next_static_padded_offset = align_size_up(next_static_padded_offset, BytesPerInt);
+              real_offset = next_static_padded_offset;
+              next_static_padded_offset += BytesPerInt;
+              break;
+
+            case STATIC_DOUBLE:
+              next_static_padded_offset = align_size_up(next_static_padded_offset, BytesPerLong);
+              real_offset = next_static_padded_offset;
+              next_static_padded_offset += BytesPerLong;
+              break;
+
+            case STATIC_OOP:
+              next_static_padded_offset = align_size_up(next_static_padded_offset, heapOopSize);
+              real_offset = next_static_padded_offset;
+              next_static_padded_offset += heapOopSize;
+              break;
+
+            default:
+              ShouldNotReachHere();
+          }
+
+          if (fs.contended_group() == 0) {
+            // A contended group defines an equivalence class over the fields:
+            // fields within the same contended group are not padded apart from
+            // one another. The only exception is the default group (0), which
+            // defines no such equivalence and so needs padding between its members.
+            next_static_padded_offset += pad_size;
+          }
+
+          fs.set_offset(real_offset);
+        } // for
+
+        // Start laying out the next group.
+        // Note that this will effectively pad the last group in the back;
+        // this is expected to alleviate memory contention effects for
+        // subclass fields and/or adjacent objects.
+        // If this was the default group, the padding is already in place.
+        if (current_group != 0) {
+          next_static_padded_offset += pad_size;
+        }
+
+      }
+
+    } // handle contended
+
     // Size of instances
     int instance_size;
 
+    int notaligned_offset = next_nonstatic_padded_offset;
+
+    // Entire class is contended, pad in the back.
+    // This helps to alleviate memory contention effects for subclass fields
+    // and/or adjacent objects.
+    if (parsed_annotations.is_contended()) {
+      notaligned_offset += pad_size;
+      next_static_padded_offset += pad_size;
+    }
+
+    int next_static_type_offset     = align_size_up(next_static_padded_offset, wordSize);
+    int static_field_size           = (next_static_type_offset -
+                                  InstanceMirrorKlass::offset_of_static_fields()) / wordSize;
+
+    next_nonstatic_type_offset = align_size_up(notaligned_offset, heapOopSize );
+    nonstatic_field_size = nonstatic_field_size + ((next_nonstatic_type_offset
+                                   - first_nonstatic_field_offset)/heapOopSize);
+
     next_nonstatic_type_offset = align_size_up(notaligned_offset, wordSize );
     instance_size = align_object_size(next_nonstatic_type_offset / wordSize);
 
-    assert(instance_size == align_object_size(align_size_up((instanceOopDesc::base_offset_in_bytes() + nonstatic_field_size*heapOopSize), wordSize) / wordSize), "consistent layout helper value");
+    assert(instance_size == align_object_size(align_size_up(
+                (instanceOopDesc::base_offset_in_bytes() + nonstatic_field_size*heapOopSize + ((parsed_annotations.is_contended()) ? pad_size : 0)),
+            wordSize) / wordSize), "consistent layout helper value");
 
     // Number of non-static oop map blocks allocated at end of klass.
     const unsigned int total_oop_map_count =
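The two group walks above give every contended group pad_size bytes of isolation on both sides, while fields inside a named group stay densely packed; only the default group (0) pads each of its members individually. A compact standalone sketch of that layout rule (illustrative C++, not the HotSpot code path):

    #include <algorithm>
    #include <vector>

    // Sketch: lay out @Contended fields group by group.
    struct Field { int group; int size; int align; int offset; };

    static int align_up(int x, int a) { return (x + a - 1) & ~(a - 1); }

    static int layout_contended(std::vector<Field>& fields, int offset, int pad_size) {
      offset += pad_size;                                // pre-padding before the region
      int max_group = 0;
      for (size_t i = 0; i < fields.size(); ++i)
        max_group = std::max(max_group, fields[i].group);
      for (int g = 0; g <= max_group; ++g) {
        bool seen = false;
        for (size_t i = 0; i < fields.size(); ++i) {
          if (fields[i].group != g) continue;
          offset = align_up(offset, fields[i].align);
          fields[i].offset = offset;
          offset += fields[i].size;
          if (g == 0) offset += pad_size;                // default group: pad every field
          seen = true;
        }
        if (g != 0 && seen) offset += pad_size;          // named group: pad once, in the back
      }
      return offset;
    }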
@@ -3639,11 +4090,13 @@
     if (is_anonymous())  // I am well known to myself
       cp->klass_at_put(this_class_index, this_klass()); // eagerly resolve
 
+    // Allocate an annotation type if needed.
     if (fields_annotations != NULL ||
         methods_annotations != NULL ||
         methods_parameter_annotations != NULL ||
-        methods_default_annotations != NULL) {
-      // Allocate an annotation type if needed.
+        methods_default_annotations != NULL ||
+        fields_type_annotations != NULL ||
+        methods_type_annotations != NULL) {
       Annotations* anno = Annotations::allocate(loader_data,
                             fields_annotations, methods_annotations,
                             methods_parameter_annotations,
@@ -3653,9 +4106,20 @@
       this_klass->set_annotations(NULL);
     }
 
+    if (fields_type_annotations != NULL ||
+        methods_type_annotations != NULL) {
+      assert(this_klass->annotations() != NULL, "annotations should have been allocated");
+      Annotations* anno = Annotations::allocate(loader_data,
+                                                fields_type_annotations,
+                                                methods_type_annotations,
+                                                NULL,
+                                                NULL, CHECK_(nullHandle));
+      this_klass->annotations()->set_type_annotations(anno);
+    }
 
     this_klass->set_minor_version(minor_version);
     this_klass->set_major_version(major_version);
+    this_klass->set_has_default_methods(has_default_methods);
 
     // Set up Method*::intrinsic_id as soon as we know the names of methods.
     // (We used to do this lazily, but now we query it in Rewriter,
@@ -3673,6 +4137,30 @@
                                         cached_class_file_length);
     }
 
+    // Fill in field values obtained by parse_classfile_attributes
+    if (parsed_annotations.has_any_annotations())
+      parsed_annotations.apply_to(this_klass);
+
+    // Create annotations
+    if (_annotations != NULL && this_klass->annotations() == NULL) {
+      Annotations* anno = Annotations::allocate(loader_data, CHECK_NULL);
+      this_klass->set_annotations(anno);
+    }
+    apply_parsed_class_attributes(this_klass);
+
+    // Create type annotations
+    if (_type_annotations != NULL) {
+      if (this_klass->annotations() == NULL) {
+        Annotations* anno = Annotations::allocate(loader_data, CHECK_NULL);
+        this_klass->set_annotations(anno);
+      }
+      if (this_klass->annotations()->type_annotations() == NULL) {
+        Annotations* anno = Annotations::allocate(loader_data, CHECK_NULL);
+        this_klass->annotations()->set_type_annotations(anno);
+      }
+      this_klass->annotations()->type_annotations()->set_class_annotations(_type_annotations);
+    }
+
     // Miranda methods
     if ((num_miranda_methods > 0) ||
         // if this class introduced new miranda methods or
@@ -3682,18 +4170,6 @@
       this_klass->set_has_miranda_methods(); // then set a flag
     }
 
-    // Fill in field values obtained by parse_classfile_attributes
-    if (parsed_annotations.has_any_annotations()) {
-      parsed_annotations.apply_to(this_klass);
-    }
-    // Create annotations
-    if (_annotations != NULL && this_klass->annotations() == NULL) {
-      Annotations* anno = Annotations::allocate(loader_data, CHECK_NULL);
-      this_klass->set_annotations(anno);
-    }
-    apply_parsed_class_attributes(this_klass);
-
-    // Compute transitive closure of interfaces this class implements
     this_klass->set_transitive_interfaces(transitive_interfaces);
 
     // Fill in information needed to compute superclasses.
@@ -3702,6 +4178,7 @@
     // Initialize itable offset tables
     klassItable::setup_itable_offset_table(this_klass);
 
+    // The transitive closure of interfaces was computed and recorded above.
     // Do final class setup
     fill_oop_maps(this_klass, nonstatic_oop_map_count, nonstatic_oop_offsets, nonstatic_oop_counts);
 
@@ -3723,7 +4200,25 @@
 
     // check that if this class is an interface then it doesn't have static methods
     if (this_klass->is_interface()) {
-      check_illegal_static_method(this_klass, CHECK_(nullHandle));
+      /* Interface methods in a JAVA 8 classfile may be static */
+      if (_major_version < JAVA_8_VERSION) {
+        check_illegal_static_method(this_klass, CHECK_(nullHandle));
+      }
+    }
+
+
+#ifdef ASSERT
+    if (ParseAllGenericSignatures) {
+      parseAndPrintGenericSignatures(this_klass, CHECK_(nullHandle));
+    }
+#endif
+
+    // Generate any default methods - default methods are interface methods
+    // that have a default implementation.  This is new with the Lambda project.
+    if (has_default_methods && !access_flags.is_interface() &&
+        local_interfaces->length() > 0) {
+      DefaultMethods::generate_default_methods(
+          this_klass(), &all_mirandas, CHECK_(nullHandle));
     }
 
     // Allocate mirror and initialize static fields
@@ -3744,6 +4239,7 @@
                                              false /* not shared class */);
 
     if (TraceClassLoading) {
+      ResourceMark rm;
       // print in a single call to reduce interleaving of output
       if (cfs->source() != NULL) {
         tty->print("[Loaded %s from %s]\n", this_klass->external_name(),
@@ -3758,15 +4254,15 @@
           tty->print("[Loaded %s]\n", this_klass->external_name());
         }
       } else {
-        ResourceMark rm;
         tty->print("[Loaded %s from %s]\n", this_klass->external_name(),
                    InstanceKlass::cast(class_loader->klass())->external_name());
       }
     }
 
     if (TraceClassResolution) {
+      ResourceMark rm;
       // print out the superclass.
-      const char * from = Klass::cast(this_klass())->external_name();
+      const char * from = this_klass()->external_name();
       if (this_klass->java_super() != NULL) {
         tty->print("RESOLVE %s %s (super)\n", from, InstanceKlass::cast(this_klass->java_super())->external_name());
       }
@@ -3785,6 +4281,7 @@
 
 #ifndef PRODUCT
     if( PrintCompactFieldsSavings ) {
+      ResourceMark rm;
       if( nonstatic_field_size < orig_nonstatic_field_size ) {
         tty->print("[Saved %d of %d bytes in %s]\n",
                  (orig_nonstatic_field_size - nonstatic_field_size)*heapOopSize,
@@ -3799,6 +4296,18 @@
     }
 #endif
 
+#ifndef PRODUCT
+    if (PrintFieldLayout) {
+      print_field_layout(name,
+            fields,
+            cp,
+            instance_size,
+            first_nonstatic_field_offset,
+            next_nonstatic_field_offset,
+            next_static_type_offset);
+    }
+#endif
+
     // preserve result across HandleMark
     preserve_this_klass = this_klass();
   }
@@ -3811,6 +4320,37 @@
   return this_klass;
 }
 
+void ClassFileParser::print_field_layout(Symbol* name,
+                                         Array<u2>* fields,
+                                         constantPoolHandle cp,
+                                         int instance_size,
+                                         int instance_fields_start,
+                                         int instance_fields_end,
+                                         int static_fields_end) {
+  tty->print("%s: field layout\n", name->as_klass_external_name());
+  tty->print("  @%3d %s\n", instance_fields_start, "--- instance fields start ---");
+  for (AllFieldStream fs(fields, cp); !fs.done(); fs.next()) {
+    if (!fs.access_flags().is_static()) {
+      tty->print("  @%3d \"%s\" %s\n",
+          fs.offset(),
+          fs.name()->as_klass_external_name(),
+          fs.signature()->as_klass_external_name());
+    }
+  }
+  tty->print("  @%3d %s\n", instance_fields_end, "--- instance fields end ---");
+  tty->print("  @%3d %s\n", instance_size * wordSize, "--- instance ends ---");
+  tty->print("  @%3d %s\n", InstanceMirrorKlass::offset_of_static_fields(), "--- static fields start ---");
+  for (AllFieldStream fs(fields, cp); !fs.done(); fs.next()) {
+    if (fs.access_flags().is_static()) {
+      tty->print("  @%3d \"%s\" %s\n",
+          fs.offset(),
+          fs.name()->as_klass_external_name(),
+          fs.signature()->as_klass_external_name());
+    }
+  }
+  tty->print("  @%3d %s\n", static_fields_end, "--- static fields end ---");
+  tty->print("\n");
+}
 
 unsigned int
 ClassFileParser::compute_oop_map_count(instanceKlassHandle super,
@@ -3917,13 +4457,13 @@
     // java.lang.Object has empty default constructor
     k->set_has_vanilla_constructor();
   } else {
-    if (Klass::cast(super)->has_vanilla_constructor() &&
+    if (super->has_vanilla_constructor() &&
         _has_vanilla_constructor) {
       k->set_has_vanilla_constructor();
     }
 #ifdef ASSERT
     bool v = false;
-    if (Klass::cast(super)->has_vanilla_constructor()) {
+    if (super->has_vanilla_constructor()) {
       Method* constructor = k->find_method(vmSymbols::object_initializer_name(
 ), vmSymbols::void_method_signature());
       if (constructor != NULL && constructor->is_vanilla_constructor()) {
@@ -3939,8 +4479,7 @@
   assert(k->size_helper() > 0, "layout_helper is initialized");
   if ((!RegisterFinalizersAtInit && k->has_finalizer())
       || k->is_abstract() || k->is_interface()
-      || (k->name() == vmSymbols::java_lang_Class()
-          && k->class_loader_data()->is_the_null_class_loader_data())
+      || (k->name() == vmSymbols::java_lang_Class() && k->class_loader() == NULL)
       || k->size_helper() >= FastAllocateSizeLimit) {
     // Forbid fast-path allocation.
     jint lh = Klass::instance_layout_helper(k->size_helper(), true);
@@ -4065,7 +4604,7 @@
   int lng = local_interfaces->length();
   for (int i = lng - 1; i >= 0; i--) {
     Klass* k = local_interfaces->at(i);
-    assert (k != NULL && Klass::cast(k)->is_interface(), "invalid interface");
+    assert (k != NULL && k->is_interface(), "invalid interface");
     if (!Reflection::verify_class_access(this_klass(), k, false)) {
       ResourceMark rm(THREAD);
       Exceptions::fthrow(
@@ -4128,7 +4667,7 @@
           }
 
           // continue to look from super_m's holder's super.
-          k = InstanceKlass::cast(super_m->method_holder())->super();
+          k = super_m->method_holder()->super();
           continue;
         }
 
@@ -4262,15 +4801,41 @@
   const bool is_bridge       = (flags & JVM_ACC_BRIDGE)       != 0;
   const bool is_strict       = (flags & JVM_ACC_STRICT)       != 0;
   const bool is_synchronized = (flags & JVM_ACC_SYNCHRONIZED) != 0;
+  const bool is_protected    = (flags & JVM_ACC_PROTECTED)    != 0;
   const bool major_gte_15    = _major_version >= JAVA_1_5_VERSION;
+  const bool major_gte_8     = _major_version >= JAVA_8_VERSION;
   const bool is_initializer  = (name == vmSymbols::object_initializer_name());
 
   bool is_illegal = false;
 
   if (is_interface) {
-    if (!is_abstract || !is_public || is_static || is_final ||
-        is_native || (major_gte_15 && (is_synchronized || is_strict))) {
-      is_illegal = true;
+    if (major_gte_8) {
+      // Class file version is JAVA_8_VERSION or later. Methods of
+      // interfaces may set any of the flags except ACC_PROTECTED,
+      // ACC_FINAL, ACC_NATIVE, and ACC_SYNCHRONIZED; they must
+      // have exactly one of the ACC_PUBLIC or ACC_PRIVATE flags set.
+      if ((is_public == is_private) || /* exactly one of public/private must be set; equality (XNOR) is illegal */
+          (is_native || is_protected || is_final || is_synchronized) ||
+          // If a specific method of a class or interface has its
+          // ACC_ABSTRACT flag set, it must not have any of its
+          // ACC_FINAL, ACC_NATIVE, ACC_PRIVATE, ACC_STATIC,
+          // ACC_STRICT, or ACC_SYNCHRONIZED flags set.  No need to
+          // check for ACC_FINAL, ACC_NATIVE or ACC_SYNCHRONIZED as
+          // those flags are illegal irrespective of ACC_ABSTRACT being set or not.
+          (is_abstract && (is_private || is_static || is_strict))) {
+        is_illegal = true;
+      }
+    } else if (major_gte_15) {
+      // Class file version in the interval [JAVA_1_5_VERSION, JAVA_8_VERSION)
+      if (!is_public || is_static || is_final || is_synchronized ||
+          is_native || !is_abstract || is_strict) {
+        is_illegal = true;
+      }
+    } else {
+      // Class file version is pre-JAVA_1_5_VERSION
+      if (!is_public || is_static || is_final || is_native || !is_abstract) {
+        is_illegal = true;
+      }
     }
   } else { // not interface
     if (is_initializer) {
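The three version regimes above can be summarized as a single predicate. A hedged standalone sketch (mask values per the JVM specification; the helper itself is illustrative):

    // Sketch: is this access-flag combination legal for an interface method?
    static bool interface_method_flags_legal(int flags, int major_version) {
      const bool pub    = (flags & 0x0001) != 0;  // ACC_PUBLIC
      const bool priv   = (flags & 0x0002) != 0;  // ACC_PRIVATE
      const bool prot   = (flags & 0x0004) != 0;  // ACC_PROTECTED
      const bool stat   = (flags & 0x0008) != 0;  // ACC_STATIC
      const bool fin    = (flags & 0x0010) != 0;  // ACC_FINAL
      const bool sync   = (flags & 0x0020) != 0;  // ACC_SYNCHRONIZED
      const bool nat    = (flags & 0x0100) != 0;  // ACC_NATIVE
      const bool abst   = (flags & 0x0400) != 0;  // ACC_ABSTRACT
      const bool strict = (flags & 0x0800) != 0;  // ACC_STRICT
      if (major_version >= 52) {                  // JAVA_8_VERSION
        if (pub == priv)                      return false;  // exactly one of the two
        if (nat || prot || fin || sync)       return false;
        if (abst && (priv || stat || strict)) return false;
        return true;
      }
      if (major_version >= 49)                    // [JAVA_1_5_VERSION, JAVA_8_VERSION)
        return pub && abst && !stat && !fin && !sync && !nat && !strict;
      return pub && abst && !stat && !fin && !nat; // pre-1.5
    }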
--- a/src/share/vm/classfile/classFileParser.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/classFileParser.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -64,6 +64,7 @@
   int        _sde_length;
   Array<u2>* _inner_classes;
   AnnotationArray* _annotations;
+  AnnotationArray* _type_annotations;
 
   void set_class_synthetic_flag(bool x)           { _synthetic_flag = x; }
   void set_class_sourcefile(Symbol* x)            { _sourcefile = x; }
@@ -71,12 +72,14 @@
   void set_class_sde_buffer(char* x, int len)     { _sde_buffer = x; _sde_length = len; }
   void set_class_inner_classes(Array<u2>* x)      { _inner_classes = x; }
   void set_class_annotations(AnnotationArray* x)  { _annotations = x; }
+  void set_class_type_annotations(AnnotationArray* x)  { _type_annotations = x; }
   void init_parsed_class_attributes() {
     _synthetic_flag = false;
     _sourcefile = NULL;
     _generic_signature = NULL;
     _sde_buffer = NULL;
     _sde_length = 0;
+    _annotations = _type_annotations = NULL;
     // initialize the other flags too:
     _has_finalizer = _has_empty_finalizer = _has_vanilla_constructor = false;
     _max_bootstrap_specifier_index = -1;
@@ -92,17 +95,20 @@
       _method_DontInline,
       _method_LambdaForm_Compiled,
       _method_LambdaForm_Hidden,
+      _sun_misc_Contended,
       _annotation_LIMIT
     };
     const Location _location;
     int _annotations_present;
+    u2 _contended_group;
+
     AnnotationCollector(Location location)
     : _location(location), _annotations_present(0)
     {
       assert((int)_annotation_LIMIT <= (int)sizeof(_annotations_present) * BitsPerByte, "");
     }
     // If this annotation name has an ID, report it (or _none).
-    ID annotation_index(Symbol* name);
+    ID annotation_index(ClassLoaderData* loader_data, Symbol* name);
     // Set the annotation name:
     void set_annotation(ID id) {
       assert((int)id >= 0 && (int)id < (int)_annotation_LIMIT, "oob");
@@ -111,6 +117,12 @@
     // Report if the annotation is present.
     bool has_any_annotations() { return _annotations_present != 0; }
     bool has_annotation(ID id) { return (nth_bit((int)id) & _annotations_present) != 0; }
+
+    void set_contended_group(u2 group) { _contended_group = group; }
+    u2 contended_group() { return _contended_group; }
+
+    void set_contended(bool contended) { set_annotation(_sun_misc_Contended); }
+    bool is_contended() { return has_annotation(_sun_misc_Contended); }
   };
   class FieldAnnotationCollector: public AnnotationCollector {
   public:
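AnnotationCollector records which well-known annotations it has seen as a bitmask, one bit per ID; that is what the constructor's assert about _annotation_LIMIT fitting in _annotations_present guards. A minimal sketch of the pattern:

    // Sketch: presence bitmask over a small enum of well-known annotation IDs.
    enum ID { _method_ForceInline, _sun_misc_Contended, _annotation_LIMIT };

    struct Collector {
      int _present;
      Collector() : _present(0) {}
      void set(ID id)       { _present |= 1 << (int)id; }
      bool has(ID id) const { return (_present & (1 << (int)id)) != 0; }
      bool any() const      { return _present != 0; }
    };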
@@ -151,6 +163,7 @@
                                   ClassLoaderData* loader_data,
                                   Handle protection_domain,
                                   Symbol* class_name,
+                                  bool* has_default_methods,
                                   TRAPS);
   void record_defined_class_dependencies(instanceKlassHandle defined_klass, TRAPS);
 
@@ -162,6 +175,7 @@
                               bool* is_synthetic_addr,
                               u2* generic_signature_index_addr,
                               AnnotationArray** field_annotations,
+                              AnnotationArray** field_type_annotations,
                               FieldAnnotationCollector* parsed_annotations,
                               TRAPS);
   Array<u2>* parse_fields(ClassLoaderData* loader_data,
@@ -169,8 +183,17 @@
                           constantPoolHandle cp, bool is_interface,
                           FieldAllocationCount *fac,
                           Array<AnnotationArray*>** fields_annotations,
+                          Array<AnnotationArray*>** fields_type_annotations,
                           u2* java_fields_count_ptr, TRAPS);
 
+  void print_field_layout(Symbol* name,
+                          Array<u2>* fields,
+                          constantPoolHandle cp,
+                          int instance_size,
+                          int instance_fields_start,
+                          int instance_fields_end,
+                          int static_fields_end);
+
   // Method parsing
   methodHandle parse_method(ClassLoaderData* loader_data,
                             constantPoolHandle cp,
@@ -179,6 +202,7 @@
                             AnnotationArray** method_annotations,
                             AnnotationArray** method_parameter_annotations,
                             AnnotationArray** method_default_annotations,
+                            AnnotationArray** method_type_annotations,
                             TRAPS);
   Array<Method*>* parse_methods(ClassLoaderData* loader_data,
                                 constantPoolHandle cp,
@@ -188,12 +212,15 @@
                                 Array<AnnotationArray*>** methods_annotations,
                                 Array<AnnotationArray*>** methods_parameter_annotations,
                                 Array<AnnotationArray*>** methods_default_annotations,
+                                Array<AnnotationArray*>** methods_type_annotations,
+                                bool* has_default_methods,
                                 TRAPS);
   Array<int>* sort_methods(ClassLoaderData* loader_data,
                            Array<Method*>* methods,
                            Array<AnnotationArray*>* methods_annotations,
                            Array<AnnotationArray*>* methods_parameter_annotations,
                            Array<AnnotationArray*>* methods_default_annotations,
+                           Array<AnnotationArray*>* methods_type_annotations,
                                 TRAPS);
   u2* parse_exception_table(ClassLoaderData* loader_data,
                             u4 code_length, u4 exception_table_length,
@@ -237,7 +264,8 @@
                                         int runtime_invisible_annotations_length, TRAPS);
   int skip_annotation(u1* buffer, int limit, int index);
   int skip_annotation_value(u1* buffer, int limit, int index);
-  void parse_annotations(u1* buffer, int limit, constantPoolHandle cp,
+  void parse_annotations(ClassLoaderData* loader_data,
+                         u1* buffer, int limit, constantPoolHandle cp,
                          /* Results (currently, only one result is supported): */
                          AnnotationCollector* result,
                          TRAPS);
@@ -361,16 +389,16 @@
   // "parsed_name" is updated by this method, and is the name found
   // while parsing the stream.
   instanceKlassHandle parseClassFile(Symbol* name,
-                                     Handle class_loader,
+                                     ClassLoaderData* loader_data,
                                      Handle protection_domain,
                                      TempNewSymbol& parsed_name,
                                      bool verify,
                                      TRAPS) {
     KlassHandle no_host_klass;
-    return parseClassFile(name, class_loader, protection_domain, no_host_klass, NULL, parsed_name, verify, THREAD);
+    return parseClassFile(name, loader_data, protection_domain, no_host_klass, NULL, parsed_name, verify, THREAD);
   }
   instanceKlassHandle parseClassFile(Symbol* name,
-                                     Handle class_loader,
+                                     ClassLoaderData* loader_data,
                                      Handle protection_domain,
                                      KlassHandle host_klass,
                                      GrowableArray<Handle>* cp_patches,
--- a/src/share/vm/classfile/classFileStream.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/classFileStream.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -93,3 +93,10 @@
   }
   _current += length * 2;
 }
+
+void ClassFileStream::skip_u4(int length, TRAPS) {
+  if (_need_verify) {
+    guarantee_more(length * 4, CHECK);
+  }
+  _current += length * 4;
+}
--- a/src/share/vm/classfile/classFileStream.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/classFileStream.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -133,6 +133,11 @@
     _current += 2 * length;
   }
 
+  void skip_u4(int length, TRAPS);
+  void skip_u4_fast(int length) {
+    _current += 4 * length;
+  }
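+  // (skip_u4 is the bounds-checked variant: when verification is enabled it
+  // guarantees that 4 * length readable bytes remain before advancing;
+  // skip_u4_fast advances unconditionally.)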
+
   // Tells whether eos is reached
   bool at_eos() const          { return _current == _buffer_end; }
 };
--- a/src/share/vm/classfile/classLoader.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/classLoader.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,6 +26,7 @@
 #include "classfile/classFileParser.hpp"
 #include "classfile/classFileStream.hpp"
 #include "classfile/classLoader.hpp"
+#include "classfile/classLoaderData.inline.hpp"
 #include "classfile/javaClasses.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
@@ -605,8 +606,10 @@
   // Load zip library
   char path[JVM_MAXPATHLEN];
   char ebuf[1024];
-  os::dll_build_name(path, sizeof(path), Arguments::get_dll_dir(), "zip");
-  void* handle = os::dll_load(path, ebuf, sizeof ebuf);
+  void* handle = NULL;
+  if (os::dll_build_name(path, sizeof(path), Arguments::get_dll_dir(), "zip")) {
+    handle = os::dll_load(path, ebuf, sizeof ebuf);
+  }
   if (handle == NULL) {
     vm_exit_during_initialization("Unable to load ZIP library", path);
   }
@@ -908,11 +911,11 @@
 
     // class file found, parse it
     ClassFileParser parser(stream);
-    Handle class_loader;
+    ClassLoaderData* loader_data = ClassLoaderData::the_null_class_loader_data();
     Handle protection_domain;
     TempNewSymbol parsed_name = NULL;
     instanceKlassHandle result = parser.parseClassFile(h_name,
-                                                       class_loader,
+                                                       loader_data,
                                                        protection_domain,
                                                        parsed_name,
                                                        false,
--- a/src/share/vm/classfile/classLoaderData.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/classLoaderData.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -64,14 +64,22 @@
 
 ClassLoaderData * ClassLoaderData::_the_null_class_loader_data = NULL;
 
-ClassLoaderData::ClassLoaderData(Handle h_class_loader) : _class_loader(h_class_loader()),
+ClassLoaderData::ClassLoaderData(Handle h_class_loader, bool is_anonymous) :
+  _class_loader(h_class_loader()),
+  _is_anonymous(is_anonymous), _keep_alive(is_anonymous), // initially
   _metaspace(NULL), _unloading(false), _klasses(NULL),
-  _claimed(0), _jmethod_ids(NULL), _handles(NULL),
-  _deallocate_list(NULL), _next(NULL),
+  _claimed(0), _jmethod_ids(NULL), _handles(NULL), _deallocate_list(NULL),
+  _next(NULL), _dependencies(NULL),
   _metaspace_lock(new Mutex(Monitor::leaf+1, "Metaspace allocation lock", true)) {
     // empty
 }
 
+void ClassLoaderData::init_dependencies(TRAPS) {
+  // Create empty dependencies array to add to. CMS requires this to be
+  // an oop so that it can track additions via card marks.  We think.
+  _dependencies = (oop)oopFactory::new_objectArray(2, CHECK);
+}
+
 bool ClassLoaderData::claim() {
   if (_claimed == 1) {
     return false;
@@ -86,6 +94,7 @@
   }
 
   f->do_oop(&_class_loader);
+  f->do_oop(&_dependencies);
   _handles->oops_do(f);
   if (klass_closure != NULL) {
     classes_do(klass_closure);
@@ -110,70 +119,102 @@
   ClassLoaderData * const from_cld = this;
   ClassLoaderData * const to_cld = k->class_loader_data();
 
-  // Records dependency between non-null class loaders only.
-  if (to_cld->is_the_null_class_loader_data() || from_cld->is_the_null_class_loader_data()) {
+  // A dependency on the null class loader data doesn't need to be recorded
+  // because the null class loader data never goes away.
+  if (to_cld->is_the_null_class_loader_data()) {
     return;
   }
 
-  // Check that this dependency isn't from the same or parent class_loader
-  oop to = to_cld->class_loader();
-  oop from = from_cld->class_loader();
+  oop to;
+  if (to_cld->is_anonymous()) {
+    // Anonymous class dependencies are through the mirror.
+    to = k->java_mirror();
+  } else {
+    to = to_cld->class_loader();
 
-  oop curr = from;
-  while (curr != NULL) {
-    if (curr == to) {
-      return; // this class loader is in the parent list, no need to add it.
+    // If from_cld is anonymous, even if its class_loader is a parent of 'to',
+    // we still have to add it.  The class_loader won't keep from_cld alive.
+    if (!from_cld->is_anonymous()) {
+      // Check that this dependency isn't from the same or parent class_loader
+      oop from = from_cld->class_loader();
+
+      oop curr = from;
+      while (curr != NULL) {
+        if (curr == to) {
+          return; // this class loader is in the parent list, no need to add it.
+        }
+        curr = java_lang_ClassLoader::parent(curr);
+      }
     }
-    curr = java_lang_ClassLoader::parent(curr);
   }
 
   // It's a dependency we won't find through GC, add it. This is relatively rare
-  from_cld->add_dependency(to_cld, CHECK);
+  // Must wrap in a Handle across a possible GC point.
+  Handle dependency(THREAD, to);
+  from_cld->add_dependency(dependency, CHECK);
 }
 
-bool ClassLoaderData::has_dependency(ClassLoaderData* dependency) {
-  oop loader = dependency->class_loader();
 
-  // Get objArrayOop out of the class_loader oop and see if this dependency
-  // is there.  Don't safepoint!  These are all oops.
-  // Dependency list is (oop class_loader, objArrayOop next)
-  objArrayOop ok = (objArrayOop)java_lang_ClassLoader::dependencies(class_loader());
+void ClassLoaderData::add_dependency(Handle dependency, TRAPS) {
+  // Check first if this dependency is already in the list.
+  // Save a pointer to the last to add to under the lock.
+  objArrayOop ok = (objArrayOop)_dependencies;
+  objArrayOop last = NULL;
   while (ok != NULL) {
-    if (ok->obj_at(0) == loader) {
-      return true;
+    last = ok;
+    if (ok->obj_at(0) == dependency()) {
+      // Don't need to add it
+      return;
     }
     ok = (objArrayOop)ok->obj_at(1);
   }
-  return false;
+
+  // Must wrap in a Handle across GC points
+  assert (last != NULL, "dependencies should be initialized");
+  objArrayHandle last_handle(THREAD, last);
+
+  // Create a new dependency node with fields for (class_loader or mirror, next)
+  objArrayOop deps = oopFactory::new_objectArray(2, CHECK);
+  deps->obj_at_put(0, dependency());
+
+  // Must wrap in a Handle across GC points
+  objArrayHandle new_dependency(THREAD, deps);
+
+  // Add the dependency under lock
+  locked_add_dependency(last_handle, new_dependency);
 }
 
-void ClassLoaderData::add_dependency(ClassLoaderData* dependency, TRAPS) {
-  // Minimize the number of duplicates in the list.
-  if (has_dependency(dependency)) {
-    return;
-  }
+void ClassLoaderData::locked_add_dependency(objArrayHandle last_handle,
+                                            objArrayHandle new_dependency) {
 
-  // Create a new dependency node with fields for (class_loader, next)
-  objArrayOop deps = oopFactory::new_objectArray(2, CHECK);
-  deps->obj_at_put(0, dependency->class_loader());
+  // Have to lock and put the new dependency on the end of the dependency
+  // array so the card mark for CMS sees that this dependency is new.
+  // Can probably do this lock free with some effort.
+  MutexLockerEx ml(metaspace_lock(),  Mutex::_no_safepoint_check_flag);
+
+  oop loader_or_mirror = new_dependency->obj_at(0);
 
-  // Add this lock free, using compare and exchange, need barriers for GC
-  // Do the barrier first.
-  HeapWord* addr = java_lang_ClassLoader::dependencies_addr(class_loader());
-  while (true) {
-    oop old_dependency = java_lang_ClassLoader::dependencies(class_loader());
-    deps->obj_at_put(1, old_dependency);
-
-    oop newold = oopDesc::atomic_compare_exchange_oop((oop)deps, addr, old_dependency, true);
-    if (newold == old_dependency) {
-      update_barrier_set((void*)addr, (oop)deps);
-      // we won the race to add this dependency
-      break;
+  // Since the dependencies are only added, add to the end.
+  objArrayOop end = last_handle();
+  objArrayOop last = NULL;
+  while (end != NULL) {
+    last = end;
+    // check again if another thread added it to the end.
+    if (end->obj_at(0) == loader_or_mirror) {
+      // Don't need to add it
+      return;
     }
+    end = (objArrayOop)end->obj_at(1);
+  }
+  assert (last != NULL, "dependencies should be initialized");
+  // fill in the first element with the oop in new_dependency.
+  if (last->obj_at(0) == NULL) {
+    last->obj_at_put(0, new_dependency->obj_at(0));
+  } else {
+    last->obj_at_put(1, new_dependency());
   }
 }
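+
+// Sketch of the resulting list shape (illustrative, given the code above):
+//   (NULL, NULL)                     after init_dependencies()
+//   (dep1, NULL)                     first add fills the sentinel's slot 0
+//   (dep1, (dep2, NULL))             later adds chain a fresh (oop, next) pair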
 
-
 void ClassLoaderDataGraph::clear_claimed_marks() {
   for (ClassLoaderData* cld = _head; cld != NULL; cld = cld->next()) {
     cld->clear_claimed();
@@ -187,7 +228,7 @@
   // link the new item into the list
   _klasses = k;
 
-  if (TraceClassLoaderData && k->class_loader_data() != NULL) {
+  if (TraceClassLoaderData && Verbose && k->class_loader_data() != NULL) {
     ResourceMark rm;
     tty->print_cr("[TraceClassLoaderData] Adding k: " PTR_FORMAT " %s to CLD: "
                   PTR_FORMAT " loader: " PTR_FORMAT " %s",
@@ -195,8 +236,7 @@
                   k->external_name(),
                   k->class_loader_data(),
                   k->class_loader(),
-                  k->class_loader() != NULL ? k->class_loader()->klass()->external_name() : "NULL"
-      );
+                  loader_name());
   }
 }
 
@@ -221,6 +261,31 @@
   ShouldNotReachHere();   // should have found this class!!
 }
 
+void ClassLoaderData::unload() {
+  _unloading = true;
+
+  if (TraceClassLoaderData) {
+    ResourceMark rm;
+    tty->print("[ClassLoaderData: unload loader data "PTR_FORMAT, this);
+    tty->print(" for instance "PTR_FORMAT" of %s", class_loader(),
+               loader_name());
+    if (is_anonymous()) {
+      tty->print(" for anonymous class  "PTR_FORMAT " ", _klasses);
+    }
+    tty->print_cr("]");
+  }
+}
+
+bool ClassLoaderData::is_alive(BoolObjectClosure* is_alive_closure) const {
+  bool alive =
+    is_anonymous() ?
+       is_alive_closure->do_object_b(_klasses->java_mirror()) :
+       class_loader() == NULL || is_alive_closure->do_object_b(class_loader());
+  assert(!alive || claimed(), "must be claimed");
+  return alive;
+}
+
+
 ClassLoaderData::~ClassLoaderData() {
   Metaspace *m = _metaspace;
   if (m != NULL) {
@@ -253,6 +318,7 @@
 }
 
 Metaspace* ClassLoaderData::metaspace_non_null() {
+  assert(!DumpSharedSpaces, "wrong metaspace!");
   // If the metaspace has not been allocated, create a new one.  Might want
   // to create smaller arena for Reflection class loaders also.
   // The reason for the delayed allocation is because some class loaders are
@@ -263,12 +329,21 @@
     if (_metaspace != NULL) {
       return _metaspace;
     }
-    if (class_loader() == NULL) {
-      assert(this == the_null_class_loader_data(), "Must be");
-      size_t word_size = Metaspace::first_chunk_word_size();
-      set_metaspace(new Metaspace(_metaspace_lock, word_size));
+    if (this == the_null_class_loader_data()) {
+      assert (class_loader() == NULL, "Must be");
+      set_metaspace(new Metaspace(_metaspace_lock, Metaspace::BootMetaspaceType));
+    } else if (is_anonymous()) {
+      if (TraceClassLoaderData && Verbose && class_loader() != NULL) {
+        tty->print_cr("is_anonymous: %s", class_loader()->klass()->internal_name());
+      }
+      set_metaspace(new Metaspace(_metaspace_lock, Metaspace::AnonymousMetaspaceType));
+    } else if (class_loader()->is_a(SystemDictionary::reflect_DelegatingClassLoader_klass())) {
+      if (TraceClassLoaderData && Verbose && class_loader() != NULL) {
+        tty->print_cr("is_reflection: %s", class_loader()->klass()->internal_name());
+      }
+      set_metaspace(new Metaspace(_metaspace_lock, Metaspace::ReflectionMetaspaceType));
     } else {
-      set_metaspace(new Metaspace(_metaspace_lock));  // default size for now.
+      set_metaspace(new Metaspace(_metaspace_lock, Metaspace::StandardMetaspaceType));
     }
   }
   return _metaspace;
@@ -325,12 +400,18 @@
   }
 }
 
-#ifndef PRODUCT
-void ClassLoaderData::print_loader(ClassLoaderData *loader_data, outputStream* out) {
-  oop class_loader = loader_data->class_loader();
-  out->print("%s", SystemDictionary::loader_name(class_loader));
+// These anonymous class loader data objects contain classes used for JSR292
+ClassLoaderData* ClassLoaderData::anonymous_class_loader_data(oop loader, TRAPS) {
+  // Add a new class loader data to the graph.
+  return ClassLoaderDataGraph::add(NULL, loader, CHECK_NULL);
 }
 
+const char* ClassLoaderData::loader_name() {
+  // Handles null class loader
+  return SystemDictionary::loader_name(class_loader());
+}
+
+#ifndef PRODUCT
 // Define to dump klasses
 #undef CLD_DUMP_KLASSES
 
@@ -338,8 +419,7 @@
   ResourceMark rm;
   out->print("ClassLoaderData CLD: "PTR_FORMAT", loader: "PTR_FORMAT", loader_klass: "PTR_FORMAT" %s {",
       this, class_loader(),
-      class_loader() != NULL ? class_loader()->klass() : NULL,
-      class_loader() != NULL ? class_loader()->klass()->external_name() : "NULL");
+      class_loader() != NULL ? class_loader()->klass() : NULL, loader_name());
   if (claimed()) out->print(" claimed ");
   if (is_unloading()) out->print(" unloading ");
   out->print(" handles " INTPTR_FORMAT, handles());
@@ -373,8 +453,8 @@
 void ClassLoaderData::verify() {
   oop cl = class_loader();
 
-  guarantee(this == class_loader_data(cl), "Must be the same");
-  guarantee(cl != NULL || this == ClassLoaderData::the_null_class_loader_data(), "must be");
+  guarantee(this == class_loader_data(cl) || is_anonymous(), "Must be the same");
+  guarantee(cl != NULL || this == ClassLoaderData::the_null_class_loader_data() || is_anonymous(), "must be");
 
   // Verify the integrity of the allocated space.
   if (metaspace_or_null() != NULL) {
@@ -387,6 +467,7 @@
   }
 }
 
+
 // GC root of class loader data created.
 ClassLoaderData* ClassLoaderDataGraph::_head = NULL;
 ClassLoaderData* ClassLoaderDataGraph::_unloading = NULL;
@@ -395,19 +476,23 @@
 
 // Add a new class loader data node to the list.  Assign the newly created
 // ClassLoaderData into the java/lang/ClassLoader object as a hidden field
-ClassLoaderData* ClassLoaderDataGraph::add(ClassLoaderData** cld_addr, Handle loader_data) {
+ClassLoaderData* ClassLoaderDataGraph::add(ClassLoaderData** cld_addr, Handle loader, TRAPS) {
   // Not assigned a class loader data yet.
   // Create one.
   ClassLoaderData* *list_head = &_head;
   ClassLoaderData* next = _head;
-  ClassLoaderData* cld = new ClassLoaderData(loader_data);
+
+  bool is_anonymous = (cld_addr == NULL);
+  ClassLoaderData* cld = new ClassLoaderData(loader, is_anonymous);
 
-  // First, Atomically set it.
-  ClassLoaderData* old = (ClassLoaderData*) Atomic::cmpxchg_ptr(cld, cld_addr, NULL);
-  if (old != NULL) {
-    delete cld;
-    // Returns the data.
-    return old;
+  if (cld_addr != NULL) {
+    // First, Atomically set it
+    ClassLoaderData* old = (ClassLoaderData*) Atomic::cmpxchg_ptr(cld, cld_addr, NULL);
+    if (old != NULL) {
+      delete cld;
+      // Returns the data.
+      return old;
+    }
   }
 
   // We won the race, and therefore the task of adding the data to the list of
@@ -417,16 +502,22 @@
     ClassLoaderData* exchanged = (ClassLoaderData*)Atomic::cmpxchg_ptr(cld, list_head, next);
     if (exchanged == next) {
       if (TraceClassLoaderData) {
+        ResourceMark rm;
         tty->print("[ClassLoaderData: ");
         tty->print("create class loader data "PTR_FORMAT, cld);
-        tty->print(" for instance "PTR_FORMAT" of ", cld->class_loader());
-        loader_data->klass()->name()->print_symbol_on(tty);
+        tty->print(" for instance "PTR_FORMAT" of %s", cld->class_loader(),
+                   cld->loader_name());
         tty->print_cr("]");
       }
+      // Create dependencies after the CLD is added to the list.  Otherwise,
+      // the GC will not find the CLD and the _class_loader field will
+      // not be updated.
+      cld->init_dependencies(CHECK_NULL);
       return cld;
     }
     next = exchanged;
   } while (true);
+
 }
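+
+// Race sketch (illustrative): two threads resolving the same loader may both
+// construct a ClassLoaderData; only the cmpxchg winner installs its CLD in
+// the loader's hidden field (skipped when cld_addr is NULL, i.e. anonymous),
+// and the loser deletes its copy.  The winner then prepends to _head with a
+// second cmpxchg, retrying if another CLD was prepended concurrently.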
 
 void ClassLoaderDataGraph::oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
@@ -435,9 +526,19 @@
   }
 }
 
+void ClassLoaderDataGraph::keep_alive_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
+  for (ClassLoaderData* cld = _head; cld != NULL; cld = cld->next()) {
+    if (cld->keep_alive()) {
+      cld->oops_do(f, klass_closure, must_claim);
+    }
+  }
+}
+
 void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
   if (ClassUnloading) {
     ClassLoaderData::the_null_class_loader_data()->oops_do(f, klass_closure, must_claim);
+    // keep any special CLDs alive.
+    ClassLoaderDataGraph::keep_alive_oops_do(f, klass_closure, must_claim);
   } else {
     ClassLoaderDataGraph::oops_do(f, klass_closure, must_claim);
   }
@@ -516,9 +617,10 @@
 }
 #endif // PRODUCT
 
+
 // Move class loader data from main list to the unloaded list for unloading
 // and deallocation later.
-bool ClassLoaderDataGraph::do_unloading(BoolObjectClosure* is_alive) {
+bool ClassLoaderDataGraph::do_unloading(BoolObjectClosure* is_alive_closure) {
   ClassLoaderData* data = _head;
   ClassLoaderData* prev = NULL;
   bool seen_dead_loader = false;
@@ -527,8 +629,7 @@
   bool has_redefined_a_class = JvmtiExport::has_redefined_a_class();
   MetadataOnStackMark md_on_stack;
   while (data != NULL) {
-    if (data->class_loader() == NULL || is_alive->do_object_b(data->class_loader())) {
-      assert(data->claimed(), "class loader data must have been claimed");
+    if (data->keep_alive() || data->is_alive(is_alive_closure)) {
       if (has_redefined_a_class) {
         data->classes_do(InstanceKlass::purge_previous_versions);
       }
@@ -539,13 +640,7 @@
     }
     seen_dead_loader = true;
     ClassLoaderData* dead = data;
-    dead->mark_for_unload();
-    if (TraceClassLoaderData) {
-      tty->print("[ClassLoaderData: unload loader data "PTR_FORMAT, dead);
-      tty->print(" for instance "PTR_FORMAT" of ", dead->class_loader());
-      dead->class_loader()->klass()->name()->print_symbol_on(tty);
-      tty->print_cr("]");
-    }
+    dead->unload();
     data = data->next();
     // Remove from loader list.
     if (prev != NULL) {
@@ -587,8 +682,8 @@
          "only supported for null loader data for now");
   assert (!_shared_metaspaces_initialized, "only initialize once");
   MutexLockerEx ml(metaspace_lock(),  Mutex::_no_safepoint_check_flag);
-  _ro_metaspace = new Metaspace(_metaspace_lock, SharedReadOnlySize/wordSize);
-  _rw_metaspace = new Metaspace(_metaspace_lock, SharedReadWriteSize/wordSize);
+  _ro_metaspace = new Metaspace(_metaspace_lock, Metaspace::ROMetaspaceType);
+  _rw_metaspace = new Metaspace(_metaspace_lock, Metaspace::ReadWriteMetaspaceType);
   _shared_metaspaces_initialized = true;
 }
 
--- a/src/share/vm/classfile/classLoaderData.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/classLoaderData.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -8,7 +8,7 @@
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.See the GNU General Public License
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
@@ -62,13 +62,14 @@
   // CMS support.
   static ClassLoaderData* _saved_head;
 
-  static ClassLoaderData* add(ClassLoaderData** loader_data_addr, Handle class_loader);
+  static ClassLoaderData* add(ClassLoaderData** loader_data_addr, Handle class_loader, TRAPS);
  public:
-  static ClassLoaderData* find_or_create(Handle class_loader);
+  static ClassLoaderData* find_or_create(Handle class_loader, TRAPS);
   static void purge();
   static void clear_claimed_marks();
   static void oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim);
   static void always_strong_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
+  static void keep_alive_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
   static void classes_do(KlassClosure* klass_closure);
   static bool do_unloading(BoolObjectClosure* is_alive);
 
@@ -101,10 +102,14 @@
 
   oop _class_loader;       // oop used to uniquely identify a class loader
                            // class loader or a canonical class path
+  oop _dependencies;       // oop to hold dependencies from this class loader
+                           // data to others.
   Metaspace * _metaspace;  // Meta-space where meta-data defined by the
                            // classes in the class loader are allocated.
   Mutex* _metaspace_lock;  // Locks the metaspace for allocations and setup.
   bool _unloading;         // true if this class loader goes away
+  bool _keep_alive;        // if this CLD should not be unloaded; set for anonymous loaders
+  bool _is_anonymous;      // if this CLD is for an anonymous class
   volatile int _claimed;   // true if claimed, for example during GC traces.
                            // To avoid applying oop closure more than once.
                            // Has to be an int because we cas it.
@@ -129,13 +134,13 @@
   static Metaspace* _ro_metaspace;
   static Metaspace* _rw_metaspace;
 
-  bool has_dependency(ClassLoaderData* cld);
-  void add_dependency(ClassLoaderData* to_loader_data, TRAPS);
+  void add_dependency(Handle dependency, TRAPS);
+  void locked_add_dependency(objArrayHandle last, objArrayHandle new_dependency);
 
   void set_next(ClassLoaderData* next) { _next = next; }
   ClassLoaderData* next() const        { return _next; }
 
-  ClassLoaderData(Handle h_class_loader);
+  ClassLoaderData(Handle h_class_loader, bool is_anonymous);
   ~ClassLoaderData();
 
   void set_metaspace(Metaspace* m) { _metaspace = m; }
@@ -150,7 +155,9 @@
   bool claimed() const          { return _claimed == 1; }
   bool claim();
 
-  void mark_for_unload()        { _unloading = true; }
+  void unload();
+  bool keep_alive() const       { return _keep_alive; }
+  bool is_alive(BoolObjectClosure* is_alive_closure) const;
 
   void classes_do(void f(InstanceKlass*));
 
@@ -168,10 +175,12 @@
     return _the_null_class_loader_data;
   }
 
+  bool is_anonymous() const { return _is_anonymous; }
+
   static void init_null_class_loader_data() {
     assert(_the_null_class_loader_data == NULL, "cannot initialize twice");
     assert(ClassLoaderDataGraph::_head == NULL, "cannot initialize twice");
-    _the_null_class_loader_data = new ClassLoaderData((oop)NULL);
+    _the_null_class_loader_data = new ClassLoaderData((oop)NULL, false);
     ClassLoaderDataGraph::_head = _the_null_class_loader_data;
     assert(_the_null_class_loader_data->is_the_null_class_loader_data(), "Must be");
     if (DumpSharedSpaces) {
@@ -194,6 +203,9 @@
     assert(!(is_the_null_class_loader_data() && _unloading), "The null class loader can never be unloaded");
     return _unloading;
   }
+  // Anonymous class loader data doesn't have anything to keep it from
+  // being unloaded while the anonymous class is being parsed.
+  void set_keep_alive(bool value) { _keep_alive = value; }
 
   unsigned int identity_hash() {
     return _class_loader == NULL ? 0 : _class_loader->identity_hash();
@@ -211,15 +223,18 @@
   void print_value_on(outputStream* out) const PRODUCT_RETURN;
   void dump(outputStream * const out) PRODUCT_RETURN;
   void verify();
+  const char* loader_name();
 
   jobject add_handle(Handle h);
   void add_class(Klass* k);
   void remove_class(Klass* k);
   void record_dependency(Klass* to, TRAPS);
+  void init_dependencies(TRAPS);
 
   void add_to_deallocate_list(Metadata* m);
 
   static ClassLoaderData* class_loader_data(oop loader);
+  static ClassLoaderData* anonymous_class_loader_data(oop loader, TRAPS);
   static void print_loader(ClassLoaderData *loader_data, outputStream *out);
 
   // CDS support
--- a/src/share/vm/classfile/classLoaderData.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/classLoaderData.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -33,7 +33,7 @@
 }
 
 
-inline ClassLoaderData *ClassLoaderDataGraph::find_or_create(Handle loader) {
+inline ClassLoaderData *ClassLoaderDataGraph::find_or_create(Handle loader, TRAPS) {
   assert(loader() != NULL,"Must be a class loader");
   // Gets the class loader data out of the java/lang/ClassLoader object, if non-null
   // it's already in the loader_data, so no need to add
@@ -42,5 +42,5 @@
   if (loader_data_id) {
      return loader_data_id;
   }
-  return ClassLoaderDataGraph::add(loader_data_addr, loader);
+  return ClassLoaderDataGraph::add(loader_data_addr, loader, THREAD);
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/classfile/defaultMethods.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,1395 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/bytecodeAssembler.hpp"
+#include "classfile/defaultMethods.hpp"
+#include "classfile/genericSignatures.hpp"
+#include "classfile/symbolTable.hpp"
+#include "memory/allocation.hpp"
+#include "memory/metadataFactory.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/thread.hpp"
+#include "oops/instanceKlass.hpp"
+#include "oops/klass.hpp"
+#include "oops/method.hpp"
+#include "utilities/accessFlags.hpp"
+#include "utilities/exceptions.hpp"
+#include "utilities/ostream.hpp"
+#include "utilities/pair.hpp"
+#include "utilities/resourceHash.hpp"
+
+typedef enum { QUALIFIED, DISQUALIFIED } QualifiedState;
+
+// Because we use an iterative algorithm when iterating over the type
+// hierarchy, we can't use traditional scoped objects which automatically do
+// cleanup in the destructor when the scope is exited.  PseudoScope (and
+// PseudoScopeMark) provides a similar functionality, but for when you want a
+// scoped object in non-stack memory (such as in resource memory, as we do
+// here).  You've just got to remember to call 'destroy()' on the scope when
+// leaving it (and marks have to be explicitly added).
+class PseudoScopeMark : public ResourceObj {
+ public:
+  virtual void destroy() = 0;
+};
+
+class PseudoScope : public ResourceObj {
+ private:
+  GrowableArray<PseudoScopeMark*> _marks;
+ public:
+
+  static PseudoScope* cast(void* data) {
+    return static_cast<PseudoScope*>(data);
+  }
+
+  void add_mark(PseudoScopeMark* psm) {
+   _marks.append(psm);
+  }
+
+  void destroy() {
+    for (int i = 0; i < _marks.length(); ++i) {
+      _marks.at(i)->destroy();
+    }
+  }
+};
+
+class ContextMark : public PseudoScopeMark {
+ private:
+  generic::Context::Mark _mark;
+ public:
+  ContextMark(const generic::Context::Mark& cm) : _mark(cm) {}
+  virtual void destroy() { _mark.destroy(); }
+};
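+
+// A typical use in this file looks roughly like (illustrative only):
+//
+//   PseudoScope* scope = PseudoScope::cast(current_data());
+//   scope->add_mark(new ContextMark(ctx->mark()));
+//   ...
+//   scope->destroy();   // runs destroy() on every registered mark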
+
+#ifndef PRODUCT
+static void print_slot(outputStream* str, Symbol* name, Symbol* signature) {
+  ResourceMark rm;
+  str->print("%s%s", name->as_C_string(), signature->as_C_string());
+}
+
+static void print_method(outputStream* str, Method* mo, bool with_class=true) {
+  ResourceMark rm;
+  if (with_class) {
+    str->print("%s.", mo->klass_name()->as_C_string());
+  }
+  print_slot(str, mo->name(), mo->signature());
+}
+#endif // ndef PRODUCT
+
+/**
+ * Perform a depth-first iteration over the class hierarchy, applying
+ * algorithmic logic as it goes.
+ *
+ * This class is one half of the inheritance hierarchy analysis mechanism.
+ * It is meant to be used in conjunction with another class, the algorithm,
+ * which is indicated by the ALGO template parameter.  This class can be
+ * paired with any algorithm class that provides the required methods.
+ *
+ * This class contains all the mechanics for iterating over the class hierarchy
+ * starting at a particular root, without recursing (thus limiting stack growth
+ * from this point).  It visits each superclass (if present) and superinterface
+ * in a depth-first manner, with callbacks to the ALGO class as each class is
+ * encountered (visit()).  The algorithm can cut off further exploration of a
+ * particular branch by returning 'false' from a visit() call.
+ *
+ * The ALGO class must provide a visit() method, which will be
+ * called once for each node in the inheritance tree during the iteration.  In
+ * addition, it can provide a memory block via new_node_data(InstanceKlass*),
+ * which it can use for node-specific storage (and access via the
+ * current_data() and data_at_depth(int) methods).
+ *
+ * Bare minimum needed to be an ALGO class:
+ * class Algo : public HierarchyVisitor<Algo> {
+ *   void* new_node_data(InstanceKlass* cls) { return NULL; }
+ *   void free_node_data(void* data) { return; }
+ *   bool visit() { return true; }
+ * };
+ */
+template <class ALGO>
+class HierarchyVisitor : StackObj {
+ private:
+
+  class Node : public ResourceObj {
+   public:
+    InstanceKlass* _class;
+    bool _super_was_visited;
+    int _interface_index;
+    void* _algorithm_data;
+
+    Node(InstanceKlass* cls, void* data, bool visit_super)
+        : _class(cls), _super_was_visited(!visit_super),
+          _interface_index(0), _algorithm_data(data) {}
+
+    int number_of_interfaces() { return _class->local_interfaces()->length(); }
+    int interface_index() { return _interface_index; }
+    void set_super_visited() { _super_was_visited = true; }
+    void increment_visited_interface() { ++_interface_index; }
+    void set_all_interfaces_visited() {
+      _interface_index = number_of_interfaces();
+    }
+    bool has_visited_super() { return _super_was_visited; }
+    bool has_visited_all_interfaces() {
+      return interface_index() >= number_of_interfaces();
+    }
+    InstanceKlass* interface_at(int index) {
+      return InstanceKlass::cast(_class->local_interfaces()->at(index));
+    }
+    InstanceKlass* next_super() { return _class->java_super(); }
+    InstanceKlass* next_interface() {
+      return interface_at(interface_index());
+    }
+  };
+
+  bool _cancelled;
+  GrowableArray<Node*> _path;
+
+  Node* current_top() const { return _path.top(); }
+  bool has_more_nodes() const { return !_path.is_empty(); }
+  void push(InstanceKlass* cls, void* data) {
+    assert(cls != NULL, "Requires a valid instance class");
+    Node* node = new Node(cls, data, has_super(cls));
+    _path.push(node);
+  }
+  void pop() { _path.pop(); }
+
+  void reset_iteration() {
+    _cancelled = false;
+    _path.clear();
+  }
+  bool is_cancelled() const { return _cancelled; }
+
+  static bool has_super(InstanceKlass* cls) {
+    return cls->super() != NULL && !cls->is_interface();
+  }
+
+  Node* node_at_depth(int i) const {
+    return (i >= _path.length()) ? NULL : _path.at(_path.length() - i - 1);
+  }
+
+ protected:
+
+  // Accessors available to the algorithm
+  int current_depth() const { return _path.length() - 1; }
+
+  InstanceKlass* class_at_depth(int i) {
+    Node* n = node_at_depth(i);
+    return n == NULL ? NULL : n->_class;
+  }
+  InstanceKlass* current_class() { return class_at_depth(0); }
+
+  void* data_at_depth(int i) {
+    Node* n = node_at_depth(i);
+    return n == NULL ? NULL : n->_algorithm_data;
+  }
+  void* current_data() { return data_at_depth(0); }
+
+  void cancel_iteration() { _cancelled = true; }
+
+ public:
+
+  void run(InstanceKlass* root) {
+    ALGO* algo = static_cast<ALGO*>(this);
+
+    reset_iteration();
+
+    void* algo_data = algo->new_node_data(root);
+    push(root, algo_data);
+    bool top_needs_visit = true;
+
+    do {
+      Node* top = current_top();
+      if (top_needs_visit) {
+        if (algo->visit() == false) {
+          // algorithm does not want to continue along this path.  Arrange
+          // it so that this state is immediately popped off the stack
+          top->set_super_visited();
+          top->set_all_interfaces_visited();
+        }
+        top_needs_visit = false;
+      }
+
+      if (top->has_visited_super() && top->has_visited_all_interfaces()) {
+        algo->free_node_data(top->_algorithm_data);
+        pop();
+      } else {
+        InstanceKlass* next = NULL;
+        if (top->has_visited_super() == false) {
+          next = top->next_super();
+          top->set_super_visited();
+        } else {
+          next = top->next_interface();
+          top->increment_visited_interface();
+        }
+        assert(next != NULL, "Otherwise we shouldn't be here");
+        algo_data = algo->new_node_data(next);
+        push(next, algo_data);
+        top_needs_visit = true;
+      }
+    } while (!is_cancelled() && has_more_nodes());
+  }
+};
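+
+// Traversal-order sketch (illustrative): for 'class C extends S implements
+// I1, I2', run(C) visits C, then S's branch (the superclass is always taken
+// before any interface), then I1's branch, then I2's branch, depth-first,
+// pruning any branch whose visit() returns false.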
+
+#ifndef PRODUCT
+class PrintHierarchy : public HierarchyVisitor<PrintHierarchy> {
+ public:
+
+  bool visit() {
+    InstanceKlass* cls = current_class();
+    streamIndentor si(tty, current_depth() * 2);
+    tty->indent().print_cr("%s", cls->name()->as_C_string());
+    return true;
+  }
+
+  void* new_node_data(InstanceKlass* cls) { return NULL; }
+  void free_node_data(void* data) { return; }
+};
+#endif // ndef PRODUCT
+
+// Used to register InstanceKlass objects and all related metadata structures
+// (Methods, ConstantPools) as "in-use" by the current thread so that they can't
+// be deallocated by class redefinition while we're using them.  The classes are
+// de-registered when this goes out of scope.
+//
+// Once a class is registered, we need not bother with methodHandles or
+// constantPoolHandles for its associated metadata.
+class KeepAliveRegistrar : public StackObj {
+ private:
+  Thread* _thread;
+  GrowableArray<ConstantPool*> _keep_alive;
+
+ public:
+  KeepAliveRegistrar(Thread* thread) : _thread(thread), _keep_alive(20) {
+    assert(thread == Thread::current(), "Must be current thread");
+  }
+
+  ~KeepAliveRegistrar() {
+    for (int i = _keep_alive.length() - 1; i >= 0; --i) {
+      ConstantPool* cp = _keep_alive.at(i);
+      int idx = _thread->metadata_handles()->find_from_end(cp);
+      assert(idx > 0, "Must be in the list");
+      _thread->metadata_handles()->remove_at(idx);
+    }
+  }
+
+  // Register a class as 'in-use' by the thread.  It's fine to register a class
+  // multiple times (though perhaps inefficient)
+  void register_class(InstanceKlass* ik) {
+    ConstantPool* cp = ik->constants();
+    _keep_alive.push(cp);
+    _thread->metadata_handles()->push(cp);
+  }
+};
+
+class KeepAliveVisitor : public HierarchyVisitor<KeepAliveVisitor> {
+ private:
+  KeepAliveRegistrar* _registrar;
+
+ public:
+  KeepAliveVisitor(KeepAliveRegistrar* registrar) : _registrar(registrar) {}
+
+  void* new_node_data(InstanceKlass* cls) { return NULL; }
+  void free_node_data(void* data) { return; }
+
+  bool visit() {
+    _registrar->register_class(current_class());
+    return true;
+  }
+};
+
+// A method family contains a set of all methods that implement a single
+// language-level method.  Because of erasure, these methods may have different
+// signatures.  As members of the set are collected while walking over the
+// hierarchy, they are tagged with a qualification state.  The qualification
+// state for an erased method is set to disqualified if there exists a path
+// from the root of the hierarchy to the method that contains an interleaving
+// language-equivalent method defined in an interface.
+class MethodFamily : public ResourceObj {
+ private:
+
+  generic::MethodDescriptor* _descriptor; // language-level description
+  GrowableArray<Pair<Method*,QualifiedState> > _members;
+  ResourceHashtable<Method*, int> _member_index;
+
+  Method* _selected_target;  // Filled in later, if a unique target exists
+  Symbol* _exception_message; // If no unique target is found
+
+  bool contains_method(Method* method) {
+    int* lookup = _member_index.get(method);
+    return lookup != NULL;
+  }
+
+  void add_method(Method* method, QualifiedState state) {
+    Pair<Method*,QualifiedState> entry(method, state);
+    _member_index.put(method, _members.length());
+    _members.append(entry);
+  }
+
+  void disqualify_method(Method* method) {
+    int* index = _member_index.get(method);
+    assert(index != NULL && *index >= 0 && *index < _members.length(), "bad index");
+    _members.at(*index).second = DISQUALIFIED;
+  }
+
+  Symbol* generate_no_defaults_message(TRAPS) const;
+  Symbol* generate_abstract_method_message(Method* method, TRAPS) const;
+  Symbol* generate_conflicts_message(GrowableArray<Method*>* methods, TRAPS) const;
+
+ public:
+
+  MethodFamily(generic::MethodDescriptor* canonical_desc)
+      : _descriptor(canonical_desc), _selected_target(NULL),
+        _exception_message(NULL) {}
+
+  generic::MethodDescriptor* descriptor() const { return _descriptor; }
+
+  bool descriptor_matches(generic::MethodDescriptor* md, generic::Context* ctx) {
+    return descriptor()->covariant_match(md, ctx);
+  }
+
+  void set_target_if_empty(Method* m) {
+    if (_selected_target == NULL && !m->is_overpass()) {
+      _selected_target = m;
+    }
+  }
+
+  void record_qualified_method(Method* m) {
+    // If the method already exists in the set as qualified, this operation is
+    // redundant.  If it already exists as disqualified, then we leave it as
+    // disqualified.  Thus we only add to the set if it's not already in the
+    // set.
+    if (!contains_method(m)) {
+      add_method(m, QUALIFIED);
+    }
+  }
+
+  void record_disqualified_method(Method* m) {
+    // If not in the set, add it as disqualified.  If it's already in the set,
+    // then set the state to disqualified no matter what the previous state was.
+    if (!contains_method(m)) {
+      add_method(m, DISQUALIFIED);
+    } else {
+      disqualify_method(m);
+    }
+  }
+
+  bool has_target() const { return _selected_target != NULL; }
+  bool throws_exception() { return _exception_message != NULL; }
+
+  Method* get_selected_target() { return _selected_target; }
+  Symbol* get_exception_message() { return _exception_message; }
+
+  // Either sets the target or the exception error message
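+  // (For illustration: zero qualified candidates yields the "no qualifying
+  // defaults" message, a single abstract candidate the "is abstract" message,
+  // a single concrete candidate becomes the selected target, and two or more
+  // candidates yield the "conflicting default methods" message.)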
+  void determine_target(InstanceKlass* root, TRAPS) {
+    if (has_target() || throws_exception()) {
+      return;
+    }
+
+    GrowableArray<Method*> qualified_methods;
+    for (int i = 0; i < _members.length(); ++i) {
+      Pair<Method*,QualifiedState> entry = _members.at(i);
+      if (entry.second == QUALIFIED) {
+        qualified_methods.append(entry.first);
+      }
+    }
+
+    if (qualified_methods.length() == 0) {
+      _exception_message = generate_no_defaults_message(CHECK);
+    } else if (qualified_methods.length() == 1) {
+      Method* method = qualified_methods.at(0);
+      if (method->is_abstract()) {
+        _exception_message = generate_abstract_method_message(method, CHECK);
+      } else {
+        _selected_target = qualified_methods.at(0);
+      }
+    } else {
+      _exception_message = generate_conflicts_message(&qualified_methods, CHECK);
+    }
+
+    assert((has_target() ^ throws_exception()) == 1,
+           "One and only one must be true");
+  }
+
+  bool contains_signature(Symbol* query) {
+    for (int i = 0; i < _members.length(); ++i) {
+      if (query == _members.at(i).first->signature()) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+#ifndef PRODUCT
+  void print_on(outputStream* str) const {
+    print_on(str, 0);
+  }
+
+  void print_on(outputStream* str, int indent) const {
+    streamIndentor si(str, indent * 2);
+
+    generic::Context ctx(NULL); // empty, as _descriptor already canonicalized
+    TempNewSymbol family = descriptor()->reify_signature(&ctx, Thread::current());
+    str->indent().print_cr("Logical Method %s:", family->as_C_string());
+
+    streamIndentor si2(str);
+    for (int i = 0; i < _members.length(); ++i) {
+      str->indent();
+      print_method(str, _members.at(i).first);
+      if (_members.at(i).second == DISQUALIFIED) {
+        str->print(" (disqualified)");
+      }
+      str->print_cr("");
+    }
+
+    if (_selected_target != NULL) {
+      print_selected(str, 1);
+    }
+  }
+
+  void print_selected(outputStream* str, int indent) const {
+    assert(has_target(), "Should be called otherwise");
+    streamIndentor si(str, indent * 2);
+    str->indent().print("Selected method: ");
+    print_method(str, _selected_target);
+    str->print_cr("");
+  }
+
+  void print_exception(outputStream* str, int indent) {
+    assert(throws_exception(), "Should be called otherwise");
+    streamIndentor si(str, indent * 2);
+    str->indent().print_cr("%s", _exception_message->as_C_string());
+  }
+#endif // ndef PRODUCT
+};
+
+Symbol* MethodFamily::generate_no_defaults_message(TRAPS) const {
+  return SymbolTable::new_symbol("No qualifying defaults found", CHECK_NULL);
+}
+
+Symbol* MethodFamily::generate_abstract_method_message(Method* method, TRAPS) const {
+  Symbol* klass = method->klass_name();
+  Symbol* name = method->name();
+  Symbol* sig = method->signature();
+  stringStream ss;
+  ss.print("Method ");
+  ss.write((const char*)klass->bytes(), klass->utf8_length());
+  ss.print(".");
+  ss.write((const char*)name->bytes(), name->utf8_length());
+  ss.write((const char*)sig->bytes(), sig->utf8_length());
+  ss.print(" is abstract");
+  return SymbolTable::new_symbol(ss.base(), (int)ss.size(), CHECK_NULL);
+}
+
+Symbol* MethodFamily::generate_conflicts_message(GrowableArray<Method*>* methods, TRAPS) const {
+  stringStream ss;
+  ss.print("Conflicting default methods:");
+  for (int i = 0; i < methods->length(); ++i) {
+    Method* method = methods->at(i);
+    Symbol* klass = method->klass_name();
+    Symbol* name = method->name();
+    ss.print(" ");
+    ss.write((const char*)klass->bytes(), klass->utf8_length());
+    ss.print(".");
+    ss.write((const char*)name->bytes(), name->utf8_length());
+  }
+  return SymbolTable::new_symbol(ss.base(), (int)ss.size(), CHECK_NULL);
+}
+
+class StateRestorer;
+
+// StatefulMethodFamily is a wrapper around MethodFamily that maintains the
+// qualification state during hierarchy visitation, and applies that state
+// when adding members to the MethodFamily.
+class StatefulMethodFamily : public ResourceObj {
+  friend class StateRestorer;
+ private:
+  MethodFamily* _method;
+  QualifiedState _qualification_state;
+
+  void set_qualification_state(QualifiedState state) {
+    _qualification_state = state;
+  }
+
+ public:
+  StatefulMethodFamily(generic::MethodDescriptor* md, generic::Context* ctx) {
+    _method = new MethodFamily(md->canonicalize(ctx));
+    _qualification_state = QUALIFIED;
+  }
+
+  void set_target_if_empty(Method* m) { _method->set_target_if_empty(m); }
+
+  MethodFamily* get_method_family() { return _method; }
+
+  bool descriptor_matches(generic::MethodDescriptor* md, generic::Context* ctx) {
+    return _method->descriptor_matches(md, ctx);
+  }
+
+  StateRestorer* record_method_and_dq_further(Method* mo);
+};
+
+class StateRestorer : public PseudoScopeMark {
+ private:
+  StatefulMethodFamily* _method;
+  QualifiedState _state_to_restore;
+ public:
+  StateRestorer(StatefulMethodFamily* dm, QualifiedState state)
+      : _method(dm), _state_to_restore(state) {}
+  ~StateRestorer() { destroy(); }
+  void restore_state() { _method->set_qualification_state(_state_to_restore); }
+  virtual void destroy() { restore_state(); }
+};
+
+StateRestorer* StatefulMethodFamily::record_method_and_dq_further(Method* mo) {
+  StateRestorer* mark = new StateRestorer(this, _qualification_state);
+  if (_qualification_state == QUALIFIED) {
+    _method->record_qualified_method(mo);
+  } else {
+    _method->record_disqualified_method(mo);
+  }
+  // Everything found "above" this method in the hierarchy walk (i.e., in
+  // supertypes visited later on this path) is set to disqualified.
+  set_qualification_state(DISQUALIFIED);
+  return mark;
+}
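+
+// Illustrative walk: with 'interface I1 { default m(); }',
+// 'interface I2 extends I1 { default m(); }' and 'class C implements I2',
+// visiting I2 records I2.m() as QUALIFIED and flips the state, so I1.m(),
+// found deeper on the same path, is recorded as DISQUALIFIED; the returned
+// StateRestorer puts the state back when the walk leaves I2's branch.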
+
+class StatefulMethodFamilies : public ResourceObj {
+ private:
+  GrowableArray<StatefulMethodFamily*> _methods;
+
+ public:
+  StatefulMethodFamily* find_matching(
+      generic::MethodDescriptor* md, generic::Context* ctx) {
+    for (int i = 0; i < _methods.length(); ++i) {
+      StatefulMethodFamily* existing = _methods.at(i);
+      if (existing->descriptor_matches(md, ctx)) {
+        return existing;
+      }
+    }
+    return NULL;
+  }
+
+  StatefulMethodFamily* find_matching_or_create(
+      generic::MethodDescriptor* md, generic::Context* ctx) {
+    StatefulMethodFamily* method = find_matching(md, ctx);
+    if (method == NULL) {
+      method = new StatefulMethodFamily(md, ctx);
+      _methods.append(method);
+    }
+    return method;
+  }
+
+  void extract_families_into(GrowableArray<MethodFamily*>* array) {
+    for (int i = 0; i < _methods.length(); ++i) {
+      array->append(_methods.at(i)->get_method_family());
+    }
+  }
+};
+
+// Represents a location corresponding to a vtable slot for a method for
+// which neither the class nor any of its ancestors provides an implementation.
+// Default methods may be present to fill this slot.
+class EmptyVtableSlot : public ResourceObj {
+ private:
+  Symbol* _name;
+  Symbol* _signature;
+  int _size_of_parameters;
+  MethodFamily* _binding;
+
+ public:
+  EmptyVtableSlot(Method* method)
+      : _name(method->name()), _signature(method->signature()),
+        _size_of_parameters(method->size_of_parameters()), _binding(NULL) {}
+
+  Symbol* name() const { return _name; }
+  Symbol* signature() const { return _signature; }
+  int size_of_parameters() const { return _size_of_parameters; }
+
+  void bind_family(MethodFamily* lm) { _binding = lm; }
+  bool is_bound() { return _binding != NULL; }
+  MethodFamily* get_binding() { return _binding; }
+
+#ifndef PRODUCT
+  void print_on(outputStream* str) const {
+    print_slot(str, name(), signature());
+  }
+#endif // ndef PRODUCT
+};
+
+static GrowableArray<EmptyVtableSlot*>* find_empty_vtable_slots(
+    InstanceKlass* klass, GrowableArray<Method*>* mirandas, TRAPS) {
+
+  assert(klass != NULL, "Must be valid class");
+
+  GrowableArray<EmptyVtableSlot*>* slots = new GrowableArray<EmptyVtableSlot*>();
+
+  // All miranda methods are obvious candidates
+  for (int i = 0; i < mirandas->length(); ++i) {
+    EmptyVtableSlot* slot = new EmptyVtableSlot(mirandas->at(i));
+    slots->append(slot);
+  }
+
+  // Also add any overpasses in our superclasses that we haven't implemented.
+  // (can't use the vtable because it is not guaranteed to be initialized yet)
+  InstanceKlass* super = klass->java_super();
+  while (super != NULL) {
+    for (int i = 0; i < super->methods()->length(); ++i) {
+      Method* m = super->methods()->at(i);
+      if (m->is_overpass()) {
+        // m is a method that would have been a miranda if not for the
+        // default method processing that occurred on behalf of our superclass,
+        // so it's a method we want to re-examine in this new context.  That is,
+        // unless we have a real implementation of it in the current class.
+        Method* impl = klass->lookup_method(m->name(), m->signature());
+        if (impl == NULL || impl->is_overpass()) {
+          slots->append(new EmptyVtableSlot(m));
+        }
+      }
+    }
+    super = super->java_super();
+  }
+
+#ifndef PRODUCT
+  if (TraceDefaultMethods) {
+    tty->print_cr("Slots that need filling:");
+    streamIndentor si(tty);
+    for (int i = 0; i < slots->length(); ++i) {
+      tty->indent();
+      slots->at(i)->print_on(tty);
+      tty->print_cr("");
+    }
+  }
+#endif // ndef PRODUCT
+  return slots;
+}
+
+// Iterates over the type hierarchy looking for all methods with a specific
+// method name.  The result of this is a set of method families each of
+// which is populated with a set of methods that implement the same
+// language-level signature.
+class FindMethodsByName : public HierarchyVisitor<FindMethodsByName> {
+ private:
+  // Context data
+  Thread* THREAD;
+  generic::DescriptorCache* _cache;
+  Symbol* _method_name;
+  generic::Context* _ctx;
+  StatefulMethodFamilies _families;
+
+ public:
+
+  FindMethodsByName(generic::DescriptorCache* cache, Symbol* name,
+      generic::Context* ctx, Thread* thread) :
+    _cache(cache), _method_name(name), _ctx(ctx), THREAD(thread) {}
+
+  void get_discovered_families(GrowableArray<MethodFamily*>* methods) {
+    _families.extract_families_into(methods);
+  }
+
+  void* new_node_data(InstanceKlass* cls) { return new PseudoScope(); }
+  void free_node_data(void* node_data) {
+    PseudoScope::cast(node_data)->destroy();
+  }
+
+  bool visit() {
+    PseudoScope* scope = PseudoScope::cast(current_data());
+    InstanceKlass* klass = current_class();
+    InstanceKlass* sub = current_depth() > 0 ? class_at_depth(1) : NULL;
+
+    ContextMark* cm = new ContextMark(_ctx->mark());
+    scope->add_mark(cm); // will restore context when scope is freed
+
+    _ctx->apply_type_arguments(sub, klass, THREAD);
+
+    int start, end = 0;
+    start = klass->find_method_by_name(_method_name, &end);
+    if (start != -1) {
+      for (int i = start; i < end; ++i) {
+        Method* m = klass->methods()->at(i);
+        // This gets the method's parameter list with its generic type
+        // parameters resolved
+        generic::MethodDescriptor* md = _cache->descriptor_for(m, THREAD);
+
+        // Find all methods on this hierarchy that match this method
+        // (name, signature).   This class collects other families of this
+        // method name.
+        StatefulMethodFamily* family =
+            _families.find_matching_or_create(md, _ctx);
+
+        if (klass->is_interface()) {
+          // Record this interface method with the current qualification
+          // state, and disqualify anything found deeper along this path.
+          StateRestorer* restorer = family->record_method_and_dq_further(m);
+          scope->add_mark(restorer);
+        } else {
+          // This is the rule that methods in classes take precedence over
+          // methods in interfaces.  This works because of single inheritance.
+          family->set_target_if_empty(m);
+        }
+      }
+    }
+    return true;
+  }
+};
+
+#ifndef PRODUCT
+static void print_families(
+    GrowableArray<MethodFamily*>* methods, Symbol* match) {
+  streamIndentor si(tty, 4);
+  if (methods->length() == 0) {
+    tty->indent();
+    tty->print_cr("No Logical Method found");
+  }
+  for (int i = 0; i < methods->length(); ++i) {
+    tty->indent();
+    MethodFamily* lm = methods->at(i);
+    if (lm->contains_signature(match)) {
+      tty->print_cr("<Matching>");
+    } else {
+      tty->print_cr("<Non-Matching>");
+    }
+    lm->print_on(tty, 1);
+  }
+}
+#endif // ndef PRODUCT
+
+static void merge_in_new_methods(InstanceKlass* klass,
+    GrowableArray<Method*>* new_methods, TRAPS);
+static void create_overpasses(
+    GrowableArray<EmptyVtableSlot*>* slots, InstanceKlass* klass, TRAPS);
+
+// This is the guts of the default methods implementation.  This is called just
+// after the classfile has been parsed if some ancestor has default methods.
+//
+// First it finds any name/signature slots that need an implementation (either
+// because they are miranda or a superclass's implementation is an overpass
+// itself).  For each slot, iterate over the hierarchy, using generic signature
+// information to partition any methods that match the name into method families
+// where each family contains methods whose signatures are equivalent at the
+// language level (i.e., their reified parameters match and return values are
+// covariant). Check those sets to see if they contain a signature that matches
+// the slot we're looking at (if we're lucky, there might be other empty slots
+// that we can fill using the same analysis).
+//
+// For each slot filled, we generate an overpass method that either calls the
+// unique default method candidate using invokespecial, or throws an exception
+// (in the case of no default method candidates, or more than one valid
+// candidate).  These methods are then added to the class's method list.  If
+// the method set we're using contains methods (qualified or not) with a
+// different runtime signature than the method we're creating, then we have to
+// create bridges with those signatures too.
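+//
+// A source-level sketch of the conflicting case (illustrative only):
+//   interface A { default void m() {} }
+//   interface B { default void m() {} }
+//   class C implements A, B { }
+// Both A.m and B.m are qualified candidates for C's empty m()V slot, so the
+// generated overpass throws rather than invoking either one.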
+void DefaultMethods::generate_default_methods(
+    InstanceKlass* klass, GrowableArray<Method*>* mirandas, TRAPS) {
+
+  // This resource mark is the bound for all memory allocation that takes
+  // place during default method processing.  After this goes out of scope,
+  // all (Resource) objects' memory will be reclaimed.  Be careful if adding an
+  // embedded resource mark under here as that memory can't be used outside
+  // whatever scope it's in.
+  ResourceMark rm(THREAD);
+
+  generic::DescriptorCache cache;
+
+  // Keep entire hierarchy alive for the duration of the computation
+  KeepAliveRegistrar keepAlive(THREAD);
+  KeepAliveVisitor loadKeepAlive(&keepAlive);
+  loadKeepAlive.run(klass);
+
+#ifndef PRODUCT
+  if (TraceDefaultMethods) {
+    ResourceMark rm;  // be careful with these!
+    tty->print_cr("Class %s requires default method processing",
+        klass->name()->as_klass_external_name());
+    PrintHierarchy printer;
+    printer.run(klass);
+  }
+#endif // ndef PRODUCT
+
+  GrowableArray<EmptyVtableSlot*>* empty_slots =
+      find_empty_vtable_slots(klass, mirandas, CHECK);
+
+  for (int i = 0; i < empty_slots->length(); ++i) {
+    EmptyVtableSlot* slot = empty_slots->at(i);
+#ifndef PRODUCT
+    if (TraceDefaultMethods) {
+      streamIndentor si(tty, 2);
+      tty->indent().print("Looking for default methods for slot ");
+      slot->print_on(tty);
+      tty->print_cr("");
+    }
+#endif // ndef PRODUCT
+    if (slot->is_bound()) {
+#ifndef PRODUCT
+      if (TraceDefaultMethods) {
+        streamIndentor si(tty, 4);
+        tty->indent().print_cr("Already bound to logical method:");
+        slot->get_binding()->print_on(tty, 1);
+      }
+#endif // ndef PRODUCT
+      continue; // covered by previous processing
+    }
+
+    generic::Context ctx(&cache);
+    FindMethodsByName visitor(&cache, slot->name(), &ctx, CHECK);
+    visitor.run(klass);
+
+    GrowableArray<MethodFamily*> discovered_families;
+    visitor.get_discovered_families(&discovered_families);
+
+#ifndef PRODUCT
+    if (TraceDefaultMethods) {
+      print_families(&discovered_families, slot->signature());
+    }
+#endif // ndef PRODUCT
+
+    // Find and populate any other slots that match the discovered families
+    for (int j = i; j < empty_slots->length(); ++j) {
+      EmptyVtableSlot* open_slot = empty_slots->at(j);
+
+      if (slot->name() == open_slot->name()) {
+        for (int k = 0; k < discovered_families.length(); ++k) {
+          MethodFamily* lm = discovered_families.at(k);
+
+          if (lm->contains_signature(open_slot->signature())) {
+            lm->determine_target(klass, CHECK);
+            open_slot->bind_family(lm);
+          }
+        }
+      }
+    }
+  }
+
+#ifndef PRODUCT
+  if (TraceDefaultMethods) {
+    tty->print_cr("Creating overpasses...");
+  }
+#endif // ndef PRODUCT
+
+  create_overpasses(empty_slots, klass, CHECK);
+
+#ifndef PRODUCT
+  if (TraceDefaultMethods) {
+    tty->print_cr("Default method processing complete");
+  }
+#endif // ndef PRODUCT
+}
+
+
+/**
+ * Generic analysis was used upon interface '_target' and found a unique
+ * default method candidate with generic signature '_method_desc'.  This
+ * method is only viable if it would also be in the set of default method
+ * candidates if we ran a full analysis on the current class.
+ *
+ * The only reason that the method would not be in the set of candidates for
+ * the current class is that there's another covariantly matching method
+ * which is "more specific" than the found method -- i.e., one could find a
+ * path in the interface hierarchy in which the matching method appears
+ * before we get to '_target'.
+ *
+ * In order to determine this, we examine all of the implemented
+ * interfaces.  If we find a path that leads to the '_target' interface, then
+ * we examine that path to see if there are any methods that would shadow
+ * the selected method along that path.
+ */
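+//
+// Illustrative example (hypothetical types, not from any library):
+//
+//   interface A           { default String m() { return "A"; } }
+//   interface B extends A { default String m() { return "B"; } }
+//   class C implements A, B {}
+//
+// An invokespecial of A.m from C must be rejected: on the path
+// C -> B -> A, the covariantly matching B.m appears before A.m is
+// reached, so A.m is shadowed.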
+class ShadowChecker : public HierarchyVisitor<ShadowChecker> {
+ private:
+  generic::DescriptorCache* _cache;
+  Thread* THREAD;
+
+  InstanceKlass* _target;
+
+  Symbol* _method_name;
+  InstanceKlass* _method_holder;
+  generic::MethodDescriptor* _method_desc;
+  bool _found_shadow;
+
+  bool path_has_shadow() {
+    generic::Context ctx(_cache);
+
+    for (int i = current_depth() - 1; i > 0; --i) {
+      InstanceKlass* ik = class_at_depth(i);
+      InstanceKlass* sub = class_at_depth(i + 1);
+      ctx.apply_type_arguments(sub, ik, THREAD);
+
+      if (ik->is_interface()) {
+        int end;
+        int start = ik->find_method_by_name(_method_name, &end);
+        if (start != -1) {
+          for (int j = start; j < end; ++j) {
+            Method* mo = ik->methods()->at(j);
+            generic::MethodDescriptor* md = _cache->descriptor_for(mo, THREAD);
+            if (_method_desc->covariant_match(md, &ctx)) {
+              return true;
+            }
+          }
+        }
+      }
+    }
+    return false;
+  }
+
+ public:
+
+  ShadowChecker(generic::DescriptorCache* cache, Thread* thread,
+      Symbol* name, InstanceKlass* holder, generic::MethodDescriptor* desc,
+      InstanceKlass* target)
+    : _cache(cache), THREAD(thread), _target(target), _method_name(name),
+      _method_holder(holder), _method_desc(desc), _found_shadow(false) {}
+
+  void* new_node_data(InstanceKlass* cls) { return NULL; }
+  void free_node_data(void* data) { return; }
+
+  bool visit() {
+    InstanceKlass* ik = current_class();
+    if (ik == _target && current_depth() == 1) {
+      return false; // This was the specified super -- no need to search it
+    }
+    if (ik == _method_holder || ik == _target) {
+      // We found a path that should be examined to see if it shadows _method
+      if (path_has_shadow()) {
+        _found_shadow = true;
+        cancel_iteration();
+      }
+      return false; // no need to continue up hierarchy
+    }
+    return true;
+  }
+
+  bool found_shadow() { return _found_shadow; }
+};
+
+// This is called during linktime when we find an invokespecial call that
+// refers to a direct superinterface.  It indicates that we should find the
+// default method in the hierarchy of that superinterface, and if that method
+// would have been a candidate from the point of view of 'this' class, then we
+// return that method.
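+//
+// For example (illustrative), source of the form
+//
+//   interface I { default int m() { return 1; } }
+//   class C implements I {
+//     public int m() { return I.super.m(); }  // invokespecial of I.m
+//   }
+//
+// reaches this routine with cls = C, super = I and method_name = "m".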
+Method* DefaultMethods::find_super_default(
+    Klass* cls, Klass* super, Symbol* method_name, Symbol* sig, TRAPS) {
+
+  ResourceMark rm(THREAD);
+
+  assert(cls != NULL && super != NULL, "Need real classes");
+
+  InstanceKlass* current_class = InstanceKlass::cast(cls);
+  InstanceKlass* direction = InstanceKlass::cast(super);
+
+  // Keep entire hierarchy alive for the duration of the computation
+  KeepAliveRegistrar keepAlive(THREAD);
+  KeepAliveVisitor loadKeepAlive(&keepAlive);
+  loadKeepAlive.run(current_class);
+
+#ifndef PRODUCT
+  if (TraceDefaultMethods) {
+    tty->print_cr("Finding super default method %s.%s%s from %s",
+      direction->name()->as_C_string(),
+      method_name->as_C_string(), sig->as_C_string(),
+      current_class->name()->as_C_string());
+  }
+#endif // ndef PRODUCT
+
+  if (!direction->is_interface()) {
+    // We should not be here
+    return NULL;
+  }
+
+  generic::DescriptorCache cache;
+  generic::Context ctx(&cache);
+
+  // Prime the initial generic context for current -> direction
+  ctx.apply_type_arguments(current_class, direction, CHECK_NULL);
+
+  FindMethodsByName visitor(&cache, method_name, &ctx, CHECK_NULL);
+  visitor.run(direction);
+
+  GrowableArray<MethodFamily*> families;
+  visitor.get_discovered_families(&families);
+
+#ifndef PRODUCT
+  if (TraceDefaultMethods) {
+    print_families(&families, sig);
+  }
+#endif // ndef PRODUCT
+
+  MethodFamily* selected_family = NULL;
+
+  for (int i = 0; i < families.length(); ++i) {
+    MethodFamily* lm = families.at(i);
+    if (lm->contains_signature(sig)) {
+      lm->determine_target(current_class, CHECK_NULL);
+      selected_family = lm;
+    }
+  }
+
+  if (selected_family == NULL) {
+    // No family matched the requested signature; there is no candidate
+    // default method to return.
+    return NULL;
+  }
+
+  if (selected_family->has_target()) {
+    Method* target = selected_family->get_selected_target();
+    InstanceKlass* holder = InstanceKlass::cast(target->method_holder());
+
+    // Verify that the identified method is valid from the context of
+    // the current class
+    ShadowChecker checker(&cache, THREAD, target->name(),
+        holder, selected_family->descriptor(), direction);
+    checker.run(current_class);
+
+    if (checker.found_shadow()) {
+#ifndef PRODUCT
+      if (TraceDefaultMethods) {
+        tty->print_cr("    Only candidate found was shadowed.");
+      }
+#endif // ndef PRODUCT
+      THROW_MSG_(vmSymbols::java_lang_AbstractMethodError(),
+                 "Accessible default method not found", NULL);
+    } else {
+#ifndef PRODUCT
+      if (TraceDefaultMethods) {
+        tty->print("    Returning ");
+        print_method(tty, target, true);
+        tty->print_cr("");
+      }
+#endif // ndef PRODUCT
+      return target;
+    }
+  } else {
+    assert(selected_family->throws_exception(), "must have target or throw");
+    THROW_MSG_(vmSymbols::java_lang_AbstractMethodError(),
+               selected_family->get_exception_message()->as_C_string(), NULL);
+  }
+}
+
+
+static int assemble_redirect(
+    BytecodeConstantPool* cp, BytecodeBuffer* buffer,
+    Symbol* incoming, Method* target, TRAPS) {
+
+  BytecodeAssembler assem(buffer, cp);
+
+  SignatureStream in(incoming, true);
+  SignatureStream out(target->signature(), true);
+  u2 parameter_count = 0;
+
+  assem.aload(parameter_count++); // load 'this'
+
+  while (!in.at_return_type()) {
+    assert(!out.at_return_type(), "Parameter counts do not match");
+    BasicType bt = in.type();
+    assert(out.type() == bt, "Parameter types are not compatible");
+    assem.load(bt, parameter_count);
+    if (in.is_object() && in.as_symbol(THREAD) != out.as_symbol(THREAD)) {
+      assem.checkcast(out.as_symbol(THREAD));
+    } else if (bt == T_LONG || bt == T_DOUBLE) {
+      ++parameter_count; // longs and doubles use two slots
+    }
+    ++parameter_count;
+    in.next();
+    out.next();
+  }
+  assert(out.at_return_type(), "Parameter counts do not match");
+  assert(in.type() == out.type(), "Return types are not compatible");
+
+  if (parameter_count == 1 && (in.type() == T_LONG || in.type() == T_DOUBLE)) {
+    ++parameter_count; // need room for return value
+  }
+  if (target->method_holder()->is_interface()) {
+    assem.invokespecial(target);
+  } else {
+    assem.invokevirtual(target);
+  }
+
+  if (in.is_object() && in.as_symbol(THREAD) != out.as_symbol(THREAD)) {
+    assem.checkcast(in.as_symbol(THREAD));
+  }
+  assem._return(in.type());
+  return parameter_count;
+}
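+
+// For instance (illustrative), a redirect from an incoming signature (I)I
+// to a default method J.m(I)I in interface J assembles roughly:
+//   aload_0; iload_1; invokespecial J.m:(I)I; ireturn
+// with checkcasts inserted only where the erased reference types differ.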
+
+static int assemble_abstract_method_error(
+    BytecodeConstantPool* cp, BytecodeBuffer* buffer, Symbol* message, TRAPS) {
+
+  Symbol* errorName = vmSymbols::java_lang_AbstractMethodError();
+  Symbol* init = vmSymbols::object_initializer_name();
+  Symbol* sig = vmSymbols::string_void_signature();
+
+  BytecodeAssembler assem(buffer, cp);
+
+  assem._new(errorName);
+  assem.dup();
+  assem.load_string(message);
+  assem.invokespecial(errorName, init, sig);
+  assem.athrow();
+
+  return 3; // max stack size: [ exception, exception, string ]
+}
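+
+// The assembled body is equivalent to (illustrative):
+//   new java/lang/AbstractMethodError; dup; ldc <message>;
+//   invokespecial AbstractMethodError.<init>:(Ljava/lang/String;)V; athrow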
+
+static Method* new_method(
+    BytecodeConstantPool* cp, BytecodeBuffer* bytecodes, Symbol* name,
+    Symbol* sig, AccessFlags flags, int max_stack, int params,
+    ConstMethod::MethodType mt, TRAPS) {
+
+  address code_start = static_cast<address>(bytecodes->adr_at(0));
+  int code_length = bytecodes->length();
+
+  Method* m = Method::allocate(cp->pool_holder()->class_loader_data(),
+                               code_length, flags, 0, 0, 0, 0, 0, 0,
+                               mt, CHECK_NULL);
+
+  m->set_constants(NULL); // This will get filled in later
+  m->set_name_index(cp->utf8(name));
+  m->set_signature_index(cp->utf8(sig));
+#ifdef CC_INTERP
+  ResultTypeFinder rtf(sig);
+  m->set_result_index(rtf.type());
+#endif
+  m->set_size_of_parameters(params);
+  m->set_max_stack(max_stack);
+  m->set_max_locals(params);
+  m->constMethod()->set_stackmap_data(NULL);
+  m->set_code(code_start);
+  m->set_force_inline(true);
+
+  return m;
+}
+
+static void switchover_constant_pool(BytecodeConstantPool* bpool,
+    InstanceKlass* klass, GrowableArray<Method*>* new_methods, TRAPS) {
+
+  if (new_methods->length() > 0) {
+    ConstantPool* cp = bpool->create_constant_pool(CHECK);
+    if (cp != klass->constants()) {
+      klass->class_loader_data()->add_to_deallocate_list(klass->constants());
+      klass->set_constants(cp);
+      cp->set_pool_holder(klass);
+
+      for (int i = 0; i < new_methods->length(); ++i) {
+        new_methods->at(i)->set_constants(cp);
+      }
+      for (int i = 0; i < klass->methods()->length(); ++i) {
+        Method* mo = klass->methods()->at(i);
+        mo->set_constants(cp);
+      }
+    }
+  }
+}
+
+// A "bridge" is a method created by javac to bridge the gap between
+// an implementation and a generically-compatible, but different, signature.
+// Bridges have actual bytecode implementation in classfiles.
+// An "overpass", on the other hand, performs the same function as a bridge
+// but does not occur in a classfile; the VM creates the overpass itself
+// when it needs a path from a call site to a default method and no
+// bridge exists.
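+//
+// Illustrative contrast (hypothetical types):
+//
+//   class Box implements Comparable<Box> {
+//     public int compareTo(Box b) { ... }
+//     // javac emits a bridge: int compareTo(Object o) in the classfile
+//   }
+//
+//   interface I { default void m() {} }
+//   class C implements I {}
+//   // no classfile method exists for C.m; the VM adds an overpass that
+//   // redirects to the selected default, I.m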
+static void create_overpasses(
+    GrowableArray<EmptyVtableSlot*>* slots,
+    InstanceKlass* klass, TRAPS) {
+
+  GrowableArray<Method*> overpasses;
+  BytecodeConstantPool bpool(klass->constants());
+
+  for (int i = 0; i < slots->length(); ++i) {
+    EmptyVtableSlot* slot = slots->at(i);
+
+    if (slot->is_bound()) {
+      MethodFamily* method = slot->get_binding();
+      int max_stack = 0;
+      BytecodeBuffer buffer;
+
+#ifndef PRODUCT
+      if (TraceDefaultMethods) {
+        tty->print("for slot: ");
+        slot->print_on(tty);
+        tty->print_cr("");
+        if (method->has_target()) {
+          method->print_selected(tty, 1);
+        } else {
+          method->print_exception(tty, 1);
+        }
+      }
+#endif // ndef PRODUCT
+      if (method->has_target()) {
+        Method* selected = method->get_selected_target();
+        max_stack = assemble_redirect(
+            &bpool, &buffer, slot->signature(), selected, CHECK);
+      } else if (method->throws_exception()) {
+        max_stack = assemble_abstract_method_error(
+            &bpool, &buffer, method->get_exception_message(), CHECK);
+      }
+      AccessFlags flags = accessFlags_from(
+          JVM_ACC_PUBLIC | JVM_ACC_SYNTHETIC | JVM_ACC_BRIDGE);
+      Method* m = new_method(&bpool, &buffer, slot->name(), slot->signature(),
+          flags, max_stack, slot->size_of_parameters(),
+          ConstMethod::OVERPASS, CHECK);
+      if (m != NULL) {
+        overpasses.push(m);
+      }
+    }
+  }
+
+#ifndef PRODUCT
+  if (TraceDefaultMethods) {
+    tty->print_cr("Created %d overpass methods", overpasses.length());
+  }
+#endif // ndef PRODUCT
+
+  switchover_constant_pool(&bpool, klass, &overpasses, CHECK);
+  merge_in_new_methods(klass, &overpasses, CHECK);
+}
+
+static void sort_methods(GrowableArray<Method*>* methods) {
+  // Note that this must sort using the same key as is used for sorting
+  // methods in InstanceKlass.
+  bool sorted = true;
+  for (int i = methods->length() - 1; i > 0; --i) {
+    for (int j = 0; j < i; ++j) {
+      Method* m1 = methods->at(j);
+      Method* m2 = methods->at(j + 1);
+      if ((uintptr_t)m1->name() > (uintptr_t)m2->name()) {
+        methods->at_put(j, m2);
+        methods->at_put(j + 1, m1);
+        sorted = false;
+      }
+    }
+    if (sorted) break;
+    sorted = true;
+  }
+#ifdef ASSERT
+  uintptr_t prev = 0;
+  for (int i = 0; i < methods->length(); ++i) {
+    Method* mh = methods->at(i);
+    uintptr_t nv = (uintptr_t)mh->name();
+    assert(nv >= prev, "Incorrect overpass method ordering");
+    prev = nv;
+  }
+#endif
+}
+
+static void merge_in_new_methods(InstanceKlass* klass,
+    GrowableArray<Method*>* new_methods, TRAPS) {
+
+  enum { ANNOTATIONS, PARAMETERS, DEFAULTS, NUM_ARRAYS };
+
+  Array<AnnotationArray*>* original_annots[NUM_ARRAYS] = { NULL };
+
+  Array<Method*>* original_methods = klass->methods();
+  Annotations* annots = klass->annotations();
+  if (annots != NULL) {
+    original_annots[ANNOTATIONS] = annots->methods_annotations();
+    original_annots[PARAMETERS]  = annots->methods_parameter_annotations();
+    original_annots[DEFAULTS]    = annots->methods_default_annotations();
+  }
+
+  Array<int>* original_ordering = klass->method_ordering();
+  Array<int>* merged_ordering = Universe::the_empty_int_array();
+
+  int new_size = klass->methods()->length() + new_methods->length();
+
+  Array<AnnotationArray*>* merged_annots[NUM_ARRAYS];
+
+  Array<Method*>* merged_methods = MetadataFactory::new_array<Method*>(
+      klass->class_loader_data(), new_size, NULL, CHECK);
+  for (int i = 0; i < NUM_ARRAYS; ++i) {
+    if (original_annots[i] != NULL) {
+      merged_annots[i] = MetadataFactory::new_array<AnnotationArray*>(
+          klass->class_loader_data(), new_size, CHECK);
+    } else {
+      merged_annots[i] = NULL;
+    }
+  }
+  if (original_ordering != NULL && original_ordering->length() > 0) {
+    merged_ordering = MetadataFactory::new_array<int>(
+        klass->class_loader_data(), new_size, CHECK);
+  }
+  int method_order_index = klass->methods()->length();
+
+  sort_methods(new_methods);
+
+  // Perform grand merge of existing methods and new methods
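+  // Illustrative example: merging original methods [a, c] with new methods
+  // [b], ordered by the Symbol address of each name, yields [a, b, c];
+  // original slots are cleared as their entries move to the merged array.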
+  int orig_idx = 0;
+  int new_idx = 0;
+
+  for (int i = 0; i < new_size; ++i) {
+    Method* orig_method = NULL;
+    Method* new_method = NULL;
+    if (orig_idx < original_methods->length()) {
+      orig_method = original_methods->at(orig_idx);
+    }
+    if (new_idx < new_methods->length()) {
+      new_method = new_methods->at(new_idx);
+    }
+
+    if (orig_method != NULL &&
+        (new_method == NULL || orig_method->name() < new_method->name())) {
+      merged_methods->at_put(i, orig_method);
+      original_methods->at_put(orig_idx, NULL);
+      for (int j = 0; j < NUM_ARRAYS; ++j) {
+        if (merged_annots[j] != NULL) {
+          merged_annots[j]->at_put(i, original_annots[j]->at(orig_idx));
+          original_annots[j]->at_put(orig_idx, NULL);
+        }
+      }
+      if (merged_ordering->length() > 0) {
+        merged_ordering->at_put(i, original_ordering->at(orig_idx));
+      }
+      ++orig_idx;
+    } else {
+      merged_methods->at_put(i, new_method);
+      if (merged_ordering->length() > 0) {
+        merged_ordering->at_put(i, method_order_index++);
+      }
+      ++new_idx;
+    }
+    // update idnum for new location
+    merged_methods->at(i)->set_method_idnum(i);
+  }
+
+  // Verify correct order
+#ifdef ASSERT
+  uintptr_t prev = 0;
+  for (int i = 0; i < merged_methods->length(); ++i) {
+    Method* mo = merged_methods->at(i);
+    uintptr_t nv = (uintptr_t)mo->name();
+    assert(nv >= prev, "Incorrect method ordering");
+    prev = nv;
+  }
+#endif
+
+  // Replace klass methods with new merged lists
+  klass->set_methods(merged_methods);
+  if (annots != NULL) {
+    annots->set_methods_annotations(merged_annots[ANNOTATIONS]);
+    annots->set_methods_parameter_annotations(merged_annots[PARAMETERS]);
+    annots->set_methods_default_annotations(merged_annots[DEFAULTS]);
+  } else {
+    assert(merged_annots[ANNOTATIONS] == NULL, "Must be");
+    assert(merged_annots[PARAMETERS] == NULL, "Must be");
+    assert(merged_annots[DEFAULTS] == NULL, "Must be");
+  }
+
+  ClassLoaderData* cld = klass->class_loader_data();
+  MetadataFactory::free_array(cld, original_methods);
+  for (int i = 0; i < NUM_ARRAYS; ++i) {
+    MetadataFactory::free_array(cld, original_annots[i]);
+  }
+  if (original_ordering != NULL && original_ordering->length() > 0) {
+    klass->set_method_ordering(merged_ordering);
+    MetadataFactory::free_array(cld, original_ordering);
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/classfile/defaultMethods.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_CLASSFILE_DEFAULTMETHODS_HPP
+#define SHARE_VM_CLASSFILE_DEFAULTMETHODS_HPP
+
+#include "runtime/handles.hpp"
+#include "utilities/growableArray.hpp"
+#include "utilities/exceptions.hpp"
+
+class InstanceKlass;
+class Symbol;
+class Method;
+
+class DefaultMethods : AllStatic {
+ public:
+
+  // Analyzes the class and determines which default methods are inherited
+  // from interfaces (and have no other implementation).  For each such method
+  // (and each different signature the method could have), create an
+  // "overpass" method that is an instance method that redirects to the
+  // default method.  Overpass methods are added to the methods lists for
+  // the class.
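+  //
+  // Illustrative example:
+  //   interface I { default int m() { return 1; } }
+  //   class C implements I {}
+  // generates an overpass C.m()I that redirects to I.m.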
+  static void generate_default_methods(
+      InstanceKlass* klass, GrowableArray<Method*>* mirandas, TRAPS);
+
+
+  // Called during linking when an invokespecial to a direct interface
+  // method is found.  Selects and returns a method if there is a unique
+  // default method in the 'super_iface' part of the hierarchy which is
+  // also a candidate default for 'this_klass'.  Otherwise throws an AME.
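+  // (Illustrative: the AME case also covers ambiguity, e.g. when two
+  // unrelated superinterfaces each supply a matching default and no
+  // unique candidate exists.)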
+  static Method* find_super_default(
+      Klass* this_klass, Klass* super_iface,
+      Symbol* method_name, Symbol* method_sig, TRAPS);
+};
+
+#endif // SHARE_VM_CLASSFILE_DEFAULTMETHODS_HPP
--- a/src/share/vm/classfile/dictionary.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/dictionary.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -346,7 +346,7 @@
                            KlassHandle obj) {
   assert_locked_or_safepoint(SystemDictionary_lock);
   assert(obj() != NULL, "adding NULL obj");
-  assert(Klass::cast(obj())->name() == class_name, "sanity check on name");
+  assert(obj()->name() == class_name, "sanity check on name");
 
   unsigned int hash = compute_hash(class_name, loader_data);
   int index = hash_to_index(hash);
@@ -553,7 +553,7 @@
       bool is_defining_class =
          (loader_data == InstanceKlass::cast(e)->class_loader_data());
       tty->print("%s%s", is_defining_class ? " " : "^",
-                   Klass::cast(e)->external_name());
+                   e->external_name());
 
         tty->print(", loader ");
       loader_data->print_value();
@@ -575,12 +575,12 @@
                           probe = probe->next()) {
       Klass* e = probe->klass();
       ClassLoaderData* loader_data = probe->loader_data();
-      guarantee(Klass::cast(e)->oop_is_instance(),
+      guarantee(e->oop_is_instance(),
                               "Verify of system dictionary failed");
       // class loader must be present;  a null class loader is the
       // bootstrap loader
       guarantee(loader_data != NULL || DumpSharedSpaces ||
-                loader_data->is_the_null_class_loader_data() ||
+                loader_data->class_loader() == NULL ||
                 loader_data->class_loader()->is_instance(),
                 "checking type of class_loader");
       e->verify();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/classfile/genericSignatures.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,1272 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "classfile/genericSignatures.hpp"
+#include "classfile/symbolTable.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "memory/resourceArea.hpp"
+
+namespace generic {
+
+// Helper class for parsing the generic signature Symbol in klass and methods
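+// For example (illustrative), a class signature Symbol such as
+//   <T:Ljava/lang/Object;>Ljava/lang/Object;Ljava/lang/Comparable<TT;>;
+// is consumed one character at a time via read()/peek(), while set_mark()
+// and identifier_from_mark() capture identifier substrings such as 'T'.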
+class DescriptorStream : public ResourceObj {
+ private:
+  Symbol* _symbol;
+  int _offset;
+  int _mark;
+  const char* _parse_error;
+
+  void set_parse_error(const char* error) {
+    assert(error != NULL, "Can't set NULL error string");
+    _parse_error = error;
+  }
+
+ public:
+  DescriptorStream(Symbol* sym)
+      : _symbol(sym), _offset(0), _mark(-1), _parse_error(NULL) {}
+
+  const char* parse_error() const {
+    return _parse_error;
+  }
+
+  bool at_end() { return _offset >= _symbol->utf8_length(); }
+
+  char peek() {
+    if (at_end()) {
+      set_parse_error("Peeking past end of signature");
+      return '\0';
+    } else {
+      return _symbol->byte_at(_offset);
+    }
+  }
+
+  char read() {
+    if (at_end()) {
+      set_parse_error("Reading past end of signature");
+      return '\0';
+    } else {
+      return _symbol->byte_at(_offset++);
+    }
+  }
+
+  void read(char expected) {
+    char c = read();
+    assert_char(c, expected, 0);
+  }
+
+  void assert_char(char c, char expected, int pos = -1) {
+    if (c != expected) {
+      const char* fmt = "Parse error at %d: expected %c but got %c";
+      size_t len = strlen(fmt) + 5;
+      char* buffer = NEW_RESOURCE_ARRAY(char, len);
+      jio_snprintf(buffer, len, fmt, _offset + pos, expected, c);
+      set_parse_error(buffer);
+    }
+  }
+
+  void push(char c) {
+    assert(c == _symbol->byte_at(_offset - 1), "Pushing back wrong value");
+    --_offset;
+  }
+
+  void expect_end() {
+    if (!at_end()) {
+      set_parse_error("Unexpected data trailing signature");
+    }
+  }
+
+  bool has_mark() { return _mark != -1; }
+
+  void set_mark() {
+    _mark = _offset;
+  }
+
+  Identifier* identifier_from_mark() {
+    assert(has_mark(), "Mark should be set");
+    if (!has_mark()) {
+      set_parse_error("Expected mark to be set");
+      return NULL;
+    } else {
+      Identifier* id = new Identifier(_symbol, _mark, _offset - 1);
+      _mark = -1;
+      return id;
+    }
+  }
+};
+
+
+#define CHECK_FOR_PARSE_ERROR()         \
+  if (STREAM->parse_error() != NULL) {   \
+    if (VerifyGenericSignatures) {      \
+      fatal(STREAM->parse_error());      \
+    }                                   \
+    return NULL;                        \
+  } 0
+
+#define READ() STREAM->read(); CHECK_FOR_PARSE_ERROR()
+#define PEEK() STREAM->peek(); CHECK_FOR_PARSE_ERROR()
+#define PUSH(c) STREAM->push(c)
+#define EXPECT(c) STREAM->read(c); CHECK_FOR_PARSE_ERROR()
+#define EXPECTED(c, ch) STREAM->assert_char(c, ch); CHECK_FOR_PARSE_ERROR()
+#define EXPECT_END() STREAM->expect_end(); CHECK_FOR_PARSE_ERROR()
+
+#define CHECK_STREAM STREAM); CHECK_FOR_PARSE_ERROR(); (0
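+
+// Note (illustrative): CHECK_FOR_PARSE_ERROR ends in "} 0" and CHECK_STREAM
+// spans the closing parenthesis of its call site, so every expansion still
+// parses as complete statements.  For example, f(CHECK_STREAM); expands to:
+//   f(STREAM); CHECK_FOR_PARSE_ERROR(); (0);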
+
+#ifndef PRODUCT
+void Identifier::print_on(outputStream* str) const {
+  for (int i = _begin; i < _end; ++i) {
+    str->print("%c", (char)_sym->byte_at(i));
+  }
+}
+#endif // ndef PRODUCT
+
+bool Identifier::equals(Identifier* other) {
+  if (_sym == other->_sym && _begin == other->_begin && _end == other->_end) {
+    return true;
+  } else if (_end - _begin != other->_end - other->_begin) {
+    return false;
+  } else {
+    size_t len = _end - _begin;
+    char* addr = ((char*)_sym->bytes()) + _begin;
+    char* oaddr = ((char*)other->_sym->bytes()) + other->_begin;
+    return strncmp(addr, oaddr, len) == 0;
+  }
+}
+
+bool Identifier::equals(Symbol* sym) {
+  Identifier id(sym, 0, sym->utf8_length());
+  return equals(&id);
+}
+
+/**
+ * A formal type parameter may be found in the enclosing class, but it could
+ * also come from an enclosing method or outer class, in the case of inner-outer
+ * classes or anonymous classes.  For example:
+ *
+ * class Outer<T,V> {
+ *   class Inner<W> {
+ *     void m(T t, V v, W w);
+ *   }
+ * }
+ *
+ * In this case, the type variables in m()'s signature are not all found in the
+ * immediate enclosing class (Inner).  Class Inner has only type parameter W,
+ * but its outer_class field will reference Outer's descriptor, which contains
+ * T & V (no outer_method in this case).
+ *
+ * If you have an anonymous class, it has both an enclosing method *and* an
+ * enclosing class where type parameters can be declared:
+ *
+ * class MOuter<T> {
+ *   <V> void bar(V v) {
+ *     Runnable r = new Runnable() {
+ *       public void run() {}
+ *       public void foo(T t, V v) { ... }
+ *     };
+ *   }
+ * }
+ *
+ * In this case, foo will be a member of some class, Runnable$1, which has no
+ * formal parameters itself, but has an outer_method (bar()) which provides
+ * type parameter V, and an outer class MOuter with type parameter T.
+ *
+ * It is also possible that the outer class is itself an inner class to some
+ * other class (or an anonymous class with an enclosing method), so we need to
+ * follow the outer_class/outer_method chain to its end when looking for a
+ * type parameter.
+ */
+TypeParameter* Descriptor::find_type_parameter(Identifier* id, int* depth) {
+
+  int current_depth = 0;
+
+  MethodDescriptor* outer_method = as_method_signature();
+  ClassDescriptor* outer_class = as_class_signature();
+
+  if (outer_class == NULL) { // 'this' is a method signature; use the holder
+    outer_class = outer_method->outer_class();
+  }
+
+  while (outer_method != NULL || outer_class != NULL) {
+    if (outer_method != NULL) {
+      for (int i = 0; i < outer_method->type_parameters().length(); ++i) {
+        TypeParameter* p = outer_method->type_parameters().at(i);
+        if (p->identifier()->equals(id)) {
+          *depth = -1; // indicates that this is a method parameter
+          return p;
+        }
+      }
+    }
+    if (outer_class != NULL) {
+      for (int i = 0; i < outer_class->type_parameters().length(); ++i) {
+        TypeParameter* p = outer_class->type_parameters().at(i);
+        if (p->identifier()->equals(id)) {
+          *depth = current_depth;
+          return p;
+        }
+      }
+      outer_method = outer_class->outer_method();
+      outer_class = outer_class->outer_class();
+      ++current_depth;
+    }
+  }
+
+  if (VerifyGenericSignatures) {
+    fatal("Could not resolve identifier");
+  }
+
+  return NULL;
+}
+
+ClassDescriptor* ClassDescriptor::parse_generic_signature(Klass* klass, TRAPS) {
+  return parse_generic_signature(klass, NULL, CHECK_NULL);
+}
+
+ClassDescriptor* ClassDescriptor::parse_generic_signature(
+      Klass* klass, Symbol* original_name, TRAPS) {
+
+  InstanceKlass* ik = InstanceKlass::cast(klass);
+  Symbol* sym = ik->generic_signature();
+
+  ClassDescriptor* spec;
+
+  if (sym == NULL || (spec = ClassDescriptor::parse_generic_signature(sym)) == NULL) {
+    spec = ClassDescriptor::placeholder(ik);
+  }
+
+  u2 outer_index = get_outer_class_index(ik, CHECK_NULL);
+  if (outer_index != 0) {
+    if (original_name == NULL) {
+      original_name = ik->name();
+    }
+    Handle class_loader = Handle(THREAD, ik->class_loader());
+    Handle protection_domain = Handle(THREAD, ik->protection_domain());
+
+    Symbol* outer_name = ik->constants()->klass_name_at(outer_index);
+    Klass* outer = SystemDictionary::find(
+        outer_name, class_loader, protection_domain, CHECK_NULL);
+    if (outer == NULL && !THREAD->is_Compiler_thread()) {
+      outer = SystemDictionary::resolve_super_or_fail(original_name,
+          outer_name, class_loader, protection_domain, false, CHECK_NULL);
+    }
+
+    InstanceKlass* outer_ik = NULL;
+    ClassDescriptor* outer_spec = NULL;
+    if (outer == NULL) {
+      outer_spec = ClassDescriptor::placeholder(ik);
+      assert(false, "Outer class not loaded and not loadable from here");
+    } else {
+      outer_ik = InstanceKlass::cast(outer);
+      outer_spec = parse_generic_signature(outer, original_name, CHECK_NULL);
+    }
+    spec->set_outer_class(outer_spec);
+
+    u2 encl_method_idx = ik->enclosing_method_method_index();
+    if (encl_method_idx != 0 && outer_ik != NULL) {
+      ConstantPool* cp = ik->constants();
+      u2 name_index = cp->name_ref_index_at(encl_method_idx);
+      u2 sig_index = cp->signature_ref_index_at(encl_method_idx);
+      Symbol* name = cp->symbol_at(name_index);
+      Symbol* sig = cp->symbol_at(sig_index);
+      Method* m = outer_ik->find_method(name, sig);
+      if (m != NULL) {
+        Symbol* gsig = m->generic_signature();
+        if (gsig != NULL) {
+          MethodDescriptor* gms = MethodDescriptor::parse_generic_signature(gsig, outer_spec);
+          spec->set_outer_method(gms);
+        }
+      } else if (VerifyGenericSignatures) {
+        ResourceMark rm;
+        stringStream ss;
+        ss.print("Could not find method %s %s in class %s",
+          name->as_C_string(), sig->as_C_string(), outer_name->as_C_string());
+        fatal(ss.as_string());
+      }
+    }
+  }
+
+  spec->bind_variables_to_parameters();
+  return spec;
+}
+
+ClassDescriptor* ClassDescriptor::placeholder(InstanceKlass* klass) {
+  GrowableArray<TypeParameter*> formals;
+  GrowableArray<ClassType*> interfaces;
+  ClassType* super_type = NULL;
+
+  Klass* super_klass = klass->super();
+  if (super_klass != NULL) {
+    InstanceKlass* super = InstanceKlass::cast(super_klass);
+    super_type = ClassType::from_symbol(super->name());
+  }
+
+  for (int i = 0; i < klass->local_interfaces()->length(); ++i) {
+    InstanceKlass* iface = InstanceKlass::cast(klass->local_interfaces()->at(i));
+    interfaces.append(ClassType::from_symbol(iface->name()));
+  }
+  return new ClassDescriptor(formals, super_type, interfaces);
+}
+
+ClassDescriptor* ClassDescriptor::parse_generic_signature(Symbol* sym) {
+
+  DescriptorStream ds(sym);
+  DescriptorStream* STREAM = &ds;
+
+  GrowableArray<TypeParameter*> parameters(8);
+  char c = READ();
+  if (c == '<') {
+    c = READ();
+    while (c != '>') {
+      PUSH(c);
+      TypeParameter* ftp = TypeParameter::parse_generic_signature(CHECK_STREAM);
+      parameters.append(ftp);
+      c = READ();
+    }
+  } else {
+    PUSH(c);
+  }
+
+  EXPECT('L');
+  ClassType* super = ClassType::parse_generic_signature(CHECK_STREAM);
+
+  GrowableArray<ClassType*> signatures(2);
+  while (!STREAM->at_end()) {
+    EXPECT('L');
+    ClassType* iface = ClassType::parse_generic_signature(CHECK_STREAM);
+    signatures.append(iface);
+  }
+
+  EXPECT_END();
+
+  return new ClassDescriptor(parameters, super, signatures);
+}
+
+#ifndef PRODUCT
+void ClassDescriptor::print_on(outputStream* str) const {
+  str->indent().print_cr("ClassDescriptor {");
+  {
+    streamIndentor si(str);
+    if (_type_parameters.length() > 0) {
+      str->indent().print_cr("Formals {");
+      {
+        streamIndentor si(str);
+        for (int i = 0; i < _type_parameters.length(); ++i) {
+          _type_parameters.at(i)->print_on(str);
+        }
+      }
+      str->indent().print_cr("}");
+    }
+    if (_super != NULL) {
+      str->indent().print_cr("Superclass: ");
+      {
+        streamIndentor si(str);
+        _super->print_on(str);
+      }
+    }
+    if (_interfaces.length() > 0) {
+      str->indent().print_cr("SuperInterfaces: {");
+      {
+        streamIndentor si(str);
+        for (int i = 0; i < _interfaces.length(); ++i) {
+          _interfaces.at(i)->print_on(str);
+        }
+      }
+      str->indent().print_cr("}");
+    }
+    if (_outer_method != NULL) {
+      str->indent().print_cr("Outer Method: {");
+      {
+        streamIndentor si(str);
+        _outer_method->print_on(str);
+      }
+      str->indent().print_cr("}");
+    }
+    if (_outer_class != NULL) {
+      str->indent().print_cr("Outer Class: {");
+      {
+        streamIndentor si(str);
+        _outer_class->print_on(str);
+      }
+      str->indent().print_cr("}");
+    }
+  }
+  str->indent().print_cr("}");
+}
+#endif // ndef PRODUCT
+
+ClassType* ClassDescriptor::interface_desc(Symbol* sym) {
+  for (int i = 0; i < _interfaces.length(); ++i) {
+    if (_interfaces.at(i)->identifier()->equals(sym)) {
+      return _interfaces.at(i);
+    }
+  }
+  if (VerifyGenericSignatures) {
+    fatal("Did not find expected interface");
+  }
+  return NULL;
+}
+
+void ClassDescriptor::bind_variables_to_parameters() {
+  if (_outer_class != NULL) {
+    _outer_class->bind_variables_to_parameters();
+  }
+  if (_outer_method != NULL) {
+    _outer_method->bind_variables_to_parameters();
+  }
+  for (int i = 0; i < _type_parameters.length(); ++i) {
+    _type_parameters.at(i)->bind_variables_to_parameters(this, i);
+  }
+  if (_super != NULL) {
+    _super->bind_variables_to_parameters(this);
+  }
+  for (int i = 0; i < _interfaces.length(); ++i) {
+    _interfaces.at(i)->bind_variables_to_parameters(this);
+  }
+}
+
+ClassDescriptor* ClassDescriptor::canonicalize(Context* ctx) {
+
+  GrowableArray<TypeParameter*> type_params(_type_parameters.length());
+  for (int i = 0; i < _type_parameters.length(); ++i) {
+    type_params.append(_type_parameters.at(i)->canonicalize(ctx, 0));
+  }
+
+  ClassDescriptor* outer = _outer_class == NULL ? NULL :
+      _outer_class->canonicalize(ctx);
+
+  ClassType* super = _super == NULL ? NULL : _super->canonicalize(ctx, 0);
+
+  GrowableArray<ClassType*> interfaces(_interfaces.length());
+  for (int i = 0; i < _interfaces.length(); ++i) {
+    interfaces.append(_interfaces.at(i)->canonicalize(ctx, 0));
+  }
+
+  MethodDescriptor* md = _outer_method == NULL ? NULL :
+      _outer_method->canonicalize(ctx);
+
+  return new ClassDescriptor(type_params, super, interfaces, outer, md);
+}
+
+u2 ClassDescriptor::get_outer_class_index(InstanceKlass* klass, TRAPS) {
+  int inner_index = InstanceKlass::inner_class_inner_class_info_offset;
+  int outer_index = InstanceKlass::inner_class_outer_class_info_offset;
+  int name_offset = InstanceKlass::inner_class_inner_name_offset;
+  int next_offset = InstanceKlass::inner_class_next_offset;
+
+  if (klass->inner_classes() == NULL || klass->inner_classes()->length() == 0) {
+    // No inner class info => no declaring class
+    return 0;
+  }
+
+  Array<u2>* i_icls = klass->inner_classes();
+  ConstantPool* i_cp = klass->constants();
+  int i_length = i_icls->length();
+
+  // Find inner_klass attribute
+  for (int i = 0; i + next_offset < i_length; i += next_offset) {
+    u2 ioff = i_icls->at(i + inner_index);
+    u2 ooff = i_icls->at(i + outer_index);
+    u2 noff = i_icls->at(i + name_offset);
+    if (ioff != 0) {
+      // Check to see if the name matches the class we're looking for
+      // before attempting to find the class.
+      if (i_cp->klass_name_at_matches(klass, ioff) && ooff != 0) {
+        return ooff;
+      }
+    }
+  }
+
+  // It may be anonymous; try for that.
+  u2 encl_method_class_idx = klass->enclosing_method_class_index();
+  if (encl_method_class_idx != 0) {
+    return encl_method_class_idx;
+  }
+
+  return 0;
+}
+
+MethodDescriptor* MethodDescriptor::parse_generic_signature(Method* m, ClassDescriptor* outer) {
+  Symbol* generic_sig = m->generic_signature();
+  MethodDescriptor* md = NULL;
+  if (generic_sig == NULL || (md = parse_generic_signature(generic_sig, outer)) == NULL) {
+    md = parse_generic_signature(m->signature(), outer);
+  }
+  assert(md != NULL, "Could not parse method signature");
+  md->bind_variables_to_parameters();
+  return md;
+}
+
+MethodDescriptor* MethodDescriptor::parse_generic_signature(Symbol* sym, ClassDescriptor* outer) {
+
+  DescriptorStream ds(sym);
+  DescriptorStream* STREAM = &ds;
+
+  GrowableArray<TypeParameter*> params(8);
+  char c = READ();
+  if (c == '<') {
+    c = READ();
+    while (c != '>') {
+      PUSH(c);
+      TypeParameter* ftp = TypeParameter::parse_generic_signature(CHECK_STREAM);
+      params.append(ftp);
+      c = READ();
+    }
+  } else {
+    PUSH(c);
+  }
+
+  EXPECT('(');
+
+  GrowableArray<Type*> parameters(8);
+  c = READ();
+  while (c != ')') {
+    PUSH(c);
+    Type* arg = Type::parse_generic_signature(CHECK_STREAM);
+    parameters.append(arg);
+    c = READ();
+  }
+
+  Type* rt = Type::parse_generic_signature(CHECK_STREAM);
+
+  GrowableArray<Type*> throws;
+  while (!STREAM->at_end()) {
+    EXPECT('^');
+    Type* spec = Type::parse_generic_signature(CHECK_STREAM);
+    throws.append(spec);
+  }
+
+  return new MethodDescriptor(params, outer, parameters, rt, throws);
+}
+
+void MethodDescriptor::bind_variables_to_parameters() {
+  for (int i = 0; i < _type_parameters.length(); ++i) {
+    _type_parameters.at(i)->bind_variables_to_parameters(this, i);
+  }
+  for (int i = 0; i < _parameters.length(); ++i) {
+    _parameters.at(i)->bind_variables_to_parameters(this);
+  }
+  _return_type->bind_variables_to_parameters(this);
+  for (int i = 0; i < _throws.length(); ++i) {
+    _throws.at(i)->bind_variables_to_parameters(this);
+  }
+}
+
+bool MethodDescriptor::covariant_match(MethodDescriptor* other, Context* ctx) {
+
+  if (_parameters.length() == other->_parameters.length()) {
+    for (int i = 0; i < _parameters.length(); ++i) {
+      if (!_parameters.at(i)->covariant_match(other->_parameters.at(i), ctx)) {
+        return false;
+      }
+    }
+
+    if (_return_type->as_primitive() != NULL) {
+      return _return_type->covariant_match(other->_return_type, ctx);
+    } else {
+      // return type is a reference
+      return other->_return_type->as_class() != NULL ||
+             other->_return_type->as_variable() != NULL ||
+             other->_return_type->as_array() != NULL;
+    }
+  } else {
+    return false;
+  }
+}
+
+MethodDescriptor* MethodDescriptor::canonicalize(Context* ctx) {
+
+  GrowableArray<TypeParameter*> type_params(_type_parameters.length());
+  for (int i = 0; i < _type_parameters.length(); ++i) {
+    type_params.append(_type_parameters.at(i)->canonicalize(ctx, 0));
+  }
+
+  ClassDescriptor* outer = _outer_class == NULL ? NULL :
+      _outer_class->canonicalize(ctx);
+
+  GrowableArray<Type*> params(_parameters.length());
+  for (int i = 0; i < _parameters.length(); ++i) {
+    params.append(_parameters.at(i)->canonicalize(ctx, 0));
+  }
+
+  Type* rt = _return_type->canonicalize(ctx, 0);
+
+  GrowableArray<Type*> throws(_throws.length());
+  for (int i = 0; i < _throws.length(); ++i) {
+    throws.append(_throws.at(i)->canonicalize(ctx, 0));
+  }
+
+  return new MethodDescriptor(type_params, outer, params, rt, throws);
+}
+
+#ifndef PRODUCT
+TempNewSymbol MethodDescriptor::reify_signature(Context* ctx, TRAPS) {
+  stringStream ss(256);
+
+  ss.print("(");
+  for (int i = 0; i < _parameters.length(); ++i) {
+    _parameters.at(i)->reify_signature(&ss, ctx);
+  }
+  ss.print(")");
+  _return_type->reify_signature(&ss, ctx);
+  return SymbolTable::new_symbol(ss.base(), (int)ss.size(), THREAD);
+}
+
+void MethodDescriptor::print_on(outputStream* str) const {
+  str->indent().print_cr("MethodDescriptor {");
+  {
+    streamIndentor si(str);
+    if (_type_parameters.length() > 0) {
+      str->indent().print_cr("Formals: {");
+      {
+        streamIndentor si(str);
+        for (int i = 0; i < _type_parameters.length(); ++i) {
+          _type_parameters.at(i)->print_on(str);
+        }
+      }
+      str->indent().print_cr("}");
+    }
+    str->indent().print_cr("Parameters: {");
+    {
+      streamIndentor si(str);
+      for (int i = 0; i < _parameters.length(); ++i) {
+        _parameters.at(i)->print_on(str);
+      }
+    }
+    str->indent().print_cr("}");
+    str->indent().print_cr("Return Type: ");
+    {
+      streamIndentor si(str);
+      _return_type->print_on(str);
+    }
+
+    if (_throws.length() > 0) {
+      str->indent().print_cr("Throws: {");
+      {
+        streamIndentor si(str);
+        for (int i = 0; i < _throws.length(); ++i) {
+          _throws.at(i)->print_on(str);
+        }
+      }
+      str->indent().print_cr("}");
+    }
+  }
+  str->indent().print_cr("}");
+}
+#endif // ndef PRODUCT
+
+TypeParameter* TypeParameter::parse_generic_signature(DescriptorStream* STREAM) {
+  STREAM->set_mark();
+  char c = READ();
+  while (c != ':') {
+    c = READ();
+  }
+
+  Identifier* id = STREAM->identifier_from_mark();
+
+  ClassType* class_bound = NULL;
+  GrowableArray<ClassType*> interface_bounds(8);
+
+  c = READ();
+  if (c != '>') {
+    if (c != ':') {
+      EXPECTED(c, 'L');
+      class_bound = ClassType::parse_generic_signature(CHECK_STREAM);
+      c = READ();
+    }
+
+    while (c == ':') {
+      EXPECT('L');
+      ClassType* fts = ClassType::parse_generic_signature(CHECK_STREAM);
+      interface_bounds.append(fts);
+      c = READ();
+    }
+  }
+  PUSH(c);
+
+  return new TypeParameter(id, class_bound, interface_bounds);
+}
+
+void TypeParameter::bind_variables_to_parameters(Descriptor* sig, int position) {
+  if (_class_bound != NULL) {
+    _class_bound->bind_variables_to_parameters(sig);
+  }
+  for (int i = 0; i < _interface_bounds.length(); ++i) {
+    _interface_bounds.at(i)->bind_variables_to_parameters(sig);
+  }
+  _position = position;
+}
+
+Type* TypeParameter::resolve(
+    Context* ctx, int inner_depth, int ctx_depth) {
+
+  if (inner_depth == -1) {
+    // This indicates that the parameter is a method type parameter, which
+    // isn't resolveable using the class hierarchy context
+    return bound();
+  }
+
+  ClassType* provider = ctx->at_depth(ctx_depth);
+  if (provider != NULL) {
+    for (int i = 0; i < inner_depth && provider != NULL; ++i) {
+      provider = provider->outer_class();
+    }
+    if (provider != NULL) {
+      TypeArgument* arg = provider->type_argument_at(_position);
+      if (arg != NULL) {
+        Type* value = arg->lower_bound();
+        return value->canonicalize(ctx, ctx_depth + 1);
+      }
+    }
+  }
+
+  return bound();
+}
+
+TypeParameter* TypeParameter::canonicalize(Context* ctx, int ctx_depth) {
+  ClassType* bound = _class_bound == NULL ? NULL :
+     _class_bound->canonicalize(ctx, ctx_depth);
+
+  GrowableArray<ClassType*> ifaces(_interface_bounds.length());
+  for (int i = 0; i < _interface_bounds.length(); ++i) {
+    ifaces.append(_interface_bounds.at(i)->canonicalize(ctx, ctx_depth));
+  }
+
+  TypeParameter* ret = new TypeParameter(_identifier, bound, ifaces);
+  ret->_position = _position;
+  return ret;
+}
+
+ClassType* TypeParameter::bound() {
+  if (_class_bound != NULL) {
+    return _class_bound;
+  }
+
+  if (_interface_bounds.length() == 1) {
+    return _interface_bounds.at(0);
+  }
+
+  return ClassType::java_lang_Object(); // TODO: investigate this case
+}
+
+#ifndef PRODUCT
+void TypeParameter::print_on(outputStream* str) const {
+  str->indent().print_cr("Formal: {");
+  {
+    streamIndentor si(str);
+
+    str->indent().print("Identifier: ");
+    _identifier->print_on(str);
+    str->print_cr("");
+    if (_class_bound != NULL) {
+      str->indent().print_cr("Class Bound: ");
+      streamIndentor si(str);
+      _class_bound->print_on(str);
+    }
+    if (_interface_bounds.length() > 0) {
+      str->indent().print_cr("Interface Bounds: {");
+      {
+        streamIndentor si(str);
+        for (int i = 0; i < _interface_bounds.length(); ++i) {
+          _interface_bounds.at(i)->print_on(str);
+        }
+      }
+      str->indent().print_cr("}");
+    }
+    str->indent().print_cr("Ordinal Position: %d", _position);
+  }
+  str->indent().print_cr("}");
+}
+#endif // ndef PRODUCT
+
+Type* Type::parse_generic_signature(DescriptorStream* STREAM) {
+  char c = READ();
+  switch (c) {
+    case 'L':
+      return ClassType::parse_generic_signature(CHECK_STREAM);
+    case 'T':
+      return TypeVariable::parse_generic_signature(CHECK_STREAM);
+    case '[':
+      return ArrayType::parse_generic_signature(CHECK_STREAM);
+    default:
+      return new PrimitiveType(c);
+  }
+}
+
+Identifier* ClassType::parse_generic_signature_simple(GrowableArray<TypeArgument*>* args,
+    bool* has_inner, DescriptorStream* STREAM) {
+  STREAM->set_mark();
+
+  char c = READ();
+  while (c != ';' && c != '.' && c != '<') { c = READ(); }
+  Identifier* id = STREAM->identifier_from_mark();
+
+  if (c == '<') {
+    c = READ();
+    while (c != '>') {
+      PUSH(c);
+      TypeArgument* arg = TypeArgument::parse_generic_signature(CHECK_STREAM);
+      args->append(arg);
+      c = READ();
+    }
+    c = READ();
+  }
+
+  *has_inner = (c == '.');
+  if (!(*has_inner)) {
+    EXPECTED(c, ';');
+  }
+
+  return id;
+}
+
+ClassType* ClassType::parse_generic_signature(DescriptorStream* STREAM) {
+  return parse_generic_signature(NULL, CHECK_STREAM);
+}
+
+ClassType* ClassType::parse_generic_signature(ClassType* outer, DescriptorStream* STREAM) {
+  GrowableArray<TypeArgument*> args;
+  ClassType* gct = NULL;
+  bool has_inner = false;
+
+  Identifier* id = parse_generic_signature_simple(&args, &has_inner, STREAM);
+  if (id != NULL) {
+    gct = new ClassType(id, args, outer);
+
+    if (has_inner) {
+      gct = parse_generic_signature(gct, CHECK_STREAM);
+    }
+  }
+  return gct;
+}
+
+ClassType* ClassType::from_symbol(Symbol* sym) {
+  assert(sym != NULL, "Must not be null");
+  GrowableArray<TypeArgument*> args;
+  Identifier* id = new Identifier(sym, 0, sym->utf8_length());
+  return new ClassType(id, args, NULL);
+}
+
+ClassType* ClassType::java_lang_Object() {
+  return from_symbol(vmSymbols::java_lang_Object());
+}
+
+void ClassType::bind_variables_to_parameters(Descriptor* sig) {
+  for (int i = 0; i < _type_arguments.length(); ++i) {
+    _type_arguments.at(i)->bind_variables_to_parameters(sig);
+  }
+  if (_outer_class != NULL) {
+    _outer_class->bind_variables_to_parameters(sig);
+  }
+}
+
+TypeArgument* ClassType::type_argument_at(int i) {
+  if (i >= 0 && i < _type_arguments.length()) {
+    return _type_arguments.at(i);
+  } else {
+    return NULL;
+  }
+}
+
+#ifndef PRODUCT
+void ClassType::reify_signature(stringStream* ss, Context* ctx) {
+  ss->print("L");
+  _identifier->print_on(ss);
+  ss->print(";");
+}
+
+void ClassType::print_on(outputStream* str) const {
+  str->indent().print_cr("Class {");
+  {
+    streamIndentor si(str);
+    str->indent().print("Name: ");
+    _identifier->print_on(str);
+    str->print_cr("");
+    if (_type_arguments.length() != 0) {
+      str->indent().print_cr("Type Arguments: {");
+      {
+        streamIndentor si(str);
+        for (int j = 0; j < _type_arguments.length(); ++j) {
+          _type_arguments.at(j)->print_on(str);
+        }
+      }
+      str->indent().print_cr("}");
+    }
+    if (_outer_class != NULL) {
+      str->indent().print_cr("Outer Class: ");
+      streamIndentor sir(str);
+      _outer_class->print_on(str);
+    }
+  }
+  str->indent().print_cr("}");
+}
+#endif // ndef PRODUCT
+
+bool ClassType::covariant_match(Type* other, Context* ctx) {
+
+  if (other == this) {
+    return true;
+  }
+
+  TypeVariable* variable = other->as_variable();
+  if (variable != NULL) {
+    other = variable->resolve(ctx, 0);
+  }
+
+  ClassType* outer = outer_class();
+  ClassType* other_class = other->as_class();
+
+  if (other_class == NULL ||
+      (outer == NULL) != (other_class->outer_class() == NULL)) {
+    return false;
+  }
+
+  if (!_identifier->equals(other_class->_identifier)) {
+    return false;
+  }
+
+  if (outer != NULL && !outer->covariant_match(other_class->outer_class(), ctx)) {
+    return false;
+  }
+
+  return true;
+}
+
+ClassType* ClassType::canonicalize(Context* ctx, int ctx_depth) {
+
+  GrowableArray<TypeArgument*> args(_type_arguments.length());
+  for (int i = 0; i < _type_arguments.length(); ++i) {
+    args.append(_type_arguments.at(i)->canonicalize(ctx, ctx_depth));
+  }
+
+  ClassType* outer = _outer_class == NULL ? NULL :
+      _outer_class->canonicalize(ctx, ctx_depth);
+
+  return new ClassType(_identifier, args, outer);
+}
+
+TypeVariable* TypeVariable::parse_generic_signature(DescriptorStream* STREAM) {
+  STREAM->set_mark();
+  char c = READ();
+  while (c != ';') {
+    c = READ();
+  }
+  Identifier* id = STREAM->identifier_from_mark();
+
+  return new TypeVariable(id);
+}
+
+void TypeVariable::bind_variables_to_parameters(Descriptor* sig) {
+  _parameter = sig->find_type_parameter(_id, &_inner_depth);
+  if (VerifyGenericSignatures && _parameter == NULL) {
+    fatal("Could not find formal parameter");
+  }
+}
+
+Type* TypeVariable::resolve(Context* ctx, int ctx_depth) {
+  if (parameter() != NULL) {
+    return parameter()->resolve(ctx, inner_depth(), ctx_depth);
+  } else {
+    if (VerifyGenericSignatures) {
+      fatal("Type variable matches no parameter");
+    }
+    return NULL;
+  }
+}
+
+bool TypeVariable::covariant_match(Type* other, Context* ctx) {
+
+  if (other == this) {
+    return true;
+  }
+
+  Context my_context(NULL); // empty, results in erasure
+  Type* my_type = resolve(&my_context, 0);
+  if (my_type == NULL) {
+    return false;
+  }
+
+  return my_type->covariant_match(other, ctx);
+}
+
+Type* TypeVariable::canonicalize(Context* ctx, int ctx_depth) {
+  return resolve(ctx, ctx_depth);
+}
+
+#ifndef PRODUCT
+void TypeVariable::reify_signature(stringStream* ss, Context* ctx) {
+  Type* type = resolve(ctx, 0);
+  if (type != NULL) {
+    type->reify_signature(ss, ctx);
+  }
+}
+
+void TypeVariable::print_on(outputStream* str) const {
+  str->indent().print_cr("Type Variable {");
+  {
+    streamIndentor si(str);
+    str->indent().print("Name: ");
+    _id->print_on(str);
+    str->print_cr("");
+    str->indent().print_cr("Inner depth: %d", _inner_depth);
+  }
+  str->indent().print_cr("}");
+}
+#endif // ndef PRODUCT
+
+ArrayType* ArrayType::parse_generic_signature(DescriptorStream* STREAM) {
+  Type* base = Type::parse_generic_signature(CHECK_STREAM);
+  return new ArrayType(base);
+}
+
+void ArrayType::bind_variables_to_parameters(Descriptor* sig) {
+  assert(_base != NULL, "Invalid base");
+  _base->bind_variables_to_parameters(sig);
+}
+
+bool ArrayType::covariant_match(Type* other, Context* ctx) {
+  assert(_base != NULL, "Invalid base");
+
+  if (other == this) {
+    return true;
+  }
+
+  ArrayType* other_array = other->as_array();
+  return (other_array != NULL && _base->covariant_match(other_array->_base, ctx));
+}
+
+ArrayType* ArrayType::canonicalize(Context* ctx, int ctx_depth) {
+  assert(_base != NULL, "Invalid base");
+  return new ArrayType(_base->canonicalize(ctx, ctx_depth));
+}
+
+#ifndef PRODUCT
+void ArrayType::reify_signature(stringStream* ss, Context* ctx) {
+  assert(_base != NULL, "Invalid base");
+  ss->print("[");
+  _base->reify_signature(ss, ctx);
+}
+
+void ArrayType::print_on(outputStream* str) const {
+  str->indent().print_cr("Array {");
+  {
+    streamIndentor si(str);
+    _base->print_on(str);
+  }
+  str->indent().print_cr("}");
+}
+#endif // ndef PRODUCT
+
+bool PrimitiveType::covariant_match(Type* other, Context* ctx) {
+
+  PrimitiveType* other_prim = other->as_primitive();
+  return (other_prim != NULL && _type == other_prim->_type);
+}
+
+PrimitiveType* PrimitiveType::canonicalize(Context* ctx, int ctxd) {
+  return this;
+}
+
+#ifndef PRODUCT
+void PrimitiveType::reify_signature(stringStream* ss, Context* ctx) {
+  ss->print("%c", _type);
+}
+
+void PrimitiveType::print_on(outputStream* str) const {
+  str->indent().print_cr("Primitive: '%c'", _type);
+}
+#endif // ndef PRODUCT
+
+void PrimitiveType::bind_variables_to_parameters(Descriptor* sig) {
+}
+
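+// Parses one type argument; the cases below map (illustratively) to:
+//   '*'  unbounded wildcard      -> TypeArgument(Object, NULL)
+//   '+T' wildcard (? extends T)  -> TypeArgument(T, NULL)
+//   '-T' wildcard (? super T)    -> TypeArgument(Object, T)
+//   'T'  exact type              -> TypeArgument(T, T)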
+TypeArgument* TypeArgument::parse_generic_signature(DescriptorStream* STREAM) {
+  char c = READ();
+  Type* type = NULL;
+
+  switch (c) {
+    case '*':
+      return new TypeArgument(ClassType::java_lang_Object(), NULL);
+    default:
+      PUSH(c);
+      // fall-through
+    case '+':
+    case '-':
+      type = Type::parse_generic_signature(CHECK_STREAM);
+      if (c == '+') {
+        return new TypeArgument(type, NULL);
+      } else if (c == '-') {
+        return new TypeArgument(ClassType::java_lang_Object(), type);
+      } else {
+        return new TypeArgument(type, type);
+      }
+  }
+}
+
+void TypeArgument::bind_variables_to_parameters(Descriptor* sig) {
+  assert(_lower_bound != NULL, "Invalid lower bound");
+  _lower_bound->bind_variables_to_parameters(sig);
+  if (_upper_bound != NULL && _upper_bound != _lower_bound) {
+    _upper_bound->bind_variables_to_parameters(sig);
+  }
+}
+
+bool TypeArgument::covariant_match(TypeArgument* other, Context* ctx) {
+  assert(_lower_bound != NULL, "Invalid lower bound");
+
+  if (other == this) {
+    return true;
+  }
+
+  if (!_lower_bound->covariant_match(other->lower_bound(), ctx)) {
+    return false;
+  }
+  return true;
+}
+
+TypeArgument* TypeArgument::canonicalize(Context* ctx, int ctx_depth) {
+  assert(_lower_bound != NULL, "Invalid lower bound");
+  Type* lower = _lower_bound->canonicalize(ctx, ctx_depth);
+  Type* upper = NULL;
+
+  if (_upper_bound == _lower_bound) {
+    upper = lower;
+  } else if (_upper_bound != NULL) {
+    upper = _upper_bound->canonicalize(ctx, ctx_depth);
+  }
+
+  return new TypeArgument(lower, upper);
+}
+
+#ifndef PRODUCT
+void TypeArgument::print_on(outputStream* str) const {
+  str->indent().print_cr("TypeArgument {");
+  {
+    streamIndentor si(str);
+    if (_lower_bound != NULL) {
+      str->indent().print("Lower bound: ");
+      _lower_bound->print_on(str);
+    }
+    if (_upper_bound != NULL) {
+      str->indent().print("Upper bound: ");
+      _upper_bound->print_on(str);
+    }
+  }
+  str->indent().print_cr("}");
+}
+#endif // ndef PRODUCT
+
+void Context::Mark::destroy() {
+  if (is_active()) {
+    _context->reset_to_mark(_marked_size);
+  }
+  deactivate();
+}
+
+void Context::apply_type_arguments(
+    InstanceKlass* current, InstanceKlass* super, TRAPS) {
+  assert(_cache != NULL, "Cannot use an empty context");
+  ClassType* spec = NULL;
+  if (current != NULL) {
+    ClassDescriptor* descriptor = _cache->descriptor_for(current, CHECK);
+    if (super == current->super()) {
+      spec = descriptor->super();
+    } else {
+      spec = descriptor->interface_desc(super->name());
+    }
+    if (spec != NULL) {
+      _type_arguments.push(spec);
+    }
+  }
+}
+
+void Context::reset_to_mark(int size) {
+  _type_arguments.trunc_to(size);
+}
+
+ClassType* Context::at_depth(int i) const {
+  if (i < _type_arguments.length()) {
+    return _type_arguments.at(_type_arguments.length() - 1 - i);
+  }
+  return NULL;
+}
+
+#ifndef PRODUCT
+void Context::print_on(outputStream* str) const {
+  str->indent().print_cr("Context {");
+  for (int i = 0; i < _type_arguments.length(); ++i) {
+    streamIndentor si(str);
+    str->indent().print("leval %d: ", i);
+    ClassType* ct = at_depth(i);
+    if (ct == NULL) {
+      str->print_cr("<empty>");
+      continue;
+    } else {
+      str->print_cr("{");
+    }
+
+    for (int j = 0; j < ct->type_arguments_length(); ++j) {
+      streamIndentor si(str);
+      TypeArgument* ta = ct->type_argument_at(j);
+      Type* bound = ta->lower_bound();
+      bound->print_on(str);
+    }
+    str->indent().print_cr("}");
+  }
+  str->indent().print_cr("}");
+}
+#endif // ndef PRODUCT
+
+ClassDescriptor* DescriptorCache::descriptor_for(InstanceKlass* ik, TRAPS) {
+  ClassDescriptor** existing = _class_descriptors.get(ik);
+  if (existing == NULL) {
+    ClassDescriptor* cd = ClassDescriptor::parse_generic_signature(ik, CHECK_NULL);
+    _class_descriptors.put(ik, cd);
+    return cd;
+  } else {
+    return *existing;
+  }
+}
+
+MethodDescriptor* DescriptorCache::descriptor_for(
+    Method* mh, ClassDescriptor* cd, TRAPS) {
+  assert(mh != NULL && cd != NULL, "Should not be NULL");
+  MethodDescriptor** existing = _method_descriptors.get(mh);
+  if (existing == NULL) {
+    MethodDescriptor* md = MethodDescriptor::parse_generic_signature(mh, cd);
+    _method_descriptors.put(mh, md);
+    return md;
+  } else {
+    return *existing;
+  }
+}
+
+MethodDescriptor* DescriptorCache::descriptor_for(Method* mh, TRAPS) {
+  ClassDescriptor* cd = descriptor_for(
+      InstanceKlass::cast(mh->method_holder()), CHECK_NULL);
+  return descriptor_for(mh, cd, THREAD);
+}
+
+} // namespace generic
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/classfile/genericSignatures.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_CLASSFILE_GENERICSIGNATURES_HPP
+#define SHARE_VM_CLASSFILE_GENERICSIGNATURES_HPP
+
+#include "classfile/symbolTable.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/signature.hpp"
+#include "utilities/growableArray.hpp"
+#include "utilities/resourceHash.hpp"
+
+class stringStream;
+
+namespace generic {
+
+class Identifier;
+class ClassDescriptor;
+class MethodDescriptor;
+
+class TypeParameter; // A formal type parameter declared in a generic signature
+class TypeArgument;  // The "type value" passed to fill parameters in supertypes
+class TypeVariable;  // A usage of a type parameter as a value
+/**
+ * Example:
+ *
+ * <T, V> class Foo extends Bar<String> { int m(V v) {} }
+ * ^^^^^^                       ^^^^^^          ^^
+ * type parameters            type argument    type variable
+ *
+ * Note that a type variable could be passed as an argument too:
+ * <T, V> class Foo extends Bar<T> { int m(V v) {} }
+ *                             ^^^
+ *                             type argument's value is a type variable
+ */
+
+
+class Type;
+class ClassType;
+class ArrayType;
+class PrimitiveType;
+class Context;
+class DescriptorCache;
+
+class DescriptorStream;
+
+class Identifier : public ResourceObj {
+ private:
+  Symbol* _sym;
+  int _begin;
+  int _end;
+
+ public:
+  Identifier(Symbol* sym, int begin, int end) :
+    _sym(sym), _begin(begin), _end(end) {}
+
+  bool equals(Identifier* other);
+  bool equals(Symbol* sym);
+
+#ifndef PRODUCT
+  void print_on(outputStream* str) const;
+#endif // ndef PRODUCT
+};
+
+class Descriptor : public ResourceObj {
+ protected:
+  GrowableArray<TypeParameter*> _type_parameters;
+  ClassDescriptor* _outer_class;
+
+  Descriptor(GrowableArray<TypeParameter*>& params,
+    ClassDescriptor* outer)
+    : _type_parameters(params), _outer_class(outer) {}
+
+ public:
+
+  ClassDescriptor* outer_class() { return _outer_class; }
+  void set_outer_class(ClassDescriptor* sig) { _outer_class = sig; }
+
+  virtual ClassDescriptor* as_class_signature() { return NULL; }
+  virtual MethodDescriptor* as_method_signature() { return NULL; }
+
+  bool is_class_signature() { return as_class_signature() != NULL; }
+  bool is_method_signature() { return as_method_signature() != NULL; }
+
+  GrowableArray<TypeParameter*>& type_parameters() {
+    return _type_parameters;
+  }
+
+  TypeParameter* find_type_parameter(Identifier* id, int* param_depth);
+
+  virtual void bind_variables_to_parameters() = 0;
+
+#ifndef PRODUCT
+  virtual void print_on(outputStream* str) const = 0;
+#endif
+};
+
+class ClassDescriptor : public Descriptor {
+ private:
+  ClassType* _super;
+  GrowableArray<ClassType*> _interfaces;
+  MethodDescriptor* _outer_method;
+
+  ClassDescriptor(GrowableArray<TypeParameter*>& ftp, ClassType* scs,
+      GrowableArray<ClassType*>& sis, ClassDescriptor* outer_class = NULL,
+      MethodDescriptor* outer_method = NULL)
+        : Descriptor(ftp, outer_class), _super(scs), _interfaces(sis),
+          _outer_method(outer_method) {}
+
+  static u2 get_outer_class_index(InstanceKlass* k, TRAPS);
+  static ClassDescriptor* parse_generic_signature(Klass* k, Symbol* original_name, TRAPS);
+
+ public:
+
+  virtual ClassDescriptor* as_class_signature() { return this; }
+
+  MethodDescriptor* outer_method() { return _outer_method; }
+  void set_outer_method(MethodDescriptor* m) { _outer_method = m; }
+
+  ClassType* super() { return _super; }
+  ClassType* interface_desc(Symbol* sym);
+
+  static ClassDescriptor* parse_generic_signature(Klass* k, TRAPS);
+  static ClassDescriptor* parse_generic_signature(Symbol* sym);
+
+  // For use in superclass chains in positions where there is no generic info
+  static ClassDescriptor* placeholder(InstanceKlass* klass);
+
+#ifndef PRODUCT
+  void print_on(outputStream* str) const;
+#endif
+
+  ClassDescriptor* canonicalize(Context* ctx);
+
+  // Linking sets the position index in any contained TypeVariable type
+  // to correspond to the location of that identifier in the formal type
+  // parameters.
+  void bind_variables_to_parameters();
+};
+
+class MethodDescriptor : public Descriptor {
+ private:
+  GrowableArray<Type*> _parameters;
+  Type* _return_type;
+  GrowableArray<Type*> _throws;
+
+  MethodDescriptor(GrowableArray<TypeParameter*>& ftp, ClassDescriptor* outer,
+      GrowableArray<Type*>& sigs, Type* rt, GrowableArray<Type*>& throws)
+      : Descriptor(ftp, outer), _parameters(sigs), _return_type(rt),
+        _throws(throws) {}
+
+ public:
+
+  static MethodDescriptor* parse_generic_signature(Method* m, ClassDescriptor* outer);
+  static MethodDescriptor* parse_generic_signature(Symbol* sym, ClassDescriptor* outer);
+
+  MethodDescriptor* as_method_signature() { return this; }
+
+  // Performs generic analysis on the method parameters to determine
+  // if both methods refer to the same argument types.
+  bool covariant_match(MethodDescriptor* other, Context* ctx);
+
+  // Returns a new method descriptor with all generic variables
+  // removed and replaced with whatever is indicated using the Context.
+  MethodDescriptor* canonicalize(Context* ctx);
+
+  void bind_variables_to_parameters();
+
+#ifndef PRODUCT
+  TempNewSymbol reify_signature(Context* ctx, TRAPS);
+  void print_on(outputStream* str) const;
+#endif
+};
+
+class TypeParameter : public ResourceObj {
+ private:
+  Identifier* _identifier;
+  ClassType* _class_bound;
+  GrowableArray<ClassType*> _interface_bounds;
+
+  // The position is the ordinal location of the parameter within the
+  // formal parameter list (excluding outer classes).  It is only set for
+  // formal type parameters that are associated with a class -- method
+  // type parameters are left as -1.  When resolving a generic variable to
+  // find the actual type, this index is used to access the generic type
+  // argument in the provided context object.
+  int _position; // Assigned during variable linking
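+  // (For instance, in "class Foo<T, V>", T is assigned position 0 and V
+  // position 1 during linking.)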
+
+  TypeParameter(Identifier* id, ClassType* class_bound,
+    GrowableArray<ClassType*>& interface_bounds) :
+      _identifier(id), _class_bound(class_bound),
+      _interface_bounds(interface_bounds), _position(-1) {}
+
+ public:
+  static TypeParameter* parse_generic_signature(DescriptorStream* str);
+
+  ClassType* bound();
+  int position() { return _position; }
+
+  void bind_variables_to_parameters(Descriptor* sig, int position);
+  Identifier* identifier() { return _identifier; }
+
+  Type* resolve(Context* ctx, int inner_depth, int ctx_depth);
+  TypeParameter* canonicalize(Context* ctx, int ctx_depth);
+
+#ifndef PRODUCT
+  void print_on(outputStream* str) const;
+#endif
+};
+
+class Type : public ResourceObj {
+ public:
+  static Type* parse_generic_signature(DescriptorStream* str);
+
+  virtual ClassType* as_class() { return NULL; }
+  virtual TypeVariable* as_variable() { return NULL; }
+  virtual ArrayType* as_array() { return NULL; }
+  virtual PrimitiveType* as_primitive() { return NULL; }
+
+  virtual bool covariant_match(Type* gt, Context* ctx) = 0;
+  virtual Type* canonicalize(Context* ctx, int ctx_depth) = 0;
+
+  virtual void bind_variables_to_parameters(Descriptor* sig) = 0;
+
+#ifndef PRODUCT
+  virtual void reify_signature(stringStream* ss, Context* ctx) = 0;
+  virtual void print_on(outputStream* str) const = 0;
+#endif
+};
+
+class ClassType : public Type {
+  friend class ClassDescriptor;
+ protected:
+  Identifier* _identifier;
+  GrowableArray<TypeArgument*> _type_arguments;
+  ClassType* _outer_class;
+
+  ClassType(Identifier* identifier,
+      GrowableArray<TypeArgument*>& args,
+      ClassType* outer)
+      : _identifier(identifier), _type_arguments(args), _outer_class(outer) {}
+
+  // Sets *has_inner to true if there are inner classes to read
+  static Identifier* parse_generic_signature_simple(
+      GrowableArray<TypeArgument*>* args,
+      bool* has_inner, DescriptorStream* str);
+
+  static ClassType* parse_generic_signature(ClassType* outer,
+      DescriptorStream* str);
+  static ClassType* from_symbol(Symbol* sym);
+
+ public:
+  ClassType* as_class() { return this; }
+
+  static ClassType* parse_generic_signature(DescriptorStream* str);
+  static ClassType* java_lang_Object();
+
+  Identifier* identifier() { return _identifier; }
+  int type_arguments_length() { return _type_arguments.length(); }
+  TypeArgument* type_argument_at(int i);
+
+  virtual ClassType* outer_class() { return _outer_class; }
+
+  bool covariant_match(Type* gt, Context* ctx);
+  ClassType* canonicalize(Context* ctx, int context_depth);
+
+  void bind_variables_to_parameters(Descriptor* sig);
+
+#ifndef PRODUCT
+  void reify_signature(stringStream* ss, Context* ctx);
+  void print_on(outputStream* str) const;
+#endif
+};
+
+class TypeVariable : public Type {
+ private:
+  Identifier* _id;
+  TypeParameter* _parameter; // assigned during linking
+
+  // how many steps "out" from inner classes, -1 if method
+  int _inner_depth;
+
+  TypeVariable(Identifier* id)
+      : _id(id), _parameter(NULL), _inner_depth(0) {}
+
+ public:
+  TypeVariable* as_variable() { return this; }
+
+  static TypeVariable* parse_generic_signature(DescriptorStream* str);
+
+  Identifier* identifier() { return _id; }
+  TypeParameter* parameter() { return _parameter; }
+  int inner_depth() { return _inner_depth; }
+
+  void bind_variables_to_parameters(Descriptor* sig);
+
+  Type* resolve(Context* ctx, int ctx_depth);
+  bool covariant_match(Type* gt, Context* ctx);
+  Type* canonicalize(Context* ctx, int ctx_depth);
+
+#ifndef PRODUCT
+  void reify_signature(stringStream* ss, Context* ctx);
+  void print_on(outputStream* str) const;
+#endif
+};
+
+class ArrayType : public Type {
+ private:
+  Type* _base;
+
+  ArrayType(Type* base) : _base(base) {}
+
+ public:
+  ArrayType* as_array() { return this; }
+
+  static ArrayType* parse_generic_signature(DescriptorStream* str);
+
+  bool covariant_match(Type* gt, Context* ctx);
+  ArrayType* canonicalize(Context* ctx, int ctx_depth);
+
+  void bind_variables_to_parameters(Descriptor* sig);
+
+#ifndef PRODUCT
+  void reify_signature(stringStream* ss, Context* ctx);
+  void print_on(outputStream* str) const;
+#endif
+};
+
+class PrimitiveType : public Type {
+  friend class Type;
+ private:
+  char _type; // includes V for void
+
+  PrimitiveType(char& type) : _type(type) {}
+
+ public:
+  PrimitiveType* as_primitive() { return this; }
+
+  bool covariant_match(Type* gt, Context* ctx);
+  PrimitiveType* canonicalize(Context* ctx, int ctx_depth);
+
+  void bind_variables_to_parameters(Descriptor* sig);
+
+#ifndef PRODUCT
+  void reify_signature(stringStream* ss, Context* ctx);
+  void print_on(outputStream* str) const;
+#endif
+};
+
+class TypeArgument : public ResourceObj {
+ private:
+  Type* _lower_bound;
+  Type* _upper_bound; // may be null or == _lower_bound
+
+  TypeArgument(Type* lower_bound, Type* upper_bound)
+      : _lower_bound(lower_bound), _upper_bound(upper_bound) {}
+
+ public:
+
+  static TypeArgument* parse_generic_signature(DescriptorStream* str);
+
+  Type* lower_bound() { return _lower_bound; }
+  Type* upper_bound() { return _upper_bound; }
+
+  void bind_variables_to_parameters(Descriptor* sig);
+  TypeArgument* canonicalize(Context* ctx, int ctx_depth);
+
+  bool covariant_match(TypeArgument* a, Context* ctx);
+
+#ifndef PRODUCT
+  void print_on(outputStream* str) const;
+#endif
+};
+
+
+class Context : public ResourceObj {
+ private:
+  DescriptorCache* _cache;
+  GrowableArray<ClassType*> _type_arguments;
+
+  void reset_to_mark(int size);
+
+ public:
+  // When this object goes out of scope or 'destroy' is called,
+  // the type arguments applied to the context are wound back
+  // (unless the mark has been deactivated).
+  class Mark : public StackObj {
+   private:
+    mutable Context* _context;
+    int _marked_size;
+
+    bool is_active() const { return _context != NULL; }
+    void deactivate() const { _context = NULL; }
+
+   public:
+    Mark() : _context(NULL), _marked_size(0) {}
+    Mark(Context* ctx, int sz) : _context(ctx), _marked_size(sz) {}
+    Mark(const Mark& m) : _context(m._context), _marked_size(m._marked_size) {
+      m.deactivate(); // Ownership is transferred
+    }
+
+    Mark& operator=(const Mark& cm) {
+      destroy();
+      _context = cm._context;
+      _marked_size = cm._marked_size;
+      cm.deactivate();
+      return *this;
+    }
+
+    void destroy();
+    ~Mark() { destroy(); }
+  };
+
+  Context(DescriptorCache* cache) : _cache(cache) {}
+
+  Mark mark() { return Mark(this, _type_arguments.length()); }
+  void apply_type_arguments(InstanceKlass* current, InstanceKlass* super, TRAPS);
+
+  ClassType* at_depth(int i) const;
+
+#ifndef PRODUCT
+  void print_on(outputStream* str) const;
+#endif
+};
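+// A minimal usage sketch of Context::Mark (illustrative names, not code from
+// this change):
+//
+//   Context ctx(&cache);
+//   {
+//     Context::Mark m = ctx.mark();
+//     ctx.apply_type_arguments(current_klass, super_klass, CHECK);
+//     // ... query ctx.at_depth(0) ...
+//   } // ~Mark() rewinds the type arguments applied since mark()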
+
+/**
+ * Contains a cache of descriptors for classes and methods so they can be
+ * looked up instead of reparsed each time they are needed.
+ */
+class DescriptorCache : public ResourceObj {
+ private:
+  ResourceHashtable<InstanceKlass*, ClassDescriptor*> _class_descriptors;
+  ResourceHashtable<Method*, MethodDescriptor*> _method_descriptors;
+
+ public:
+  ClassDescriptor* descriptor_for(InstanceKlass* ikh, TRAPS);
+
+  MethodDescriptor* descriptor_for(Method* mh, ClassDescriptor* cd, TRAPS);
+  // Class descriptor derived from method holder
+  MethodDescriptor* descriptor_for(Method* mh, TRAPS);
+};
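+// Hypothetical use of the cache (names are illustrative):
+//
+//   DescriptorCache cache;
+//   MethodDescriptor* md = cache.descriptor_for(the_method, CHECK_NULL);
+//   // Subsequent lookups for the_method hit the cache instead of reparsing.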
+
+} // namespace generic
+
+#endif // SHARE_VM_CLASSFILE_GENERICSIGNATURES_HPP
+
--- a/src/share/vm/classfile/javaClasses.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/javaClasses.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,20 +47,9 @@
 #include "runtime/java.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/safepoint.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vframe.hpp"
 #include "utilities/preserveException.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 #define INJECTED_FIELD_COMPUTE_OFFSET(klass, name, signature, may_be_java)    \
   klass::_##name##_offset = JavaClasses::compute_injected_offset(JavaClasses::klass##_##name##_enum);
@@ -338,14 +327,30 @@
   return result;
 }
 
-unsigned int java_lang_String::to_hash(oop java_string) {
+unsigned int java_lang_String::hash_code(oop java_string) {
   int          length = java_lang_String::length(java_string);
-  // Zero length string will hash to zero with String.toHash() function.
+  // A zero-length string will hash to zero, as with the String.hashCode() function.
   if (length == 0) return 0;
 
   typeArrayOop value  = java_lang_String::value(java_string);
   int          offset = java_lang_String::offset(java_string);
-  return java_lang_String::to_hash(value->char_at_addr(offset), length);
+  return java_lang_String::hash_code(value->char_at_addr(offset), length);
+}
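+// (Presumably the two-argument hash_code called above implements the standard
+// String.hashCode() polynomial s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1].)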
+
+char* java_lang_String::as_quoted_ascii(oop java_string) {
+  typeArrayOop value  = java_lang_String::value(java_string);
+  int          offset = java_lang_String::offset(java_string);
+  int          length = java_lang_String::length(java_string);
+
+  jchar* base = (length == 0) ? NULL : value->char_at_addr(offset);
+  if (base == NULL) return NULL;
+
+  int result_length = UNICODE::quoted_ascii_length(base, length) + 1;
+  char* result = NEW_RESOURCE_ARRAY(char, result_length);
+  UNICODE::as_quoted_ascii(base, length, result, result_length);
+  assert(result_length >= length + 1, "must not be shorter");
+  assert(result_length == (int)strlen(result) + 1, "must match");
+  return result;
 }
 
 unsigned int java_lang_String::hash_string(oop java_string) {
@@ -545,7 +550,7 @@
         assert(k->oop_is_objArray(), "Must be");
         Klass* element_klass = ObjArrayKlass::cast(k())->element_klass();
         assert(element_klass != NULL, "Must have an element klass");
-          comp_mirror = Klass::cast(element_klass)->java_mirror();
+          comp_mirror = element_klass->java_mirror();
       }
       assert(comp_mirror.not_null(), "must have a mirror");
 
@@ -628,8 +633,8 @@
     name = vmSymbols::type_signature(primitive_type(java_class));
   } else {
     Klass* k = as_Klass(java_class);
-    is_instance = Klass::cast(k)->oop_is_instance();
-    name = Klass::cast(k)->name();
+    is_instance = k->oop_is_instance();
+    name = k->name();
   }
   if (name == NULL) {
     st->print("<null>");
@@ -651,12 +656,12 @@
     name->increment_refcount();
   } else {
     Klass* k = as_Klass(java_class);
-    if (!Klass::cast(k)->oop_is_instance()) {
-      name = Klass::cast(k)->name();
+    if (!k->oop_is_instance()) {
+      name = k->name();
       name->increment_refcount();
     } else {
       ResourceMark rm;
-      const char* sigstr = Klass::cast(k)->signature_name();
+      const char* sigstr = k->signature_name();
       int         siglen = (int) strlen(sigstr);
       if (!intern_if_not_found) {
         name = SymbolTable::probe(sigstr, siglen);
@@ -671,30 +676,17 @@
 
 Klass* java_lang_Class::array_klass(oop java_class) {
   Klass* k = ((Klass*)java_class->metadata_field(_array_klass_offset));
-  assert(k == NULL || k->is_klass() && Klass::cast(k)->oop_is_array(), "should be array klass");
+  assert(k == NULL || k->is_klass() && k->oop_is_array(), "should be array klass");
   return k;
 }
 
 
 void java_lang_Class::set_array_klass(oop java_class, Klass* klass) {
-  assert(klass->is_klass() && Klass::cast(klass)->oop_is_array(), "should be array klass");
+  assert(klass->is_klass() && klass->oop_is_array(), "should be array klass");
   java_class->metadata_field_put(_array_klass_offset, klass);
 }
 
 
-Method* java_lang_Class::resolved_constructor(oop java_class) {
-  Metadata* constructor = java_class->metadata_field(_resolved_constructor_offset);
-  assert(constructor == NULL || constructor->is_method(), "should be method");
-  return ((Method*)constructor);
-}
-
-
-void java_lang_Class::set_resolved_constructor(oop java_class, Method* constructor) {
-  assert(constructor->is_method(), "should be method");
-  java_class->metadata_field_put(_resolved_constructor_offset, constructor);
-}
-
-
 bool java_lang_Class::is_primitive(oop java_class) {
   // should assert:
   //assert(java_lang_Class::is_instance(java_class), "must be a Class object");
@@ -919,7 +911,6 @@
 // Write the thread status value to threadStatus field in java.lang.Thread java class.
 void java_lang_Thread::set_thread_status(oop java_thread,
                                          java_lang_Thread::ThreadStatus status) {
-  assert(JavaThread::current()->thread_state() == _thread_in_vm, "Java Thread is not running in vm");
   // The threadStatus is only present starting in 1.5
   if (_thread_status_offset > 0) {
     java_thread->int_field_put(_thread_status_offset, status);
@@ -1153,179 +1144,43 @@
   }
 }
 
-// Print stack trace element to resource allocated buffer
-char* java_lang_Throwable::print_stack_element_to_buffer(Method* method, int bci) {
-  // Get strings and string lengths
-  InstanceKlass* klass = InstanceKlass::cast(method->method_holder());
-  const char* klass_name  = klass->external_name();
-  int buf_len = (int)strlen(klass_name);
-  char* source_file_name;
-  if (klass->source_file_name() == NULL) {
-    source_file_name = NULL;
+// After this many redefines, the stack trace is unreliable.
+const int MAX_VERSION = USHRT_MAX;
+
+// Helper backtrace functions to store bci|version together.
+static inline int merge_bci_and_version(int bci, int version) {
+  // only store u2 for version, checking for overflow.
+  if (version > USHRT_MAX || version < 0) version = MAX_VERSION;
+  assert((jushort)bci == bci, "bci should be short");
+  return build_int_from_shorts(version, bci);
+}
+
+static inline int bci_at(unsigned int merged) {
+  return extract_high_short_from_int(merged);
+}
+static inline int version_at(unsigned int merged) {
+  return extract_low_short_from_int(merged);
+}
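+// Worked example, assuming build_int_from_shorts(low, high) packs 'high' into
+// the upper 16 bits: merge_bci_and_version(42, 3) yields 0x002A0003, from
+// which bci_at() recovers 42 (the high short) and version_at() recovers 3
+// (the low short).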
+
+static inline bool version_matches(Method* method, int version) {
+  return (method->constants()->version() == version && version < MAX_VERSION);
+}
+
+static inline int get_line_number(Method* method, int bci) {
+  int line_number = 0;
+  if (method->is_native()) {
+    // Negative value different from -1 below, enabling Java code in
+    // class java.lang.StackTraceElement to distinguish "native" from
+    // "no LineNumberTable".  JDK tests for -2.
+    line_number = -2;
   } else {
-    source_file_name = klass->source_file_name()->as_C_string();
-    buf_len += (int)strlen(source_file_name);
-  }
-  char* method_name = method->name()->as_C_string();
-  buf_len += (int)strlen(method_name);
-
-  // Allocate temporary buffer with extra space for formatting and line number
-  char* buf = NEW_RESOURCE_ARRAY(char, buf_len + 64);
-
-  // Print stack trace line in buffer
-  sprintf(buf, "\tat %s.%s", klass_name, method_name);
-  if (method->is_native()) {
-    strcat(buf, "(Native Method)");
-  } else {
-    int line_number = method->line_number_from_bci(bci);
-    if (source_file_name != NULL && (line_number != -1)) {
-      // Sourcename and linenumber
-      sprintf(buf + (int)strlen(buf), "(%s:%d)", source_file_name, line_number);
-    } else if (source_file_name != NULL) {
-      // Just sourcename
-      sprintf(buf + (int)strlen(buf), "(%s)", source_file_name);
-    } else {
-      // Neither soucename and linenumber
-      sprintf(buf + (int)strlen(buf), "(Unknown Source)");
-    }
-    nmethod* nm = method->code();
-    if (WizardMode && nm != NULL) {
-      sprintf(buf + (int)strlen(buf), "(nmethod " INTPTR_FORMAT ")", (intptr_t)nm);
+    // Returns -1 if no LineNumberTable, and otherwise actual line number
+    line_number = method->line_number_from_bci(bci);
+    if (line_number == -1 && ShowHiddenFrames) {
+      line_number = bci + 1000000;
     }
   }
-
-  return buf;
-}
-
-
-void java_lang_Throwable::print_stack_element(Handle stream, Method* method, int bci) {
-  ResourceMark rm;
-  char* buf = print_stack_element_to_buffer(method, bci);
-  print_to_stream(stream, buf);
-}
-
-void java_lang_Throwable::print_stack_element(outputStream *st, Method* method, int bci) {
-  ResourceMark rm;
-  char* buf = print_stack_element_to_buffer(method, bci);
-  st->print_cr("%s", buf);
-}
-
-void java_lang_Throwable::print_to_stream(Handle stream, const char* str) {
-  if (stream.is_null()) {
-    tty->print_cr("%s", str);
-  } else {
-    EXCEPTION_MARK;
-    JavaValue result(T_VOID);
-    Handle arg (THREAD, oopFactory::new_charArray(str, THREAD));
-    if (!HAS_PENDING_EXCEPTION) {
-      JavaCalls::call_virtual(&result,
-                              stream,
-                              KlassHandle(THREAD, stream->klass()),
-                              vmSymbols::println_name(),
-                              vmSymbols::char_array_void_signature(),
-                              arg,
-                              THREAD);
-    }
-    // Ignore any exceptions. we are in the middle of exception handling. Same as classic VM.
-    if (HAS_PENDING_EXCEPTION) CLEAR_PENDING_EXCEPTION;
-  }
-
-}
-
-
-const char* java_lang_Throwable::no_stack_trace_message() {
-  return "\t<<no stack trace available>>";
-}
-
-
-// Currently used only for exceptions occurring during startup
-void java_lang_Throwable::print_stack_trace(oop throwable, outputStream* st) {
-  Thread *THREAD = Thread::current();
-  Handle h_throwable(THREAD, throwable);
-  while (h_throwable.not_null()) {
-    objArrayHandle result (THREAD, objArrayOop(backtrace(h_throwable())));
-    if (result.is_null()) {
-      st->print_cr(no_stack_trace_message());
-      return;
-    }
-
-    while (result.not_null()) {
-      typeArrayHandle methods (THREAD,
-                               typeArrayOop(result->obj_at(trace_methods_offset)));
-      typeArrayHandle bcis (THREAD,
-                            typeArrayOop(result->obj_at(trace_bcis_offset)));
-
-      if (methods.is_null() || bcis.is_null()) {
-        st->print_cr(no_stack_trace_message());
-        return;
-      }
-
-      int length = methods()->length();
-      for (int index = 0; index < length; index++) {
-        Method* method = ((Method*)methods()->metadata_at(index));
-        if (method == NULL) goto handle_cause;
-        int bci = bcis->ushort_at(index);
-        print_stack_element(st, method, bci);
-      }
-      result = objArrayHandle(THREAD, objArrayOop(result->obj_at(trace_next_offset)));
-    }
-  handle_cause:
-    {
-      EXCEPTION_MARK;
-      JavaValue result(T_OBJECT);
-      JavaCalls::call_virtual(&result,
-                              h_throwable,
-                              KlassHandle(THREAD, h_throwable->klass()),
-                              vmSymbols::getCause_name(),
-                              vmSymbols::void_throwable_signature(),
-                              THREAD);
-      // Ignore any exceptions. we are in the middle of exception handling. Same as classic VM.
-      if (HAS_PENDING_EXCEPTION) {
-        CLEAR_PENDING_EXCEPTION;
-        h_throwable = Handle();
-      } else {
-        h_throwable = Handle(THREAD, (oop) result.get_jobject());
-        if (h_throwable.not_null()) {
-          st->print("Caused by: ");
-          print(h_throwable, st);
-          st->cr();
-        }
-      }
-    }
-  }
-}
-
-
-void java_lang_Throwable::print_stack_trace(oop throwable, oop print_stream) {
-  // Note: this is no longer used in Merlin, but we support it for compatibility.
-  Thread *thread = Thread::current();
-  Handle stream(thread, print_stream);
-  objArrayHandle result (thread, objArrayOop(backtrace(throwable)));
-  if (result.is_null()) {
-    print_to_stream(stream, no_stack_trace_message());
-    return;
-  }
-
-  while (result.not_null()) {
-    typeArrayHandle methods(thread,
-                            typeArrayOop(result->obj_at(trace_methods_offset)));
-    typeArrayHandle bcis (thread,
-                          typeArrayOop(result->obj_at(trace_bcis_offset)));
-
-    if (methods.is_null() || bcis.is_null()) {
-      print_to_stream(stream, no_stack_trace_message());
-      return;
-    }
-
-    int length = methods()->length();
-    for (int index = 0; index < length; index++) {
-      Method* method = ((Method*)methods()->metadata_at(index));
-      if (method == NULL) return;
-      int bci = bcis->ushort_at(index);
-      print_stack_element(stream, method, bci);
-    }
-    result = objArrayHandle(thread, objArrayOop(result->obj_at(trace_next_offset)));
-  }
+  return line_number;
 }
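+// In summary, get_line_number() returns -2 for native methods, -1 when there
+// is no LineNumberTable, the actual source line otherwise, or bci + 1000000
+// when ShowHiddenFrames requests synthetic line numbers.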
 
 // This class provides a simple wrapper over the internal structure of
@@ -1345,13 +1200,30 @@
 
   enum {
     trace_methods_offset = java_lang_Throwable::trace_methods_offset,
-    trace_bcis_offset    = java_lang_Throwable::trace_bcis_offset,
+    trace_bcis_offset = java_lang_Throwable::trace_bcis_offset,
     trace_mirrors_offset = java_lang_Throwable::trace_mirrors_offset,
     trace_next_offset    = java_lang_Throwable::trace_next_offset,
     trace_size           = java_lang_Throwable::trace_size,
     trace_chunk_size     = java_lang_Throwable::trace_chunk_size
   };
 
+  // get info out of chunks
+  static typeArrayOop get_methods(objArrayHandle chunk) {
+    typeArrayOop methods = typeArrayOop(chunk->obj_at(trace_methods_offset));
+    assert(methods != NULL, "method array should be initialized in backtrace");
+    return methods;
+  }
+  static typeArrayOop get_bcis(objArrayHandle chunk) {
+    typeArrayOop bcis = typeArrayOop(chunk->obj_at(trace_bcis_offset));
+    assert(bcis != NULL, "bci array should be initialized in backtrace");
+    return bcis;
+  }
+  static objArrayOop get_mirrors(objArrayHandle chunk) {
+    objArrayOop mirrors = objArrayOop(chunk->obj_at(trace_mirrors_offset));
+    assert(mirrors != NULL, "mirror array should be initialized in backtrace");
+    return mirrors;
+  }
+
   // constructor for new backtrace
   BacktraceBuilder(TRAPS): _methods(NULL), _bcis(NULL), _head(NULL), _mirrors(NULL) {
     expand(CHECK);
@@ -1359,6 +1231,19 @@
     _index = 0;
   }
 
+  BacktraceBuilder(objArrayHandle backtrace) {
+    _methods = get_methods(backtrace);
+    _bcis = get_bcis(backtrace);
+    _mirrors = get_mirrors(backtrace);
+    assert(_methods->length() == _bcis->length() &&
+           _methods->length() == _mirrors->length(),
+           "method and source information arrays should match");
+
+    // head is the preallocated backtrace
+    _backtrace = _head = backtrace();
+    _index = 0;
+  }
+
   void expand(TRAPS) {
     objArrayHandle old_head(THREAD, _head);
     Pause_No_Safepoint_Verifier pnsv(&_nsv);
@@ -1366,10 +1251,10 @@
     objArrayOop head = oopFactory::new_objectArray(trace_size, CHECK);
     objArrayHandle new_head(THREAD, head);
 
-    typeArrayOop methods = oopFactory::new_metaDataArray(trace_chunk_size, CHECK);
+    typeArrayOop methods = oopFactory::new_shortArray(trace_chunk_size, CHECK);
     typeArrayHandle new_methods(THREAD, methods);
 
-    typeArrayOop bcis = oopFactory::new_shortArray(trace_chunk_size, CHECK);
+    typeArrayOop bcis = oopFactory::new_intArray(trace_chunk_size, CHECK);
     typeArrayHandle new_bcis(THREAD, bcis);
 
     objArrayOop mirrors = oopFactory::new_objectArray(trace_chunk_size, CHECK);
@@ -1384,7 +1269,7 @@
 
     _head    = new_head();
     _methods = new_methods();
-    _bcis    = new_bcis();
+    _bcis = new_bcis();
     _mirrors = new_mirrors();
     _index = 0;
   }
@@ -1398,7 +1283,6 @@
     // shorts.  The later line number lookup would just smear the -1
     // to a 0 even if it could be recorded.
     if (bci == SynchronizationEntryBCI) bci = 0;
-    assert(bci == (jushort)bci, "doesn't fit");
 
     if (_index >= trace_chunk_size) {
       methodHandle mhandle(THREAD, method);
@@ -1406,26 +1290,148 @@
       method = mhandle();
     }
 
-    _methods->metadata_at_put(_index, method);
-    _bcis->ushort_at_put(_index, bci);
-    // we need to save the mirrors in the backtrace to keep the methods from
-    // being unloaded if their class loader is unloaded while we still have
-    // this stack trace.
+    _methods->short_at_put(_index, method->method_idnum());
+    _bcis->int_at_put(_index, merge_bci_and_version(bci, method->constants()->version()));
+
+    // We need to save the mirrors in the backtrace to keep the class
+    // from being unloaded while we still have this stack trace.
+    assert(method->method_holder()->java_mirror() != NULL, "never push null for mirror");
     _mirrors->obj_at_put(_index, method->method_holder()->java_mirror());
     _index++;
   }
 
-  Method* current_method() {
-    assert(_index >= 0 && _index < trace_chunk_size, "out of range");
-    return ((Method*)_methods->metadata_at(_index));
+};
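+// Net effect of the new chunk layout: method idnums live in a short array,
+// merged bci|version pairs (see merge_bci_and_version above) in an int array,
+// and holder mirrors in an object array that keeps the classes from being
+// unloaded.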
+
+// Print stack trace element to resource allocated buffer
+char* java_lang_Throwable::print_stack_element_to_buffer(Handle mirror,
+                                  int method_id, int version, int bci) {
+
+  // Get strings and string lengths
+  InstanceKlass* holder = InstanceKlass::cast(java_lang_Class::as_Klass(mirror()));
+  const char* klass_name  = holder->external_name();
+  int buf_len = (int)strlen(klass_name);
+
+  // Look the method up by the id recorded when it was pushed onto the stack trace.
+  Method* method = holder->method_with_idnum(method_id);
+  char* method_name = method->name()->as_C_string();
+  buf_len += (int)strlen(method_name);
+
+  char* source_file_name = NULL;
+  if (version_matches(method, version)) {
+    Symbol* source = holder->source_file_name();
+    if (source != NULL) {
+      source_file_name = source->as_C_string();
+      buf_len += (int)strlen(source_file_name);
+    }
+  }
+
+  // Allocate temporary buffer with extra space for formatting and line number
+  char* buf = NEW_RESOURCE_ARRAY(char, buf_len + 64);
+
+  // Print stack trace line in buffer
+  sprintf(buf, "\tat %s.%s", klass_name, method_name);
+
+  if (!version_matches(method, version)) {
+    strcat(buf, "(Redefined)");
+  } else {
+    int line_number = get_line_number(method, bci);
+    if (line_number == -2) {
+      strcat(buf, "(Native Method)");
+    } else {
+      if (source_file_name != NULL && (line_number != -1)) {
+        // Sourcename and linenumber
+        sprintf(buf + (int)strlen(buf), "(%s:%d)", source_file_name, line_number);
+      } else if (source_file_name != NULL) {
+        // Just sourcename
+        sprintf(buf + (int)strlen(buf), "(%s)", source_file_name);
+      } else {
+        // Neither sourcename nor linenumber
+        sprintf(buf + (int)strlen(buf), "(Unknown Source)");
+      }
+      nmethod* nm = method->code();
+      if (WizardMode && nm != NULL) {
+        sprintf(buf + (int)strlen(buf), "(nmethod " INTPTR_FORMAT ")", (intptr_t)nm);
+      }
+    }
   }
 
-  jushort current_bci() {
-    assert(_index >= 0 && _index < trace_chunk_size, "out of range");
-    return _bcis->ushort_at(_index);
+  return buf;
+}
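+// The buffer built above yields lines such as (hypothetical names):
+//   "\tat Foo.bar(Foo.java:42)"
+// or "\tat Foo.bar(Redefined)" when the recorded constant pool version no
+// longer matches the current method.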
+
+void java_lang_Throwable::print_stack_element(outputStream *st, Handle mirror,
+                                              int method_id, int version, int bci) {
+  ResourceMark rm;
+  char* buf = print_stack_element_to_buffer(mirror, method_id, version, bci);
+  st->print_cr("%s", buf);
+}
+
+void java_lang_Throwable::print_stack_element(outputStream *st, methodHandle method, int bci) {
+  Handle mirror = method->method_holder()->java_mirror();
+  int method_id = method->method_idnum();
+  int version = method->constants()->version();
+  print_stack_element(st, mirror, method_id, version, bci);
+}
+
+const char* java_lang_Throwable::no_stack_trace_message() {
+  return "\t<<no stack trace available>>";
+}
+
+
+// Currently used only for exceptions occurring during startup
+void java_lang_Throwable::print_stack_trace(oop throwable, outputStream* st) {
+  Thread *THREAD = Thread::current();
+  Handle h_throwable(THREAD, throwable);
+  while (h_throwable.not_null()) {
+    objArrayHandle result (THREAD, objArrayOop(backtrace(h_throwable())));
+    if (result.is_null()) {
+      st->print_cr(no_stack_trace_message());
+      return;
+    }
+
+    while (result.not_null()) {
+
+      // Get method id, bci, version and mirror from chunk
+      typeArrayHandle methods (THREAD, BacktraceBuilder::get_methods(result));
+      typeArrayHandle bcis (THREAD, BacktraceBuilder::get_bcis(result));
+      objArrayHandle mirrors (THREAD, BacktraceBuilder::get_mirrors(result));
+
+      int length = methods()->length();
+      for (int index = 0; index < length; index++) {
+        Handle mirror(THREAD, mirrors->obj_at(index));
+        // NULL mirror means end of stack trace
+        if (mirror.is_null()) goto handle_cause;
+        int method = methods->short_at(index);
+        int version = version_at(bcis->int_at(index));
+        int bci = bci_at(bcis->int_at(index));
+        print_stack_element(st, mirror, method, version, bci);
+      }
+      result = objArrayHandle(THREAD, objArrayOop(result->obj_at(trace_next_offset)));
+    }
+  handle_cause:
+    {
+      EXCEPTION_MARK;
+      JavaValue cause(T_OBJECT);
+      JavaCalls::call_virtual(&cause,
+                              h_throwable,
+                              KlassHandle(THREAD, h_throwable->klass()),
+                              vmSymbols::getCause_name(),
+                              vmSymbols::void_throwable_signature(),
+                              THREAD);
+      // Ignore any exceptions; we are in the middle of exception handling. Same as classic VM.
+      if (HAS_PENDING_EXCEPTION) {
+        CLEAR_PENDING_EXCEPTION;
+        h_throwable = Handle();
+      } else {
+        h_throwable = Handle(THREAD, (oop) cause.get_jobject());
+        if (h_throwable.not_null()) {
+          st->print("Caused by: ");
+          print(h_throwable, st);
+          st->cr();
+        }
+      }
+    }
   }
-};
-
+}
 
 void java_lang_Throwable::fill_in_stack_trace(Handle throwable, methodHandle method, TRAPS) {
   if (!StackTraceInThrowable) return;
@@ -1586,21 +1592,8 @@
 
   // No-op if stack trace is disabled
   if (!StackTraceInThrowable) return;
-
-  objArrayOop h_oop = oopFactory::new_objectArray(trace_size, CHECK);
-  objArrayHandle backtrace  (THREAD, h_oop);
-  typeArrayOop m_oop = oopFactory::new_metaDataArray(trace_chunk_size, CHECK);
-  typeArrayHandle methods (THREAD, m_oop);
-  typeArrayOop b = oopFactory::new_shortArray(trace_chunk_size, CHECK);
-  typeArrayHandle bcis(THREAD, b);
-  objArrayOop mirror_oop = oopFactory::new_objectArray(trace_chunk_size, CHECK);
-  objArrayHandle mirrors (THREAD, mirror_oop);
-
-  // backtrace has space for one chunk (next is NULL)
-  backtrace->obj_at_put(trace_methods_offset, methods());
-  backtrace->obj_at_put(trace_bcis_offset, bcis());
-  backtrace->obj_at_put(trace_mirrors_offset, mirrors());
-  set_backtrace(throwable(), backtrace());
+  BacktraceBuilder bt(CHECK);   // creates a backtrace
+  set_backtrace(throwable(), bt.backtrace());
 }
 
 
@@ -1612,48 +1605,26 @@
 
   assert(throwable->is_a(SystemDictionary::Throwable_klass()), "sanity check");
 
-  objArrayOop backtrace = (objArrayOop)java_lang_Throwable::backtrace(throwable());
-  assert(backtrace != NULL, "backtrace not preallocated");
-
-  oop m = backtrace->obj_at(trace_methods_offset);
-  typeArrayOop methods = typeArrayOop(m);
-  assert(methods != NULL && methods->length() > 0, "method array not preallocated");
-
-  oop b = backtrace->obj_at(trace_bcis_offset);
-  typeArrayOop bcis = typeArrayOop(b);
-  assert(bcis != NULL, "bci array not preallocated");
-
-  oop mr = backtrace->obj_at(trace_mirrors_offset);
-  objArrayOop mirrors = objArrayOop(mr);
-  assert(mirrors != NULL, "bci array not preallocated");
-
-  assert(methods->length() == bcis->length() &&
-         methods->length() == mirrors->length(),
-         "method and bci arrays should match");
-
-  JavaThread* thread = JavaThread::current();
-  ResourceMark rm(thread);
-  vframeStream st(thread);
+  JavaThread* THREAD = JavaThread::current();
+
+  objArrayHandle backtrace (THREAD, (objArrayOop)java_lang_Throwable::backtrace(throwable()));
+  assert(backtrace.not_null(), "backtrace should have been preallocated");
+
+  ResourceMark rm(THREAD);
+  vframeStream st(THREAD);
+
+  BacktraceBuilder bt(backtrace);
 
   // Unlike fill_in_stack_trace we do not skip fillInStackTrace or throwable init
   // methods as preallocated errors aren't created by "java" code.
 
   // fill in as much stack trace as possible
+  typeArrayOop methods = BacktraceBuilder::get_methods(backtrace);
   int max_chunks = MIN2(methods->length(), (int)MaxJavaStackTraceDepth);
   int chunk_count = 0;
 
   for (;!st.at_end(); st.next()) {
-    // Add entry and smear the -1 bci to 0 since the array only holds
-    // unsigned shorts.  The later line number lookup would just smear
-    // the -1 to a 0 even if it could be recorded.
-    int bci = st.bci();
-    if (bci == SynchronizationEntryBCI) bci = 0;
-    assert(bci == (jushort)bci, "doesn't fit");
-    bcis->ushort_at_put(chunk_count, bci);
-    methods->metadata_at_put(chunk_count, st.method());
-    mirrors->obj_at_put(chunk_count,
-                   st.method()->method_holder()->java_mirror());
-
+    bt.push(st.method(), st.bci(), CHECK);
     chunk_count++;
 
     // Bail-out for deep stacks
@@ -1667,7 +1638,6 @@
       java_lang_Throwable::set_stacktrace(throwable(), java_lang_Throwable::unassigned_stacktrace());
       assert(java_lang_Throwable::unassigned_stacktrace() != NULL, "not initialized");
   }
-
 }
 
 
@@ -1686,12 +1656,12 @@
       chunk = next;
     }
     assert(chunk != NULL && chunk->obj_at(trace_next_offset) == NULL, "sanity check");
-    // Count element in remaining partial chunk
-    typeArrayOop methods = typeArrayOop(chunk->obj_at(trace_methods_offset));
-    typeArrayOop bcis = typeArrayOop(chunk->obj_at(trace_bcis_offset));
-    assert(methods != NULL && bcis != NULL, "sanity check");
-    for (int i = 0; i < methods->length(); i++) {
-      if (methods->metadata_at(i) == NULL) break;
+    // Count element in remaining partial chunk.  NULL value for mirror
+    // marks the end of the stack trace elements that are saved.
+    objArrayOop mirrors = BacktraceBuilder::get_mirrors(chunk);
+    assert(mirrors != NULL, "sanity check");
+    for (int i = 0; i < mirrors->length(); i++) {
+      if (mirrors->obj_at(i) == NULL) break;
       depth++;
     }
   }
@@ -1717,25 +1687,28 @@
   if (chunk == NULL) {
     THROW_(vmSymbols::java_lang_IndexOutOfBoundsException(), NULL);
   }
-  // Get method,bci from chunk
-  typeArrayOop methods = typeArrayOop(chunk->obj_at(trace_methods_offset));
-  typeArrayOop bcis = typeArrayOop(chunk->obj_at(trace_bcis_offset));
-  assert(methods != NULL && bcis != NULL, "sanity check");
-  methodHandle method(THREAD, ((Method*)methods->metadata_at(chunk_index)));
-  int bci = bcis->ushort_at(chunk_index);
+  // Get method id, bci, version and mirror from chunk
+  typeArrayOop methods = BacktraceBuilder::get_methods(chunk);
+  typeArrayOop bcis = BacktraceBuilder::get_bcis(chunk);
+  objArrayOop mirrors = BacktraceBuilder::get_mirrors(chunk);
+
+  assert(methods != NULL && bcis != NULL && mirrors != NULL, "sanity check");
+
+  int method = methods->short_at(chunk_index);
+  int version = version_at(bcis->int_at(chunk_index));
+  int bci = bci_at(bcis->int_at(chunk_index));
+  Handle mirror(THREAD, mirrors->obj_at(chunk_index));
+
   // Chunk can be partial full
-  if (method.is_null()) {
+  if (mirror.is_null()) {
     THROW_(vmSymbols::java_lang_IndexOutOfBoundsException(), NULL);
   }
 
-  oop element = java_lang_StackTraceElement::create(method, bci, CHECK_0);
+  oop element = java_lang_StackTraceElement::create(mirror, method, version, bci, CHECK_0);
   return element;
 }
 
-oop java_lang_StackTraceElement::create(methodHandle method, int bci, TRAPS) {
-  // SystemDictionary::stackTraceElement_klass() will be null for pre-1.4 JDKs
-  assert(JDK_Version::is_gte_jdk14x_version(), "should only be called in >= 1.4");
-
+oop java_lang_StackTraceElement::create(Handle mirror, int method_id, int version, int bci, TRAPS) {
   // Allocate java.lang.StackTraceElement instance
   Klass* k = SystemDictionary::StackTraceElement_klass();
   assert(k != NULL, "must be loaded in 1.4+");
@@ -1747,37 +1720,39 @@
   Handle element = ik->allocate_instance_handle(CHECK_0);
   // Fill in class name
   ResourceMark rm(THREAD);
-  const char* str = InstanceKlass::cast(method->method_holder())->external_name();
+  InstanceKlass* holder = InstanceKlass::cast(java_lang_Class::as_Klass(mirror()));
+  const char* str = holder->external_name();
   oop classname = StringTable::intern((char*) str, CHECK_0);
   java_lang_StackTraceElement::set_declaringClass(element(), classname);
+
   // Fill in method name
+  Method* method = holder->method_with_idnum(method_id);
   oop methodname = StringTable::intern(method->name(), CHECK_0);
   java_lang_StackTraceElement::set_methodName(element(), methodname);
-  // Fill in source file name
-  Symbol* source = InstanceKlass::cast(method->method_holder())->source_file_name();
-  if (ShowHiddenFrames && source == NULL)
-    source = vmSymbols::unknown_class_name();
-  oop filename = StringTable::intern(source, CHECK_0);
-  java_lang_StackTraceElement::set_fileName(element(), filename);
-  // File in source line number
-  int line_number;
-  if (method->is_native()) {
-    // Negative value different from -1 below, enabling Java code in
-    // class java.lang.StackTraceElement to distinguish "native" from
-    // "no LineNumberTable".
-    line_number = -2;
+
+  if (!version_matches(method, version)) {
+    // The method was redefined, accurate line number information isn't available
+    java_lang_StackTraceElement::set_fileName(element(), NULL);
+    java_lang_StackTraceElement::set_lineNumber(element(), -1);
   } else {
-    // Returns -1 if no LineNumberTable, and otherwise actual line number
-    line_number = method->line_number_from_bci(bci);
-    if (line_number == -1 && ShowHiddenFrames) {
-      line_number = bci + 1000000;
-    }
+    // Fill in source file name and line number.
+    Symbol* source = holder->source_file_name();
+    if (ShowHiddenFrames && source == NULL)
+      source = vmSymbols::unknown_class_name();
+    oop filename = StringTable::intern(source, CHECK_0);
+    java_lang_StackTraceElement::set_fileName(element(), filename);
+
+    int line_number = get_line_number(method, bci);
+    java_lang_StackTraceElement::set_lineNumber(element(), line_number);
   }
-  java_lang_StackTraceElement::set_lineNumber(element(), line_number);
-
   return element();
 }
 
+oop java_lang_StackTraceElement::create(methodHandle method, int bci, TRAPS) {
+  Handle mirror (THREAD, method->method_holder()->java_mirror());
+  int method_id = method->method_idnum();
+  return create(mirror, method_id, method->constants()->version(), bci, THREAD);
+}
 
 void java_lang_reflect_AccessibleObject::compute_offsets() {
   Klass* k = SystemDictionary::reflect_AccessibleObject_klass();
@@ -1808,10 +1783,12 @@
   annotations_offset = -1;
   parameter_annotations_offset = -1;
   annotation_default_offset = -1;
+  type_annotations_offset = -1;
   compute_optional_offset(signature_offset,             k, vmSymbols::signature_name(),             vmSymbols::string_signature());
   compute_optional_offset(annotations_offset,           k, vmSymbols::annotations_name(),           vmSymbols::byte_array_signature());
   compute_optional_offset(parameter_annotations_offset, k, vmSymbols::parameter_annotations_name(), vmSymbols::byte_array_signature());
   compute_optional_offset(annotation_default_offset,    k, vmSymbols::annotation_default_name(),    vmSymbols::byte_array_signature());
+  compute_optional_offset(type_annotations_offset,      k, vmSymbols::type_annotations_name(),      vmSymbols::byte_array_signature());
 }
 
 Handle java_lang_reflect_Method::create(TRAPS) {
@@ -1957,6 +1934,22 @@
   method->obj_field_put(annotation_default_offset, value);
 }
 
+bool java_lang_reflect_Method::has_type_annotations_field() {
+  return (type_annotations_offset >= 0);
+}
+
+oop java_lang_reflect_Method::type_annotations(oop method) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  assert(has_type_annotations_field(), "type_annotations field must be present");
+  return method->obj_field(type_annotations_offset);
+}
+
+void java_lang_reflect_Method::set_type_annotations(oop method, oop value) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  assert(has_type_annotations_field(), "type_annotations field must be present");
+  method->obj_field_put(type_annotations_offset, value);
+}
+
 void java_lang_reflect_Constructor::compute_offsets() {
   Klass* k = SystemDictionary::reflect_Constructor_klass();
   compute_offset(clazz_offset,          k, vmSymbols::clazz_name(),          vmSymbols::class_signature());
@@ -1968,9 +1961,11 @@
   signature_offset = -1;
   annotations_offset = -1;
   parameter_annotations_offset = -1;
+  type_annotations_offset = -1;
   compute_optional_offset(signature_offset,             k, vmSymbols::signature_name(),             vmSymbols::string_signature());
   compute_optional_offset(annotations_offset,           k, vmSymbols::annotations_name(),           vmSymbols::byte_array_signature());
   compute_optional_offset(parameter_annotations_offset, k, vmSymbols::parameter_annotations_name(), vmSymbols::byte_array_signature());
+  compute_optional_offset(type_annotations_offset,      k, vmSymbols::type_annotations_name(),      vmSymbols::byte_array_signature());
 }
 
 Handle java_lang_reflect_Constructor::create(TRAPS) {
@@ -2081,6 +2076,22 @@
   method->obj_field_put(parameter_annotations_offset, value);
 }
 
+bool java_lang_reflect_Constructor::has_type_annotations_field() {
+  return (type_annotations_offset >= 0);
+}
+
+oop java_lang_reflect_Constructor::type_annotations(oop constructor) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  assert(has_type_annotations_field(), "type_annotations field must be present");
+  return constructor->obj_field(type_annotations_offset);
+}
+
+void java_lang_reflect_Constructor::set_type_annotations(oop constructor, oop value) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  assert(has_type_annotations_field(), "type_annotations field must be present");
+  constructor->obj_field_put(type_annotations_offset, value);
+}
+
 void java_lang_reflect_Field::compute_offsets() {
   Klass* k = SystemDictionary::reflect_Field_klass();
   compute_offset(clazz_offset,     k, vmSymbols::clazz_name(),     vmSymbols::class_signature());
@@ -2091,8 +2102,10 @@
   // The generic signature and annotations fields are only present in 1.5
   signature_offset = -1;
   annotations_offset = -1;
+  type_annotations_offset = -1;
   compute_optional_offset(signature_offset, k, vmSymbols::signature_name(), vmSymbols::string_signature());
   compute_optional_offset(annotations_offset,  k, vmSymbols::annotations_name(),  vmSymbols::byte_array_signature());
+  compute_optional_offset(type_annotations_offset,  k, vmSymbols::type_annotations_name(),  vmSymbols::byte_array_signature());
 }
 
 Handle java_lang_reflect_Field::create(TRAPS) {
@@ -2187,6 +2200,21 @@
   field->obj_field_put(annotations_offset, value);
 }
 
+bool java_lang_reflect_Field::has_type_annotations_field() {
+  return (type_annotations_offset >= 0);
+}
+
+oop java_lang_reflect_Field::type_annotations(oop field) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  assert(has_type_annotations_field(), "type_annotations field must be present");
+  return field->obj_field(type_annotations_offset);
+}
+
+void java_lang_reflect_Field::set_type_annotations(oop field, oop value) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  assert(has_type_annotations_field(), "type_annotations field must be present");
+  field->obj_field_put(type_annotations_offset, value);
+}
 
 void sun_reflect_ConstantPool::compute_offsets() {
   Klass* k = SystemDictionary::reflect_ConstantPool_klass();
@@ -2197,6 +2225,66 @@
   }
 }
 
+void java_lang_reflect_Parameter::compute_offsets() {
+  Klass* k = SystemDictionary::reflect_Parameter_klass();
+  if (NULL != k) {
+    compute_offset(name_offset,        k, vmSymbols::name_name(),        vmSymbols::string_signature());
+    compute_offset(modifiers_offset,   k, vmSymbols::modifiers_name(),   vmSymbols::int_signature());
+    compute_offset(index_offset,       k, vmSymbols::index_name(),       vmSymbols::int_signature());
+    compute_offset(executable_offset,  k, vmSymbols::executable_name(),  vmSymbols::executable_signature());
+  }
+}
+
+Handle java_lang_reflect_Parameter::create(TRAPS) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  Symbol* name = vmSymbols::java_lang_reflect_Parameter();
+  Klass* k = SystemDictionary::resolve_or_fail(name, true, CHECK_NH);
+  instanceKlassHandle klass (THREAD, k);
+  // Ensure it is initialized
+  klass->initialize(CHECK_NH);
+  return klass->allocate_instance_handle(CHECK_NH);
+}
+
+oop java_lang_reflect_Parameter::name(oop param) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  return param->obj_field(name_offset);
+}
+
+void java_lang_reflect_Parameter::set_name(oop param, oop value) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  param->obj_field_put(name_offset, value);
+}
+
+int java_lang_reflect_Parameter::modifiers(oop param) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  return param->int_field(modifiers_offset);
+}
+
+void java_lang_reflect_Parameter::set_modifiers(oop param, int value) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  param->int_field_put(modifiers_offset, value);
+}
+
+int java_lang_reflect_Parameter::index(oop param) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  return param->int_field(index_offset);
+}
+
+void java_lang_reflect_Parameter::set_index(oop param, int value) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  param->int_field_put(index_offset, value);
+}
+
+oop java_lang_reflect_Parameter::executable(oop param) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  return param->obj_field(executable_offset);
+}
+
+void java_lang_reflect_Parameter::set_executable(oop param, oop value) {
+  assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
+  param->obj_field_put(executable_offset, value);
+}
+
 
 Handle sun_reflect_ConstantPool::create(TRAPS) {
   assert(Universe::is_fully_initialized(), "Need to find another solution to the reflection problem");
@@ -2539,8 +2627,8 @@
 
 void java_lang_invoke_MemberName::set_vmtarget(oop mname, Metadata* ref) {
   assert(is_instance(mname), "wrong type");
-#ifdef ASSERT
   // check the type of the vmtarget
+  oop dependency = NULL;
   if (ref != NULL) {
     switch (flags(mname) & (MN_IS_METHOD |
                             MN_IS_CONSTRUCTOR |
@@ -2548,28 +2636,21 @@
     case MN_IS_METHOD:
     case MN_IS_CONSTRUCTOR:
       assert(ref->is_method(), "should be a method");
+      dependency = ((Method*)ref)->method_holder()->java_mirror();
       break;
     case MN_IS_FIELD:
       assert(ref->is_klass(), "should be a class");
+      dependency = ((Klass*)ref)->java_mirror();
       break;
     default:
       ShouldNotReachHere();
     }
   }
-#endif //ASSERT
   mname->address_field_put(_vmtarget_offset, (address)ref);
-  oop loader = NULL;
-  if (ref != NULL) {
-    if (ref->is_klass()) {
-      loader = ((Klass*)ref)->class_loader();
-    } else if (ref->is_method()) {
-      loader = ((Method*)ref)->method_holder()->class_loader();
-    } else {
-      ShouldNotReachHere();
-    }
-  }
-  // Add a reference to the loader to ensure the metadata is kept alive
-  mname->obj_field_put(_vmloader_offset, loader);
+  // Add a reference to the loader (actually a mirror, because anonymous classes
+  // will not have distinct loaders) to ensure the metadata is kept alive.
+  // This mirror may be different from the one in the clazz field.
+  mname->obj_field_put(_vmloader_offset, dependency);
 }
 
 intptr_t java_lang_invoke_MemberName::vmindex(oop mname) {
@@ -2734,7 +2815,6 @@
 
 bool java_lang_ClassLoader::offsets_computed = false;
 int  java_lang_ClassLoader::_loader_data_offset = -1;
-int  java_lang_ClassLoader::_dependencies_offset = -1;
 int  java_lang_ClassLoader::parallelCapable_offset = -1;
 
 ClassLoaderData** java_lang_ClassLoader::loader_data_addr(oop loader) {
@@ -2746,18 +2826,6 @@
   return *java_lang_ClassLoader::loader_data_addr(loader);
 }
 
-oop java_lang_ClassLoader::dependencies(oop loader) {
-  return loader->obj_field(_dependencies_offset);
-}
-
-HeapWord* java_lang_ClassLoader::dependencies_addr(oop loader) {
-  if (UseCompressedOops) {
-    return (HeapWord*)loader->obj_field_addr<narrowOop>(_dependencies_offset);
-  } else {
-    return (HeapWord*)loader->obj_field_addr<oop>(_dependencies_offset);
-  }
-}
-
 void java_lang_ClassLoader::compute_offsets() {
   assert(!offsets_computed, "offsets should be initialized only once");
   offsets_computed = true;
@@ -2851,7 +2919,6 @@
 
 int java_lang_Class::_klass_offset;
 int java_lang_Class::_array_klass_offset;
-int java_lang_Class::_resolved_constructor_offset;
 int java_lang_Class::_oop_size_offset;
 int java_lang_Class::_static_oop_field_count_offset;
 GrowableArray<Klass*>* java_lang_Class::_fixup_mirror_list = NULL;
@@ -2872,6 +2939,7 @@
 int java_lang_reflect_Method::annotations_offset;
 int java_lang_reflect_Method::parameter_annotations_offset;
 int java_lang_reflect_Method::annotation_default_offset;
+int java_lang_reflect_Method::type_annotations_offset;
 int java_lang_reflect_Constructor::clazz_offset;
 int java_lang_reflect_Constructor::parameterTypes_offset;
 int java_lang_reflect_Constructor::exceptionTypes_offset;
@@ -2880,6 +2948,7 @@
 int java_lang_reflect_Constructor::signature_offset;
 int java_lang_reflect_Constructor::annotations_offset;
 int java_lang_reflect_Constructor::parameter_annotations_offset;
+int java_lang_reflect_Constructor::type_annotations_offset;
 int java_lang_reflect_Field::clazz_offset;
 int java_lang_reflect_Field::name_offset;
 int java_lang_reflect_Field::type_offset;
@@ -2887,6 +2956,11 @@
 int java_lang_reflect_Field::modifiers_offset;
 int java_lang_reflect_Field::signature_offset;
 int java_lang_reflect_Field::annotations_offset;
+int java_lang_reflect_Field::type_annotations_offset;
+int java_lang_reflect_Parameter::name_offset;
+int java_lang_reflect_Parameter::modifiers_offset;
+int java_lang_reflect_Parameter::index_offset;
+int java_lang_reflect_Parameter::executable_offset;
 int java_lang_boxing_object::value_offset;
 int java_lang_boxing_object::long_value_offset;
 int java_lang_ref_Reference::referent_offset;
@@ -3071,6 +3145,8 @@
     sun_reflect_ConstantPool::compute_offsets();
     sun_reflect_UnsafeStaticFieldAccessorImpl::compute_offsets();
   }
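+  // java.lang.reflect.Parameter only exists in JDK 8 and later class libraries.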
+  if (JDK_Version::is_jdk18x_version())
+    java_lang_reflect_Parameter::compute_offsets();
 
   // generated interpreter code wants to know about the offsets we just computed:
   AbstractAssembler::update_delayed_values();
@@ -3196,7 +3272,6 @@
   // Fake fields
   // CHECK_OFFSET("java/lang/Class", java_lang_Class, klass); // %%% this needs to be checked
   // CHECK_OFFSET("java/lang/Class", java_lang_Class, array_klass); // %%% this needs to be checked
-  // CHECK_OFFSET("java/lang/Class", java_lang_Class, resolved_constructor); // %%% this needs to be checked
 
   // java.lang.Throwable
 
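For context, the java_lang_reflect_Parameter support added in this file backs JDK 8's
java.lang.reflect.Parameter API (runtime access to method parameter names, JEP 118).
A minimal user-level sketch, assuming a JDK 8+ runtime; ParameterDemo and greet are
illustrative names, and javac needs -parameters for the real names to be retained:

    import java.lang.reflect.Method;
    import java.lang.reflect.Parameter;

    public class ParameterDemo {
        static void greet(String name, int times) { }

        public static void main(String[] args) throws Exception {
            Method m = ParameterDemo.class.getDeclaredMethod("greet", String.class, int.class);
            for (Parameter p : m.getParameters()) {
                // Without -parameters the names fall back to arg0, arg1, ...
                System.out.println(p.getType().getName() + " " + p.getName());
            }
        }
    }
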
--- a/src/share/vm/classfile/javaClasses.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/javaClasses.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -154,6 +154,8 @@
   static char*  as_utf8_string(oop java_string, int start, int len);
   static char*  as_platform_dependent_str(Handle java_string, TRAPS);
   static jchar* as_unicode_string(oop java_string, int& length);
+  // produce an ASCII string with all other characters quoted using \u####
+  static char*  as_quoted_ascii(oop java_string);
 
   // Compute the hash value for a java.lang.String object which would
   // contain the characters passed in.
@@ -164,8 +166,8 @@
   // objects in the shared archive file.
   // hash P(31) from Kernighan & Ritchie
   //
-  // For this reason, THIS ALGORITHM MUST MATCH String.toHash().
-  template <typename T> static unsigned int to_hash(T* s, int len) {
+  // For this reason, THIS ALGORITHM MUST MATCH String.hashCode().
+  template <typename T> static unsigned int hash_code(T* s, int len) {
     unsigned int h = 0;
     while (len-- > 0) {
       h = 31*h + (unsigned int) *s;
@@ -173,10 +175,10 @@
     }
     return h;
   }
-  static unsigned int to_hash(oop java_string);
+  static unsigned int hash_code(oop java_string);
 
   // This is the string hash code used by the StringTable, which may be
-  // the same as String.toHash or an alternate hash code.
+  // the same as String.hashCode or an alternate hash code.
   static unsigned int hash_string(oop java_string);
 
   static bool equals(oop java_string, jchar* chars, int len);
@@ -204,7 +206,6 @@
 
 #define CLASS_INJECTED_FIELDS(macro)                                       \
   macro(java_lang_Class, klass,                  intptr_signature,  false) \
-  macro(java_lang_Class, resolved_constructor,   intptr_signature,  false) \
   macro(java_lang_Class, array_klass,            intptr_signature,  false) \
   macro(java_lang_Class, oop_size,               int_signature,     false) \
   macro(java_lang_Class, static_oop_field_count, int_signature,     false)
@@ -216,7 +217,6 @@
   // The fake offsets are added by the class loader when java.lang.Class is loaded
 
   static int _klass_offset;
-  static int _resolved_constructor_offset;
   static int _array_klass_offset;
 
   static int _oop_size_offset;
@@ -252,15 +252,11 @@
   static bool is_primitive(oop java_class);
   static BasicType primitive_type(oop java_class);
   static oop primitive_mirror(BasicType t);
-  // JVM_NewInstance support
-  static Method* resolved_constructor(oop java_class);
-  static void set_resolved_constructor(oop java_class, Method* constructor);
   // JVM_NewArray support
   static Klass* array_klass(oop java_class);
   static void set_array_klass(oop java_class, Klass* klass);
   // compiler support for class operations
   static int klass_offset_in_bytes()                { return _klass_offset; }
-  static int resolved_constructor_offset_in_bytes() { return _resolved_constructor_offset; }
   static int array_klass_offset_in_bytes()          { return _array_klass_offset; }
   // Support for classRedefinedCount field
   static int classRedefinedCount(oop the_class_mirror);
@@ -467,8 +463,7 @@
   static int static_unassigned_stacktrace_offset;
 
   // Printing
-  static char* print_stack_element_to_buffer(Method* method, int bci);
-  static void print_to_stream(Handle stream, const char* str);
+  static char* print_stack_element_to_buffer(Handle mirror, int method, int version, int bci);
   // StackTrace (programmatic access, new since 1.4)
   static void clear_stacktrace(oop throwable);
   // No stack trace available
@@ -488,12 +483,9 @@
   static oop message(oop throwable);
   static oop message(Handle throwable);
   static void set_message(oop throwable, oop value);
-  // Print stack trace stored in exception by call-back to Java
-  // Note: this is no longer used in Merlin, but we still suppport
-  // it for compatibility.
-  static void print_stack_trace(oop throwable, oop print_stream);
-  static void print_stack_element(Handle stream, Method* method, int bci);
-  static void print_stack_element(outputStream *st, Method* method, int bci);
+  static void print_stack_element(outputStream *st, Handle mirror, int method,
+                                  int version, int bci);
+  static void print_stack_element(outputStream *st, methodHandle method, int bci);
   static void print_stack_usage(Handle stream);
 
   // Allocate space for backtrace (created but stack trace not filled in)
@@ -552,6 +544,7 @@
   static int annotations_offset;
   static int parameter_annotations_offset;
   static int annotation_default_offset;
+  static int type_annotations_offset;
 
   static void compute_offsets();
 
@@ -597,6 +590,10 @@
   static oop annotation_default(oop method);
   static void set_annotation_default(oop method, oop value);
 
+  static bool has_type_annotations_field();
+  static oop type_annotations(oop method);
+  static void set_type_annotations(oop method, oop value);
+
   // Debugging
   friend class JavaClasses;
 };
@@ -616,6 +613,7 @@
   static int signature_offset;
   static int annotations_offset;
   static int parameter_annotations_offset;
+  static int type_annotations_offset;
 
   static void compute_offsets();
 
@@ -651,6 +649,10 @@
   static oop parameter_annotations(oop method);
   static void set_parameter_annotations(oop method, oop value);
 
+  static bool has_type_annotations_field();
+  static oop type_annotations(oop constructor);
+  static void set_type_annotations(oop constructor, oop value);
+
   // Debugging
   friend class JavaClasses;
 };
@@ -669,6 +671,7 @@
   static int modifiers_offset;
   static int signature_offset;
   static int annotations_offset;
+  static int type_annotations_offset;
 
   static void compute_offsets();
 
@@ -708,10 +711,45 @@
   static oop annotation_default(oop method);
   static void set_annotation_default(oop method, oop value);
 
+  static bool has_type_annotations_field();
+  static oop type_annotations(oop field);
+  static void set_type_annotations(oop field, oop value);
+
   // Debugging
   friend class JavaClasses;
 };
 
+class java_lang_reflect_Parameter {
+ private:
+  // Note that to reduce dependencies on the JDK we compute these
+  // offsets at run-time.
+  static int name_offset;
+  static int modifiers_offset;
+  static int index_offset;
+  static int executable_offset;
+
+  static void compute_offsets();
+
+ public:
+  // Allocation
+  static Handle create(TRAPS);
+
+  // Accessors (the receiver is always a java.lang.reflect.Parameter instance)
+  static oop name(oop param);
+  static void set_name(oop param, oop value);
+
+  static int index(oop param);
+  static void set_index(oop param, int value);
+
+  static int modifiers(oop param);
+  static void set_modifiers(oop param, int value);
+
+  static oop executable(oop param);
+  static void set_executable(oop param, oop value);
+
+  friend class JavaClasses;
+};
+
 // Interface to sun.reflect.ConstantPool objects
 class sun_reflect_ConstantPool {
  private:
@@ -912,7 +950,7 @@
 
   // Testers
   static bool is_subclass(Klass* klass) {
-    return Klass::cast(klass)->is_subclass_of(SystemDictionary::MethodHandle_klass());
+    return klass->is_subclass_of(SystemDictionary::MethodHandle_klass());
   }
   static bool is_instance(oop obj) {
     return obj != NULL && is_subclass(obj->klass());
@@ -942,7 +980,7 @@
   // Testers
   static bool is_subclass(Klass* klass) {
     return SystemDictionary::LambdaForm_klass() != NULL &&
-      Klass::cast(klass)->is_subclass_of(SystemDictionary::LambdaForm_klass());
+      klass->is_subclass_of(SystemDictionary::LambdaForm_klass());
   }
   static bool is_instance(oop obj) {
     return obj != NULL && is_subclass(obj->klass());
@@ -1004,7 +1042,7 @@
 
   // Testers
   static bool is_subclass(Klass* klass) {
-    return Klass::cast(klass)->is_subclass_of(SystemDictionary::MemberName_klass());
+    return klass->is_subclass_of(SystemDictionary::MemberName_klass());
   }
   static bool is_instance(oop obj) {
     return obj != NULL && is_subclass(obj->klass());
@@ -1090,7 +1128,7 @@
 
   // Testers
   static bool is_subclass(Klass* klass) {
-    return Klass::cast(klass)->is_subclass_of(SystemDictionary::CallSite_klass());
+    return klass->is_subclass_of(SystemDictionary::CallSite_klass());
   }
   static bool is_instance(oop obj) {
     return obj != NULL && is_subclass(obj->klass());
@@ -1123,8 +1161,7 @@
 // Interface to java.lang.ClassLoader objects
 
 #define CLASSLOADER_INJECTED_FIELDS(macro)                            \
-  macro(java_lang_ClassLoader, loader_data,  intptr_signature, false) \
-  macro(java_lang_ClassLoader, dependencies, object_signature, false)
+  macro(java_lang_ClassLoader, loader_data,  intptr_signature, false)
 
 class java_lang_ClassLoader : AllStatic {
  private:
@@ -1133,7 +1170,6 @@
    hc_parent_offset = 0
   };
   static int _loader_data_offset;
-  static int _dependencies_offset;
   static bool offsets_computed;
   static int parent_offset;
   static int parallelCapable_offset;
@@ -1144,9 +1180,6 @@
   static ClassLoaderData** loader_data_addr(oop loader);
   static ClassLoaderData* loader_data(oop loader);
 
-  static oop  dependencies(oop loader);
-  static HeapWord* dependencies_addr(oop loader);
-
   static oop parent(oop loader);
   static bool isAncestor(oop loader, oop cl);
 
@@ -1160,7 +1193,7 @@
 
   // Testers
   static bool is_subclass(Klass* klass) {
-    return Klass::cast(klass)->is_subclass_of(SystemDictionary::ClassLoader_klass());
+    return klass->is_subclass_of(SystemDictionary::ClassLoader_klass());
   }
   static bool is_instance(oop obj) {
     return obj != NULL && is_subclass(obj->klass());
@@ -1220,7 +1253,8 @@
   static void set_lineNumber(oop element, int value);
 
   // Create an instance of StackTraceElement
-  static oop create(methodHandle m, int bci, TRAPS);
+  static oop create(Handle mirror, int method, int version, int bci, TRAPS);
+  static oop create(methodHandle method, int bci, TRAPS);
 
   // Debugging
   friend class JavaClasses;
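
The renamed hash_code template above must match String.hashCode() exactly. A small
self-contained check of that invariant; HashCheck is an illustrative name:

    public class HashCheck {
        // Same loop as java_lang_String::hash_code: h = 31*h + s[i]
        static int hashCode(char[] s) {
            int h = 0;
            for (char c : s) {
                h = 31 * h + c;
            }
            return h;
        }

        public static void main(String[] args) {
            String str = "hello";
            System.out.println(hashCode(str.toCharArray()) == str.hashCode()); // prints true
        }
    }
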
--- a/src/share/vm/classfile/loaderConstraints.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/loaderConstraints.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -118,7 +118,7 @@
                      probe->name()->as_C_string());
           for (int i = 0; i < probe->num_loaders(); i++) {
             tty->print_cr("[   [%d]: %s", i,
-                          SystemDictionary::loader_name(probe->loader_data(i)));
+                          probe->loader_data(i)->loader_name());
           }
         }
       }
@@ -129,7 +129,7 @@
             if (TraceLoaderConstraints) {
               ResourceMark rm;
               tty->print_cr("[Purging loader %s from constraint for name %s",
-                          SystemDictionary::loader_name(probe->loader_data(n)),
+                            probe->loader_data(n)->loader_name(),
                             probe->name()->as_C_string()
                             );
             }
@@ -145,7 +145,7 @@
               tty->print_cr("[New loader list:");
               for (int i = 0; i < probe->num_loaders(); i++) {
                 tty->print_cr("[   [%d]: %s", i,
-                            SystemDictionary::loader_name(probe->loader_data(i)));
+                              probe->loader_data(i)->loader_name());
               }
             }
 
@@ -320,7 +320,7 @@
                                                        Handle loader) {
   LoaderConstraintEntry *p = *(find_loader_constraint(name, loader));
   if (p != NULL && p->klass() != NULL) {
-    if (Klass::cast(p->klass())->oop_is_instance() && !InstanceKlass::cast(p->klass())->is_loaded()) {
+    if (p->klass()->oop_is_instance() && !InstanceKlass::cast(p->klass())->is_loaded()) {
       // Only return fully loaded classes.  Classes found through the
       // constraints might still be in the process of loading.
       return NULL;
@@ -400,7 +400,7 @@
 
     for (int i = 0; i < p1->num_loaders(); i++) {
       tty->print_cr("[   [%d]: %s", i,
-                    SystemDictionary::loader_name(p1->loader_data(i)));
+                    p1->loader_data(i)->loader_name());
     }
     if (p1->klass() == NULL) {
       tty->print_cr("[... and setting class object]");
--- a/src/share/vm/classfile/placeholders.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/placeholders.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -45,7 +45,7 @@
   entry->set_loadInstanceThreadQ(NULL);
   entry->set_defineThreadQ(NULL);
   entry->set_definer(NULL);
-  entry->set_instanceKlass(NULL);
+  entry->set_instance_klass(NULL);
   return entry;
 }
 
@@ -142,7 +142,7 @@
 }
 
 
-// placeholder used to track class loading internal states
+// placeholder is used to track class loading internal states
 // placeholder existence now for loading superclass/superinterface
 // superthreadQ tracks class circularity, while loading superclass/superinterface
 // loadInstanceThreadQ tracks load_instance_class calls
@@ -153,15 +153,17 @@
 // All claimants remove SeenThread after completing action
 // On removal: if definer and all queues empty, remove entry
 // Note: you can be in both placeholders and systemDictionary
-// see parse_stream for redefine classes
 // Therefore - must always check SD first
 // Ignores the case where entry is not found
 void PlaceholderTable::find_and_remove(int index, unsigned int hash,
-                       Symbol* name, ClassLoaderData* loader_data, Thread* thread) {
+                                       Symbol* name, ClassLoaderData* loader_data,
+                                       classloadAction action,
+                                       Thread* thread) {
     assert_locked_or_safepoint(SystemDictionary_lock);
     PlaceholderEntry *probe = get_entry(index, hash, name, loader_data);
     if (probe != NULL) {
-       // No other threads using this entry
+       probe->remove_seen_thread(thread, action);
+       // If no other threads are using this entry, and this thread is not using it for any other state
        if ((probe->superThreadQ() == NULL) && (probe->loadInstanceThreadQ() == NULL)
           && (probe->defineThreadQ() == NULL) && (probe->definer() == NULL)) {
          remove_entry(index, hash, name, loader_data);
@@ -188,7 +190,7 @@
 void PlaceholderEntry::classes_do(KlassClosure* closure) {
   assert(klassname() != NULL, "should have a non-null klass");
   if (_instanceKlass != NULL) {
-    closure->do_klass(InstanceKlass());
+    closure->do_klass(instance_klass());
   }
 }
 
@@ -220,9 +222,9 @@
     tty->print(", definer ");
     definer()->print_value();
   }
-  if (InstanceKlass() != NULL) {
+  if (instance_klass() != NULL) {
     tty->print(", InstanceKlass ");
-    InstanceKlass()->print_value();
+    instance_klass()->print_value();
   }
   tty->print("\n");
   tty->print("loadInstanceThreadQ threads:");
@@ -241,9 +243,9 @@
   guarantee(loader_data() != NULL, "Must have been setup.");
   guarantee(loader_data()->class_loader() == NULL || loader_data()->class_loader()->is_instance(),
             "checking type of _loader");
-  guarantee(InstanceKlass() == NULL
-            || Klass::cast(InstanceKlass())->oop_is_instance(),
-            "checking type of InstanceKlass result");
+  guarantee(instance_klass() == NULL
+            || instance_klass()->oop_is_instance(),
+            "checking type of instance_klass result");
 }
 
 void PlaceholderTable::verify() {
--- a/src/share/vm/classfile/placeholders.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/placeholders.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -82,7 +82,7 @@
  };
 
   // find_and_add returns probe pointer - old or new
-  // If no entry exists, add a placeholder entry and push SeenThread
+  // If no entry exists, add a placeholder entry and push SeenThread for classloadAction
   // If entry exists, reuse entry and push SeenThread for classloadAction
   PlaceholderEntry* find_and_add(int index, unsigned int hash,
                                  Symbol* name, ClassLoaderData* loader_data,
@@ -92,9 +92,11 @@
   void remove_entry(int index, unsigned int hash,
                     Symbol* name, ClassLoaderData* loader_data);
 
-// Remove placeholder information
+  // find_and_remove first removes SeenThread for classloadAction
+  // If all queues are empty and definer is null, remove the PlaceholderEntry completely
   void find_and_remove(int index, unsigned int hash,
-                       Symbol* name, ClassLoaderData* loader_data, Thread* thread);
+                       Symbol* name, ClassLoaderData* loader_data,
+                       classloadAction action, Thread* thread);
 
   // GC support.
   void classes_do(KlassClosure* f);
@@ -191,8 +193,8 @@
   Thread*            definer()             const {return _definer; }
   void               set_definer(Thread* definer) { _definer = definer; }
 
-  Klass*             InstanceKlass()     const {return _instanceKlass; }
-  void               set_instanceKlass(Klass* InstanceKlass) { _instanceKlass = InstanceKlass; }
+  Klass*             instance_klass()      const {return _instanceKlass; }
+  void               set_instance_klass(Klass* ik) { _instanceKlass = ik; }
 
   SeenThread*        superThreadQ()        const { return _superThreadQ; }
   void               set_superThreadQ(SeenThread* SeenThread) { _superThreadQ = SeenThread; }
--- a/src/share/vm/classfile/symbolTable.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/symbolTable.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -179,7 +179,7 @@
 unsigned int SymbolTable::hash_symbol(const char* s, int len) {
   return use_alternate_hashcode() ?
            AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
-           java_lang_String::to_hash(s, len);
+           java_lang_String::hash_code(s, len);
 }
 
 
@@ -617,7 +617,7 @@
 // Pick hashing algorithm
 unsigned int StringTable::hash_string(const jchar* s, int len) {
   return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
-                                    java_lang_String::to_hash(s, len);
+                                    java_lang_String::hash_code(s, len);
 }
 
 oop StringTable::lookup(int index, jchar* name,
--- a/src/share/vm/classfile/symbolTable.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/symbolTable.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -262,19 +262,14 @@
   // The string table
   static StringTable* the_table() { return _the_table; }
 
+  // Size of one bucket in the string table.  Used when checking for rollover.
+  static uint bucket_size() { return sizeof(HashtableBucket<mtSymbol>); }
+
   static void create_table() {
     assert(_the_table == NULL, "One string table allowed.");
     _the_table = new StringTable();
   }
 
-  static void create_table(HashtableBucket<mtSymbol>* t, int length,
-                           int number_of_entries) {
-    assert(_the_table == NULL, "One string table allowed.");
-    assert((size_t)length == StringTableSize * sizeof(HashtableBucket<mtSymbol>),
-           "bad shared string size.");
-    _the_table = new StringTable(t, number_of_entries);
-  }
-
   // GC support
   //   Delete pointers to otherwise-unreachable objects.
   static void unlink(BoolObjectClosure* cl);
--- a/src/share/vm/classfile/systemDictionary.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/systemDictionary.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -106,9 +106,9 @@
 }
 
 
-ClassLoaderData* SystemDictionary::register_loader(Handle class_loader) {
+ClassLoaderData* SystemDictionary::register_loader(Handle class_loader, TRAPS) {
   if (class_loader() == NULL) return ClassLoaderData::the_null_class_loader_data();
-  return ClassLoaderDataGraph::find_or_create(class_loader);
+  return ClassLoaderDataGraph::find_or_create(class_loader, CHECK_NULL);
 }
 
 // ----------------------------------------------------------------------------
@@ -172,7 +172,7 @@
       assert(klass_h() == NULL, "Should not have result with exception pending");
       Handle e(THREAD, PENDING_EXCEPTION);
       CLEAR_PENDING_EXCEPTION;
-      THROW_MSG_CAUSE_0(vmSymbols::java_lang_NoClassDefFoundError(), class_name->as_C_string(), e);
+      THROW_MSG_CAUSE_NULL(vmSymbols::java_lang_NoClassDefFoundError(), class_name->as_C_string(), e);
     } else {
       return NULL;
     }
@@ -181,9 +181,9 @@
   if (klass_h() == NULL) {
     ResourceMark rm(THREAD);
     if (throw_error) {
-      THROW_MSG_0(vmSymbols::java_lang_NoClassDefFoundError(), class_name->as_C_string());
+      THROW_MSG_NULL(vmSymbols::java_lang_NoClassDefFoundError(), class_name->as_C_string());
     } else {
-      THROW_MSG_0(vmSymbols::java_lang_ClassNotFoundException(), class_name->as_C_string());
+      THROW_MSG_NULL(vmSymbols::java_lang_ClassNotFoundException(), class_name->as_C_string());
     }
   }
   return (Klass*)klass_h();
@@ -240,7 +240,7 @@
                                                          protection_domain,
                                                          CHECK_NULL);
     if (k != NULL) {
-      k = Klass::cast(k)->array_klass(fd.dimension(), CHECK_NULL);
+      k = k->array_klass(fd.dimension(), CHECK_NULL);
     }
   } else {
     k = Universe::typeArrayKlassObj(t);
@@ -328,8 +328,8 @@
     if ((childk != NULL ) && (is_superclass) &&
        ((quicksuperk = InstanceKlass::cast(childk)->super()) != NULL) &&
 
-         ((Klass::cast(quicksuperk)->name() == class_name) &&
-            (Klass::cast(quicksuperk)->class_loader()  == class_loader()))) {
+         ((quicksuperk->name() == class_name) &&
+            (quicksuperk->class_loader()  == class_loader()))) {
            return quicksuperk;
     } else {
       PlaceholderEntry* probe = placeholders()->get_entry(p_index, p_hash, child_name, loader_data);
@@ -343,29 +343,29 @@
   }
   if (throw_circularity_error) {
       ResourceMark rm(THREAD);
-      THROW_MSG_0(vmSymbols::java_lang_ClassCircularityError(), child_name->as_C_string());
+      THROW_MSG_NULL(vmSymbols::java_lang_ClassCircularityError(), child_name->as_C_string());
   }
 
 // java.lang.Object should have been found above
   assert(class_name != NULL, "null super class for resolving");
   // Resolve the super class or interface, check results on return
-  Klass* superk = NULL;
-  superk = SystemDictionary::resolve_or_null(class_name,
+  Klass* superk = SystemDictionary::resolve_or_null(class_name,
                                                  class_loader,
                                                  protection_domain,
                                                  THREAD);
 
   KlassHandle superk_h(THREAD, superk);
 
-  // Note: clean up of placeholders currently in callers of
-  // resolve_super_or_fail - either at update_dictionary time
-  // or on error
+  // Placeholder cleanup has moved so that each classloadAction registrar cleans up
+  // after itself. It is no longer necessary to keep the placeholder table alive until
+  // update_dictionary or until an error; GC used to walk the placeholder table as strong roots.
+  // The instanceKlass is kept alive because the class loader is on the stack,
+  // which keeps the loader_data alive, as well as all instanceKlasses in
+  // the loader_data. parseClassFile adds the instanceKlass to loader_data.
   {
-  MutexLocker mu(SystemDictionary_lock, THREAD);
-   PlaceholderEntry* probe = placeholders()->get_entry(p_index, p_hash, child_name, loader_data);
-   if (probe != NULL) {
-      probe->remove_seen_thread(THREAD, PlaceholderTable::LOAD_SUPER);
-   }
+    MutexLocker mu(SystemDictionary_lock, THREAD);
+    placeholders()->find_and_remove(p_index, p_hash, child_name, loader_data, PlaceholderTable::LOAD_SUPER, THREAD);
+    SystemDictionary_lock->notify_all();
   }
   if (HAS_PENDING_EXCEPTION || superk_h() == NULL) {
     // can null superk
@@ -430,8 +430,8 @@
 
       // We're using a No_Safepoint_Verifier to catch any place where we
       // might potentially do a GC at all.
-      // SystemDictionary::do_unloading() asserts that classes are only
-      // unloaded at a safepoint.
+      // Dictionary::do_unloading() asserts that classes in SD are only
+      // unloaded at a safepoint. Anonymous classes are not in SD.
       No_Safepoint_Verifier nosafepoint;
       dictionary()->add_protection_domain(d_index, d_hash, klass, loader_data,
                                           protection_domain, THREAD);
@@ -486,7 +486,6 @@
 // super class loading here.
 // This also is critical in cases where the original thread gets stalled
 // even in non-circularity situations.
-// Note: only one thread can define the class, but multiple can resolve
 // Note: must call resolve_super_or_fail even if null super -
 // to force placeholder entry creation for this class for circularity detection
 // Caller must check for pending exception
@@ -518,14 +517,6 @@
                                                           protection_domain,
                                                           true,
                                                           CHECK_(nh));
-  // We don't redefine the class, so we just need to clean up if there
-  // was not an error (don't want to modify any system dictionary
-  // data structures).
-  {
-    MutexLocker mu(SystemDictionary_lock, THREAD);
-    placeholders()->find_and_remove(p_index, p_hash, name, loader_data, THREAD);
-    SystemDictionary_lock->notify_all();
-  }
 
   // parallelCapable class loaders do NOT wait for parallel superclass loads to complete
   // Serial class loaders and bootstrap classloader do wait for superclass loads
@@ -591,10 +582,14 @@
   // UseNewReflection
   // Fix for 4474172; see evaluation for more details
   class_loader = Handle(THREAD, java_lang_ClassLoader::non_reflection_class_loader(class_loader()));
-  ClassLoaderData *loader_data = register_loader(class_loader);
+  ClassLoaderData *loader_data = register_loader(class_loader, CHECK_NULL);
 
   // Do lookup to see if class already exist and the protection domain
   // has the right access
+  // This call uses find(), which checks that the protection domain already matches.
+  // All subsequent calls use find_class() and set class_has_been_loaded, so that before
+  // we return a result we call out to Java to check for a valid protection domain,
+  // allowing us to return the Klass* and add it to the pd_set if it is valid.
   unsigned int d_hash = dictionary()->compute_hash(name, loader_data);
   int d_index = dictionary()->hash_to_index(d_hash);
   Klass* probe = dictionary()->find(d_index, d_hash, name, loader_data,
@@ -652,7 +647,7 @@
     }
   }
 
-  // If the class in is in the placeholder table, class loading is in progress
+  // If the class is in the placeholder table, class loading is in progress
   if (super_load_in_progress && havesupername==true) {
     k = SystemDictionary::handle_parallel_super_load(name, superclassname,
         class_loader, protection_domain, lockObject, THREAD);
@@ -664,7 +659,9 @@
     }
   }
 
+  bool throw_circularity_error = false;
   if (!class_has_been_loaded) {
+    bool load_instance_added = false;
 
     // add placeholder entry to record loading instance class
     // Five cases:
@@ -690,7 +687,7 @@
     //    No performance benefit and no deadlock issues.
     // case 5. parallelCapable user level classloaders - without objectLocker
     //    Allow parallel classloading of a class/classloader pair
-    bool throw_circularity_error = false;
+
     {
       MutexLocker mu(SystemDictionary_lock, THREAD);
       if (class_loader.is_null() || !is_parallelCapable(class_loader)) {
@@ -726,12 +723,13 @@
           }
         }
       }
-      // All cases: add LOAD_INSTANCE
+      // All cases: add LOAD_INSTANCE holding SystemDictionary_lock
       // case 3: UnsyncloadClass || case 5: parallelCapable: allow competing threads to try
       // LOAD_INSTANCE in parallel
-      // add placeholder entry even if error - callers will remove on error
+
       if (!throw_circularity_error && !class_has_been_loaded) {
         PlaceholderEntry* newprobe = placeholders()->find_and_add(p_index, p_hash, name, loader_data, PlaceholderTable::LOAD_INSTANCE, NULL, THREAD);
+        load_instance_added = true;
         // For class loaders that do not acquire the classloader object lock,
         // if they did not catch another thread holding LOAD_INSTANCE,
         // need a check analogous to the acquire ObjectLocker/find_class
@@ -740,19 +738,18 @@
         // class loaders holding the ObjectLock shouldn't find the class here
         Klass* check = find_class(d_index, d_hash, name, loader_data);
         if (check != NULL) {
-        // Klass is already loaded, so just return it
+          // Klass is already loaded, so return it after checking/adding protection domain
           k = instanceKlassHandle(THREAD, check);
           class_has_been_loaded = true;
-          newprobe->remove_seen_thread(THREAD, PlaceholderTable::LOAD_INSTANCE);
-          placeholders()->find_and_remove(p_index, p_hash, name, loader_data, THREAD);
-          SystemDictionary_lock->notify_all();
         }
       }
     }
+
     // must throw error outside of owning lock
     if (throw_circularity_error) {
+      assert(!HAS_PENDING_EXCEPTION && !load_instance_added, "circularity error cleanup");
       ResourceMark rm(THREAD);
-      THROW_MSG_0(vmSymbols::java_lang_ClassCircularityError(), name->as_C_string());
+      THROW_MSG_NULL(vmSymbols::java_lang_ClassCircularityError(), name->as_C_string());
     }
 
     if (!class_has_been_loaded) {
@@ -782,20 +779,6 @@
         }
       }
 
-      // clean up placeholder entries for success or error
-      // This cleans up LOAD_INSTANCE entries
-      // It also cleans up LOAD_SUPER entries on errors from
-      // calling load_instance_class
-      {
-        MutexLocker mu(SystemDictionary_lock, THREAD);
-        PlaceholderEntry* probe = placeholders()->get_entry(p_index, p_hash, name, loader_data);
-        if (probe != NULL) {
-          probe->remove_seen_thread(THREAD, PlaceholderTable::LOAD_INSTANCE);
-          placeholders()->find_and_remove(p_index, p_hash, name, loader_data, THREAD);
-          SystemDictionary_lock->notify_all();
-        }
-      }
-
       // If everything was OK (no exceptions, no null return value), and
       // class_loader is NOT the defining loader, do a little more bookkeeping.
       if (!HAS_PENDING_EXCEPTION && !k.is_null() &&
@@ -819,18 +802,22 @@
           }
         }
       }
-      if (HAS_PENDING_EXCEPTION || k.is_null()) {
-        // On error, clean up placeholders
-        {
-          MutexLocker mu(SystemDictionary_lock, THREAD);
-          placeholders()->find_and_remove(p_index, p_hash, name, loader_data, THREAD);
-          SystemDictionary_lock->notify_all();
-        }
-        return NULL;
-      }
+    } // load_instance_class loop
+
+    if (load_instance_added) {
+      // clean up placeholder entries for LOAD_INSTANCE success or error
+      // This brackets the SystemDictionary updates for both defining
+      // and initiating loaders
+      MutexLocker mu(SystemDictionary_lock, THREAD);
+      placeholders()->find_and_remove(p_index, p_hash, name, loader_data, PlaceholderTable::LOAD_INSTANCE, THREAD);
+      SystemDictionary_lock->notify_all();
     }
   }
 
+  if (HAS_PENDING_EXCEPTION || k.is_null()) {
+    return NULL;
+  }
+
 #ifdef ASSERT
   {
     ClassLoaderData* loader_data = k->class_loader_data();
@@ -850,8 +837,8 @@
     // so we cannot allow GC to occur while we're holding this entry.
     // We're using a No_Safepoint_Verifier to catch any place where we
     // might potentially do a GC at all.
-    // SystemDictionary::do_unloading() asserts that classes are only
-    // unloaded at a safepoint.
+    // Dictionary::do_unloading() asserts that classes in SD are only
+    // unloaded at a safepoint. Anonymous classes are not in SD.
     No_Safepoint_Verifier nosafepoint;
     if (dictionary()->is_valid_protection_domain(d_index, d_hash, name,
                                                  loader_data,
@@ -888,7 +875,7 @@
   // of the call to resolve_instance_class_or_null().
   // See evaluation 6790209 and 4474172 for more details.
   class_loader = Handle(THREAD, java_lang_ClassLoader::non_reflection_class_loader(class_loader()));
-  ClassLoaderData* loader_data = register_loader(class_loader);
+  ClassLoaderData* loader_data = register_loader(class_loader, CHECK_NULL);
 
   unsigned int d_hash = dictionary()->compute_hash(class_name, loader_data);
   int d_index = dictionary()->hash_to_index(d_hash);
@@ -898,8 +885,8 @@
     // so we cannot allow GC to occur while we're holding this entry.
     // We're using a No_Safepoint_Verifier to catch any place where we
     // might potentially do a GC at all.
-    // SystemDictionary::do_unloading() asserts that classes are only
-    // unloaded at a safepoint.
+    // Dictionary::do_unloading() asserts that classes in SD are only
+    // unloaded at a safepoint. Anonymous classes are not in SD.
     No_Safepoint_Verifier nosafepoint;
     return dictionary()->find(d_index, d_hash, class_name, loader_data,
                               protection_domain, THREAD);
@@ -928,7 +915,7 @@
       k = SystemDictionary::find(fd.object_key(), class_loader, protection_domain, THREAD);
     }
     if (k != NULL) {
-      k = Klass::cast(k)->array_klass_or_null(fd.dimension());
+      k = k->array_klass_or_null(fd.dimension());
     }
   } else {
     k = find(class_name, class_loader, protection_domain, THREAD);
@@ -948,18 +935,26 @@
                                       TRAPS) {
   TempNewSymbol parsed_name = NULL;
 
+  ClassLoaderData* loader_data;
+  if (host_klass.not_null()) {
+    // Create a new CLD for the anonymous class; it uses the same class loader
+    // as the host_klass
+    assert(EnableInvokeDynamic, "");
+    guarantee(host_klass->class_loader() == class_loader(), "should be the same");
+    loader_data = ClassLoaderData::anonymous_class_loader_data(class_loader(), CHECK_NULL);
+    loader_data->record_dependency(host_klass(), CHECK_NULL);
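+    // The recorded dependency keeps the host class alive for as long as the anonymous class is.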
+  } else {
+    loader_data = ClassLoaderData::class_loader_data(class_loader());
+  }
+
   // Parse the stream. Note that we do this even though this klass might
   // already be present in the SystemDictionary, otherwise we would not
   // throw potential ClassFormatErrors.
   //
   // Note: "name" is updated.
-  // Further note:  a placeholder will be added for this class when
-  //   super classes are loaded (resolve_super_or_fail). We expect this
-  //   to be called for all classes but java.lang.Object; and we preload
-  //   java.lang.Object through resolve_or_fail, not this path.
 
   instanceKlassHandle k = ClassFileParser(st).parseClassFile(class_name,
-                                                             class_loader,
+                                                             loader_data,
                                                              protection_domain,
                                                              host_klass,
                                                              cp_patches,
@@ -967,29 +962,11 @@
                                                              true,
                                                              THREAD);
 
-  // We don't redefine the class, so we just need to clean up whether there
-  // was an error or not (don't want to modify any system dictionary
-  // data structures).
-  // Parsed name could be null if we threw an error before we got far
-  // enough along to parse it -- in that case, there is nothing to clean up.
-  if (parsed_name != NULL) {
-    ClassLoaderData* loader_data = class_loader_data(class_loader);
-
-    unsigned int p_hash = placeholders()->compute_hash(parsed_name,
-                                                       loader_data);
-    int p_index = placeholders()->hash_to_index(p_hash);
-    {
-    MutexLocker mu(SystemDictionary_lock, THREAD);
-    placeholders()->find_and_remove(p_index, p_hash, parsed_name, loader_data, THREAD);
-    SystemDictionary_lock->notify_all();
-    }
-  }
 
   if (host_klass.not_null() && k.not_null()) {
     assert(EnableInvokeDynamic, "");
+    k->set_host_klass(host_klass());
     // If it's anonymous, initialize it now, since nobody else will.
-    k->class_loader_data()->record_dependency(host_klass(), CHECK_NULL);
-    k->set_host_klass(host_klass());
 
     {
       MutexLocker mu_r(Compile_lock, THREAD);
@@ -1002,11 +979,11 @@
     }
 
     // Rewrite and patch constant pool here.
-    k->link_class(THREAD);
+    k->link_class(CHECK_NULL);
     if (cp_patches != NULL) {
       k->constants()->patch_resolved_references(cp_patches);
     }
-    k->eager_initialize(THREAD);
+    k->eager_initialize(CHECK_NULL);
 
     // notify jvmti
     if (JvmtiExport::should_post_class_load()) {
@@ -1039,7 +1016,7 @@
     DoObjectLock = false;
   }
 
-  ClassLoaderData* loader_data = register_loader(class_loader);
+  ClassLoaderData* loader_data = register_loader(class_loader, CHECK_NULL);
 
   // Make sure we are synchronized on the class loader before we proceed
   Handle lockObject = compute_loader_lock_object(class_loader, THREAD);
@@ -1053,13 +1030,9 @@
   // throw potential ClassFormatErrors.
   //
   // Note: "name" is updated.
-  // Further note:  a placeholder will be added for this class when
-  //   super classes are loaded (resolve_super_or_fail). We expect this
-  //   to be called for all classes but java.lang.Object; and we preload
-  //   java.lang.Object through resolve_or_fail, not this path.
 
   instanceKlassHandle k = ClassFileParser(st).parseClassFile(class_name,
-                                                             class_loader,
+                                                             loader_data,
                                                              protection_domain,
                                                              parsed_name,
                                                              verify,
@@ -1105,25 +1078,7 @@
     }
   }
 
-  // If parsing the class file or define_instance_class failed, we
-  // need to remove the placeholder added on our behalf. But we
-  // must make sure parsed_name is valid first (it won't be if we had
-  // a format error before the class was parsed far enough to
-  // find the name).
-  if (HAS_PENDING_EXCEPTION && parsed_name != NULL) {
-    unsigned int p_hash = placeholders()->compute_hash(parsed_name,
-                                                       loader_data);
-    int p_index = placeholders()->hash_to_index(p_hash);
-    {
-    MutexLocker mu(SystemDictionary_lock, THREAD);
-    placeholders()->find_and_remove(p_index, p_hash, parsed_name, loader_data, THREAD);
-    SystemDictionary_lock->notify_all();
-    }
-    return NULL;
-  }
-
-  // Make sure that we didn't leave a place holder in the
-  // SystemDictionary; this is only done on success
+  // Make sure we have an entry in the SystemDictionary on success
   debug_only( {
     if (!HAS_PENDING_EXCEPTION) {
       assert(parsed_name != NULL, "parsed_name is still null?");
@@ -1537,15 +1492,14 @@
     // Only special cases allow parallel defines and can use other thread's results
     // Other cases fall through, and may run into duplicate defines
     // caught by finding an entry in the SystemDictionary
-    if ((UnsyncloadClass || is_parallelDefine(class_loader)) && (probe->InstanceKlass() != NULL)) {
-        probe->remove_seen_thread(THREAD, PlaceholderTable::DEFINE_CLASS);
-        placeholders()->find_and_remove(p_index, p_hash, name_h, loader_data, THREAD);
+    if ((UnsyncloadClass || is_parallelDefine(class_loader)) && (probe->instance_klass() != NULL)) {
+        placeholders()->find_and_remove(p_index, p_hash, name_h, loader_data, PlaceholderTable::DEFINE_CLASS, THREAD);
         SystemDictionary_lock->notify_all();
 #ifdef ASSERT
         Klass* check = find_class(d_index, d_hash, name_h, loader_data);
         assert(check != NULL, "definer missed recording success");
 #endif
-        return(instanceKlassHandle(THREAD, probe->InstanceKlass()));
+        return(instanceKlassHandle(THREAD, probe->instance_klass()));
     } else {
       // This thread will define the class (even if earlier thread tried and had an error)
       probe->set_definer(THREAD);
@@ -1566,11 +1520,10 @@
         linkage_exception = Handle(THREAD,PENDING_EXCEPTION);
         CLEAR_PENDING_EXCEPTION;
       } else {
-        probe->set_instanceKlass(k());
+        probe->set_instance_klass(k());
       }
       probe->set_definer(NULL);
-      probe->remove_seen_thread(THREAD, PlaceholderTable::DEFINE_CLASS);
-      placeholders()->find_and_remove(p_index, p_hash, name_h, loader_data, THREAD);
+      placeholders()->find_and_remove(p_index, p_hash, name_h, loader_data, PlaceholderTable::DEFINE_CLASS, THREAD);
       SystemDictionary_lock->notify_all();
     }
   }
@@ -1727,6 +1680,8 @@
   }
   return newsize;
 }
+// Assumes classes in the SystemDictionary are only unloaded at a safepoint
+// Note: anonymous classes are not in the SD.
 bool SystemDictionary::do_unloading(BoolObjectClosure* is_alive) {
   // First, mark for unload all ClassLoaderData referencing a dead class loader.
   bool has_dead_loaders = ClassLoaderDataGraph::do_unloading(is_alive);
@@ -2096,9 +2051,7 @@
   // All loaded classes get a unique ID.
   TRACE_INIT_ID(k);
 
-  // Check for a placeholder. If there, remove it and make a
-  // new system dictionary entry.
-  placeholders()->find_and_remove(p_index, p_hash, name, loader_data, THREAD);
+  // Make a new system dictionary entry.
   Klass* sd_check = find_class(d_index, d_hash, name, loader_data);
   if (sd_check == NULL) {
     dictionary()->add_klass(name, loader_data, k);
@@ -2107,12 +2060,8 @@
 #ifdef ASSERT
   sd_check = find_class(d_index, d_hash, name, loader_data);
   assert (sd_check != NULL, "should have entry in system dictionary");
-// Changed to allow PH to remain to complete class circularity checking
-// while only one thread can define a class at one time, multiple
-// classes can resolve the superclass for a class at one time,
-// and the placeholder is used to track that
-//  Symbol* ph_check = find_placeholder(name, class_loader);
-//  assert (ph_check == NULL, "should not have a placeholder entry");
+  // Note: there may still be a placeholder entry, used for circularity testing
+  // or for parallel defines
 #endif
     SystemDictionary_lock->notify_all();
   }
@@ -2149,7 +2098,7 @@
     }
     // If element class already loaded, allocate array klass
     if (klass != NULL) {
-      klass = Klass::cast(klass)->array_klass_or_null(fd.dimension());
+      klass = klass->array_klass_or_null(fd.dimension());
     }
   } else {
     MutexLocker mu(SystemDictionary_lock, THREAD);
@@ -2343,6 +2292,7 @@
 
 // Helper for unpacking the return value from linkMethod and linkCallSite.
 static methodHandle unpack_method_and_appendix(Handle mname,
+                                               KlassHandle accessing_klass,
                                                objArrayHandle appendix_box,
                                                Handle* appendix_result,
                                                TRAPS) {
@@ -2361,6 +2311,12 @@
     #endif //PRODUCT
       }
       (*appendix_result) = Handle(THREAD, appendix);
+      // The target is stored in the cpCache, and if the reference to this
+      // MemberName is dropped we need a way to make sure the
+      // class_loader containing this method is kept alive.
+      // FIXME: the appendix might also preserve this dependency.
+      ClassLoaderData* this_key = InstanceKlass::cast(accessing_klass())->class_loader_data();
+      this_key->record_dependency(m->method_holder(), CHECK_NULL); // Can throw OOM
       return methodHandle(THREAD, m);
     }
   }
@@ -2405,7 +2361,7 @@
                          &args, CHECK_(empty));
   Handle mname(THREAD, (oop) result.get_jobject());
   (*method_type_result) = method_type;
-  return unpack_method_and_appendix(mname, appendix_box, appendix_result, THREAD);
+  return unpack_method_and_appendix(mname, accessing_klass, appendix_box, appendix_result, THREAD);
 }
 
 
@@ -2466,9 +2422,9 @@
       Klass* sel_klass = java_lang_Class::as_Klass(mirror);
       mirror = NULL;  // safety
       // Emulate ConstantPool::verify_constant_pool_resolve.
-      if (Klass::cast(sel_klass)->oop_is_objArray())
+      if (sel_klass->oop_is_objArray())
         sel_klass = ObjArrayKlass::cast(sel_klass)->bottom_klass();
-      if (Klass::cast(sel_klass)->oop_is_instance()) {
+      if (sel_klass->oop_is_instance()) {
         KlassHandle sel_kh(THREAD, sel_klass);
         LinkResolver::check_klass_accessability(accessing_klass, sel_kh, CHECK_(empty));
       }
@@ -2596,7 +2552,7 @@
                          &args, CHECK_(empty));
   Handle mname(THREAD, (oop) result.get_jobject());
   (*method_type_result) = method_type;
-  return unpack_method_and_appendix(mname, appendix_box, appendix_result, THREAD);
+  return unpack_method_and_appendix(mname, caller, appendix_box, appendix_result, THREAD);
 }
 
 // Since the identity hash code for symbols changes when the symbols are
--- a/src/share/vm/classfile/systemDictionary.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/systemDictionary.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -131,12 +131,14 @@
   do_klass(Properties_klass,                            java_util_Properties,                      Pre                 ) \
   do_klass(reflect_AccessibleObject_klass,              java_lang_reflect_AccessibleObject,        Pre                 ) \
   do_klass(reflect_Field_klass,                         java_lang_reflect_Field,                   Pre                 ) \
+  do_klass(reflect_Parameter_klass,                     java_lang_reflect_Parameter,               Opt                 ) \
   do_klass(reflect_Method_klass,                        java_lang_reflect_Method,                  Pre                 ) \
   do_klass(reflect_Constructor_klass,                   java_lang_reflect_Constructor,             Pre                 ) \
                                                                                                                          \
   /* NOTE: needed too early in bootstrapping process to have checks based on JDK version */                              \
   /* Universe::is_gte_jdk14x_version() is not set up by this point. */                                                   \
   /* It's okay if this turns out to be NULL in non-1.4 JDKs. */                                                          \
+  do_klass(lambda_MagicLambdaImpl_klass,                java_lang_invoke_MagicLambdaImpl,          Opt                 ) \
   do_klass(reflect_MagicAccessorImpl_klass,             sun_reflect_MagicAccessorImpl,             Opt                 ) \
   do_klass(reflect_MethodAccessorImpl_klass,            sun_reflect_MethodAccessorImpl,            Opt_Only_JDK14NewRef) \
   do_klass(reflect_ConstructorAccessorImpl_klass,       sun_reflect_ConstructorAccessorImpl,       Opt_Only_JDK14NewRef) \
@@ -458,6 +460,7 @@
   // Tells whether ClassLoader.checkPackageAccess is present
   static bool has_checkPackageAccess()      { return _has_checkPackageAccess; }
 
+  static bool Parameter_klass_loaded()      { return WK_KLASS(reflect_Parameter_klass) != NULL; }
   static bool Class_klass_loaded()          { return WK_KLASS(Class_klass) != NULL; }
   static bool Cloneable_klass_loaded()      { return WK_KLASS(Cloneable_klass) != NULL; }
   static bool Object_klass_loaded()         { return WK_KLASS(Object_klass) != NULL; }
@@ -470,7 +473,7 @@
   static void compute_java_system_loader(TRAPS);
 
   // Register a new class loader
-  static ClassLoaderData* register_loader(Handle class_loader);
+  static ClassLoaderData* register_loader(Handle class_loader, TRAPS);
 private:
   // Mirrors for primitive classes (created eagerly)
   static oop check_mirror(oop m) {
@@ -530,7 +533,7 @@
             InstanceKlass::cast((loader)->klass())->name()->as_C_string() );
   }
   static const char* loader_name(ClassLoaderData* loader_data) {
-    return (loader_data->is_the_null_class_loader_data() ? "<bootloader>" :
+    return (loader_data->class_loader() == NULL ? "<bootloader>" :
             InstanceKlass::cast((loader_data->class_loader())->klass())->name()->as_C_string() );
   }
 
--- a/src/share/vm/classfile/verifier.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/verifier.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -446,7 +446,7 @@
           bytecode_name = "<illegal>";
       }
     }
-    InstanceKlass* ik = InstanceKlass::cast(method->method_holder());
+    InstanceKlass* ik = method->method_holder();
     ss->indent().print_cr("Location:");
     streamIndentor si2(ss);
     ss->indent().print_cr("%s.%s%s @%d: %s",
@@ -555,9 +555,10 @@
     if (was_recursively_verified())  return;
 
     Method* m = methods->at(index);
-    if (m->is_native() || m->is_abstract()) {
+    if (m->is_native() || m->is_abstract() || m->is_overpass()) {
       // If m is native or abstract, skip it.  It is checked in class file
-      // parser that methods do not override a final method.
+      // parser that methods do not override a final method.  Overpass methods
+      // are trusted since the VM generates them.
       continue;
     }
     verify_method(methodHandle(THREAD, m), CHECK_VERIFY(this));
@@ -1849,7 +1850,7 @@
   if ((index <= 0) || (index >= nconstants)) {
     verify_error(ErrorContext::bad_cp_index(bci, index),
         "Illegal constant pool index %d in class %s",
-        index, InstanceKlass::cast(cp->pool_holder())->external_name());
+        index, cp->pool_holder()->external_name());
     return;
   }
 }
@@ -1868,7 +1869,7 @@
   if ((types & (1 << tag)) == 0) {
     verify_error(ErrorContext::bad_cp_index(bci, index),
       "Illegal type at constant pool entry %d in class %s",
-      index, InstanceKlass::cast(cp->pool_holder())->external_name());
+      index, cp->pool_holder()->external_name());
     return;
   }
 }
@@ -1880,7 +1881,7 @@
   if (!tag.is_klass() && !tag.is_unresolved_klass()) {
     verify_error(ErrorContext::bad_cp_index(bci, index),
         "Illegal type at constant pool entry %d in class %s",
-        index, InstanceKlass::cast(cp->pool_holder())->external_name());
+        index, cp->pool_holder()->external_name());
     return;
   }
 }
@@ -2304,11 +2305,21 @@
   // Make sure the constant pool item is the right type
   u2 index = bcs->get_index_u2();
   Bytecodes::Code opcode = bcs->raw_code();
-  unsigned int types = (opcode == Bytecodes::_invokeinterface
-                                ? 1 << JVM_CONSTANT_InterfaceMethodref
-                      : opcode == Bytecodes::_invokedynamic
-                                ? 1 << JVM_CONSTANT_InvokeDynamic
-                                : 1 << JVM_CONSTANT_Methodref);
+  unsigned int types;
+  switch (opcode) {
+    case Bytecodes::_invokeinterface:
+      types = 1 << JVM_CONSTANT_InterfaceMethodref;
+      break;
+    case Bytecodes::_invokedynamic:
+      types = 1 << JVM_CONSTANT_InvokeDynamic;
+      break;
+    case Bytecodes::_invokespecial:
+      types = (1 << JVM_CONSTANT_InterfaceMethodref) |
+              (1 << JVM_CONSTANT_Methodref);
+      break;
+    default:
+      types = 1 << JVM_CONSTANT_Methodref;
+  }
   verify_cp_type(bcs->bci(), index, cp, types, CHECK_VERIFY(this));
 
   // Get method name and signature
--- a/src/share/vm/classfile/vmSymbols.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/vmSymbols.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -507,7 +507,7 @@
 }
 
 void vmIntrinsics::verify_method(ID actual_id, Method* m) {
-  Symbol* mk = Klass::cast(m->method_holder())->name();
+  Symbol* mk = m->method_holder()->name();
   ID declared_id = match_method_with_klass(m, mk);
 
   if (declared_id == actual_id)  return; // success
--- a/src/share/vm/classfile/vmSymbols.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/classfile/vmSymbols.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -86,6 +86,7 @@
   template(java_lang_reflect_Method,                  "java/lang/reflect/Method")                 \
   template(java_lang_reflect_Constructor,             "java/lang/reflect/Constructor")            \
   template(java_lang_reflect_Field,                   "java/lang/reflect/Field")                  \
+  template(java_lang_reflect_Parameter,               "java/lang/reflect/Parameter")              \
   template(java_lang_reflect_Array,                   "java/lang/reflect/Array")                  \
   template(java_lang_StringBuffer,                    "java/lang/StringBuffer")                   \
   template(java_lang_StringBuilder,                   "java/lang/StringBuilder")                  \
@@ -110,10 +111,12 @@
   template(sun_jkernel_DownloadManager,               "sun/jkernel/DownloadManager")              \
   template(getBootClassPathEntryForClass_name,        "getBootClassPathEntryForClass")            \
   template(sun_misc_PostVMInitHook,                   "sun/misc/PostVMInitHook")                  \
+  template(sun_misc_Launcher_ExtClassLoader,          "sun/misc/Launcher$ExtClassLoader")         \
                                                                                                   \
   /* Java runtime version access */                                                               \
   template(sun_misc_Version,                          "sun/misc/Version")                         \
   template(java_runtime_name_name,                    "java_runtime_name")                        \
+  template(java_runtime_version_name,                 "java_runtime_version")                     \
                                                                                                   \
   /* class file format tags */                                                                    \
   template(tag_source_file,                           "SourceFile")                               \
@@ -124,6 +127,7 @@
   template(tag_line_number_table,                     "LineNumberTable")                          \
   template(tag_local_variable_table,                  "LocalVariableTable")                       \
   template(tag_local_variable_type_table,             "LocalVariableTypeTable")                   \
+  template(tag_method_parameters,                     "MethodParameters")                         \
   template(tag_stack_map_table,                       "StackMapTable")                            \
   template(tag_synthetic,                             "Synthetic")                                \
   template(tag_deprecated,                            "Deprecated")                               \
@@ -134,6 +138,8 @@
   template(tag_runtime_visible_parameter_annotations, "RuntimeVisibleParameterAnnotations")       \
   template(tag_runtime_invisible_parameter_annotations,"RuntimeInvisibleParameterAnnotations")    \
   template(tag_annotation_default,                    "AnnotationDefault")                        \
+  template(tag_runtime_visible_type_annotations,      "RuntimeVisibleTypeAnnotations")            \
+  template(tag_runtime_invisible_type_annotations,    "RuntimeInvisibleTypeAnnotations")          \
   template(tag_enclosing_method,                      "EnclosingMethod")                          \
   template(tag_bootstrap_methods,                     "BootstrapMethods")                         \
                                                                                                   \
@@ -188,7 +194,10 @@
   template(java_lang_VirtualMachineError,             "java/lang/VirtualMachineError")            \
   template(java_lang_StackOverflowError,              "java/lang/StackOverflowError")             \
   template(java_lang_StackTraceElement,               "java/lang/StackTraceElement")              \
+                                                                                                  \
+  /* Concurrency support */                                                                       \
   template(java_util_concurrent_locks_AbstractOwnableSynchronizer,   "java/util/concurrent/locks/AbstractOwnableSynchronizer") \
+  template(sun_misc_Contended_signature,              "Lsun/misc/Contended;")                     \
                                                                                                   \
   /* class symbols needed by intrinsics */                                                        \
   VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, template, VM_SYMBOL_IGNORE, VM_SYMBOL_IGNORE, VM_ALIAS_IGNORE) \
@@ -231,12 +240,17 @@
   /* Support for annotations (JDK 1.5 and above) */                                               \
                                                                                                   \
   template(annotations_name,                          "annotations")                              \
+  template(index_name,                                "index")                                    \
+  template(executable_name,                           "executable")                               \
   template(parameter_annotations_name,                "parameterAnnotations")                     \
   template(annotation_default_name,                   "annotationDefault")                        \
   template(sun_reflect_ConstantPool,                  "sun/reflect/ConstantPool")                 \
   template(ConstantPool_name,                         "constantPoolOop")                          \
   template(sun_reflect_UnsafeStaticFieldAccessorImpl, "sun/reflect/UnsafeStaticFieldAccessorImpl")\
   template(base_name,                                 "base")                                     \
+  /* Type Annotations (JDK 8 and above) */                                                        \
+  template(type_annotations_name,                     "typeAnnotations")                          \
+                                                                                                  \
                                                                                                   \
   /* Support for JSR 292 & invokedynamic (JDK 1.7 and above) */                                   \
   template(java_lang_invoke_CallSite,                 "java/lang/invoke/CallSite")                \
@@ -257,6 +271,7 @@
   template(java_lang_invoke_DontInline_signature,     "Ljava/lang/invoke/DontInline;")            \
   template(java_lang_invoke_LambdaForm_Compiled_signature, "Ljava/lang/invoke/LambdaForm$Compiled;") \
   template(java_lang_invoke_LambdaForm_Hidden_signature, "Ljava/lang/invoke/LambdaForm$Hidden;")  \
+  template(java_lang_invoke_MagicLambdaImpl,          "java/lang/invoke/MagicLambdaImpl")         \
   /* internal up-calls made only by the JVM, via class sun.invoke.MethodHandleNatives: */         \
   template(findMethodHandleType_name,                 "findMethodHandleType")                     \
   template(findMethodHandleType_signature,       "(Ljava/lang/Class;[Ljava/lang/Class;)Ljava/lang/invoke/MethodType;") \
@@ -272,7 +287,7 @@
   NOT_LP64(  do_alias(intptr_signature,               int_signature)  )                           \
   LP64_ONLY( do_alias(intptr_signature,               long_signature) )                           \
   template(selectAlternative_signature, "(ZLjava/lang/invoke/MethodHandle;Ljava/lang/invoke/MethodHandle;)Ljava/lang/invoke/MethodHandle;") \
-                                                                                                  \
+                                                                      \
   /* common method and field names */                                                             \
   template(object_initializer_name,                   "<init>")                                   \
   template(class_initializer_name,                    "<clinit>")                                 \
@@ -371,7 +386,6 @@
   template(basicType_name,                            "basicType")                                \
   template(append_name,                               "append")                                   \
   template(klass_name,                                "klass")                                    \
-  template(resolved_constructor_name,                 "resolved_constructor")                     \
   template(array_klass_name,                          "array_klass")                              \
   template(oop_size_name,                             "oop_size")                                 \
   template(static_oop_field_count_name,               "static_oop_field_count")                   \
@@ -467,6 +481,7 @@
   template(class_signature,                           "Ljava/lang/Class;")                                        \
   template(string_signature,                          "Ljava/lang/String;")                                       \
   template(reference_signature,                       "Ljava/lang/ref/Reference;")                                \
+  template(executable_signature,                      "Ljava/lang/reflect/Executable;")                           \
   template(concurrenthashmap_signature,               "Ljava/util/concurrent/ConcurrentHashMap;")                 \
   template(String_StringBuilder_signature,            "(Ljava/lang/String;)Ljava/lang/StringBuilder;")            \
   template(int_StringBuilder_signature,               "(I)Ljava/lang/StringBuilder;")                             \
@@ -720,9 +735,29 @@
   do_intrinsic(_checkIndex,               java_nio_Buffer,        checkIndex_name, int_int_signature,            F_R)   \
    do_name(     checkIndex_name,                                 "checkIndex")                                          \
                                                                                                                         \
+  do_class(sun_nio_cs_iso8859_1_Encoder,  "sun/nio/cs/ISO_8859_1$Encoder")                                              \
+  do_intrinsic(_encodeISOArray,     sun_nio_cs_iso8859_1_Encoder, encodeISOArray_name, encodeISOArray_signature, F_S)   \
+   do_name(     encodeISOArray_name,                             "encodeISOArray")                                      \
+   do_signature(encodeISOArray_signature,                        "([CI[BII)I")                                          \
+                                                                                                                        \
   /* java/lang/ref/Reference */                                                                                         \
   do_intrinsic(_Reference_get,            java_lang_ref_Reference, get_name,    void_object_signature, F_R)             \
                                                                                                                         \
+  /* support for com.sun.crypto.provider.AESCrypt and some of its callers */                                            \
+  do_class(com_sun_crypto_provider_aescrypt,      "com/sun/crypto/provider/AESCrypt")                                   \
+  do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R)   \
+  do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R)   \
+   do_name(     encryptBlock_name,                                 "encryptBlock")                                      \
+   do_name(     decryptBlock_name,                                 "decryptBlock")                                      \
+   do_signature(byteArray_int_byteArray_int_signature,             "([BI[BI)V")                                         \
+                                                                                                                        \
+  do_class(com_sun_crypto_provider_cipherBlockChaining,            "com/sun/crypto/provider/CipherBlockChaining")       \
+   do_intrinsic(_cipherBlockChaining_encryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, encrypt_name, byteArray_int_int_byteArray_int_signature, F_R)   \
+   do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R)   \
+   do_name(     encrypt_name,                                      "encrypt")                                           \
+   do_name(     decrypt_name,                                      "decrypt")                                           \
+   do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)V")                                        \
+                                                                                                                        \
   /* support for sun.misc.Unsafe */                                                                                     \
   do_class(sun_misc_Unsafe,               "sun/misc/Unsafe")                                                            \
                                                                                                                         \
@@ -738,6 +773,15 @@
   do_intrinsic(_unpark,                   sun_misc_Unsafe,        unpark_name, unpark_signature,                 F_RN)  \
    do_name(     unpark_name,                                     "unpark")                                              \
    do_alias(    unpark_signature,                               /*(LObject;)V*/ object_void_signature)                  \
+  do_intrinsic(_loadFence,                sun_misc_Unsafe,        loadFence_name, loadFence_signature,           F_RN)  \
+   do_name(     loadFence_name,                                  "loadFence")                                           \
+   do_alias(    loadFence_signature,                              void_method_signature)                                \
+  do_intrinsic(_storeFence,               sun_misc_Unsafe,        storeFence_name, storeFence_signature,         F_RN)  \
+   do_name(     storeFence_name,                                 "storeFence")                                          \
+   do_alias(    storeFence_signature,                             void_method_signature)                                \
+  do_intrinsic(_fullFence,                sun_misc_Unsafe,        fullFence_name, fullFence_signature,           F_RN)  \
+   do_name(     fullFence_name,                                  "fullFence")                                           \
+   do_alias(    fullFence_signature,                              void_method_signature)                                \
                                                                                                                         \
   /* unsafe memory references (there are a lot of them...) */                                                           \
   do_signature(getObject_signature,       "(Ljava/lang/Object;J)Ljava/lang/Object;")                                    \
@@ -879,12 +923,14 @@
   do_intrinsic(_getAndAddLong,            sun_misc_Unsafe,        getAndAddLong_name, getAndAddLong_signature, F_R)     \
    do_name(     getAndAddLong_name,                               "getAndAddLong")                                      \
    do_signature(getAndAddLong_signature,                          "(Ljava/lang/Object;JJ)J" )                           \
-  do_intrinsic(_getAndSetInt,             sun_misc_Unsafe,        getAndSet_name, getAndSetInt_signature, F_R)          \
-   do_name(     getAndSet_name,                                   "getAndSet")                                          \
+  do_intrinsic(_getAndSetInt,             sun_misc_Unsafe,        getAndSetInt_name, getAndSetInt_signature, F_R)       \
+   do_name(     getAndSetInt_name,                                "getAndSetInt")                                       \
    do_alias(    getAndSetInt_signature,                         /*"(Ljava/lang/Object;JI)I"*/ getAndAddInt_signature)   \
-  do_intrinsic(_getAndSetLong,            sun_misc_Unsafe,        getAndSet_name, getAndSetLong_signature, F_R)         \
+  do_intrinsic(_getAndSetLong,            sun_misc_Unsafe,        getAndSetLong_name, getAndSetLong_signature, F_R)     \
+   do_name(     getAndSetLong_name,                               "getAndSetLong")                                      \
    do_alias(    getAndSetLong_signature,                        /*"(Ljava/lang/Object;JJ)J"*/ getAndAddLong_signature)  \
-  do_intrinsic(_getAndSetObject,          sun_misc_Unsafe,        getAndSet_name, getAndSetObject_signature,  F_R)      \
+  do_intrinsic(_getAndSetObject,          sun_misc_Unsafe,        getAndSetObject_name, getAndSetObject_signature,  F_R)\
+   do_name(     getAndSetObject_name,                             "getAndSetObject")                                    \
    do_signature(getAndSetObject_signature,                        "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \
                                                                                                                         \
   /* prefetch_signature is shared by all prefetch variants */                                                           \
--- a/src/share/vm/code/codeCache.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/code/codeCache.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,6 +30,7 @@
 #include "code/icBuffer.hpp"
 #include "code/nmethod.hpp"
 #include "code/pcDesc.hpp"
+#include "compiler/compileBroker.hpp"
 #include "gc_implementation/shared/markSweep.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/gcLocker.hpp"
@@ -39,6 +40,7 @@
 #include "oops/objArrayOop.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/arguments.hpp"
 #include "runtime/icache.hpp"
 #include "runtime/java.hpp"
 #include "runtime/mutexLocker.hpp"
@@ -168,6 +170,8 @@
   return (nmethod*)cb;
 }
 
+static size_t maxCodeCacheUsed = 0;
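+// maxCodeCacheUsed tracks the high-water mark of code cache usage; it is
+// updated in allocate() below and reported by print_summary().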
+
 CodeBlob* CodeCache::allocate(int size) {
   // Do not seize the CodeCache lock here--if the caller has not
   // already done so, we are going to lose bigtime, since the code
@@ -192,6 +196,8 @@
                     (address)_heap->end() - (address)_heap->begin());
     }
   }
+  maxCodeCacheUsed = MAX2(maxCodeCacheUsed, ((address)_heap->high_boundary() -
+                          (address)_heap->low_boundary()) - unallocated_capacity());
   verify_if_often();
   print_trace("allocation", cb, size);
   return cb;
@@ -928,7 +934,14 @@
   FREE_C_HEAP_ARRAY(int, buckets, mtCode);
 }
 
+#endif // !PRODUCT
+
 void CodeCache::print() {
+  print_summary(tty);
+
+#ifndef PRODUCT
+  if (!Verbose) return;
+
   CodeBlob_sizes live;
   CodeBlob_sizes dead;
 
@@ -953,7 +966,7 @@
   }
 
 
-  if (Verbose) {
+  if (WizardMode) {
      // print the oop_map usage
     int code_size = 0;
     int number_of_blobs = 0;
@@ -977,20 +990,30 @@
     tty->print_cr("  map size  = %d", map_size);
   }
 
+#endif // !PRODUCT
 }
 
-#endif // PRODUCT
+void CodeCache::print_summary(outputStream* st, bool detailed) {
+  size_t total = (_heap->high_boundary() - _heap->low_boundary());
+  st->print_cr("CodeCache: size=" SIZE_FORMAT "Kb used=" SIZE_FORMAT
+               "Kb max_used=" SIZE_FORMAT "Kb free=" SIZE_FORMAT
+               "Kb max_free_chunk=" SIZE_FORMAT "Kb",
+               total/K, (total - unallocated_capacity())/K,
+               maxCodeCacheUsed/K, unallocated_capacity()/K, largest_free_block()/K);
 
-void CodeCache::print_bounds(outputStream* st) {
-  st->print_cr("Code Cache  [" INTPTR_FORMAT ", " INTPTR_FORMAT ", " INTPTR_FORMAT ")",
-               _heap->low_boundary(),
-               _heap->high(),
-               _heap->high_boundary());
-  st->print_cr(" total_blobs=" UINT32_FORMAT " nmethods=" UINT32_FORMAT
-               " adapters=" UINT32_FORMAT " free_code_cache=" SIZE_FORMAT "Kb"
-               " largest_free_block=" SIZE_FORMAT,
-               nof_blobs(), nof_nmethods(), nof_adapters(),
-               unallocated_capacity()/K, largest_free_block());
+  if (detailed) {
+    st->print_cr(" bounds [" INTPTR_FORMAT ", " INTPTR_FORMAT ", " INTPTR_FORMAT "]",
+                 _heap->low_boundary(),
+                 _heap->high(),
+                 _heap->high_boundary());
+    st->print_cr(" total_blobs=" UINT32_FORMAT " nmethods=" UINT32_FORMAT
+                 " adapters=" UINT32_FORMAT,
+                 nof_blobs(), nof_nmethods(), nof_adapters());
+    st->print_cr(" compilation: %s", CompileBroker::should_compile_new_jobs() ?
+                 "enabled" : Arguments::mode() == Arguments::_int ?
+                 "disabled (interpreter mode)" :
+                 "disabled (not enough contiguous free space left)");
+  }
 }
 
 void CodeCache::log_state(outputStream* st) {
--- a/src/share/vm/code/codeCache.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/code/codeCache.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -145,11 +145,11 @@
   static void prune_scavenge_root_nmethods();
 
   // Printing/debugging
-  static void print()   PRODUCT_RETURN;          // prints summary
+  static void print();                           // prints summary
   static void print_internals();
   static void verify();                          // verifies the code cache
   static void print_trace(const char* event, CodeBlob* cb, int size = 0) PRODUCT_RETURN;
-  static void print_bounds(outputStream* st);    // Prints a summary of the bounds of the code cache
+  static void print_summary(outputStream* st, bool detailed = true); // Prints a summary of the code cache usage
   static void log_state(outputStream* st);
 
   // The full limits of the codeCache
--- a/src/share/vm/code/compiledIC.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/code/compiledIC.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -191,8 +191,8 @@
     int index = klassItable::compute_itable_index(call_info->resolved_method()());
     entry = VtableStubs::create_stub(false, index, method());
     assert(entry != NULL, "entry not computed");
-    Klass* k = call_info->resolved_method()->method_holder();
-    assert(Klass::cast(k)->is_interface(), "sanity check");
+    InstanceKlass* k = call_info->resolved_method()->method_holder();
+    assert(k->is_interface(), "sanity check");
     InlineCacheBuffer::create_transition_stub(this, k, entry);
   } else {
     // Can be different than method->vtable_index(), due to package-private etc.
--- a/src/share/vm/code/dependencies.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/code/dependencies.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -552,7 +552,7 @@
     }
     tty->print("  %s = %s", what, (put_star? "*": ""));
     if (arg.is_klass())
-      tty->print("%s", Klass::cast((Klass*)arg.metadata_value())->external_name());
+      tty->print("%s", ((Klass*)arg.metadata_value())->external_name());
     else if (arg.is_method())
       ((Method*)arg.metadata_value())->print_value();
     else
@@ -563,12 +563,13 @@
     bool put_star = !Dependencies::is_concrete_klass(witness);
     tty->print_cr("  witness = %s%s",
                   (put_star? "*": ""),
-                  Klass::cast(witness)->external_name());
+                  witness->external_name());
   }
 }
 
 void Dependencies::DepStream::log_dependency(Klass* witness) {
   if (_deps == NULL && xtty == NULL)  return;  // fast cutout for runtime
+  ResourceMark rm;
   int nargs = argument_count();
   DepArgument args[max_arg_count];
   for (int j = 0; j < nargs; j++) {
@@ -808,7 +809,7 @@
     if (!(m->is_public() || m->is_protected()))
       // The override story is complex when packages get involved.
       return true;  // Must punt the assertion to true.
-    Klass* k = Klass::cast(ctxk);
+    Klass* k = ctxk;
     Method* lm = k->lookup_method(m->name(), m->signature());
     if (lm == NULL && k->oop_is_instance()) {
       // It might be an abstract interface method, devoid of mirandas.
@@ -829,13 +830,13 @@
       }
       if (   !Dependencies::is_concrete_method(lm)
           && !Dependencies::is_concrete_method(m)
-          && Klass::cast(lm->method_holder())->is_subtype_of(m->method_holder()))
+          && lm->method_holder()->is_subtype_of(m->method_holder()))
         // Method m is overridden by lm, but both are non-concrete.
         return true;
     }
     ResourceMark rm;
     tty->print_cr("Dependency method not found in the associated context:");
-    tty->print_cr("  context = %s", Klass::cast(ctxk)->external_name());
+    tty->print_cr("  context = %s", ctxk->external_name());
     tty->print(   "  method = "); m->print_short_name(tty); tty->cr();
     if (lm != NULL) {
       tty->print( "  found = "); lm->print_short_name(tty); tty->cr();
@@ -1010,7 +1011,7 @@
     for (int i = 0; i < num_participants(); i++) {
       Klass* part = participant(i);
       if (part == NULL)  continue;
-      assert(changes.involves_context(part) == Klass::cast(new_type)->is_subtype_of(part),
+      assert(changes.involves_context(part) == new_type->is_subtype_of(part),
              "correct marking of participants, b/c new_type is unique");
       if (changes.involves_context(part)) {
         // new guy is protected from this check by previous participant
@@ -1146,7 +1147,7 @@
 
 
 bool Dependencies::is_concrete_klass(Klass* k) {
-  if (Klass::cast(k)->is_abstract())  return false;
+  if (k->is_abstract())  return false;
   // %%% We could treat classes which are concrete but
   // have not yet been instantiated as virtually abstract.
   // This would require a deoptimization barrier on first instantiation.
@@ -1160,7 +1161,11 @@
 
   // We could also return false if m does not yet appear to be
   // executed, if the VM version supports this distinction also.
-  return !m->is_abstract();
+  return !m->is_abstract() &&
+         !InstanceKlass::cast(m->method_holder())->is_interface();
+         // TODO: investigate whether default methods should be
+         // considered as "concrete" in this situation.  For now they
+         // are not.
 }
 
 
@@ -1701,12 +1706,12 @@
 }
 
 bool KlassDepChange::involves_context(Klass* k) {
-  if (k == NULL || !Klass::cast(k)->oop_is_instance()) {
+  if (k == NULL || !k->oop_is_instance()) {
     return false;
   }
   InstanceKlass* ik = InstanceKlass::cast(k);
   bool is_contained = ik->is_marked_dependent();
-  assert(is_contained == Klass::cast(new_type())->is_subtype_of(k),
+  assert(is_contained == new_type()->is_subtype_of(k),
          "correct marking of potential context types");
   return is_contained;
 }
--- a/src/share/vm/code/icBuffer.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/code/icBuffer.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "code/codeCache.hpp"
 #include "code/compiledIC.hpp"
 #include "code/icBuffer.hpp"
 #include "code/nmethod.hpp"
@@ -37,21 +38,6 @@
 #include "oops/oop.inline2.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/stubRoutines.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_zero
-# include "assembler_zero.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_arm
-# include "assembler_arm.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_ppc
-# include "assembler_ppc.inline.hpp"
-#endif
 
 
 DEF_STUB_INTERFACE(ICStub);
--- a/src/share/vm/code/nmethod.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/code/nmethod.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1263,7 +1263,7 @@
   assert(_entry_bci != InvocationEntryBci, "wrong kind of nmethod");
   // Remove from list of active nmethods
   if (method() != NULL)
-    InstanceKlass::cast(method()->method_holder())->remove_osr_nmethod(this);
+    method()->method_holder()->remove_osr_nmethod(this);
   // Set entry as invalid
   _entry_bci = InvalidOSREntryBci;
 }
@@ -2568,9 +2568,8 @@
     deps.print_dependency();
     Klass* ctxk = deps.context_type();
     if (ctxk != NULL) {
-      Klass* k = Klass::cast(ctxk);
-      if (k->oop_is_instance() && ((InstanceKlass*)k)->is_dependent_nmethod(this)) {
-        tty->print_cr("   [nmethod<=klass]%s", k->external_name());
+      if (ctxk->oop_is_instance() && ((InstanceKlass*)ctxk)->is_dependent_nmethod(this)) {
+        tty->print_cr("   [nmethod<=klass]%s", ctxk->external_name());
       }
     }
     deps.log_dependency();  // put it into the xml log also
--- a/src/share/vm/code/relocInfo.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/code/relocInfo.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,32 +23,13 @@
  */
 
 #include "precompiled.hpp"
+#include "code/codeCache.hpp"
 #include "code/compiledIC.hpp"
 #include "code/nmethod.hpp"
 #include "code/relocInfo.hpp"
 #include "memory/resourceArea.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "utilities/copy.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-# include "nativeInst_x86.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-# include "nativeInst_sparc.hpp"
-#endif
-#ifdef TARGET_ARCH_zero
-# include "assembler_zero.inline.hpp"
-# include "nativeInst_zero.hpp"
-#endif
-#ifdef TARGET_ARCH_arm
-# include "assembler_arm.inline.hpp"
-# include "nativeInst_arm.hpp"
-#endif
-#ifdef TARGET_ARCH_ppc
-# include "assembler_ppc.inline.hpp"
-# include "nativeInst_ppc.hpp"
-#endif
 
 
 const RelocationHolder RelocationHolder::none; // its type is relocInfo::none
--- a/src/share/vm/code/vmreg.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/code/vmreg.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,21 +27,8 @@
 
 #include "memory/allocation.hpp"
 #include "utilities/globalDefinitions.hpp"
-#ifdef TARGET_ARCH_x86
-# include "register_x86.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "register_sparc.hpp"
-#endif
-#ifdef TARGET_ARCH_zero
-# include "register_zero.hpp"
-#endif
-#ifdef TARGET_ARCH_arm
-# include "register_arm.hpp"
-#endif
-#ifdef TARGET_ARCH_ppc
-# include "register_ppc.hpp"
-#endif
+#include "asm/register.hpp"
+
 #ifdef COMPILER2
 #include "opto/adlcVMDeps.hpp"
 #include "utilities/ostream.hpp"
--- a/src/share/vm/compiler/abstractCompiler.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/compiler/abstractCompiler.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -50,6 +50,7 @@
   // Missing feature tests
   virtual bool supports_native()                 { return true; }
   virtual bool supports_osr   ()                 { return true; }
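+  // Lets a compiler veto individual methods; the broker checks this before
+  // creating a compile task (see compileBroker.cpp).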
+  virtual bool can_compile_method(methodHandle method)  { return true; }
 #if defined(TIERED) || ( !defined(COMPILER1) && !defined(COMPILER2) && !defined(SHARK))
   virtual bool is_c1   ()                        { return false; }
   virtual bool is_c2   ()                        { return false; }
--- a/src/share/vm/compiler/compileBroker.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/compiler/compileBroker.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -272,7 +272,7 @@
 
   _compile_id = compile_id;
   _method = method();
-  _method_loader = JNIHandles::make_global(_method->method_holder()->class_loader());
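+  // A global handle to the holder's klass_holder() (its class loader or
+  // mirror) keeps the method's class, and thus its metadata, alive while
+  // the task is in flight.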
+  _method_holder = JNIHandles::make_global(method->method_holder()->klass_holder());
   _osr_bci = osr_bci;
   _is_blocking = is_blocking;
   _comp_level = comp_level;
@@ -283,7 +283,7 @@
   _code_handle = NULL;
 
   _hot_method = NULL;
-  _hot_method_loader = NULL;
+  _hot_method_holder = NULL;
   _hot_count = hot_count;
   _time_queued = 0;  // tidy
   _comment = comment;
@@ -295,8 +295,9 @@
         _hot_method = _method;
       } else {
         _hot_method = hot_method();
+        // only add loader or mirror if different from _method_holder
+        _hot_method_holder = JNIHandles::make_global(hot_method->method_holder()->klass_holder());
       }
-      _hot_method_loader = JNIHandles::make_global(_hot_method->method_holder()->class_loader());
     }
   }
 
@@ -321,8 +322,8 @@
 void CompileTask::free() {
   set_code(NULL);
   assert(!_lock->is_locked(), "Should not be locked when freed");
-  JNIHandles::destroy_global(_method_loader);
-  JNIHandles::destroy_global(_hot_method_loader);
+  JNIHandles::destroy_global(_method_holder);
+  JNIHandles::destroy_global(_hot_method_holder);
 }
 
 
@@ -1051,7 +1052,7 @@
   guarantee(!method->is_abstract(), "cannot compile abstract methods");
   assert(method->method_holder()->oop_is_instance(),
          "sanity check");
-  assert(!InstanceKlass::cast(method->method_holder())->is_not_initialized(),
+  assert(!method->method_holder()->is_not_initialized(),
          "method holder must be initialized");
   assert(!method->is_method_handle_intrinsic(), "do not enqueue these guys");
 
@@ -1206,7 +1207,7 @@
   assert(method->method_holder()->oop_is_instance(), "not an instance method");
   assert(osr_bci == InvocationEntryBci || (0 <= osr_bci && osr_bci < method->code_size()), "bci out of range");
   assert(!method->is_abstract() && (osr_bci == InvocationEntryBci || !method->is_native()), "cannot compile abstract/native methods");
-  assert(!InstanceKlass::cast(method->method_holder())->is_not_initialized(), "method holder must be initialized");
+  assert(!method->method_holder()->is_not_initialized(), "method holder must be initialized");
 
   if (!TieredCompilation) {
     comp_level = CompLevel_highest_tier;
@@ -1217,7 +1218,7 @@
   // lock, make sure that the compilation
   // isn't prohibited in a straightforward way.
 
-  if (compiler(comp_level) == NULL || compilation_is_prohibited(method, osr_bci, comp_level)) {
+  if (compiler(comp_level) == NULL || !compiler(comp_level)->can_compile_method(method) || compilation_is_prohibited(method, osr_bci, comp_level)) {
     return NULL;
   }
 
@@ -1713,6 +1714,20 @@
   }
 }
 
+// wrapper for CodeCache::print_summary()
+static void codecache_print(bool detailed)
+{
+  ResourceMark rm;
+  stringStream s;
+  // Dump the code cache summary into a buffer before locking the tty.
+  {
+    MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+    CodeCache::print_summary(&s, detailed);
+  }
+  ttyLocker ttyl;
+  tty->print_cr("%s", s.as_string());
+}
+
 // ------------------------------------------------------------------
 // CompileBroker::invoke_compiler_on_method
 //
@@ -1840,6 +1855,9 @@
     tty->print_cr("size: %d time: %d inlined: %d bytes", code_size, (int)time.milliseconds(), task->num_inlined_bytecodes());
   }
 
+  if (PrintCodeCacheOnCompilation)
+    codecache_print(/* detailed= */ false);
+
   // Disable compilation, if required.
   switch (compilable) {
   case ciEnv::MethodCompilable_never:
@@ -1884,6 +1902,7 @@
   UseInterpreter = true;
   if (UseCompiler || AlwaysCompileLoopMethods ) {
     if (xtty != NULL) {
+      ResourceMark rm;
       stringStream s;
       // Dump code cache state into a buffer before locking the tty,
       // because log_state() will use locks causing lock conflicts.
@@ -1897,9 +1916,9 @@
     }
     warning("CodeCache is full. Compiler has been disabled.");
     warning("Try increasing the code cache size using -XX:ReservedCodeCacheSize=");
-    CodeCache::print_bounds(tty);
 #ifndef PRODUCT
     if (CompileTheWorld || ExitOnFullCodeCache) {
+      codecache_print(/* detailed= */ true);
       before_exit(JavaThread::current());
       exit_globals(); // will delete tty
       vm_direct_exit(CompileTheWorld ? 0 : 1);
@@ -1912,6 +1931,7 @@
       AlwaysCompileLoopMethods  = false;
     }
   }
+  codecache_print(/* detailed= */ true);
 }
 
 // ------------------------------------------------------------------
--- a/src/share/vm/compiler/compileBroker.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/compiler/compileBroker.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -43,7 +43,7 @@
   Monitor*     _lock;
   uint         _compile_id;
   Method*      _method;
-  jobject      _method_loader;
+  jobject      _method_holder;
   int          _osr_bci;
   bool         _is_complete;
   bool         _is_success;
@@ -56,7 +56,7 @@
   // Fields used for logging why the compilation was initiated:
   jlong        _time_queued;  // in units of os::elapsed_counter()
   Method*      _hot_method;   // which method actually triggered this task
-  jobject      _hot_method_loader;
+  jobject      _hot_method_holder;
   int          _hot_count;    // information about its invocation counter
   const char*  _comment;      // more info about the task
 
--- a/src/share/vm/compiler/compilerOracle.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/compiler/compilerOracle.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -67,7 +67,7 @@
 
   // utility method
   MethodMatcher* find(methodHandle method) {
-    Symbol* class_name  = Klass::cast(method->method_holder())->name();
+    Symbol* class_name  = method->method_holder()->name();
     Symbol* method_name = method->name();
     for (MethodMatcher* current = this; current != NULL; current = current->_next) {
       if (match(class_name, current->class_name(), current->_class_mode) &&
@@ -538,6 +538,7 @@
 
   if (match != NULL) {
     if (!_quiet) {
+      ResourceMark rm;
       tty->print("CompilerOracle: %s ", command_names[command]);
       match->print();
     }
@@ -624,7 +625,7 @@
   assert(has_command_file(), "command file must be specified");
   fileStream stream(fopen(cc_file(), "at"));
   stream.print("exclude ");
-  Klass::cast(method->method_holder())->name()->print_symbol_on(&stream);
+  method->method_holder()->name()->print_symbol_on(&stream);
   stream.print(".");
   method->name()->print_symbol_on(&stream);
   method->signature()->print_symbol_on(&stream);
--- a/src/share/vm/compiler/disassembler.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/compiler/disassembler.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -55,16 +55,18 @@
 bool        Disassembler::_tried_to_load_library = false;
 
 // This routine is in the shared library:
+Disassembler::decode_func_virtual Disassembler::_decode_instructions_virtual = NULL;
 Disassembler::decode_func Disassembler::_decode_instructions = NULL;
 
 static const char hsdis_library_name[] = "hsdis-"HOTSPOT_LIB_ARCH;
-static const char decode_instructions_name[] = "decode_instructions_virtual";
-
+static const char decode_instructions_virtual_name[] = "decode_instructions_virtual";
+static const char decode_instructions_name[] = "decode_instructions";
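+// Selects between the new and old hsdis entry points; set in load_library().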
+static bool use_new_version = true;
 #define COMMENT_COLUMN  40 LP64_ONLY(+8) /*could be an option*/
 #define BYTES_COMMENT   ";..."  /* funky byte display comment */
 
 bool Disassembler::load_library() {
-  if (_decode_instructions != NULL) {
+  if (_decode_instructions_virtual != NULL || _decode_instructions != NULL) {
     // Already succeeded.
     return true;
   }
@@ -123,11 +125,19 @@
     _library = os::dll_load(buf, ebuf, sizeof ebuf);
   }
   if (_library != NULL) {
+    _decode_instructions_virtual = CAST_TO_FN_PTR(Disassembler::decode_func_virtual,
+                                          os::dll_lookup(_library, decode_instructions_virtual_name));
+  }
+  if (_decode_instructions_virtual == NULL) {
+    // new entry point not found; fall back to the old version
     _decode_instructions = CAST_TO_FN_PTR(Disassembler::decode_func,
                                           os::dll_lookup(_library, decode_instructions_name));
+    use_new_version = false;
+  } else {
+    use_new_version = true;
   }
   _tried_to_load_library = true;
-  if (_decode_instructions == NULL) {
+  if (_decode_instructions_virtual == NULL && _decode_instructions == NULL) {
     tty->print_cr("Could not load %s; %s; %s", buf,
                   ((_library != NULL)
                    ? "entry point is missing"
@@ -343,7 +353,7 @@
       obj->print_value_on(st);
       if (st->count() == c) {
         // No output.  (Can happen in product builds.)
-        st->print("(a %s)", Klass::cast(obj->klass())->external_name());
+        st->print("(a %s)", obj->klass()->external_name());
       }
       return;
     }
@@ -450,17 +460,31 @@
     // This is mainly for debugging the library itself.
     FILE* out = stdout;
     FILE* xmlout = (_print_raw > 1 ? out : NULL);
-    return (address)
-      (*Disassembler::_decode_instructions)((uintptr_t)start, (uintptr_t)end,
-                                            start, end - start,
+    return use_new_version ?
+      (address)
+      (*Disassembler::_decode_instructions_virtual)((uintptr_t)start, (uintptr_t)end,
+                                                    start, end - start,
+                                                    NULL, (void*) xmlout,
+                                                    NULL, (void*) out,
+                                                    options(), 0/*nice new line*/)
+      :
+      (address)
+      (*Disassembler::_decode_instructions)(start, end,
                                             NULL, (void*) xmlout,
                                             NULL, (void*) out,
                                             options());
   }
 
-  return (address)
-    (*Disassembler::_decode_instructions)((uintptr_t)start, (uintptr_t)end,
-                                          start, end - start,
+  return use_new_version ?
+    (address)
+    (*Disassembler::_decode_instructions_virtual)((uintptr_t)start, (uintptr_t)end,
+                                                  start, end - start,
+                                                  &event_to_env,  (void*) this,
+                                                  &printf_to_env, (void*) this,
+                                                  options(), 0/*nice new line*/)
+    :
+    (address)
+    (*Disassembler::_decode_instructions)(start, end,
                                           &event_to_env,  (void*) this,
                                           &printf_to_env, (void*) this,
                                           options());
--- a/src/share/vm/compiler/disassembler.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/compiler/disassembler.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -49,18 +49,27 @@
   friend class decode_env;
  private:
   // this is the type of the dll entry point:
-  typedef void* (*decode_func)(uintptr_t start_va, uintptr_t end_va,
+  typedef void* (*decode_func_virtual)(uintptr_t start_va, uintptr_t end_va,
                                unsigned char* buffer, uintptr_t length,
                                void* (*event_callback)(void*, const char*, void*),
                                void* event_stream,
                                int (*printf_callback)(void*, const char*, ...),
                                void* printf_stream,
+                               const char* options,
+                               int newline);
+  // this is the type of the dll entry point for old version:
+  typedef void* (*decode_func)(void* start_va, void* end_va,
+                               void* (*event_callback)(void*, const char*, void*),
+                               void* event_stream,
+                               int (*printf_callback)(void*, const char*, ...),
+                               void* printf_stream,
                                const char* options);
   // points to the library.
   static void*    _library;
   // bailout
   static bool     _tried_to_load_library;
   // points to the decode function.
+  static decode_func_virtual _decode_instructions_virtual;
   static decode_func _decode_instructions;
   // tries to load the library and returns whether it succeeded.
   static bool load_library();
@@ -85,7 +94,9 @@
 
  public:
   static bool can_decode() {
-    return (_decode_instructions != NULL) || load_library();
+    return (_decode_instructions_virtual != NULL) ||
+           (_decode_instructions != NULL) ||
+           load_library();
   }
   static void decode(CodeBlob *cb,               outputStream* st = NULL);
   static void decode(nmethod* nm,                outputStream* st = NULL);
--- a/src/share/vm/compiler/oopMap.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/compiler/oopMap.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -542,17 +542,17 @@
     st->print("Oop");
     break;
   case OopMapValue::value_value:
-    st->print("Value" );
+    st->print("Value");
     break;
   case OopMapValue::narrowoop_value:
-    tty->print("NarrowOop" );
+    st->print("NarrowOop");
     break;
   case OopMapValue::callee_saved_value:
-    st->print("Callers_" );
+    st->print("Callers_");
     optional->print_on(st);
     break;
   case OopMapValue::derived_oop_value:
-    st->print("Derived_oop_" );
+    st->print("Derived_oop_");
     optional->print_on(st);
     break;
   default:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
+#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
+#include "memory/freeBlockDictionary.hpp"
+#include "memory/sharedHeap.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/mutex.hpp"
+#include "runtime/vmThread.hpp"
+
+template <>
+void AdaptiveFreeList<FreeChunk>::print_on(outputStream* st, const char* c) const {
+  if (c != NULL) {
+    st->print("%16s", c);
+  } else {
+    st->print(SIZE_FORMAT_W(16), size());
+  }
+  st->print("\t"
+           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t"
+           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n",
+           bfr_surp(),             surplus(),             desired(),             prev_sweep(),           before_sweep(),
+           count(),               coal_births(),          coal_deaths(),          split_births(),         split_deaths());
+}
+
+template <class Chunk>
+AdaptiveFreeList<Chunk>::AdaptiveFreeList() : FreeList<Chunk>(), _hint(0) {
+  init_statistics();
+}
+
+template <class Chunk>
+AdaptiveFreeList<Chunk>::AdaptiveFreeList(Chunk* fc) : FreeList<Chunk>(fc), _hint(0) {
+  init_statistics();
+#ifndef PRODUCT
+  _allocation_stats.set_returned_bytes(size() * HeapWordSize);
+#endif
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::initialize() {
+  FreeList<Chunk>::initialize();
+  set_hint(0);
+  init_statistics(true /* split_birth */);
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::reset(size_t hint) {
+  FreeList<Chunk>::reset();
+  set_hint(hint);
+}
+
+#ifndef PRODUCT
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::assert_proper_lock_protection_work() const {
+  assert(protecting_lock() != NULL, "Don't call this directly");
+  assert(ParallelGCThreads > 0, "Don't call this directly");
+  Thread* thr = Thread::current();
+  if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
+    // assert that we are holding the freelist lock
+  } else if (thr->is_GC_task_thread()) {
+    assert(protecting_lock()->owned_by_self(), "FreeList RACE DETECTED");
+  } else if (thr->is_Java_thread()) {
+    assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
+  } else {
+    ShouldNotReachHere();  // unaccounted thread type?
+  }
+}
+#endif
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::init_statistics(bool split_birth) {
+  _allocation_stats.initialize(split_birth);
+}
+
+template <class Chunk>
+size_t AdaptiveFreeList<Chunk>::get_better_size() {
+
+  // A candidate chunk has been found.  If it is already under-
+  // populated and there is a hint, return hint().  Otherwise
+  // return the size of this chunk.
+  if (surplus() <= 0) {
+    if (hint() != 0) {
+      return hint();
+    } else {
+      return size();
+    }
+  } else {
+    // This list has a surplus so use it.
+    return size();
+  }
+}
+
+
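+// The one-argument forms forward with record_return = true, so debug builds
+// track the bytes returned to the list (see increment_returned_bytes_by).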
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_head(Chunk* chunk) {
+  assert_proper_lock_protection();
+  return_chunk_at_head(chunk, true);
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_head(Chunk* chunk, bool record_return) {
+  FreeList<Chunk>::return_chunk_at_head(chunk, record_return);
+#ifdef ASSERT
+  if (record_return) {
+    increment_returned_bytes_by(size()*HeapWordSize);
+  }
+#endif
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_tail(Chunk* chunk) {
+  return_chunk_at_tail(chunk, true);
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_tail(Chunk* chunk, bool record_return) {
+  FreeList<Chunk>::return_chunk_at_tail(chunk, record_return);
+#ifdef ASSERT
+  if (record_return) {
+    increment_returned_bytes_by(size()*HeapWordSize);
+  }
+#endif
+}
+
+#ifndef PRODUCT
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::verify_stats() const {
+  // The +1 of the LH comparand is to allow some "looseness" in
+  // checking: we usually call this interface when adding a block
+  // and we'll subsequently update the stats; we cannot update the
+  // stats beforehand because in the case of the large-block BT
+  // dictionary for example, this might be the first block and
+  // in that case there would be no place that we could record
+  // the stats (which are kept in the block itself).
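+  // For example, prev_sweep() = 10, split_births() = 2 and coal_births() = 1
+  // allow split_deaths() + coal_deaths() + count() to total at most 14.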
+  assert((_allocation_stats.prev_sweep() + _allocation_stats.split_births()
+          + _allocation_stats.coal_births() + 1)   // Total Production Stock + 1
+         >= (_allocation_stats.split_deaths() + _allocation_stats.coal_deaths()
+             + (ssize_t)count()),                // Total Current Stock + depletion
+         err_msg("FreeList " PTR_FORMAT " of size " SIZE_FORMAT
+                 " violates Conservation Principle: "
+                 "prev_sweep(" SIZE_FORMAT ")"
+                 " + split_births(" SIZE_FORMAT ")"
+                 " + coal_births(" SIZE_FORMAT ") + 1 >= "
+                 " split_deaths(" SIZE_FORMAT ")"
+                 " coal_deaths(" SIZE_FORMAT ")"
+                 " + count(" SSIZE_FORMAT ")",
+                 this, size(), _allocation_stats.prev_sweep(), _allocation_stats.split_births(),
+                 _allocation_stats.coal_births(), _allocation_stats.split_deaths(),
+                 _allocation_stats.coal_deaths(), count()));
+}
+#endif
+
+// Explicit instantiation; must come after the definitions above have been seen.
+template class AdaptiveFreeList<FreeChunk>;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
+#define SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
+
+#include "memory/freeList.hpp"
+#include "gc_implementation/shared/allocationStats.hpp"
+
+class CompactibleFreeListSpace;
+
+// A class for maintaining a free list of Chunk's.  The AdaptiveFreeList
+// maintains the structure of the list (head, tail, etc.) plus
+// statistics for allocations from the list.  The links between items
+// are not part of FreeList.  The statistics are
+// used to make decisions about coalescing Chunk's when they
+// are swept during collection.
+//
+// See the corresponding .cpp file for a description of the specifics
+// for that implementation.
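+//
+// Typical use (a sketch): reset(hint) prepares the list after a sweep; chunks
+// are then added with return_chunk_at_head()/return_chunk_at_tail() and taken
+// with get_chunk_at_head(), while the AllocationStats guide coalescing.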
+
+class Mutex;
+
+template <class Chunk>
+class AdaptiveFreeList : public FreeList<Chunk> {
+  friend class CompactibleFreeListSpace;
+  friend class VMStructs;
+  // friend class PrintTreeCensusClosure<Chunk, FreeList_t>;
+
+  size_t        _hint;          // next larger size list with a positive surplus
+
+  AllocationStats _allocation_stats; // allocation-related statistics
+
+ public:
+
+  AdaptiveFreeList();
+  AdaptiveFreeList(Chunk* fc);
+
+  using FreeList<Chunk>::assert_proper_lock_protection;
+#ifdef ASSERT
+  using FreeList<Chunk>::protecting_lock;
+#endif
+  using FreeList<Chunk>::count;
+  using FreeList<Chunk>::size;
+  using FreeList<Chunk>::verify_chunk_in_free_list;
+  using FreeList<Chunk>::getFirstNChunksFromList;
+  using FreeList<Chunk>::print_on;
+  void return_chunk_at_head(Chunk* fc, bool record_return);
+  void return_chunk_at_head(Chunk* fc);
+  void return_chunk_at_tail(Chunk* fc, bool record_return);
+  void return_chunk_at_tail(Chunk* fc);
+  using FreeList<Chunk>::return_chunk_at_tail;
+  using FreeList<Chunk>::remove_chunk;
+  using FreeList<Chunk>::prepend;
+  using FreeList<Chunk>::print_labels_on;
+  using FreeList<Chunk>::get_chunk_at_head;
+
+  // Initialize.
+  void initialize();
+
+  // Reset the head, tail, hint, and count of a free list.
+  void reset(size_t hint);
+
+  void assert_proper_lock_protection_work() const PRODUCT_RETURN;
+
+  void print_on(outputStream* st, const char* c = NULL) const;
+
+  size_t hint() const {
+    return _hint;
+  }
+  void set_hint(size_t v) {
+    assert_proper_lock_protection();
+    assert(v == 0 || size() < v, "Bad hint");
+    _hint = v;
+  }
+
+  size_t get_better_size();
+
+  // Accessors for statistics
+  void init_statistics(bool split_birth = false);
+
+  AllocationStats* allocation_stats() {
+    assert_proper_lock_protection();
+    return &_allocation_stats;
+  }
+
+  ssize_t desired() const {
+    return _allocation_stats.desired();
+  }
+  void set_desired(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_desired(v);
+  }
+  void compute_desired(float inter_sweep_current,
+                       float inter_sweep_estimate,
+                       float intra_sweep_estimate) {
+    assert_proper_lock_protection();
+    _allocation_stats.compute_desired(count(),
+                                      inter_sweep_current,
+                                      inter_sweep_estimate,
+                                      intra_sweep_estimate);
+  }
+  ssize_t coal_desired() const {
+    return _allocation_stats.coal_desired();
+  }
+  void set_coal_desired(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_desired(v);
+  }
+
+  ssize_t surplus() const {
+    return _allocation_stats.surplus();
+  }
+  void set_surplus(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_surplus(v);
+  }
+  void increment_surplus() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_surplus();
+  }
+  void decrement_surplus() {
+    assert_proper_lock_protection();
+    _allocation_stats.decrement_surplus();
+  }
+
+  ssize_t bfr_surp() const {
+    return _allocation_stats.bfr_surp();
+  }
+  void set_bfr_surp(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_bfr_surp(v);
+  }
+  ssize_t prev_sweep() const {
+    return _allocation_stats.prev_sweep();
+  }
+  void set_prev_sweep(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_prev_sweep(v);
+  }
+  ssize_t before_sweep() const {
+    return _allocation_stats.before_sweep();
+  }
+  void set_before_sweep(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_before_sweep(v);
+  }
+
+  ssize_t coal_births() const {
+    return _allocation_stats.coal_births();
+  }
+  void set_coal_births(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_births(v);
+  }
+  void increment_coal_births() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_coal_births();
+  }
+
+  ssize_t coal_deaths() const {
+    return _allocation_stats.coal_deaths();
+  }
+  void set_coal_deaths(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_deaths(v);
+  }
+  void increment_coal_deaths() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_coal_deaths();
+  }
+
+  ssize_t split_births() const {
+    return _allocation_stats.split_births();
+  }
+  void set_split_births(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_split_births(v);
+  }
+  void increment_split_births() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_split_births();
+  }
+
+  ssize_t split_deaths() const {
+    return _allocation_stats.split_deaths();
+  }
+  void set_split_deaths(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_split_deaths(v);
+  }
+  void increment_split_deaths() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_split_deaths();
+  }
+
+#ifndef PRODUCT
+  // For debugging.  The "_returned_bytes" values of all the lists are
+  // summed and compared with the total number of bytes swept during a
+  // collection.
+  size_t returned_bytes() const { return _allocation_stats.returned_bytes(); }
+  void set_returned_bytes(size_t v) { _allocation_stats.set_returned_bytes(v); }
+  void increment_returned_bytes_by(size_t v) {
+    _allocation_stats.set_returned_bytes(_allocation_stats.returned_bytes() + v);
+  }
+  // Stats verification
+  void verify_stats() const;
+#endif  // NOT PRODUCT
+};
+
+#endif // SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
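
Review note: a minimal sketch of how the _hint chain and the surplus statistic
above are meant to work together, modeled on the getChunkFromGreater hunk later
in this changeset. The helper name and parameters are hypothetical; the real
logic lives in CompactibleFreeListSpace.

    // Hypothetical sketch: follow the hint chain upward until we find a
    // larger free list with a positive surplus to split a chunk from.
    template <class Chunk>
    size_t find_donor_list(AdaptiveFreeList<Chunk>* lists,
                           size_t start, size_t index_set_size) {
      size_t hint = lists[start].hint();
      while (hint < index_set_size) {
        AdaptiveFreeList<Chunk>* fl = &lists[hint];
        if (fl->surplus() > 0 && fl->head() != NULL) {
          return hint;            // donor found: split here, reset hints
        }
        hint = fl->hint();        // no surplus; try the next larger size
      }
      return index_set_size;      // nothing above 'start' has a surplus
    }
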
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -40,19 +40,8 @@
 #include "runtime/globals_extension.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 //
 // ConcurrentMarkSweepPolicy methods
@@ -67,7 +56,7 @@
   if (_generations == NULL)
     vm_exit_during_initialization("Unable to allocate gen spec");
 
-  if (ParNewGeneration::in_use()) {
+  if (UseParNewGC) {
     if (UseAdaptiveSizePolicy) {
       _generations[0] = new GenerationSpec(Generation::ASParNew,
                                            _initial_gen0_size, _max_gen0_size);
@@ -107,7 +96,7 @@
 
 void ConcurrentMarkSweepPolicy::initialize_gc_policy_counters() {
   // initialize the policy counters - 2 collectors, 3 generations
-  if (ParNewGeneration::in_use()) {
+  if (UseParNewGC) {
     _gc_policy_counters = new GCPolicyCounters("ParNew:CMS", 2, 3);
   }
   else {
@@ -130,7 +119,7 @@
 
   assert(size_policy() != NULL, "A size policy is required");
   // initialize the policy counters - 2 collectors, 3 generations
-  if (ParNewGeneration::in_use()) {
+  if (UseParNewGC) {
     _gc_policy_counters = new CMSGCAdaptivePolicyCounters("ParNew:CMS", 2, 3,
       size_policy());
   }
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -91,7 +91,7 @@
   _collector(NULL)
 {
   assert(sizeof(FreeChunk) / BytesPerWord <= MinChunkSize,
-    "FreeChunk is larger than expected");
+         "FreeChunk is larger than expected");
   _bt.set_space(this);
   initialize(mr, SpaceDecorator::Clear, SpaceDecorator::Mangle);
   // We have all of "mr", all of which we place in the dictionary
@@ -101,14 +101,14 @@
   // implementation, namely, the simple binary tree (splaying
   // temporarily disabled).
   switch (dictionaryChoice) {
+    case FreeBlockDictionary<FreeChunk>::dictionaryBinaryTree:
+      _dictionary = new BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>(mr);
+      break;
     case FreeBlockDictionary<FreeChunk>::dictionarySplayTree:
     case FreeBlockDictionary<FreeChunk>::dictionarySkipList:
     default:
       warning("dictionaryChoice: selected option not understood; using"
               " default BinaryTreeDictionary implementation instead.");
-    case FreeBlockDictionary<FreeChunk>::dictionaryBinaryTree:
-      _dictionary = new BinaryTreeDictionary<FreeChunk>(mr, use_adaptive_freelists);
-      break;
   }
   assert(_dictionary != NULL, "CMS dictionary initialization");
   // The indexed free lists are initially all empty and are lazily
@@ -214,7 +214,6 @@
     assert(q->forwardee() == NULL, "should be forwarded to NULL");
   }
 
-  VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(q, adjusted_size));
   compact_top += adjusted_size;
 
   // we need to update the offset table so that the beginnings of objects can be
@@ -453,7 +452,7 @@
   reportIndexedFreeListStatistics();
   gclog_or_tty->print_cr("Layout of Indexed Freelists");
   gclog_or_tty->print_cr("---------------------------");
-  FreeList<FreeChunk>::print_labels_on(st, "size");
+  AdaptiveFreeList<FreeChunk>::print_labels_on(st, "size");
   for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
     _indexedFreeList[i].print_on(gclog_or_tty);
     for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
@@ -555,7 +554,7 @@
     reportIndexedFreeListStatistics();
     size_t total_size = totalSizeInIndexedFreeLists() +
                        _dictionary->total_chunk_size(DEBUG_ONLY(freelistLock()));
-    gclog_or_tty->print(" free=%ld frag=%1.4f\n", total_size, flsFrag());
+    gclog_or_tty->print(" free=" SIZE_FORMAT " frag=%1.4f\n", total_size, flsFrag());
   }
 }
 
@@ -1319,7 +1318,7 @@
   size_t currSize = numWords + MinChunkSize;
   assert(currSize % MinObjAlignment == 0, "currSize should be aligned");
   for (i = currSize; i < IndexSetSize; i += IndexSetStride) {
-    FreeList<FreeChunk>* fl = &_indexedFreeList[i];
+    AdaptiveFreeList<FreeChunk>* fl = &_indexedFreeList[i];
     if (fl->head()) {
       ret = getFromListGreater(fl, numWords);
       assert(ret == NULL || ret->is_free(), "Should be returning a free chunk");
@@ -1702,7 +1701,9 @@
   _dictionary->return_chunk(chunk);
 #ifndef PRODUCT
   if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
-    TreeChunk<FreeChunk>::as_TreeChunk(chunk)->list()->verify_stats();
+    TreeChunk<FreeChunk, AdaptiveFreeList>* tc = TreeChunk<FreeChunk, AdaptiveFreeList>::as_TreeChunk(chunk);
+    TreeList<FreeChunk, AdaptiveFreeList>* tl = tc->list();
+    tl->verify_stats();
   }
 #endif // PRODUCT
 }
@@ -1745,7 +1746,7 @@
   {
     MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
     ec = dictionary()->find_largest_dict();  // get largest block
-    if (ec != NULL && ec->end() == chunk) {
+    if (ec != NULL && ec->end() == (uintptr_t*) chunk) {
       // It's a coterminal block - we can coalesce.
       size_t old_size = ec->size();
       coalDeath(old_size);
@@ -1850,11 +1851,11 @@
      the excess is >= MIN_CHUNK. */
   size_t start = align_object_size(numWords + MinChunkSize);
   if (start < IndexSetSize) {
-    FreeList<FreeChunk>* it   = _indexedFreeList;
+    AdaptiveFreeList<FreeChunk>* it   = _indexedFreeList;
     size_t    hint = _indexedFreeList[start].hint();
     while (hint < IndexSetSize) {
       assert(hint % MinObjAlignment == 0, "hint should be aligned");
-      FreeList<FreeChunk> *fl = &_indexedFreeList[hint];
+      AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[hint];
       if (fl->surplus() > 0 && fl->head() != NULL) {
         // Found a list with surplus, reset original hint
         // and split out a free chunk which is returned.
@@ -1873,7 +1874,7 @@
 }
 
 /* Requires fl->size >= numWords + MinChunkSize */
-FreeChunk* CompactibleFreeListSpace::getFromListGreater(FreeList<FreeChunk>* fl,
+FreeChunk* CompactibleFreeListSpace::getFromListGreater(AdaptiveFreeList<FreeChunk>* fl,
   size_t numWords) {
   FreeChunk *curr = fl->head();
   size_t oldNumWords = curr->size();
@@ -2155,7 +2156,7 @@
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    FreeList<FreeChunk>* fl = &_indexedFreeList[i];
+    AdaptiveFreeList<FreeChunk>* fl    = &_indexedFreeList[i];
     if (PrintFLSStatistics > 1) {
       gclog_or_tty->print("size[%d] : ", i);
     }
@@ -2174,7 +2175,7 @@
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+    AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
     fl->set_surplus(fl->count() -
                     (ssize_t)((double)fl->desired() * CMSSmallSplitSurplusPercent));
   }
@@ -2185,7 +2186,7 @@
   size_t i;
   size_t h = IndexSetSize;
   for (i = IndexSetSize - 1; i != 0; i -= IndexSetStride) {
-    FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+    AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
     fl->set_hint(h);
     if (fl->surplus() > 0) {
       h = i;
@@ -2197,7 +2198,7 @@
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+    AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
     fl->set_prev_sweep(fl->count());
     fl->set_coal_births(0);
     fl->set_coal_deaths(0);
@@ -2224,7 +2225,7 @@
 
 bool CompactibleFreeListSpace::coalOverPopulated(size_t size) {
   if (size < SmallForDictionary) {
-    FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+    AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
     return (fl->coal_desired() < 0) ||
            ((int)fl->count() > fl->coal_desired());
   } else {
@@ -2234,14 +2235,14 @@
 
 void CompactibleFreeListSpace::smallCoalBirth(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
   fl->increment_coal_births();
   fl->increment_surplus();
 }
 
 void CompactibleFreeListSpace::smallCoalDeath(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
   fl->increment_coal_deaths();
   fl->decrement_surplus();
 }
@@ -2250,7 +2251,7 @@
   if (size  < SmallForDictionary) {
     smallCoalBirth(size);
   } else {
-    dictionary()->dict_census_udpate(size,
+    dictionary()->dict_census_update(size,
                                    false /* split */,
                                    true /* birth */);
   }
@@ -2260,7 +2261,7 @@
   if(size  < SmallForDictionary) {
     smallCoalDeath(size);
   } else {
-    dictionary()->dict_census_udpate(size,
+    dictionary()->dict_census_update(size,
                                    false /* split */,
                                    false /* birth */);
   }
@@ -2268,14 +2269,14 @@
 
 void CompactibleFreeListSpace::smallSplitBirth(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
   fl->increment_split_births();
   fl->increment_surplus();
 }
 
 void CompactibleFreeListSpace::smallSplitDeath(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
   fl->increment_split_deaths();
   fl->decrement_surplus();
 }
@@ -2284,7 +2285,7 @@
   if (size  < SmallForDictionary) {
     smallSplitBirth(size);
   } else {
-    dictionary()->dict_census_udpate(size,
+    dictionary()->dict_census_update(size,
                                    true /* split */,
                                    true /* birth */);
   }
@@ -2294,7 +2295,7 @@
   if (size  < SmallForDictionary) {
     smallSplitDeath(size);
   } else {
-    dictionary()->dict_census_udpate(size,
+    dictionary()->dict_census_update(size,
                                    true /* split */,
                                    false /* birth */);
   }
@@ -2517,10 +2518,10 @@
 
 #ifndef PRODUCT
 void CompactibleFreeListSpace::check_free_list_consistency() const {
-  assert(_dictionary->min_size() <= IndexSetSize,
+  assert((TreeChunk<FreeChunk, AdaptiveFreeList>::min_size() <= IndexSetSize),
     "Some sizes can't be allocated without recourse to"
     " linear allocation buffers");
-  assert(BinaryTreeDictionary<FreeChunk>::min_tree_chunk_size*HeapWordSize == sizeof(TreeChunk<FreeChunk>),
+  assert((TreeChunk<FreeChunk, AdaptiveFreeList>::min_size()*HeapWordSize == sizeof(TreeChunk<FreeChunk, AdaptiveFreeList>)),
     "else MIN_TREE_CHUNK_SIZE is wrong");
   assert(IndexSetStart != 0, "IndexSetStart not initialized");
   assert(IndexSetStride != 0, "IndexSetStride not initialized");
@@ -2529,15 +2530,15 @@
 
 void CompactibleFreeListSpace::printFLCensus(size_t sweep_count) const {
   assert_lock_strong(&_freelistLock);
-  FreeList<FreeChunk> total;
+  AdaptiveFreeList<FreeChunk> total;
   gclog_or_tty->print("end sweep# " SIZE_FORMAT "\n", sweep_count);
-  FreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
+  AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
   size_t total_free = 0;
   for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    const FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+    const AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
     total_free += fl->count() * fl->size();
     if (i % (40*IndexSetStride) == 0) {
-      FreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
+      AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
     }
     fl->print_on(gclog_or_tty);
     total.set_bfr_surp(    total.bfr_surp()     + fl->bfr_surp()    );
@@ -2620,7 +2621,7 @@
     res = _cfls->getChunkFromDictionaryExact(word_sz);
     if (res == NULL) return NULL;
   } else {
-    FreeList<FreeChunk>* fl = &_indexedFreeList[word_sz];
+    AdaptiveFreeList<FreeChunk>* fl = &_indexedFreeList[word_sz];
     if (fl->count() == 0) {
       // Attempt to refill this local free list.
       get_from_global_pool(word_sz, fl);
@@ -2640,7 +2641,7 @@
 
 // Get a chunk of blocks of the right size and update related
 // book-keeping stats
-void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList<FreeChunk>* fl) {
+void CFLS_LAB::get_from_global_pool(size_t word_sz, AdaptiveFreeList<FreeChunk>* fl) {
   // Get the #blocks we want to claim
   size_t n_blks = (size_t)_blocks_to_claim[word_sz].average();
   assert(n_blks > 0, "Error");
@@ -2722,7 +2723,7 @@
         if (num_retire > 0) {
           _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
           // Reset this list.
-          _indexedFreeList[i] = FreeList<FreeChunk>();
+          _indexedFreeList[i] = AdaptiveFreeList<FreeChunk>();
           _indexedFreeList[i].set_size(i);
         }
       }
@@ -2736,7 +2737,7 @@
   }
 }
 
-void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList<FreeChunk>* fl) {
+void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, AdaptiveFreeList<FreeChunk>* fl) {
   assert(fl->count() == 0, "Precondition.");
   assert(word_sz < CompactibleFreeListSpace::IndexSetSize,
          "Precondition");
@@ -2752,12 +2753,12 @@
          (cur_sz < CompactibleFreeListSpace::IndexSetSize) &&
          (CMSSplitIndexedFreeListBlocks || k <= 1);
          k++, cur_sz = k * word_sz) {
-      FreeList<FreeChunk> fl_for_cur_sz;  // Empty.
+      AdaptiveFreeList<FreeChunk> fl_for_cur_sz;  // Empty.
       fl_for_cur_sz.set_size(cur_sz);
       {
         MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
                         Mutex::_no_safepoint_check_flag);
-        FreeList<FreeChunk>* gfl = &_indexedFreeList[cur_sz];
+        AdaptiveFreeList<FreeChunk>* gfl = &_indexedFreeList[cur_sz];
         if (gfl->count() != 0) {
           // nn is the number of chunks of size cur_sz that
           // we'd need to split k-ways each, in order to create
@@ -2832,12 +2833,11 @@
     MutexLockerEx x(parDictionaryAllocLock(),
                     Mutex::_no_safepoint_check_flag);
     while (n > 0) {
-      fc = dictionary()->get_chunk(MAX2(n * word_sz,
-                                  _dictionary->min_size()),
+      fc = dictionary()->get_chunk(MAX2(n * word_sz, _dictionary->min_size()),
                                   FreeBlockDictionary<FreeChunk>::atLeast);
       if (fc != NULL) {
         _bt.allocated((HeapWord*)fc, fc->size(), true /* reducing */);  // update _unallocated_blk
-        dictionary()->dict_census_udpate(fc->size(),
+        dictionary()->dict_census_update(fc->size(),
                                        true /*split*/,
                                        false /*birth*/);
         break;
@@ -2890,7 +2890,7 @@
       fc->set_size(prefix_size);
       if (rem >= IndexSetSize) {
         returnChunkToDictionary(rem_fc);
-        dictionary()->dict_census_udpate(rem, true /*split*/, true /*birth*/);
+        dictionary()->dict_census_update(rem, true /*split*/, true /*birth*/);
         rem_fc = NULL;
       }
       // Otherwise, return it to the small list below.
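
Review note: for setFLSurplus above, the surplus is count minus desired scaled
by CMSSmallSplitSurplusPercent. A worked example with assumed values (not
verified defaults):

    // Worked example (all values assumed, for illustration only):
    //   count   = 120   chunks currently on the size-8 list
    //   desired = 100   demand estimate from AllocationStats
    //   CMSSmallSplitSurplusPercent = 1.10
    //   surplus = 120 - (ssize_t)(100 * 1.10) = 120 - 110 = 10
    // A positive surplus makes the list a split donor; hints from smaller
    // lists (see setFLHints above) are then pointed at it.
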
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_COMPACTIBLEFREELISTSPACE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_COMPACTIBLEFREELISTSPACE_HPP
 
+#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
 #include "gc_implementation/concurrentMarkSweep/promotionInfo.hpp"
 #include "memory/binaryTreeDictionary.hpp"
 #include "memory/blockOffsetTable.inline.hpp"
@@ -38,6 +39,7 @@
 class CompactibleFreeListSpace;
 class BlkClosure;
 class BlkClosureCareful;
+class FreeChunk;
 class UpwardsObjectClosure;
 class ObjectClosureCareful;
 class Klass;
@@ -131,7 +133,7 @@
   FreeBlockDictionary<FreeChunk>::DictionaryChoice _dictionaryChoice;
   FreeBlockDictionary<FreeChunk>* _dictionary;    // ptr to dictionary for large size blocks
 
-  FreeList<FreeChunk> _indexedFreeList[IndexSetSize];
+  AdaptiveFreeList<FreeChunk> _indexedFreeList[IndexSetSize];
                                        // indexed array for small size blocks
   // allocation stategy
   bool       _fitStrategy;      // Use best fit strategy.
@@ -168,7 +170,7 @@
   // If the count of "fl" is negative, it's absolute value indicates a
   // number of free chunks that had been previously "borrowed" from global
   // list of size "word_sz", and must now be decremented.
-  void par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList<FreeChunk>* fl);
+  void par_get_chunk_of_blocks(size_t word_sz, size_t n, AdaptiveFreeList<FreeChunk>* fl);
 
   // Allocation helper functions
   // Allocate using a strategy that takes from the indexed free lists
@@ -214,7 +216,7 @@
   // and return it.  The split off remainder is returned to
   // the free lists.  The old name for getFromListGreater
   // was lookInListGreater.
-  FreeChunk* getFromListGreater(FreeList<FreeChunk>* fl, size_t numWords);
+  FreeChunk* getFromListGreater(AdaptiveFreeList<FreeChunk>* fl, size_t numWords);
   // Get a chunk in the indexed free list or dictionary,
   // by considering a larger chunk and splitting it.
   FreeChunk* getChunkFromGreater(size_t numWords);
@@ -621,7 +623,7 @@
   CompactibleFreeListSpace* _cfls;
 
   // Our local free lists.
-  FreeList<FreeChunk> _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
+  AdaptiveFreeList<FreeChunk> _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
 
   // Initialized from a command-line arg.
 
@@ -634,7 +636,7 @@
   size_t        _num_blocks        [CompactibleFreeListSpace::IndexSetSize];
 
   // Internal work method
-  void get_from_global_pool(size_t word_sz, FreeList<FreeChunk>* fl);
+  void get_from_global_pool(size_t word_sz, AdaptiveFreeList<FreeChunk>* fl);
 
 public:
   CFLS_LAB(CompactibleFreeListSpace* cfls);
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -827,10 +827,10 @@
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   if (PrintGCDetails) {
     if (Verbose) {
-      gclog_or_tty->print(" [%d %s-%s: "SIZE_FORMAT"("SIZE_FORMAT")]",
+      gclog_or_tty->print("[%d %s-%s: "SIZE_FORMAT"("SIZE_FORMAT")]",
         level(), short_name(), s, used(), capacity());
     } else {
-      gclog_or_tty->print(" [%d %s-%s: "SIZE_FORMAT"K("SIZE_FORMAT"K)]",
+      gclog_or_tty->print("[%d %s-%s: "SIZE_FORMAT"K("SIZE_FORMAT"K)]",
         level(), short_name(), s, used() / K, capacity() / K);
     }
   }
@@ -3338,7 +3338,7 @@
     if (Verbose && PrintGC) {
       size_t new_mem_size = _virtual_space.committed_size();
       size_t old_mem_size = new_mem_size - bytes;
-      gclog_or_tty->print_cr("Expanding %s from %ldK by %ldK to %ldK",
+      gclog_or_tty->print_cr("Expanding %s from " SIZE_FORMAT "K by " SIZE_FORMAT "K to " SIZE_FORMAT "K",
                     name(), old_mem_size/K, bytes/K, new_mem_size/K);
     }
   }
@@ -9143,7 +9143,7 @@
     size_t shrinkable_size_in_bytes = chunk_at_end->size();
     size_t aligned_shrinkable_size_in_bytes =
       align_size_down(shrinkable_size_in_bytes, os::vm_page_size());
-    assert(unallocated_start <= chunk_at_end->end(),
+    assert(unallocated_start <= (HeapWord*) chunk_at_end->end(),
       "Inconsistent chunk at end of space");
     size_t bytes = MIN2(desired_bytes, aligned_shrinkable_size_in_bytes);
     size_t word_size_before = heap_word_size(_virtual_space.committed_size());
@@ -9203,14 +9203,14 @@
       if (Verbose && PrintGCDetails) {
         size_t new_mem_size = _virtual_space.committed_size();
         size_t old_mem_size = new_mem_size + bytes;
-        gclog_or_tty->print_cr("Shrinking %s from %ldK by %ldK to %ldK",
+        gclog_or_tty->print_cr("Shrinking %s from " SIZE_FORMAT "K by " SIZE_FORMAT "K to " SIZE_FORMAT "K",
                       name(), old_mem_size/K, bytes/K, new_mem_size/K);
       }
     }
 
     assert(_cmsSpace->unallocated_block() <= _cmsSpace->end(),
       "Inconsistency at end of space");
-    assert(chunk_at_end->end() == _cmsSpace->end(),
+    assert(chunk_at_end->end() == (uintptr_t*) _cmsSpace->end(),
       "Shrinking is inconsistent");
     return;
   }
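
Review note: the format-string changes above replace %ld with SIZE_FORMAT
because size_t is 32-bit on ILP32 and 64-bit on LP64 targets, so a hard-coded
length modifier mismatches on some platforms. A minimal sketch of the pattern
(sizes hypothetical):

    size_t old_mem_size = 32 * K;   // hypothetical values
    size_t new_mem_size = 64 * K;
    // SIZE_FORMAT expands to the right conversion on each platform:
    gclog_or_tty->print_cr("Expanding from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
                           old_mem_size / K, new_mem_size / K);
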
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,18 +27,7 @@
 
 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp"
 #include "gc_implementation/shared/concurrentGCThread.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 class ConcurrentMarkSweepGeneration;
 class CMSCollector;
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -133,7 +133,7 @@
   }
 
   // Return the address past the end of this chunk
-  HeapWord* end() const { return ((HeapWord*) this) + size(); }
+  uintptr_t* end() const { return ((uintptr_t*) this) + size(); }
 
   // debugging
   void verify()             const PRODUCT_RETURN;
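
Review note: with FreeChunk::end() now returning uintptr_t*, comparisons
against FreeChunk* or HeapWord* operands need an explicit cast, which is what
the casts earlier in this changeset add. A minimal sketch of the pattern (the
helper name is hypothetical):

    // Coterminal-block check as in the compactibleFreeListSpace.cpp hunk:
    // the cast makes the pointer types agree after the end() change.
    bool is_coterminal(FreeChunk* ec, FreeChunk* chunk) {
      return ec != NULL && ec->end() == (uintptr_t*) chunk;
    }
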
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,8 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_VMSTRUCTS_CMS_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_VMSTRUCTS_CMS_HPP
 
+typedef BinaryTreeDictionary<FreeChunk, AdaptiveFreeList> AFLBinaryTreeDictionary;
+
 #define VM_STRUCTS_CMS(nonstatic_field, \
                    volatile_nonstatic_field, \
                    static_field) \
@@ -38,14 +40,8 @@
   nonstatic_field(CMSCollector,                _markBitMap,                                   CMSBitMap)                             \
   nonstatic_field(ConcurrentMarkSweepGeneration, _cmsSpace,                                   CompactibleFreeListSpace*)             \
      static_field(ConcurrentMarkSweepThread,   _collector,                                    CMSCollector*)                         \
-  volatile_nonstatic_field(FreeChunk,          _size,                                         size_t)                                \
-  nonstatic_field(FreeChunk,                   _next,                                         FreeChunk*)                            \
-  nonstatic_field(FreeChunk,                   _prev,                                         FreeChunk*)                            \
   nonstatic_field(LinearAllocBlock,            _word_size,                                    size_t)                                \
-  nonstatic_field(FreeList<FreeChunk>,         _size,                                         size_t)                                \
-  nonstatic_field(FreeList<FreeChunk>,         _count,                                        ssize_t)                               \
-  nonstatic_field(BinaryTreeDictionary<FreeChunk>,_total_size,                                 size_t)                                \
-  nonstatic_field(CompactibleFreeListSpace,    _dictionary,                                   FreeBlockDictionary<FreeChunk>*)       \
+  nonstatic_field(AFLBinaryTreeDictionary,     _total_size,                                   size_t)                                \
   nonstatic_field(CompactibleFreeListSpace,    _indexedFreeList[0],                           FreeList<FreeChunk>)                   \
   nonstatic_field(CompactibleFreeListSpace,    _smallLinearAllocBlock,                        LinearAllocBlock)
 
@@ -60,19 +56,16 @@
   declare_toplevel_type(CMSCollector)                                     \
   declare_toplevel_type(CMSBitMap)                                        \
   declare_toplevel_type(FreeChunk)                                        \
+  declare_toplevel_type(Metablock)                                        \
   declare_toplevel_type(ConcurrentMarkSweepThread*)                       \
   declare_toplevel_type(ConcurrentMarkSweepGeneration*)                   \
   declare_toplevel_type(SurrogateLockerThread*)                           \
   declare_toplevel_type(CompactibleFreeListSpace*)                        \
   declare_toplevel_type(CMSCollector*)                                    \
-  declare_toplevel_type(FreeChunk*)                                       \
-  declare_toplevel_type(BinaryTreeDictionary<FreeChunk>*)                 \
-  declare_toplevel_type(FreeBlockDictionary<FreeChunk>*)                  \
-  declare_toplevel_type(FreeList<FreeChunk>*)                             \
-  declare_toplevel_type(FreeList<FreeChunk>)                              \
+  declare_toplevel_type(AFLBinaryTreeDictionary*)                         \
   declare_toplevel_type(LinearAllocBlock)                                 \
   declare_toplevel_type(FreeBlockDictionary<FreeChunk>)                   \
-            declare_type(BinaryTreeDictionary<FreeChunk>, FreeBlockDictionary<FreeChunk>)
+           declare_type(AFLBinaryTreeDictionary, FreeBlockDictionary<FreeChunk>)
 
 #define VM_INT_CONSTANTS_CMS(declare_constant)                            \
   declare_constant(Generation::ConcurrentMarkSweep)                       \
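
Review note: the AFLBinaryTreeDictionary typedef above is needed because the
preprocessor splits macro arguments on top-level commas, so a template-id such
as BinaryTreeDictionary<FreeChunk, AdaptiveFreeList> cannot be passed to the
VM_STRUCTS macros directly. A minimal illustration (DECLARE is hypothetical):

    #define DECLARE(type) /* record 'type' in the VMStructs tables */
    // DECLARE(BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>)
    //   ^ error: the comma inside <...> is seen as a macro-argument
    //     separator, so DECLARE appears to receive two arguments.
    typedef BinaryTreeDictionary<FreeChunk, AdaptiveFreeList> AFLTD;
    // DECLARE(AFLTD)   // fine: a single-token type name
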
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -85,7 +85,7 @@
     _curr_index(0), _length(0), _first_par_unreserved_idx(0),
     _region_live_threshold_bytes(0), _remaining_reclaimable_bytes(0) {
   _region_live_threshold_bytes =
-    HeapRegion::GrainBytes * (size_t) G1OldCSetRegionLiveThresholdPercent / 100;
+    HeapRegion::GrainBytes * (size_t) G1MixedGCLiveThresholdPercent / 100;
 }
 
 #ifndef PRODUCT
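
Review note: the renamed flag feeds the same threshold arithmetic; a worked
example with assumed values:

    // Assumed values, for illustration only:
    //   HeapRegion::GrainBytes        = 1048576 (1M regions)
    //   G1MixedGCLiveThresholdPercent = 90
    //   _region_live_threshold_bytes  = 1048576 * 90 / 100 = 943718
    // Regions with more live bytes than this are skipped by the
    // collection set chooser as mixed-GC candidates.
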
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,27 +46,11 @@
 
 // Concurrent marking bit map wrapper
 
-CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
-  _bm((uintptr_t*)NULL,0),
+CMBitMapRO::CMBitMapRO(int shifter) :
+  _bm(),
   _shifter(shifter) {
-  _bmStartWord = (HeapWord*)(rs.base());
-  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
-  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
-                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
-
-  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
-
-  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
-  // For now we'll just commit all of the bit map up fromt.
-  // Later on we'll try to be more parsimonious with swap.
-  guarantee(_virtual_space.initialize(brs, brs.size()),
-            "couldn't reseve backing store for concurrent marking bit map");
-  assert(_virtual_space.committed_size() == brs.size(),
-         "didn't reserve backing store for all of concurrent marking bit map?");
-  _bm.set_map((uintptr_t*)_virtual_space.low());
-  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
-         _bmWordSize, "inconsistency in bit map sizing");
-  _bm.set_size(_bmWordSize >> _shifter);
+  _bmStartWord = 0;
+  _bmWordSize = 0;
 }
 
 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
@@ -108,15 +92,40 @@
 }
 
 #ifndef PRODUCT
-bool CMBitMapRO::covers(ReservedSpace rs) const {
+bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
          "size inconsistency");
-  return _bmStartWord == (HeapWord*)(rs.base()) &&
-         _bmWordSize  == rs.size()>>LogHeapWordSize;
+  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
+         _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
 }
 #endif
 
+bool CMBitMap::allocate(ReservedSpace heap_rs) {
+  _bmStartWord = (HeapWord*)(heap_rs.base());
+  _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
+  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
+                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
+  if (!brs.is_reserved()) {
+    warning("ConcurrentMark marking bit map allocation failure");
+    return false;
+  }
+  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
+  // For now we'll just commit all of the bit map up front.
+  // Later on we'll try to be more parsimonious with swap.
+  if (!_virtual_space.initialize(brs, brs.size())) {
+    warning("ConcurrentMark marking bit map backing store failure");
+    return false;
+  }
+  assert(_virtual_space.committed_size() == brs.size(),
+         "didn't reserve backing store for all of concurrent marking bit map?");
+  _bm.set_map((uintptr_t*)_virtual_space.low());
+  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
+         _bmWordSize, "inconsistency in bit map sizing");
+  _bm.set_size(_bmWordSize >> _shifter);
+  return true;
+}
+
 void CMBitMap::clearAll() {
   _bm.clear();
   return;
@@ -163,20 +172,80 @@
 #endif
 {}
 
-void CMMarkStack::allocate(size_t size) {
-  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
-  if (_base == NULL) {
-    vm_exit_during_initialization("Failed to allocate CM region mark stack");
+bool CMMarkStack::allocate(size_t capacity) {
+  // allocate a stack of the requisite depth
+  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
+  if (!rs.is_reserved()) {
+    warning("ConcurrentMark MarkStack allocation failure");
+    return false;
+  }
+  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+  if (!_virtual_space.initialize(rs, rs.size())) {
+    warning("ConcurrentMark MarkStack backing store failure");
+    // Release the virtual memory reserved for the marking stack
+    rs.release();
+    return false;
   }
-  _index = 0;
-  _capacity = (jint) size;
+  assert(_virtual_space.committed_size() == rs.size(),
+         "Didn't reserve backing store for all of ConcurrentMark stack?");
+  _base = (oop*) _virtual_space.low();
+  setEmpty();
+  _capacity = (jint) capacity;
   _saved_index = -1;
+  _should_expand = false;
   NOT_PRODUCT(_max_depth = 0);
+  return true;
+}
+
+void CMMarkStack::expand() {
+  // Called during remark if we have overflowed the marking stack while marking.
+  assert(isEmpty(), "stack should have been emptied while handling overflow");
+  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
+  // Clear expansion flag
+  _should_expand = false;
+  if (_capacity == (jint) MarkStackSizeMax) {
+    if (PrintGCDetails && Verbose) {
+      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
+    }
+    return;
+  }
+  // Double capacity if possible
+  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
+  // Do not release the existing stack until we have successfully
+  // reserved the doubled capacity that we want.
+  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
+                                                           sizeof(oop)));
+  if (rs.is_reserved()) {
+    // Release the backing store associated with old stack
+    _virtual_space.release();
+    // Reinitialize virtual space for new stack
+    if (!_virtual_space.initialize(rs, rs.size())) {
+      fatal("Not enough swap for expanded marking stack capacity");
+    }
+    _base = (oop*)(_virtual_space.low());
+    _index = 0;
+    _capacity = new_capacity;
+  } else {
+    if (PrintGCDetails && Verbose) {
+      // Failed to double the capacity; continue with the current stack.
+      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
+                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
+                          _capacity / K, new_capacity / K);
+    }
+  }
+}
+
+void CMMarkStack::set_should_expand() {
+  // If we're resetting the marking state because of a
+  // marking stack overflow, record that we should, if
+  // possible, expand the stack.
+  _should_expand = _cm->has_overflown();
 }
 
 CMMarkStack::~CMMarkStack() {
   if (_base != NULL) {
-    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
+    _base = NULL;
+    _virtual_space.release();
   }
 }
 
@@ -217,7 +286,7 @@
     jint res = Atomic::cmpxchg(next_index, &_index, index);
     if (res == index) {
       for (int i = 0; i < n; i++) {
-        int ind = index + i;
+        int  ind = index + i;
         assert(ind < _capacity, "By overflow test above.");
         _base[ind] = ptr_arr[i];
       }
@@ -228,7 +297,6 @@
   }
 }
 
-
 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   jint start = _index;
@@ -244,9 +312,9 @@
     assert(ind < _capacity, "By overflow test above.");
     _base[ind] = ptr_arr[i];
   }
+  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 }
 
-
 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   jint index = _index;
@@ -255,7 +323,7 @@
     return false;
   } else {
     int k = MIN2(max, index);
-    jint new_ind = index - k;
+    jint  new_ind = index - k;
     for (int j = 0; j < k; j++) {
       ptr_arr[j] = _base[new_ind + j];
     }
@@ -404,9 +472,10 @@
   return MAX2((n_par_threads + 2) / 4, 1U);
 }
 
-ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
-  _markBitMap1(rs, MinObjAlignment - 1),
-  _markBitMap2(rs, MinObjAlignment - 1),
+ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
+  _g1h(g1h),
+  _markBitMap1(MinObjAlignment - 1),
+  _markBitMap2(MinObjAlignment - 1),
 
   _parallel_marking_threads(0),
   _max_parallel_marking_threads(0),
@@ -415,10 +484,10 @@
   _cleanup_sleep_factor(0.0),
   _cleanup_task_overhead(1.0),
   _cleanup_list("Cleanup List"),
-  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
-  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
-           CardTableModRefBS::card_shift,
-           false /* in_resource_area*/),
+  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
+  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
+            CardTableModRefBS::card_shift,
+            false /* in_resource_area*/),
 
   _prevMarkBitMap(&_markBitMap1),
   _nextMarkBitMap(&_markBitMap2),
@@ -449,7 +518,8 @@
   _parallel_workers(NULL),
 
   _count_card_bitmaps(NULL),
-  _count_marked_bytes(NULL) {
+  _count_marked_bytes(NULL),
+  _completed_initialization(false) {
   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   if (verbose_level < no_verbose) {
     verbose_level = no_verbose;
@@ -464,61 +534,34 @@
                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   }
 
-  _markStack.allocate(MarkStackSize);
+  if (!_markBitMap1.allocate(heap_rs)) {
+    warning("Failed to allocate first CM bit map");
+    return;
+  }
+  if (!_markBitMap2.allocate(heap_rs)) {
+    warning("Failed to allocate second CM bit map");
+    return;
+  }
 
   // Create & start a ConcurrentMark thread.
   _cmThread = new ConcurrentMarkThread(this);
   assert(cmThread() != NULL, "CM Thread should have been created");
   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 
-  _g1h = G1CollectedHeap::heap();
   assert(CGC_lock != NULL, "Where's the CGC_lock?");
-  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
-  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
+  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
+  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
 
   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   satb_qs.set_buffer_size(G1SATBBufferSize);
 
   _root_regions.init(_g1h, this);
 
-  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
-  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
-
-  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
-  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
-
-  BitMap::idx_t card_bm_size = _card_bm.size();
-
-  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
-  _active_tasks = _max_worker_id;
-  for (uint i = 0; i < _max_worker_id; ++i) {
-    CMTaskQueue* task_queue = new CMTaskQueue();
-    task_queue->initialize();
-    _task_queues->register_queue(i, task_queue);
-
-    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
-    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
-
-    _tasks[i] = new CMTask(i, this,
-                           _count_marked_bytes[i],
-                           &_count_card_bitmaps[i],
-                           task_queue, _task_queues);
-
-    _accum_task_vtime[i] = 0.0;
-  }
-
-  // Calculate the card number for the bottom of the heap. Used
-  // in biasing indexes into the accounting card bitmaps.
-  _heap_bottom_card_num =
-    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
-                                CardTableModRefBS::card_shift);
-
-  // Clear all the liveness counting data
-  clear_all_count_data();
-
   if (ConcGCThreads > ParallelGCThreads) {
-    vm_exit_during_initialization("Can't have more ConcGCThreads "
-                                  "than ParallelGCThreads.");
+    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
+            "than ParallelGCThreads (" UINT32_FORMAT ").",
+            ConcGCThreads, ParallelGCThreads);
+    return;
   }
   if (ParallelGCThreads == 0) {
     // if we are not running with any parallel GC threads we will not
@@ -590,9 +633,86 @@
     }
   }
 
+  if (FLAG_IS_DEFAULT(MarkStackSize)) {
+    uintx mark_stack_size =
+      MIN2(MarkStackSizeMax,
+          MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
+    // Verify that the calculated value for MarkStackSize is in range.
+    // It would be nice to use the private utility routine from Arguments.
+    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
+      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
+              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
+              mark_stack_size, (uintx) 1, MarkStackSizeMax);
+      return;
+    }
+    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
+  } else {
+    // Verify MarkStackSize is in range.
+    if (FLAG_IS_CMDLINE(MarkStackSize)) {
+      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
+        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
+          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
+                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
+                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
+          return;
+        }
+      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
+        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
+          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
+                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
+                  MarkStackSize, MarkStackSizeMax);
+          return;
+        }
+      }
+    }
+  }
+
+  if (!_markStack.allocate(MarkStackSize)) {
+    warning("Failed to allocate CM marking stack");
+    return;
+  }
+
+  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
+  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
+
+  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
+  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
+
+  BitMap::idx_t card_bm_size = _card_bm.size();
+
+  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
+  _active_tasks = _max_worker_id;
+
+  size_t max_regions = (size_t) _g1h->max_regions();
+  for (uint i = 0; i < _max_worker_id; ++i) {
+    CMTaskQueue* task_queue = new CMTaskQueue();
+    task_queue->initialize();
+    _task_queues->register_queue(i, task_queue);
+
+    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
+    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
+
+    _tasks[i] = new CMTask(i, this,
+                           _count_marked_bytes[i],
+                           &_count_card_bitmaps[i],
+                           task_queue, _task_queues);
+
+    _accum_task_vtime[i] = 0.0;
+  }
+
+  // Calculate the card number for the bottom of the heap. Used
+  // in biasing indexes into the accounting card bitmaps.
+  _heap_bottom_card_num =
+    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
+                                CardTableModRefBS::card_shift);
+
+  // Clear all the liveness counting data
+  clear_all_count_data();
+
   // so that the call below can read a sensible value
-  _heap_start = (HeapWord*) rs.base();
+  _heap_start = (HeapWord*) heap_rs.base();
   set_non_marking_state();
+  _completed_initialization = true;
 }
 
 void ConcurrentMark::update_g1_committed(bool force) {
@@ -628,8 +748,8 @@
   assert(_heap_end != NULL, "heap bounds should look ok");
   assert(_heap_start < _heap_end, "heap bounds should look ok");
 
-  // reset all the marking data structures and any necessary flags
-  clear_marking_state();
+  // Reset all the marking data structures and any necessary flags
+  reset_marking_state();
 
   if (verbose_low()) {
     gclog_or_tty->print_cr("[global] resetting");
@@ -647,6 +767,23 @@
   set_concurrent_marking_in_progress();
 }
 
+
+void ConcurrentMark::reset_marking_state(bool clear_overflow) {
+  _markStack.set_should_expand();
+  _markStack.setEmpty();        // Also clears the _markStack overflow flag
+  if (clear_overflow) {
+    clear_has_overflown();
+  } else {
+    assert(has_overflown(), "pre-condition");
+  }
+  _finger = _heap_start;
+
+  for (uint i = 0; i < _max_worker_id; ++i) {
+    CMTaskQueue* queue = _task_queues->queue(i);
+    queue->set_empty();
+  }
+}
+
 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
   assert(active_tasks <= _max_worker_id, "we should not have more");
 
@@ -677,7 +814,7 @@
 void ConcurrentMark::set_non_marking_state() {
   // We set the global marking state to some default values when we're
   // not doing marking.
-  clear_marking_state();
+  reset_marking_state();
   _active_tasks = 0;
   clear_concurrent_marking_in_progress();
 }
@@ -844,7 +981,7 @@
     // not clear the overflow flag since we rely on it being true when
     // we exit this method to abort the pause and restart concurent
     // marking.
-    clear_marking_state(concurrent() /* clear_overflow */);
+    reset_marking_state(concurrent() /* clear_overflow */);
     force_overflow()->update();
 
     if (G1Log::fine()) {
@@ -1138,8 +1275,9 @@
   if (has_overflown()) {
     // Oops.  We overflowed.  Restart concurrent marking.
     _restart_for_overflow = true;
-    // Clear the flag. We do not need it any more.
-    clear_has_overflown();
+    // Clear the marking state because we will be restarting
+    // marking due to overflowing the global mark stack.
+    reset_marking_state();
     if (G1TraceMarkStackOverflow) {
       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
     }
@@ -1163,11 +1301,13 @@
                        /* option */ VerifyOption_G1UseNextMarking);
     }
     assert(!restart_for_overflow(), "sanity");
+    // Completely reset the marking state since marking completed
+    set_non_marking_state();
   }
 
-  // Reset the marking state if marking completed
-  if (!restart_for_overflow()) {
-    set_non_marking_state();
+  // Expand the marking stack, if we have to and if we can.
+  if (_markStack.should_expand()) {
+    _markStack.expand();
   }
 
 #if VERIFY_OBJS_PROCESSED
@@ -2785,7 +2925,7 @@
     // Verify entries on the task queues
     for (uint i = 0; i < _max_worker_id; i += 1) {
       cl.set_phase(VerifyNoCSetOopsQueues, i);
-      OopTaskQueue* queue = _task_queues->queue(i);
+      CMTaskQueue* queue = _task_queues->queue(i);
       queue->oops_do(&cl);
     }
   }
@@ -2839,22 +2979,6 @@
 }
 #endif // PRODUCT
 
-void ConcurrentMark::clear_marking_state(bool clear_overflow) {
-  _markStack.setEmpty();
-  _markStack.clear_overflow();
-  if (clear_overflow) {
-    clear_has_overflown();
-  } else {
-    assert(has_overflown(), "pre-condition");
-  }
-  _finger = _heap_start;
-
-  for (uint i = 0; i < _max_worker_id; ++i) {
-    OopTaskQueue* queue = _task_queues->queue(i);
-    queue->set_empty();
-  }
-}
-
 // Aggregate the counting data that was constructed concurrently
 // with marking.
 class AggregateCountDataHRClosure: public HeapRegionClosure {
@@ -3061,7 +3185,7 @@
   // Clear the liveness counting data
   clear_all_count_data();
   // Empty mark stack
-  clear_marking_state();
+  reset_marking_state();
   for (uint i = 0; i < _max_worker_id; ++i) {
     _tasks[i]->clear_region_fields();
   }
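
Review note: the capacity policy in CMMarkStack::expand() above reduces to
doubling with a clamp; a standalone sketch (simplified, without the
VirtualSpace re-reservation):

    // Next capacity after an overflow: double, clamped at the maximum;
    // the caller keeps the old stack if re-reservation fails.
    jint next_capacity(jint capacity, jint mark_stack_size_max) {
      if (capacity == mark_stack_size_max) {
        return capacity;                       // benign: already at limit
      }
      return MIN2(capacity * 2, mark_stack_size_max);
    }
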
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -63,7 +63,7 @@
 
  public:
   // constructor
-  CMBitMapRO(ReservedSpace rs, int shifter);
+  CMBitMapRO(int shifter);
 
   enum { do_yield = true };
 
@@ -117,8 +117,11 @@
 
  public:
   // constructor
-  CMBitMap(ReservedSpace rs, int shifter) :
-    CMBitMapRO(rs, shifter) {}
+  CMBitMap(int shifter) :
+    CMBitMapRO(shifter) {}
+
+  // Allocates the backing store for the marking bitmap
+  bool allocate(ReservedSpace heap_rs);
 
   // write marks
   void mark(HeapWord* addr) {
@@ -155,17 +158,18 @@
   MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
 };
 
-// Represents a marking stack used by the CM collector.
-// Ideally this should be GrowableArray<> just like MSC's marking stack(s).
+// Represents a marking stack used by ConcurrentMark in the G1 collector.
 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
+  VirtualSpace _virtual_space;   // Underlying backing store for actual stack
   ConcurrentMark* _cm;
   oop*   _base;        // bottom of stack
-  jint   _index;       // one more than last occupied index
-  jint   _capacity;    // max #elements
-  jint   _saved_index; // value of _index saved at start of GC
-  NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
+  jint _index;       // one more than last occupied index
+  jint _capacity;    // max #elements
+  jint _saved_index; // value of _index saved at start of GC
+  NOT_PRODUCT(jint _max_depth;)   // max depth plumbed during run
 
-  bool   _overflow;
+  bool  _overflow;
+  bool  _should_expand;
   DEBUG_ONLY(bool _drain_in_progress;)
   DEBUG_ONLY(bool _drain_in_progress_yields;)
 
@@ -173,7 +177,13 @@
   CMMarkStack(ConcurrentMark* cm);
   ~CMMarkStack();
 
-  void allocate(size_t size);
+#ifndef PRODUCT
+  jint max_depth() const {
+    return _max_depth;
+  }
+#endif
+
+  bool allocate(size_t capacity);
 
   oop pop() {
     if (!isEmpty()) {
@@ -231,11 +241,17 @@
 
   bool isEmpty()    { return _index == 0; }
   bool isFull()     { return _index == _capacity; }
-  int maxElems()    { return _capacity; }
+  int  maxElems()   { return _capacity; }
 
   bool overflow() { return _overflow; }
   void clear_overflow() { _overflow = false; }
 
+  bool should_expand() const { return _should_expand; }
+  void set_should_expand();
+
+  // Expand the stack, typically in response to an overflow condition
+  void expand();
+
   int  size() { return _index; }
 
   void setEmpty()   { _index = 0; clear_overflow(); }
@@ -344,6 +360,7 @@
 class ConcurrentMarkThread;
 
 class ConcurrentMark: public CHeapObj<mtGC> {
+  friend class CMMarkStack;
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
@@ -461,15 +478,18 @@
   // It resets the global marking data structures, as well as the
   // task local ones; should be called during initial mark.
   void reset();
-  // It resets all the marking data structures.
-  void clear_marking_state(bool clear_overflow = true);
+
+  // Resets all the marking data structures. Called when we have to restart
+  // marking or when marking completes (via set_non_marking_state below).
+  void reset_marking_state(bool clear_overflow = true);
+
+  // We do this after we're done with marking so that the marking data
+  // structures are initialised to a sensible and predictable state.
+  void set_non_marking_state();
 
   // It should be called to indicate which phase we're in (concurrent
   // mark or remark) and how many threads are currently active.
   void set_phase(uint active_tasks, bool concurrent);
-  // We do this after we're done with marking so that the marking data
-  // structures are initialised to a sensible and predictable state.
-  void set_non_marking_state();
 
   // prints all gathered CM-related statistics
   void print_stats();
@@ -577,6 +597,9 @@
   // the card bitmaps.
   intptr_t _heap_bottom_card_num;
 
+  // Set to true when initialization is complete
+  bool _completed_initialization;
+
 public:
   // Manipulation of the global mark stack.
   // Notice that the first mark_stack_push is CAS-based, whereas the
@@ -636,7 +659,7 @@
     return _task_queues->steal(worker_id, hash_seed, obj);
   }
 
-  ConcurrentMark(ReservedSpace rs, uint max_regions);
+  ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs);
   ~ConcurrentMark();
 
   ConcurrentMarkThread* cmThread() { return _cmThread; }
@@ -907,6 +930,11 @@
   // Should *not* be called from parallel code.
   inline bool mark_and_count(oop obj);
 
+  // Returns true if initialization was successfully completed.
+  bool completed_initialization() const {
+    return _completed_initialization;
+  }
+
 protected:
   // Clear all the per-task bitmaps and arrays used to store the
   // counting data.
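
Review note: the declarations above complete the overflow-expansion handshake;
in outline (simplified, per the concurrentMark.cpp hunks earlier in this
changeset):

    // 1. A failed push during marking sets the stack's _overflow flag and
    //    the collector's has_overflown() state.
    // 2. reset_marking_state() latches that state via set_should_expand()
    //    while the stack is emptied for the restart.
    // 3. Once remark completes (or is aborted for a restart), the marking
    //    thread performs the deferred growth:
    if (_markStack.should_expand()) {
      _markStack.expand();
    }
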
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -159,13 +159,11 @@
           VM_CGC_Operation op(&final_cl, verbose_str, true /* needs_pll */);
           VMThread::execute(&op);
         }
-        if (cm()->restart_for_overflow() &&
-            G1TraceMarkStackOverflow) {
-          gclog_or_tty->print_cr("Restarting conc marking because of MS overflow "
-                                 "in remark (restart #%d).", iter);
-        }
-
         if (cm()->restart_for_overflow()) {
+          if (G1TraceMarkStackOverflow) {
+            gclog_or_tty->print_cr("Restarting conc marking because of MS overflow "
+                                   "in remark (restart #%d).", iter);
+          }
           if (G1Log::fine()) {
             gclog_or_tty->date_stamp(PrintGCDateStamps);
             gclog_or_tty->stamp(PrintGCTimeStamps);
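The restructuring above nests the trace message inside the restart check instead of testing restart_for_overflow() twice. For orientation, this code sits inside the marking retry loop: when the remark pause detects mark-stack overflow, the whole concurrent phase is rerun. A simplified sketch of the surrounding control flow (loop shape inferred from the 'iter' counter, not quoted from the file):

    int iter = 0;
    do {
      iter++;
      // ... run the concurrent mark phase, then the remark (final_cl) pause ...
    } while (cm()->restart_for_overflow());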
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -28,20 +28,8 @@
 #include "runtime/atomic.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/safepoint.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/workgroup.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 bool DirtyCardQueue::apply_closure(CardTableEntryClosure* cl,
                                    bool consume,
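This hunk is the first of several in this changeset that collapse the per-OS include ladder into a single runtime/thread.inline.hpp. Presumably that umbrella header performs the same platform dispatch once, roughly (a sketch, not the header's verbatim contents):

    #include "runtime/thread.hpp"
    #ifdef TARGET_OS_FAMILY_linux
    # include "thread_linux.inline.hpp"
    #endif
    #ifdef TARGET_OS_FAMILY_solaris
    # include "thread_solaris.inline.hpp"
    #endif
    #ifdef TARGET_OS_FAMILY_windows
    # include "thread_windows.inline.hpp"
    #endif
    #ifdef TARGET_OS_FAMILY_bsd
    # include "thread_bsd.inline.hpp"
    #endif

so every call site needs only the one include, and new OS families are handled in one place.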
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -302,16 +302,28 @@
   for (size_t c = start_card + 1; c <= end_card; c++ /* yeah! */) {
     u_char entry = _array->offset_array(c);
     if (c - start_card > BlockOffsetArray::power_to_cards_back(1)) {
-      guarantee(entry > N_words, "Should be in logarithmic region");
+      guarantee(entry > N_words,
+                err_msg("Should be in logarithmic region - "
+                        "entry: " UINT32_FORMAT ", "
+                        "_array->offset_array(c): " UINT32_FORMAT ", "
+                        "N_words: " UINT32_FORMAT,
+                        entry, _array->offset_array(c), N_words));
     }
     size_t backskip = BlockOffsetArray::entry_to_cards_back(entry);
     size_t landing_card = c - backskip;
     guarantee(landing_card >= (start_card - 1), "Inv");
     if (landing_card >= start_card) {
-      guarantee(_array->offset_array(landing_card) <= entry, "monotonicity");
+      guarantee(_array->offset_array(landing_card) <= entry,
+                err_msg("Monotonicity - landing_card offset: " UINT32_FORMAT ", "
+                        "entry: " UINT32_FORMAT,
+                        _array->offset_array(landing_card), entry));
     } else {
       guarantee(landing_card == start_card - 1, "Tautology");
-      guarantee(_array->offset_array(landing_card) <= N_words, "Offset value");
+      // Note that N_words is the maximum offset value
+      guarantee(_array->offset_array(landing_card) <= N_words,
+                err_msg("landing card offset: " UINT32_FORMAT ", "
+                        "N_words: " UINT32_FORMAT,
+                        _array->offset_array(landing_card), N_words));
     }
   }
 }
@@ -536,17 +548,27 @@
   // The offset can be 0 if the block starts on a boundary.  That
   // is checked by an assertion above.
   size_t start_index = _array->index_for(blk_start);
-  HeapWord* boundary    = _array->address_for_index(start_index);
+  HeapWord* boundary = _array->address_for_index(start_index);
   assert((_array->offset_array(orig_index) == 0 &&
           blk_start == boundary) ||
           (_array->offset_array(orig_index) > 0 &&
          _array->offset_array(orig_index) <= N_words),
-         "offset array should have been set");
+         err_msg("offset array should have been set - "
+                  "orig_index offset: " UINT32_FORMAT ", "
+                  "blk_start: " PTR_FORMAT ", "
+                  "boundary: " PTR_FORMAT,
+                  _array->offset_array(orig_index),
+                  blk_start, boundary));
   for (size_t j = orig_index + 1; j <= end_index; j++) {
     assert(_array->offset_array(j) > 0 &&
            _array->offset_array(j) <=
              (u_char) (N_words+BlockOffsetArray::N_powers-1),
-           "offset array should have been set");
+           err_msg("offset array should have been set - "
+                   UINT32_FORMAT " not > 0 OR "
+                   UINT32_FORMAT " not <= " UINT32_FORMAT,
+                   _array->offset_array(j),
+                   _array->offset_array(j),
+                   (u_char) (N_words+BlockOffsetArray::N_powers-1)));
   }
 #endif
 }
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -78,7 +78,9 @@
   virtual void resize(size_t new_word_size) = 0;
 
   virtual void set_bottom(HeapWord* new_bottom) {
-    assert(new_bottom <= _end, "new_bottom > _end");
+    assert(new_bottom <= _end,
+           err_msg("new_bottom (" PTR_FORMAT ") > _end (" PTR_FORMAT ")",
+                   new_bottom, _end));
     _bottom = new_bottom;
     resize(pointer_delta(_end, _bottom));
   }
@@ -134,29 +136,42 @@
   VirtualSpace _vs;
   u_char* _offset_array;          // byte array keeping backwards offsets
 
+  void check_index(size_t index, const char* msg) const {
+    assert(index < _vs.committed_size(),
+           err_msg("%s - "
+                   "index: " SIZE_FORMAT ", _vs.committed_size: " SIZE_FORMAT,
+                   msg, index, _vs.committed_size()));
+  }
+
+  void check_offset(size_t offset, const char* msg) const {
+    assert(offset <= N_words,
+           err_msg("%s - "
+                   "offset: " UINT32_FORMAT", N_words: " UINT32_FORMAT,
+                   msg, offset, N_words));
+  }
+
   // Bounds checking accessors:
   // For performance these have to devolve to array accesses in product builds.
   u_char offset_array(size_t index) const {
-    assert(index < _vs.committed_size(), "index out of range");
+    check_index(index, "index out of range");
     return _offset_array[index];
   }
 
   void set_offset_array(size_t index, u_char offset) {
-    assert(index < _vs.committed_size(), "index out of range");
-    assert(offset <= N_words, "offset too large");
+    check_index(index, "index out of range");
+    check_offset(offset, "offset too large");
     _offset_array[index] = offset;
   }
 
   void set_offset_array(size_t index, HeapWord* high, HeapWord* low) {
-    assert(index < _vs.committed_size(), "index out of range");
+    check_index(index, "index out of range");
     assert(high >= low, "addresses out of order");
-    assert(pointer_delta(high, low) <= N_words, "offset too large");
+    check_offset(pointer_delta(high, low), "offset too large");
     _offset_array[index] = (u_char) pointer_delta(high, low);
   }
 
   void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
-    assert(index_for(right - 1) < _vs.committed_size(),
-           "right address out of range");
+    check_index(index_for(right - 1), "right address out of range");
     assert(left  < right, "Heap addresses out of order");
     size_t num_cards = pointer_delta(right, left) >> LogN_words;
     if (UseMemSetInBOT) {
@@ -171,7 +186,7 @@
   }
 
   void set_offset_array(size_t left, size_t right, u_char offset) {
-    assert(right < _vs.committed_size(), "right address out of range");
+    check_index(right, "right index out of range");
     assert(left <= right, "indexes out of order");
     size_t num_cards = right - left + 1;
     if (UseMemSetInBOT) {
@@ -186,11 +201,10 @@
   }
 
   void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const {
-    assert(index < _vs.committed_size(), "index out of range");
+    check_index(index, "index out of range");
     assert(high >= low, "addresses out of order");
-    assert(pointer_delta(high, low) <= N_words, "offset too large");
-    assert(_offset_array[index] == pointer_delta(high, low),
-           "Wrong offset");
+    check_offset(pointer_delta(high, low), "offset too large");
+    assert(_offset_array[index] == pointer_delta(high, low), "Wrong offset");
   }
 
   bool is_card_boundary(HeapWord* p) const;
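The promise in the comment above ("devolve to array accesses in product builds") still holds after this refactoring: assert() expands to nothing when ASSERT is undefined, so check_index() and check_offset() are empty inline functions in product builds, and the accessor reduces, after inlining, to:

    u_char offset_array(size_t index) const {
      return _offset_array[index];  // check_index() compiled away in product builds
    }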
@@ -481,7 +495,6 @@
                       blk_start, blk_end);
   }
 
-
  public:
   G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, MemRegion mr);
 
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -49,16 +49,17 @@
   char* pc = (char*)p;
   assert(pc >= (char*)_reserved.start() &&
          pc <  (char*)_reserved.end(),
-         "p not in range.");
+         err_msg("p (" PTR_FORMAT ") not in reserved [" PTR_FORMAT ", " PTR_FORMAT ")",
+                 p, (char*)_reserved.start(), (char*)_reserved.end()));
   size_t delta = pointer_delta(pc, _reserved.start(), sizeof(char));
   size_t result = delta >> LogN;
-  assert(result < _vs.committed_size(), "bad index from address");
+  check_index(result, "bad index from address");
   return result;
 }
 
 inline HeapWord*
 G1BlockOffsetSharedArray::address_for_index(size_t index) const {
-  assert(index < _vs.committed_size(), "bad index");
+  check_index(index, "index out of range");
   HeapWord* result = _reserved.start() + (index << LogN_words);
   assert(result >= _reserved.start() && result < _reserved.end(),
          err_msg("bad address from index result " PTR_FORMAT
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -2079,7 +2079,11 @@
 
   // Create the ConcurrentMark data structure and thread.
   // (Must do this late, so that "max_regions" is defined.)
-  _cm       = new ConcurrentMark(heap_rs, max_regions());
+  _cm = new ConcurrentMark(this, heap_rs);
+  if (_cm == NULL || !_cm->completed_initialization()) {
+    vm_shutdown_during_initialization("Could not create/initialize ConcurrentMark");
+    return JNI_ENOMEM;
+  }
   _cmThread = _cm->cmThread();
 
   // Initialize the from_card cache structure of HeapRegionRemSet.
@@ -2087,7 +2091,7 @@
 
   // Now expand into the initial heap size.
   if (!expand(init_byte_size)) {
-    vm_exit_during_initialization("Failed to allocate initial heap.");
+    vm_shutdown_during_initialization("Failed to allocate initial heap.");
     return JNI_ENOMEM;
   }
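The new failure path relies on ConcurrentMark recording whether its constructor ran to completion, rather than exiting the VM from inside the constructor. A minimal sketch of that two-phase construction pattern (class and helper names are illustrative, not from this patch):

    class TwoPhase {
      bool _completed_initialization;
     public:
      TwoPhase() : _completed_initialization(false) {
        if (!allocate_structures()) {
          return;  // leave the flag false; the caller checks it
        }
        _completed_initialization = true;
      }
      bool completed_initialization() const { return _completed_initialization; }
     private:
      bool allocate_structures() { /* reserve bitmaps, stacks, ... */ return true; }
    };

The caller can then fail gracefully, as above, with vm_shutdown_during_initialization() and JNI_ENOMEM instead of aborting mid-construction.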
 
@@ -3664,7 +3668,7 @@
   gclog_or_tty->stamp(PrintGCTimeStamps);
 
   GCCauseString gc_cause_str = GCCauseString("GC pause", gc_cause())
-    .append(g1_policy()->gcs_are_young() ? " (young)" : " (mixed)")
+    .append(g1_policy()->gcs_are_young() ? "(young)" : "(mixed)")
     .append(g1_policy()->during_initial_mark_pause() ? " (initial-mark)" : "");
 
   gclog_or_tty->print("[%s", (const char*)gc_cause_str);
@@ -3690,6 +3694,7 @@
     g1_policy()->print_heap_transition();
     gclog_or_tty->print_cr(", %3.7f secs]", pause_time_sec);
   }
+  gclog_or_tty->flush();
 }
 
 bool
@@ -4036,10 +4041,11 @@
 #endif
 
       gc_epilogue(false);
-
-      log_gc_footer(os::elapsedTime() - pause_start_sec);
     }
 
+    // Print the remainder of the GC log output.
+    log_gc_footer(os::elapsedTime() - pause_start_sec);
+
     // It is not yet safe to tell the concurrent mark to
     // start as we have some optional output below. We don't want the
     // output from the concurrent mark thread interfering with this
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -309,9 +309,9 @@
 }
 
 G1YoungGenSizer::G1YoungGenSizer() : _sizer_kind(SizerDefaults), _adaptive_size(true) {
-  assert(G1DefaultMinNewGenPercent <= G1DefaultMaxNewGenPercent, "Min larger than max");
-  assert(G1DefaultMinNewGenPercent > 0 && G1DefaultMinNewGenPercent < 100, "Min out of bounds");
-  assert(G1DefaultMaxNewGenPercent > 0 && G1DefaultMaxNewGenPercent < 100, "Max out of bounds");
+  assert(G1NewSizePercent <= G1MaxNewSizePercent, "Min larger than max");
+  assert(G1NewSizePercent > 0 && G1NewSizePercent < 100, "Min out of bounds");
+  assert(G1MaxNewSizePercent > 0 && G1MaxNewSizePercent < 100, "Max out of bounds");
 
   if (FLAG_IS_CMDLINE(NewRatio)) {
     if (FLAG_IS_CMDLINE(NewSize) || FLAG_IS_CMDLINE(MaxNewSize)) {
@@ -344,12 +344,12 @@
 }
 
 uint G1YoungGenSizer::calculate_default_min_length(uint new_number_of_heap_regions) {
-  uint default_value = (new_number_of_heap_regions * G1DefaultMinNewGenPercent) / 100;
+  uint default_value = (new_number_of_heap_regions * G1NewSizePercent) / 100;
   return MAX2(1U, default_value);
 }
 
 uint G1YoungGenSizer::calculate_default_max_length(uint new_number_of_heap_regions) {
-  uint default_value = (new_number_of_heap_regions * G1DefaultMaxNewGenPercent) / 100;
+  uint default_value = (new_number_of_heap_regions * G1MaxNewSizePercent) / 100;
   return MAX2(1U, default_value);
 }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -94,18 +94,18 @@
 // will occur.
 //
 // If nothing related to the young gen size is set on the command
-// line we should allow the young gen to be between
-// G1DefaultMinNewGenPercent and G1DefaultMaxNewGenPercent of the
-// heap size. This means that every time the heap size changes the
-// limits for the young gen size will be updated.
+// line we should allow the young gen to be between G1NewSizePercent
+// and G1MaxNewSizePercent of the heap size. This means that every time
+// the heap size changes, the limits for the young gen size will be
+// recalculated.
 //
 // If only -XX:NewSize is set we should use the specified value as the
-// minimum size for young gen. Still using G1DefaultMaxNewGenPercent
-// of the heap as maximum.
+// minimum size for young gen. Still using G1MaxNewSizePercent of the
+// heap as maximum.
 //
 // If only -XX:MaxNewSize is set we should use the specified value as the
-// maximum size for young gen. Still using G1DefaultMinNewGenPercent
-// of the heap as minimum.
+// maximum size for young gen. Still using G1NewSizePercent of the heap
+// as minimum.
 //
 // If -XX:NewSize and -XX:MaxNewSize are both specified we use these values.
 // No updates when the heap size changes. There is a special case when
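Condensing the rules described in this comment block into pseudocode (a sketch of the documented policy, not the file's code; heap_regions and region_size are assumed inputs):

    uint min_len, max_len;
    if (!FLAG_IS_CMDLINE(NewSize) && !FLAG_IS_CMDLINE(MaxNewSize)) {
      min_len = MAX2(1U, heap_regions * G1NewSizePercent / 100);     // tracks heap resizes
      max_len = MAX2(1U, heap_regions * G1MaxNewSizePercent / 100);
    } else if (FLAG_IS_CMDLINE(NewSize) && !FLAG_IS_CMDLINE(MaxNewSize)) {
      min_len = NewSize / region_size;                               // fixed minimum
      max_len = MAX2(1U, heap_regions * G1MaxNewSizePercent / 100);
    } else if (!FLAG_IS_CMDLINE(NewSize) && FLAG_IS_CMDLINE(MaxNewSize)) {
      min_len = MAX2(1U, heap_regions * G1NewSizePercent / 100);
      max_len = MaxNewSize / region_size;                            // fixed maximum
    } else {
      min_len = NewSize / region_size;                               // both fixed;
      max_len = MaxNewSize / region_size;                            // no updates on resize
    }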
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -131,17 +131,23 @@
 
 #ifndef PRODUCT
 
+template <> const int WorkerDataArray<int>::_uninitialized = -1;
+template <> const double WorkerDataArray<double>::_uninitialized = -1.0;
+template <> const size_t WorkerDataArray<size_t>::_uninitialized = (size_t)-1;
+
 template <class T>
 void WorkerDataArray<T>::reset() {
   for (uint i = 0; i < _length; i++) {
-    _data[i] = (T)-1;
+    _data[i] = (T)_uninitialized;
   }
 }
 
 template <class T>
 void WorkerDataArray<T>::verify() {
   for (uint i = 0; i < _length; i++) {
-    assert(_data[i] >= (T)0, err_msg("Invalid data for worker %d", i));
+    assert(_data[i] != _uninitialized,
+        err_msg("Invalid data for worker " UINT32_FORMAT ", data: %lf, uninitialized: %lf",
+            i, (double)_data[i], (double)_uninitialized));
   }
 }
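The specializations above replace the old cast '(T)-1' with a per-type sentinel; this matters because -1 is an awkward marker once T can be unsigned or floating point, and it lets verify() test for the exact sentinel instead of assuming negative means unset. A self-contained sketch of the idiom:

    #include <cstddef>

    template <class T>
    class WorkerDataSketch {
     public:
      static const T _uninitialized;  // one sentinel per instantiation
      static bool is_set(T v) { return v != _uninitialized; }
    };

    // Each instantiation picks a value that cannot be mistaken for real data.
    template <> const int    WorkerDataSketch<int>::_uninitialized    = -1;
    template <> const double WorkerDataSketch<double>::_uninitialized = -1.0;
    template <> const size_t WorkerDataSketch<size_t>::_uninitialized = (size_t)-1;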
 
@@ -201,20 +207,20 @@
   _last_termination_attempts.verify();
   _last_gc_worker_end_times_ms.verify();
 
-    for (uint i = 0; i < _active_gc_threads; i++) {
-      double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i);
-      _last_gc_worker_times_ms.set(i, worker_time);
+  for (uint i = 0; i < _active_gc_threads; i++) {
+    double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i);
+    _last_gc_worker_times_ms.set(i, worker_time);
 
-      double worker_known_time = _last_ext_root_scan_times_ms.get(i) +
-        _last_satb_filtering_times_ms.get(i) +
-        _last_update_rs_times_ms.get(i) +
-        _last_scan_rs_times_ms.get(i) +
-        _last_obj_copy_times_ms.get(i) +
-        _last_termination_times_ms.get(i);
+    double worker_known_time = _last_ext_root_scan_times_ms.get(i) +
+      _last_satb_filtering_times_ms.get(i) +
+      _last_update_rs_times_ms.get(i) +
+      _last_scan_rs_times_ms.get(i) +
+      _last_obj_copy_times_ms.get(i) +
+      _last_termination_times_ms.get(i);
 
-      double worker_other_time = worker_time - worker_known_time;
-      _last_gc_worker_other_times_ms.set(i, worker_other_time);
-    }
+    double worker_other_time = worker_time - worker_known_time;
+    _last_gc_worker_other_times_ms.set(i, worker_other_time);
+  }
 
   _last_gc_worker_times_ms.verify();
   _last_gc_worker_other_times_ms.verify();
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -35,6 +35,8 @@
   const char* _print_format;
   bool        _print_sum;
 
+  NOT_PRODUCT(static const T _uninitialized;)
+
   // We are caching the sum and average to only have to calculate them once.
+  // This is not done in an MT-safe way. It is intended to allow single
   // threaded code to call sum() and average() multiple times in any order
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -282,10 +282,8 @@
       if (r->startsHumongous()) {
         // We must adjust the pointers on the single H object.
         oop obj = oop(r->bottom());
-        debug_only(GenMarkSweep::track_interior_pointers(obj));
         // point all the oops to the new location
         obj->adjust_pointers();
-        debug_only(GenMarkSweep::check_interior_pointers());
       }
     } else {
       // This really ought to be "as_CompactibleSpace"...
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,19 +27,7 @@
 #include "gc_implementation/g1/heapRegion.hpp"
 #include "gc_implementation/g1/satbQueue.hpp"
 #include "runtime/mutexLocker.hpp"
-#include "runtime/thread.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 G1SATBCardTableModRefBS::G1SATBCardTableModRefBS(MemRegion whole_heap,
                                                  int max_covered_regions) :
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -287,23 +287,24 @@
           "The number of times we'll force an overflow during "             \
           "concurrent marking")                                             \
                                                                             \
-  experimental(uintx, G1DefaultMinNewGenPercent, 20,                        \
-          "Percentage (0-100) of the heap size to use as minimum "          \
-          "young gen size.")                                                \
+  experimental(uintx, G1NewSizePercent, 5,                                  \
+          "Percentage (0-100) of the heap size to use as default "          \
+          "minimum young gen size.")                                        \
                                                                             \
-  experimental(uintx, G1DefaultMaxNewGenPercent, 80,                        \
-          "Percentage (0-100) of the heap size to use as maximum "          \
-          "young gen size.")                                                \
+  experimental(uintx, G1MaxNewSizePercent, 60,                              \
+          "Percentage (0-100) of the heap size to use as default "          \
+          " maximum young gen size.")                                       \
                                                                             \
-  experimental(uintx, G1OldCSetRegionLiveThresholdPercent, 90,              \
-          "Threshold for regions to be added to the collection set. "       \
-          "Regions with more live bytes than this will not be collected.")  \
+  experimental(uintx, G1MixedGCLiveThresholdPercent, 65,                    \
+          "Threshold for regions to be considered for inclusion in the "    \
+          "collection set of mixed GCs. "                                   \
+          "Regions with live bytes exceeding this will not be collected.")  \
                                                                             \
-  product(uintx, G1HeapWastePercent, 5,                                     \
+  product(uintx, G1HeapWastePercent, 10,                                    \
           "Amount of space, expressed as a percentage of the heap size, "   \
           "that G1 is willing not to collect to avoid expensive GCs.")      \
                                                                             \
-  product(uintx, G1MixedGCCountTarget, 4,                                   \
+  product(uintx, G1MixedGCCountTarget, 8,                                   \
           "The target number of mixed GCs after a marking cycle.")          \
                                                                             \
   experimental(uintx, G1OldCSetRegionThresholdPercent, 10,                  \
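Note that G1NewSizePercent, G1MaxNewSizePercent, and G1MixedGCLiveThresholdPercent are declared experimental(...), so setting them on the command line additionally requires -XX:+UnlockExperimentalVMOptions (e.g. java -XX:+UnlockExperimentalVMOptions -XX:G1NewSizePercent=10 ...), whereas the product(...) flags G1HeapWastePercent and G1MixedGCCountTarget can be set directly.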
--- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -28,18 +28,7 @@
 #include "memory/allocation.inline.hpp"
 #include "runtime/mutex.hpp"
 #include "runtime/mutexLocker.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 PtrQueue::PtrQueue(PtrQueueSet* qset, bool perm, bool active) :
   _qset(qset), _buf(NULL), _index(0), _active(active),
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -878,12 +878,6 @@
 
 bool ParNewGeneration::_avoid_promotion_undo = false;
 
-void ParNewGeneration::adjust_desired_tenuring_threshold() {
-  // Set the desired survivor size to half the real survivor space
-  _tenuring_threshold =
-    age_table()->compute_tenuring_threshold(to()->capacity()/HeapWordSize);
-}
-
 // A Generation that does parallel young-gen collection.
 
 void ParNewGeneration::collect(bool   full,
@@ -1013,6 +1007,8 @@
     size_policy->reset_gc_overhead_limit_count();
 
     assert(to()->is_empty(), "to space should be empty now");
+
+    adjust_desired_tenuring_threshold();
   } else {
     assert(_promo_failure_scan_stack.is_empty(), "post condition");
     _promo_failure_scan_stack.clear(true); // Clear cached segments.
@@ -1035,7 +1031,6 @@
   from()->set_concurrent_iteration_safe_limit(from()->top());
   to()->set_concurrent_iteration_safe_limit(to()->top());
 
-  adjust_desired_tenuring_threshold();
   if (ResizePLAB) {
     plab_stats()->adjust_desired_plab_sz(n_workers);
   }
@@ -1623,7 +1618,3 @@
 const char* ParNewGeneration::name() const {
   return "par new generation";
 }
-
-bool ParNewGeneration::in_use() {
-  return UseParNewGC && ParallelGCThreads > 0;
-}
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -347,10 +347,6 @@
   bool survivor_overflow() { return _survivor_overflow; }
   void set_survivor_overflow(bool v) { _survivor_overflow = v; }
 
-  // Adjust the tenuring threshold.  See the implementation for
-  // the details of the policy.
-  virtual void adjust_desired_tenuring_threshold();
-
  public:
   ParNewGeneration(ReservedSpace rs, size_t initial_byte_size, int level);
 
@@ -361,8 +357,6 @@
     delete _task_queues;
   }
 
-  static bool in_use();
-
   virtual void ref_processor_init();
   virtual Generation::Name kind()        { return Generation::ParNew; }
   virtual const char* name() const;
--- a/src/share/vm/gc_implementation/parallelScavenge/adjoiningVirtualSpaces.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/adjoiningVirtualSpaces.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc_implementation/parallelScavenge/adjoiningVirtualSpaces.hpp"
+#include "memory/allocation.inline.hpp"
 #include "runtime/java.hpp"
 
 AdjoiningVirtualSpaces::AdjoiningVirtualSpaces(ReservedSpace rs,
--- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -52,14 +52,22 @@
     PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty));
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
+
   PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
+  CLDToOopClosure mark_and_push_from_clds(&mark_and_push_closure, true);
   CodeBlobToOopClosure mark_and_push_in_blobs(&mark_and_push_closure, /*do_marking=*/ true);
 
   if (_java_thread != NULL)
-    _java_thread->oops_do(&mark_and_push_closure, &mark_and_push_in_blobs);
+    _java_thread->oops_do(
+        &mark_and_push_closure,
+        &mark_and_push_from_clds,
+        &mark_and_push_in_blobs);
 
   if (_vm_thread != NULL)
-    _vm_thread->oops_do(&mark_and_push_closure, &mark_and_push_in_blobs);
+    _vm_thread->oops_do(
+        &mark_and_push_closure,
+        &mark_and_push_from_clds,
+        &mark_and_push_in_blobs);
 
   // Do the real work
   cm->follow_marking_stacks();
@@ -89,7 +97,8 @@
     {
       ResourceMark rm;
       CodeBlobToOopClosure each_active_code_blob(&mark_and_push_closure, /*do_marking=*/ true);
-      Threads::oops_do(&mark_and_push_closure, &each_active_code_blob);
+      CLDToOopClosure mark_and_push_from_cld(&mark_and_push_closure);
+      Threads::oops_do(&mark_and_push_closure, &mark_and_push_from_cld, &each_active_code_blob);
     }
     break;
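Both call sites above move to the three-closure form of the root walk: one closure for ordinary stack oops, a CLDToOopClosure for oops reachable through class loader data, and a CodeBlob closure for oops embedded in compiled code. A sketch of the assumed shape of the walk (not the actual thread.cpp body):

    void Threads::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
      ALL_JAVA_THREADS(p) {        // HotSpot's thread-iteration macro
        p->oops_do(f, cld_f, cf);  // stack, class-loader-data, and nmethod oops
      }
      VMThread::vm_thread()->oops_do(f, cld_f, cf);
    }

Passing NULL for the CLD closure, as the scavenge-time call sites in psTasks.cpp below do, simply skips that leg of the walk.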
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -495,8 +495,9 @@
     ParallelScavengeHeap::ParStrongRootsScope psrs;
     Universe::oops_do(mark_and_push_closure());
     JNIHandles::oops_do(mark_and_push_closure());   // Global (strong) JNI handles
+    CLDToOopClosure mark_and_push_from_cld(mark_and_push_closure());
     CodeBlobToOopClosure each_active_code_blob(mark_and_push_closure(), /*do_marking=*/ true);
-    Threads::oops_do(mark_and_push_closure(), &each_active_code_blob);
+    Threads::oops_do(mark_and_push_closure(), &mark_and_push_from_cld, &each_active_code_blob);
     ObjectSynchronizer::oops_do(mark_and_push_closure());
     FlatProfiler::oops_do(mark_and_push_closure());
     Management::oops_do(mark_and_push_closure());
@@ -584,7 +585,8 @@
   // General strong roots.
   Universe::oops_do(adjust_root_pointer_closure());
   JNIHandles::oops_do(adjust_root_pointer_closure());   // Global (strong) JNI handles
-  Threads::oops_do(adjust_root_pointer_closure(), NULL);
+  CLDToOopClosure adjust_from_cld(adjust_root_pointer_closure());
+  Threads::oops_do(adjust_root_pointer_closure(), &adjust_from_cld, NULL);
   ObjectSynchronizer::oops_do(adjust_root_pointer_closure());
   FlatProfiler::oops_do(adjust_root_pointer_closure());
   Management::oops_do(adjust_root_pointer_closure());
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -164,7 +164,6 @@
         start_array->allocate_block(compact_top);
       }
 
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), size));
       compact_top += size;
       assert(compact_top <= dest->space()->end(),
         "Exceeding space in destination");
@@ -225,7 +224,6 @@
             start_array->allocate_block(compact_top);
           }
 
-          VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), sz));
           compact_top += sz;
           assert(compact_top <= dest->space()->end(),
             "Exceeding space in destination");
@@ -304,11 +302,8 @@
     HeapWord* end = _first_dead;
 
     while (q < end) {
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q)));
       // point all the oops to the new location
       size_t size = oop(q)->adjust_pointers();
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size));
       q += size;
     }
 
@@ -328,11 +323,8 @@
     Prefetch::write(q, interval);
     if (oop(q)->is_gc_marked()) {
       // q is alive
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q)));
       // point all the oops to the new location
       size_t size = oop(q)->adjust_pointers();
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size));
       debug_only(prev_q = q);
       q += size;
     } else {
@@ -366,7 +358,6 @@
     while (q < end) {
       size_t size = oop(q)->size();
       assert(!oop(q)->is_gc_marked(), "should be unmarked (special dense prefix handling)");
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size, q));
       debug_only(prev_q = q);
       q += size;
     }
@@ -401,7 +392,6 @@
       Prefetch::write(compaction_top, copy_interval);
 
       // copy object and reinit its mark
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size, compaction_top));
       assert(q != compaction_top, "everything in this pass should be moving");
       Copy::aligned_conjoint_words(q, compaction_top, size);
       oop(compaction_top)->init_mark();
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -99,25 +99,6 @@
 bool   PSParallelCompact::_dwl_initialized = false;
 #endif  // #ifdef ASSERT
 
-#ifdef VALIDATE_MARK_SWEEP
-GrowableArray<void*>*   PSParallelCompact::_root_refs_stack = NULL;
-GrowableArray<oop> *    PSParallelCompact::_live_oops = NULL;
-GrowableArray<oop> *    PSParallelCompact::_live_oops_moved_to = NULL;
-GrowableArray<size_t>*  PSParallelCompact::_live_oops_size = NULL;
-size_t                  PSParallelCompact::_live_oops_index = 0;
-GrowableArray<void*>*   PSParallelCompact::_other_refs_stack = NULL;
-GrowableArray<void*>*   PSParallelCompact::_adjusted_pointers = NULL;
-bool                    PSParallelCompact::_pointer_tracking = false;
-bool                    PSParallelCompact::_root_tracking = true;
-
-GrowableArray<HeapWord*>* PSParallelCompact::_cur_gc_live_oops = NULL;
-GrowableArray<HeapWord*>* PSParallelCompact::_cur_gc_live_oops_moved_to = NULL;
-GrowableArray<size_t>   * PSParallelCompact::_cur_gc_live_oops_size = NULL;
-GrowableArray<HeapWord*>* PSParallelCompact::_last_gc_live_oops = NULL;
-GrowableArray<HeapWord*>* PSParallelCompact::_last_gc_live_oops_moved_to = NULL;
-GrowableArray<size_t>   * PSParallelCompact::_last_gc_live_oops_size = NULL;
-#endif
-
 void SplitInfo::record(size_t src_region_idx, size_t partial_obj_size,
                        HeapWord* destination)
 {
@@ -2436,7 +2417,8 @@
   // General strong roots.
   Universe::oops_do(adjust_root_pointer_closure());
   JNIHandles::oops_do(adjust_root_pointer_closure());   // Global (strong) JNI handles
-  Threads::oops_do(adjust_root_pointer_closure(), NULL);
+  CLDToOopClosure adjust_from_cld(adjust_root_pointer_closure());
+  Threads::oops_do(adjust_root_pointer_closure(), &adjust_from_cld, NULL);
   ObjectSynchronizer::oops_do(adjust_root_pointer_closure());
   FlatProfiler::oops_do(adjust_root_pointer_closure());
   Management::oops_do(adjust_root_pointer_closure());
@@ -2714,151 +2696,6 @@
 }
 #endif  // #ifdef ASSERT
 
-
-#ifdef VALIDATE_MARK_SWEEP
-
-void PSParallelCompact::track_adjusted_pointer(void* p, bool isroot) {
-  if (!ValidateMarkSweep)
-    return;
-
-  if (!isroot) {
-    if (_pointer_tracking) {
-      guarantee(_adjusted_pointers->contains(p), "should have seen this pointer");
-      _adjusted_pointers->remove(p);
-    }
-  } else {
-    ptrdiff_t index = _root_refs_stack->find(p);
-    if (index != -1) {
-      int l = _root_refs_stack->length();
-      if (l > 0 && l - 1 != index) {
-        void* last = _root_refs_stack->pop();
-        assert(last != p, "should be different");
-        _root_refs_stack->at_put(index, last);
-      } else {
-        _root_refs_stack->remove(p);
-      }
-    }
-  }
-}
-
-
-void PSParallelCompact::check_adjust_pointer(void* p) {
-  _adjusted_pointers->push(p);
-}
-
-
-class AdjusterTracker: public OopClosure {
- public:
-  AdjusterTracker() {};
-  void do_oop(oop* o)         { PSParallelCompact::check_adjust_pointer(o); }
-  void do_oop(narrowOop* o)   { PSParallelCompact::check_adjust_pointer(o); }
-};
-
-
-void PSParallelCompact::track_interior_pointers(oop obj) {
-  if (ValidateMarkSweep) {
-    _adjusted_pointers->clear();
-    _pointer_tracking = true;
-
-    AdjusterTracker checker;
-    obj->oop_iterate_no_header(&checker);
-  }
-}
-
-
-void PSParallelCompact::check_interior_pointers() {
-  if (ValidateMarkSweep) {
-    _pointer_tracking = false;
-    guarantee(_adjusted_pointers->length() == 0, "should have processed the same pointers");
-  }
-}
-
-
-void PSParallelCompact::reset_live_oop_tracking() {
-  if (ValidateMarkSweep) {
-    guarantee((size_t)_live_oops->length() == _live_oops_index, "should be at end of live oops");
-    _live_oops_index = 0;
-  }
-}
-
-
-void PSParallelCompact::register_live_oop(oop p, size_t size) {
-  if (ValidateMarkSweep) {
-    _live_oops->push(p);
-    _live_oops_size->push(size);
-    _live_oops_index++;
-  }
-}
-
-void PSParallelCompact::validate_live_oop(oop p, size_t size) {
-  if (ValidateMarkSweep) {
-    oop obj = _live_oops->at((int)_live_oops_index);
-    guarantee(obj == p, "should be the same object");
-    guarantee(_live_oops_size->at((int)_live_oops_index) == size, "should be the same size");
-    _live_oops_index++;
-  }
-}
-
-void PSParallelCompact::live_oop_moved_to(HeapWord* q, size_t size,
-                                  HeapWord* compaction_top) {
-  assert(oop(q)->forwardee() == NULL || oop(q)->forwardee() == oop(compaction_top),
-         "should be moved to forwarded location");
-  if (ValidateMarkSweep) {
-    PSParallelCompact::validate_live_oop(oop(q), size);
-    _live_oops_moved_to->push(oop(compaction_top));
-  }
-  if (RecordMarkSweepCompaction) {
-    _cur_gc_live_oops->push(q);
-    _cur_gc_live_oops_moved_to->push(compaction_top);
-    _cur_gc_live_oops_size->push(size);
-  }
-}
-
-
-void PSParallelCompact::compaction_complete() {
-  if (RecordMarkSweepCompaction) {
-    GrowableArray<HeapWord*>* _tmp_live_oops          = _cur_gc_live_oops;
-    GrowableArray<HeapWord*>* _tmp_live_oops_moved_to = _cur_gc_live_oops_moved_to;
-    GrowableArray<size_t>   * _tmp_live_oops_size     = _cur_gc_live_oops_size;
-
-    _cur_gc_live_oops           = _last_gc_live_oops;
-    _cur_gc_live_oops_moved_to  = _last_gc_live_oops_moved_to;
-    _cur_gc_live_oops_size      = _last_gc_live_oops_size;
-    _last_gc_live_oops          = _tmp_live_oops;
-    _last_gc_live_oops_moved_to = _tmp_live_oops_moved_to;
-    _last_gc_live_oops_size     = _tmp_live_oops_size;
-  }
-}
-
-
-void PSParallelCompact::print_new_location_of_heap_address(HeapWord* q) {
-  if (!RecordMarkSweepCompaction) {
-    tty->print_cr("Requires RecordMarkSweepCompaction to be enabled");
-    return;
-  }
-
-  if (_last_gc_live_oops == NULL) {
-    tty->print_cr("No compaction information gathered yet");
-    return;
-  }
-
-  for (int i = 0; i < _last_gc_live_oops->length(); i++) {
-    HeapWord* old_oop = _last_gc_live_oops->at(i);
-    size_t    sz      = _last_gc_live_oops_size->at(i);
-    if (old_oop <= q && q < (old_oop + sz)) {
-      HeapWord* new_oop = _last_gc_live_oops_moved_to->at(i);
-      size_t offset = (q - old_oop);
-      tty->print_cr("Address " PTR_FORMAT, q);
-      tty->print_cr(" Was in oop " PTR_FORMAT ", size %d, at offset %d", old_oop, sz, offset);
-      tty->print_cr(" Now in oop " PTR_FORMAT ", actual address " PTR_FORMAT, new_oop, new_oop + offset);
-      return;
-    }
-  }
-
-  tty->print_cr("Address " PTR_FORMAT " not found in live oop information from last GC", q);
-}
-#endif //VALIDATE_MARK_SWEEP
-
 // Update interior oops in the ranges of regions [beg_region, end_region).
 void
 PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1006,34 +1006,6 @@
   // Reset time since last full gc
   static void reset_millis_since_last_gc();
 
- protected:
-#ifdef VALIDATE_MARK_SWEEP
-  static GrowableArray<void*>*           _root_refs_stack;
-  static GrowableArray<oop> *            _live_oops;
-  static GrowableArray<oop> *            _live_oops_moved_to;
-  static GrowableArray<size_t>*          _live_oops_size;
-  static size_t                          _live_oops_index;
-  static size_t                          _live_oops_index_at_perm;
-  static GrowableArray<void*>*           _other_refs_stack;
-  static GrowableArray<void*>*           _adjusted_pointers;
-  static bool                            _pointer_tracking;
-  static bool                            _root_tracking;
-
-  // The following arrays are saved since the time of the last GC and
-  // assist in tracking down problems where someone has done an errant
-  // store into the heap, usually to an oop that wasn't properly
-  // handleized across a GC. If we crash or otherwise fail before the
-  // next GC, we can query these arrays to find out the object we had
-  // intended to do the store to (assuming it is still alive) and the
-  // offset within that object. Covered under RecordMarkSweepCompaction.
-  static GrowableArray<HeapWord*> *      _cur_gc_live_oops;
-  static GrowableArray<HeapWord*> *      _cur_gc_live_oops_moved_to;
-  static GrowableArray<size_t>*          _cur_gc_live_oops_size;
-  static GrowableArray<HeapWord*> *      _last_gc_live_oops;
-  static GrowableArray<HeapWord*> *      _last_gc_live_oops_moved_to;
-  static GrowableArray<size_t>*          _last_gc_live_oops_size;
-#endif
-
  public:
   class MarkAndPushClosure: public OopClosure {
    private:
@@ -1191,25 +1163,6 @@
   // Time since last full gc (in milliseconds).
   static jlong millis_since_last_gc();
 
-#ifdef VALIDATE_MARK_SWEEP
-  static void track_adjusted_pointer(void* p, bool isroot);
-  static void check_adjust_pointer(void* p);
-  static void track_interior_pointers(oop obj);
-  static void check_interior_pointers();
-
-  static void reset_live_oop_tracking();
-  static void register_live_oop(oop p, size_t size);
-  static void validate_live_oop(oop p, size_t size);
-  static void live_oop_moved_to(HeapWord* q, size_t size, HeapWord* compaction_top);
-  static void compaction_complete();
-
-  // Querying operation of RecordMarkSweepCompaction results.
-  // Finds and prints the current base oop and offset for a word
-  // within an oop that was live during the last GC. Helpful for
-  // tracking down heap stomps.
-  static void print_new_location_of_heap_address(HeapWord* q);
-#endif  // #ifdef VALIDATE_MARK_SWEEP
-
 #ifndef PRODUCT
   // Debugging support.
   static const char* space_names[last_space_id];
@@ -1250,12 +1203,7 @@
 inline void PSParallelCompact::follow_root(ParCompactionManager* cm, T* p) {
   assert(!Universe::heap()->is_in_reserved(p),
          "roots shouldn't be things within the heap");
-#ifdef VALIDATE_MARK_SWEEP
-  if (ValidateMarkSweep) {
-    guarantee(!_root_refs_stack->contains(p), "should only be in here once");
-    _root_refs_stack->push(p);
-  }
-#endif
+
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
@@ -1294,20 +1242,10 @@
       oopDesc::encode_store_heap_oop_not_null(p, new_obj);
     }
   }
-  VALIDATE_MARK_SWEEP_ONLY(track_adjusted_pointer(p, isroot));
 }
 
 template <class T>
 inline void PSParallelCompact::KeepAliveClosure::do_oop_work(T* p) {
-#ifdef VALIDATE_MARK_SWEEP
-  if (ValidateMarkSweep) {
-    if (!Universe::heap()->is_in_reserved(p)) {
-      _root_refs_stack->push(p);
-    } else {
-      _other_refs_stack->push(p);
-    }
-  }
-#endif
   mark_and_push(_compaction_manager, p);
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -529,7 +529,7 @@
 
        if (PrintTenuringDistribution) {
          gclog_or_tty->cr();
-         gclog_or_tty->print_cr("Desired survivor size %ld bytes, new threshold %u (max %u)",
+         gclog_or_tty->print_cr("Desired survivor size " SIZE_FORMAT " bytes, new threshold %u (max %u)",
                                 size_policy->calculated_survivor_size_in_bytes(),
                                 _tenuring_threshold, MaxTenuringThreshold);
        }
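The switch from %ld to SIZE_FORMAT here (and in ageTable.cpp below) fixes a portability bug: size_t is not 'long' on every platform (on 64-bit Windows, long is 32 bits wide), so %ld can truncate or trigger format warnings. SIZE_FORMAT expands to the correct conversion specifier for size_t on each platform; usage matches the line above:

    size_t desired = size_policy->calculated_survivor_size_in_bytes();
    gclog_or_tty->print_cr("Desired survivor size " SIZE_FORMAT " bytes", desired);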
--- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -65,7 +65,8 @@
     case threads:
     {
       ResourceMark rm;
-      Threads::oops_do(&roots_closure, NULL);
+      CLDToOopClosure* cld_closure = NULL; // Not needed. All CLDs are already visited.
+      Threads::oops_do(&roots_closure, cld_closure, NULL);
     }
     break;
 
@@ -120,13 +121,14 @@
 
   PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(which);
   PSScavengeRootsClosure roots_closure(pm);
+  CLDToOopClosure* roots_from_clds = NULL;  // Not needed. All CLDs are already visited.
   CodeBlobToOopClosure roots_in_blobs(&roots_closure, /*do_marking=*/ true);
 
   if (_java_thread != NULL)
-    _java_thread->oops_do(&roots_closure, &roots_in_blobs);
+    _java_thread->oops_do(&roots_closure, roots_from_clds, &roots_in_blobs);
 
   if (_vm_thread != NULL)
-    _vm_thread->oops_do(&roots_closure, &roots_in_blobs);
+    _vm_thread->oops_do(&roots_closure, roots_from_clds, &roots_in_blobs);
 
   // Do the real work
   pm->drain_stacks(false);
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -808,8 +808,9 @@
   st->print("  to  "); to_space()->print_on(st);
 }
 
+// Note that no leading space is printed before the "[NAME:" prefix.
 void PSYoungGen::print_used_change(size_t prev_used) const {
-  gclog_or_tty->print(" [%s:", name());
+  gclog_or_tty->print("[%s:", name());
   gclog_or_tty->print(" "  SIZE_FORMAT "K"
                       "->" SIZE_FORMAT "K"
                       "("  SIZE_FORMAT "K)",
--- a/src/share/vm/gc_implementation/shared/ageTable.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/shared/ageTable.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -96,7 +96,7 @@
 
     if (PrintTenuringDistribution) {
       gclog_or_tty->cr();
-      gclog_or_tty->print_cr("Desired survivor size %ld bytes, new threshold %u (max %u)",
+      gclog_or_tty->print_cr("Desired survivor size " SIZE_FORMAT " bytes, new threshold %u (max %u)",
         desired_survivor_size*oopSize, result, MaxTenuringThreshold);
     }
 
--- a/src/share/vm/gc_implementation/shared/gcStats.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/shared/gcStats.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "gc_implementation/shared/gcStats.hpp"
 #include "gc_implementation/shared/gcUtil.hpp"
+#include "memory/allocation.inline.hpp"
 
 GCStats::GCStats() {
     _avg_promoted       = new AdaptivePaddedNoZeroDevAverage(
--- a/src/share/vm/gc_implementation/shared/markSweep.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/shared/markSweep.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -42,26 +42,6 @@
 PreservedMark*          MarkSweep::_preserved_marks = NULL;
 ReferenceProcessor*     MarkSweep::_ref_processor   = NULL;
 
-#ifdef VALIDATE_MARK_SWEEP
-GrowableArray<void*>*   MarkSweep::_root_refs_stack = NULL;
-GrowableArray<oop> *    MarkSweep::_live_oops = NULL;
-GrowableArray<oop> *    MarkSweep::_live_oops_moved_to = NULL;
-GrowableArray<size_t>*  MarkSweep::_live_oops_size = NULL;
-size_t                  MarkSweep::_live_oops_index = 0;
-size_t                  MarkSweep::_live_oops_index_at_perm = 0;
-GrowableArray<void*>*   MarkSweep::_other_refs_stack = NULL;
-GrowableArray<void*>*   MarkSweep::_adjusted_pointers = NULL;
-bool                         MarkSweep::_pointer_tracking = false;
-bool                         MarkSweep::_root_tracking = true;
-
-GrowableArray<HeapWord*>* MarkSweep::_cur_gc_live_oops = NULL;
-GrowableArray<HeapWord*>* MarkSweep::_cur_gc_live_oops_moved_to = NULL;
-GrowableArray<size_t>   * MarkSweep::_cur_gc_live_oops_size = NULL;
-GrowableArray<HeapWord*>* MarkSweep::_last_gc_live_oops = NULL;
-GrowableArray<HeapWord*>* MarkSweep::_last_gc_live_oops_moved_to = NULL;
-GrowableArray<size_t>   * MarkSweep::_last_gc_live_oops_size = NULL;
-#endif
-
 MarkSweep::FollowRootClosure  MarkSweep::follow_root_closure;
 CodeBlobToOopClosure MarkSweep::follow_code_root_closure(&MarkSweep::follow_root_closure, /*do_marking=*/ true);
 
@@ -185,142 +165,6 @@
   }
 }
 
-#ifdef VALIDATE_MARK_SWEEP
-
-void MarkSweep::track_adjusted_pointer(void* p, bool isroot) {
-  if (!ValidateMarkSweep)
-    return;
-
-  if (!isroot) {
-    if (_pointer_tracking) {
-      guarantee(_adjusted_pointers->contains(p), "should have seen this pointer");
-      _adjusted_pointers->remove(p);
-    }
-  } else {
-    ptrdiff_t index = _root_refs_stack->find(p);
-    if (index != -1) {
-      int l = _root_refs_stack->length();
-      if (l > 0 && l - 1 != index) {
-        void* last = _root_refs_stack->pop();
-        assert(last != p, "should be different");
-        _root_refs_stack->at_put(index, last);
-      } else {
-        _root_refs_stack->remove(p);
-      }
-    }
-  }
-}
-
-void MarkSweep::check_adjust_pointer(void* p) {
-  _adjusted_pointers->push(p);
-}
-
-class AdjusterTracker: public OopClosure {
- public:
-  AdjusterTracker() {}
-  void do_oop(oop* o)       { MarkSweep::check_adjust_pointer(o); }
-  void do_oop(narrowOop* o) { MarkSweep::check_adjust_pointer(o); }
-};
-
-void MarkSweep::track_interior_pointers(oop obj) {
-  if (ValidateMarkSweep) {
-    _adjusted_pointers->clear();
-    _pointer_tracking = true;
-
-    AdjusterTracker checker;
-    obj->oop_iterate_no_header(&checker);
-  }
-}
-
-void MarkSweep::check_interior_pointers() {
-  if (ValidateMarkSweep) {
-    _pointer_tracking = false;
-    guarantee(_adjusted_pointers->length() == 0, "should have processed the same pointers");
-  }
-}
-
-void MarkSweep::reset_live_oop_tracking() {
-  if (ValidateMarkSweep) {
-    guarantee((size_t)_live_oops->length() == _live_oops_index, "should be at end of live oops");
-    _live_oops_index = 0;
-  }
-}
-
-void MarkSweep::register_live_oop(oop p, size_t size) {
-  if (ValidateMarkSweep) {
-    _live_oops->push(p);
-    _live_oops_size->push(size);
-    _live_oops_index++;
-  }
-}
-
-void MarkSweep::validate_live_oop(oop p, size_t size) {
-  if (ValidateMarkSweep) {
-    oop obj = _live_oops->at((int)_live_oops_index);
-    guarantee(obj == p, "should be the same object");
-    guarantee(_live_oops_size->at((int)_live_oops_index) == size, "should be the same size");
-    _live_oops_index++;
-  }
-}
-
-void MarkSweep::live_oop_moved_to(HeapWord* q, size_t size,
-                                  HeapWord* compaction_top) {
-  assert(oop(q)->forwardee() == NULL || oop(q)->forwardee() == oop(compaction_top),
-         "should be moved to forwarded location");
-  if (ValidateMarkSweep) {
-    MarkSweep::validate_live_oop(oop(q), size);
-    _live_oops_moved_to->push(oop(compaction_top));
-  }
-  if (RecordMarkSweepCompaction) {
-    _cur_gc_live_oops->push(q);
-    _cur_gc_live_oops_moved_to->push(compaction_top);
-    _cur_gc_live_oops_size->push(size);
-  }
-}
-
-void MarkSweep::compaction_complete() {
-  if (RecordMarkSweepCompaction) {
-    GrowableArray<HeapWord*>* _tmp_live_oops          = _cur_gc_live_oops;
-    GrowableArray<HeapWord*>* _tmp_live_oops_moved_to = _cur_gc_live_oops_moved_to;
-    GrowableArray<size_t>   * _tmp_live_oops_size     = _cur_gc_live_oops_size;
-
-    _cur_gc_live_oops           = _last_gc_live_oops;
-    _cur_gc_live_oops_moved_to  = _last_gc_live_oops_moved_to;
-    _cur_gc_live_oops_size      = _last_gc_live_oops_size;
-    _last_gc_live_oops          = _tmp_live_oops;
-    _last_gc_live_oops_moved_to = _tmp_live_oops_moved_to;
-    _last_gc_live_oops_size     = _tmp_live_oops_size;
-  }
-}
-
-void MarkSweep::print_new_location_of_heap_address(HeapWord* q) {
-  if (!RecordMarkSweepCompaction) {
-    tty->print_cr("Requires RecordMarkSweepCompaction to be enabled");
-    return;
-  }
-
-  if (_last_gc_live_oops == NULL) {
-    tty->print_cr("No compaction information gathered yet");
-    return;
-  }
-
-  for (int i = 0; i < _last_gc_live_oops->length(); i++) {
-    HeapWord* old_oop = _last_gc_live_oops->at(i);
-    size_t    sz      = _last_gc_live_oops_size->at(i);
-    if (old_oop <= q && q < (old_oop + sz)) {
-      HeapWord* new_oop = _last_gc_live_oops_moved_to->at(i);
-      size_t offset = (q - old_oop);
-      tty->print_cr("Address " PTR_FORMAT, q);
-      tty->print_cr(" Was in oop " PTR_FORMAT ", size " SIZE_FORMAT ", at offset " SIZE_FORMAT, old_oop, sz, offset);
-      tty->print_cr(" Now in oop " PTR_FORMAT ", actual address " PTR_FORMAT, new_oop, new_oop + offset);
-      return;
-    }
-  }
-
-  tty->print_cr("Address " PTR_FORMAT " not found in live oop information from last GC", q);
-}
-#endif //VALIDATE_MARK_SWEEP
-
 MarkSweep::IsAliveClosure   MarkSweep::is_alive;
 
 void MarkSweep::IsAliveClosure::do_object(oop p)   { ShouldNotReachHere(); }
--- a/src/share/vm/gc_implementation/shared/markSweep.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/shared/markSweep.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -44,21 +44,6 @@
 //
 // Class unloading will only occur when a full gc is invoked.
 
-// If VALIDATE_MARK_SWEEP is defined, the -XX:+ValidateMarkSweep flag will
-// be operational, and will provide slow but comprehensive self-checks within
-// the GC.  This is not enabled by default in product or release builds,
-// since the extra call to track_adjusted_pointer() in _adjust_pointer()
-// would be too much overhead, and would disturb performance measurement.
-// However, debug builds are sometimes way too slow to run GC tests!
-#ifdef ASSERT
-#define VALIDATE_MARK_SWEEP 1
-#endif
-#ifdef VALIDATE_MARK_SWEEP
-#define VALIDATE_MARK_SWEEP_ONLY(code) code
-#else
-#define VALIDATE_MARK_SWEEP_ONLY(code)
-#endif
-
 // declared at end
 class PreservedMark;
 
@@ -147,33 +132,6 @@
   // Reference processing (used in ...follow_contents)
   static ReferenceProcessor*             _ref_processor;
 
-#ifdef VALIDATE_MARK_SWEEP
-  static GrowableArray<void*>*           _root_refs_stack;
-  static GrowableArray<oop> *            _live_oops;
-  static GrowableArray<oop> *            _live_oops_moved_to;
-  static GrowableArray<size_t>*          _live_oops_size;
-  static size_t                          _live_oops_index;
-  static size_t                          _live_oops_index_at_perm;
-  static GrowableArray<void*>*           _other_refs_stack;
-  static GrowableArray<void*>*           _adjusted_pointers;
-  static bool                            _pointer_tracking;
-  static bool                            _root_tracking;
-
-  // The following arrays are saved since the time of the last GC and
-  // assist in tracking down problems where someone has done an errant
-  // store into the heap, usually to an oop that wasn't properly
-  // handleized across a GC. If we crash or otherwise fail before the
-  // next GC, we can query these arrays to find out the object we had
-  // intended to do the store to (assuming it is still alive) and the
-  // offset within that object. Covered under RecordMarkSweepCompaction.
-  static GrowableArray<HeapWord*> *      _cur_gc_live_oops;
-  static GrowableArray<HeapWord*> *      _cur_gc_live_oops_moved_to;
-  static GrowableArray<size_t>*          _cur_gc_live_oops_size;
-  static GrowableArray<HeapWord*> *      _last_gc_live_oops;
-  static GrowableArray<HeapWord*> *      _last_gc_live_oops_moved_to;
-  static GrowableArray<size_t>*          _last_gc_live_oops_size;
-#endif
-
   // Non public closures
   static KeepAliveClosure keep_alive;
 
@@ -227,24 +185,6 @@
   static void adjust_pointer(oop* p)       { adjust_pointer(p, false); }
   static void adjust_pointer(narrowOop* p) { adjust_pointer(p, false); }
 
-#ifdef VALIDATE_MARK_SWEEP
-  static void track_adjusted_pointer(void* p, bool isroot);
-  static void check_adjust_pointer(void* p);
-  static void track_interior_pointers(oop obj);
-  static void check_interior_pointers();
-
-  static void reset_live_oop_tracking();
-  static void register_live_oop(oop p, size_t size);
-  static void validate_live_oop(oop p, size_t size);
-  static void live_oop_moved_to(HeapWord* q, size_t size, HeapWord* compaction_top);
-  static void compaction_complete();
-
-  // Querying operation of RecordMarkSweepCompaction results.
-  // Finds and prints the current base oop and offset for a word
-  // within an oop that was live during the last GC. Helpful for
-  // tracking down heap stomps.
-  static void print_new_location_of_heap_address(HeapWord* q);
-#endif
 };
 
 class PreservedMark VALUE_OBJ_CLASS_SPEC {
--- a/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -46,12 +46,6 @@
 template <class T> inline void MarkSweep::follow_root(T* p) {
   assert(!Universe::heap()->is_in_reserved(p),
          "roots shouldn't be things within the heap");
-#ifdef VALIDATE_MARK_SWEEP
-  if (ValidateMarkSweep) {
-    guarantee(!_root_refs_stack->contains(p), "should only be in here once");
-    _root_refs_stack->push(p);
-  }
-#endif
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
@@ -97,19 +91,9 @@
       oopDesc::encode_store_heap_oop_not_null(p, new_obj);
     }
   }
-  VALIDATE_MARK_SWEEP_ONLY(track_adjusted_pointer(p, isroot));
 }
 
 template <class T> inline void MarkSweep::KeepAliveClosure::do_oop_work(T* p) {
-#ifdef VALIDATE_MARK_SWEEP
-  if (ValidateMarkSweep) {
-    if (!Universe::heap()->is_in_reserved(p)) {
-      _root_refs_stack->push(p);
-    } else {
-      _other_refs_stack->push(p);
-    }
-  }
-#endif
   mark_and_push(p);
 }
 
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -28,19 +28,7 @@
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "memory/sharedHeap.hpp"
 #include "oops/oop.inline.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
-
+#include "runtime/thread.inline.hpp"
 
 MutableNUMASpace::MutableNUMASpace(size_t alignment) : MutableSpace(alignment) {
   _lgrp_spaces = new (ResourceObj::C_HEAP, mtGC) GrowableArray<LGRPSpace*>(0, true);
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -191,7 +191,7 @@
 class VM_CollectForMetadataAllocation: public VM_GC_Operation {
  private:
   MetaWord*                _result;
-  size_t      _size;                       // size of object to be allocated
+  size_t                   _size;     // size of object to be allocated
   Metaspace::MetadataType  _mdtype;
   ClassLoaderData*         _loader_data;
  public:
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,19 +30,8 @@
 #include "oops/oop.inline.hpp"
 #include "oops/instanceMirrorKlass.hpp"
 #include "runtime/init.hpp"
+#include "runtime/thread.inline.hpp"
 #include "services/heapDumper.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 
 #ifdef ASSERT
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -289,11 +289,6 @@
   // (A scavenge is a GC which is not a full GC.)
   virtual bool is_scavengable(const void *p) = 0;
 
-  // Returns "TRUE" if "p" is a method oop in the
-  // current heap, with high probability. This predicate
-  // is not stable, in general.
-  bool is_valid_method(Method* p) const;
-
   void set_gc_cause(GCCause::Cause v) {
      if (UsePerfData) {
        _gc_lastcause = _gc_cause;
--- a/src/share/vm/gc_interface/collectedHeap.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,21 +31,9 @@
 #include "oops/arrayOop.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "runtime/sharedRuntime.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "services/lowMemoryDetector.hpp"
 #include "utilities/copy.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 // Inline allocation implementations.
 
@@ -242,36 +230,6 @@
   return (oop)obj;
 }
 
-// Returns "TRUE" if "p" is a method oop in the
-// current heap with high probability. NOTE: The main
-// current consumers of this interface are Forte::
-// and ThreadProfiler::. In these cases, the
-// interpreter frame from which "p" came, may be
-// under construction when sampled asynchronously, so
-// the clients want to check that it represents a
-// valid method before using it. Nonetheless since
-// the clients do not typically lock out GC, the
-// predicate is_valid_method() is not stable, so
-// it is possible that by the time "p" is used, it
-// is no longer valid.
-inline bool CollectedHeap::is_valid_method(Method* p) const {
-  return
-    p != NULL &&
-
-    // Check whether "method" is metadata
-    p->is_metadata() &&
-
-    // See if GC is active; however, there is still an
-    // apparently unavoidable window after this call
-    // and before the client of this interface uses "p".
-    // If the client chooses not to lock out GC, then
-    // it's a risk the client must accept.
-    !is_gc_active() &&
-
-    // Check that p is a Method*.
-    p->is_method();
-}
-
 inline void CollectedHeap::oop_iterate_no_header(OopClosure* cl) {
   NoHeaderExtendedOopClosure no_header_cl(cl);
   oop_iterate(&no_header_cl);
--- a/src/share/vm/gc_interface/gcCause.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/gc_interface/gcCause.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -99,9 +99,9 @@
  public:
    GCCauseString(const char* prefix, GCCause::Cause cause) {
      if (PrintGCCause) {
-      _position = jio_snprintf(_buffer, _length, "%s (%s)", prefix, GCCause::to_string(cause));
+      _position = jio_snprintf(_buffer, _length, "%s (%s) ", prefix, GCCause::to_string(cause));
      } else {
-      _position = jio_snprintf(_buffer, _length, "%s", prefix);
+      _position = jio_snprintf(_buffer, _length, "%s ", prefix);
      }
      assert(_position >= 0 && _position <= _length,
        err_msg("Need to increase the buffer size in GCCauseString? %d", _position));
--- a/src/share/vm/interpreter/abstractInterpreter.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/abstractInterpreter.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,6 +27,7 @@
 
 #include "code/stubs.hpp"
 #include "interpreter/bytecodes.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
 #include "utilities/top.hpp"
 #ifdef TARGET_ARCH_MODEL_x86_32
@@ -47,18 +48,6 @@
 #ifdef TARGET_ARCH_MODEL_ppc
 # include "interp_masm_ppc.hpp"
 #endif
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 // This file contains the platform-independent parts
 // of the abstract interpreter and the abstract interpreter generator.
@@ -320,6 +309,7 @@
   void bang_stack_shadow_pages(bool native_call);
 
   void generate_all();
+  void initialize_method_handle_entries();
 
  public:
   AbstractInterpreterGenerator(StubQueue* _code);
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -235,10 +235,6 @@
 #endif
 #endif
 
-// JavaStack Implementation
-#define MORE_STACK(count)  \
-    (topOfStack -= ((count) * Interpreter::stackElementWords))
-
 
 #define UPDATE_PC(opsize) {pc += opsize; }
 /*
@@ -575,7 +571,7 @@
 
 /* 0xE0 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
 /* 0xE4 */ &&opc_default,     &&opc_fast_aldc,      &&opc_fast_aldc_w,  &&opc_return_register_finalizer,
-/* 0xE8 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+/* 0xE8 */ &&opc_invokehandle,&&opc_default,        &&opc_default,      &&opc_default,
 /* 0xEC */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
 
 /* 0xF0 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
@@ -1773,7 +1769,7 @@
 
           oop obj;
           if ((Bytecodes::Code)opcode == Bytecodes::_getstatic) {
-            Klass* k = (Klass*) cache->f1();
+            Klass* k = cache->f1_as_klass();
             obj = k->java_mirror();
             MORE_STACK(1);  // Assume single slot push
           } else {
@@ -1885,7 +1881,7 @@
             --count;
           }
           if ((Bytecodes::Code)opcode == Bytecodes::_putstatic) {
-            Klass* k = (Klass*) cache->f1();
+            Klass* k = cache->f1_as_klass();
             obj = k->java_mirror();
           } else {
             --count;
@@ -2047,8 +2043,8 @@
             if (objKlassOop != klassOf &&
                 !objKlassOop->is_subtype_of(klassOf)) {
               ResourceMark rm(THREAD);
-              const char* objName = Klass::cast(objKlassOop)->external_name();
-              const char* klassName = Klass::cast(klassOf)->external_name();
+              const char* objName = objKlassOop->external_name();
+              const char* klassName = klassOf->external_name();
               char* message = SharedRuntime::generate_class_cast_message(
                 objName, klassName);
               VM_JAVA_ERROR(vmSymbols::java_lang_ClassCastException(), message);
@@ -2190,6 +2186,7 @@
       }
 
       CASE(_invokedynamic): {
+
         if (!EnableInvokeDynamic) {
           // We should not encounter this bytecode if !EnableInvokeDynamic.
           // The verifier will stop it.  However, if we get past the verifier,
@@ -2199,26 +2196,64 @@
           ShouldNotReachHere();
         }
 
-        int index = Bytes::get_native_u4(pc+1);
+        u4 index = Bytes::get_native_u4(pc+1);
+        ConstantPoolCacheEntry* cache = cp->constant_pool()->invokedynamic_cp_cache_entry_at(index);
 
         // We are resolved if the resolved_references field contains a non-null object (CallSite, etc.)
         // This kind of CP cache entry does not need to match the flags byte, because
         // there is a 1-1 relation between bytecode type and CP entry type.
-        ConstantPool* constants = METHOD->constants();
-        oop result = constants->resolved_references()->obj_at(index);
-        if (result == NULL) {
+        if (! cache->is_resolved((Bytecodes::Code) opcode)) {
           CALL_VM(InterpreterRuntime::resolve_invokedynamic(THREAD),
                   handle_exception);
-          result = THREAD->vm_result();
+          cache = cp->constant_pool()->invokedynamic_cp_cache_entry_at(index);
+        }
+
+        Method* method = cache->f1_as_method();
+        VERIFY_OOP(method);
+
+        if (cache->has_appendix()) {
+          ConstantPool* constants = METHOD->constants();
+          SET_STACK_OBJECT(cache->appendix_if_resolved(constants), 0);
+          MORE_STACK(1);
+        }
+
+        istate->set_msg(call_method);
+        istate->set_callee(method);
+        istate->set_callee_entry_point(method->from_interpreted_entry());
+        istate->set_bcp_advance(5);
+
+        UPDATE_PC_AND_RETURN(0); // I'll be back...
+      }
+
+      CASE(_invokehandle): {
+
+        if (!EnableInvokeDynamic) {
+          ShouldNotReachHere();
         }
 
-        VERIFY_OOP(result);
-        oop method_handle = java_lang_invoke_CallSite::target(result);
-        CHECK_NULL(method_handle);
-
-        istate->set_msg(call_method_handle);
-        istate->set_callee((Method*) method_handle);
-        istate->set_bcp_advance(5);
+        u2 index = Bytes::get_native_u2(pc+1);
+        ConstantPoolCacheEntry* cache = cp->entry_at(index);
+
+        if (! cache->is_resolved((Bytecodes::Code) opcode)) {
+          CALL_VM(InterpreterRuntime::resolve_invokehandle(THREAD),
+                  handle_exception);
+          cache = cp->entry_at(index);
+        }
+
+        Method* method = cache->f1_as_method();
+
+        VERIFY_OOP(method);
+
+        if (cache->has_appendix()) {
+          ConstantPool* constants = METHOD->constants();
+          SET_STACK_OBJECT(cache->appendix_if_resolved(constants), 0);
+          MORE_STACK(1);
+        }
+
+        istate->set_msg(call_method);
+        istate->set_callee(method);
+        istate->set_callee_entry_point(method->from_interpreted_entry());
+        istate->set_bcp_advance(3);
 
         UPDATE_PC_AND_RETURN(0); // I'll be back...
       }
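
Both new cases follow the same shape: resolve lazily through the constant-pool cache, push the optional appendix as a trailing argument, and hand the callee back to the frame manager. The bcp advances differ because invokedynamic carries a 4-byte cache index (a 5-byte instruction) while invokehandle carries a 2-byte index (a 3-byte instruction). A simplified sketch of the shared shape (the types here are stand-ins, not the VM's):

struct Method;
struct CacheEntry {
  bool    resolved;
  Method* method;
  bool    has_appendix;
  void*   appendix;
};

// Resolve lazily through the CP cache, push the optional appendix, then
// return the callee (istate->set_callee(...) in the real interpreter).
Method* invoke_via_cp_cache(CacheEntry* cache,
                            void (*resolve)(CacheEntry*),
                            void**& topOfStack) {   // stack grows downward
  if (!cache->resolved) {
    resolve(cache);                  // InterpreterRuntime::resolve_* upcall
  }
  if (cache->has_appendix) {
    topOfStack[0] = cache->appendix; // SET_STACK_OBJECT(..., 0)
    topOfStack -= 1;                 // MORE_STACK(1)
  }
  return cache->method;
}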
--- a/src/share/vm/interpreter/bytecodeInterpreter.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/bytecodeInterpreter.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -50,6 +50,10 @@
 
 #ifdef CC_INTERP
 
+// JavaStack Implementation
+#define MORE_STACK(count)  \
+    (topOfStack -= ((count) * Interpreter::stackElementWords))
+
 // CVM definitions find hotspot equivalents...
 
 union VMJavaVal64 {
@@ -107,7 +111,6 @@
          rethrow_exception,         // unwinding and throwing exception
          // requests to frame manager from C++ interpreter
          call_method,               // request for new frame from interpreter, manager responds with method_entry
-         call_method_handle,        // like the above, except the callee is a method handle
          return_from_method,        // request from interpreter to unwind, manager responds with method_continue
          more_monitors,             // need a new monitor
          throwing_exception,        // unwind stack and rethrow
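
For reference, MORE_STACK (now defined in this header, per the hunk above) reserves count slots on the downward-growing expression stack; a negative count pops. A tiny stand-alone sketch with stackElementWords fixed at 1:

#include <cstdio>

#define stackElementWords 1   // one machine word per slot in this sketch
#define MORE_STACK(count) (topOfStack -= ((count) * stackElementWords))

int main() {
  long stack[8];
  long* topOfStack = &stack[7];  // next free slot; the stack grows downward
  topOfStack[0] = 42;            // SET_STACK_*(value, 0) writes the slot ...
  MORE_STACK(1);                 // ... then the push claims it
  MORE_STACK(-1);                // a negative count pops
  std::printf("%ld\n", topOfStack[0]);  // prints 42
  return 0;
}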
--- a/src/share/vm/interpreter/bytecodes.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/bytecodes.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -423,7 +423,9 @@
   static bool        is_zero_const  (Code code)    { return (code == _aconst_null || code == _iconst_0
                                                            || code == _fconst_0 || code == _dconst_0); }
   static bool        is_invoke      (Code code)    { return (_invokevirtual <= code && code <= _invokedynamic); }
-
+  static bool        has_receiver   (Code code)    { assert(is_invoke(code), "");  return code == _invokevirtual ||
+                                                                                          code == _invokespecial ||
+                                                                                          code == _invokeinterface; }
   static bool        has_optional_appendix(Code code) { return code == _invokedynamic || code == _invokehandle; }
 
   static int         compute_flags  (const char* format, int more_flags = 0);  // compute the flags
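
has_receiver() answers whether an invoke consumes a receiver slot in addition to its declared arguments; invokestatic and invokedynamic do not. A stand-alone sketch (arg_slots is hypothetical, not a VM function):

#include <cassert>

// Simplified stand-in for the bytecode enumeration.
enum Code { _invokevirtual, _invokespecial, _invokestatic,
            _invokeinterface, _invokedynamic };

static bool has_receiver(Code code) {
  return code == _invokevirtual || code == _invokespecial ||
         code == _invokeinterface;   // static and dynamic calls have none
}

// A receiver-bearing invoke consumes one extra stack slot beyond the
// declared arguments.
static int arg_slots(Code code, int declared_slots) {
  return declared_slots + (has_receiver(code) ? 1 : 0);
}

int main() {
  assert(arg_slots(_invokevirtual, 2) == 3);
  assert(arg_slots(_invokestatic, 2) == 2);
  return 0;
}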
--- a/src/share/vm/interpreter/cppInterpreter.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/cppInterpreter.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -117,7 +117,6 @@
     method_entry(empty);
     method_entry(accessor);
     method_entry(abstract);
-    method_entry(method_handle);
     method_entry(java_lang_math_sin   );
     method_entry(java_lang_math_cos   );
     method_entry(java_lang_math_tan   );
@@ -125,7 +124,12 @@
     method_entry(java_lang_math_sqrt  );
     method_entry(java_lang_math_log   );
     method_entry(java_lang_math_log10 );
+    method_entry(java_lang_math_pow );
+    method_entry(java_lang_math_exp );
     method_entry(java_lang_ref_reference_get);
+
+    initialize_method_handle_entries();
+
     Interpreter::_native_entry_begin = Interpreter::code()->code_end();
     method_entry(native);
     method_entry(native_synchronized);
--- a/src/share/vm/interpreter/interpreter.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/interpreter.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,7 +23,9 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "compiler/disassembler.hpp"
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/bytecodeInterpreter.hpp"
 #include "interpreter/interpreter.hpp"
@@ -464,3 +466,11 @@
     }
   }
 }
+
+void AbstractInterpreterGenerator::initialize_method_handle_entries() {
+  // method handle entry kinds are generated later in MethodHandlesAdapterGenerator::generate:
+  for (int i = Interpreter::method_handle_invoke_FIRST; i <= Interpreter::method_handle_invoke_LAST; i++) {
+    Interpreter::MethodKind kind = (Interpreter::MethodKind) i;
+    Interpreter::_entry_table[kind] = Interpreter::_entry_table[Interpreter::abstract];
+  }
+}
--- a/src/share/vm/interpreter/interpreterRuntime.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/interpreterRuntime.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,6 +26,7 @@
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
 #include "compiler/compileBroker.hpp"
+#include "compiler/disassembler.hpp"
 #include "gc_interface/collectedHeap.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
@@ -312,7 +313,7 @@
 
 IRT_ENTRY(void, InterpreterRuntime::create_klass_exception(JavaThread* thread, char* name, oopDesc* obj))
   ResourceMark rm(thread);
-  const char* klass_name = Klass::cast(obj->klass())->external_name();
+  const char* klass_name = obj->klass()->external_name();
   // lookup exception klass
   TempNewSymbol s = SymbolTable::new_symbol(name, CHECK);
   if (ProfileTraps) {
@@ -341,7 +342,7 @@
 
   ResourceMark rm(thread);
   char* message = SharedRuntime::generate_class_cast_message(
-    thread, Klass::cast(obj->klass())->external_name());
+    thread, obj->klass()->external_name());
 
   if (ProfileTraps) {
     note_trap(thread, Deoptimization::Reason_class_check, CHECK);
@@ -416,7 +417,7 @@
 
     // exception handler lookup
     KlassHandle h_klass(THREAD, h_exception->klass());
-    handler_bci = h_method->fast_exception_handler_bci_for(h_klass, current_bci, THREAD);
+    handler_bci = Method::fast_exception_handler_bci_for(h_method, h_klass, current_bci, THREAD);
     if (HAS_PENDING_EXCEPTION) {
       // We threw an exception while trying to find the exception handler.
       // Transfer the new exception to the exception handle which will
@@ -733,12 +734,7 @@
                                  get_index_u2_cpcache(thread, bytecode), bytecode, CHECK);
   } // end JvmtiHideSingleStepping
 
-  cache_entry(thread)->set_method_handle(
-      pool,
-      info.resolved_method(),
-      info.resolved_appendix(),
-      info.resolved_method_type(),
-      pool->resolved_references());
+  cache_entry(thread)->set_method_handle(pool, info);
 }
 IRT_END
 
@@ -762,12 +758,7 @@
   } // end JvmtiHideSingleStepping
 
   ConstantPoolCacheEntry* cp_cache_entry = pool->invokedynamic_cp_cache_entry_at(index);
-  cp_cache_entry->set_dynamic_call(
-      pool,
-      info.resolved_method(),
-      info.resolved_appendix(),
-      info.resolved_method_type(),
-      pool->resolved_references());
+  cp_cache_entry->set_dynamic_call(pool, info);
 }
 IRT_END
 
--- a/src/share/vm/interpreter/interpreterRuntime.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/interpreterRuntime.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,19 +31,8 @@
 #include "oops/method.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/signature.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/top.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 // The InterpreterRuntime is called by the interpreter for everything
 // that cannot/should not be dealt with in assembly and needs C support.
--- a/src/share/vm/interpreter/invocationCounter.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/invocationCounter.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,6 +40,7 @@
 
 class InvocationCounter VALUE_OBJ_CLASS_SPEC {
   friend class VMStructs;
+  friend class ciReplay;
  private:                             // bit no: |31  3|  2  | 1 0 |
   unsigned int _counter;              // format: [count|carry|state]
 
@@ -85,6 +86,8 @@
   void set_carry();                              // set the sticky carry bit
   void set_carry_flag()                          {  _counter |= carry_mask; }
 
+  int raw_counter()                              { return _counter; }
+
   // Accessors
   State  state() const                           { return (State)(_counter & state_mask); }
   bool   carry() const                           { return (_counter & carry_mask) != 0; }
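
The new raw_counter() accessor exposes the whole packed word so ciReplay can record it. Per the layout comment, bits 1..0 hold the state, bit 2 the sticky carry, and bits 31..3 the count. A stand-alone sketch of decoding it (the shift constants are read off the comment, not the VM's named masks):

#include <cassert>

static const unsigned state_bits = 2;
static const unsigned carry_bits = 1;

static unsigned state(unsigned raw) { return raw & ((1u << state_bits) - 1); }
static unsigned carry(unsigned raw) { return (raw >> state_bits) & 1u; }
static unsigned count(unsigned raw) { return raw >> (state_bits + carry_bits); }

int main() {
  unsigned raw = (100u << 3) | (1u << 2) | 1u;  // count=100, carry set, state=1
  assert(count(raw) == 100);
  assert(carry(raw) == 1);
  assert(state(raw) == 1);
  return 0;
}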
--- a/src/share/vm/interpreter/linkResolver.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/linkResolver.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "classfile/defaultMethods.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
 #include "compiler/compileBroker.hpp"
@@ -42,19 +43,8 @@
 #include "runtime/handles.inline.hpp"
 #include "runtime/reflection.hpp"
 #include "runtime/signature.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 //------------------------------------------------------------------------------------------------------------------------
 // Implementation of FieldAccessInfo
@@ -132,7 +122,7 @@
       // don't force compilation, resolve was on behalf of compiler
       return;
     }
-    if (InstanceKlass::cast(selected_method->method_holder())->is_not_initialized()) {
+    if (selected_method->method_holder()->is_not_initialized()) {
       // 'is_not_initialized' means not only '!is_initialized', but also that
       // initialization has not been started yet ('!being_initialized')
       // Do not force compilation of methods in uninitialized classes.
@@ -202,7 +192,7 @@
   Method* result_oop = klass->uncached_lookup_method(name, signature);
   result = methodHandle(THREAD, result_oop);
   while (!result.is_null() && result->is_static()) {
-    klass = KlassHandle(THREAD, Klass::cast(result->method_holder())->super());
+    klass = KlassHandle(THREAD, result->method_holder()->super());
     result = methodHandle(THREAD, klass->uncached_lookup_method(name, signature));
   }
 }
@@ -404,21 +394,13 @@
                                   Symbol* method_name, Symbol* method_signature,
                                   KlassHandle current_klass, bool check_access, TRAPS) {
 
-  // 1. check if klass is not interface
-  if (resolved_klass->is_interface()) {
-    ResourceMark rm(THREAD);
-    char buf[200];
-    jio_snprintf(buf, sizeof(buf), "Found interface %s, but class was expected", Klass::cast(resolved_klass())->external_name());
-    THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
-  }
-
   Handle nested_exception;
 
-  // 2. lookup method in resolved klass and its super klasses
+  // 1. lookup method in resolved klass and its super klasses
   lookup_method_in_klasses(resolved_method, resolved_klass, method_name, method_signature, CHECK);
 
   if (resolved_method.is_null()) { // not found in the class hierarchy
-    // 3. lookup method in all the interfaces implemented by the resolved klass
+    // 2. lookup method in all the interfaces implemented by the resolved klass
     lookup_method_in_interfaces(resolved_method, resolved_klass, method_name, method_signature, CHECK);
 
     if (resolved_method.is_null()) {
@@ -432,21 +414,30 @@
     }
 
     if (resolved_method.is_null()) {
-      // 4. method lookup failed
+      // 3. method lookup failed
       ResourceMark rm(THREAD);
       THROW_MSG_CAUSE(vmSymbols::java_lang_NoSuchMethodError(),
-                      Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+                      Method::name_and_sig_as_C_string(resolved_klass(),
                                                               method_name,
                                                               method_signature),
                       nested_exception);
     }
   }
 
+  // 4. check if klass is not interface
+  if (resolved_klass->is_interface() && resolved_method->is_abstract()) {
+    ResourceMark rm(THREAD);
+    char buf[200];
+    jio_snprintf(buf, sizeof(buf), "Found interface %s, but class was expected",
+        resolved_klass()->external_name());
+    THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
+  }
+
   // 5. check if method is concrete
   if (resolved_method->is_abstract() && !resolved_klass->is_abstract()) {
     ResourceMark rm(THREAD);
     THROW_MSG(vmSymbols::java_lang_AbstractMethodError(),
-              Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+              Method::name_and_sig_as_C_string(resolved_klass(),
                                                       method_name,
                                                       method_signature));
   }
@@ -464,7 +455,7 @@
 
     // check loader constraints
     Handle loader (THREAD, InstanceKlass::cast(current_klass())->class_loader());
-    Handle class_loader (THREAD, InstanceKlass::cast(resolved_method->method_holder())->class_loader());
+    Handle class_loader (THREAD, resolved_method->method_holder()->class_loader());
     {
       ResourceMark rm(THREAD);
       char* failed_type_name =
@@ -475,7 +466,7 @@
           " \"%s\" the class loader (instance of %s) of the current class, %s,"
           " and the class loader (instance of %s) for resolved class, %s, have"
           " different Class objects for the type %s used in the signature";
-        char* sig = Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),method_name,method_signature);
+        char* sig = Method::name_and_sig_as_C_string(resolved_klass(),method_name,method_signature);
         const char* loader1 = SystemDictionary::loader_name(loader());
         char* current = InstanceKlass::cast(current_klass())->name()->as_C_string();
         const char* loader2 = SystemDictionary::loader_name(class_loader());
@@ -503,7 +494,7 @@
   if (!resolved_klass->is_interface()) {
     ResourceMark rm(THREAD);
     char buf[200];
-    jio_snprintf(buf, sizeof(buf), "Found class %s, but interface was expected", Klass::cast(resolved_klass())->external_name());
+    jio_snprintf(buf, sizeof(buf), "Found class %s, but interface was expected", resolved_klass()->external_name());
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
   }
 
@@ -517,7 +508,7 @@
       // no method found
       ResourceMark rm(THREAD);
       THROW_MSG(vmSymbols::java_lang_NoSuchMethodError(),
-                Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+                Method::name_and_sig_as_C_string(resolved_klass(),
                                                         method_name,
                                                         method_signature));
     }
@@ -526,7 +517,7 @@
   if (check_access) {
     HandleMark hm(THREAD);
     Handle loader (THREAD, InstanceKlass::cast(current_klass())->class_loader());
-    Handle class_loader (THREAD, InstanceKlass::cast(resolved_method->method_holder())->class_loader());
+    Handle class_loader (THREAD, resolved_method->method_holder()->class_loader());
     {
       ResourceMark rm(THREAD);
       char* failed_type_name =
@@ -538,7 +529,7 @@
           "current class, %s, and the class loader (instance of %s) for "
           "resolved class, %s, have different Class objects for the type %s "
           "used in the signature";
-        char* sig = Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),method_name,method_signature);
+        char* sig = Method::name_and_sig_as_C_string(resolved_klass(),method_name,method_signature);
         const char* loader1 = SystemDictionary::loader_name(loader());
         char* current = InstanceKlass::cast(current_klass())->name()->as_C_string();
         const char* loader2 = SystemDictionary::loader_name(class_loader());
@@ -625,7 +616,7 @@
   if (is_static != fd.is_static()) {
     ResourceMark rm(THREAD);
     char msg[200];
-    jio_snprintf(msg, sizeof(msg), "Expected %s field %s.%s", is_static ? "static" : "non-static", Klass::cast(resolved_klass())->external_name(), fd.name()->as_C_string());
+    jio_snprintf(msg, sizeof(msg), "Expected %s field %s.%s", is_static ? "static" : "non-static", resolved_klass()->external_name(), fd.name()->as_C_string());
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), msg);
   }
 
@@ -699,7 +690,7 @@
                                        bool check_access, bool initialize_class, TRAPS) {
   methodHandle resolved_method;
   linktime_resolve_static_method(resolved_method, resolved_klass, method_name, method_signature, current_klass, check_access, CHECK);
-  resolved_klass = KlassHandle(THREAD, Klass::cast(resolved_method->method_holder()));
+  resolved_klass = KlassHandle(THREAD, resolved_method->method_holder());
 
   // Initialize klass (this should only happen if everything is ok)
   if (initialize_class && resolved_klass->should_be_initialized()) {
@@ -723,7 +714,7 @@
   if (!resolved_method->is_static()) {
     ResourceMark rm(THREAD);
     char buf[200];
-    jio_snprintf(buf, sizeof(buf), "Expected static method %s", Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+    jio_snprintf(buf, sizeof(buf), "Expected static method %s", Method::name_and_sig_as_C_string(resolved_klass(),
                                                       resolved_method->name(),
                                                       resolved_method->signature()));
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
@@ -743,6 +734,27 @@
                                                    Symbol* method_name, Symbol* method_signature,
                                                    KlassHandle current_klass, bool check_access, TRAPS) {
 
+  if (resolved_klass->is_interface() && current_klass() != NULL) {
+    // If the target class is a direct interface, treat this as a "super"
+    // default call.
+    //
+    // If the current method is an overpass that happens to call a direct
+    // super-interface's method, then we'll end up rerunning the default method
+    // analysis even though we don't need to, but that's ok since it will end
+    // up with the same answer.
+    InstanceKlass* ik = InstanceKlass::cast(current_klass());
+    Array<Klass*>* interfaces = ik->local_interfaces();
+    int num_interfaces = interfaces->length();
+    for (int index = 0; index < num_interfaces; index++) {
+      if (interfaces->at(index) == resolved_klass()) {
+        Method* method = DefaultMethods::find_super_default(current_klass(),
+            resolved_klass(), method_name, method_signature, CHECK);
+        resolved_method = methodHandle(THREAD, method);
+        return;
+      }
+    }
+  }
+
   resolve_method(resolved_method, resolved_klass, method_name, method_signature, current_klass, check_access, CHECK);
 
   // check if method name is <init>, that it is found in same klass as static type
@@ -766,7 +778,7 @@
     char buf[200];
     jio_snprintf(buf, sizeof(buf),
                  "Expecting non-static method %s",
-                 Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+                 Method::name_and_sig_as_C_string(resolved_klass(),
                                                          resolved_method->name(),
                                                          resolved_method->signature()));
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
@@ -784,11 +796,17 @@
   { KlassHandle method_klass  = KlassHandle(THREAD,
                                             resolved_method->method_holder());
 
-    if (check_access &&
+    const bool direct_calling_default_method =
+      resolved_klass() != NULL && resolved_method() != NULL &&
+      resolved_klass->is_interface() && !resolved_method->is_abstract();
+
+    if (!direct_calling_default_method &&
+        check_access &&
         // a) check if ACC_SUPER flag is set for the current class
         (current_klass->is_super() || !AllowNonVirtualCalls) &&
         // b) check if the method class is a superclass of the current class (superclass relation is not reflexive!)
-        current_klass->is_subtype_of(method_klass()) && current_klass() != method_klass() &&
+        current_klass->is_subtype_of(method_klass()) &&
+        current_klass() != method_klass() &&
         // c) check if the method is not <init>
         resolved_method->name() != vmSymbols::object_initializer_name()) {
       // Lookup super method
@@ -800,7 +818,7 @@
       if (sel_method.is_null()) {
         ResourceMark rm(THREAD);
         THROW_MSG(vmSymbols::java_lang_AbstractMethodError(),
-                  Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+                  Method::name_and_sig_as_C_string(resolved_klass(),
                                             resolved_method->name(),
                                             resolved_method->signature()));
       }
@@ -811,7 +829,7 @@
   if (sel_method->is_static()) {
     ResourceMark rm(THREAD);
     char buf[200];
-    jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+    jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", Method::name_and_sig_as_C_string(resolved_klass(),
                                                                                                              resolved_method->name(),
                                                                                                              resolved_method->signature()));
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
@@ -821,7 +839,7 @@
   if (sel_method->is_abstract()) {
     ResourceMark rm(THREAD);
     THROW_MSG(vmSymbols::java_lang_AbstractMethodError(),
-              Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+              Method::name_and_sig_as_C_string(resolved_klass(),
                                                       sel_method->name(),
                                                       sel_method->signature()));
   }
@@ -852,7 +870,7 @@
   if (resolved_method->is_static()) {
     ResourceMark rm(THREAD);
     char buf[200];
-    jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+    jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", Method::name_and_sig_as_C_string(resolved_klass(),
                                                                                                              resolved_method->name(),
                                                                                                              resolved_method->signature()));
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
@@ -881,12 +899,12 @@
 
   // Virtual methods cannot be resolved before its klass has been linked, for otherwise the Method*'s
   // has not been rewritten, and the vtable initialized.
-  assert(InstanceKlass::cast(resolved_method->method_holder())->is_linked(), "must be linked");
+  assert(resolved_method->method_holder()->is_linked(), "must be linked");
 
   // Virtual methods cannot be resolved before its klass has been linked, for otherwise the Method*'s
   // has not been rewritten, and the vtable initialized. Make sure to do this after the nullcheck, since
   // a missing receiver might result in a bogus lookup.
-  assert(InstanceKlass::cast(resolved_method->method_holder())->is_linked(), "must be linked");
+  assert(resolved_method->method_holder()->is_linked(), "must be linked");
 
   // do lookup based on receiver klass using the vtable index
   if (resolved_method->method_holder()->is_interface()) { // miranda method
@@ -921,7 +939,7 @@
   if (selected_method.is_null()) {
     ResourceMark rm(THREAD);
     THROW_MSG(vmSymbols::java_lang_AbstractMethodError(),
-              Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+              Method::name_and_sig_as_C_string(resolved_klass(),
                                                       resolved_method->name(),
                                                       resolved_method->signature()));
   }
@@ -930,7 +948,7 @@
   if (check_null_and_abstract && selected_method->is_abstract()) {
     ResourceMark rm(THREAD);
     THROW_MSG(vmSymbols::java_lang_AbstractMethodError(),
-              Method::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
+              Method::name_and_sig_as_C_string(resolved_klass(),
                                                       selected_method->name(),
                                                       selected_method->signature()));
   }
@@ -970,8 +988,8 @@
     ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Class %s does not implement the requested interface %s",
-                 (Klass::cast(recv_klass()))->external_name(),
-                 (Klass::cast(resolved_klass()))->external_name());
+                 recv_klass()->external_name(),
+                 resolved_klass()->external_name());
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
   }
   // do lookup based on receiver klass
@@ -983,7 +1001,7 @@
   if (sel_method.is_null()) {
     ResourceMark rm(THREAD);
     THROW_MSG(vmSymbols::java_lang_AbstractMethodError(),
-              Method::name_and_sig_as_C_string(Klass::cast(recv_klass()),
+              Method::name_and_sig_as_C_string(recv_klass(),
                                                       resolved_method->name(),
                                                       resolved_method->signature()));
   }
@@ -991,7 +1009,7 @@
   if (!sel_method->is_public()) {
     ResourceMark rm(THREAD);
     THROW_MSG(vmSymbols::java_lang_IllegalAccessError(),
-              Method::name_and_sig_as_C_string(Klass::cast(recv_klass()),
+              Method::name_and_sig_as_C_string(recv_klass(),
                                                       sel_method->name(),
                                                       sel_method->signature()));
   }
@@ -999,7 +1017,7 @@
   if (check_null_and_abstract && sel_method->is_abstract()) {
     ResourceMark rm(THREAD);
     THROW_MSG(vmSymbols::java_lang_AbstractMethodError(),
-              Method::name_and_sig_as_C_string(Klass::cast(recv_klass()),
+              Method::name_and_sig_as_C_string(recv_klass(),
                                                       sel_method->name(),
                                                       sel_method->signature()));
   }
--- a/src/share/vm/interpreter/rewriter.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/rewriter.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,13 +27,8 @@
 #include "interpreter/interpreter.hpp"
 #include "interpreter/rewriter.hpp"
 #include "memory/gcLocker.hpp"
-#include "memory/metadataFactory.hpp"
-#include "memory/oopFactory.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/generateOopMap.hpp"
-#include "oops/objArrayOop.hpp"
-#include "oops/oop.inline.hpp"
-#include "prims/methodComparator.hpp"
 #include "prims/methodHandles.hpp"
 
 // Computes a CPC map (new_index -> original_index) for constant pool entries
@@ -402,13 +397,6 @@
 }
 
 
-void Rewriter::rewrite(instanceKlassHandle klass, constantPoolHandle cpool, Array<Method*>* methods, TRAPS) {
-  ResourceMark rm(THREAD);
-  Rewriter     rw(klass, cpool, methods, CHECK);
-  // (That's all, folks.)
-}
-
-
 Rewriter::Rewriter(instanceKlassHandle klass, constantPoolHandle cpool, Array<Method*>* methods, TRAPS)
   : _klass(klass),
     _pool(cpool),
@@ -453,46 +441,25 @@
     restore_bytecodes();
     return;
   }
-}
 
-// Relocate jsr/rets in a method.  This can't be done with the rewriter
-// stage because it can throw other exceptions, leaving the bytecodes
-// pointing at constant pool cache entries.
-// Link and check jvmti dependencies while we're iterating over the methods.
-// JSR292 code calls with a different set of methods, so two entry points.
-void Rewriter::relocate_and_link(instanceKlassHandle this_oop, TRAPS) {
-  relocate_and_link(this_oop, this_oop->methods(), THREAD);
-}
-
-void Rewriter::relocate_and_link(instanceKlassHandle this_oop,
-                                 Array<Method*>* methods, TRAPS) {
-  int len = methods->length();
+  // Relocate after everything, but still do this under the is_rewritten flag,
+  // so methods with jsrs in custom class lists aren't attempted to be
+  // rewritten in the RO section of the shared archive.
+  // Relocated bytecodes don't have to be restored, only the cp cache entries.
   for (int i = len-1; i >= 0; i--) {
-    methodHandle m(THREAD, methods->at(i));
+    methodHandle m(THREAD, _methods->at(i));
 
     if (m->has_jsrs()) {
-      m = rewrite_jsrs(m, CHECK);
+      m = rewrite_jsrs(m, THREAD);
+      // Restore bytecodes to their unrewritten state if there are exceptions
+      // relocating bytecodes.  If some are relocated, that is ok because that
+      // doesn't affect constant pool to cpCache rewriting.
+      if (HAS_PENDING_EXCEPTION) {
+        restore_bytecodes();
+        return;
+      }
       // Method might have gotten rewritten.
       methods->at_put(i, m());
     }
-
-    // Set up method entry points for compiler and interpreter    .
-    m->link_method(m, CHECK);
-
-    // This is for JVMTI and unrelated to relocator but the last thing we do
-#ifdef ASSERT
-    if (StressMethodComparator) {
-      static int nmc = 0;
-      for (int j = i; j >= 0 && j >= i-4; j--) {
-        if ((++nmc % 1000) == 0)  tty->print_cr("Have run MethodComparator %d times...", nmc);
-        bool z = MethodComparator::methods_EMCP(m(),
-                   methods->at(j));
-        if (j == i && !z) {
-          tty->print("MethodComparator FAIL: "); m->print(); m->print_codes();
-          assert(z, "method must compare equal to itself");
-        }
-      }
-    }
-#endif //ASSERT
   }
 }
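
The switch from CHECK to THREAD in the rewrite_jsrs call is what makes the restore-on-exception path possible: CHECK returns to the caller as soon as an exception is pending, while passing THREAD lets this frame test HAS_PENDING_EXCEPTION itself and run restore_bytecodes() before unwinding. A stand-alone sketch of that idiom (the flag and functions are stand-ins):

#include <cstdio>

// Stand-in for the thread's pending-exception state.
static bool pending_exception = false;

static void rewrite_jsrs(bool fail) { pending_exception = fail; }

static void rewrite(bool fail) {
  rewrite_jsrs(fail);                  // called with THREAD, not CHECK
  if (pending_exception) {             // HAS_PENDING_EXCEPTION
    std::puts("restore_bytecodes()");  // cleanup before unwinding
    return;
  }
  std::puts("jsrs relocated");
}

int main() {
  rewrite(true);
  rewrite(false);
  return 0;
}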
--- a/src/share/vm/interpreter/rewriter.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/rewriter.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -158,14 +158,6 @@
  public:
   // Driver routine:
   static void rewrite(instanceKlassHandle klass, TRAPS);
-  static void rewrite(instanceKlassHandle klass, constantPoolHandle cpool, Array<Method*>* methods, TRAPS);
-
-  // Second pass, not gated by is_rewritten flag
-  static void relocate_and_link(instanceKlassHandle klass, TRAPS);
-  // JSR292 version to call with it's own methods.
-  static void relocate_and_link(instanceKlassHandle klass,
-                                Array<Method*>* methods, TRAPS);
-
 };
 
 #endif // SHARE_VM_INTERPRETER_REWRITER_HPP
--- a/src/share/vm/interpreter/templateInterpreter.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/interpreter/templateInterpreter.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -373,11 +373,7 @@
   method_entry(java_lang_math_pow  )
   method_entry(java_lang_ref_reference_get)
 
-  // method handle entry kinds are generated later in MethodHandlesAdapterGenerator::generate:
-  for (int i = Interpreter::method_handle_invoke_FIRST; i <= Interpreter::method_handle_invoke_LAST; i++) {
-    Interpreter::MethodKind kind = (Interpreter::MethodKind) i;
-    Interpreter::_entry_table[kind] = Interpreter::_entry_table[Interpreter::abstract];
-  }
+  initialize_method_handle_entries();
 
   // all native method kinds (must be one contiguous block)
   Interpreter::_native_entry_begin = Interpreter::code()->code_end();
--- a/src/share/vm/memory/allocation.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/allocation.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -66,10 +66,17 @@
 }
 
 bool MetaspaceObj::is_metadata() const {
-  // ClassLoaderDataGraph::contains((address)this); has lock inversion problems
+  // GC Verify checks use this in guarantees.
+  // TODO: either replace them with is_metaspace_object() or remove them.
+  // is_metaspace_object() is slower than this test.  This test doesn't
+  // seem very useful for metaspace objects anymore though.
   return !Universe::heap()->is_in_reserved(this);
 }
 
+bool MetaspaceObj::is_metaspace_object() const {
+  return Metaspace::contains((void*)this);
+}
+
 void MetaspaceObj::print_address_on(outputStream* st) const {
   st->print(" {"INTPTR_FORMAT"}", this);
 }
@@ -433,19 +440,18 @@
   NOT_PRODUCT(Atomic::inc(&_instance_count);)
 }
 
-Arena::Arena(Arena *a) : _chunk(a->_chunk), _hwm(a->_hwm), _max(a->_max), _first(a->_first) {
-  set_size_in_bytes(a->size_in_bytes());
-  NOT_PRODUCT(Atomic::inc(&_instance_count);)
-}
-
-
 Arena *Arena::move_contents(Arena *copy) {
   copy->destruct_contents();
   copy->_chunk = _chunk;
   copy->_hwm   = _hwm;
   copy->_max   = _max;
   copy->_first = _first;
-  copy->set_size_in_bytes(size_in_bytes());
+
+  // Work around a rare race condition in which native memory tracking
+  // could double-count the arena size.
+  size_t size = size_in_bytes();
+  set_size_in_bytes(0);
+  copy->set_size_in_bytes(size);
   // Destroy original arena
   reset();
   return copy;            // Return Arena with contents
@@ -497,6 +503,9 @@
     char* end = _first->next() ? _first->top() : _hwm;
     free_malloced_objects(_first, _first->bottom(), end, _hwm);
   }
+  // Reset the size before chop to avoid a rare race condition that can
+  // make the total arena memory exceed the total chunk memory.
+  set_size_in_bytes(0);
   _first->chop();
   reset();
 }
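
Both hunks zero the donor arena's size before its chunks change hands, which narrows the window in which native memory tracking could observe the same bytes attributed twice (or attributed to an arena whose chunks are already chopped). A simplified sketch of the ordering (plain fields stand in for the tracked counters):

#include <cstddef>

struct ArenaAcct { size_t size_in_bytes; };

// If the donor still reported its size while the recipient's was set, a
// concurrent tracker summing both would count the same bytes twice.
static void move_contents(ArenaAcct& from, ArenaAcct& to) {
  size_t size = from.size_in_bytes;
  from.size_in_bytes = 0;    // retire the donor's claim first ...
  to.size_in_bytes   = size; // ... then credit the recipient
}

int main() {
  ArenaAcct a = { 4096 }, b = { 0 };
  move_contents(a, b);
  return (a.size_in_bytes == 0 && b.size_in_bytes == 4096) ? 0 : 1;
}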
--- a/src/share/vm/memory/allocation.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/allocation.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -144,8 +144,10 @@
   mtNMT               = 0x0A00,  // memory used by native memory tracking
   mtChunk             = 0x0B00,  // chunk that holds content of arenas
   mtJavaHeap          = 0x0C00,  // Java heap
-  mtDontTrack         = 0x0D00,  // memory we donot or cannot track
-  mt_number_of_types  = 0x000C,  // number of memory types
+  mtClassShared       = 0x0D00,  // class data sharing
+  mt_number_of_types  = 0x000D,  // number of memory types (mtDontTrack
+                                 // is not included as a valid type)
+  mtDontTrack         = 0x0E00,  // memory we do not or cannot track
   mt_masks            = 0x7F00,
 
   // object type mask
@@ -200,7 +202,7 @@
 // Calling new or delete will result in fatal error.
 
 class StackObj ALLOCATION_SUPER_CLASS_SPEC {
- public:
+ private:
   void* operator new(size_t size);
   void  operator delete(void* p);
 };
@@ -224,7 +226,7 @@
 // be defined as a an empty string "".
 //
 class _ValueObj {
- public:
+ private:
   void* operator new(size_t size);
   void operator delete(void* p);
 };
@@ -243,6 +245,7 @@
 class MetaspaceObj {
  public:
   bool is_metadata() const;
+  bool is_metaspace_object() const;  // more specific test but slower
   bool is_shared() const;
   void print_address_on(outputStream* st) const;  // nonvirtual address printing
 
@@ -342,7 +345,6 @@
  public:
   Arena();
   Arena(size_t init_size);
-  Arena(Arena *old);
   ~Arena();
   void  destruct_contents();
   char* hwm() const             { return _hwm; }
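
Making operator new and operator delete private in StackObj and _ValueObj turns heap allocation of their subclasses into a compile-time error instead of relying on a run-time fatal error. A stand-alone sketch of the effect:

#include <cstddef>

// Declared private and never defined: heap allocation of subclasses now
// fails at compile (or link) time rather than at run time.
class StackOnly {
 private:
  void* operator new(std::size_t);
  void  operator delete(void*);
 public:
  int x;
};

int main() {
  StackOnly ok;    // fine: automatic (stack) storage
  ok.x = 1;
  // StackOnly* bad = new StackOnly();  // error: operator new is private
  return 0;
}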
--- a/src/share/vm/memory/binaryTreeDictionary.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/binaryTreeDictionary.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,9 +25,15 @@
 #include "precompiled.hpp"
 #include "gc_implementation/shared/allocationStats.hpp"
 #include "memory/binaryTreeDictionary.hpp"
+#include "memory/freeList.hpp"
+#include "memory/freeBlockDictionary.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
 #include "runtime/globals.hpp"
 #include "utilities/ostream.hpp"
 #ifndef SERIALGC
+#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
+#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
 #endif // SERIALGC
@@ -37,15 +43,18 @@
 // This is currently used in the Concurrent Mark&Sweep implementation.
 ////////////////////////////////////////////////////////////////////////////////
 
-template <class Chunk>
-TreeChunk<Chunk>* TreeChunk<Chunk>::as_TreeChunk(Chunk* fc) {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t TreeChunk<Chunk_t, FreeList_t>::_min_tree_chunk_size = sizeof(TreeChunk<Chunk_t,  FreeList_t>)/HeapWordSize;
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(Chunk_t* fc) {
   // Do some assertion checking here.
-  return (TreeChunk<Chunk>*) fc;
+  return (TreeChunk<Chunk_t, FreeList_t>*) fc;
 }
 
-template <class Chunk>
-void TreeChunk<Chunk>::verify_tree_chunk_list() const {
-  TreeChunk<Chunk>* nextTC = (TreeChunk<Chunk>*)next();
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeChunk<Chunk_t, FreeList_t>::verify_tree_chunk_list() const {
+  TreeChunk<Chunk_t, FreeList_t>* nextTC = (TreeChunk<Chunk_t, FreeList_t>*)next();
   if (prev() != NULL) { // interior list node shouldn't have tree fields
     guarantee(embedded_list()->parent() == NULL && embedded_list()->left() == NULL &&
               embedded_list()->right()  == NULL, "should be clear");
@@ -57,53 +66,114 @@
   }
 }
 
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>::TreeList() : _parent(NULL),
+  _left(NULL), _right(NULL) {}
 
-template <class Chunk>
-TreeList<Chunk>* TreeList<Chunk>::as_TreeList(TreeChunk<Chunk>* tc) {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+TreeList<Chunk_t, FreeList_t>::as_TreeList(TreeChunk<Chunk_t,FreeList_t>* tc) {
   // This first free chunk in the list will be the tree list.
-  assert(tc->size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "Chunk is too small for a TreeChunk");
-  TreeList<Chunk>* tl = tc->embedded_list();
+  assert((tc->size() >= (TreeChunk<Chunk_t, FreeList_t>::min_size())),
+    "Chunk is too small for a TreeChunk");
+  TreeList<Chunk_t, FreeList_t>* tl = tc->embedded_list();
+  tl->initialize();
   tc->set_list(tl);
-#ifdef ASSERT
-  tl->set_protecting_lock(NULL);
-#endif
-  tl->set_hint(0);
   tl->set_size(tc->size());
   tl->link_head(tc);
   tl->link_tail(tc);
   tl->set_count(1);
-  tl->init_statistics(true /* split_birth */);
-  tl->set_parent(NULL);
-  tl->set_left(NULL);
-  tl->set_right(NULL);
+  assert(tl->parent() == NULL, "Should be clear");
+  return tl;
+}
+
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+get_chunk(size_t size, enum FreeBlockDictionary<Chunk_t>::Dither dither) {
+  FreeBlockDictionary<Chunk_t>::verify_par_locked();
+  Chunk_t* res = get_chunk_from_tree(size, dither);
+  assert(res == NULL || res->is_free(),
+         "Should be returning a free chunk");
+  assert(dither != FreeBlockDictionary<Chunk_t>::exactly ||
+         res->size() == size, "Not correct size");
+  return res;
+}
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+TreeList<Chunk_t, FreeList_t>::as_TreeList(HeapWord* addr, size_t size) {
+  TreeChunk<Chunk_t, FreeList_t>* tc = (TreeChunk<Chunk_t, FreeList_t>*) addr;
+  assert((size >= TreeChunk<Chunk_t, FreeList_t>::min_size()),
+    "Chunk is too small for a TreeChunk");
+  // The space will have been mangled initially but
+  // is not remangled when a Chunk_t is returned to the free list
+  // (since it is used to maintain the chunk on the free list).
+  tc->assert_is_mangled();
+  tc->set_size(size);
+  tc->link_prev(NULL);
+  tc->link_next(NULL);
+  TreeList<Chunk_t, FreeList_t>* tl = TreeList<Chunk_t, FreeList_t>::as_TreeList(tc);
   return tl;
 }
 
-template <class Chunk>
-TreeList<Chunk>* TreeList<Chunk>::as_TreeList(HeapWord* addr, size_t size) {
-  TreeChunk<Chunk>* tc = (TreeChunk<Chunk>*) addr;
-  assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "Chunk is too small for a TreeChunk");
-  // The space in the heap will have been mangled initially but
-  // is not remangled when a free chunk is returned to the free list
-  // (since it is used to maintain the chunk on the free list).
-  assert((ZapUnusedHeapArea &&
-          SpaceMangler::is_mangled((HeapWord*) tc->size_addr()) &&
-          SpaceMangler::is_mangled((HeapWord*) tc->prev_addr()) &&
-          SpaceMangler::is_mangled((HeapWord*) tc->next_addr())) ||
-          (tc->size() == 0 && tc->prev() == NULL && tc->next() == NULL),
-    "Space should be clear or mangled");
-  tc->set_size(size);
-  tc->link_prev(NULL);
-  tc->link_next(NULL);
-  TreeList<Chunk>* tl = TreeList<Chunk>::as_TreeList(tc);
-  return tl;
+
+#ifndef SERIALGC
+// Specialize for AdaptiveFreeList which tries to avoid
+// splitting a chunk of a size that is under-populated in favor of
+// an over-populated size.  The general get_better_list() just returns
+// the current list.
+template <>
+TreeList<FreeChunk, AdaptiveFreeList>*
+TreeList<FreeChunk, AdaptiveFreeList>::get_better_list(
+  BinaryTreeDictionary<FreeChunk, ::AdaptiveFreeList>* dictionary) {
+  // A candidate chunk has been found.  If it is already under
+  // populated, get a chunk associated with the hint for this
+  // chunk.
+
+  TreeList<FreeChunk, ::AdaptiveFreeList>* curTL = this;
+  if (surplus() <= 0) {
+    /* Use the hint to find a size with a surplus, and reset the hint. */
+    TreeList<FreeChunk, ::AdaptiveFreeList>* hintTL = this;
+    while (hintTL->hint() != 0) {
+      assert(hintTL->hint() > hintTL->size(),
+        "hint points in the wrong direction");
+      hintTL = dictionary->find_list(hintTL->hint());
+      assert(curTL != hintTL, "Infinite loop");
+      if (hintTL == NULL ||
+          hintTL == curTL /* Should not happen but protect against it */ ) {
+        // No useful hint.  Set the hint to NULL and go on.
+        curTL->set_hint(0);
+        break;
+      }
+      assert(hintTL->size() > curTL->size(), "hint is inconsistent");
+      if (hintTL->surplus() > 0) {
+        // The hint led to a list that has a surplus.  Use it.
+        // Set the hint for the candidate to an overpopulated
+        // size.
+        curTL->set_hint(hintTL->size());
+        // Change the candidate.
+        curTL = hintTL;
+        break;
+      }
+    }
+  }
+  return curTL;
+}
+#endif // SERIALGC
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+TreeList<Chunk_t, FreeList_t>::get_better_list(
+  BinaryTreeDictionary<Chunk_t, FreeList_t>* dictionary) {
+  return this;
 }
 
-template <class Chunk>
-TreeList<Chunk>* TreeList<Chunk>::remove_chunk_replace_if_needed(TreeChunk<Chunk>* tc) {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::remove_chunk_replace_if_needed(TreeChunk<Chunk_t, FreeList_t>* tc) {
 
-  TreeList<Chunk>* retTL = this;
-  Chunk* list = head();
+  TreeList<Chunk_t, FreeList_t>* retTL = this;
+  Chunk_t* list = head();
   assert(!list || list != list->next(), "Chunk on list twice");
   assert(tc != NULL, "Chunk being removed is NULL");
   assert(parent() == NULL || this == parent()->left() ||
@@ -112,13 +182,13 @@
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
 
-  Chunk* prevFC = tc->prev();
-  TreeChunk<Chunk>* nextTC = TreeChunk<Chunk>::as_TreeChunk(tc->next());
+  Chunk_t* prevFC = tc->prev();
+  TreeChunk<Chunk_t, FreeList_t>* nextTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(tc->next());
   assert(list != NULL, "should have at least the target chunk");
 
   // Is this the first item on the list?
   if (tc == list) {
-    // The "getChunk..." functions for a TreeList<Chunk> will not return the
+    // The "getChunk..." functions for a TreeList<Chunk_t, FreeList_t> will not return the
     // first chunk in the list unless it is the last chunk in the list
     // because the first chunk is also acting as the tree node.
     // When coalescing happens, however, the first chunk in the a tree
@@ -127,8 +197,8 @@
     // allocated when the sweeper yields (giving up the free list lock)
     // to allow mutator activity.  If this chunk is the first in the
     // list and is not the last in the list, do the work to copy the
-    // TreeList<Chunk> from the first chunk to the next chunk and update all
-    // the TreeList<Chunk> pointers in the chunks in the list.
+    // TreeList<Chunk_t, FreeList_t> from the first chunk to the next chunk and update all
+    // the TreeList<Chunk_t, FreeList_t> pointers in the chunks in the list.
     if (nextTC == NULL) {
       assert(prevFC == NULL, "Not last chunk in the list");
       set_tail(NULL);
@@ -141,11 +211,11 @@
       // This can be slow for a long list.  Consider having
       // an option that does not allow the first chunk on the
       // list to be coalesced.
-      for (TreeChunk<Chunk>* curTC = nextTC; curTC != NULL;
-          curTC = TreeChunk<Chunk>::as_TreeChunk(curTC->next())) {
+      for (TreeChunk<Chunk_t, FreeList_t>* curTC = nextTC; curTC != NULL;
+          curTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(curTC->next())) {
         curTC->set_list(retTL);
       }
-      // Fix the parent to point to the new TreeList<Chunk>.
+      // Fix the parent to point to the new TreeList<Chunk_t, FreeList_t>.
       if (retTL->parent() != NULL) {
         if (this == retTL->parent()->left()) {
           retTL->parent()->set_left(retTL);
@@ -170,15 +240,15 @@
   } else {
     if (nextTC == NULL) {
       // Removing chunk at tail of list
-      link_tail(prevFC);
+      this->link_tail(prevFC);
     }
     // Chunk is interior to the list
     prevFC->link_after(nextTC);
   }
 
-  // Below this point the embeded TreeList<Chunk> being used for the
+  // Below this point the embedded TreeList<Chunk_t, FreeList_t> being used for the
   // tree node may have changed. Don't use "this"
-  // TreeList<Chunk>*.
+  // TreeList<Chunk_t, FreeList_t>*.
   // chunk should still be a free chunk (bit set in _prev)
   assert(!retTL->head() || retTL->size() == retTL->head()->size(),
     "Wrong sized chunk in list");
@@ -188,7 +258,7 @@
     tc->set_list(NULL);
     bool prev_found = false;
     bool next_found = false;
-    for (Chunk* curFC = retTL->head();
+    for (Chunk_t* curFC = retTL->head();
          curFC != NULL; curFC = curFC->next()) {
       assert(curFC != tc, "Chunk is still in list");
       if (curFC == prevFC) {
@@ -215,71 +285,81 @@
   return retTL;
 }
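
Most of the subtlety above comes from one fact: the tree node is the TreeList embedded inside the first chunk, so removing the first chunk means relocating the header into the next chunk and re-pointing every surviving chunk at it. A toy model of just that migration (struct names invented; no tree, no sizes):

#include <cstddef>
#include <cstdio>

struct Elem;
struct Header { Elem* head; };
struct Elem {
  Elem*   next;
  Header* list;       // back-pointer to the active header
  Header  embedded;   // header storage carried inside every element
};

// Remove the first element; the header migrates into the new first one.
// Returns the relocated header, just as remove_chunk_replace_if_needed()
// returns the possibly-replaced TreeList*.
Header* remove_first(Header* h) {
  Elem* old_first = h->head;
  Elem* new_first = old_first->next;
  Header* ret = &new_first->embedded;     // header now lives here
  ret->head = new_first;
  for (Elem* e = new_first; e != NULL; e = e->next) {
    e->list = ret;                        // repoint survivors (the slow loop)
  }
  old_first->list = NULL;                 // detached element owns nothing
  return ret;                             // caller repairs parent/child links
}

int main() {
  Elem c = { NULL, NULL, { NULL } };
  Elem b = { &c,   NULL, { NULL } };
  Elem a = { &b,   NULL, { NULL } };
  Header* h = &a.embedded;
  h->head = &a;
  a.list = b.list = c.list = h;
  h = remove_first(h);
  std::printf("migrated: %d\n", h == &b.embedded && c.list == h);  // 1
}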
 
-template <class Chunk>
-void TreeList<Chunk>::return_chunk_at_tail(TreeChunk<Chunk>* chunk) {
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeList<Chunk_t, FreeList_t>::return_chunk_at_tail(TreeChunk<Chunk_t, FreeList_t>* chunk) {
   assert(chunk != NULL, "returning NULL chunk");
   assert(chunk->list() == this, "list should be set for chunk");
   assert(tail() != NULL, "The tree list is embedded in the first chunk");
   // which means that the list can never be empty.
-  assert(!verify_chunk_in_free_list(chunk), "Double entry");
+  assert(!this->verify_chunk_in_free_list(chunk), "Double entry");
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
 
-  Chunk* fc = tail();
+  Chunk_t* fc = tail();
   fc->link_after(chunk);
-  link_tail(chunk);
+  this->link_tail(chunk);
 
   assert(!tail() || size() == tail()->size(), "Wrong sized chunk in list");
-  increment_count();
-  debug_only(increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));)
+  FreeList_t<Chunk_t>::increment_count();
+  debug_only(this->increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));)
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
 }
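
The this-> and FreeList_t<Chunk_t>:: qualifications this patch adds to previously unqualified calls are required, not cosmetic: once the base class depends on a template parameter, unqualified names are no longer looked up in it at template definition time (two-phase name lookup). A minimal demonstration of the rule, with invented names:

#include <cstdio>

template <class T> struct Base {
  void link_tail()       { std::printf("tail linked\n"); }
  void increment_count() { ++_count; }
  int  _count;
};

template <class T, template <class> class Base_t>
struct Derived : public Base_t<T> {
  void work() {
    // link_tail();                  // error: dependent base not searched
    this->link_tail();               // OK: lookup deferred to instantiation
    Base_t<T>::increment_count();    // OK: explicit qualification
  }
};

int main() {
  Derived<int, Base> d;
  d._count = 0;
  d.work();
  std::printf("count: %d\n", d._count);   // 1
}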
 
 // Add this chunk at the head of the list.  "At the head of the list"
 // is defined to be after the chunk pointed to by head().  This is
-// because the TreeList<Chunk> is embedded in the first TreeChunk<Chunk> in the
-// list.  See the definition of TreeChunk<Chunk>.
-template <class Chunk>
-void TreeList<Chunk>::return_chunk_at_head(TreeChunk<Chunk>* chunk) {
+// because the TreeList<Chunk_t, FreeList_t> is embedded in the first TreeChunk<Chunk_t, FreeList_t> in the
+// list.  See the definition of TreeChunk<Chunk_t, FreeList_t>.
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeList<Chunk_t, FreeList_t>::return_chunk_at_head(TreeChunk<Chunk_t, FreeList_t>* chunk) {
   assert(chunk->list() == this, "list should be set for chunk");
   assert(head() != NULL, "The tree list is embedded in the first chunk");
   assert(chunk != NULL, "returning NULL chunk");
-  assert(!verify_chunk_in_free_list(chunk), "Double entry");
+  assert(!this->verify_chunk_in_free_list(chunk), "Double entry");
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
 
-  Chunk* fc = head()->next();
+  Chunk_t* fc = head()->next();
   if (fc != NULL) {
     chunk->link_after(fc);
   } else {
     assert(tail() == NULL, "List is inconsistent");
-    link_tail(chunk);
+    this->link_tail(chunk);
   }
   head()->link_after(chunk);
   assert(!head() || size() == head()->size(), "Wrong sized chunk in list");
-  increment_count();
-  debug_only(increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));)
+  FreeList_t<Chunk_t>::increment_count();
+  debug_only(this->increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));)
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
 }
 
-template <class Chunk>
-TreeChunk<Chunk>* TreeList<Chunk>::head_as_TreeChunk() {
-  assert(head() == NULL || TreeChunk<Chunk>::as_TreeChunk(head())->list() == this,
-    "Wrong type of chunk?");
-  return TreeChunk<Chunk>::as_TreeChunk(head());
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeChunk<Chunk_t, FreeList_t>::assert_is_mangled() const {
+  assert((ZapUnusedHeapArea &&
+          SpaceMangler::is_mangled((HeapWord*) Chunk_t::size_addr()) &&
+          SpaceMangler::is_mangled((HeapWord*) Chunk_t::prev_addr()) &&
+          SpaceMangler::is_mangled((HeapWord*) Chunk_t::next_addr())) ||
+          (size() == 0 && prev() == NULL && next() == NULL),
+    "Space should be clear or mangled");
 }
 
-template <class Chunk>
-TreeChunk<Chunk>* TreeList<Chunk>::first_available() {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::head_as_TreeChunk() {
+  assert(head() == NULL || (TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(head())->list() == this),
+    "Wrong type of chunk?");
+  return TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(head());
+}
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::first_available() {
   assert(head() != NULL, "The head of the list cannot be NULL");
-  Chunk* fc = head()->next();
-  TreeChunk<Chunk>* retTC;
+  Chunk_t* fc = head()->next();
+  TreeChunk<Chunk_t, FreeList_t>* retTC;
   if (fc == NULL) {
     retTC = head_as_TreeChunk();
   } else {
-    retTC = TreeChunk<Chunk>::as_TreeChunk(fc);
+    retTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(fc);
   }
   assert(retTC->list() == this, "Wrong type of chunk.");
   return retTC;
@@ -288,41 +368,32 @@
 // Returns the block with the largest heap address amongst
 // those in the list for this size; potentially slow and expensive,
 // use with caution!
-template <class Chunk>
-TreeChunk<Chunk>* TreeList<Chunk>::largest_address() {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::largest_address() {
   assert(head() != NULL, "The head of the list cannot be NULL");
-  Chunk* fc = head()->next();
-  TreeChunk<Chunk>* retTC;
+  Chunk_t* fc = head()->next();
+  TreeChunk<Chunk_t, FreeList_t>* retTC;
   if (fc == NULL) {
     retTC = head_as_TreeChunk();
   } else {
     // walk down the list and return the one with the highest
     // heap address among chunks of this size.
-    Chunk* last = fc;
+    Chunk_t* last = fc;
     while (fc->next() != NULL) {
       if ((HeapWord*)last < (HeapWord*)fc) {
         last = fc;
       }
       fc = fc->next();
     }
-    retTC = TreeChunk<Chunk>::as_TreeChunk(last);
+    retTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(last);
   }
   assert(retTC->list() == this, "Wrong type of chunk.");
   return retTC;
 }
 
-template <class Chunk>
-BinaryTreeDictionary<Chunk>::BinaryTreeDictionary(bool adaptive_freelists, bool splay) :
-  _splay(splay), _adaptive_freelists(adaptive_freelists),
-  _total_size(0), _total_free_blocks(0), _root(0) {}
-
-template <class Chunk>
-BinaryTreeDictionary<Chunk>::BinaryTreeDictionary(MemRegion mr,
-                                           bool adaptive_freelists,
-                                           bool splay):
-  _adaptive_freelists(adaptive_freelists), _splay(splay)
-{
-  assert(mr.word_size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
+template <class Chunk_t, template <class> class FreeList_t>
+BinaryTreeDictionary<Chunk_t, FreeList_t>::BinaryTreeDictionary(MemRegion mr) {
+  assert((mr.byte_size() > min_size()), "minimum chunk size");
 
   reset(mr);
   assert(root()->left() == NULL, "reset check failed");
@@ -333,52 +404,48 @@
   assert(total_free_blocks() == 1, "reset check failed");
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::inc_total_size(size_t inc) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::inc_total_size(size_t inc) {
   _total_size = _total_size + inc;
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::dec_total_size(size_t dec) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::dec_total_size(size_t dec) {
   _total_size = _total_size - dec;
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::reset(MemRegion mr) {
-  assert(mr.word_size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
-  set_root(TreeList<Chunk>::as_TreeList(mr.start(), mr.word_size()));
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::reset(MemRegion mr) {
+  assert((mr.byte_size() > min_size()), "minimum chunk size");
+  set_root(TreeList<Chunk_t, FreeList_t>::as_TreeList(mr.start(), mr.word_size()));
   set_total_size(mr.word_size());
   set_total_free_blocks(1);
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::reset(HeapWord* addr, size_t byte_size) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::reset(HeapWord* addr, size_t byte_size) {
   MemRegion mr(addr, heap_word_size(byte_size));
   reset(mr);
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::reset() {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::reset() {
   set_root(NULL);
   set_total_size(0);
   set_total_free_blocks(0);
 }
 
 // Get a free block of size at least size from tree, or NULL.
-// If a splay step is requested, the removal algorithm (only) incorporates
-// a splay step as follows:
-// . the search proceeds down the tree looking for a possible
-//   match. At the (closest) matching location, an appropriate splay step is applied
-//   (zig, zig-zig or zig-zag). A chunk of the appropriate size is then returned
-//   if available, and if it's the last chunk, the node is deleted. A deteleted
-//   node is replaced in place by its tree successor.
-template <class Chunk>
-TreeChunk<Chunk>*
-BinaryTreeDictionary<Chunk>::get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither, bool splay)
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>*
+BinaryTreeDictionary<Chunk_t, FreeList_t>::get_chunk_from_tree(
+                              size_t size,
+                              enum FreeBlockDictionary<Chunk_t>::Dither dither)
 {
-  TreeList<Chunk> *curTL, *prevTL;
-  TreeChunk<Chunk>* retTC = NULL;
-  assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
+  TreeList<Chunk_t, FreeList_t> *curTL, *prevTL;
+  TreeChunk<Chunk_t, FreeList_t>* retTC = NULL;
+
+  assert((size >= min_size()), "minimum chunk size");
   if (FLSVerifyDictionary) {
     verify_tree();
   }
@@ -398,7 +465,7 @@
   }
   if (curTL == NULL) { // couldn't find exact match
 
-    if (dither == FreeBlockDictionary<Chunk>::exactly) return NULL;
+    if (dither == FreeBlockDictionary<Chunk_t>::exactly) return NULL;
 
     // try and find the next larger size by walking back up the search path
     for (curTL = prevTL; curTL != NULL;) {
@@ -410,46 +477,9 @@
   }
   if (curTL != NULL) {
     assert(curTL->size() >= size, "size inconsistency");
-    if (adaptive_freelists()) {
 
-      // A candidate chunk has been found.  If it is already under
-      // populated, get a chunk associated with the hint for this
-      // chunk.
-      if (curTL->surplus() <= 0) {
-        /* Use the hint to find a size with a surplus, and reset the hint. */
-        TreeList<Chunk>* hintTL = curTL;
-        while (hintTL->hint() != 0) {
-          assert(hintTL->hint() == 0 || hintTL->hint() > hintTL->size(),
-            "hint points in the wrong direction");
-          hintTL = find_list(hintTL->hint());
-          assert(curTL != hintTL, "Infinite loop");
-          if (hintTL == NULL ||
-              hintTL == curTL /* Should not happen but protect against it */ ) {
-            // No useful hint.  Set the hint to NULL and go on.
-            curTL->set_hint(0);
-            break;
-          }
-          assert(hintTL->size() > size, "hint is inconsistent");
-          if (hintTL->surplus() > 0) {
-            // The hint led to a list that has a surplus.  Use it.
-            // Set the hint for the candidate to an overpopulated
-            // size.
-            curTL->set_hint(hintTL->size());
-            // Change the candidate.
-            curTL = hintTL;
-            break;
-          }
-          // The evm code reset the hint of the candidate as
-          // at an interim point.  Why?  Seems like this leaves
-          // the hint pointing to a list that didn't work.
-          // curTL->set_hint(hintTL->size());
-        }
-      }
-    }
-    // don't waste time splaying if chunk's singleton
-    if (splay && curTL->head()->next() != NULL) {
-      semi_splay_step(curTL);
-    }
+    curTL = curTL->get_better_list(this);
+
     retTC = curTL->first_available();
     assert((retTC != NULL) && (curTL->count() > 0),
       "A list in the binary tree should not be NULL");
@@ -465,9 +495,9 @@
   return retTC;
 }
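
Stripped of the hint machinery, get_chunk_from_tree() is a best-fit search: descend the tree comparing sizes, and if no exact match exists take the smallest node that is still large enough (the atLeast dither). The real code backs up the saved search path via prevTL; a self-contained equivalent that records the fallback on the way down instead (toy node type):

#include <cstddef>
#include <cstdio>

struct Node {
  size_t size;
  Node *left, *right;
};

// Smallest node with node->size >= request, or NULL.
// 'exactly' mimics FreeBlockDictionary::exactly: exact size or nothing.
Node* find_best_fit(Node* root, size_t request, bool exactly) {
  Node* best = NULL;                      // last viable node seen
  for (Node* cur = root; cur != NULL; ) {
    if (cur->size == request) return cur; // exact match wins immediately
    if (cur->size < request) {
      cur = cur->right;                   // too small: look larger
    } else {
      best = cur;                         // viable: try to fit tighter
      cur = cur->left;
    }
  }
  return exactly ? NULL : best;
}

int main() {
  Node n8  = {  8, NULL, NULL };
  Node n32 = { 32, NULL, NULL };
  Node n16 = { 16, &n8, &n32 };
  Node* fit = find_best_fit(&n16, 20, false);
  std::printf("fit for 20: %zu\n", fit ? fit->size : 0);  // 32
}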
 
-template <class Chunk>
-TreeList<Chunk>* BinaryTreeDictionary<Chunk>::find_list(size_t size) const {
-  TreeList<Chunk>* curTL;
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>* BinaryTreeDictionary<Chunk_t, FreeList_t>::find_list(size_t size) const {
+  TreeList<Chunk_t, FreeList_t>* curTL;
   for (curTL = root(); curTL != NULL;) {
     if (curTL->size() == size) {        // exact match
       break;
@@ -484,10 +514,10 @@
 }
 
 
-template <class Chunk>
-bool BinaryTreeDictionary<Chunk>::verify_chunk_in_free_list(Chunk* tc) const {
+template <class Chunk_t, template <class> class FreeList_t>
+bool BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_chunk_in_free_list(Chunk_t* tc) const {
   size_t size = tc->size();
-  TreeList<Chunk>* tl = find_list(size);
+  TreeList<Chunk_t, FreeList_t>* tl = find_list(size);
   if (tl == NULL) {
     return false;
   } else {
@@ -495,9 +525,9 @@
   }
 }
 
-template <class Chunk>
-Chunk* BinaryTreeDictionary<Chunk>::find_largest_dict() const {
-  TreeList<Chunk> *curTL = root();
+template <class Chunk_t, template <class> class FreeList_t>
+Chunk_t* BinaryTreeDictionary<Chunk_t, FreeList_t>::find_largest_dict() const {
+  TreeList<Chunk_t, FreeList_t> *curTL = root();
   if (curTL != NULL) {
     while(curTL->right() != NULL) curTL = curTL->right();
     return curTL->largest_address();
@@ -510,15 +540,15 @@
 // chunk in a list on a tree node, just unlink it.
 // If it is the last chunk in the list (the next link is NULL),
 // remove the node and repair the tree.
-template <class Chunk>
-TreeChunk<Chunk>*
-BinaryTreeDictionary<Chunk>::remove_chunk_from_tree(TreeChunk<Chunk>* tc) {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>*
+BinaryTreeDictionary<Chunk_t, FreeList_t>::remove_chunk_from_tree(TreeChunk<Chunk_t, FreeList_t>* tc) {
   assert(tc != NULL, "Should not call with a NULL chunk");
   assert(tc->is_free(), "Header is not marked correctly");
 
-  TreeList<Chunk> *newTL, *parentTL;
-  TreeChunk<Chunk>* retTC;
-  TreeList<Chunk>* tl = tc->list();
+  TreeList<Chunk_t, FreeList_t> *newTL, *parentTL;
+  TreeChunk<Chunk_t, FreeList_t>* retTC;
+  TreeList<Chunk_t, FreeList_t>* tl = tc->list();
   debug_only(
     bool removing_only_chunk = false;
     if (tl == _root) {
@@ -538,8 +568,8 @@
 
   retTC = tc;
   // Removing this chunk can have the side effect of changing the node
-  // (TreeList<Chunk>*) in the tree.  If the node is the root, update it.
-  TreeList<Chunk>* replacementTL = tl->remove_chunk_replace_if_needed(tc);
+  // (TreeList<Chunk_t, FreeList_t>*) in the tree.  If the node is the root, update it.
+  TreeList<Chunk_t, FreeList_t>* replacementTL = tl->remove_chunk_replace_if_needed(tc);
   assert(tc->is_free(), "Chunk should still be free");
   assert(replacementTL->parent() == NULL ||
          replacementTL == replacementTL->parent()->left() ||
@@ -549,17 +579,18 @@
     assert(replacementTL->parent() == NULL, "Incorrectly replacing root");
     set_root(replacementTL);
   }
-  debug_only(
+#ifdef ASSERT
     if (tl != replacementTL) {
       assert(replacementTL->head() != NULL,
         "If the tree list was replaced, it should not be a NULL list");
-      TreeList<Chunk>* rhl = replacementTL->head_as_TreeChunk()->list();
-      TreeList<Chunk>* rtl = TreeChunk<Chunk>::as_TreeChunk(replacementTL->tail())->list();
+      TreeList<Chunk_t, FreeList_t>* rhl = replacementTL->head_as_TreeChunk()->list();
+      TreeList<Chunk_t, FreeList_t>* rtl =
+        TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(replacementTL->tail())->list();
       assert(rhl == replacementTL, "Broken head");
       assert(rtl == replacementTL, "Broken tail");
       assert(replacementTL->size() == tc->size(),  "Broken size");
     }
-  )
+#endif
 
   // Does the tree need to be repaired?
   if (replacementTL->count() == 0) {
@@ -574,7 +605,7 @@
     } else if (replacementTL->right() == NULL) {
       // right is NULL
       newTL = replacementTL->left();
-      debug_only(replacementTL->clearLeft();)
+      debug_only(replacementTL->clear_left();)
     } else {  // we have both children, so, by patriarchal convention,
               // my replacement is least node in right sub-tree
       complicated_splice = true;
@@ -623,7 +654,7 @@
       newTL->set_right(replacementTL->right());
       debug_only(
         replacementTL->clear_right();
-        replacementTL->clearLeft();
+        replacementTL->clear_left();
       )
     }
     assert(replacementTL->right() == NULL &&
@@ -644,21 +675,21 @@
     verify_tree();
   }
   assert(!removing_only_chunk || _root == NULL, "root should be NULL");
-  return TreeChunk<Chunk>::as_TreeChunk(retTC);
+  return TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(retTC);
 }
 
 // Remove the leftmost node (lm) in the tree and return it.
 // If lm has a right child, link it to the left node of
 // the parent of lm.
-template <class Chunk>
-TreeList<Chunk>* BinaryTreeDictionary<Chunk>::remove_tree_minimum(TreeList<Chunk>* tl) {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>* BinaryTreeDictionary<Chunk_t, FreeList_t>::remove_tree_minimum(TreeList<Chunk_t, FreeList_t>* tl) {
   assert(tl != NULL && tl->parent() != NULL, "really need a proper sub-tree");
   // locate the subtree minimum by walking down left branches
-  TreeList<Chunk>* curTL = tl;
+  TreeList<Chunk_t, FreeList_t>* curTL = tl;
   for (; curTL->left() != NULL; curTL = curTL->left());
   // obviously curTL now has at most one child, a right child
   if (curTL != root()) {  // Should this test just be removed?
-    TreeList<Chunk>* parentTL = curTL->parent();
+    TreeList<Chunk_t, FreeList_t>* parentTL = curTL->parent();
     if (parentTL->left() == curTL) { // curTL is a left child
       parentTL->set_left(curTL->right());
     } else {
@@ -685,31 +716,14 @@
   return curTL;
 }
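
remove_tree_minimum() is the textbook successor splice: the leftmost node of a subtree has no left child, so detaching it only requires handing its (possible) right child to its parent. A free-standing version with parent pointers (toy node type):

#include <cstddef>
#include <cstdio>

struct Node {
  int key;
  Node *left, *right, *parent;
};

// Detach and return the minimum of the subtree rooted at 'tl'.
Node* remove_minimum(Node* tl) {
  Node* cur = tl;
  while (cur->left != NULL) cur = cur->left;   // walk down the left spine
  Node* p = cur->parent;
  Node* r = cur->right;                        // the only possible child
  if (p != NULL) {
    if (p->left == cur) p->left = r;           // splice the child into place
    else                p->right = r;
  }
  if (r != NULL) r->parent = p;
  cur->left = cur->right = cur->parent = NULL; // fully detached
  return cur;
}

int main() {
  Node a = {  8, NULL, NULL, NULL };
  Node c = { 32, NULL, NULL, NULL };
  Node b = { 16, &a, &c, NULL };
  a.parent = c.parent = &b;
  Node* min = remove_minimum(&b);
  std::printf("min: %d, left now empty: %d\n", min->key, b.left == NULL);
}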
 
-// Based on a simplification of the algorithm by Sleator and Tarjan (JACM 1985).
-// The simplifications are the following:
-// . we splay only when we delete (not when we insert)
-// . we apply a single spay step per deletion/access
-// By doing such partial splaying, we reduce the amount of restructuring,
-// while getting a reasonably efficient search tree (we think).
-// [Measurements will be needed to (in)validate this expectation.]
-
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::semi_splay_step(TreeList<Chunk>* tc) {
-  // apply a semi-splay step at the given node:
-  // . if root, norting needs to be done
-  // . if child of root, splay once
-  // . else zig-zig or sig-zag depending on path from grandparent
-  if (root() == tc) return;
-  warning("*** Splaying not yet implemented; "
-          "tree operations may be inefficient ***");
-}
-
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::insert_chunk_in_tree(Chunk* fc) {
-  TreeList<Chunk> *curTL, *prevTL;
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::insert_chunk_in_tree(Chunk_t* fc) {
+  TreeList<Chunk_t, FreeList_t> *curTL, *prevTL;
   size_t size = fc->size();
 
-  assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "too small to be a TreeList<Chunk>");
+  assert((size >= min_size()),
+    err_msg(SIZE_FORMAT " is too small to be a TreeChunk<Chunk_t, FreeList_t> " SIZE_FORMAT,
+      size, min_size()));
   if (FLSVerifyDictionary) {
     verify_tree();
   }
@@ -729,9 +743,9 @@
       curTL = curTL->right();
     }
   }
-  TreeChunk<Chunk>* tc = TreeChunk<Chunk>::as_TreeChunk(fc);
+  TreeChunk<Chunk_t, FreeList_t>* tc = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(fc);
   // This chunk is being returned to the binary tree.  Its embedded
-  // TreeList<Chunk> should be unused at this point.
+  // TreeList<Chunk_t, FreeList_t> should be unused at this point.
   tc->initialize();
   if (curTL != NULL) {          // exact match
     tc->set_list(curTL);
@@ -739,8 +753,8 @@
   } else {                     // need a new node in tree
     tc->clear_next();
     tc->link_prev(NULL);
-    TreeList<Chunk>* newTL = TreeList<Chunk>::as_TreeList(tc);
-    assert(((TreeChunk<Chunk>*)tc)->list() == newTL,
+    TreeList<Chunk_t, FreeList_t>* newTL = TreeList<Chunk_t, FreeList_t>::as_TreeList(tc);
+    assert(((TreeChunk<Chunk_t, FreeList_t>*)tc)->list() == newTL,
       "List was not initialized correctly");
     if (prevTL == NULL) {      // we are the only tree node
       assert(root() == NULL, "control point invariant");
@@ -768,30 +782,30 @@
   }
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::max_chunk_size() const {
-  FreeBlockDictionary<Chunk>::verify_par_locked();
-  TreeList<Chunk>* tc = root();
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::max_chunk_size() const {
+  FreeBlockDictionary<Chunk_t>::verify_par_locked();
+  TreeList<Chunk_t, FreeList_t>* tc = root();
   if (tc == NULL) return 0;
   for (; tc->right() != NULL; tc = tc->right());
   return tc->size();
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_list_length(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_list_length(TreeList<Chunk_t, FreeList_t>* tl) const {
   size_t res;
   res = tl->count();
 #ifdef ASSERT
   size_t cnt;
-  Chunk* tc = tl->head();
+  Chunk_t* tc = tl->head();
   for (cnt = 0; tc != NULL; tc = tc->next(), cnt++);
   assert(res == cnt, "The count is not being maintained correctly");
 #endif
   return res;
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_size_in_tree(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_size_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const {
   if (tl == NULL)
     return 0;
   return (tl->size() * total_list_length(tl)) +
@@ -799,8 +813,8 @@
          total_size_in_tree(tl->right());
 }
 
-template <class Chunk>
-double BinaryTreeDictionary<Chunk>::sum_of_squared_block_sizes(TreeList<Chunk>* const tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+double BinaryTreeDictionary<Chunk_t, FreeList_t>::sum_of_squared_block_sizes(TreeList<Chunk_t, FreeList_t>* const tl) const {
   if (tl == NULL) {
     return 0.0;
   }
@@ -811,8 +825,8 @@
   return curr;
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_free_blocks_in_tree(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_free_blocks_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const {
   if (tl == NULL)
     return 0;
   return total_list_length(tl) +
@@ -820,28 +834,28 @@
          total_free_blocks_in_tree(tl->right());
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::num_free_blocks() const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::num_free_blocks() const {
   assert(total_free_blocks_in_tree(root()) == total_free_blocks(),
          "_total_free_blocks inconsistency");
   return total_free_blocks();
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::tree_height_helper(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::tree_height_helper(TreeList<Chunk_t, FreeList_t>* tl) const {
   if (tl == NULL)
     return 0;
   return 1 + MAX2(tree_height_helper(tl->left()),
                   tree_height_helper(tl->right()));
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::treeHeight() const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::tree_height() const {
   return tree_height_helper(root());
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_nodes_helper(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_nodes_helper(TreeList<Chunk_t, FreeList_t>* tl) const {
   if (tl == NULL) {
     return 0;
   }
@@ -849,14 +863,18 @@
     total_nodes_helper(tl->right());
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_nodes_in_tree(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_nodes_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const {
   return total_nodes_helper(root());
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::dict_census_udpate(size_t size, bool split, bool birth){
-  TreeList<Chunk>* nd = find_list(size);
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::dict_census_update(size_t size, bool split, bool birth){}
+
+#ifndef SERIALGC
+template <>
+void BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>::dict_census_update(size_t size, bool split, bool birth){
+  TreeList<FreeChunk, AdaptiveFreeList>* nd = find_list(size);
   if (nd) {
     if (split) {
       if (birth) {
@@ -882,16 +900,26 @@
   //   This is a birth associated with a LinAB.  The chunk
   //     for the LinAB is not in the dictionary.
 }
+#endif // SERIALGC
 
-template <class Chunk>
-bool BinaryTreeDictionary<Chunk>::coal_dict_over_populated(size_t size) {
+template <class Chunk_t, template <class> class FreeList_t>
+bool BinaryTreeDictionary<Chunk_t, FreeList_t>::coal_dict_over_populated(size_t size) {
+  // For the general type of freelists, encourage coalescing by
+  // returning true.
+  return true;
+}
+
+#ifndef SERIALGC
+template <>
+bool BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>::coal_dict_over_populated(size_t size) {
   if (FLSAlwaysCoalesceLarge) return true;
 
-  TreeList<Chunk>* list_of_size = find_list(size);
+  TreeList<FreeChunk, AdaptiveFreeList>* list_of_size = find_list(size);
  // None of the requested size implies overpopulated.
   return list_of_size == NULL || list_of_size->coal_desired() <= 0 ||
          list_of_size->count() > list_of_size->coal_desired();
 }
+#endif  // SERIALGC
 
 // Closures for walking the binary tree.
 //   do_list() walks the free list in a node applying the closure
@@ -899,35 +927,33 @@
 //   do_tree() walks the nodes in the binary tree applying do_list()
 //     to each list at each node.
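
The census closures that follow are an internal-iterator pattern: do_tree() owns the traversal order (in-order for ascending by size, reversed for descending) and calls back into a virtual do_list() per node. The shape, reduced to a free-standing sketch with a toy node type:

#include <cstddef>
#include <cstdio>

struct Node { int size; Node *left, *right; };

struct CensusClosure {
  virtual ~CensusClosure() {}
  virtual void do_list(Node* n) = 0;   // per-size-class work
  void do_tree(Node* n) {              // in-order: ascending by size
    if (n != NULL) {
      do_tree(n->left);
      do_list(n);
      do_tree(n->right);
    }
  }
};

struct PrintSizes : CensusClosure {
  void do_list(Node* n) { std::printf("%d ", n->size); }
};

int main() {
  Node a = { 8, NULL, NULL }, c = { 32, NULL, NULL };
  Node b = { 16, &a, &c };
  PrintSizes p;
  p.do_tree(&b);                       // prints: 8 16 32
  std::printf("\n");
}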
 
-template <class Chunk>
+template <class Chunk_t, template <class> class FreeList_t>
 class TreeCensusClosure : public StackObj {
  protected:
-  virtual void do_list(FreeList<Chunk>* fl) = 0;
+  virtual void do_list(FreeList_t<Chunk_t>* fl) = 0;
  public:
-  virtual void do_tree(TreeList<Chunk>* tl) = 0;
+  virtual void do_tree(TreeList<Chunk_t, FreeList_t>* tl) = 0;
 };
 
-template <class Chunk>
-class AscendTreeCensusClosure : public TreeCensusClosure<Chunk> {
-  using TreeCensusClosure<Chunk>::do_list;
+template <class Chunk_t, template <class> class FreeList_t>
+class AscendTreeCensusClosure : public TreeCensusClosure<Chunk_t, FreeList_t> {
  public:
-  void do_tree(TreeList<Chunk>* tl) {
+  void do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
     if (tl != NULL) {
       do_tree(tl->left());
-      do_list(tl);
+      this->do_list(tl);
       do_tree(tl->right());
     }
   }
 };
 
-template <class Chunk>
-class DescendTreeCensusClosure : public TreeCensusClosure<Chunk> {
-  using TreeCensusClosure<Chunk>::do_list;
+template <class Chunk_t, template <class> class FreeList_t>
+class DescendTreeCensusClosure : public TreeCensusClosure<Chunk_t, FreeList_t> {
  public:
-  void do_tree(TreeList<Chunk>* tl) {
+  void do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
     if (tl != NULL) {
       do_tree(tl->right());
-      do_list(tl);
+      this->do_list(tl);
       do_tree(tl->left());
     }
   }
@@ -935,8 +961,8 @@
 
 // For each list in the tree, calculate the desired, desired
 // coalesce, count before sweep, and surplus before sweep.
-template <class Chunk>
-class BeginSweepClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class BeginSweepClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
   double _percentage;
   float _inter_sweep_current;
   float _inter_sweep_estimate;
@@ -951,32 +977,36 @@
    _inter_sweep_estimate(inter_sweep_estimate),
    _intra_sweep_estimate(intra_sweep_estimate) { }
 
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+  void do_list(AdaptiveFreeList<Chunk_t>* fl) {
     double coalSurplusPercent = _percentage;
     fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate);
     fl->set_coal_desired((ssize_t)((double)fl->desired() * coalSurplusPercent));
     fl->set_before_sweep(fl->count());
     fl->set_bfr_surp(fl->surplus());
   }
+#endif // SERIALGC
 };
 
 // Used to search the tree until a condition is met.
 // Similar to TreeCensusClosure but searches the
 // tree and returns promptly when found.
 
-template <class Chunk>
+template <class Chunk_t, template <class> class FreeList_t>
 class TreeSearchClosure : public StackObj {
  protected:
-  virtual bool do_list(FreeList<Chunk>* fl) = 0;
+  virtual bool do_list(FreeList_t<Chunk_t>* fl) = 0;
  public:
-  virtual bool do_tree(TreeList<Chunk>* tl) = 0;
+  virtual bool do_tree(TreeList<Chunk_t, FreeList_t>* tl) = 0;
 };
 
 #if 0 //  Don't need this yet but here for symmetry.
-template <class Chunk>
-class AscendTreeSearchClosure : public TreeSearchClosure {
+template <class Chunk_t, template <class> class FreeList_t>
+class AscendTreeSearchClosure : public TreeSearchClosure<Chunk_t> {
  public:
-  bool do_tree(TreeList<Chunk>* tl) {
+  bool do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
     if (tl != NULL) {
       if (do_tree(tl->left())) return true;
       if (do_list(tl)) return true;
@@ -987,14 +1017,13 @@
 };
 #endif
 
-template <class Chunk>
-class DescendTreeSearchClosure : public TreeSearchClosure<Chunk> {
-  using TreeSearchClosure<Chunk>::do_list;
+template <class Chunk_t, template <class> class FreeList_t>
+class DescendTreeSearchClosure : public TreeSearchClosure<Chunk_t, FreeList_t> {
  public:
-  bool do_tree(TreeList<Chunk>* tl) {
+  bool do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
     if (tl != NULL) {
       if (do_tree(tl->right())) return true;
-      if (do_list(tl)) return true;
+      if (this->do_list(tl)) return true;
       if (do_tree(tl->left())) return true;
     }
     return false;
@@ -1003,17 +1032,17 @@
 
 // Searches the tree for a chunk that ends at the
 // specified address.
-template <class Chunk>
-class EndTreeSearchClosure : public DescendTreeSearchClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class EndTreeSearchClosure : public DescendTreeSearchClosure<Chunk_t, FreeList_t> {
   HeapWord* _target;
-  Chunk* _found;
+  Chunk_t* _found;
 
  public:
   EndTreeSearchClosure(HeapWord* target) : _target(target), _found(NULL) {}
-  bool do_list(FreeList<Chunk>* fl) {
-    Chunk* item = fl->head();
+  bool do_list(FreeList_t<Chunk_t>* fl) {
+    Chunk_t* item = fl->head();
     while (item != NULL) {
-      if (item->end() == _target) {
+      if (item->end() == (uintptr_t*) _target) {
         _found = item;
         return true;
       }
@@ -1021,22 +1050,22 @@
     }
     return false;
   }
-  Chunk* found() { return _found; }
+  Chunk_t* found() { return _found; }
 };
 
-template <class Chunk>
-Chunk* BinaryTreeDictionary<Chunk>::find_chunk_ends_at(HeapWord* target) const {
-  EndTreeSearchClosure<Chunk> etsc(target);
+template <class Chunk_t, template <class> class FreeList_t>
+Chunk_t* BinaryTreeDictionary<Chunk_t, FreeList_t>::find_chunk_ends_at(HeapWord* target) const {
+  EndTreeSearchClosure<Chunk_t, FreeList_t> etsc(target);
   bool found_target = etsc.do_tree(root());
   assert(found_target || etsc.found() == NULL, "Consistency check");
   assert(!found_target || etsc.found() != NULL, "Consistency check");
   return etsc.found();
 }
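
The search closures change only the protocol: do_list() returns bool and do_tree() short-circuits on the first true, which is what lets find_chunk_ends_at() stop at the first matching chunk. The same early-exit descent as a small template (descending, so larger sizes are tried first; names invented):

#include <cstddef>
#include <cstdio>

struct Node { int size; Node *left, *right; };

// Visit larger sizes first; stop as soon as 'pred' is satisfied.
template <class Pred>
bool search_descending(Node* n, Pred pred) {
  if (n == NULL) return false;
  if (search_descending(n->right, pred)) return true;
  if (pred(n)) return true;
  return search_descending(n->left, pred);
}

struct AtLeast {
  int want;
  bool operator()(Node* n) const { return n->size >= want; }
};

int main() {
  Node a = { 8, NULL, NULL }, c = { 32, NULL, NULL };
  Node b = { 16, &a, &c };
  AtLeast p = { 10 };
  std::printf("found: %d\n", search_descending(&b, p));  // 1 (stops at 32)
}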
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::begin_sweep_dict_census(double coalSurplusPercent,
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::begin_sweep_dict_census(double coalSurplusPercent,
   float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) {
-  BeginSweepClosure<Chunk> bsc(coalSurplusPercent, inter_sweep_current,
+  BeginSweepClosure<Chunk_t, FreeList_t> bsc(coalSurplusPercent, inter_sweep_current,
                                             inter_sweep_estimate,
                                             intra_sweep_estimate);
   bsc.do_tree(root());
@@ -1045,84 +1074,91 @@
 // Closures and methods for calculating total bytes returned to the
 // free lists in the tree.
 #ifndef PRODUCT
-template <class Chunk>
-class InitializeDictReturnedBytesClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class InitializeDictReturnedBytesClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
    public:
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList_t<Chunk_t>* fl) {
     fl->set_returned_bytes(0);
   }
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::initialize_dict_returned_bytes() {
-  InitializeDictReturnedBytesClosure<Chunk> idrb;
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::initialize_dict_returned_bytes() {
+  InitializeDictReturnedBytesClosure<Chunk_t, FreeList_t> idrb;
   idrb.do_tree(root());
 }
 
-template <class Chunk>
-class ReturnedBytesClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class ReturnedBytesClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
   size_t _dict_returned_bytes;
  public:
   ReturnedBytesClosure() { _dict_returned_bytes = 0; }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList_t<Chunk_t>* fl) {
     _dict_returned_bytes += fl->returned_bytes();
   }
   size_t dict_returned_bytes() { return _dict_returned_bytes; }
 };
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::sum_dict_returned_bytes() {
-  ReturnedBytesClosure<Chunk> rbc;
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::sum_dict_returned_bytes() {
+  ReturnedBytesClosure<Chunk_t, FreeList_t> rbc;
   rbc.do_tree(root());
 
   return rbc.dict_returned_bytes();
 }
 
 // Count the number of entries in the tree.
-template <class Chunk>
-class treeCountClosure : public DescendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class treeCountClosure : public DescendTreeCensusClosure<Chunk_t, FreeList_t> {
  public:
   uint count;
   treeCountClosure(uint c) { count = c; }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList_t<Chunk_t>* fl) {
     count++;
   }
 };
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_count() {
-  treeCountClosure<Chunk> ctc(0);
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_count() {
+  treeCountClosure<Chunk_t, FreeList_t> ctc(0);
   ctc.do_tree(root());
   return ctc.count;
 }
 #endif // PRODUCT
 
 // Calculate surpluses for the lists in the tree.
-template <class Chunk>
-class setTreeSurplusClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class setTreeSurplusClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
   double percentage;
  public:
   setTreeSurplusClosure(double v) { percentage = v; }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+  void do_list(AdaptiveFreeList<Chunk_t>* fl) {
     double splitSurplusPercent = percentage;
     fl->set_surplus(fl->count() -
                    (ssize_t)((double)fl->desired() * splitSurplusPercent));
   }
+#endif // SERIALGC
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::set_tree_surplus(double splitSurplusPercent) {
-  setTreeSurplusClosure<Chunk> sts(splitSurplusPercent);
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::set_tree_surplus(double splitSurplusPercent) {
+  setTreeSurplusClosure<Chunk_t, FreeList_t> sts(splitSurplusPercent);
   sts.do_tree(root());
 }
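
The surplus set here is simply "chunks on hand beyond a padded estimate of demand"; a negative value marks a list the splitter should replenish rather than raid. Concretely, with invented numbers:

#include <cstdio>

int main() {
  long   count   = 50;                 // chunks currently on this list
  long   desired = 40;                 // demand estimate, compute_desired()
  double splitSurplusPercent = 1.10;   // keep 10% slack above demand
  long   surplus = count - (long)((double)desired * splitSurplusPercent);
  std::printf("surplus = %ld\n", surplus);  // 6 chunks free to give away
}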
 
 // Set hints for the lists in the tree.
-template <class Chunk>
-class setTreeHintsClosure : public DescendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class setTreeHintsClosure : public DescendTreeCensusClosure<Chunk_t, FreeList_t> {
   size_t hint;
  public:
   setTreeHintsClosure(size_t v) { hint = v; }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+  void do_list(AdaptiveFreeList<Chunk_t>* fl) {
     fl->set_hint(hint);
     assert(fl->hint() == 0 || fl->hint() > fl->size(),
       "Current hint is inconsistent");
@@ -1130,35 +1166,40 @@
       hint = fl->size();
     }
   }
+#endif // SERIALGC
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::set_tree_hints(void) {
-  setTreeHintsClosure<Chunk> sth(0);
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::set_tree_hints(void) {
+  setTreeHintsClosure<Chunk_t, FreeList_t> sth(0);
   sth.do_tree(root());
 }
 
 // Save count before previous sweep and splits and coalesces.
-template <class Chunk>
-class clearTreeCensusClosure : public AscendTreeCensusClosure<Chunk> {
-  void do_list(FreeList<Chunk>* fl) {
+template <class Chunk_t, template <class> class FreeList_t>
+class clearTreeCensusClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
+  void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+  void do_list(AdaptiveFreeList<Chunk_t>* fl) {
     fl->set_prev_sweep(fl->count());
     fl->set_coal_births(0);
     fl->set_coal_deaths(0);
     fl->set_split_births(0);
     fl->set_split_deaths(0);
   }
+#endif  // SERIALGC
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::clear_tree_census(void) {
-  clearTreeCensusClosure<Chunk> ctc;
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::clear_tree_census(void) {
+  clearTreeCensusClosure<Chunk_t, FreeList_t> ctc;
   ctc.do_tree(root());
 }
 
 // Do reporting and post sweep clean up.
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::end_sweep_dict_census(double splitSurplusPercent) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::end_sweep_dict_census(double splitSurplusPercent) {
   // Does walking the tree 3 times hurt?
   set_tree_surplus(splitSurplusPercent);
   set_tree_hints();
@@ -1169,9 +1210,9 @@
 }
 
 // Print summary statistics
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::report_statistics() const {
-  FreeBlockDictionary<Chunk>::verify_par_locked();
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::report_statistics() const {
+  FreeBlockDictionary<Chunk_t>::verify_par_locked();
   gclog_or_tty->print("Statistics for BinaryTreeDictionary:\n"
          "------------------------------------\n");
   size_t total_size = total_chunk_size(debug_only(NULL));
@@ -1182,36 +1223,47 @@
   if (free_blocks > 0) {
     gclog_or_tty->print("Av.  Block  Size: %d\n", total_size/free_blocks);
   }
-  gclog_or_tty->print("Tree      Height: %d\n", treeHeight());
+  gclog_or_tty->print("Tree      Height: %d\n", tree_height());
 }
 
 // Print census information - counts, births, deaths, etc.
 // for each list in the tree.  Also print some summary
 // information.
-template <class Chunk>
-class PrintTreeCensusClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class PrintTreeCensusClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
   int _print_line;
   size_t _total_free;
-  FreeList<Chunk> _total;
+  FreeList_t<Chunk_t> _total;
 
  public:
   PrintTreeCensusClosure() {
     _print_line = 0;
     _total_free = 0;
   }
-  FreeList<Chunk>* total() { return &_total; }
+  FreeList_t<Chunk_t>* total() { return &_total; }
   size_t total_free() { return _total_free; }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList<Chunk_t>* fl) {
     if (++_print_line >= 40) {
-      FreeList<Chunk>::print_labels_on(gclog_or_tty, "size");
+      FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, "size");
       _print_line = 0;
     }
     fl->print_on(gclog_or_tty);
     _total_free +=            fl->count()            * fl->size()        ;
     total()->set_count(      total()->count()       + fl->count()      );
-    total()->set_bfr_surp(    total()->bfr_surp()     + fl->bfr_surp()    );
+  }
+
+#ifndef SERIALGC
+  void do_list(AdaptiveFreeList<Chunk_t>* fl) {
+    if (++_print_line >= 40) {
+      FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, "size");
+      _print_line = 0;
+    }
+    fl->print_on(gclog_or_tty);
+    _total_free +=           fl->count()             * fl->size()        ;
+    total()->set_count(      total()->count()        + fl->count()      );
+    total()->set_bfr_surp(   total()->bfr_surp()     + fl->bfr_surp()    );
+    total()->set_surplus(    total()->surplus()      + fl->surplus()    );
-    total()->set_desired(    total()->desired()     + fl->desired()    );
+    total()->set_desired(    total()->desired()      + fl->desired()    );
     total()->set_prev_sweep(  total()->prev_sweep()   + fl->prev_sweep()  );
     total()->set_before_sweep(total()->before_sweep() + fl->before_sweep());
     total()->set_coal_births( total()->coal_births()  + fl->coal_births() );
@@ -1219,18 +1271,32 @@
     total()->set_split_births(total()->split_births() + fl->split_births());
     total()->set_split_deaths(total()->split_deaths() + fl->split_deaths());
   }
+#endif  // SERIALGC
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::print_dict_census(void) const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::print_dict_census(void) const {
 
   gclog_or_tty->print("\nBinaryTree\n");
-  FreeList<Chunk>::print_labels_on(gclog_or_tty, "size");
-  PrintTreeCensusClosure<Chunk> ptc;
+  FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, "size");
+  PrintTreeCensusClosure<Chunk_t, FreeList_t> ptc;
   ptc.do_tree(root());
 
-  FreeList<Chunk>* total = ptc.total();
-  FreeList<Chunk>::print_labels_on(gclog_or_tty, " ");
+  FreeList_t<Chunk_t>* total = ptc.total();
+  FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, " ");
+}
+
+#ifndef SERIALGC
+template <>
+void BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>::print_dict_census(void) const {
+
+  gclog_or_tty->print("\nBinaryTree\n");
+  AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
+  PrintTreeCensusClosure<FreeChunk, AdaptiveFreeList> ptc;
+  ptc.do_tree(root());
+
+  AdaptiveFreeList<FreeChunk>* total = ptc.total();
+  AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, " ");
   total->print_on(gclog_or_tty, "TOTAL\t");
   gclog_or_tty->print(
               "total_free(words): " SIZE_FORMAT_W(16)
@@ -1242,9 +1308,10 @@
              (double)(total->desired() - total->count())
              /(total->desired() != 0 ? (double)total->desired() : 1.0));
 }
+#endif  // SERIALGC
 
-template <class Chunk>
-class PrintFreeListsClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class PrintFreeListsClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
   outputStream* _st;
   int _print_line;
 
@@ -1253,14 +1320,14 @@
     _st = st;
     _print_line = 0;
   }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList_t<Chunk_t>* fl) {
     if (++_print_line >= 40) {
-      FreeList<Chunk>::print_labels_on(_st, "size");
+      FreeList_t<Chunk_t>::print_labels_on(_st, "size");
       _print_line = 0;
     }
     fl->print_on(gclog_or_tty);
     size_t sz = fl->size();
-    for (Chunk* fc = fl->head(); fc != NULL;
+    for (Chunk_t* fc = fl->head(); fc != NULL;
          fc = fc->next()) {
       _st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ")  %s",
                     fc, (HeapWord*)fc + sz,
@@ -1269,11 +1336,11 @@
   }
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::print_free_lists(outputStream* st) const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::print_free_lists(outputStream* st) const {
 
-  FreeList<Chunk>::print_labels_on(st, "size");
-  PrintFreeListsClosure<Chunk> pflc(st);
+  FreeList_t<Chunk_t>::print_labels_on(st, "size");
+  PrintFreeListsClosure<Chunk_t, FreeList_t> pflc(st);
   pflc.do_tree(root());
 }
 
@@ -1281,18 +1348,18 @@
 // . _root has no parent
 // . parent and child point to each other
 // . each node's key correctly related to that of its child(ren)
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::verify_tree() const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_tree() const {
   guarantee(root() == NULL || total_free_blocks() == 0 ||
    total_size() != 0, "_total_size shouldn't be 0?");
   guarantee(root() == NULL || root()->parent() == NULL, "_root shouldn't have parent");
   verify_tree_helper(root());
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::verify_prev_free_ptrs(TreeList<Chunk>* tl) {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_prev_free_ptrs(TreeList<Chunk_t, FreeList_t>* tl) {
   size_t ct = 0;
-  for (Chunk* curFC = tl->head(); curFC != NULL; curFC = curFC->next()) {
+  for (Chunk_t* curFC = tl->head(); curFC != NULL; curFC = curFC->next()) {
     ct++;
     assert(curFC->prev() == NULL || curFC->prev()->is_free(),
       "Chunk should be free");
@@ -1303,8 +1370,8 @@
 // Note: this helper is recursive rather than iterative, so use with
 // caution on very deep trees; and watch out for stack overflow errors;
 // In general, to be used only for debugging.
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::verify_tree_helper(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_tree_helper(TreeList<Chunk_t, FreeList_t>* tl) const {
   if (tl == NULL)
     return;
  guarantee(tl->size() != 0, "A list must have a size");
@@ -1332,15 +1399,25 @@
   verify_tree_helper(tl->right());
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::verify() const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::verify() const {
   verify_tree();
   guarantee(total_size() == total_size_in_tree(root()), "Total Size inconsistency");
 }
 
+template class TreeList<Metablock, FreeList>;
+template class BinaryTreeDictionary<Metablock, FreeList>;
+template class TreeChunk<Metablock, FreeList>;
+
+template class TreeList<Metachunk, FreeList>;
+template class BinaryTreeDictionary<Metachunk, FreeList>;
+template class TreeChunk<Metachunk, FreeList>;
+
+
 #ifndef SERIALGC
 // Explicitly instantiate these types for FreeChunk.
-template class BinaryTreeDictionary<FreeChunk>;
-template class TreeChunk<FreeChunk>;
-template class TreeList<FreeChunk>;
+template class TreeList<FreeChunk, AdaptiveFreeList>;
+template class BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>;
+template class TreeChunk<FreeChunk, AdaptiveFreeList>;
+
 #endif // SERIALGC
--- a/src/share/vm/memory/binaryTreeDictionary.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/binaryTreeDictionary.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -37,77 +37,78 @@
 // A TreeList is a FreeList which can be used to maintain a
 // binary tree of free lists.
 
-template <class Chunk> class TreeChunk;
-template <class Chunk> class BinaryTreeDictionary;
-template <class Chunk> class AscendTreeCensusClosure;
-template <class Chunk> class DescendTreeCensusClosure;
-template <class Chunk> class DescendTreeSearchClosure;
+template <class Chunk_t, template <class> class FreeList_t> class TreeChunk;
+template <class Chunk_t, template <class> class FreeList_t> class BinaryTreeDictionary;
+template <class Chunk_t, template <class> class FreeList_t> class AscendTreeCensusClosure;
+template <class Chunk_t, template <class> class FreeList_t> class DescendTreeCensusClosure;
+template <class Chunk_t, template <class> class FreeList_t> class DescendTreeSearchClosure;
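
Everything in this header hangs off one C++ feature: the second parameter is a template template parameter, so FreeList_t names a class template rather than a type, and TreeList can derive from either FreeList<Chunk_t> or AdaptiveFreeList<Chunk_t> without duplicating code. The pattern in isolation, with toy list types:

#include <cstdio>

template <class C> struct PlainList    { const char* kind() const { return "plain"; } };
template <class C> struct AdaptiveList { const char* kind() const { return "adaptive"; } };

// FreeList_t is a template; it is applied to Chunk_t inside the class.
template <class Chunk_t, template <class> class FreeList_t>
struct List : public FreeList_t<Chunk_t> {};

struct Chunk {};

int main() {
  List<Chunk, PlainList>    a;
  List<Chunk, AdaptiveList> b;
  std::printf("%s %s\n", a.kind(), b.kind());  // plain adaptive
}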
 
-template <class Chunk>
-class TreeList: public FreeList<Chunk> {
-  friend class TreeChunk<Chunk>;
-  friend class BinaryTreeDictionary<Chunk>;
-  friend class AscendTreeCensusClosure<Chunk>;
-  friend class DescendTreeCensusClosure<Chunk>;
-  friend class DescendTreeSearchClosure<Chunk>;
+template <class Chunk_t, template <class> class FreeList_t>
+class TreeList : public FreeList_t<Chunk_t> {
+  friend class TreeChunk<Chunk_t, FreeList_t>;
+  friend class BinaryTreeDictionary<Chunk_t, FreeList_t>;
+  friend class AscendTreeCensusClosure<Chunk_t, FreeList_t>;
+  friend class DescendTreeCensusClosure<Chunk_t, FreeList_t>;
+  friend class DescendTreeSearchClosure<Chunk_t, FreeList_t>;
 
-  TreeList<Chunk>* _parent;
-  TreeList<Chunk>* _left;
-  TreeList<Chunk>* _right;
+  TreeList<Chunk_t, FreeList_t>* _parent;
+  TreeList<Chunk_t, FreeList_t>* _left;
+  TreeList<Chunk_t, FreeList_t>* _right;
 
  protected:
-  TreeList<Chunk>* parent() const { return _parent; }
-  TreeList<Chunk>* left()   const { return _left;   }
-  TreeList<Chunk>* right()  const { return _right;  }
 
-  // Explicitly import these names into our namespace to fix name lookup with templates
-  using FreeList<Chunk>::head;
-  using FreeList<Chunk>::set_head;
+  TreeList<Chunk_t, FreeList_t>* parent() const { return _parent; }
+  TreeList<Chunk_t, FreeList_t>* left()   const { return _left;   }
+  TreeList<Chunk_t, FreeList_t>* right()  const { return _right;  }
 
-  using FreeList<Chunk>::tail;
-  using FreeList<Chunk>::set_tail;
-  using FreeList<Chunk>::link_tail;
+  // Wrapper on call to base class, to get the template to compile.
+  Chunk_t* head() const { return FreeList_t<Chunk_t>::head(); }
+  Chunk_t* tail() const { return FreeList_t<Chunk_t>::tail(); }
+  void set_head(Chunk_t* head) { FreeList_t<Chunk_t>::set_head(head); }
+  void set_tail(Chunk_t* tail) { FreeList_t<Chunk_t>::set_tail(tail); }
 
-  using FreeList<Chunk>::increment_count;
-  NOT_PRODUCT(using FreeList<Chunk>::increment_returned_bytes_by;)
-  using FreeList<Chunk>::verify_chunk_in_free_list;
-  using FreeList<Chunk>::size;
+  size_t size() const { return FreeList_t<Chunk_t>::size(); }
 
   // Accessors for links in tree.
 
-  void set_left(TreeList<Chunk>* tl) {
+  void set_left(TreeList<Chunk_t, FreeList_t>* tl) {
     _left   = tl;
     if (tl != NULL)
       tl->set_parent(this);
   }
-  void set_right(TreeList<Chunk>* tl) {
+  void set_right(TreeList<Chunk_t, FreeList_t>* tl) {
     _right  = tl;
     if (tl != NULL)
       tl->set_parent(this);
   }
-  void set_parent(TreeList<Chunk>* tl)  { _parent = tl;   }
+  void set_parent(TreeList<Chunk_t, FreeList_t>* tl)  { _parent = tl;   }
 
-  void clearLeft()               { _left = NULL;   }
+  void clear_left()               { _left = NULL;   }
   void clear_right()              { _right = NULL;  }
   void clear_parent()             { _parent = NULL; }
-  void initialize()              { clearLeft(); clear_right(), clear_parent(); }
+  void initialize()               { clear_left(); clear_right(), clear_parent(); FreeList_t<Chunk_t>::initialize(); }
 
   // For constructing a TreeList from a Tree chunk or
   // address and size.
-  static TreeList<Chunk>* as_TreeList(TreeChunk<Chunk>* tc);
-  static TreeList<Chunk>* as_TreeList(HeapWord* addr, size_t size);
+  TreeList();
+  static TreeList<Chunk_t, FreeList_t>*
+          as_TreeList(TreeChunk<Chunk_t, FreeList_t>* tc);
+  static TreeList<Chunk_t, FreeList_t>* as_TreeList(HeapWord* addr, size_t size);
 
   // Returns the head of the free list as a pointer to a TreeChunk.
-  TreeChunk<Chunk>* head_as_TreeChunk();
+  TreeChunk<Chunk_t, FreeList_t>* head_as_TreeChunk();
 
   // Returns the first available chunk in the free list as a pointer
   // to a TreeChunk.
-  TreeChunk<Chunk>* first_available();
+  TreeChunk<Chunk_t, FreeList_t>* first_available();
 
   // Returns the block with the largest heap address amongst
   // those in the list for this size; potentially slow and expensive,
   // use with caution!
-  TreeChunk<Chunk>* largest_address();
+  TreeChunk<Chunk_t, FreeList_t>* largest_address();
+
+  TreeList<Chunk_t, FreeList_t>* get_better_list(
+    BinaryTreeDictionary<Chunk_t, FreeList_t>* dictionary);
 
   // remove_chunk_replace_if_needed() removes the given "tc" from the TreeList.
   // If "tc" is the first chunk in the list, it is also the
@@ -115,10 +116,10 @@
   // returns the possibly replaced TreeList* for the node in
   // the tree.  It also updates the parent of the original
   // node to point to the new node.
-  TreeList<Chunk>* remove_chunk_replace_if_needed(TreeChunk<Chunk>* tc);
+  TreeList<Chunk_t, FreeList_t>* remove_chunk_replace_if_needed(TreeChunk<Chunk_t, FreeList_t>* tc);
   // See FreeList.
-  void return_chunk_at_head(TreeChunk<Chunk>* tc);
-  void return_chunk_at_tail(TreeChunk<Chunk>* tc);
+  void return_chunk_at_head(TreeChunk<Chunk_t, FreeList_t>* tc);
+  void return_chunk_at_tail(TreeChunk<Chunk_t, FreeList_t>* tc);
 };
 
 // A TreeChunk is a subclass of a Chunk that additionally
@@ -134,52 +135,54 @@
 // on the free list for a node in the tree and is only removed if
 // it is the last chunk on the free list.
 
-template <class Chunk>
-class TreeChunk : public Chunk {
-  friend class TreeList<Chunk>;
-  TreeList<Chunk>* _list;
-  TreeList<Chunk> _embedded_list;  // if non-null, this chunk is on _list
+template <class Chunk_t, template <class> class FreeList_t>
+class TreeChunk : public Chunk_t {
+  friend class TreeList<Chunk_t, FreeList_t>;
+  TreeList<Chunk_t, FreeList_t>* _list;
+  TreeList<Chunk_t, FreeList_t> _embedded_list;  // if non-null, this chunk is on _list
+
+  static size_t _min_tree_chunk_size;
+
  protected:
-  TreeList<Chunk>* embedded_list() const { return (TreeList<Chunk>*) &_embedded_list; }
-  void set_embedded_list(TreeList<Chunk>* v) { _embedded_list = *v; }
+  TreeList<Chunk_t, FreeList_t>* embedded_list() const { return (TreeList<Chunk_t, FreeList_t>*) &_embedded_list; }
+  void set_embedded_list(TreeList<Chunk_t, FreeList_t>* v) { _embedded_list = *v; }
  public:
-  TreeList<Chunk>* list() { return _list; }
-  void set_list(TreeList<Chunk>* v) { _list = v; }
-  static TreeChunk<Chunk>* as_TreeChunk(Chunk* fc);
+  TreeList<Chunk_t, FreeList_t>* list() { return _list; }
+  void set_list(TreeList<Chunk_t, FreeList_t>* v) { _list = v; }
+  static TreeChunk<Chunk_t, FreeList_t>* as_TreeChunk(Chunk_t* fc);
   // Initialize fields in a TreeChunk that should be
   // initialized when the TreeChunk is being added to
   // a free list in the tree.
   void initialize() { embedded_list()->initialize(); }
 
-  Chunk* next() const { return Chunk::next(); }
-  Chunk* prev() const { return Chunk::prev(); }
-  size_t size() const volatile { return Chunk::size(); }
+  Chunk_t* next() const { return Chunk_t::next(); }
+  Chunk_t* prev() const { return Chunk_t::prev(); }
+  size_t size() const volatile { return Chunk_t::size(); }
+
+  static size_t min_size() {
+    return _min_tree_chunk_size;
+  }
 
   // debugging
   void verify_tree_chunk_list() const;
+  void assert_is_mangled() const;
 };
 
 
-template <class Chunk>
-class BinaryTreeDictionary: public FreeBlockDictionary<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class BinaryTreeDictionary: public FreeBlockDictionary<Chunk_t> {
   friend class VMStructs;
-  bool       _splay;
-  bool       _adaptive_freelists;
   size_t     _total_size;
   size_t     _total_free_blocks;
-  TreeList<Chunk>* _root;
+  TreeList<Chunk_t, FreeList_t>* _root;
 
   // private accessors
-  bool splay() const { return _splay; }
-  void set_splay(bool v) { _splay = v; }
   void set_total_size(size_t v) { _total_size = v; }
   virtual void inc_total_size(size_t v);
   virtual void dec_total_size(size_t v);
-  size_t total_free_blocks() const { return _total_free_blocks; }
   void set_total_free_blocks(size_t v) { _total_free_blocks = v; }
-  TreeList<Chunk>* root() const { return _root; }
-  void set_root(TreeList<Chunk>* v) { _root = v; }
-  bool adaptive_freelists() { return _adaptive_freelists; }
+  TreeList<Chunk_t, FreeList_t>* root() const { return _root; }
+  void set_root(TreeList<Chunk_t, FreeList_t>* v) { _root = v; }
 
   // This field is added and can be set to point to the
   // the Mutex used to synchronize access to the
@@ -191,54 +194,55 @@
   // return it.  If the chunk
   // is the last chunk of that size, remove the node for that size
   // from the tree.
-  TreeChunk<Chunk>* get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither, bool splay);
-  // Return a list of the specified size or NULL from the tree.
-  // The list is not removed from the tree.
-  TreeList<Chunk>* find_list (size_t size) const;
+  TreeChunk<Chunk_t, FreeList_t>* get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk_t>::Dither dither);
   // Remove this chunk from the tree.  If the removal results
   // in an empty list in the tree, remove the empty list.
-  TreeChunk<Chunk>* remove_chunk_from_tree(TreeChunk<Chunk>* tc);
+  TreeChunk<Chunk_t, FreeList_t>* remove_chunk_from_tree(TreeChunk<Chunk_t, FreeList_t>* tc);
   // Remove the node in the trees starting at tl that has the
   // minimum value and return it.  Repair the tree as needed.
-  TreeList<Chunk>* remove_tree_minimum(TreeList<Chunk>* tl);
-  void       semi_splay_step(TreeList<Chunk>* tl);
+  TreeList<Chunk_t, FreeList_t>* remove_tree_minimum(TreeList<Chunk_t, FreeList_t>* tl);
   // Add this free chunk to the tree.
-  void       insert_chunk_in_tree(Chunk* freeChunk);
+  void       insert_chunk_in_tree(Chunk_t* freeChunk);
  public:
 
-  static const size_t min_tree_chunk_size  = sizeof(TreeChunk<Chunk>)/HeapWordSize;
+  // Return a list of the specified size or NULL from the tree.
+  // The list is not removed from the tree.
+  TreeList<Chunk_t, FreeList_t>* find_list (size_t size) const;
 
   void       verify_tree() const;
   // verify that the given chunk is in the tree.
-  bool       verify_chunk_in_free_list(Chunk* tc) const;
+  bool       verify_chunk_in_free_list(Chunk_t* tc) const;
  private:
-  void          verify_tree_helper(TreeList<Chunk>* tl) const;
-  static size_t verify_prev_free_ptrs(TreeList<Chunk>* tl);
+  void          verify_tree_helper(TreeList<Chunk_t, FreeList_t>* tl) const;
+  static size_t verify_prev_free_ptrs(TreeList<Chunk_t, FreeList_t>* tl);
 
   // Returns the total number of chunks in the list.
-  size_t     total_list_length(TreeList<Chunk>* tl) const;
+  size_t     total_list_length(TreeList<Chunk_t, FreeList_t>* tl) const;
   // Returns the total number of words in the chunks in the tree
   // starting at "tl".
-  size_t     total_size_in_tree(TreeList<Chunk>* tl) const;
+  size_t     total_size_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const;
   // Returns the sum of the square of the size of each block
   // in the tree starting at "tl".
-  double     sum_of_squared_block_sizes(TreeList<Chunk>* const tl) const;
+  double     sum_of_squared_block_sizes(TreeList<Chunk_t, FreeList_t>* const tl) const;
   // Returns the total number of free blocks in the tree starting
   // at "tl".
-  size_t     total_free_blocks_in_tree(TreeList<Chunk>* tl) const;
-  size_t     num_free_blocks() const;
-  size_t     treeHeight() const;
-  size_t     tree_height_helper(TreeList<Chunk>* tl) const;
-  size_t     total_nodes_in_tree(TreeList<Chunk>* tl) const;
-  size_t     total_nodes_helper(TreeList<Chunk>* tl) const;
+  size_t     total_free_blocks_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const;
+  size_t     num_free_blocks()  const;
+  size_t     tree_height() const;
+  size_t     tree_height_helper(TreeList<Chunk_t, FreeList_t>* tl) const;
+  size_t     total_nodes_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const;
+  size_t     total_nodes_helper(TreeList<Chunk_t, FreeList_t>* tl) const;
 
  public:
   // Constructor
-  BinaryTreeDictionary(bool adaptive_freelists, bool splay = false);
-  BinaryTreeDictionary(MemRegion mr, bool adaptive_freelists, bool splay = false);
+  BinaryTreeDictionary() :
+    _total_size(0), _total_free_blocks(0), _root(0) {}
+
+  BinaryTreeDictionary(MemRegion mr);
 
   // Public accessors
   size_t total_size() const { return _total_size; }
+  size_t total_free_blocks() const { return _total_free_blocks; }
 
   // Reset the dictionary to the initial conditions with
   // a single free chunk.
@@ -249,23 +253,24 @@
 
   // Return a chunk of size "size" or greater from
   // the tree.
-  // want a better dynamic splay strategy for the future.
-  Chunk* get_chunk(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither) {
-    FreeBlockDictionary<Chunk>::verify_par_locked();
-    Chunk* res = get_chunk_from_tree(size, dither, splay());
+  Chunk_t* get_chunk(size_t size, enum FreeBlockDictionary<Chunk_t>::Dither dither) {
+    FreeBlockDictionary<Chunk_t>::verify_par_locked();
+    Chunk_t* res = get_chunk_from_tree(size, dither);
     assert(res == NULL || res->is_free(),
            "Should be returning a free chunk");
+    assert(dither != FreeBlockDictionary<Chunk_t>::exactly ||
+           res == NULL || res->size() == size, "Not correct size");
     return res;
   }
 
-  void return_chunk(Chunk* chunk) {
-    FreeBlockDictionary<Chunk>::verify_par_locked();
+  void return_chunk(Chunk_t* chunk) {
+    FreeBlockDictionary<Chunk_t>::verify_par_locked();
     insert_chunk_in_tree(chunk);
   }
 
-  void remove_chunk(Chunk* chunk) {
-    FreeBlockDictionary<Chunk>::verify_par_locked();
-    remove_chunk_from_tree((TreeChunk<Chunk>*)chunk);
+  void remove_chunk(Chunk_t* chunk) {
+    FreeBlockDictionary<Chunk_t>::verify_par_locked();
+    remove_chunk_from_tree((TreeChunk<Chunk_t, FreeList_t>*)chunk);
     assert(chunk->is_free(), "Should still be a free chunk");
   }
 
@@ -281,19 +286,19 @@
   }
 
   size_t     min_size() const {
-    return min_tree_chunk_size;
+    return TreeChunk<Chunk_t, FreeList_t>::min_size();
   }
 
   double     sum_of_squared_block_sizes() const {
     return sum_of_squared_block_sizes(root());
   }
 
-  Chunk* find_chunk_ends_at(HeapWord* target) const;
+  Chunk_t* find_chunk_ends_at(HeapWord* target) const;
 
   // Find the list with size "size" in the binary tree and update
   // the statistics in the list according to "split" (chunk was
   // split or coalesce) and "birth" (chunk was added or removed).
-  void       dict_census_udpate(size_t size, bool split, bool birth);
+  void       dict_census_update(size_t size, bool split, bool birth);
   // Return true if the dictionary is overpopulated (more chunks of
   // this size than desired) for size "size".
   bool       coal_dict_over_populated(size_t size);
@@ -307,7 +312,7 @@
   // statistics for the sweep.
   void       end_sweep_dict_census(double splitSurplusPercent);
   // Return the largest free chunk in the tree.
-  Chunk* find_largest_dict() const;
+  Chunk_t* find_largest_dict() const;
   // Accessors for statistics
   void       set_tree_surplus(double splitSurplusPercent);
   void       set_tree_hints(void);
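
The template-template parameter is the crux of the TreeList/BinaryTreeDictionary rework above: FreeList_t is itself a one-argument class template, so the same tree code can sit on top of different free-list implementations. A minimal sketch of the pattern; DemoChunk and DemoFreeList are illustrative stand-ins, not HotSpot names:

    #include <cstddef>

    struct DemoChunk { };

    // Stand-in free list; any template with this shape can be plugged in.
    template <class Chunk>
    class DemoFreeList {
     public:
      size_t size() const { return 0; }   // placeholder body
    };

    // Mirrors the TreeList<Chunk_t, FreeList_t> declaration above.
    template <class Chunk_t, template <class> class FreeList_t>
    class TreeListSketch : public FreeList_t<Chunk_t> {
     public:
      // Forward to whichever free-list implementation was selected.
      size_t size() const { return FreeList_t<Chunk_t>::size(); }
    };

    TreeListSketch<DemoChunk, DemoFreeList> demo_list;  // instantiation
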
--- a/src/share/vm/memory/collectorPolicy.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/collectorPolicy.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -37,19 +37,8 @@
 #include "runtime/globals_extension.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 #ifndef SERIALGC
 #include "gc_implementation/concurrentMarkSweep/cmsAdaptiveSizePolicy.hpp"
 #include "gc_implementation/concurrentMarkSweep/cmsGCAdaptivePolicyCounters.hpp"
@@ -742,6 +731,8 @@
   uint gc_count = 0;
   uint full_gc_count = 0;
 
+  assert(!Heap_lock->owned_by_self(), "Should not be holding the Heap_lock");
+
   do {
     MetaWord* result = NULL;
     if (GC_locker::is_active_and_needs_gc()) {
@@ -756,7 +747,6 @@
       }
       JavaThread* jthr = JavaThread::current();
       if (!jthr->in_critical()) {
-        MutexUnlocker mul(Heap_lock);
         // Wait for JNI critical section to be exited
         GC_locker::stall_until_clear();
         // The GC invoked by the last thread leaving the critical
@@ -787,6 +777,15 @@
                                        full_gc_count,
                                        GCCause::_metadata_GC_threshold);
     VMThread::execute(&op);
+
+    // If the GC was locked out, try again.  Check this
+    // before checking for prologue success, because the
+    // prologue could have succeeded while the GC was
+    // still locked out.
+    if (op.gc_locked()) {
+      continue;
+    }
+
     if (op.prologue_succeeded()) {
       return op.result();
     }
@@ -828,7 +827,7 @@
   if (_generations == NULL)
     vm_exit_during_initialization("Unable to allocate gen spec");
 
-  if (UseParNewGC && ParallelGCThreads > 0) {
+  if (UseParNewGC) {
     _generations[0] = new GenerationSpec(Generation::ParNew, _initial_gen0_size, _max_gen0_size);
   } else {
     _generations[0] = new GenerationSpec(Generation::DefNew, _initial_gen0_size, _max_gen0_size);
@@ -841,10 +840,9 @@
 
 void MarkSweepPolicy::initialize_gc_policy_counters() {
   // initialize the policy counters - 2 collectors, 3 generations
-  if (UseParNewGC && ParallelGCThreads > 0) {
+  if (UseParNewGC) {
     _gc_policy_counters = new GCPolicyCounters("ParNew:MSC", 2, 3);
-  }
-  else {
+  } else {
     _gc_policy_counters = new GCPolicyCounters("Copy:MSC", 2, 3);
   }
 }
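
The gc_locked() check added above changes the allocation loop's control flow: a locked-out GC must trigger a retry even when the operation's prologue succeeded. A compilable sketch of that shape; VMOperationStub is a hypothetical stand-in for the real VM operation, not a HotSpot type:

    // Hypothetical stand-in for the metadata-allocation VM operation.
    struct VMOperationStub {
      bool gc_locked() const { return false; }
      bool prologue_succeeded() const { return true; }
      void* result() const { return nullptr; }
    };

    void* allocate_with_retry() {
      for (;;) {
        VMOperationStub op;
        // ... VMThread::execute(&op) would run here in the VM ...
        if (op.gc_locked()) {
          // GC was locked out: retry before consulting prologue success,
          // since the prologue may have succeeded anyway.
          continue;
        }
        if (op.prologue_succeeded()) {
          return op.result();
        }
        return nullptr;  // simplified; the real loop retries per policy
      }
    }
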
--- a/src/share/vm/memory/defNewGeneration.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/defNewGeneration.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -38,20 +38,9 @@
 #include "oops/instanceRefKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/stack.inline.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 //
 // DefNewGeneration functions.
@@ -561,6 +550,11 @@
   return allocate(size, is_tlab);
 }
 
+void DefNewGeneration::adjust_desired_tenuring_threshold() {
+  // Set the desired survivor size to half the real survivor space
+  _tenuring_threshold =
+    age_table()->compute_tenuring_threshold(to()->capacity()/HeapWordSize);
+}
 
 void DefNewGeneration::collect(bool   full,
                                bool   clear_all_soft_refs,
@@ -660,9 +654,7 @@
 
     assert(to()->is_empty(), "to space should be empty now");
 
-    // Set the desired survivor size to half the real survivor space
-    _tenuring_threshold =
-      age_table()->compute_tenuring_threshold(to()->capacity()/HeapWordSize);
+    adjust_desired_tenuring_threshold();
 
     // A successful scavenge should restart the GC time limit count which is
     // for full GC's.
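
For context on adjust_desired_tenuring_threshold() above: compute_tenuring_threshold() is handed half the survivor capacity, in words, as a target and, as I understand the age-table logic, returns the youngest age at which the cumulative volume of surviving objects would exceed that target. A hedged sketch of that accumulation, assuming a sizes-by-age table; this is not the VM's actual ageTable code:

    #include <cstddef>

    size_t compute_threshold_sketch(const size_t* sizes_by_age, size_t table_size,
                                    size_t desired_survivor_words,
                                    size_t max_threshold) {
      size_t total = 0;
      size_t age = 1;
      while (age < table_size) {
        total += sizes_by_age[age];     // words surviving at this age
        if (total > desired_survivor_words) break;
        age++;
      }
      return age < max_threshold ? age : max_threshold;
    }
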
--- a/src/share/vm/memory/defNewGeneration.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/defNewGeneration.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -124,7 +124,9 @@
     _should_allocate_from_space = true;
   }
 
- protected:
+  // Tenuring
+  void adjust_desired_tenuring_threshold();
+
   // Spaces
   EdenSpace*       _eden_space;
   ContiguousSpace* _from_space;
--- a/src/share/vm/memory/filemap.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/filemap.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,10 +25,12 @@
 #include "precompiled.hpp"
 #include "classfile/classLoader.hpp"
 #include "classfile/symbolTable.hpp"
+#include "classfile/altHashing.hpp"
 #include "memory/filemap.hpp"
 #include "runtime/arguments.hpp"
 #include "runtime/java.hpp"
 #include "runtime/os.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/defaultStream.hpp"
 
 # include <sys/stat.h>
@@ -81,26 +83,50 @@
   close();
 }
 
+// Fill in the fileMapInfo structure with data about this VM instance.
 
-// Fill in the fileMapInfo structure with data about this VM instance.
+// This method copies the vm version info into header_version.  If the version is too
+// long then a truncated version, which has a hash code appended to it, is copied.
+//
+// Using a template enables this method to verify that header_version is an array of
+// length JVM_IDENT_MAX.  This ensures that the code that writes to the CDS file and
+// the code that reads the CDS file will both use the same size buffer.  Hence, both
+// will use identical truncation, which is necessary for matching truncated versions.
+template <int N> static void get_header_version(char (&header_version) [N]) {
+  assert(N == JVM_IDENT_MAX, "Bad header_version size");
+
+  const char *vm_version = VM_Version::internal_vm_info_string();
+  const int version_len = (int)strlen(vm_version);
+
+  if (version_len < (JVM_IDENT_MAX-1)) {
+    strcpy(header_version, vm_version);
+
+  } else {
+    // Get the hash value.  Use a static seed because the hash needs to return the same
+    // value over multiple jvm invocations.
+    unsigned int hash = AltHashing::murmur3_32(8191, (const jbyte*)vm_version, version_len);
+
+    // Truncate the ident, saving room for the 8 hex character hash value.
+    strncpy(header_version, vm_version, JVM_IDENT_MAX-9);
+
+    // Append the hash code as eight hex digits.
+    sprintf(&header_version[JVM_IDENT_MAX-9], "%08x", hash);
+    header_version[JVM_IDENT_MAX-1] = 0;  // Null terminate.
+  }
+}
 
 void FileMapInfo::populate_header(size_t alignment) {
   _header._magic = 0xf00baba2;
   _header._version = _current_version;
   _header._alignment = alignment;
+  _header._obj_alignment = ObjectAlignmentInBytes;
 
   // The following fields are for sanity checks for whether this archive
   // will function correctly with this JVM and the bootclasspath it's
   // invoked with.
 
   // JVM version string ... changes on each build.
-  const char *vm_version = VM_Version::internal_vm_info_string();
-  if (strlen(vm_version) < (JVM_IDENT_MAX-1)) {
-    strcpy(_header._jvm_ident, vm_version);
-  } else {
-    fail_stop("JVM Ident field for shared archive is too long"
-              " - truncated to <%s>", _header._jvm_ident);
-  }
+  get_header_version(_header._jvm_ident);
 
   // Build checks on classpath and jar files
   _header._num_jars = 0;
@@ -186,7 +212,11 @@
 
   // Remove the existing file in case another process has it open.
   remove(_full_path);
+#ifdef _WINDOWS  // if 0444 is used on Windows, then remove() will fail.
+  int fd = open(_full_path, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0744);
+#else
   int fd = open(_full_path, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0444);
+#endif
   if (fd < 0) {
     fail_stop("Unable to create shared archive file %s.", _full_path);
   }
@@ -344,25 +374,13 @@
     fail_continue(err_msg("Unable to reserved shared space at required address " INTPTR_FORMAT, requested_addr));
     return rs;
   }
+  // The reserved virtual memory is for mapping the class data sharing archive.
+  MemTracker::record_virtual_memory_type((address)rs.base(), mtClassShared);
+
   return rs;
 }
 
 // Memory map a region in the address space.
-
-char* FileMapInfo::map_region(int i, ReservedSpace rs) {
-  struct FileMapInfo::FileMapHeader::space_info* si = &_header._space[i];
-  size_t used = si->_used;
-  size_t size = align_size_up(used, os::vm_allocation_granularity());
-
-  ReservedSpace mapped_rs = rs.first_part(size, true, true);
-  ReservedSpace unmapped_rs = rs.last_part(size);
-  mapped_rs.release();
-
-  return map_region(i);
-}
-
-
-// Memory map a region in the address space.
 static const char* shared_region_name[] = { "ReadOnly", "ReadWrite", "MiscData", "MiscCode"};
 
 char* FileMapInfo::map_region(int i) {
@@ -380,6 +398,11 @@
     fail_continue(err_msg("Unable to map %s shared space at required address.", shared_region_name[i]));
     return NULL;
   }
+#ifdef _WINDOWS
+  // This call is Windows-only because the memory_type gets recorded for the other platforms
+  // in method FileMapInfo::reserve_shared_memory(), which is not called on Windows.
+  MemTracker::record_virtual_memory_type((address)base, mtClassShared);
+#endif
   return base;
 }
 
@@ -444,12 +467,19 @@
     fail_continue("The shared archive file has a bad magic number.");
     return false;
   }
-  if (strncmp(_header._jvm_ident, VM_Version::internal_vm_info_string(),
-              JVM_IDENT_MAX-1) != 0) {
+  char header_version[JVM_IDENT_MAX];
+  get_header_version(header_version);
+  if (strncmp(_header._jvm_ident, header_version, JVM_IDENT_MAX-1) != 0) {
     fail_continue("The shared archive file was created by a different"
                   " version or build of HotSpot.");
     return false;
   }
+  if (_header._obj_alignment != ObjectAlignmentInBytes) {
+    fail_continue("The shared archive file's ObjectAlignmentInBytes of %d"
+                  " does not equal the current ObjectAlignmentInBytes of %d.",
+                  _header._obj_alignment, ObjectAlignmentInBytes);
+    return false;
+  }
 
   // Cannot verify interpreter yet, as it can only be created after the GC
   // heap has been initialized.
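
The get_header_version() scheme above keeps the archive identity check working even for over-long version strings: truncate, then append a stable eight-hex-digit hash, so two different long versions are very unlikely to collide after truncation. A standalone sketch of the same logic; IDENT_MAX and stable_hash are illustrative placeholders, where the VM itself uses JVM_IDENT_MAX and AltHashing::murmur3_32 with a fixed seed:

    #include <cstdio>
    #include <cstring>

    enum { IDENT_MAX = 256 };   // stands in for JVM_IDENT_MAX

    // Placeholder hash; any hash that is stable across JVM runs works here.
    static unsigned int stable_hash(const char* s, int len) {
      unsigned int h = 8191;
      for (int i = 0; i < len; i++) h = h * 31 + (unsigned char)s[i];
      return h;
    }

    static void make_header_version(char (&out)[IDENT_MAX], const char* version) {
      const int len = (int)strlen(version);
      if (len < IDENT_MAX - 1) {
        strcpy(out, version);
      } else {
        // Truncate, leaving room for eight hex digits, then append the hash.
        strncpy(out, version, IDENT_MAX - 9);
        sprintf(&out[IDENT_MAX - 9], "%08x", stable_hash(version, len));
        out[IDENT_MAX - 1] = 0;   // null terminate
      }
    }
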
--- a/src/share/vm/memory/filemap.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/filemap.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -63,6 +63,7 @@
     int    _magic;                    // identify file type.
     int    _version;                  // (from enum, above.)
     size_t _alignment;                // how shared archive should be aligned
+    int    _obj_alignment;            // value of ObjectAlignmentInBytes
 
     struct space_info {
       int    _file_offset;   // sizeof(this) rounded to vm page size
@@ -125,7 +126,6 @@
                      size_t capacity, bool read_only, bool allow_exec);
   void  write_bytes(const void* buffer, int count);
   void  write_bytes_aligned(const void* buffer, int count);
-  char* map_region(int i, ReservedSpace rs);
   char* map_region(int i);
   void  unmap_region(int i);
   void  close();
--- a/src/share/vm/memory/freeBlockDictionary.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/freeBlockDictionary.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,18 +27,9 @@
 #include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
 #endif // SERIALGC
 #include "memory/freeBlockDictionary.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
+#include "runtime/thread.inline.hpp"
 
 #ifndef PRODUCT
 template <class Chunk> Mutex* FreeBlockDictionary<Chunk>::par_lock() const {
@@ -62,6 +53,9 @@
 }
 #endif
 
+template class FreeBlockDictionary<Metablock>;
+template class FreeBlockDictionary<Metachunk>;
+
 #ifndef SERIALGC
 // Explicitly instantiate for FreeChunk
 template class FreeBlockDictionary<FreeChunk>;
--- a/src/share/vm/memory/freeBlockDictionary.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/freeBlockDictionary.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -66,7 +66,7 @@
   virtual void       reset(HeapWord* addr, size_t size) = 0;
   virtual void       reset() = 0;
 
-  virtual void       dict_census_udpate(size_t size, bool split, bool birth) = 0;
+  virtual void       dict_census_update(size_t size, bool split, bool birth) = 0;
   virtual bool       coal_dict_over_populated(size_t size) = 0;
   virtual void       begin_sweep_dict_census(double coalSurplusPercent,
                        float inter_sweep_current, float inter_sweep_estimate,
--- a/src/share/vm/memory/freeList.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/freeList.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,8 @@
 #include "precompiled.hpp"
 #include "memory/freeBlockDictionary.hpp"
 #include "memory/freeList.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
 #include "memory/sharedHeap.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/mutex.hpp"
@@ -49,8 +51,6 @@
 {
   _size         = 0;
   _count        = 0;
-  _hint         = 0;
-  init_statistics();
 }
 
 template <class Chunk>
@@ -62,34 +62,50 @@
 {
   _size         = fc->size();
   _count        = 1;
-  _hint         = 0;
-  init_statistics();
-#ifndef PRODUCT
-  _allocation_stats.set_returned_bytes(size() * HeapWordSize);
-#endif
 }
 
 template <class Chunk>
-void FreeList<Chunk>::reset(size_t hint) {
+void FreeList<Chunk>::link_head(Chunk* v) {
+  assert_proper_lock_protection();
+  set_head(v);
+  // If this method is not used (just set the head instead),
+  // this check can be avoided.
+  if (v != NULL) {
+    v->link_prev(NULL);
+  }
+}
+
+
+
+template <class Chunk>
+void FreeList<Chunk>::reset() {
+  // Don't set the _size to 0 because this method is
+  // used with an existing list that has a size but which has
+  // been emptied.
+  // Don't clear the _protecting_lock of an existing list.
   set_count(0);
   set_head(NULL);
   set_tail(NULL);
-  set_hint(hint);
 }
 
 template <class Chunk>
-void FreeList<Chunk>::init_statistics(bool split_birth) {
-  _allocation_stats.initialize(split_birth);
+void FreeList<Chunk>::initialize() {
+#ifdef ASSERT
+  // Needed early because it might be checked in other initializing code.
+  set_protecting_lock(NULL);
+#endif
+  reset();
+  set_size(0);
 }
 
-template <class Chunk>
-Chunk* FreeList<Chunk>::get_chunk_at_head() {
+template <class Chunk_t>
+Chunk_t* FreeList<Chunk_t>::get_chunk_at_head() {
   assert_proper_lock_protection();
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-  Chunk* fc = head();
+  Chunk_t* fc = head();
   if (fc != NULL) {
-    Chunk* nextFC = fc->next();
+    Chunk_t* nextFC = fc->next();
     if (nextFC != NULL) {
       // The chunk fc being removed has a "next".  Set the "next" to the
       // "prev" of fc.
@@ -197,11 +213,6 @@
     link_tail(chunk);
   }
   increment_count(); // of # of chunks in list
-  DEBUG_ONLY(
-    if (record_return) {
-      increment_returned_bytes_by(size()*HeapWordSize);
-    }
-  )
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
   assert(head() == NULL || head()->size() == size(), "wrong item on list");
@@ -233,11 +244,6 @@
   }
   link_tail(chunk);
   increment_count();  // of # of chunks in list
-  DEBUG_ONLY(
-    if (record_return) {
-      increment_returned_bytes_by(size()*HeapWordSize);
-    }
-  )
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
   assert(head() == NULL || head()->size() == size(), "wrong item on list");
@@ -273,7 +279,7 @@
   }
 }
 
-// verify_chunk_in_free_list() is used to verify that an item is in this free list.
+// verify_chunk_in_free_list() is used to verify that an item is in this free list.
 // It is used as a debugging aid.
 template <class Chunk>
 bool FreeList<Chunk>::verify_chunk_in_free_list(Chunk* fc) const {
@@ -294,40 +300,14 @@
 
 #ifndef PRODUCT
 template <class Chunk>
-void FreeList<Chunk>::verify_stats() const {
-  // The +1 of the LH comparand is to allow some "looseness" in
-  // checking: we usually call this interface when adding a block
-  // and we'll subsequently update the stats; we cannot update the
-  // stats beforehand because in the case of the large-block BT
-  // dictionary for example, this might be the first block and
-  // in that case there would be no place that we could record
-  // the stats (which are kept in the block itself).
-  assert((_allocation_stats.prev_sweep() + _allocation_stats.split_births()
-          + _allocation_stats.coal_births() + 1)   // Total Production Stock + 1
-         >= (_allocation_stats.split_deaths() + _allocation_stats.coal_deaths()
-             + (ssize_t)count()),                // Total Current Stock + depletion
-         err_msg("FreeList " PTR_FORMAT " of size " SIZE_FORMAT
-                 " violates Conservation Principle: "
-                 "prev_sweep(" SIZE_FORMAT ")"
-                 " + split_births(" SIZE_FORMAT ")"
-                 " + coal_births(" SIZE_FORMAT ") + 1 >= "
-                 " split_deaths(" SIZE_FORMAT ")"
-                 " coal_deaths(" SIZE_FORMAT ")"
-                 " + count(" SSIZE_FORMAT ")",
-                 this, _size, _allocation_stats.prev_sweep(), _allocation_stats.split_births(),
-                 _allocation_stats.split_births(), _allocation_stats.split_deaths(),
-                 _allocation_stats.coal_deaths(), count()));
-}
-
-template <class Chunk>
 void FreeList<Chunk>::assert_proper_lock_protection_work() const {
-  assert(_protecting_lock != NULL, "Don't call this directly");
+  assert(protecting_lock() != NULL, "Don't call this directly");
   assert(ParallelGCThreads > 0, "Don't call this directly");
   Thread* thr = Thread::current();
   if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
     // assert that we are holding the freelist lock
   } else if (thr->is_GC_task_thread()) {
-    assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
+    assert(protecting_lock()->owned_by_self(), "FreeList RACE DETECTED");
   } else if (thr->is_Java_thread()) {
     assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
   } else {
@@ -350,21 +330,17 @@
 // to the call is a non-null string, it is printed in the first column;
 // otherwise, if the argument is null (the default), then the size of the
 // (free list) block is printed in the first column.
-template <class Chunk>
-void FreeList<Chunk>::print_on(outputStream* st, const char* c) const {
+template <class Chunk_t>
+void FreeList<Chunk_t>::print_on(outputStream* st, const char* c) const {
   if (c != NULL) {
     st->print("%16s", c);
   } else {
     st->print(SIZE_FORMAT_W(16), size());
   }
-  st->print("\t"
-           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t"
-           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n",
-           bfr_surp(),             surplus(),             desired(),             prev_sweep(),           before_sweep(),
-           count(),               coal_births(),          coal_deaths(),          split_births(),         split_deaths());
 }
 
+template class FreeList<Metablock>;
+template class FreeList<Metachunk>;
 #ifndef SERIALGC
-// Needs to be after the definitions have been seen.
 template class FreeList<FreeChunk>;
 #endif // SERIALGC
--- a/src/share/vm/memory/freeList.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/freeList.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -40,23 +40,19 @@
 // for that implementation.
 
 class Mutex;
-template <class Chunk> class TreeList;
-template <class Chunk> class PrintTreeCensusClosure;
 
-template <class Chunk>
+template <class Chunk_t>
 class FreeList VALUE_OBJ_CLASS_SPEC {
   friend class CompactibleFreeListSpace;
   friend class VMStructs;
-  friend class PrintTreeCensusClosure<Chunk>;
 
  private:
-  Chunk*        _head;          // Head of list of free chunks
-  Chunk*        _tail;          // Tail of list of free chunks
+  Chunk_t*      _head;          // Head of list of free chunks
+  Chunk_t*      _tail;          // Tail of list of free chunks
   size_t        _size;          // Size in Heap words of each chunk
   ssize_t       _count;         // Number of entries in list
-  size_t        _hint;          // next larger size list with a positive surplus
 
-  AllocationStats _allocation_stats; // allocation-related statistics
+ protected:
 
 #ifdef ASSERT
   Mutex*        _protecting_lock;
@@ -71,10 +67,6 @@
 #endif
   }
 
-  // Initialize the allocation statistics.
- protected:
-  void init_statistics(bool split_birth = false);
-  void set_count(ssize_t v) { _count = v;}
   void increment_count()    {
     _count++;
   }
@@ -89,52 +81,48 @@
   // Construct a list without any entries.
   FreeList();
   // Construct a list with "fc" as the first (and lone) entry in the list.
-  FreeList(Chunk* fc);
+  FreeList(Chunk_t* fc);
 
-  // Reset the head, tail, hint, and count of a free list.
-  void reset(size_t hint);
+  // Do initialization
+  void initialize();
+
+  // Reset the head, tail, and count of a free list.
+  void reset();
 
   // Declare the current free list to be protected by the given lock.
 #ifdef ASSERT
-  void set_protecting_lock(Mutex* protecting_lock) {
-    _protecting_lock = protecting_lock;
+  Mutex* protecting_lock() const { return _protecting_lock; }
+  void set_protecting_lock(Mutex* v) {
+    _protecting_lock = v;
   }
 #endif
 
   // Accessors.
-  Chunk* head() const {
+  Chunk_t* head() const {
     assert_proper_lock_protection();
     return _head;
   }
-  void set_head(Chunk* v) {
+  void set_head(Chunk_t* v) {
     assert_proper_lock_protection();
     _head = v;
     assert(!_head || _head->size() == _size, "bad chunk size");
   }
   // Set the head of the list and set the prev field of non-null
   // values to NULL.
-  void link_head(Chunk* v) {
-    assert_proper_lock_protection();
-    set_head(v);
-    // If this method is not used (just set the head instead),
-    // this check can be avoided.
-    if (v != NULL) {
-      v->link_prev(NULL);
-    }
-  }
+  void link_head(Chunk_t* v);
 
-  Chunk* tail() const {
+  Chunk_t* tail() const {
     assert_proper_lock_protection();
     return _tail;
   }
-  void set_tail(Chunk* v) {
+  void set_tail(Chunk_t* v) {
     assert_proper_lock_protection();
     _tail = v;
     assert(!_tail || _tail->size() == _size, "bad chunk size");
   }
   // Set the tail of the list and set the next field of non-null
   // values to NULL.
-  void link_tail(Chunk* v) {
+  void link_tail(Chunk_t* v) {
     assert_proper_lock_protection();
     set_tail(v);
     if (v != NULL) {
@@ -152,174 +140,45 @@
     assert_proper_lock_protection();
     _size = v;
   }
-  ssize_t count() const {
-    return _count;
-  }
-  size_t hint() const {
-    return _hint;
-  }
-  void set_hint(size_t v) {
-    assert_proper_lock_protection();
-    assert(v == 0 || _size < v, "Bad hint"); _hint = v;
-  }
-
-  // Accessors for statistics
-  AllocationStats* allocation_stats() {
-    assert_proper_lock_protection();
-    return &_allocation_stats;
-  }
-
-  ssize_t desired() const {
-    return _allocation_stats.desired();
-  }
-  void set_desired(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_desired(v);
-  }
-  void compute_desired(float inter_sweep_current,
-                       float inter_sweep_estimate,
-                       float intra_sweep_estimate) {
-    assert_proper_lock_protection();
-    _allocation_stats.compute_desired(_count,
-                                      inter_sweep_current,
-                                      inter_sweep_estimate,
-                                      intra_sweep_estimate);
-  }
-  ssize_t coal_desired() const {
-    return _allocation_stats.coal_desired();
-  }
-  void set_coal_desired(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_coal_desired(v);
-  }
-
-  ssize_t surplus() const {
-    return _allocation_stats.surplus();
-  }
-  void set_surplus(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_surplus(v);
-  }
-  void increment_surplus() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_surplus();
-  }
-  void decrement_surplus() {
-    assert_proper_lock_protection();
-    _allocation_stats.decrement_surplus();
-  }
+  ssize_t count() const { return _count; }
+  void set_count(ssize_t v) { _count = v;}
 
-  ssize_t bfr_surp() const {
-    return _allocation_stats.bfr_surp();
-  }
-  void set_bfr_surp(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_bfr_surp(v);
-  }
-  ssize_t prev_sweep() const {
-    return _allocation_stats.prev_sweep();
-  }
-  void set_prev_sweep(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_prev_sweep(v);
-  }
-  ssize_t before_sweep() const {
-    return _allocation_stats.before_sweep();
-  }
-  void set_before_sweep(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_before_sweep(v);
-  }
-
-  ssize_t coal_births() const {
-    return _allocation_stats.coal_births();
-  }
-  void set_coal_births(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_coal_births(v);
-  }
-  void increment_coal_births() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_coal_births();
-  }
+  size_t get_better_size() { return size(); }
 
-  ssize_t coal_deaths() const {
-    return _allocation_stats.coal_deaths();
-  }
-  void set_coal_deaths(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_coal_deaths(v);
-  }
-  void increment_coal_deaths() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_coal_deaths();
-  }
-
-  ssize_t split_births() const {
-    return _allocation_stats.split_births();
-  }
-  void set_split_births(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_split_births(v);
-  }
-  void increment_split_births() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_split_births();
-  }
-
-  ssize_t split_deaths() const {
-    return _allocation_stats.split_deaths();
-  }
-  void set_split_deaths(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_split_deaths(v);
-  }
-  void increment_split_deaths() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_split_deaths();
-  }
-
-  NOT_PRODUCT(
-    // For debugging.  The "_returned_bytes" in all the lists are summed
-    // and compared with the total number of bytes swept during a
-    // collection.
-    size_t returned_bytes() const { return _allocation_stats.returned_bytes(); }
-    void set_returned_bytes(size_t v) { _allocation_stats.set_returned_bytes(v); }
-    void increment_returned_bytes_by(size_t v) {
-      _allocation_stats.set_returned_bytes(_allocation_stats.returned_bytes() + v);
-    }
-  )
+  size_t returned_bytes() const { ShouldNotReachHere(); return 0; }
+  void set_returned_bytes(size_t v) {}
+  void increment_returned_bytes_by(size_t v) {}
 
   // Unlink head of list and return it.  Returns NULL if
   // the list is empty.
-  Chunk* get_chunk_at_head();
+  Chunk_t* get_chunk_at_head();
 
   // Remove the first "n" or "count", whichever is smaller, chunks from the
   // list, setting "fl", which is required to be empty, to point to them.
-  void getFirstNChunksFromList(size_t n, FreeList<Chunk>* fl);
+  void getFirstNChunksFromList(size_t n, FreeList<Chunk_t>* fl);
 
   // Unlink this chunk from its free list
-  void remove_chunk(Chunk* fc);
+  void remove_chunk(Chunk_t* fc);
 
   // Add this chunk to this free list.
-  void return_chunk_at_head(Chunk* fc);
-  void return_chunk_at_tail(Chunk* fc);
+  void return_chunk_at_head(Chunk_t* fc);
+  void return_chunk_at_tail(Chunk_t* fc);
 
   // Similar to returnChunk* but also records some diagnostic
   // information.
-  void return_chunk_at_head(Chunk* fc, bool record_return);
-  void return_chunk_at_tail(Chunk* fc, bool record_return);
+  void return_chunk_at_head(Chunk_t* fc, bool record_return);
+  void return_chunk_at_tail(Chunk_t* fc, bool record_return);
 
   // Prepend "fl" (whose size is required to be the same as that of "this")
   // to the front of "this" list.
-  void prepend(FreeList<Chunk>* fl);
+  void prepend(FreeList<Chunk_t>* fl);
 
   // Verify that the chunk is in the list.
   // Returns false if "fc" is not found.
-  bool verify_chunk_in_free_list(Chunk* fc) const;
+  bool verify_chunk_in_free_list(Chunk_t* fc) const;
 
   // Stats verification
-  void verify_stats() const PRODUCT_RETURN;
+//  void verify_stats() const { ShouldNotReachHere(); };
 
   // Printing support
   static void print_labels_on(outputStream* st, const char* c);
--- a/src/share/vm/memory/gcLocker.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/gcLocker.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,21 +29,18 @@
 #include "memory/genCollectedHeap.hpp"
 #include "memory/universe.hpp"
 #include "oops/oop.hpp"
+#include "runtime/thread.inline.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "os_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "os_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 
 // The direct lock/unlock calls do not force a collection if an unlock
--- a/src/share/vm/memory/genMarkSweep.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/genMarkSweep.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -43,21 +43,10 @@
 #include "runtime/fprofiler.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/synchronizer.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/events.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 void GenMarkSweep::invoke_at_safepoint(int level, ReferenceProcessor* rp,
   bool clear_all_softrefs) {
@@ -111,21 +100,8 @@
 
   mark_sweep_phase3(level);
 
-  VALIDATE_MARK_SWEEP_ONLY(
-    if (ValidateMarkSweep) {
-      guarantee(_root_refs_stack->length() == 0, "should be empty by now");
-    }
-  )
-
   mark_sweep_phase4();
 
-  VALIDATE_MARK_SWEEP_ONLY(
-    if (ValidateMarkSweep) {
-      guarantee(_live_oops->length() == _live_oops_moved_to->length(),
-                "should be the same size");
-    }
-  )
-
   restore_marks();
 
   // Set saved marks for allocation profiler (and other things? -- dld)
@@ -198,31 +174,6 @@
 
   _preserved_marks = (PreservedMark*)scratch;
   _preserved_count = 0;
-
-#ifdef VALIDATE_MARK_SWEEP
-  if (ValidateMarkSweep) {
-    _root_refs_stack    = new (ResourceObj::C_HEAP, mtGC) GrowableArray<void*>(100, true);
-    _other_refs_stack   = new (ResourceObj::C_HEAP, mtGC) GrowableArray<void*>(100, true);
-    _adjusted_pointers  = new (ResourceObj::C_HEAP, mtGC) GrowableArray<void*>(100, true);
-    _live_oops          = new (ResourceObj::C_HEAP, mtGC) GrowableArray<oop>(100, true);
-    _live_oops_moved_to = new (ResourceObj::C_HEAP, mtGC) GrowableArray<oop>(100, true);
-    _live_oops_size     = new (ResourceObj::C_HEAP, mtGC) GrowableArray<size_t>(100, true);
-  }
-  if (RecordMarkSweepCompaction) {
-    if (_cur_gc_live_oops == NULL) {
-      _cur_gc_live_oops           = new(ResourceObj::C_HEAP, mtGC) GrowableArray<HeapWord*>(100, true);
-      _cur_gc_live_oops_moved_to  = new(ResourceObj::C_HEAP, mtGC) GrowableArray<HeapWord*>(100, true);
-      _cur_gc_live_oops_size      = new(ResourceObj::C_HEAP, mtGC) GrowableArray<size_t>(100, true);
-      _last_gc_live_oops          = new(ResourceObj::C_HEAP, mtGC) GrowableArray<HeapWord*>(100, true);
-      _last_gc_live_oops_moved_to = new(ResourceObj::C_HEAP, mtGC) GrowableArray<HeapWord*>(100, true);
-      _last_gc_live_oops_size     = new(ResourceObj::C_HEAP, mtGC) GrowableArray<size_t>(100, true);
-    } else {
-      _cur_gc_live_oops->clear();
-      _cur_gc_live_oops_moved_to->clear();
-      _cur_gc_live_oops_size->clear();
-    }
-  }
-#endif
 }
 
 
@@ -236,19 +187,6 @@
   _preserved_oop_stack.clear(true);
   _marking_stack.clear();
   _objarray_stack.clear(true);
-
-#ifdef VALIDATE_MARK_SWEEP
-  if (ValidateMarkSweep) {
-    delete _root_refs_stack;
-    delete _other_refs_stack;
-    delete _adjusted_pointers;
-    delete _live_oops;
-    delete _live_oops_size;
-    delete _live_oops_moved_to;
-    _live_oops_index = 0;
-    _live_oops_index_at_perm = 0;
-  }
-#endif
 }
 
 void GenMarkSweep::mark_sweep_phase1(int level,
@@ -257,8 +195,6 @@
   TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty);
   trace(" 1");
 
-  VALIDATE_MARK_SWEEP_ONLY(reset_live_oop_tracking());
-
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
   // Because follow_root_closure is created statically, cannot
@@ -326,8 +262,6 @@
   TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty);
   trace("2");
 
-  VALIDATE_MARK_SWEEP_ONLY(reset_live_oop_tracking());
-
   gch->prepare_for_compaction();
 }
 
@@ -348,8 +282,6 @@
   // Need new claim bits for the pointer adjustment tracing.
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  VALIDATE_MARK_SWEEP_ONLY(reset_live_oop_tracking());
-
   // Because the two closures below are created statically, cannot
   // use OopsInGenClosure constructor which takes a generation,
   // as the Universe has not been created when the static constructors
@@ -404,10 +336,6 @@
   TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty);
   trace("4");
 
-  VALIDATE_MARK_SWEEP_ONLY(reset_live_oop_tracking());
-
   GenCompactClosure blk;
   gch->generation_iterate(&blk, true);
-
-  VALIDATE_MARK_SWEEP_ONLY(compaction_complete());
 }
--- a/src/share/vm/memory/iterator.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/iterator.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,6 +30,10 @@
   k->oops_do(_oop_closure);
 }
 
+void CLDToOopClosure::do_cld(ClassLoaderData* cld) {
+  cld->oops_do(_oop_closure, &_klass_closure, _must_claim_cld);
+}
+
 void ObjectToOopClosure::do_object(oop obj) {
   obj->oop_iterate(_cl);
 }
--- a/src/share/vm/memory/iterator.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/iterator.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -135,6 +135,20 @@
   virtual void do_klass(Klass* k);
 };
 
+class CLDToOopClosure {
+  OopClosure* _oop_closure;
+  KlassToOopClosure _klass_closure;
+  bool _must_claim_cld;
+
+ public:
+  CLDToOopClosure(OopClosure* oop_closure, bool must_claim_cld = true) :
+      _oop_closure(oop_closure),
+      _klass_closure(oop_closure),
+      _must_claim_cld(must_claim_cld) {}
+
+  void do_cld(ClassLoaderData* cld);
+};
+
 // ObjectClosure is used for iterating through an object space
 
 class ObjectClosure : public Closure {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/metablock.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "memory/metablock.hpp"
+#include "utilities/copy.hpp"
+#include "utilities/debug.hpp"
+
+// Blocks of space for metadata are allocated out of Metachunks.
+//
+// Metachunks are allocated out of MetadataVirtualspaces, and once
+// allocated there is no explicit link between a Metachunk and
+// the MetadataVirtualspace from which it was allocated.
+//
+// Each SpaceManager maintains a
+// list of the chunks it is using and the current chunk.  The current
+// chunk is the chunk from which allocations are done.  Space freed in
+// a chunk is placed on the free list of blocks (BlockFreelist) and
+// reused from there.
+//
+// Future modification
+//
+// The Metachunk can conceivably be replaced by the Chunk in
+// allocation.hpp.  Note that the latter Chunk is the space for
+// allocation (allocations from the chunk are out of the space in
+// the Chunk after the header for the Chunk), whereas Metachunks
+// point to space in a VirtualSpace.  To replace Metachunks with
+// Chunks, change Chunks so that they can be allocated out of a VirtualSpace.
+size_t Metablock::_min_block_byte_size = sizeof(Metablock);
+
+#ifdef ASSERT
+size_t Metablock::_overhead =
+  Chunk::aligned_overhead_size(sizeof(Metablock)) / BytesPerWord;
+#else
+size_t Metablock::_overhead = 0;
+#endif
+
+// New blocks returned by the Metaspace are zero-initialized.
+// We should instead fix the constructors not to assume this.
+Metablock* Metablock::initialize(MetaWord* p, size_t word_size) {
+  if (p == NULL) {
+    return NULL;
+  }
+
+  Metablock* result = (Metablock*) p;
+
+  // Clear the memory
+  Copy::fill_to_aligned_words((HeapWord*)result, word_size);
+#ifdef ASSERT
+  result->set_word_size(word_size);
+#endif
+  return result;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/metablock.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#ifndef SHARE_VM_MEMORY_METABLOCK_HPP
+#define SHARE_VM_MEMORY_METABLOCK_HPP
+
+// A Metablock is the unit of allocation from a Chunk.  It is initialized
+// with the size of the requested allocation.  That size is overwritten
+// once the allocation returns.
+//
+// A Metablock may be reused by its SpaceManager but is never moved between
+// SpaceManagers.  There is no explicit link to the Metachunk
+// from which it was allocated.  A Metablock may be deallocated and
+// put on a freelist, but the space is never freed; rather,
+// the Metachunk it is a part of will be deallocated when its
+// associated class loader is collected.
+
+class Metablock VALUE_OBJ_CLASS_SPEC {
+  friend class VMStructs;
+ private:
+  // Used to align the allocation (see below).
+  union block_t {
+    void* _data[3];
+    struct header_t {
+      size_t _word_size;
+      Metablock* _next;
+      Metablock* _prev;
+    } _header;
+  } _block;
+  static size_t _min_block_byte_size;
+  static size_t _overhead;
+
+  typedef union block_t Block;
+  typedef struct header_t Header;
+  const Block* block() const { return &_block; }
+  const Block::header_t* header() const { return &(block()->_header); }
+ public:
+
+  static Metablock* initialize(MetaWord* p, size_t word_size);
+
+  // This places the body of the block at a 2 word boundary
+  // because every block starts on a 2 word boundary.  Work out
+  // how to keep the body on a 2 word boundary if the block
+  // starts on an arbitrary boundary.  JJJ
+
+  size_t word_size() const  { return header()->_word_size; }
+  void set_word_size(size_t v) { _block._header._word_size = v; }
+  size_t size() const volatile { return _block._header._word_size; }
+  void set_size(size_t v) { _block._header._word_size = v; }
+  Metablock* next() const { return header()->_next; }
+  void set_next(Metablock* v) { _block._header._next = v; }
+  Metablock* prev() const { return header()->_prev; }
+  void set_prev(Metablock* v) { _block._header._prev = v; }
+
+  static size_t min_block_byte_size() { return _min_block_byte_size; }
+  static size_t overhead() { return _overhead; }
+
+  bool is_free()                 { return header()->_word_size != 0; }
+  void clear_next()              { set_next(NULL); }
+  void link_prev(Metablock* ptr) { set_prev(ptr); }
+  uintptr_t* end()              { return ((uintptr_t*) this) + size(); }
+  bool cantCoalesce() const     { return false; }
+  void link_next(Metablock* ptr) { set_next(ptr); }
+  void link_after(Metablock* ptr){
+    link_next(ptr);
+    if (ptr != NULL) ptr->link_prev(this);
+  }
+
+  // Should not be needed in a free list of Metablocks
+  void markNotFree()            { ShouldNotReachHere(); }
+
+  // Debug support
+#ifdef ASSERT
+  void* prev_addr() const { return (void*)&_block._header._prev; }
+  void* next_addr() const { return (void*)&_block._header._next; }
+  void* size_addr() const { return (void*)&_block._header._word_size; }
+#endif
+  bool verify_chunk_in_free_list(Metablock* tc) const { return true; }
+  bool verify_par_locked() { return true; }
+
+  void assert_is_mangled() const {/* Don't check */}
+};
+#endif // SHARE_VM_MEMORY_METABLOCK_HPP
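
The union in Metablock above sizes every block to at least three pointer-words, so a freed block can always hold the freelist header (size plus next/prev links) in place, overlaying the payload area of a live block. A minimal sketch of the same trick; BlockSketch is an illustrative stand-in, and the size assertion holds on typical LP64/ILP32 platforms where size_t matches the pointer width:

    #include <cstddef>

    struct BlockSketch {
      union {
        void* _data[3];              // forces minimum size: three pointer words
        struct {
          size_t       _word_size;   // doubles as the "is free" marker
          BlockSketch* _next;
          BlockSketch* _prev;
        } _header;
      } _block;
    };

    // The freelist links cost no extra space per allocation: they reuse
    // the block body once the block is freed.
    static_assert(sizeof(BlockSketch) == 3 * sizeof(void*),
                  "header fits exactly in the minimum block");
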
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/metachunk.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "memory/metachunk.hpp"
+#include "utilities/copy.hpp"
+#include "utilities/debug.hpp"
+
+//
+// Future modification
+//
+// The Metachunk can conceivably be replaced by the Chunk in
+// allocation.hpp.  Note that the latter Chunk is the space for
+// allocation (allocations from the chunk are out of the space in
+// the Chunk after the header for the Chunk), whereas Metachunks
+// point to space in a VirtualSpace.  To replace Metachunks with
+// Chunks, change Chunks so that they can be allocated out of a VirtualSpace.
+
+const size_t metadata_chunk_initialize = 0xf7f7f7f7;
+
+size_t Metachunk::_overhead =
+  Chunk::aligned_overhead_size(sizeof(Metachunk)) / BytesPerWord;
+
+// Metachunk methods
+
+Metachunk* Metachunk::initialize(MetaWord* ptr, size_t word_size) {
+  // Set bottom, top, and end.  Allow space for the Metachunk itself
+  Metachunk* chunk = (Metachunk*) ptr;
+
+  MetaWord* chunk_bottom = ptr + _overhead;
+  chunk->set_bottom(ptr);
+  chunk->set_top(chunk_bottom);
+  MetaWord* chunk_end = ptr + word_size;
+  assert(chunk_end > chunk_bottom, "Chunk must be too small");
+  chunk->set_end(chunk_end);
+  chunk->set_next(NULL);
+  chunk->set_prev(NULL);
+  chunk->set_word_size(word_size);
+#ifdef ASSERT
+  size_t data_word_size = pointer_delta(chunk_end, chunk_bottom, sizeof(MetaWord));
+  Copy::fill_to_words((HeapWord*) chunk_bottom, data_word_size, metadata_chunk_initialize);
+#endif
+  return chunk;
+}
+
+
+MetaWord* Metachunk::allocate(size_t word_size) {
+  MetaWord* result = NULL;
+  // If available, bump the pointer to allocate.
+  if (free_word_size() >= word_size) {
+    result = _top;
+    _top = _top + word_size;
+  }
+  return result;
+}
+
+// _bottom points to the start of the chunk including the overhead.
+size_t Metachunk::used_word_size() const {
+  return pointer_delta(_top, _bottom, sizeof(MetaWord));
+}
+
+size_t Metachunk::free_word_size() const {
+  return pointer_delta(_end, _top, sizeof(MetaWord));
+}
+
+size_t Metachunk::capacity_word_size() const {
+  return pointer_delta(_end, _bottom, sizeof(MetaWord));
+}
+
+void Metachunk::print_on(outputStream* st) const {
+  st->print_cr("Metachunk:"
+               " bottom " PTR_FORMAT " top " PTR_FORMAT
+               " end " PTR_FORMAT " size " SIZE_FORMAT,
+               bottom(), top(), end(), word_size());
+  if (Verbose) {
+    st->print_cr("    used " SIZE_FORMAT " free " SIZE_FORMAT,
+                 used_word_size(), free_word_size());
+  }
+}
+
+#ifndef PRODUCT
+void Metachunk::mangle() {
+  // Mangle the payload of the chunk and not the links that
+  // maintain the list of chunks.
+  HeapWord* start = (HeapWord*)(bottom() + overhead());
+  size_t word_size = capacity_word_size() - overhead();
+  Copy::fill_to_words(start, word_size, metadata_chunk_initialize);
+}
+#endif // PRODUCT
+
+void Metachunk::verify() {
+#ifdef ASSERT
+  // Cannot walk through the blocks unless the blocks have
+  // headers with sizes.
+  assert(_bottom <= _top &&
+         _top <= _end,
+         "Chunk has been smashed");
+#endif
+  return;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/metachunk.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#ifndef SHARE_VM_MEMORY_METACHUNK_HPP
+#define SHARE_VM_MEMORY_METACHUNK_HPP
+
+//  Metachunk - Quantum of allocation from a VirtualSpace
+//    Metachunks are reused (when freed are put on a global freelist) and
+//    have no permanent association to a SpaceManager.
+
+//            +--------------+ <- end
+//            |              |          --+       ---+
+//            |              |            | free     |
+//            |              |            |          |
+//            |              |            |          | capacity
+//            |              |            |          |
+//            |              | <- top   --+          |
+//            |              |           ---+        |
+//            |              |              | used   |
+//            |              |              |        |
+//            |              |              |        |
+//            +--------------+ <- bottom ---+     ---+
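+//
+//  For example, a 512-word chunk with a (hypothetical) 2-word header
+//  starts out with top == bottom + 2, so capacity_word_size() == 512,
+//  used_word_size() == 2, and free_word_size() == 510; each allocation
+//  simply bumps top toward end.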
+
+class Metachunk VALUE_OBJ_CLASS_SPEC {
+  // link to support lists of chunks
+  Metachunk* _next;
+  Metachunk* _prev;
+
+  MetaWord* _bottom;
+  MetaWord* _end;
+  MetaWord* _top;
+  size_t _word_size;
+  // Used in a guarantee() so included in product builds
+  // even though it is only for debugging.
+  bool _is_free;
+
+  // Metachunks are allocated out of a MetadataVirtualSpace and
+  // use some of its space to describe themselves (plus alignment
+  // considerations).  Metadata is allocated in the rest of the chunk.
+  // This size is the overhead of maintaining the Metachunk within
+  // the space.
+  static size_t _overhead;
+
+  void set_bottom(MetaWord* v) { _bottom = v; }
+  void set_end(MetaWord* v) { _end = v; }
+  void set_top(MetaWord* v) { _top = v; }
+  void set_word_size(size_t v) { _word_size = v; }
+ public:
+#ifdef ASSERT
+  Metachunk() : _next(NULL), _prev(NULL), _bottom(NULL), _end(NULL),
+    _top(NULL), _is_free(false) {}
+#else
+  Metachunk() : _next(NULL), _prev(NULL), _bottom(NULL), _end(NULL),
+    _top(NULL) {}
+#endif
+
+  // Used to add a Metachunk to a list of Metachunks
+  void set_next(Metachunk* v) { _next = v; assert(v != this, "Boom");}
+  void set_prev(Metachunk* v) { _prev = v; assert(v != this, "Boom");}
+
+  MetaWord* allocate(size_t word_size);
+  static Metachunk* initialize(MetaWord* ptr, size_t word_size);
+
+  // Accessors
+  Metachunk* next() const { return _next; }
+  Metachunk* prev() const { return _prev; }
+  MetaWord* bottom() const { return _bottom; }
+  MetaWord* end() const { return _end; }
+  MetaWord* top() const { return _top; }
+  size_t word_size() const { return _word_size; }
+  size_t size() const volatile { return _word_size; }
+  void set_size(size_t v) { _word_size = v; }
+  bool is_free() { return _is_free; }
+  void set_is_free(bool v) { _is_free = v; }
+  static size_t overhead() { return _overhead; }
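+  // The methods below exist so a Metachunk can be kept in the FreeList /
+  // BinaryTreeDictionary templates that back the humongous-chunk
+  // dictionary; the non-const end() computes the same boundary as _end
+  // but with the type the template code expects.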
+  void clear_next()              { set_next(NULL); }
+  void link_prev(Metachunk* ptr) { set_prev(ptr); }
+  uintptr_t* end()              { return ((uintptr_t*) this) + size(); }
+  bool cantCoalesce() const     { return false; }
+  void link_next(Metachunk* ptr) { set_next(ptr); }
+  void link_after(Metachunk* ptr){
+    link_next(ptr);
+    if (ptr != NULL) ptr->link_prev(this);
+  }
+
+  // Reset top to bottom so chunk can be reused.
+  void reset_empty() { _top = (_bottom + _overhead); _next = NULL; _prev = NULL; }
+  bool is_empty() { return _top == (_bottom + _overhead); }
+
+  // used (has been allocated)
+  // free (available for future allocations)
+  // capacity (total size of chunk)
+  size_t used_word_size() const;
+  size_t free_word_size() const;
+  size_t capacity_word_size() const;
+
+  // Debug support
+#ifdef ASSERT
+  void* prev_addr() const { return (void*)&_prev; }
+  void* next_addr() const { return (void*)&_next; }
+  void* size_addr() const { return (void*)&_word_size; }
+#endif
+  bool verify_chunk_in_free_list(Metachunk* tc) const { return true; }
+  bool verify_par_locked() { return true; }
+
+  void assert_is_mangled() const { /* Don't check. */ }
+
+  NOT_PRODUCT(void mangle();)
+
+  void print_on(outputStream* st) const;
+  void verify();
+};
+#endif  // SHARE_VM_MEMORY_METACHUNK_HPP
--- a/src/share/vm/memory/metadataFactory.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/metadataFactory.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -66,7 +66,11 @@
     if (data != NULL) {
       assert(loader_data != NULL, "shouldn't pass null");
       int size = data->size();
-      loader_data->metaspace_non_null()->deallocate((MetaWord*)data, size, false);
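+      // When dumping the CDS archive these arrays were carved out of the
+      // shared read-only metaspace, so they must be returned there.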
+      if (DumpSharedSpaces) {
+        loader_data->ro_metaspace()->deallocate((MetaWord*)data, size, false);
+      } else {
+        loader_data->metaspace_non_null()->deallocate((MetaWord*)data, size, false);
+      }
     }
   }
 
@@ -77,6 +81,7 @@
       assert(loader_data != NULL, "shouldn't pass null");
       int size = md->size();
       // Call metadata's deallocate function which will call deallocate fields
+      assert(!DumpSharedSpaces, "cannot deallocate metadata when dumping CDS archive");
       assert(!md->on_stack(), "can't deallocate things on stack");
       md->deallocate_contents(loader_data);
       loader_data->metaspace_non_null()->deallocate((MetaWord*)md, size, md->is_klass());
--- a/src/share/vm/memory/metaspace.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/metaspace.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,49 +24,61 @@
 #include "precompiled.hpp"
 #include "gc_interface/collectedHeap.hpp"
 #include "memory/binaryTreeDictionary.hpp"
+#include "memory/freeList.hpp"
 #include "memory/collectorPolicy.hpp"
 #include "memory/filemap.hpp"
 #include "memory/freeList.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
 #include "memory/metaspace.hpp"
 #include "memory/metaspaceShared.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/mutex.hpp"
+#include "runtime/orderAccess.hpp"
 #include "services/memTracker.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/debug.hpp"
 
-// Define this macro to deallocate Metablock.  If not defined,
-// blocks are not yet deallocated and are only mangled.
-#undef DEALLOCATE_BLOCKS
-
-// Easily recognizable patterns
-// These patterns can be the same in 32bit or 64bit since
-// they only have to be easily recognizable.
-const void* metaspace_allocation_leader = (void*) 0X11111111;
-const void* metaspace_allocation_trailer = (void*) 0X77777777;
+typedef BinaryTreeDictionary<Metablock, FreeList> BlockTreeDictionary;
+typedef BinaryTreeDictionary<Metachunk, FreeList> ChunkTreeDictionary;
+// Set this constant to true to enable slow integrity checking of
+// the free chunk lists.
+const bool metaspace_slow_verify = false;
+
 
 // Parameters for stress mode testing
 const uint metadata_deallocate_a_lot_block = 10;
 const uint metadata_deallocate_a_lock_chunk = 3;
 size_t const allocation_from_dictionary_limit = 64 * K;
-const size_t metadata_chunk_initialize = 0xf7f7f7f7;
 const size_t metadata_deallocate = 0xf5f5f5f5;
-const size_t metadata_space_manager_allocate = 0xf3f3f3f3;
 
 MetaWord* last_allocated = 0;
 
 // Used in declarations in SpaceManager and ChunkManager
 enum ChunkIndex {
-  SmallIndex = 0,
-  MediumIndex = 1,
-  HumongousIndex = 2,
-  NumberOfFreeLists = 3
+  ZeroIndex = 0,
+  SpecializedIndex = ZeroIndex,
+  SmallIndex = SpecializedIndex + 1,
+  MediumIndex = SmallIndex + 1,
+  HumongousIndex = MediumIndex + 1,
+  NumberOfFreeLists = 3,
+  NumberOfInUseLists = 4
+};
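+
+// NumberOfFreeLists covers only the specialized, small, and medium free
+// lists; free humongous chunks are kept in a dictionary instead.
+// NumberOfInUseLists adds a fourth slot so a SpaceManager can still
+// track the humongous chunks it has in use.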
+
+enum ChunkSizes {    // in words.
+  ClassSpecializedChunk = 128,
+  SpecializedChunk = 128,
+  ClassSmallChunk = 256,
+  SmallChunk = 512,
+  ClassMediumChunk = 1 * K,
+  MediumChunk = 8 * K,
+  HumongousChunkGranularity = 8
 };
 
 static ChunkIndex next_chunk_index(ChunkIndex i) {
-  assert(i < NumberOfFreeLists, "Out of bound");
+  assert(i < NumberOfInUseLists, "Out of bound");
   return (ChunkIndex) (i+1);
 }
 
@@ -91,174 +103,6 @@
 // chunk is the chunk from which allocations are done.  Space freed in
 // a chunk is placed on the free list of blocks (BlockFreelist) and
 // reused from there.
-//
-// Future modification
-//
-// The Metachunk can conceivable be replaced by the Chunk in
-// allocation.hpp.  Note that the latter Chunk is the space for
-// allocation (allocations from the chunk are out of the space in
-// the Chunk after the header for the Chunk) where as Metachunks
-// point to space in a VirtualSpace.  To replace Metachunks with
-// Chunks, change Chunks so that they can be allocated out of a VirtualSpace.
-//
-
-// Metablock are the unit of allocation from a Chunk.  It contains
-// the size of the requested allocation in a debug build.
-// Also in a debug build it has a marker before and after the
-// body of the block. The address of the body is the address returned
-// by the allocation.
-//
-// Layout in a debug build.  In a product build only the body is present.
-//
-//     +-----------+-----------+------------+     +-----------+
-//     | word size | leader    | body       | ... | trailer   |
-//     +-----------+-----------+------------+     +-----------+
-//
-// A Metablock may be reused by its SpaceManager but are never moved between
-// SpaceManagers.  There is no explicit link to the Metachunk
-// from which it was allocated.  Metablock are not deallocated, rather
-// the Metachunk it is a part of will be deallocated when it's
-// associated class loader is collected.
-//
-// When the word size of a block is passed in to the deallocation
-// call the word size no longer needs to be part of a Metablock.
-
-class Metablock {
-  friend class VMStructs;
- private:
-  // Used to align the allocation (see below) and for debugging.
-#ifdef ASSERT
-  struct {
-    size_t _word_size;
-    void*  _leader;
-  } _header;
-  void* _data[1];
-#endif
-  static size_t _overhead;
-
-#ifdef ASSERT
-  void set_word_size(size_t v) { _header._word_size = v; }
-  void* leader() { return _header._leader; }
-  void* trailer() {
-    jlong index = (jlong) _header._word_size - sizeof(_header)/BytesPerWord - 1;
-    assert(index > 0, err_msg("Bad indexling of trailer %d", index));
-    void** ptr = &_data[index];
-    return *ptr;
-  }
-  void set_leader(void* v) { _header._leader = v; }
-  void set_trailer(void* v) {
-    void** ptr = &_data[_header._word_size - sizeof(_header)/BytesPerWord - 1];
-    *ptr = v;
-  }
- public:
-  size_t word_size() { return _header._word_size; }
-#endif
- public:
-
-  static Metablock* initialize(MetaWord* p, size_t word_size);
-
-  // This places the body of the block at a 2 word boundary
-  // because every block starts on a 2 word boundary.  Work out
-  // how to make the body on a 2 word boundary if the block
-  // starts on a arbitrary boundary.  JJJ
-
-#ifdef ASSERT
-  MetaWord* data() { return (MetaWord*) &_data[0]; }
-#else
-  MetaWord* data() { return (MetaWord*) this; }
-#endif
-  static Metablock* metablock_from_data(MetaWord* p) {
-#ifdef ASSERT
-    size_t word_offset = offset_of(Metablock, _data)/BytesPerWord;
-    Metablock* result = (Metablock*) (p - word_offset);
-    return result;
-#else
-    return (Metablock*) p;
-#endif
-  }
-
-  static size_t overhead() { return _overhead; }
-  void verify();
-};
-
-//  Metachunk - Quantum of allocation from a Virtualspace
-//    Metachunks are reused (when freed are put on a global freelist) and
-//    have no permanent association to a SpaceManager.
-
-//            +--------------+ <- end
-//            |              |          --+       ---+
-//            |              |            | free     |
-//            |              |            |          |
-//            |              |            |          | capacity
-//            |              |            |          |
-//            |              | <- top   --+          |
-//            |              |           ---+        |
-//            |              |              | used   |
-//            |              |              |        |
-//            |              |              |        |
-//            +--------------+ <- bottom ---+     ---+
-
-class Metachunk VALUE_OBJ_CLASS_SPEC {
-  // link to support lists of chunks
-  Metachunk* _next;
-
-  MetaWord* _bottom;
-  MetaWord* _end;
-  MetaWord* _top;
-  size_t _word_size;
-
-  // Metachunks are allocated out of a MetadataVirtualSpace and
-  // and use some of its space to describe itself (plus alignment
-  // considerations).  Metadata is allocated in the rest of the chunk.
-  // This size is the overhead of maintaining the Metachunk within
-  // the space.
-  static size_t _overhead;
-
-  void set_bottom(MetaWord* v) { _bottom = v; }
-  void set_end(MetaWord* v) { _end = v; }
-  void set_top(MetaWord* v) { _top = v; }
-  void set_word_size(size_t v) { _word_size = v; }
- public:
-
-  // Used to add a Metachunk to a list of Metachunks
-  void set_next(Metachunk* v) { _next = v; assert(v != this, "Boom");}
-
-  Metablock* allocate(size_t word_size);
-  static Metachunk* initialize(MetaWord* ptr, size_t word_size);
-
-  // Accessors
-  Metachunk* next() const { return _next; }
-  MetaWord* bottom() const { return _bottom; }
-  MetaWord* end() const { return _end; }
-  MetaWord* top() const { return _top; }
-  size_t word_size() const { return _word_size; }
-  static size_t overhead() { return _overhead; }
-
-  // Reset top to bottom so chunk can be reused.
-  void reset_empty() { _top = (_bottom + _overhead); }
-  bool is_empty() { return _top == (_bottom + _overhead); }
-
-  // used (has been allocated)
-  // free (available for future allocations)
-  // capacity (total size of chunk)
-  size_t used_word_size();
-  size_t free_word_size();
-  size_t capacity_word_size();
-
-#ifdef ASSERT
-  void mangle() {
-    // Mangle the payload of the chunk and not the links that
-    // maintain list of chunks.
-    HeapWord* start = (HeapWord*)(bottom() + overhead());
-    size_t word_size = capacity_word_size() - overhead();
-    Copy::fill_to_words(start, word_size, metadata_chunk_initialize);
-  }
-#endif // ASSERT
-
-  void print_on(outputStream* st) const;
-  void verify();
-};
-
 
 // Pointer to list of Metachunks.
 class ChunkList VALUE_OBJ_CLASS_SPEC {
@@ -292,7 +136,11 @@
   //   SmallChunk
   //   MediumChunk
   //   HumongousChunk
-  ChunkList _free_chunks[3];
+  ChunkList _free_chunks[NumberOfFreeLists];
+
+  // Humongous chunks
+  ChunkTreeDictionary _humongous_dictionary;
 
   // ChunkManager in all lists of this type
   size_t _free_chunks_total;
@@ -313,7 +161,17 @@
   size_t sum_free_chunks_count();
 
   void locked_verify_free_chunks_total();
+  void slow_locked_verify_free_chunks_total() {
+    if (metaspace_slow_verify) {
+      locked_verify_free_chunks_total();
+    }
+  }
   void locked_verify_free_chunks_count();
+  void slow_locked_verify_free_chunks_count() {
+    if (metaspace_slow_verify) {
+      locked_verify_free_chunks_count();
+    }
+  }
   void verify_free_chunks_count();
 
  public:
@@ -324,6 +182,10 @@
   Metachunk* chunk_freelist_allocate(size_t word_size);
   void chunk_freelist_deallocate(Metachunk* chunk);
 
+  // Map a size to a list index assuming that there are lists
+  // for specialized, small, medium, and humongous chunks.
+  static ChunkIndex list_index(size_t size);
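+  // (Illustrative mapping, assuming exact default sizes: SpecializedChunk
+  // maps to SpecializedIndex, SmallChunk to SmallIndex, MediumChunk to
+  // MediumIndex, and any other size to HumongousIndex.)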
+
   // Total of the space in the free chunks list
   size_t free_chunks_total();
   size_t free_chunks_total_in_bytes();
@@ -335,9 +197,9 @@
     Atomic::add_ptr(count, &_free_chunks_count);
     Atomic::add_ptr(v, &_free_chunks_total);
   }
-  ChunkList* free_medium_chunks() { return &_free_chunks[1]; }
-  ChunkList* free_small_chunks() { return &_free_chunks[0]; }
-  ChunkList* free_humongous_chunks() { return &_free_chunks[2]; }
+  ChunkTreeDictionary* humongous_dictionary() {
+    return &_humongous_dictionary;
+  }
 
   ChunkList* free_chunks(ChunkIndex index);
 
@@ -351,46 +213,50 @@
 
   // Debug support
   void verify();
+  void slow_verify() {
+    if (metaspace_slow_verify) {
+      verify();
+    }
+  }
   void locked_verify();
+  void slow_locked_verify() {
+    if (metaspace_slow_verify) {
+      locked_verify();
+    }
+  }
   void verify_free_chunks_total();
 
   void locked_print_free_chunks(outputStream* st);
   void locked_print_sum_free_chunks(outputStream* st);
+
+  void print_on(outputStream* st);
 };
 
 
 // Used to manage the free list of Metablocks (a block corresponds
 // to the allocation of a quantum of metadata).
 class BlockFreelist VALUE_OBJ_CLASS_SPEC {
-#ifdef DEALLOCATE_BLOCKS
-  BinaryTreeDictionary<Metablock>* _dictionary;
-#endif
-  static Metablock* initialize_free_chunk(Metablock* block, size_t word_size);
-
-#ifdef DEALLOCATE_BLOCKS
+  BlockTreeDictionary* _dictionary;
+  static Metablock* initialize_free_chunk(MetaWord* p, size_t word_size);
+
   // Accessors
-  BinaryTreeDictionary<Metablock>* dictionary() const { return _dictionary; }
-#endif
+  BlockTreeDictionary* dictionary() const { return _dictionary; }
 
  public:
   BlockFreelist();
   ~BlockFreelist();
 
   // Get and return a block to the free list
-  Metablock* get_block(size_t word_size);
-  void return_block(Metablock* block, size_t word_size);
-
-  size_t totalSize() {
-#ifdef DEALLOCATE_BLOCKS
-    if (dictionary() == NULL) {
-      return 0;
-    } else {
-      return dictionary()->totalSize();
-    }
-#else
+  MetaWord* get_block(size_t word_size);
+  void return_block(MetaWord* p, size_t word_size);
+
-    return 0;
-#endif
+  size_t total_size() {
+    if (dictionary() == NULL) {
+      return 0;
+    } else {
+      return dictionary()->total_size();
+    }
+  }
 
   void print_on(outputStream* st) const;
 };
@@ -457,10 +323,12 @@
   bool expand_by(size_t words, bool pre_touch = false);
   bool shrink_by(size_t words);
 
+#ifdef ASSERT
   // Debug support
   static void verify_virtual_space_total();
   static void verify_virtual_space_count();
   void mangle();
+#endif
 
   void print_on(outputStream* st) const;
 };
@@ -547,7 +415,14 @@
   VirtualSpaceList(size_t word_size);
   VirtualSpaceList(ReservedSpace rs);
 
-  Metachunk* get_new_chunk(size_t word_size, size_t grow_chunks_by_words);
+  Metachunk* get_new_chunk(size_t word_size,
+                           size_t grow_chunks_by_words,
+                           size_t medium_chunk_bunch);
+
+  // Get the first chunk for a Metaspace.  Used for
+  // special cases such as the boot class loader, reflection
+  // class loader and anonymous class loader.
+  Metachunk* get_initialization_chunk(size_t word_size, size_t chunk_bunch);
 
   VirtualSpaceNode* current_virtual_space() {
     return _current_virtual_space;
@@ -600,7 +475,6 @@
   };
 };
 
-
 class Metadebug : AllStatic {
   // Debugging support for Metaspaces
   static int _deallocate_block_a_lot_count;
@@ -649,13 +523,17 @@
   friend class Metadebug;
 
  private:
+
   // protects allocations and contains.
   Mutex* const _lock;
 
+  // Chunk related size
+  size_t _medium_chunk_bunch;
+
   // List of chunks in use by this SpaceManager.  Allocations
   // are done from the current chunk.  The list is used for deallocating
   // chunks when the SpaceManager is freed.
-  Metachunk* _chunks_in_use[NumberOfFreeLists];
+  Metachunk* _chunks_in_use[NumberOfInUseLists];
   Metachunk* _current_chunk;
 
   // Virtual space where allocation comes from.
@@ -680,6 +558,7 @@
   static const int    _expand_lock_rank;
   static Mutex* const _expand_lock;
 
+ private:
   // Accessors
   Metachunk* chunks_in_use(ChunkIndex index) const { return _chunks_in_use[index]; }
   void set_chunks_in_use(ChunkIndex index, Metachunk* v) { _chunks_in_use[index] = v; }
@@ -700,43 +579,39 @@
   // Add chunk to the list of chunks in use
   void add_chunk(Metachunk* v, bool make_current);
 
-  // Debugging support
-  void verify_chunks_in_use_index(ChunkIndex index, Metachunk* v) {
-    switch (index) {
-    case 0:
-      assert(v->word_size() == SmallChunk, "Not a SmallChunk");
-      break;
-    case 1:
-      assert(v->word_size() == MediumChunk, "Not a MediumChunk");
-      break;
-    case 2:
-      assert(v->word_size() > MediumChunk, "Not a HumongousChunk");
-      break;
-    default:
-      assert(false, "Wrong list.");
-    }
-  }
+  Mutex* lock() const { return _lock; }
+
+  const char* chunk_size_name(ChunkIndex index) const;
 
  protected:
-  Mutex* lock() const { return _lock; }
+  void initialize();
 
  public:
-  SpaceManager(Mutex* lock, VirtualSpaceList* vs_list);
+  SpaceManager(Mutex* lock,
+               VirtualSpaceList* vs_list);
   ~SpaceManager();
 
-  enum ChunkSizes {    // in words.
-    SmallChunk = 512,
-    MediumChunk = 8 * K,
-    MediumChunkBunch = 4 * MediumChunk
+  enum ChunkMultiples {
+    MediumChunkMultiple = 4
   };
 
   // Accessors
+  size_t specialized_chunk_size() { return SpecializedChunk; }
+  size_t small_chunk_size() { return (size_t)(vs_list()->is_class() ? ClassSmallChunk : SmallChunk); }
+  size_t medium_chunk_size() { return (size_t)(vs_list()->is_class() ? ClassMediumChunk : MediumChunk); }
+  size_t medium_chunk_bunch() { return medium_chunk_size() * MediumChunkMultiple; }
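+  // e.g. with the default 8K-word MediumChunk and a multiple of 4,
+  // medium_chunk_bunch() is 32K words (256K bytes on a 64-bit VM).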
+
   size_t allocation_total() const { return _allocation_total; }
   void inc_allocation_total(size_t v) { Atomic::add_ptr(v, &_allocation_total); }
-  static bool is_humongous(size_t word_size) { return word_size > MediumChunk; }
+  bool is_humongous(size_t word_size) { return word_size > medium_chunk_size(); }
 
   static Mutex* expand_lock() { return _expand_lock; }
 
+  // Set the sizes for the initial chunks.
+  void get_initial_chunk_sizes(Metaspace::MetaspaceType type,
+                               size_t* chunk_word_size,
+                               size_t* class_chunk_word_size);
+
   size_t sum_capacity_in_chunks_in_use() const;
   size_t sum_used_in_chunks_in_use() const;
   size_t sum_free_in_chunks_in_use() const;
@@ -746,15 +621,17 @@
   size_t sum_count_in_chunks_in_use();
   size_t sum_count_in_chunks_in_use(ChunkIndex i);
 
+  Metachunk* get_new_chunk(size_t word_size, size_t grow_chunks_by_words);
+
   // Block allocation and deallocation.
   // Allocates a block from the current chunk
   MetaWord* allocate(size_t word_size);
 
   // Helper for allocations
-  Metablock* allocate_work(size_t word_size);
+  MetaWord* allocate_work(size_t word_size);
 
   // Returns a block to the per manager freelist
-  void deallocate(MetaWord* p);
+  void deallocate(MetaWord* p, size_t word_size);
 
   // Based on the allocation size and a minimum chunk size,
   // returned chunk size (for expanding space for chunk allocation).
@@ -763,7 +640,7 @@
   // Called when an allocation from the current chunk fails.
   // Gets a new chunk (may require getting a new virtual space),
   // and allocates from that chunk.
-  Metablock* grow_and_allocate(size_t word_size);
+  MetaWord* grow_and_allocate(size_t word_size);
 
   // debugging support.
 
@@ -772,8 +649,9 @@
   void locked_print_chunks_in_use_on(outputStream* st) const;
 
   void verify();
+  void verify_chunk_size(Metachunk* chunk);
+  NOT_PRODUCT(void mangle_freed_chunks();)
 #ifdef ASSERT
-  void mangle_freed_chunks();
   void verify_allocation_total();
 #endif
 };
@@ -788,193 +666,60 @@
             SpaceManager::_expand_lock_name,
             Mutex::_allow_vm_block_flag);
 
-#ifdef ASSERT
-size_t Metablock::_overhead =
-  Chunk::aligned_overhead_size(sizeof(Metablock)) / BytesPerWord;
-#else
-size_t Metablock::_overhead = 0;
-#endif
-size_t Metachunk::_overhead =
-  Chunk::aligned_overhead_size(sizeof(Metachunk)) / BytesPerWord;
-
-// New blocks returned by the Metaspace are zero initialized.
-// We should fix the constructors to not assume this instead.
-Metablock* Metablock::initialize(MetaWord* p, size_t word_size) {
-  Metablock* result = (Metablock*) p;
-
-  // Clear the memory
-  Copy::fill_to_aligned_words((HeapWord*)result, word_size);
-#ifdef ASSERT
-  result->set_word_size(word_size);
-  // Check after work size is set.
-  result->set_leader((void*) metaspace_allocation_leader);
-  result->set_trailer((void*) metaspace_allocation_trailer);
-#endif
-  return result;
-}
-
-void Metablock::verify() {
-#ifdef ASSERT
-  assert(leader() == metaspace_allocation_leader &&
-         trailer() == metaspace_allocation_trailer,
-         "block has been corrupted");
-#endif
-}
-
-// Metachunk methods
-
-Metachunk* Metachunk::initialize(MetaWord* ptr, size_t word_size) {
-  // Set bottom, top, and end.  Allow space for the Metachunk itself
-  Metachunk* chunk = (Metachunk*) ptr;
-
-  MetaWord* chunk_bottom = ptr + _overhead;
-  chunk->set_bottom(ptr);
-  chunk->set_top(chunk_bottom);
-  MetaWord* chunk_end = ptr + word_size;
-  assert(chunk_end > chunk_bottom, "Chunk must be too small");
-  chunk->set_end(chunk_end);
-  chunk->set_next(NULL);
-  chunk->set_word_size(word_size);
-#ifdef ASSERT
-  size_t data_word_size = pointer_delta(chunk_end, chunk_bottom, sizeof(MetaWord));
-  Copy::fill_to_words((HeapWord*) chunk_bottom, data_word_size, metadata_chunk_initialize);
-#endif
-  return chunk;
-}
-
-
-Metablock* Metachunk::allocate(size_t word_size) {
-  Metablock* result = NULL;
-  // If available, bump the pointer to allocate.
-  if (free_word_size() >= word_size) {
-    result = Metablock::initialize(_top, word_size);
-    _top = _top + word_size;
-  }
-#ifdef ASSERT
-  assert(result == NULL ||
-         result->word_size() == word_size,
-         "Block size is not set correctly");
-#endif
-  return result;
-}
-
-// _bottom points to the start of the chunk including the overhead.
-size_t Metachunk::used_word_size() {
-  return pointer_delta(_top, _bottom, sizeof(MetaWord));
-}
-
-size_t Metachunk::free_word_size() {
-  return pointer_delta(_end, _top, sizeof(MetaWord));
-}
-
-size_t Metachunk::capacity_word_size() {
-  return pointer_delta(_end, _bottom, sizeof(MetaWord));
-}
-
-void Metachunk::print_on(outputStream* st) const {
-  st->print_cr("Metachunk:"
-               " bottom " PTR_FORMAT " top " PTR_FORMAT
-               " end " PTR_FORMAT " size " SIZE_FORMAT,
-               bottom(), top(), end(), word_size());
-}
-
-
-void Metachunk::verify() {
-#ifdef ASSERT
-  // Cannot walk through the blocks unless the blocks have
-  // headers with sizes.
-  MetaWord* curr = bottom() + overhead();
-  while (curr < top()) {
-    Metablock* block = (Metablock*) curr;
-    size_t word_size = block->word_size();
-    block->verify();
-    curr = curr + word_size;
-  }
-#endif
-  return;
-}
-
 // BlockFreelist methods
 
-#ifdef DEALLOCATE_BLOCKS
 BlockFreelist::BlockFreelist() : _dictionary(NULL) {}
-#else
-BlockFreelist::BlockFreelist() {}
-#endif
 
 BlockFreelist::~BlockFreelist() {
-#ifdef DEALLOCATE_BLOCKS
   if (_dictionary != NULL) {
     if (Verbose && TraceMetadataChunkAllocation) {
       _dictionary->print_free_lists(gclog_or_tty);
     }
     delete _dictionary;
   }
-#endif
 }
 
-Metablock* BlockFreelist::initialize_free_chunk(Metablock* block, size_t word_size) {
-#ifdef DEALLOCATE_BLOCKS
-#ifdef ASSERT
-  assert(word_size = block->word_size(), "Wrong chunk size");
-#endif
-  Metablock* result = block;
-  result->setSize(word_size);
-  result->linkPrev(NULL);
-  result->linkNext(NULL);
-
-  return result;
-#else
-  ShouldNotReachHere();
+Metablock* BlockFreelist::initialize_free_chunk(MetaWord* p, size_t word_size) {
+  Metablock* block = (Metablock*) p;
+  block->set_word_size(word_size);
+  block->set_prev(NULL);
+  block->set_next(NULL);
+
   return block;
-#endif
 }
 
-void BlockFreelist::return_block(Metablock* block, size_t word_size) {
-#ifdef ASSERT
-  assert(word_size = block->word_size(), "Block size is wrong");;
-#endif
-  Metablock* free_chunk = initialize_free_chunk(block, word_size);
-#ifdef DEALLOCATE_BLOCKS
+void BlockFreelist::return_block(MetaWord* p, size_t word_size) {
+  Metablock* free_chunk = initialize_free_chunk(p, word_size);
   if (dictionary() == NULL) {
-   _dictionary = new BinaryTreeDictionary<Metablock>(false /* adaptive_freelists */);
+   _dictionary = new BlockTreeDictionary();
   }
-  dictionary()->returnChunk(free_chunk);
-#endif
+  dictionary()->return_chunk(free_chunk);
 }
 
-Metablock* BlockFreelist::get_block(size_t word_size) {
-#ifdef DEALLOCATE_BLOCKS
+MetaWord* BlockFreelist::get_block(size_t word_size) {
   if (dictionary() == NULL) {
     return NULL;
   }
 
-  Metablock* free_chunk =
-    dictionary()->getChunk(word_size, FreeBlockDictionary<Metablock>::exactly);
-#else
-  Metablock* free_chunk = NULL;
-#endif
-  if (free_chunk == NULL) {
+  if (word_size < TreeChunk<Metablock, FreeList>::min_size()) {
+    // Dark matter.  Too small for dictionary.
     return NULL;
   }
-  assert(free_chunk->word_size() == word_size, "Size of chunk is incorrect");
-  Metablock* block = Metablock::initialize((MetaWord*) free_chunk, word_size);
-#ifdef ASSERT
-  assert(block->word_size() == word_size, "Block size is not set correctly");
-#endif
-
-  return block;
+
+  Metablock* free_block =
+    dictionary()->get_chunk(word_size, FreeBlockDictionary<Metablock>::exactly);
+  if (free_block == NULL) {
+    return NULL;
+  }
+
+  return (MetaWord*) free_block;
 }
 
 void BlockFreelist::print_on(outputStream* st) const {
-#ifdef DEALLOCATE_BLOCKS
   if (dictionary() == NULL) {
     return;
   }
   dictionary()->print_free_lists(st);
-#else
-  return;
-#endif
 }
 
 // VirtualSpaceNode methods
@@ -1070,8 +815,10 @@
     return false;
   }
 
-  // Commit only 1 page instead of the whole reserved space _rs.size()
-  size_t committed_byte_size = os::vm_page_size();
+  // An allocation out of this VirtualSpace that is larger than the
+  // initial commit size can waste that initially committed space, so
+  // commit nothing up front and let the space expand on demand.
+  size_t committed_byte_size = 0;
   bool result = virtual_space()->initialize(_rs, committed_byte_size);
   if (result) {
     set_top((MetaWord*)virtual_space()->low());
@@ -1097,15 +844,18 @@
   st->print_cr("   space @ " PTR_FORMAT " " SIZE_FORMAT "K, %3d%% used "
            "[" PTR_FORMAT ", " PTR_FORMAT ", "
            PTR_FORMAT ", " PTR_FORMAT ")",
-           vs, capacity / K, used * 100 / capacity,
+           vs, capacity / K,
+           capacity == 0 ? 0 : used * 100 / capacity,
            bottom(), top(), end(),
            vs->high_boundary());
 }
 
+#ifdef ASSERT
 void VirtualSpaceNode::mangle() {
   size_t word_size = capacity_words_in_vs();
   Copy::fill_to_words((HeapWord*) low(), word_size, 0xf1f1f1f1);
 }
+#endif // ASSERT
 
 // VirtualSpaceList methods
 // Space allocated from the VirtualSpace
@@ -1192,6 +942,8 @@
     delete new_entry;
     return false;
   } else {
+    // ensure lock-free iteration sees fully initialized node
+    OrderAccess::storestore();
     link_vs(new_entry, vs_word_size);
     return true;
   }
@@ -1216,7 +968,8 @@
 }
 
 Metachunk* VirtualSpaceList::get_new_chunk(size_t word_size,
-                                           size_t grow_chunks_by_words) {
+                                           size_t grow_chunks_by_words,
+                                           size_t medium_chunk_bunch) {
 
   // Get a chunk from the chunk freelist
   Metachunk* next = chunk_manager()->chunk_freelist_allocate(grow_chunks_by_words);
@@ -1229,8 +982,8 @@
   if (next == NULL) {
     // Not enough room in current virtual space.  Try to commit
     // more space.
-    size_t expand_vs_by_words = MAX2((size_t)SpaceManager::MediumChunkBunch,
-                                       grow_chunks_by_words);
+    size_t expand_vs_by_words = MAX2(medium_chunk_bunch,
+                                     grow_chunks_by_words);
     size_t page_size_words = os::vm_page_size() / BytesPerWord;
     size_t aligned_expand_vs_by_words = align_size_up(expand_vs_by_words,
                                                         page_size_words);
@@ -1248,12 +1001,6 @@
           // Got it.  It's on the list now.  Get a chunk from it.
           next = current_virtual_space()->get_chunk_vs_with_expand(grow_chunks_by_words);
         }
-        if (TraceMetadataHumongousAllocation && SpaceManager::is_humongous(word_size)) {
-          gclog_or_tty->print_cr("  aligned_expand_vs_by_words " PTR_FORMAT,
-                                 aligned_expand_vs_by_words);
-          gclog_or_tty->print_cr("  grow_vs_words " PTR_FORMAT,
-                                 grow_vs_words);
-        }
       } else {
         // Allocation will fail and induce a GC
         if (TraceMetadataChunkAllocation && Verbose) {
@@ -1268,9 +1015,20 @@
     }
   }
 
+  assert(next == NULL || (next->next() == NULL && next->prev() == NULL),
+         "New chunk is still on some list");
   return next;
 }
 
+Metachunk* VirtualSpaceList::get_initialization_chunk(size_t chunk_word_size,
+                                                      size_t chunk_bunch) {
+  // Get a chunk from the chunk freelist
+  Metachunk* new_chunk = get_new_chunk(chunk_word_size,
+                                       chunk_word_size,
+                                       chunk_bunch);
+  return new_chunk;
+}
+
 void VirtualSpaceList::print_on(outputStream* st) const {
   if (TraceMetadataChunkAllocation && Verbose) {
     VirtualSpaceListIterator iter(virtual_space_list());
@@ -1281,7 +1039,6 @@
   }
 }
 
-#ifndef PRODUCT
 bool VirtualSpaceList::contains(const void *ptr) {
   VirtualSpaceNode* list = virtual_space_list();
   VirtualSpaceListIterator iter(list);
@@ -1293,7 +1050,6 @@
   }
   return false;
 }
-#endif // PRODUCT
 
 
 // MetaspaceGC methods
@@ -1597,14 +1353,11 @@
         Metadebug::deallocate_block_a_lot_count() % MetaDataDeallocateALotInterval == 0 ) {
     Metadebug::set_deallocate_block_a_lot_count(0);
     for (uint i = 0; i < metadata_deallocate_a_lot_block; i++) {
-      Metablock* dummy_block = sm->allocate_work(raw_word_size);
+      MetaWord* dummy_block = sm->allocate_work(raw_word_size);
       if (dummy_block == 0) {
         break;
       }
-#ifdef ASSERT
-      assert(dummy_block->word_size() == raw_word_size, "Block size is not set correctly");
-#endif
-      sm->deallocate(dummy_block->data());
+      sm->deallocate(dummy_block, raw_word_size);
     }
   } else {
     Metadebug::inc_deallocate_block_a_lot_count();
@@ -1672,16 +1425,17 @@
 
 void ChunkList::add_at_head(Metachunk* head, Metachunk* tail) {
   assert_lock_strong(SpaceManager::expand_lock());
-  assert(tail->next() == NULL, "Not the tail");
+  assert(head == tail || tail->next() == NULL,
+         "tail is not the last chunk, or the head was already on a list");
 
   if (TraceMetadataChunkAllocation && Verbose) {
-    tty->print("ChunkList::add_at_head: ");
+    gclog_or_tty->print("ChunkList::add_at_head(head, tail): ");
     Metachunk* cur = head;
     while (cur != NULL) {
-    tty->print(PTR_FORMAT " (" SIZE_FORMAT ") ", cur, cur->word_size());
+      gclog_or_tty->print(PTR_FORMAT " (" SIZE_FORMAT ") ", cur, cur->word_size());
       cur = cur->next();
     }
-    tty->print_cr("");
+    gclog_or_tty->print_cr("");
   }
 
   if (tail != NULL) {
@@ -1719,7 +1473,7 @@
   if (!UseConcMarkSweepGC && !SpaceManager::expand_lock()->is_locked()) {
     MutexLockerEx cl(SpaceManager::expand_lock(),
                      Mutex::_no_safepoint_check_flag);
-    locked_verify_free_chunks_total();
+    slow_locked_verify_free_chunks_total();
   }
 #endif
   return _free_chunks_total;
@@ -1736,10 +1490,10 @@
                      Mutex::_no_safepoint_check_flag);
     // This lock is only needed in debug because the verification
     // of the _free_chunks_totals walks the list of free chunks
-    locked_verify_free_chunks_count();
+    slow_locked_verify_free_chunks_count();
   }
 #endif
-    return _free_chunks_count;
+  return _free_chunks_count;
 }
 
 void ChunkManager::locked_verify_free_chunks_total() {
@@ -1773,43 +1527,37 @@
 }
 
 void ChunkManager::verify() {
-#ifdef ASSERT
-  if (!UseConcMarkSweepGC) {
-    MutexLockerEx cl(SpaceManager::expand_lock(),
-                       Mutex::_no_safepoint_check_flag);
-    locked_verify_free_chunks_total();
-    locked_verify_free_chunks_count();
-  }
-#endif
+  MutexLockerEx cl(SpaceManager::expand_lock(),
+                     Mutex::_no_safepoint_check_flag);
+  locked_verify();
 }
 
 void ChunkManager::locked_verify() {
+  locked_verify_free_chunks_count();
   locked_verify_free_chunks_total();
-  locked_verify_free_chunks_count();
 }
 
 void ChunkManager::locked_print_free_chunks(outputStream* st) {
   assert_lock_strong(SpaceManager::expand_lock());
-  st->print_cr("Free chunk total 0x%x  count 0x%x",
+  st->print_cr("Free chunk total " SIZE_FORMAT "  count " SIZE_FORMAT,
                 _free_chunks_total, _free_chunks_count);
 }
 
 void ChunkManager::locked_print_sum_free_chunks(outputStream* st) {
   assert_lock_strong(SpaceManager::expand_lock());
-  st->print_cr("Sum free chunk total 0x%x  count 0x%x",
+  st->print_cr("Sum free chunk total " SIZE_FORMAT "  count " SIZE_FORMAT,
                 sum_free_chunks(), sum_free_chunks_count());
 }
 ChunkList* ChunkManager::free_chunks(ChunkIndex index) {
   return &_free_chunks[index];
 }
 
-
 // These methods that sum the free chunk lists are used in printing
 // methods that are used in product builds.
 size_t ChunkManager::sum_free_chunks() {
   assert_lock_strong(SpaceManager::expand_lock());
   size_t result = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = ZeroIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
     ChunkList* list = free_chunks(i);
 
     if (list == NULL) {
@@ -1818,32 +1566,28 @@
 
     result = result + list->sum_list_capacity();
   }
+  result = result + humongous_dictionary()->total_size();
   return result;
 }
 
 size_t ChunkManager::sum_free_chunks_count() {
   assert_lock_strong(SpaceManager::expand_lock());
   size_t count = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = ZeroIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
     ChunkList* list = free_chunks(i);
     if (list == NULL) {
       continue;
     }
     count = count + list->sum_list_count();
   }
+  count = count + humongous_dictionary()->total_free_blocks();
   return count;
 }
 
 ChunkList* ChunkManager::find_free_chunks_list(size_t word_size) {
-  switch (word_size) {
-  case SpaceManager::SmallChunk :
-      return &_free_chunks[0];
-  case SpaceManager::MediumChunk :
-      return &_free_chunks[1];
-  default:
-    assert(word_size > SpaceManager::MediumChunk, "List inconsistency");
-    return &_free_chunks[2];
-  }
+  ChunkIndex index = list_index(word_size);
+  assert(index < HumongousIndex, "No humongous list");
+  return free_chunks(index);
 }
 
 void ChunkManager::free_chunks_put(Metachunk* chunk) {
@@ -1853,7 +1597,7 @@
   free_list->set_head(chunk);
   // chunk is being returned to the chunk free list
   inc_free_chunks_total(chunk->capacity_word_size());
-  locked_verify();
+  slow_locked_verify();
 }
 
 void ChunkManager::chunk_freelist_deallocate(Metachunk* chunk) {
@@ -1861,8 +1605,8 @@
   // management code for a Metaspace and does not hold the
   // lock.
   assert(chunk != NULL, "Deallocating NULL");
-  // MutexLockerEx fcl(SpaceManager::expand_lock(), Mutex::_no_safepoint_check_flag);
-  locked_verify();
+  assert_lock_strong(SpaceManager::expand_lock());
+  slow_locked_verify();
   if (TraceMetadataChunkAllocation) {
     tty->print_cr("ChunkManager::chunk_freelist_deallocate: chunk "
                   PTR_FORMAT "  size " SIZE_FORMAT,
@@ -1874,24 +1618,25 @@
 Metachunk* ChunkManager::free_chunks_get(size_t word_size) {
   assert_lock_strong(SpaceManager::expand_lock());
 
-  locked_verify();
-  ChunkList* free_list = find_free_chunks_list(word_size);
-  assert(free_list != NULL, "Sanity check");
-
-  Metachunk* chunk = free_list->head();
-  debug_only(Metachunk* debug_head = chunk;)
-
-  if (chunk == NULL) {
-    return NULL;
-  }
-
-  Metachunk* prev_chunk = chunk;
-  if (chunk->word_size() == word_size) {
+  slow_locked_verify();
+
+  Metachunk* chunk = NULL;
+  if (list_index(word_size) != HumongousIndex) {
+    ChunkList* free_list = find_free_chunks_list(word_size);
+    assert(free_list != NULL, "Sanity check");
+
+    chunk = free_list->head();
+    debug_only(Metachunk* debug_head = chunk;)
+
+    if (chunk == NULL) {
+      return NULL;
+    }
+
+    // Remove the chunk as the head of the list.
+    free_list->set_head(chunk->next());
+
     // Chunk is being removed from the chunks free list.
     dec_free_chunks_total(chunk->capacity_word_size());
-    // Remove the chunk as the head of the list.
-    free_list->set_head(chunk->next());
-    chunk->set_next(NULL);
 
     if (TraceMetadataChunkAllocation && Verbose) {
       tty->print_cr("ChunkManager::free_chunks_get: free_list "
@@ -1899,87 +1644,38 @@
                     free_list, chunk, chunk->word_size());
     }
   } else {
-    assert(SpaceManager::is_humongous(word_size),
-      "Should only need to check humongous");
-    // This code to find the best fit is just for purposes of
-    // investigating the loss due to fragmentation on a humongous
-    // chunk.  It will be replace by a binaryTreeDictionary for
-    // the humongous chunks.
-    uint count = 0;
-    Metachunk* best_fit = NULL;
-    Metachunk* best_fit_prev = NULL;
-    while (chunk != NULL) {
-      count++;
-      if (chunk->word_size() < word_size) {
-        prev_chunk = chunk;
-        chunk = chunk->next();
-      } else if (chunk->word_size() == word_size) {
-        break;
-      } else {
-        if (best_fit == NULL ||
-            best_fit->word_size() > chunk->word_size()) {
-          best_fit_prev = prev_chunk;
-          best_fit = chunk;
-        }
-        prev_chunk = chunk;
-        chunk = chunk->next();
-      }
-    }
-      if (chunk == NULL) {
-        prev_chunk = best_fit_prev;
-        chunk = best_fit;
+    chunk = humongous_dictionary()->get_chunk(
+      word_size,
+      FreeBlockDictionary<Metachunk>::atLeast);
+
+    if (chunk != NULL) {
+      if (TraceMetadataHumongousAllocation) {
+        size_t waste = chunk->word_size() - word_size;
+        tty->print_cr("Free list allocate humongous chunk size " SIZE_FORMAT
+                      " for requested size " SIZE_FORMAT
+                      " waste " SIZE_FORMAT,
+                      chunk->word_size(), word_size, waste);
       }
-      if (chunk != NULL) {
-        if (TraceMetadataHumongousAllocation) {
-          size_t waste = chunk->word_size() - word_size;
-          tty->print_cr("Free list allocate humongous chunk size " SIZE_FORMAT
-                        " for requested size " SIZE_FORMAT
-                        " waste " SIZE_FORMAT
-                        " found at " SIZE_FORMAT " of " SIZE_FORMAT,
-                        chunk->word_size(), word_size, waste,
-                        count, free_list->sum_list_count());
-        }
-        // Chunk is being removed from the chunks free list.
-        dec_free_chunks_total(chunk->capacity_word_size());
-        // Remove the chunk if it is at the head of the list.
-        if (chunk == free_list->head()) {
-          free_list->set_head(chunk->next());
-
-          if (TraceMetadataHumongousAllocation) {
-            tty->print_cr("ChunkManager::free_chunks_get: humongous free_list "
-                          PTR_FORMAT " chunk " PTR_FORMAT " size " SIZE_FORMAT
-                          " new head " PTR_FORMAT,
-                          free_list, chunk, chunk->word_size(),
-                          free_list->head());
-          }
-        } else {
-          // Remove a chunk in the interior of the list
-          prev_chunk->set_next(chunk->next());
-
-          if (TraceMetadataHumongousAllocation) {
-            tty->print_cr("ChunkManager::free_chunks_get: humongous free_list "
-                          PTR_FORMAT " chunk " PTR_FORMAT " size " SIZE_FORMAT
-                          PTR_FORMAT "  prev " PTR_FORMAT " next " PTR_FORMAT,
-                          free_list, chunk, chunk->word_size(),
-                          prev_chunk, chunk->next());
-          }
-        }
-        chunk->set_next(NULL);
-      } else {
-        if (TraceMetadataHumongousAllocation) {
-          tty->print_cr("ChunkManager::free_chunks_get: New humongous chunk of size "
-                        SIZE_FORMAT,
-                        word_size);
-        }
-      }
+      // Chunk is being removed from the chunks free list.
+      dec_free_chunks_total(chunk->capacity_word_size());
+#ifdef ASSERT
+      chunk->set_is_free(false);
+#endif
+    } else {
+      return NULL;
+    }
   }
-  locked_verify();
+
+  // Remove it from the links to this freelist
+  chunk->set_next(NULL);
+  chunk->set_prev(NULL);
+  slow_locked_verify();
   return chunk;
 }
 
 Metachunk* ChunkManager::chunk_freelist_allocate(size_t word_size) {
   assert_lock_strong(SpaceManager::expand_lock());
-  locked_verify();
+  slow_locked_verify();
 
   // Take from the beginning of the list
   Metachunk* chunk = free_chunks_get(word_size);
@@ -1987,25 +1683,70 @@
     return NULL;
   }
 
-  assert(word_size <= chunk->word_size() ||
-           SpaceManager::is_humongous(chunk->word_size()),
-           "Non-humongous variable sized chunk");
+  assert((word_size <= chunk->word_size()) ||
+         list_index(chunk->word_size() == HumongousIndex),
+         "Non-humongous variable sized chunk");
   if (TraceMetadataChunkAllocation) {
-    tty->print("ChunkManager::chunk_freelist_allocate: chunk "
-               PTR_FORMAT "  size " SIZE_FORMAT " ",
-               chunk, chunk->word_size());
+    size_t list_count;
+    if (list_index(word_size) < HumongousIndex) {
+      ChunkList* list = find_free_chunks_list(word_size);
+      list_count = list->sum_list_count();
+    } else {
+      list_count = humongous_dictionary()->total_count();
+    }
+    tty->print("ChunkManager::chunk_freelist_allocate: " PTR_FORMAT " chunk "
+               PTR_FORMAT "  size " SIZE_FORMAT " count " SIZE_FORMAT " ",
+               this, chunk, chunk->word_size(), list_count);
     locked_print_free_chunks(tty);
   }
 
   return chunk;
 }
 
+void ChunkManager::print_on(outputStream* out) {
+  if (PrintFLSStatistics != 0) {
+    humongous_dictionary()->report_statistics();
+  }
+}
+
 // SpaceManager methods
 
+void SpaceManager::get_initial_chunk_sizes(Metaspace::MetaspaceType type,
+                                           size_t* chunk_word_size,
+                                           size_t* class_chunk_word_size) {
+  switch (type) {
+  case Metaspace::BootMetaspaceType:
+    *chunk_word_size = Metaspace::first_chunk_word_size();
+    *class_chunk_word_size = Metaspace::first_class_chunk_word_size();
+    break;
+  case Metaspace::ROMetaspaceType:
+    *chunk_word_size = SharedReadOnlySize / wordSize;
+    *class_chunk_word_size = ClassSpecializedChunk;
+    break;
+  case Metaspace::ReadWriteMetaspaceType:
+    *chunk_word_size = SharedReadWriteSize / wordSize;
+    *class_chunk_word_size = ClassSpecializedChunk;
+    break;
+  case Metaspace::AnonymousMetaspaceType:
+  case Metaspace::ReflectionMetaspaceType:
+    *chunk_word_size = SpecializedChunk;
+    *class_chunk_word_size = ClassSpecializedChunk;
+    break;
+  default:
+    *chunk_word_size = SmallChunk;
+    *class_chunk_word_size = ClassSmallChunk;
+    break;
+  }
+  assert(*chunk_word_size != 0 && *class_chunk_word_size != 0,
+    err_msg("Initial chunk sizes bad: data  " SIZE_FORMAT
+            " class " SIZE_FORMAT,
+            *chunk_word_size, *class_chunk_word_size));
+}
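+
+// For an ordinary application class loader (the default case above) a
+// metaspace therefore starts with a SmallChunk (512 words) of data space
+// and a ClassSmallChunk (256 words) of class space.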
+
 size_t SpaceManager::sum_free_in_chunks_in_use() const {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
   size_t free = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = ZeroIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
     Metachunk* chunk = chunks_in_use(i);
     while (chunk != NULL) {
       free += chunk->free_word_size();
@@ -2018,27 +1759,24 @@
 size_t SpaceManager::sum_waste_in_chunks_in_use() const {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
   size_t result = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
-   // Count the free space in all the chunk but not the
-   // current chunk from which allocations are still being done.
+  for (ChunkIndex i = ZeroIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
    result += sum_waste_in_chunks_in_use(i);
   }
+
   return result;
 }
 
 size_t SpaceManager::sum_waste_in_chunks_in_use(ChunkIndex index) const {
   size_t result = 0;
-  size_t count = 0;
   Metachunk* chunk = chunks_in_use(index);
   // Count the free space in all the chunk but not the
   // current chunk from which allocations are still being done.
   if (chunk != NULL) {
-    while (chunk != NULL) {
-      if (chunk != current_chunk()) {
-        result += chunk->free_word_size();
-      }
+    while (chunk != NULL && chunk != current_chunk()) {
+      result += chunk->free_word_size();
       chunk = chunk->next();
-      count++;
     }
   }
   return result;
@@ -2047,7 +1785,7 @@
 size_t SpaceManager::sum_capacity_in_chunks_in_use() const {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
   size_t sum = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = ZeroIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
     Metachunk* chunk = chunks_in_use(i);
     while (chunk != NULL) {
       // Just changed this sum += chunk->capacity_word_size();
@@ -2061,9 +1799,10 @@
 
 size_t SpaceManager::sum_count_in_chunks_in_use() {
   size_t count = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = ZeroIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
     count = count + sum_count_in_chunks_in_use(i);
   }
+
   return count;
 }
 
@@ -2081,7 +1820,7 @@
 size_t SpaceManager::sum_used_in_chunks_in_use() const {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
   size_t used = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = ZeroIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
     Metachunk* chunk = chunks_in_use(i);
     while (chunk != NULL) {
       used += chunk->used_word_size();
@@ -2093,19 +1832,17 @@
 
 void SpaceManager::locked_print_chunks_in_use_on(outputStream* st) const {
 
-  Metachunk* small_chunk = chunks_in_use(SmallIndex);
-  st->print_cr("SpaceManager: small chunk " PTR_FORMAT
-               " free " SIZE_FORMAT,
-               small_chunk,
-               small_chunk->free_word_size());
-
-  Metachunk* medium_chunk = chunks_in_use(MediumIndex);
-  st->print("medium chunk " PTR_FORMAT, medium_chunk);
-  Metachunk* tail = current_chunk();
-  st->print_cr(" current chunk " PTR_FORMAT, tail);
-
-  Metachunk* head = chunks_in_use(HumongousIndex);
-  st->print_cr("humongous chunk " PTR_FORMAT, head);
+  for (ChunkIndex i = ZeroIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
+    Metachunk* chunk = chunks_in_use(i);
+    st->print("SpaceManager: %s " PTR_FORMAT,
+                 chunk_size_name(i), chunk);
+    if (chunk != NULL) {
+      st->print_cr(" free " SIZE_FORMAT,
+                   chunk->free_word_size());
+    } else {
+      st->print_cr("");
+    }
+  }
 
   vs_list()->chunk_manager()->locked_print_free_chunks(st);
   vs_list()->chunk_manager()->locked_print_sum_free_chunks(st);
@@ -2121,33 +1858,41 @@
   if (chunks_in_use(MediumIndex) == NULL &&
       (!has_small_chunk_limit() ||
        sum_count_in_chunks_in_use(SmallIndex) < _small_chunk_limit)) {
-    chunk_word_size = (size_t) SpaceManager::SmallChunk;
-    if (word_size + Metachunk::overhead() > SpaceManager::SmallChunk) {
-      chunk_word_size = MediumChunk;
+    chunk_word_size = (size_t) small_chunk_size();
+    if (word_size + Metachunk::overhead() > small_chunk_size()) {
+      chunk_word_size = medium_chunk_size();
     }
   } else {
-    chunk_word_size = MediumChunk;
+    chunk_word_size = medium_chunk_size();
   }
 
-  // Might still need a humongous chunk
+  // Might still need a humongous chunk.  Enforce an
+  // eight-word granularity to facilitate reuse (some
+  // wastage but a better chance of reuse).
+  size_t if_humongous_sized_chunk =
+    align_size_up(word_size + Metachunk::overhead(),
+                  HumongousChunkGranularity);
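+  // e.g. (illustrative, assuming a 2-word Metachunk overhead): a request
+  // for 8193 words becomes 8195 words, which aligns up to 8200.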
   chunk_word_size =
-    MAX2((size_t) chunk_word_size, word_size + Metachunk::overhead());
-
+    MAX2((size_t) chunk_word_size, if_humongous_sized_chunk);
+
+  assert(!SpaceManager::is_humongous(word_size) ||
+         chunk_word_size == if_humongous_sized_chunk,
+         err_msg("Size calculation is wrong, word_size " SIZE_FORMAT
+                 " chunk_word_size " SIZE_FORMAT,
+                 word_size, chunk_word_size));
   if (TraceMetadataHumongousAllocation &&
       SpaceManager::is_humongous(word_size)) {
     gclog_or_tty->print_cr("Metadata humongous allocation:");
     gclog_or_tty->print_cr("  word_size " PTR_FORMAT, word_size);
     gclog_or_tty->print_cr("  chunk_word_size " PTR_FORMAT,
                            chunk_word_size);
-    gclog_or_tty->print_cr("    block overhead " PTR_FORMAT
-                           " chunk overhead " PTR_FORMAT,
-                           Metablock::overhead(),
+    gclog_or_tty->print_cr("    chunk overhead " PTR_FORMAT,
                            Metachunk::overhead());
   }
   return chunk_word_size;
 }
 
-Metablock* SpaceManager::grow_and_allocate(size_t word_size) {
+MetaWord* SpaceManager::grow_and_allocate(size_t word_size) {
   assert(vs_list()->current_virtual_space() != NULL,
          "Should have been set");
   assert(current_chunk() == NULL ||
@@ -2156,15 +1901,21 @@
   MutexLockerEx cl(SpaceManager::expand_lock(), Mutex::_no_safepoint_check_flag);
 
   if (TraceMetadataChunkAllocation && Verbose) {
+    size_t words_left = 0;
+    size_t words_used = 0;
+    if (current_chunk() != NULL) {
+      words_left = current_chunk()->free_word_size();
+      words_used = current_chunk()->used_word_size();
+    }
     gclog_or_tty->print_cr("SpaceManager::grow_and_allocate for " SIZE_FORMAT
-                           " words " SIZE_FORMAT " space left",
-                            word_size, current_chunk() != NULL ?
-                              current_chunk()->free_word_size() : 0);
+                           " words " SIZE_FORMAT " words used " SIZE_FORMAT
+                           " words left",
+                            word_size, words_used, words_left);
   }
 
   // Get another chunk out of the virtual space
   size_t grow_chunks_by_words = calc_chunk_size(word_size);
-  Metachunk* next = vs_list()->get_new_chunk(word_size, grow_chunks_by_words);
+  Metachunk* next = get_new_chunk(word_size, grow_chunks_by_words);
 
   // If a chunk was available, add it to the in-use chunk list
   // and do an allocation from it.
@@ -2179,8 +1930,8 @@
 
 void SpaceManager::print_on(outputStream* st) const {
 
-  for (ChunkIndex i = SmallIndex;
-       i < NumberOfFreeLists ;
+  for (ChunkIndex i = ZeroIndex;
+       i < NumberOfInUseLists ;
        i = next_chunk_index(i) ) {
     st->print_cr("  chunks_in_use " PTR_FORMAT " chunk size " PTR_FORMAT,
                  chunks_in_use(i),
@@ -2191,16 +1942,25 @@
                sum_waste_in_chunks_in_use(SmallIndex),
                sum_waste_in_chunks_in_use(MediumIndex),
                sum_waste_in_chunks_in_use(HumongousIndex));
-  // Nothing in them yet
-  // block_freelists()->print_on(st);
+  // block free lists
+  if (block_freelists() != NULL) {
+    st->print_cr("total in block free lists " SIZE_FORMAT,
+      block_freelists()->total_size());
+  }
 }
 
-SpaceManager::SpaceManager(Mutex* lock, VirtualSpaceList* vs_list) :
+SpaceManager::SpaceManager(Mutex* lock,
+                           VirtualSpaceList* vs_list) :
   _vs_list(vs_list),
   _allocation_total(0),
-  _lock(lock) {
+  _lock(lock)
+{
+  initialize();
+}
+
+void SpaceManager::initialize() {
   Metadebug::init_allocation_fail_alot_count();
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = ZeroIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
     _chunks_in_use[i] = NULL;
   }
   _current_chunk = NULL;
@@ -2215,69 +1975,127 @@
 
   ChunkManager* chunk_manager = vs_list()->chunk_manager();
 
-  chunk_manager->locked_verify();
+  chunk_manager->slow_locked_verify();
 
   if (TraceMetadataChunkAllocation && Verbose) {
     gclog_or_tty->print_cr("~SpaceManager(): " PTR_FORMAT, this);
     locked_print_chunks_in_use_on(gclog_or_tty);
   }
 
+  // Mangle freed memory.
+  NOT_PRODUCT(mangle_freed_chunks();)
+
   // Have to update before the chunks_in_use lists are emptied
   // below.
   chunk_manager->inc_free_chunks_total(sum_capacity_in_chunks_in_use(),
                                        sum_count_in_chunks_in_use());
 
-#ifdef ASSERT
-  // Mangle freed memory.
-  mangle_freed_chunks();
-#endif // ASSERT
-
   // Add all the chunks in use by this space manager
   // to the global list of free chunks.
 
-  // Small chunks.  There is one _current_chunk for each
-  // Metaspace.  It could point to a small or medium chunk.
-  // Rather than determine which it is, follow the list of
-  // small chunks to add them to the free list
-  Metachunk* small_chunk = chunks_in_use(SmallIndex);
-  chunk_manager->free_small_chunks()->add_at_head(small_chunk);
-  set_chunks_in_use(SmallIndex, NULL);
-
-  // After the small chunk are the medium chunks
-  Metachunk* medium_chunk = chunks_in_use(MediumIndex);
-  assert(medium_chunk == NULL ||
-         medium_chunk->word_size() == MediumChunk,
-         "Chunk is on the wrong list");
-
-  if (medium_chunk != NULL) {
-    Metachunk* head = medium_chunk;
-    // If there is a medium chunk then the _current_chunk can only
-    // point to the last medium chunk.
-    Metachunk* tail = current_chunk();
-    chunk_manager->free_medium_chunks()->add_at_head(head, tail);
-    set_chunks_in_use(MediumIndex, NULL);
+  // Follow each list of chunks-in-use and add them to the
+  // free lists.  Each list is NULL-terminated.
+
+  for (ChunkIndex i = ZeroIndex; i < HumongousIndex; i = next_chunk_index(i)) {
+    if (TraceMetadataChunkAllocation && Verbose) {
+      gclog_or_tty->print_cr("returned %d %s chunks to freelist",
+                             sum_count_in_chunks_in_use(i),
+                             chunk_size_name(i));
+    }
+    Metachunk* chunks = chunks_in_use(i);
+    chunk_manager->free_chunks(i)->add_at_head(chunks);
+    set_chunks_in_use(i, NULL);
+    if (TraceMetadataChunkAllocation && Verbose) {
+      gclog_or_tty->print_cr("updated freelist count %d %s",
+                             chunk_manager->free_chunks(i)->sum_list_count(),
+                             chunk_size_name(i));
+    }
+    assert(i != HumongousIndex, "Humongous chunks are handled explicitly later");
   }
 
+  // The medium chunk case may be optimized by passing the head and
+  // tail of the medium chunk list to add_at_head().  The tail is often
+  // the current chunk but there are probably exceptions.
+
   // Humongous chunks
+  if (TraceMetadataChunkAllocation && Verbose) {
+    gclog_or_tty->print_cr("returned %d %s humongous chunks to dictionary",
+                            sum_count_in_chunks_in_use(HumongousIndex),
+                            chunk_size_name(HumongousIndex));
+    gclog_or_tty->print("Humongous chunk dictionary: ");
+  }
   // Humongous chunks are never the current chunk.
   Metachunk* humongous_chunks = chunks_in_use(HumongousIndex);
 
-  if (humongous_chunks != NULL) {
-    chunk_manager->free_humongous_chunks()->add_at_head(humongous_chunks);
-    set_chunks_in_use(HumongousIndex, NULL);
+  while (humongous_chunks != NULL) {
+#ifdef ASSERT
+    humongous_chunks->set_is_free(true);
+#endif
+    if (TraceMetadataChunkAllocation && Verbose) {
+      gclog_or_tty->print(PTR_FORMAT " (" SIZE_FORMAT ") ",
+                          humongous_chunks,
+                          humongous_chunks->word_size());
+    }
+    assert(humongous_chunks->word_size() == (size_t)
+           align_size_up(humongous_chunks->word_size(),
+                             HumongousChunkGranularity),
+           err_msg("Humongous chunk size is wrong: word size " SIZE_FORMAT
+                   " granularity " SIZE_FORMAT,
+                   humongous_chunks->word_size(), HumongousChunkGranularity));
+    Metachunk* next_humongous_chunks = humongous_chunks->next();
+    chunk_manager->humongous_dictionary()->return_chunk(humongous_chunks);
+    humongous_chunks = next_humongous_chunks;
   }
-  chunk_manager->locked_verify();
+  if (TraceMetadataChunkAllocation && Verbose) {
+    gclog_or_tty->print_cr("");
+    gclog_or_tty->print_cr("updated dictionary count %d %s",
+                     chunk_manager->humongous_dictionary()->total_count(),
+                     chunk_size_name(HumongousIndex));
+  }
+  set_chunks_in_use(HumongousIndex, NULL);
+  chunk_manager->slow_locked_verify();
 }
 
-void SpaceManager::deallocate(MetaWord* p) {
+const char* SpaceManager::chunk_size_name(ChunkIndex index) const {
+  switch (index) {
+    case SpecializedIndex:
+      return "Specialized";
+    case SmallIndex:
+      return "Small";
+    case MediumIndex:
+      return "Medium";
+    case HumongousIndex:
+      return "Humongous";
+    default:
+      return NULL;
+  }
+}
+
+ChunkIndex ChunkManager::list_index(size_t size) {
+  switch (size) {
+    case SpecializedChunk:
+      assert(SpecializedChunk == ClassSpecializedChunk,
+             "Need branch for ClassSpecializedChunk");
+      return SpecializedIndex;
+    case SmallChunk:
+    case ClassSmallChunk:
+      return SmallIndex;
+    case MediumChunk:
+    case ClassMediumChunk:
+      return MediumIndex;
+    default:
+      assert(size > MediumChunk || size > ClassMediumChunk,
+             "Not a humongous chunk");
+      return HumongousIndex;
+  }
+}
+
+void SpaceManager::deallocate(MetaWord* p, size_t word_size) {
   assert_lock_strong(_lock);
-  ShouldNotReachHere();  // Where is this needed.
-#ifdef DEALLOCATE_BLOCKS
-  Metablock* block = Metablock::metablock_from_data(p);
-  // This is expense but kept it until integration JJJ
-  assert(contains((address)block), "Block does not belong to this metaspace");
-  block_freelists()->return_block(block, word_size);
-#endif
+  size_t min_size = TreeChunk<Metablock, FreeList>::min_size();
+  assert(word_size >= min_size,
+    err_msg("Should not deallocate dark matter " SIZE_FORMAT, word_size));
+  block_freelists()->return_block(p, word_size);
 }
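
The minimum-size assert above reflects that the block free list is backed by a tree dictionary: a freed block smaller than a dictionary node cannot be tracked at all and becomes "dark matter". A reduced sketch of that policy, with a hypothetical minimum node size and the same 0xf5 debug fill used further below in Metaspace::deallocate():

    #include <cstddef>
    #include <cstdio>
    #include <cstring>

    static const size_t kMinDictionaryWords = 4;  // hypothetical TreeChunk minimum
    static const size_t kBytesPerWord = sizeof(void*);

    // Blocks too small for a dictionary node are mangled and dropped;
    // anything else would go back on the free list.
    static void release_block(void* p, size_t word_size) {
      if (word_size < kMinDictionaryWords) {
        memset(p, 0xf5, word_size * kBytesPerWord);  // debug-style fill
        return;                                      // dark matter: untracked
      }
      printf("returned %zu words to the free list\n", word_size);
    }

    int main() {
      char buf[64];
      release_block(buf, 2);  // too small: mangled, never tracked
      release_block(buf, 8);  // large enough: would be returned
      return 0;
    }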
 
 // Adds a chunk to the list of chunks in use.
@@ -2290,52 +2108,13 @@
 
   // Find the correct list and set the current
   // chunk for that list.
-  switch (new_chunk->word_size()) {
-  case SpaceManager::SmallChunk :
-    if (chunks_in_use(SmallIndex) == NULL) {
-      // First chunk to add to the list
-      set_chunks_in_use(SmallIndex, new_chunk);
-    } else {
-      assert(current_chunk()->word_size() == SpaceManager::SmallChunk,
-        err_msg( "Incorrect mix of sizes in chunk list "
-        SIZE_FORMAT " new chunk " SIZE_FORMAT,
-        current_chunk()->word_size(), new_chunk->word_size()));
-      current_chunk()->set_next(new_chunk);
-    }
-    // Make current chunk
+  ChunkIndex index = ChunkManager::list_index(new_chunk->word_size());
+
+  if (index != HumongousIndex) {
     set_current_chunk(new_chunk);
-    break;
-  case SpaceManager::MediumChunk :
-    if (chunks_in_use(MediumIndex) == NULL) {
-      // About to add the first medium chunk so teminate the
-      // small chunk list.  In general once medium chunks are
-      // being added, we're past the need for small chunks.
-      if (current_chunk() != NULL) {
-        // Only a small chunk or the initial chunk could be
-        // the current chunk if this is the first medium chunk.
-        assert(current_chunk()->word_size() == SpaceManager::SmallChunk ||
-          chunks_in_use(SmallIndex) == NULL,
-          err_msg("Should be a small chunk or initial chunk, current chunk "
-          SIZE_FORMAT " new chunk " SIZE_FORMAT,
-          current_chunk()->word_size(), new_chunk->word_size()));
-        current_chunk()->set_next(NULL);
-      }
-      // First chunk to add to the list
-      set_chunks_in_use(MediumIndex, new_chunk);
-
-    } else {
-      // As a minimum the first medium chunk added would
-      // have become the _current_chunk
-      // so the _current_chunk has to be non-NULL here
-      // (although not necessarily still the first medium chunk).
-      assert(current_chunk()->word_size() == SpaceManager::MediumChunk,
-             "A medium chunk should the current chunk");
-      current_chunk()->set_next(new_chunk);
-    }
-    // Make current chunk
-    set_current_chunk(new_chunk);
-    break;
-  default: {
+    new_chunk->set_next(chunks_in_use(index));
+    set_chunks_in_use(index, new_chunk);
+  } else {
     // For null class loader data and DumpSharedSpaces, the first chunk isn't
     // small, so small will be null.  Link this first chunk as the current
     // chunk.
@@ -2350,8 +2129,7 @@
     new_chunk->set_next(chunks_in_use(HumongousIndex));
     set_chunks_in_use(HumongousIndex, new_chunk);
 
-    assert(new_chunk->word_size() > MediumChunk, "List inconsistency");
-  }
+    assert(new_chunk->word_size() > medium_chunk_size(), "List inconsistency");
   }
 
   assert(new_chunk->is_empty(), "Not ready for reuse");
@@ -2363,53 +2141,59 @@
   }
 }
 
+Metachunk* SpaceManager::get_new_chunk(size_t word_size,
+                                       size_t grow_chunks_by_words) {
+
+  Metachunk* next = vs_list()->get_new_chunk(word_size,
+                                             grow_chunks_by_words,
+                                             medium_chunk_bunch());
+
+  if (TraceMetadataHumongousAllocation &&
+      SpaceManager::is_humongous(next->word_size())) {
+    gclog_or_tty->print_cr("  new humongous chunk word size " PTR_FORMAT,
+                           next->word_size());
+  }
+
+  return next;
+}
+
 MetaWord* SpaceManager::allocate(size_t word_size) {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
 
-  size_t block_overhead = Metablock::overhead();
   // If only the dictionary is going to be used (i.e., no
   // indexed free list), then there is a minimum size requirement.
   // MinChunkSize is a placeholder for the real minimum size JJJ
-  size_t byte_size_with_overhead = (word_size + block_overhead) * BytesPerWord;
-#ifdef DEALLOCATE_BLOCKS
-  size_t raw_bytes_size = MAX2(ARENA_ALIGN(byte_size_with_overhead),
-                               MinChunkSize * BytesPerWord);
-#else
-  size_t raw_bytes_size = ARENA_ALIGN(byte_size_with_overhead);
-#endif
+  size_t byte_size = word_size * BytesPerWord;
+
+  size_t byte_size_with_overhead = byte_size + Metablock::overhead();
+
+  size_t raw_bytes_size = MAX2(byte_size_with_overhead,
+                               Metablock::min_block_byte_size());
+  raw_bytes_size = ARENA_ALIGN(raw_bytes_size);
   size_t raw_word_size = raw_bytes_size / BytesPerWord;
   assert(raw_word_size * BytesPerWord == raw_bytes_size, "Size problem");
 
   BlockFreelist* fl =  block_freelists();
-  Metablock* block = NULL;
+  MetaWord* p = NULL;
   // Allocation from the dictionary is expensive in the sense that
   // the dictionary has to be searched for a size.  Don't allocate
   // from the dictionary until it starts to get fat.  Is this
   // a reasonable policy?  Maybe a skinny dictionary is fast enough
   // for allocations.  Do some profiling.  JJJ
-  if (fl->totalSize() > allocation_from_dictionary_limit) {
-    block = fl->get_block(raw_word_size);
+  if (fl->total_size() > allocation_from_dictionary_limit) {
+    p = fl->get_block(raw_word_size);
   }
-  if (block == NULL) {
-    block = allocate_work(raw_word_size);
-    if (block == NULL) {
-      return NULL;
-    }
+  if (p == NULL) {
+    p = allocate_work(raw_word_size);
   }
   Metadebug::deallocate_block_a_lot(this, raw_word_size);
 
-  // Push the allocation past the word containing the size and leader.
-#ifdef ASSERT
-  MetaWord* result =  block->data();
-  return result;
-#else
-  return (MetaWord*) block;
-#endif
+  return p;
 }
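
The size computation above goes words to bytes, adds the block header, clamps to the smallest block the free list can represent, then arena-aligns and converts back to words. A worked example with assumed values (the real Metablock::overhead(), Metablock::min_block_byte_size(), and ARENA_ALIGN granularity are VM-specific):

    #include <cstddef>
    #include <cstdio>

    int main() {
      const size_t bytes_per_word  = 8;   // 64-bit VM assumed
      const size_t block_overhead  = 8;   // assumed Metablock::overhead()
      const size_t min_block_bytes = 24;  // assumed min_block_byte_size()
      const size_t arena_align     = 16;  // assumed ARENA_ALIGN granularity

      size_t word_size = 1;  // a tiny request
      size_t raw = word_size * bytes_per_word + block_overhead;  // 16
      if (raw < min_block_bytes) raw = min_block_bytes;          // MAX2 -> 24
      raw = (raw + arena_align - 1) & ~(arena_align - 1);        // align -> 32
      printf("raw_word_size = %zu\n", raw / bytes_per_word);     // 4 words
      return 0;
    }

The final assert above simply checks that the aligned byte size divides evenly back into words.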
 
 // Returns the address of space allocated for "word_size".
 // This method does not know about blocks (Metablocks).
-Metablock* SpaceManager::allocate_work(size_t word_size) {
+MetaWord* SpaceManager::allocate_work(size_t word_size) {
   assert_lock_strong(_lock);
 #ifdef ASSERT
   if (Metadebug::test_metadata_failure()) {
@@ -2417,7 +2201,7 @@
   }
 #endif
   // Is there space in the current chunk?
-  Metablock* result = NULL;
+  MetaWord* result = NULL;
 
   // For DumpSharedSpaces, only allocate out of the current chunk which is
   // never null because we gave it the size we wanted.   Caller reports out
@@ -2436,8 +2220,8 @@
   }
   if (result > 0) {
     inc_allocation_total(word_size);
-    assert(result != (Metablock*) chunks_in_use(MediumIndex), "Head of the list is being allocated");
-    assert(result->word_size() == word_size, "Size not set correctly");
+    assert(result != (MetaWord*) chunks_in_use(MediumIndex),
+           "Head of the list is being allocated");
   }
 
   return result;
@@ -2447,25 +2231,29 @@
   // If there are blocks in the dictionary, then
   // verification of chunks does not work since
   // being in the dictionary alters a chunk.
-  if (block_freelists()->totalSize() == 0) {
-    // Skip the small chunks because their next link points to
-    // medium chunks.  This is because the small chunk is the
-    // current chunk (for allocations) until it is full and the
-    // the addition of the next chunk does not NULL the next
-    // like of the small chunk.
-    for (ChunkIndex i = MediumIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  if (block_freelists()->total_size() == 0) {
+    for (ChunkIndex i = ZeroIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
       Metachunk* curr = chunks_in_use(i);
       while (curr != NULL) {
         curr->verify();
+        verify_chunk_size(curr);
         curr = curr->next();
       }
     }
   }
 }
 
+void SpaceManager::verify_chunk_size(Metachunk* chunk) {
+  assert(is_humongous(chunk->word_size()) ||
+         chunk->word_size() == medium_chunk_size() ||
+         chunk->word_size() == small_chunk_size() ||
+         chunk->word_size() == specialized_chunk_size(),
+         "Chunk size is wrong");
+  return;
+}
+
 #ifdef ASSERT
 void SpaceManager::verify_allocation_total() {
-#if 0
   // Verification is only guaranteed at a safepoint.
   if (SafepointSynchronize::is_at_safepoint()) {
     gclog_or_tty->print_cr("Chunk " PTR_FORMAT " allocation_total " SIZE_FORMAT
@@ -2478,7 +2266,6 @@
   assert(allocation_total() == sum_used_in_chunks_in_use(),
     err_msg("allocation total is not consistent %d vs %d",
             allocation_total(), sum_used_in_chunks_in_use()));
-#endif
 }
 
 #endif
@@ -2491,8 +2278,8 @@
   size_t capacity = 0;
 
   // Add up statistics for all chunks in this SpaceManager.
-  for (ChunkIndex index = SmallIndex;
-       index < NumberOfFreeLists;
+  for (ChunkIndex index = ZeroIndex;
+       index < NumberOfInUseLists;
        index = next_chunk_index(index)) {
     for (Metachunk* curr = chunks_in_use(index);
          curr != NULL;
@@ -2509,7 +2296,7 @@
     }
   }
 
-  size_t free = current_chunk()->free_word_size();
+  size_t free = current_chunk() == NULL ? 0 : current_chunk()->free_word_size();
   // Free space isn't wasted.
   waste -= free;
 
@@ -2518,26 +2305,19 @@
                 " waste " SIZE_FORMAT, curr_total, used, free, capacity, waste);
 }
 
-#ifdef ASSERT
+#ifndef PRODUCT
 void SpaceManager::mangle_freed_chunks() {
-  for (ChunkIndex index = SmallIndex;
-       index < NumberOfFreeLists;
+  for (ChunkIndex index = ZeroIndex;
+       index < NumberOfInUseLists;
        index = next_chunk_index(index)) {
     for (Metachunk* curr = chunks_in_use(index);
          curr != NULL;
          curr = curr->next()) {
-      // Try to detect incorrectly terminated small chunk
-      // list.
-      assert(index == MediumIndex || curr != chunks_in_use(MediumIndex),
-             err_msg("Mangling medium chunks in small chunks? "
-                     "curr " PTR_FORMAT " medium list " PTR_FORMAT,
-                     curr, chunks_in_use(MediumIndex)));
       curr->mangle();
     }
   }
 }
-#endif // ASSERT
-
+#endif // PRODUCT
 
 // MetaspaceAux
 
@@ -2566,9 +2346,6 @@
   return free * BytesPerWord;
 }
 
-// The total words available for metadata allocation.  This
-// uses Metaspace capacity_words() which is the total words
-// in chunks allocated for a Metaspace.
 size_t MetaspaceAux::capacity_in_bytes(Metaspace::MetadataType mdtype) {
   size_t capacity = free_chunks_total(mdtype);
   ClassLoaderDataGraphMetaspaceIterator iter;
@@ -2588,14 +2365,13 @@
   return reserved * BytesPerWord;
 }
 
-size_t MetaspaceAux::min_chunk_size() { return SpaceManager::MediumChunk; }
+size_t MetaspaceAux::min_chunk_size() { return Metaspace::first_chunk_word_size(); }
 
 size_t MetaspaceAux::free_chunks_total(Metaspace::MetadataType mdtype) {
   ChunkManager* chunk = (mdtype == Metaspace::ClassType) ?
                             Metaspace::class_space_list()->chunk_manager() :
                             Metaspace::space_list()->chunk_manager();
-
-  chunk->verify_free_chunks_total();
+  chunk->slow_verify();
   return chunk->free_chunks_total();
 }
 
@@ -2669,26 +2445,44 @@
 // Print total fragmentation for class and data metaspaces separately
 void MetaspaceAux::print_waste(outputStream* out) {
 
-  size_t small_waste = 0, medium_waste = 0, large_waste = 0;
-  size_t cls_small_waste = 0, cls_medium_waste = 0, cls_large_waste = 0;
+  size_t specialized_waste = 0, small_waste = 0, medium_waste = 0, large_waste = 0;
+  size_t specialized_count = 0, small_count = 0, medium_count = 0, large_count = 0;
+  size_t cls_specialized_waste = 0, cls_small_waste = 0, cls_medium_waste = 0, cls_large_waste = 0;
+  size_t cls_specialized_count = 0, cls_small_count = 0, cls_medium_count = 0, cls_large_count = 0;
 
   ClassLoaderDataGraphMetaspaceIterator iter;
   while (iter.repeat()) {
     Metaspace* msp = iter.get_next();
     if (msp != NULL) {
+      specialized_waste += msp->vsm()->sum_waste_in_chunks_in_use(SpecializedIndex);
+      specialized_count += msp->vsm()->sum_count_in_chunks_in_use(SpecializedIndex);
       small_waste += msp->vsm()->sum_waste_in_chunks_in_use(SmallIndex);
+      small_count += msp->vsm()->sum_count_in_chunks_in_use(SmallIndex);
       medium_waste += msp->vsm()->sum_waste_in_chunks_in_use(MediumIndex);
+      medium_count += msp->vsm()->sum_count_in_chunks_in_use(MediumIndex);
       large_waste += msp->vsm()->sum_waste_in_chunks_in_use(HumongousIndex);
-
+      large_count += msp->vsm()->sum_count_in_chunks_in_use(HumongousIndex);
+
+      cls_specialized_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SpecializedIndex);
+      cls_specialized_count += msp->class_vsm()->sum_count_in_chunks_in_use(SpecializedIndex);
       cls_small_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SmallIndex);
+      cls_small_count += msp->class_vsm()->sum_count_in_chunks_in_use(SmallIndex);
       cls_medium_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(MediumIndex);
+      cls_medium_count += msp->class_vsm()->sum_count_in_chunks_in_use(MediumIndex);
       cls_large_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(HumongousIndex);
+      cls_large_count += msp->class_vsm()->sum_count_in_chunks_in_use(HumongousIndex);
     }
   }
   out->print_cr("Total fragmentation waste (words) doesn't count free space");
-  out->print("  data: small " SIZE_FORMAT " medium " SIZE_FORMAT,
-             small_waste, medium_waste);
-  out->print_cr(" class: small " SIZE_FORMAT, cls_small_waste);
+  out->print_cr("  data: " SIZE_FORMAT " specialized(s) " SIZE_FORMAT ", "
+                        SIZE_FORMAT " small(s) " SIZE_FORMAT ", "
+                        SIZE_FORMAT " medium(s) " SIZE_FORMAT,
+             specialized_count, specialized_waste, small_count,
+             small_waste, medium_count, medium_waste);
+  out->print_cr(" class: " SIZE_FORMAT " specialized(s) " SIZE_FORMAT ", "
+                           SIZE_FORMAT " small(s) " SIZE_FORMAT,
+             cls_specialized_count, cls_specialized_waste,
+             cls_small_count, cls_small_waste);
 }
 
 // Dump global metaspace things from the end of ClassLoaderDataGraph
@@ -2699,16 +2493,18 @@
   print_waste(out);
 }
 
+void MetaspaceAux::verify_free_chunks() {
+  Metaspace::space_list()->chunk_manager()->verify();
+  Metaspace::class_space_list()->chunk_manager()->verify();
+}
+
 // Metaspace methods
 
 size_t Metaspace::_first_chunk_word_size = 0;
-
-Metaspace::Metaspace(Mutex* lock, size_t word_size) {
-  initialize(lock, word_size);
-}
-
-Metaspace::Metaspace(Mutex* lock) {
-  initialize(lock);
+size_t Metaspace::_first_class_chunk_word_size = 0;
+
+Metaspace::Metaspace(Mutex* lock, MetaspaceType type) {
+  initialize(lock, type);
 }
 
 Metaspace::~Metaspace() {
@@ -2760,11 +2556,18 @@
       }
     }
 
-    // Initialize this before initializing the VirtualSpaceList
+    // Initialize these before initializing the VirtualSpaceList
     _first_chunk_word_size = InitialBootClassLoaderMetaspaceSize / BytesPerWord;
+    _first_chunk_word_size = align_word_size_up(_first_chunk_word_size);
+    // Make the first class chunk bigger than a medium chunk so it's not put
+    // on the medium chunk list.   The next chunk will be small and progress
+    // from there.  This size was calculated by running -version.
+    _first_class_chunk_word_size = MIN2((size_t)MediumChunk*6,
+                                       (ClassMetaspaceSize/BytesPerWord)*2);
+    _first_class_chunk_word_size = align_word_size_up(_first_class_chunk_word_size);
     // Arbitrarily set the initial virtual space to a multiple
     // of the boot class loader size.
-    size_t word_size = VIRTUALSPACEMULTIPLIER * Metaspace::first_chunk_word_size();
+    size_t word_size = VIRTUALSPACEMULTIPLIER * first_chunk_word_size();
     // Initialize the list of virtual spaces.
     _space_list = new VirtualSpaceList(word_size);
   }
@@ -2779,23 +2582,8 @@
   _class_space_list = new VirtualSpaceList(rs);
 }
 
-
-void Metaspace::initialize(Mutex* lock, size_t initial_size) {
-  // Use SmallChunk size if not specified.   If specified, use this size for
-  // the data metaspace.
-  size_t word_size;
-  size_t class_word_size;
-  if (initial_size == 0) {
-    word_size = (size_t) SpaceManager::SmallChunk;
-    class_word_size = (size_t) SpaceManager::SmallChunk;
-  } else {
-    word_size = initial_size;
-    // Make the first class chunk bigger than a medium chunk so it's not put
-    // on the medium chunk list.   The next chunk will be small and progress
-    // from there.  This size calculated by -version.
-    class_word_size = MIN2((size_t)SpaceManager::MediumChunk*5,
-                           (ClassMetaspaceSize/BytesPerWord)*2);
-  }
+void Metaspace::initialize(Mutex* lock,
+                           MetaspaceType type) {
 
   assert(space_list() != NULL,
     "Metadata VirtualSpaceList has not been initialized");
@@ -2804,6 +2592,11 @@
   if (_vsm == NULL) {
     return;
   }
+  size_t word_size;
+  size_t class_word_size;
+  vsm()->get_initial_chunk_sizes(type,
+                                 &word_size,
+                                 &class_word_size);
 
   assert(class_space_list() != NULL,
     "Class VirtualSpaceList has not been initialized");
@@ -2818,7 +2611,8 @@
 
   // Allocate chunk for metadata objects
   Metachunk* new_chunk =
-     space_list()->current_virtual_space()->get_chunk_vs_with_expand(word_size);
+     space_list()->get_initialization_chunk(word_size,
+                                            vsm()->medium_chunk_bunch());
   assert(!DumpSharedSpaces || new_chunk != NULL, "should have enough space for both chunks");
   if (new_chunk != NULL) {
     // Add to this manager's list of chunks in use and current_chunk().
@@ -2827,19 +2621,24 @@
 
   // Allocate chunk for class metadata objects
   Metachunk* class_chunk =
-     class_space_list()->current_virtual_space()->get_chunk_vs_with_expand(class_word_size);
+     class_space_list()->get_initialization_chunk(class_word_size,
+                                                  class_vsm()->medium_chunk_bunch());
   if (class_chunk != NULL) {
     class_vsm()->add_chunk(class_chunk, true);
   }
 }
 
+size_t Metaspace::align_word_size_up(size_t word_size) {
+  size_t byte_size = word_size * wordSize;
+  return ReservedSpace::allocation_align_size_up(byte_size) / wordSize;
+}
 
 MetaWord* Metaspace::allocate(size_t word_size, MetadataType mdtype) {
   // DumpSharedSpaces doesn't use class metadata area (yet)
   if (mdtype == ClassType && !DumpSharedSpaces) {
-    return class_vsm()->allocate(word_size);
+    return  class_vsm()->allocate(word_size);
   } else {
-    return vsm()->allocate(word_size);
+    return  vsm()->allocate(word_size);
   }
 }
 
@@ -2853,6 +2652,7 @@
     gclog_or_tty->print_cr("Increase capacity to GC from " SIZE_FORMAT
       " to " SIZE_FORMAT, before_inc, MetaspaceGC::capacity_until_GC());
   }
+
   result = allocate(word_size, mdtype);
 
   return result;
@@ -2889,37 +2689,39 @@
 void Metaspace::deallocate(MetaWord* ptr, size_t word_size, bool is_class) {
   if (SafepointSynchronize::is_at_safepoint()) {
     assert(Thread::current()->is_VM_thread(), "should be the VM thread");
-    // Don't take lock
-#ifdef DEALLOCATE_BLOCKS
-    if (is_class) {
-      class_vsm()->deallocate(ptr);
-    } else {
-      vsm()->deallocate(ptr);
+    // Don't take Heap_lock
+    MutexLocker ml(vsm()->lock());
+    if (word_size < TreeChunk<Metablock, FreeList>::min_size()) {
+      // Dark matter.  Too small for dictionary.
+#ifdef ASSERT
+      Copy::fill_to_words((HeapWord*)ptr, word_size, 0xf5f5f5f5);
+#endif
+      return;
     }
-#else
-#ifdef ASSERT
-    Copy::fill_to_words((HeapWord*)ptr, word_size, metadata_deallocate);
-#endif
-#endif
-
+    if (is_class) {
+       class_vsm()->deallocate(ptr, word_size);
+    } else {
+      vsm()->deallocate(ptr, word_size);
+    }
   } else {
     MutexLocker ml(vsm()->lock());
 
-#ifdef DEALLOCATE_BLOCKS
-    if (is_class) {
-      class_vsm()->deallocate(ptr);
-    } else {
-      vsm()->deallocate(ptr);
+    if (word_size < TreeChunk<Metablock, FreeList>::min_size()) {
+      // Dark matter.  Too small for dictionary.
+#ifdef ASSERT
+      Copy::fill_to_words((HeapWord*)ptr, word_size, 0xf5f5f5f5);
+#endif
+      return;
     }
-#else
-#ifdef ASSERT
-    Copy::fill_to_words((HeapWord*)ptr, word_size, metadata_deallocate);
-#endif
-#endif
+    if (is_class) {
+      class_vsm()->deallocate(ptr, word_size);
+    } else {
+      vsm()->deallocate(ptr, word_size);
+    }
   }
 }
 
-MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
+Metablock* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
                               bool read_only, MetadataType mdtype, TRAPS) {
   if (HAS_PENDING_EXCEPTION) {
     assert(false, "Should not allocate with exception pending");
@@ -2943,7 +2745,7 @@
     if (result == NULL) {
       report_out_of_shared_space(read_only ? SharedReadOnly : SharedReadWrite);
     }
-    return result;
+    return Metablock::initialize(result, word_size);
   }
 
   result = loader_data->metaspace_non_null()->allocate(word_size, mdtype);
@@ -2951,11 +2753,17 @@
   if (result == NULL) {
     // Try to clean out some memory and retry.
     result =
-    Universe::heap()->collector_policy()->satisfy_failed_metadata_allocation(
+      Universe::heap()->collector_policy()->satisfy_failed_metadata_allocation(
         loader_data, word_size, mdtype);
 
     // If result is still null, we are out of memory.
     if (result == NULL) {
+      if (Verbose && TraceMetadataChunkAllocation) {
+        gclog_or_tty->print_cr("Metaspace allocation failed for size "
+          SIZE_FORMAT, word_size);
+        if (loader_data->metaspace_or_null() != NULL) loader_data->metaspace_or_null()->dump(gclog_or_tty);
+        MetaspaceAux::dump(gclog_or_tty);
+      }
       // -XX:+HeapDumpOnOutOfMemoryError and -XX:OnOutOfMemoryError support
       report_java_out_of_memory("Metadata space");
 
@@ -2967,7 +2775,7 @@
       THROW_OOP_0(Universe::out_of_memory_error_perm_gen());
     }
   }
-  return result;
+  return Metablock::initialize(result, word_size);
 }
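
The failure path above is a retry-then-report pattern: attempt the allocation, give the collector policy one chance to satisfy the failed request, and only then raise the out-of-memory machinery. A self-contained sketch of the same control flow (every helper here is a stand-in, not a VM call):

    #include <cstdio>
    #include <cstdlib>

    // Stand-ins that only model the control flow.
    static void* fast_allocate(size_t /*words*/) { return NULL; }  // pretend full
    static void* collect_and_retry(size_t words) { return malloc(words * 8); }

    static void* allocate_with_retry(size_t words) {
      void* p = fast_allocate(words);
      if (p == NULL) {
        p = collect_and_retry(words);  // one GC-assisted retry
      }
      if (p == NULL) {
        fprintf(stderr, "Metadata space exhausted\n");  // then report OOM
      }
      return p;
    }

    int main() {
      void* p = allocate_with_retry(16);
      free(p);
      return 0;
    }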
 
 void Metaspace::print_on(outputStream* out) const {
@@ -2978,15 +2786,17 @@
   }
 }
 
-#ifndef PRODUCT
-bool Metaspace::contains(const void * ptr) const {
+bool Metaspace::contains(const void * ptr) {
   if (MetaspaceShared::is_in_shared_space(ptr)) {
     return true;
   }
-  MutexLockerEx cl(SpaceManager::expand_lock(), Mutex::_no_safepoint_check_flag);
+  // This is checked while unlocked.  As long as the virtual spaces are added
+  // at the end, the pointer will be in one of them.  The virtual spaces
+  // aren't deleted presently.  When they are, some sort of locking might
+  // be needed.  Note that locking this can cause inversion problems with the
+  // caller in the MetaspaceObj::is_metadata() function.
   return space_list()->contains(ptr) || class_space_list()->contains(ptr);
 }
-#endif
 
 void Metaspace::verify() {
   vsm()->verify();
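
The unlocked scan in Metaspace::contains() above is safe only because virtual spaces are appended and never removed. A sketch of why an append-only, NULL-terminated list tolerates readers without a lock (std::atomic makes the publication explicit here; the VM relies on its own ordering guarantees):

    #include <atomic>
    #include <cstddef>

    struct VirtualSpaceNode {
      char* bottom;
      char* top;
      std::atomic<VirtualSpaceNode*> next;
      bool contains(const void* p) const { return p >= bottom && p < top; }
    };

    // Every node a reader can reach stays valid and fully initialized,
    // so traversal needs no lock under the append-only assumption.
    static bool list_contains(const VirtualSpaceNode* head, const void* p) {
      for (const VirtualSpaceNode* n = head; n != nullptr;
           n = n->next.load(std::memory_order_acquire)) {
        if (n->contains(p)) return true;
      }
      return false;
    }

    int main() {
      char space[128];
      VirtualSpaceNode node;
      node.bottom = space;
      node.top    = space + sizeof(space);
      node.next.store(nullptr);
      return list_contains(&node, space + 5) ? 0 : 1;
    }

Deleting nodes would break this, which is exactly the caveat the comment records.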
--- a/src/share/vm/memory/metaspace.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/metaspace.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -57,12 +57,10 @@
 //
 
 class ClassLoaderData;
+class Metablock;
 class MetaWord;
 class Mutex;
 class outputStream;
-class FreeChunk;
-template <class Chunk_t> class FreeList;
-template <class Chunk_t> class BinaryTreeDictionary;
 class SpaceManager;
 
 // Metaspaces each have a SpaceManager and allocations
@@ -89,11 +87,23 @@
 
  public:
   enum MetadataType {ClassType, NonClassType};
+  enum MetaspaceType {
+    StandardMetaspaceType,
+    BootMetaspaceType,
+    ROMetaspaceType,
+    ReadWriteMetaspaceType,
+    AnonymousMetaspaceType,
+    ReflectionMetaspaceType
+  };
 
  private:
-  void initialize(Mutex* lock, size_t initial_size = 0);
+  void initialize(Mutex* lock, MetaspaceType type);
+
+  // Align up the word size to the allocation word size
+  static size_t align_word_size_up(size_t);
 
   static size_t _first_chunk_word_size;
+  static size_t _first_class_chunk_word_size;
 
   SpaceManager* _vsm;
   SpaceManager* vsm() const { return _vsm; }
@@ -112,8 +122,7 @@
 
  public:
 
-  Metaspace(Mutex* lock, size_t initial_size);
-  Metaspace(Mutex* lock);
+  Metaspace(Mutex* lock, MetaspaceType type);
   ~Metaspace();
 
   // Initialize globals for Metaspace
@@ -121,6 +130,7 @@
   static void initialize_class_space(ReservedSpace rs);
 
   static size_t first_chunk_word_size() { return _first_chunk_word_size; }
+  static size_t first_class_chunk_word_size() { return _first_class_chunk_word_size; }
 
   char*  bottom() const;
   size_t used_words(MetadataType mdtype) const;
@@ -128,7 +138,7 @@
   size_t capacity_words(MetadataType mdtype) const;
   size_t waste_words(MetadataType mdtype) const;
 
-  static MetaWord* allocate(ClassLoaderData* loader_data, size_t size,
+  static Metablock* allocate(ClassLoaderData* loader_data, size_t size,
                             bool read_only, MetadataType mdtype, TRAPS);
   void deallocate(MetaWord* ptr, size_t byte_size, bool is_class);
 
@@ -137,11 +147,7 @@
 
   static bool is_initialized() { return _class_space_list != NULL; }
 
-#ifndef PRODUCT
-  bool contains(const void *ptr) const;
-  bool contains_class(const void *ptr) const;
-#endif
-
+  static bool contains(const void *ptr);
   void dump(outputStream* const out) const;
 
   void print_on(outputStream* st) const;
@@ -191,6 +197,7 @@
 
   static void print_waste(outputStream* out);
   static void dump(outputStream* out);
+  static void verify_free_chunks();
 };
 
 // Metaspaces are deallocated when their class loaders are GC'ed.
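
The new MetaspaceType enum above lets initialize() ask the SpaceManager for per-loader initial chunk sizes via get_initial_chunk_sizes(). That mapping is not shown in this hunk; the sketch below is purely illustrative of the idea, with made-up sizes:

    #include <cstddef>

    enum MetaspaceType {
      StandardMetaspaceType, BootMetaspaceType, ROMetaspaceType,
      ReadWriteMetaspaceType, AnonymousMetaspaceType, ReflectionMetaspaceType
    };

    // Illustrative only: small first chunks for metaspaces expected to stay
    // small, a big one for the boot loader.  Real sizes are VM-tuned.
    static void initial_chunk_sizes_sketch(MetaspaceType type,
                                           size_t* word_size,
                                           size_t* class_word_size) {
      switch (type) {
        case BootMetaspaceType:       *word_size = 4096; break;
        case AnonymousMetaspaceType:
        case ReflectionMetaspaceType: *word_size = 128;  break;
        default:                      *word_size = 512;  break;
      }
      *class_word_size = 128;  // class metaspace kept small in this sketch
    }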
--- a/src/share/vm/memory/metaspaceCounters.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/metaspaceCounters.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/src/share/vm/memory/metaspaceCounters.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/metaspaceCounters.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/src/share/vm/memory/metaspaceShared.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/metaspaceShared.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -373,17 +373,44 @@
   md_top = wc.get_top();
 
   // Print shared spaces all the time
-  const char* fmt = "%s space: " PTR_FORMAT " out of " PTR_FORMAT " words allocated at " PTR_FORMAT ".";
+  const char* fmt = "%s space: %9d [ %4.1f%% of total] out of %9d bytes [%4.1f%% used] at " PTR_FORMAT;
   Metaspace* ro_space = _loader_data->ro_metaspace();
   Metaspace* rw_space = _loader_data->rw_metaspace();
-  tty->print_cr(fmt, "ro", ro_space->used_words(Metaspace::NonClassType),
-                ro_space->capacity_words(Metaspace::NonClassType),
-                ro_space->bottom());
-  tty->print_cr(fmt, "rw", rw_space->used_words(Metaspace::NonClassType),
-                rw_space->capacity_words(Metaspace::NonClassType),
-                rw_space->bottom());
-  tty->print_cr(fmt, "md", md_top - md_low, md_end-md_low, md_low);
-  tty->print_cr(fmt, "mc", mc_top - mc_low, mc_end-mc_low, mc_low);
+  const size_t BPW = BytesPerWord;
+
+  // Allocated size of each space (may not be all occupied)
+  const size_t ro_alloced = ro_space->capacity_words(Metaspace::NonClassType) * BPW;
+  const size_t rw_alloced = rw_space->capacity_words(Metaspace::NonClassType) * BPW;
+  const size_t md_alloced = md_end-md_low;
+  const size_t mc_alloced = mc_end-mc_low;
+  const size_t total_alloced = ro_alloced + rw_alloced + md_alloced + mc_alloced;
+
+  // Occupied size of each space.
+  const size_t ro_bytes = ro_space->used_words(Metaspace::NonClassType) * BPW;
+  const size_t rw_bytes = rw_space->used_words(Metaspace::NonClassType) * BPW;
+  const size_t md_bytes = size_t(md_top - md_low);
+  const size_t mc_bytes = size_t(mc_top - mc_low);
+
+  // Percent of total size
+  const size_t total_bytes = ro_bytes + rw_bytes + md_bytes + mc_bytes;
+  const double ro_t_perc = ro_bytes / double(total_bytes) * 100.0;
+  const double rw_t_perc = rw_bytes / double(total_bytes) * 100.0;
+  const double md_t_perc = md_bytes / double(total_bytes) * 100.0;
+  const double mc_t_perc = mc_bytes / double(total_bytes) * 100.0;
+
+  // Percent of fullness of each space
+  const double ro_u_perc = ro_bytes / double(ro_alloced) * 100.0;
+  const double rw_u_perc = rw_bytes / double(rw_alloced) * 100.0;
+  const double md_u_perc = md_bytes / double(md_alloced) * 100.0;
+  const double mc_u_perc = mc_bytes / double(mc_alloced) * 100.0;
+  const double total_u_perc = total_bytes / double(total_alloced) * 100.0;
+
+  tty->print_cr(fmt, "ro", ro_bytes, ro_t_perc, ro_alloced, ro_u_perc, ro_space->bottom());
+  tty->print_cr(fmt, "rw", rw_bytes, rw_t_perc, rw_alloced, rw_u_perc, rw_space->bottom());
+  tty->print_cr(fmt, "md", md_bytes, md_t_perc, md_alloced, md_u_perc, md_low);
+  tty->print_cr(fmt, "mc", mc_bytes, mc_t_perc, mc_alloced, mc_u_perc, mc_low);
+  tty->print_cr("total   : %9d [100.0%% of total] out of %9d bytes [%4.1f%% used]",
+                 total_bytes, total_alloced, total_u_perc);
 
   // Update the vtable pointers in all of the Klass objects in the
   // heap. They should point to newly generated vtable.
@@ -431,7 +458,7 @@
 }
 
 static void link_shared_classes(Klass* obj, TRAPS) {
-  Klass* k = Klass::cast(obj);
+  Klass* k = obj;
   if (k->oop_is_instance()) {
     InstanceKlass* ik = (InstanceKlass*) k;
     // Link the class to cause the bytecodes to be rewritten and the
@@ -663,8 +690,8 @@
   if (_ro_base == NULL || _rw_base == NULL) {
     return false;
   } else {
-    return ((p > _ro_base && p < (_ro_base + SharedReadOnlySize)) ||
-            (p > _rw_base && p < (_rw_base + SharedReadWriteSize)));
+    return ((p >= _ro_base && p < (_ro_base + SharedReadOnlySize)) ||
+            (p >= _rw_base && p < (_rw_base + SharedReadWriteSize)));
   }
 }
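
The change from '>' to '>=' above matters only for the very first word of each region: membership in the half-open range [base, base + size) must include base itself. A minimal check:

    #include <cassert>
    #include <cstddef>

    static bool in_region(const char* p, const char* base, size_t size) {
      return p >= base && p < base + size;  // half-open [base, base + size)
    }

    int main() {
      char region[64];
      assert(in_region(region, region, sizeof(region)));        // base is inside
      assert(!in_region(region + 64, region, sizeof(region)));  // one-past-end is not
      return 0;
    }

With the old '>' test, an object placed exactly at _ro_base or _rw_base was wrongly reported as not shared.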
 
@@ -689,17 +716,15 @@
 bool MetaspaceShared::map_shared_spaces(FileMapInfo* mapinfo) {
   size_t image_alignment = mapinfo->alignment();
 
-  // Map in the shared memory and then map the regions on top of it
+#ifndef _WINDOWS
+  // Map in the shared memory and then map the regions on top of it.
+  // On Windows, don't map the memory here because it will cause the
+  // mappings of the regions to fail.
   ReservedSpace shared_rs = mapinfo->reserve_shared_memory();
   if (!shared_rs.is_reserved()) return false;
+#endif
 
-  // Split reserved memory into pieces (windows needs this)
-  ReservedSpace ro_rs   = shared_rs.first_part(SharedReadOnlySize);
-  ReservedSpace tmp_rs1 = shared_rs.last_part(SharedReadOnlySize);
-  ReservedSpace rw_rs   = tmp_rs1.first_part(SharedReadWriteSize);
-  ReservedSpace tmp_rs2 = tmp_rs1.last_part(SharedReadWriteSize);
-  ReservedSpace md_rs   = tmp_rs2.first_part(SharedMiscDataSize);
-  ReservedSpace mc_rs   = tmp_rs2.last_part(SharedMiscDataSize);
+  assert(!DumpSharedSpaces, "Should not be called with DumpSharedSpaces");
 
   // Map each shared region
   if ((_ro_base = mapinfo->map_region(ro)) != NULL &&
@@ -716,8 +741,10 @@
     if (_rw_base != NULL) mapinfo->unmap_region(rw);
     if (_md_base != NULL) mapinfo->unmap_region(md);
     if (_mc_base != NULL) mapinfo->unmap_region(mc);
+#ifndef _WINDOWS
     // Release the entire mapped region
     shared_rs.release();
+#endif
     // If -Xshare:on is specified, print out the error message and exit VM,
     // otherwise, set UseSharedSpaces to false and continue.
     if (RequireSharedSpaces) {
--- a/src/share/vm/memory/resourceArea.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/resourceArea.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,18 +26,7 @@
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "runtime/mutexLocker.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 //------------------------------ResourceMark-----------------------------------
 debug_only(int ResourceArea::_warned;)      // to suppress multiple warnings
--- a/src/share/vm/memory/resourceArea.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/resourceArea.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,18 +26,7 @@
 #define SHARE_VM_MEMORY_RESOURCEAREA_HPP
 
 #include "memory/allocation.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 // The resource area holds temporary data structures in the VM.
 // The actual allocation areas are thread local. Typical usage:
@@ -127,15 +116,21 @@
   void reset_to_mark() {
     if (UseMallocOnly) free_malloced_objects();
 
-    if( _chunk->next() )        // Delete later chunks
+    if( _chunk->next() ) {       // Delete later chunks
+      // Reset the arena size before deleting the chunks; otherwise the
+      // total arena size could exceed the total chunk size.
+      assert(_area->size_in_bytes() > size_in_bytes(), "Sanity check");
+      _area->set_size_in_bytes(size_in_bytes());
       _chunk->next_chop();
+    } else {
+      assert(_area->size_in_bytes() == size_in_bytes(), "Sanity check");
+    }
     _area->_chunk = _chunk;     // Roll back arena to saved chunk
     _area->_hwm = _hwm;
     _area->_max = _max;
 
     // clear out this chunk (to detect allocation bugs)
     if (ZapResourceArea) memset(_hwm, badResourceValue, _max - _hwm);
-    _area->set_size_in_bytes(size_in_bytes());
   }
 
   ~ResourceMark() {
@@ -219,15 +214,21 @@
   void reset_to_mark() {
     if (UseMallocOnly) free_malloced_objects();
 
-    if( _chunk->next() )        // Delete later chunks
+    if( _chunk->next() ) {        // Delete later chunks
+      // Reset the arena size before deleting the chunks; otherwise the
+      // total arena size could exceed the total chunk size.
+      assert(_area->size_in_bytes() > size_in_bytes(), "Sanity check");
+      _area->set_size_in_bytes(size_in_bytes());
       _chunk->next_chop();
+    } else {
+      assert(_area->size_in_bytes() == size_in_bytes(), "Sanity check");
+    }
     _area->_chunk = _chunk;     // Roll back arena to saved chunk
     _area->_hwm = _hwm;
     _area->_max = _max;
 
     // clear out this chunk (to detect allocation bugs)
     if (ZapResourceArea) memset(_hwm, badResourceValue, _max - _hwm);
-    _area->set_size_in_bytes(size_in_bytes());
   }
 
   ~DeoptResourceMark() {
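
Both reset_to_mark() bodies above now shrink the arena's recorded size before chopping the later chunks, so the accounting never transiently exceeds the memory actually held. A reduced model of that invariant (names hypothetical):

    #include <cassert>
    #include <cstddef>

    struct ArenaModel {
      size_t chunk_bytes;    // bytes actually held in chunks
      size_t size_in_bytes;  // accounting; must never exceed chunk_bytes
    };

    // Shrink the accounting first, then release the chunks: at every point
    // size_in_bytes <= chunk_bytes, which is what the new asserts check.
    static void reset_to_mark(ArenaModel& a, size_t saved_size,
                              size_t later_chunk_bytes) {
      if (later_chunk_bytes > 0) {
        assert(a.size_in_bytes > saved_size);
        a.size_in_bytes = saved_size;        // roll back the total first
        a.chunk_bytes -= later_chunk_bytes;  // then drop the later chunks
      } else {
        assert(a.size_in_bytes == saved_size);
      }
      assert(a.size_in_bytes <= a.chunk_bytes);
    }

    int main() {
      ArenaModel a = { 4096, 4096 };
      reset_to_mark(a, 1024, 3072);  // 3072 bytes of later chunks go away
      return 0;
    }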
--- a/src/share/vm/memory/sharedHeap.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/sharedHeap.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -154,10 +154,12 @@
   if (!_process_strong_tasks->is_task_claimed(SH_PS_JNIHandles_oops_do))
     JNIHandles::oops_do(roots);
   // All threads execute this; the individual threads are task groups.
+  CLDToOopClosure roots_from_clds(roots);
+  CLDToOopClosure* roots_from_clds_p = (is_scavenging ? NULL : &roots_from_clds);
   if (ParallelGCThreads > 0) {
-    Threads::possibly_parallel_oops_do(roots, code_roots);
+    Threads::possibly_parallel_oops_do(roots, roots_from_clds_p ,code_roots);
   } else {
-    Threads::oops_do(roots, code_roots);
+    Threads::oops_do(roots, roots_from_clds_p, code_roots);
   }
   if (!_process_strong_tasks-> is_task_claimed(SH_PS_ObjectSynchronizer_oops_do))
     ObjectSynchronizer::oops_do(roots);
--- a/src/share/vm/memory/space.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/space.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -411,7 +411,6 @@
     assert(q->forwardee() == NULL, "should be forwarded to NULL");
   }
 
-  VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(q, size));
   compact_top += size;
 
   // we need to update the offset table so that the beginnings of objects can be
@@ -470,13 +469,10 @@
     if (oop(q)->is_gc_marked()) {
       // q is alive
 
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q)));
       // point all the oops to the new location
       size_t size = oop(q)->adjust_pointers();
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());
 
       debug_only(prev_q = q);
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size));
 
       q += size;
     } else {
@@ -790,7 +786,9 @@
 
 // Very general, slow implementation.
 HeapWord* ContiguousSpace::block_start_const(const void* p) const {
-  assert(MemRegion(bottom(), end()).contains(p), "p not in space");
+  assert(MemRegion(bottom(), end()).contains(p),
+         err_msg("p (" PTR_FORMAT ") not in space [" PTR_FORMAT ", " PTR_FORMAT ")",
+                  p, bottom(), end()));
   if (p >= top()) {
     return top();
   } else {
@@ -800,19 +798,27 @@
       last = cur;
       cur += oop(cur)->size();
     }
-    assert(oop(last)->is_oop(), "Should be an object start");
+    assert(oop(last)->is_oop(),
+           err_msg(PTR_FORMAT " should be an object start", last));
     return last;
   }
 }
 
 size_t ContiguousSpace::block_size(const HeapWord* p) const {
-  assert(MemRegion(bottom(), end()).contains(p), "p not in space");
+  assert(MemRegion(bottom(), end()).contains(p),
+         err_msg("p (" PTR_FORMAT ") not in space [" PTR_FORMAT ", " PTR_FORMAT ")",
+                  p, bottom(), end()));
   HeapWord* current_top = top();
-  assert(p <= current_top, "p is not a block start");
-  assert(p == current_top || oop(p)->is_oop(), "p is not a block start");
-  if (p < current_top)
+  assert(p <= current_top,
+         err_msg("p > current top - p: " PTR_FORMAT ", current top: " PTR_FORMAT,
+                  p, current_top));
+  assert(p == current_top || oop(p)->is_oop(),
+         err_msg("p (" PTR_FORMAT ") is not a block start - "
+                 "current_top: " PTR_FORMAT ", is_oop: %s",
+                 p, current_top, BOOL_TO_STR(oop(p)->is_oop())));
+  if (p < current_top) {
     return oop(p)->size();
-  else {
+  } else {
     assert(p == current_top, "just checking");
     return pointer_delta(end(), (HeapWord*) p);
   }
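
block_start_const() above finds an object start by walking object sizes linearly from the bottom of the space; the strengthened assertions now report the failing addresses. A toy version of the walk, where each object's first word stands in for oop(q)->size():

    #include <cstddef>
    #include <cstdio>

    // Toy heap: each object's first word holds its size in words.
    static const size_t* block_start(const size_t* bottom, const size_t* top,
                                     const size_t* p) {
      const size_t* cur  = bottom;
      const size_t* last = bottom;
      while (cur <= p && cur < top) {
        last = cur;
        cur += *cur;  // hop to the next object start
      }
      return last;    // start of the object covering p
    }

    int main() {
      size_t heap[10] = { 3, 0, 0, 4, 0, 0, 0, 2, 0, 0 };  // 3-, 4-, 2-word objects
      const size_t* start = block_start(heap, heap + 10, heap + 5);
      printf("block start offset: %d\n", (int)(start - heap));  // 3
      return 0;
    }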
--- a/src/share/vm/memory/space.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/space.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -655,16 +655,10 @@
       assert(block_is_obj(q),                                                   \
              "should be at block boundaries, and should be looking at objs");   \
                                                                                 \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q)));     \
-                                                                                \
       /* point all the oops to the new location */                              \
       size_t size = oop(q)->adjust_pointers();                                  \
       size = adjust_obj_size(size);                                             \
                                                                                 \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());           \
-                                                                                \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size));     \
-                                                                                \
       q += size;                                                                \
     }                                                                           \
                                                                                 \
@@ -685,12 +679,9 @@
     Prefetch::write(q, interval);                                               \
     if (oop(q)->is_gc_marked()) {                                               \
       /* q is alive */                                                          \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q)));     \
       /* point all the oops to the new location */                              \
       size_t size = oop(q)->adjust_pointers();                                  \
       size = adjust_obj_size(size);                                             \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers());           \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size));     \
       debug_only(prev_q = q);                                                   \
       q += size;                                                                \
     } else {                                                                    \
@@ -725,7 +716,6 @@
       size_t size = obj_size(q);                                                \
       assert(!oop(q)->is_gc_marked(),                                           \
              "should be unmarked (special dense prefix handling)");             \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size, q));       \
       debug_only(prev_q = q);                                                   \
       q += size;                                                                \
     }                                                                           \
@@ -759,8 +749,6 @@
       Prefetch::write(compaction_top, copy_interval);                           \
                                                                                 \
       /* copy object and reinit its mark */                                     \
-      VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size,            \
-                                                            compaction_top));   \
       assert(q != compaction_top, "everything in this pass should be moving");  \
       Copy::aligned_conjoint_words(q, compaction_top, size);                    \
       oop(compaction_top)->init_mark();                                         \
--- a/src/share/vm/memory/tenuredGeneration.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/tenuredGeneration.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -62,7 +62,7 @@
                                        _virtual_space.reserved_size(),
                                        _the_space, _gen_counters);
 #ifndef SERIALGC
-  if (UseParNewGC && ParallelGCThreads > 0) {
+  if (UseParNewGC) {
     typedef ParGCAllocBufferWithBOT* ParGCAllocBufferWithBOTPtr;
     _alloc_buffers = NEW_C_HEAP_ARRAY(ParGCAllocBufferWithBOTPtr,
                                       ParallelGCThreads, mtGC);
--- a/src/share/vm/memory/threadLocalAllocBuffer.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/threadLocalAllocBuffer.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -28,19 +28,8 @@
 #include "memory/threadLocalAllocBuffer.inline.hpp"
 #include "memory/universe.inline.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/copy.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 // Thread-Local Edens support
 
--- a/src/share/vm/memory/universe.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/memory/universe.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -62,6 +62,7 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/synchronizer.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
 #include "runtime/vm_operations.hpp"
 #include "services/memoryService.hpp"
@@ -69,18 +70,6 @@
 #include "utilities/events.hpp"
 #include "utilities/hashtable.inline.hpp"
 #include "utilities/preserveException.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 #ifndef SERIALGC
 #include "gc_implementation/concurrentMarkSweep/cmsAdaptiveSizePolicy.hpp"
 #include "gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.hpp"
@@ -239,7 +228,7 @@
   if (size < alignment || size % alignment != 0) {
     ResourceMark rm;
     stringStream st;
-    st.print("Size of %s (%ld bytes) must be aligned to %ld bytes", name, size, alignment);
+    st.print("Size of %s (" UINTX_FORMAT " bytes) must be aligned to " UINTX_FORMAT " bytes", name, size, alignment);
     char* error = st.as_string();
     vm_exit_during_initialization(error);
   }
@@ -346,7 +335,7 @@
   // ---
   // New
   // Have already been initialized.
-  Klass::cast(_objectArrayKlassObj)->append_to_sibling_list();
+  _objectArrayKlassObj->append_to_sibling_list();
 
   // Compute is_jdk version flags.
   // Only 1.3 or later has the java.lang.Shutdown class.
@@ -418,6 +407,10 @@
     assert(i == _fullgc_alot_dummy_array->length(), "just checking");
   }
   #endif
+
+  // Initialize dependency array for null class loader
+  ClassLoaderData::the_null_class_loader_data()->init_dependencies(CHECK);
+
 }
 
 // CDS support for patching vtables in metadata in the shared archive.
@@ -425,14 +418,10 @@
 // from MetaspaceObj, because the latter does not have virtual functions.
 // If the metadata type has a vtable, it cannot be shared in the read-only
 // section of the CDS archive, because the vtable pointer is patched.
-static inline void* dereference(void* addr) {
-  return *(void**)addr;
-}
-
 static inline void add_vtable(void** list, int* n, void* o, int count) {
   guarantee((*n) < count, "vtable list too small");
-  void* vtable = dereference(o);
-  assert(dereference(vtable) != NULL, "invalid vtable");
+  void* vtable = dereference_vptr(o);
+  assert(*(void**)(vtable) != NULL, "invalid vtable");
   list[(*n)++] = vtable;
 }
 
@@ -1304,6 +1293,8 @@
   if (!silent) gclog_or_tty->print("cldg ");
   ClassLoaderDataGraph::verify();
 #endif
+  if (!silent) gclog_or_tty->print("metaspace chunks ");
+  MetaspaceAux::verify_free_chunks();
   if (!silent) gclog_or_tty->print("hand ");
   JNIHandles::verify();
   if (!silent) gclog_or_tty->print("C-heap ");
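
The add_vtable() change above funnels the vtable read through dereference_vptr(). On the common C++ ABIs the vtable pointer is the first word of a polymorphic object, which is implementation-defined behavior; that is exactly why the VM isolates it in one helper. A small demonstration:

    #include <cstdio>

    struct Polymorphic {
      virtual ~Polymorphic() {}
      virtual void f() {}
    };

    // First word of a polymorphic object is the vptr on the usual ABIs.
    static void* dereference_vptr(const void* o) {
      return *(void* const*) o;
    }

    int main() {
      Polymorphic a, b;
      // Two instances of one class share a vtable.
      printf("same vtable: %s\n",
             dereference_vptr(&a) == dereference_vptr(&b) ? "yes" : "no");
      return 0;
    }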
--- a/src/share/vm/oops/annotations.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/annotations.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -61,6 +61,9 @@
   free_contents(loader_data, methods_annotations());
   free_contents(loader_data, methods_parameter_annotations());
   free_contents(loader_data, methods_default_annotations());
+
+  // Recursively deallocate optional Annotations linked through this one
+  MetadataFactory::free_metadata(loader_data, type_annotations());
 }
 
 // Set the annotation at 'idnum' to 'anno'.
--- a/src/share/vm/oops/annotations.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/annotations.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -38,7 +38,8 @@
 typedef Array<u1> AnnotationArray;
 
 // Class to hold the various types of annotations. The only metadata that points
-// to this is InstanceKlass.
+// to this is InstanceKlass, or another Annotations instance if this is
+// a type_annotation instance.
 
 class Annotations: public MetaspaceObj {
 
@@ -58,6 +59,8 @@
   // such annotations.
   // Index is the idnum, which is initially the same as the methods array index.
   Array<AnnotationArray*>*     _methods_default_annotations;
+  // Type annotations for this class, or null if none.
+  Annotations*                 _type_annotations;
 
   // Constructor where some values are known not to be null
   Annotations(Array<AnnotationArray*>* fa, Array<AnnotationArray*>* ma,
@@ -66,7 +69,8 @@
                  _fields_annotations(fa),
                  _methods_annotations(ma),
                  _methods_parameter_annotations(mpa),
-                 _methods_default_annotations(mda) {}
+                 _methods_default_annotations(mda),
+                 _type_annotations(NULL) {}
 
  public:
   // Allocate instance of this class
@@ -81,22 +85,26 @@
   static int size()    { return sizeof(Annotations) / wordSize; }
 
   // Constructor to initialize to null
-  Annotations() : _class_annotations(NULL), _fields_annotations(NULL),
+  Annotations() : _class_annotations(NULL),
+                  _fields_annotations(NULL),
                   _methods_annotations(NULL),
                   _methods_parameter_annotations(NULL),
-                  _methods_default_annotations(NULL) {}
+                  _methods_default_annotations(NULL),
+                  _type_annotations(NULL) {}
 
   AnnotationArray* class_annotations() const                       { return _class_annotations; }
   Array<AnnotationArray*>* fields_annotations() const              { return _fields_annotations; }
   Array<AnnotationArray*>* methods_annotations() const             { return _methods_annotations; }
   Array<AnnotationArray*>* methods_parameter_annotations() const   { return _methods_parameter_annotations; }
   Array<AnnotationArray*>* methods_default_annotations() const     { return _methods_default_annotations; }
+  Annotations* type_annotations() const                            { return _type_annotations; }
 
   void set_class_annotations(AnnotationArray* md)                     { _class_annotations = md; }
   void set_fields_annotations(Array<AnnotationArray*>* md)            { _fields_annotations = md; }
   void set_methods_annotations(Array<AnnotationArray*>* md)           { _methods_annotations = md; }
   void set_methods_parameter_annotations(Array<AnnotationArray*>* md) { _methods_parameter_annotations = md; }
   void set_methods_default_annotations(Array<AnnotationArray*>* md)   { _methods_default_annotations = md; }
+  void set_type_annotations(Annotations* annos)                       { _type_annotations = annos; }
 
   // Redefine classes support
   AnnotationArray* get_method_annotations_of(int idnum)
@@ -129,6 +137,7 @@
   inline AnnotationArray* get_method_annotations_from(int idnum, Array<AnnotationArray*>* annos);
   void set_annotations(Array<AnnotationArray*>* md, Array<AnnotationArray*>** md_p)  { *md_p = md; }
 
+  bool is_klass() const { return false; }
  private:
   void set_methods_annotations_of(instanceKlassHandle ik,
                                   int idnum, AnnotationArray* anno,
--- a/src/share/vm/oops/arrayKlass.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/arrayKlass.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -67,7 +67,7 @@
 Method* ArrayKlass::uncached_lookup_method(Symbol* name, Symbol* signature) const {
   // There are no methods in an array klass but the super class (Object) has some
   assert(super(), "super klass must be present");
-  return Klass::cast(super())->uncached_lookup_method(name, signature);
+  return super()->uncached_lookup_method(name, signature);
 }
 
 ArrayKlass::ArrayKlass(Symbol* name) {
--- a/src/share/vm/oops/compiledICHolder.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/compiledICHolder.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -48,8 +48,8 @@
 // Verification
 
 void CompiledICHolder::verify_on(outputStream* st) {
-  guarantee(holder_method()->is_metadata(),   "should be in permspace");
+  guarantee(holder_method()->is_metadata(),   "should be in metaspace");
   guarantee(holder_method()->is_method(), "should be method");
-  guarantee(holder_klass()->is_metadata(),    "should be in permspace");
+  guarantee(holder_klass()->is_metadata(),    "should be in metaspace");
   guarantee(holder_klass()->is_klass(),   "should be klass");
 }
--- a/src/share/vm/oops/constMethod.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/constMethod.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -34,29 +34,38 @@
 const u2 ConstMethod::UNSET_IDNUM = 0xFFFF;
 
 ConstMethod* ConstMethod::allocate(ClassLoaderData* loader_data,
-                                            int byte_code_size,
-                                            int compressed_line_number_size,
-                                            int localvariable_table_length,
-                                            int exception_table_length,
-                                            int checked_exceptions_length,
-                                            TRAPS) {
+                                   int byte_code_size,
+                                   int compressed_line_number_size,
+                                   int localvariable_table_length,
+                                   int exception_table_length,
+                                   int checked_exceptions_length,
+                                   int method_parameters_length,
+                                   u2  generic_signature_index,
+                                   MethodType method_type,
+                                   TRAPS) {
   int size = ConstMethod::size(byte_code_size,
-                                      compressed_line_number_size,
-                                      localvariable_table_length,
-                                      exception_table_length,
-                                      checked_exceptions_length);
+                               compressed_line_number_size,
+                               localvariable_table_length,
+                               exception_table_length,
+                               checked_exceptions_length,
+                               method_parameters_length,
+                               generic_signature_index);
   return new (loader_data, size, true, THREAD) ConstMethod(
-                       byte_code_size, compressed_line_number_size,
-                       localvariable_table_length, exception_table_length,
-                       checked_exceptions_length, size);
+      byte_code_size, compressed_line_number_size, localvariable_table_length,
+      exception_table_length, checked_exceptions_length,
+      method_parameters_length, generic_signature_index,
+      method_type, size);
 }
 
 ConstMethod::ConstMethod(int byte_code_size,
-                                       int compressed_line_number_size,
-                                       int localvariable_table_length,
-                                       int exception_table_length,
-                                       int checked_exceptions_length,
-                                       int size) {
+                         int compressed_line_number_size,
+                         int localvariable_table_length,
+                         int exception_table_length,
+                         int checked_exceptions_length,
+                         int method_parameters_length,
+                         u2  generic_signature_index,
+                         MethodType method_type,
+                         int size) {
 
   No_Safepoint_Verifier no_safepoint;
   set_interpreter_kind(Interpreter::invalid);
@@ -65,10 +74,13 @@
   set_stackmap_data(NULL);
   set_code_size(byte_code_size);
   set_constMethod_size(size);
-  set_inlined_tables_length(checked_exceptions_length,
+  set_inlined_tables_length(generic_signature_index,
+                            checked_exceptions_length,
                             compressed_line_number_size,
                             localvariable_table_length,
-                            exception_table_length);
+                            exception_table_length,
+                            method_parameters_length);
+  set_method_type(method_type);
   assert(this->size() == size, "wrong size for object");
 }
 
@@ -85,10 +97,12 @@
 // How big must this constMethodObject be?
 
 int ConstMethod::size(int code_size,
-                                    int compressed_line_number_size,
-                                    int local_variable_table_length,
-                                    int exception_table_length,
-                                    int checked_exceptions_length) {
+                      int compressed_line_number_size,
+                      int local_variable_table_length,
+                      int exception_table_length,
+                      int checked_exceptions_length,
+                      int method_parameters_length,
+                      u2  generic_signature_index) {
   int extra_bytes = code_size;
   if (compressed_line_number_size > 0) {
     extra_bytes += compressed_line_number_size;
@@ -106,13 +120,19 @@
     extra_bytes += sizeof(u2);
     extra_bytes += exception_table_length * sizeof(ExceptionTableElement);
   }
+  if (generic_signature_index != 0) {
+    extra_bytes += sizeof(u2);
+  }
+  if (method_parameters_length > 0) {
+    extra_bytes += sizeof(u2);
+    extra_bytes += method_parameters_length * sizeof(MethodParametersElement);
+  }
   int extra_words = align_size_up(extra_bytes, BytesPerWord) / BytesPerWord;
   return align_object_size(header_size() + extra_words);
 }
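+
+// Worked example (illustrative): a method with a generic signature and two
+// method parameters contributes sizeof(u2) for the signature index, plus
+// sizeof(u2) + 2 * sizeof(MethodParametersElement) = 2 + 12 bytes for the
+// parameters table, before align_size_up rounds extra_bytes to whole words.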
 
 Method* ConstMethod::method() const {
-    return InstanceKlass::cast(_constants->pool_holder())->method_with_idnum(
-                               _method_idnum);
+    return _constants->pool_holder()->method_with_idnum(_method_idnum);
   }
 
 // linenumber table - note that length is unknown until decompression,
@@ -124,10 +144,29 @@
   return code_end();
 }
 
-u2* ConstMethod::checked_exceptions_length_addr() const {
+u2* ConstMethod::generic_signature_index_addr() const {
   // Located at the end of the constMethod.
+  assert(has_generic_signature(), "called only if generic signature exists");
+  return last_u2_element();
+}
+
+u2* ConstMethod::checked_exceptions_length_addr() const {
+  // Located below the generic signature index and the method parameters
+  // table, if present.
   assert(has_checked_exceptions(), "called only if table is present");
-  return last_u2_element();
+  if (has_method_parameters()) {
+    // If method parameters present, locate immediately before them.
+    return (u2*)method_parameters_start() - 1;
+  } else {
+    // Else, the exception table is at the end of the constMethod.
+    return has_generic_signature() ? (last_u2_element() - 1) :
+                                     last_u2_element();
+  }
+}
+
+u2* ConstMethod::method_parameters_length_addr() const {
+  assert(has_method_parameters(), "called only if table is present");
+  return has_generic_signature() ? (last_u2_element() - 1) :
+                                    last_u2_element();
 }
 
 u2* ConstMethod::exception_table_length_addr() const {
@@ -136,8 +175,14 @@
     // If checked_exception present, locate immediately before them.
     return (u2*) checked_exceptions_start() - 1;
   } else {
-    // Else, the exception table is at the end of the constMethod.
-    return last_u2_element();
+    if (has_method_parameters()) {
+      // If method parameters present, locate immediately before them.
+      return (u2*)method_parameters_start() - 1;
+    } else {
+      // Else, the exception table is at the end of the constMethod.
+      return has_generic_signature() ? (last_u2_element() - 1) :
+                                        last_u2_element();
+    }
   }
 }
 
@@ -151,37 +196,74 @@
       // If checked_exception present, locate immediately before them.
       return (u2*) checked_exceptions_start() - 1;
     } else {
-      // Else, the linenumber table is at the end of the constMethod.
-      return last_u2_element();
+      if (has_method_parameters()) {
+        // If method parameters present, locate immediately before them.
+        return (u2*)method_parameters_start() - 1;
+      } else {
+        // Else, the localvariable table is at the end of the constMethod.
+        return has_generic_signature() ? (last_u2_element() - 1) :
+                                          last_u2_element();
     }
   }
+  }
 }
 
-
 // Update the flags to indicate the presence of these optional fields.
-void ConstMethod::set_inlined_tables_length(
-                                              int checked_exceptions_len,
-                                              int compressed_line_number_size,
-                                              int localvariable_table_len,
-                                              int exception_table_len) {
-  // Must be done in the order below, otherwise length_addr accessors
-  // will not work. Only set bit in header if length is positive.
+void ConstMethod::set_inlined_tables_length(u2  generic_signature_index,
+                                            int checked_exceptions_len,
+                                            int compressed_line_number_size,
+                                            int localvariable_table_len,
+                                            int exception_table_len,
+                                            int method_parameters_len) {
   assert(_flags == 0, "Error");
-  if (compressed_line_number_size > 0) {
+  if (compressed_line_number_size > 0)
     _flags |= _has_linenumber_table;
-  }
-  if (checked_exceptions_len > 0) {
+  if (generic_signature_index != 0)
+    _flags |= _has_generic_signature;
+  if (method_parameters_len > 0)
+    _flags |= _has_method_parameters;
+  if (checked_exceptions_len > 0)
     _flags |= _has_checked_exceptions;
-    *(checked_exceptions_length_addr()) = checked_exceptions_len;
-  }
-  if (exception_table_len > 0) {
+  if (exception_table_len > 0)
     _flags |= _has_exception_table;
+  if (localvariable_table_len > 0)
+    _flags |= _has_localvariable_table;
+
+  // This code is extremely brittle and should possibly be revised.
+  // The *_length_addr functions walk backwards through the
+  // constMethod data, using each of the length indexes ahead of them,
+  // as well as the flags variable.  Therefore, the indexes must be
+  // initialized in reverse order, or else they will compute the wrong
+  // offsets.  Moving the initialization of _flags into a separate
+  // block solves *half* of the problem, but the following part will
+  // still break if the order is not exactly right.
+  //
+  // Also, the serviceability agent needs to be informed anytime
+  // anything is added here.  It might be advisable to have some sort
+  // of indication of this inline.
+  if (generic_signature_index != 0)
+    *(generic_signature_index_addr()) = generic_signature_index;
+  // New data should probably go here.
+  if (method_parameters_len > 0)
+    *(method_parameters_length_addr()) = method_parameters_len;
+  if (checked_exceptions_len > 0)
+    *(checked_exceptions_length_addr()) = checked_exceptions_len;
+  if (exception_table_len > 0)
     *(exception_table_length_addr()) = exception_table_len;
-  }
-  if (localvariable_table_len > 0) {
-    _flags |= _has_localvariable_table;
+  if (localvariable_table_len > 0)
     *(localvariable_table_length_addr()) = localvariable_table_len;
-  }
+}
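+
+// Illustrative failure mode of the ordering constraint above: if the checked
+// exceptions length were written before the method parameters length,
+// checked_exceptions_length_addr() would read a garbage parameters length
+// while locating its own slot, and the write would land at the wrong offset.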
+
+int ConstMethod::method_parameters_length() const {
+  return has_method_parameters() ? *(method_parameters_length_addr()) : 0;
+}
+
+MethodParametersElement* ConstMethod::method_parameters_start() const {
+  u2* addr = method_parameters_length_addr();
+  u2 length = *addr;
+  assert(length > 0, "should only be called if table is present");
+  addr -= length * sizeof(MethodParametersElement) / sizeof(u2);
+  return (MethodParametersElement*) addr;
 }
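+
+// Worked example (illustrative): with a generic signature and two method
+// parameters present, last_u2_element() holds the signature index, the u2
+// below it holds the length (2), and the table occupies the
+// 2 * sizeof(MethodParametersElement) / sizeof(u2) = 6 u2 slots below that.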
 
 
@@ -274,6 +356,10 @@
   }
   guarantee(compressed_table_end <= m_end, "invalid method layout");
   // Verify checked exceptions, exception table and local variable tables
+  if (has_method_parameters()) {
+    u2* addr = method_parameters_length_addr();
+    guarantee(*addr > 0 && (address) addr >= compressed_table_end && (address) addr < m_end, "invalid method layout");
+  }
   if (has_checked_exceptions()) {
     u2* addr = checked_exceptions_length_addr();
     guarantee(*addr > 0 && (address) addr >= compressed_table_end && (address) addr < m_end, "invalid method layout");
@@ -294,6 +380,8 @@
     uncompressed_table_start = (u2*) exception_table_start();
   } else if (has_checked_exceptions()) {
       uncompressed_table_start = (u2*) checked_exceptions_start();
+  } else if (has_method_parameters()) {
+      uncompressed_table_start = (u2*) method_parameters_start();
   } else {
       uncompressed_table_start = (u2*) m_end;
   }
--- a/src/share/vm/oops/constMethod.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/constMethod.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -45,7 +45,8 @@
 // | constMethod_size                                     |
 // | interp_kind  | flags    | code_size                  |
 // | name index              | signature index            |
-// | method_idnum            | generic_signature_index    |
+// | method_idnum            | max_stack                  |
+// | max_locals              | size_of_parameters         |
 // |------------------------------------------------------|
 // |                                                      |
 // | byte codes                                           |
@@ -55,27 +56,39 @@
 // |  (see class CompressedLineNumberReadStream)          |
 // |  (note that length is unknown until decompressed)    |
 // |  (access flags bit tells whether table is present)   |
-// |  (indexed from start of ConstMethod*)              |
+// |  (indexed from start of ConstMethod*)                |
 // |  (elements not necessarily sorted!)                  |
 // |------------------------------------------------------|
 // | localvariable table elements + length (length last)  |
 // |  (length is u2, elements are 6-tuples of u2)         |
 // |  (see class LocalVariableTableElement)               |
 // |  (access flags bit tells whether table is present)   |
-// |  (indexed from end of ConstMethod*)                |
+// |  (indexed from end of ConstMethod*)                  |
 // |------------------------------------------------------|
 // | exception table + length (length last)               |
 // |  (length is u2, elements are 4-tuples of u2)         |
 // |  (see class ExceptionTableElement)                   |
 // |  (access flags bit tells whether table is present)   |
-// |  (indexed from end of ConstMethod*)                |
+// |  (indexed from end of ConstMethod*)                  |
 // |------------------------------------------------------|
 // | checked exceptions elements + length (length last)   |
 // |  (length is u2, elements are u2)                     |
 // |  (see class CheckedExceptionElement)                 |
 // |  (access flags bit tells whether table is present)   |
-// |  (indexed from end of ConstMethod*)                |
+// |  (indexed from end of ConstMethod*)                  |
 // |------------------------------------------------------|
+// | method parameters elements + length (length last)    |
+// |  (length is u2, elements are u2, u4 structures)      |
+// |  (see class MethodParametersElement)                 |
+// |  (access flags bit tells whether table is present)   |
+// |  (indexed from end of ConstMethod*)                  |
+// |------------------------------------------------------|
+// | generic signature index (u2)                         |
+// |  (indexed from end of ConstMethod*)                  |
+// |------------------------------------------------------|
+//
+// IMPORTANT: If anything gets added here, there need to be changes to
+// ensure that the Serviceability Agent doesn't get broken as a result!
 
 
 // Utility class describing elements in the checked exceptions table inlined in Method*.
@@ -105,15 +118,34 @@
   u2 catch_type_index;
 };
 
+// Utility class describing elements in method parameters
+class MethodParametersElement VALUE_OBJ_CLASS_SPEC {
+ public:
+  u2 name_cp_index;
+  // The 32-bit flags are split into two u2 halves; a u4 field here would
+  // cause SIGBUS from a misaligned access on some architectures
+  // (e.g. SPARC), because MethodParametersElements are only aligned mod 2
+  // within the ConstMethod container.
+  u2 flags_hi;
+  u2 flags_lo;
+};
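+
+// Illustrative use (assuming the build_int_from_shorts helper from
+// globalDefinitions.hpp): readers reassemble the 32-bit flags with
+//
+//   u4 flags = build_int_from_shorts(elem.flags_lo, elem.flags_hi);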
+
 
 class ConstMethod : public MetaspaceObj {
   friend class VMStructs;
+
+public:
+  typedef enum { NORMAL, OVERPASS } MethodType;
+
 private:
   enum {
     _has_linenumber_table = 1,
     _has_checked_exceptions = 2,
     _has_localvariable_table = 4,
-    _has_exception_table = 8
+    _has_exception_table = 8,
+    _has_generic_signature = 16,
+    _has_method_parameters = 32,
+    _is_overpass = 64
   };
 
   // Bit vector of signature
@@ -140,32 +172,45 @@
   u2                _method_idnum;               // unique identification number for the method within the class
                                                  // initially corresponds to the index into the methods array.
                                                  // but this may change with redefinition
-  u2                _generic_signature_index;    // Generic signature (index in constant pool, 0 if absent)
-
+  u2                _max_stack;                  // Maximum number of entries on the expression stack
+  u2                _max_locals;                 // Number of local variables used by this method
+  u2                _size_of_parameters;         // size of the parameter block (receiver + arguments) in words
 
   // Constructor
   ConstMethod(int byte_code_size,
-                     int compressed_line_number_size,
-                     int localvariable_table_length,
-                     int exception_table_length,
-                     int checked_exceptions_length,
-                     int size);
+              int compressed_line_number_size,
+              int localvariable_table_length,
+              int exception_table_length,
+              int checked_exceptions_length,
+              int method_parameters_length,
+              u2  generic_signature_index,
+              MethodType method_type,
+              int size);
 public:
+
   static ConstMethod* allocate(ClassLoaderData* loader_data,
-                                 int byte_code_size,
-                                 int compressed_line_number_size,
-                                 int localvariable_table_length,
-                                 int exception_table_length,
-                                 int checked_exceptions_length,
-                                 TRAPS);
+                               int byte_code_size,
+                               int compressed_line_number_size,
+                               int localvariable_table_length,
+                               int exception_table_length,
+                               int checked_exceptions_length,
+                               int method_parameters_length,
+                               u2  generic_signature_index,
+                               MethodType mt,
+                               TRAPS);
 
   bool is_constMethod() const { return true; }
 
   // Inlined tables
-  void set_inlined_tables_length(int checked_exceptions_len,
+  void set_inlined_tables_length(u2  generic_signature_index,
+                                 int checked_exceptions_len,
                                  int compressed_line_number_size,
                                  int localvariable_table_len,
-                                 int exception_table_len);
+                                 int exception_table_len,
+                                 int method_parameters_length);
+
+  bool has_generic_signature() const
+    { return (_flags & _has_generic_signature) != 0; }
 
   bool has_linenumber_table() const
     { return (_flags & _has_linenumber_table) != 0; }
@@ -179,6 +224,22 @@
   bool has_exception_handler() const
     { return (_flags & _has_exception_table) != 0; }
 
+  bool has_method_parameters() const
+    { return (_flags & _has_method_parameters) != 0; }
+
+  MethodType method_type() const {
+    return ((_flags & _is_overpass) == 0) ? NORMAL : OVERPASS;
+  }
+
+  void set_method_type(MethodType mt) {
+    if (mt == NORMAL) {
+      _flags &= ~(_is_overpass);
+    } else {
+      _flags |= _is_overpass;
+    }
+  }
+
+
   void set_interpreter_kind(int kind)      { _interpreter_kind = kind; }
   int  interpreter_kind(void) const        { return _interpreter_kind; }
 
@@ -231,8 +292,18 @@
   void set_signature_index(int index)            { _signature_index = index; }
 
   // generics support
-  int generic_signature_index() const            { return _generic_signature_index; }
-  void set_generic_signature_index(int index)    { _generic_signature_index = index; }
+  int generic_signature_index() const            {
+    if (has_generic_signature()) {
+      return *generic_signature_index_addr();
+    } else {
+      return 0;
+    }
+  }
+  void set_generic_signature_index(u2 index)    {
+    assert(has_generic_signature(), "");
+    u2* addr = generic_signature_index_addr();
+    *addr = index;
+  }
 
   // Sizing
   static int header_size() {
@@ -241,9 +312,11 @@
 
   // Size needed
   static int size(int code_size, int compressed_line_number_size,
-                         int local_variable_table_length,
-                         int exception_table_length,
-                         int checked_exceptions_length);
+                  int local_variable_table_length,
+                  int exception_table_length,
+                  int checked_exceptions_length,
+                  int method_parameters_length,
+                  u2  generic_signature_index);
 
   int size() const                    { return _constMethod_size;}
   void set_constMethod_size(int size)     { _constMethod_size = size; }
@@ -260,9 +333,11 @@
   // linenumber table - note that length is unknown until decompression,
   // see class CompressedLineNumberReadStream.
   u_char* compressed_linenumber_table() const;         // not preserved by gc
+  u2* generic_signature_index_addr() const;
   u2* checked_exceptions_length_addr() const;
   u2* localvariable_table_length_addr() const;
   u2* exception_table_length_addr() const;
+  u2* method_parameters_length_addr() const;
 
   // checked exceptions
   int checked_exceptions_length() const;
@@ -276,6 +351,10 @@
   int exception_table_length() const;
   ExceptionTableElement* exception_table_start() const;
 
+  // method parameters table
+  int method_parameters_length() const;
+  MethodParametersElement* method_parameters_start() const;
+
   // byte codes
   void    set_code(address code) {
     if (code_size() > 0) {
@@ -293,12 +372,32 @@
   static ByteSize constants_offset()
                             { return byte_offset_of(ConstMethod, _constants); }
 
+  static ByteSize max_stack_offset()
+                            { return byte_offset_of(ConstMethod, _max_stack); }
+  static ByteSize size_of_locals_offset()
+                            { return byte_offset_of(ConstMethod, _max_locals); }
+  static ByteSize size_of_parameters_offset()
+                            { return byte_offset_of(ConstMethod, _size_of_parameters); }
+
+
   // Unique id for the method
   static const u2 MAX_IDNUM;
   static const u2 UNSET_IDNUM;
   u2 method_idnum() const                        { return _method_idnum; }
   void set_method_idnum(u2 idnum)                { _method_idnum = idnum; }
 
+  // max stack
+  int  max_stack() const                         { return _max_stack; }
+  void set_max_stack(int size)                   { _max_stack = size; }
+
+  // max locals
+  int  max_locals() const                        { return _max_locals; }
+  void set_max_locals(int size)                  { _max_locals = size; }
+
+  // size of parameters
+  int  size_of_parameters() const                { return _size_of_parameters; }
+  void set_size_of_parameters(int size)          { _size_of_parameters = size; }
+
   // Deallocation for RedefineClasses
   void deallocate_contents(ClassLoaderData* loader_data);
   bool is_klass() const { return false; }
--- a/src/share/vm/oops/constantPool.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/constantPool.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -66,7 +66,7 @@
   set_pool_holder(NULL);
   set_flags(0);
   // only set to non-zero if constant pool is merged by RedefineClasses
-  set_orig_length(0);
+  set_version(0);
   set_lock(new Monitor(Monitor::nonleaf + 2, "A constant pool lock"));
   // all fields are initialized; needed for GC
   set_on_stack(false);
@@ -228,7 +228,7 @@
       } else {
         do_resolve = true;
         name   = this_oop->unresolved_klass_at(which);
-        loader = Handle(THREAD, InstanceKlass::cast(this_oop->pool_holder())->class_loader());
+        loader = Handle(THREAD, this_oop->pool_holder()->class_loader());
       }
     }
   } // unlocking constantPool
@@ -247,7 +247,7 @@
 
   if (do_resolve) {
     // this_oop must be unlocked during resolve_or_fail
-    oop protection_domain = Klass::cast(this_oop->pool_holder())->protection_domain();
+    oop protection_domain = this_oop->pool_holder()->protection_domain();
     Handle h_prot (THREAD, protection_domain);
     Klass* k_oop = SystemDictionary::resolve_or_fail(name, loader, h_prot, true, THREAD);
     KlassHandle k;
@@ -315,7 +315,7 @@
         vframeStream vfst(JavaThread::current());
         if (!vfst.at_end()) {
           line_number = vfst.method()->line_number_from_bci(vfst.bci());
-          Symbol* s = InstanceKlass::cast(vfst.method()->method_holder())->source_file_name();
+          Symbol* s = vfst.method()->method_holder()->source_file_name();
           if (s != NULL) {
             source_file = s->as_C_string();
           }
@@ -325,11 +325,11 @@
         // only print something if the classes are different
         if (source_file != NULL) {
           tty->print("RESOLVE %s %s %s:%d\n",
-                     InstanceKlass::cast(this_oop->pool_holder())->external_name(),
+                     this_oop->pool_holder()->external_name(),
                      InstanceKlass::cast(k())->external_name(), source_file, line_number);
         } else {
           tty->print("RESOLVE %s %s\n",
-                     InstanceKlass::cast(this_oop->pool_holder())->external_name(),
+                     this_oop->pool_holder()->external_name(),
                      InstanceKlass::cast(k())->external_name());
         }
       }
@@ -339,10 +339,8 @@
       // Only updated constant pool - if it is resolved.
       do_resolve = this_oop->tag_at(which).is_unresolved_klass();
       if (do_resolve) {
-        ClassLoaderData* this_key = InstanceKlass::cast(this_oop->pool_holder())->class_loader_data();
-        if (!this_key->is_the_null_class_loader_data()) {
-          this_key->record_dependency(k(), CHECK_NULL); // Can throw OOM
-        }
+        ClassLoaderData* this_key = this_oop->pool_holder()->class_loader_data();
+        this_key->record_dependency(k(), CHECK_NULL); // Can throw OOM
         this_oop->klass_at_put(which, k());
       }
     }
@@ -367,8 +365,8 @@
     assert(entry.is_unresolved(), "must be either symbol or klass");
     Thread *thread = Thread::current();
     Symbol* name = entry.get_symbol();
-    oop loader = InstanceKlass::cast(this_oop->pool_holder())->class_loader();
-    oop protection_domain = Klass::cast(this_oop->pool_holder())->protection_domain();
+    oop loader = this_oop->pool_holder()->class_loader();
+    oop protection_domain = this_oop->pool_holder()->protection_domain();
     Handle h_prot (thread, protection_domain);
     Handle h_loader (thread, loader);
     Klass* k = SystemDictionary::find(name, h_loader, h_prot, thread);
@@ -409,8 +407,8 @@
   } else {
     assert(entry.is_unresolved(), "must be either symbol or klass");
     Symbol*  name  = entry.get_symbol();
-    oop loader = InstanceKlass::cast(this_oop->pool_holder())->class_loader();
-    oop protection_domain = Klass::cast(this_oop->pool_holder())->protection_domain();
+    oop loader = this_oop->pool_holder()->class_loader();
+    oop protection_domain = this_oop->pool_holder()->protection_domain();
     Handle h_loader(THREAD, loader);
     Handle h_prot  (THREAD, protection_domain);
     KlassHandle k(THREAD, SystemDictionary::find(name, h_loader, h_prot, THREAD));
@@ -1143,16 +1141,21 @@
   int from_oplen = operand_array_length(from_cp->operands());
   int old_oplen  = operand_array_length(to_cp->operands());
   if (from_oplen != 0) {
+    ClassLoaderData* loader_data = to_cp->pool_holder()->class_loader_data();
     // append my operands to the target's operands array
     if (old_oplen == 0) {
-      to_cp->set_operands(from_cp->operands());  // reuse; do not merge
+      // Can't just reuse from_cp's operand list because of deallocation issues
+      int len = from_cp->operands()->length();
+      Array<u2>* new_ops = MetadataFactory::new_array<u2>(loader_data, len, CHECK);
+      Copy::conjoint_memory_atomic(
+          from_cp->operands()->adr_at(0), new_ops->adr_at(0), len * sizeof(u2));
+      to_cp->set_operands(new_ops);
     } else {
       int old_len  = to_cp->operands()->length();
       int from_len = from_cp->operands()->length();
       int old_off  = old_oplen * sizeof(u2);
       int from_off = from_oplen * sizeof(u2);
       // Use the metaspace for the destination constant pool
-      ClassLoaderData* loader_data = to_cp->pool_holder()->class_loader_data();
       Array<u2>* new_operands = MetadataFactory::new_array<u2>(loader_data, old_len + from_len, CHECK);
       int fillp = 0, len = 0;
       // first part of dest
@@ -1785,7 +1788,7 @@
     assert(cp_patches->at(index).is_null(),
            err_msg("Unused constant pool patch at %d in class file %s",
                    index,
-                   InstanceKlass::cast(pool_holder())->external_name()));
+                   pool_holder()->external_name()));
   }
 #endif // ASSERT
 }
@@ -1943,7 +1946,7 @@
   st->print(" for ");
   pool_holder()->print_value_on(st);
   if (pool_holder() != NULL) {
-    bool extra = (InstanceKlass::cast(pool_holder())->constants() != this);
+    bool extra = (pool_holder()->constants() != this);
     if (extra)  st->print(" (extra)");
   }
   if (cache() != NULL) {
--- a/src/share/vm/oops/constantPool.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/constantPool.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -86,8 +86,8 @@
   friend class Universe;             // For null constructor
  private:
   Array<u1>*           _tags;        // the tag array describing the constant pool's contents
-  ConstantPoolCache* _cache;       // the cache holding interpreter runtime information
-  Klass*               _pool_holder; // the corresponding class
+  ConstantPoolCache*   _cache;       // the cache holding interpreter runtime information
+  InstanceKlass*       _pool_holder; // the corresponding class
   Array<u2>*           _operands;    // for variable-sized (InvokeDynamic) nodes, usually empty
 
   // Array of resolved objects from the constant pool and map from resolved
@@ -103,8 +103,8 @@
   union {
     // set for CDS to restore resolved references
     int                _resolved_reference_length;
-  // only set to non-zero if constant pool is merged by RedefineClasses
-  int                  _orig_length;
+    // keeps version number for redefined classes (used in backtrace)
+    int                _version;
   } _saved;
 
   Monitor*             _lock;
@@ -193,9 +193,9 @@
   void set_on_stack(const bool value);
 
   // Klass holding pool
-  Klass* pool_holder() const              { return _pool_holder; }
-  void set_pool_holder(Klass* k)          { _pool_holder = k; }
-  Klass** pool_holder_addr()              { return &_pool_holder; }
+  InstanceKlass* pool_holder() const      { return _pool_holder; }
+  void set_pool_holder(InstanceKlass* k)  { _pool_holder = k; }
+  InstanceKlass** pool_holder_addr()      { return &_pool_holder; }
 
   // Interpreter runtime support
   ConstantPoolCache* cache() const        { return _cache; }
@@ -784,8 +784,11 @@
   static void copy_cp_to_impl(constantPoolHandle from_cp, int start_i, int end_i, constantPoolHandle to_cp, int to_i, TRAPS);
   static void copy_entry_to(constantPoolHandle from_cp, int from_i, constantPoolHandle to_cp, int to_i, TRAPS);
   int  find_matching_entry(int pattern_i, constantPoolHandle search_cp, TRAPS);
-  int  orig_length() const                { return _saved._orig_length; }
-  void set_orig_length(int orig_length)   { _saved._orig_length = orig_length; }
+  int  version() const                    { return _saved._version; }
+  void set_version(int version)           { _saved._version = version; }
+  void increment_and_save_version(int version) {
+    _saved._version = version >= 0 ? (version + 1) : version;  // once overflowed, keep the negative value
+  }
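+  // Illustrative reading: _version counts redefinitions 0, 1, 2, ...; once
+  // the int wraps negative it is left unchanged, so a negative value simply
+  // means the count overflowed.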
 
   void set_resolved_reference_length(int length) { _saved._resolved_reference_length = length; }
   int  resolved_reference_length() const  { return _saved._resolved_reference_length; }
--- a/src/share/vm/oops/cpCache.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/cpCache.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -231,8 +231,8 @@
 
 
 void ConstantPoolCacheEntry::set_interface_call(methodHandle method, int index) {
-  Klass* interf = method->method_holder();
-  assert(InstanceKlass::cast(interf)->is_interface(), "must be an interface");
+  InstanceKlass* interf = method->method_holder();
+  assert(interf->is_interface(), "must be an interface");
   assert(!method->is_final_method(), "interfaces do not have final methods; cannot link to one here");
   set_f1(interf);
   set_f2(index);
@@ -243,25 +243,17 @@
 }
 
 
-void ConstantPoolCacheEntry::set_method_handle(constantPoolHandle cpool,
-                                               methodHandle adapter,
-                                               Handle appendix, Handle method_type,
-                                               objArrayHandle resolved_references) {
-  set_method_handle_common(cpool, Bytecodes::_invokehandle, adapter, appendix, method_type, resolved_references);
+void ConstantPoolCacheEntry::set_method_handle(constantPoolHandle cpool, const CallInfo &call_info) {
+  set_method_handle_common(cpool, Bytecodes::_invokehandle, call_info);
 }
 
-void ConstantPoolCacheEntry::set_dynamic_call(constantPoolHandle cpool,
-                                              methodHandle adapter,
-                                              Handle appendix, Handle method_type,
-                                              objArrayHandle resolved_references) {
-  set_method_handle_common(cpool, Bytecodes::_invokedynamic, adapter, appendix, method_type, resolved_references);
+void ConstantPoolCacheEntry::set_dynamic_call(constantPoolHandle cpool, const CallInfo &call_info) {
+  set_method_handle_common(cpool, Bytecodes::_invokedynamic, call_info);
 }
 
 void ConstantPoolCacheEntry::set_method_handle_common(constantPoolHandle cpool,
                                                       Bytecodes::Code invoke_code,
-                                                      methodHandle adapter,
-                                                      Handle appendix, Handle method_type,
-                                                      objArrayHandle resolved_references) {
+                                                      const CallInfo &call_info) {
   // NOTE: This CPCE can be the subject of data races.
   // There are three words to update: flags, refs[f2], f1 (in that order).
   // Writers must store all other values before f1.
@@ -276,6 +268,9 @@
     return;
   }
 
+  const methodHandle adapter = call_info.resolved_method();
+  const Handle appendix      = call_info.resolved_appendix();
+  const Handle method_type   = call_info.resolved_method_type();
   const bool has_appendix    = appendix.not_null();
   const bool has_method_type = method_type.not_null();
 
@@ -315,6 +310,7 @@
   // This allows us to create fewer method oops, while keeping type safety.
   //
 
+  objArrayHandle resolved_references = cpool->resolved_references();
   // Store appendix, if any.
   if (has_appendix) {
     const int appendix_index = f2_as_index() + _indy_resolved_references_appendix_offset;
@@ -375,7 +371,7 @@
         int holder_index = cpool->uncached_klass_ref_index_at(constant_pool_index());
         if (cpool->tag_at(holder_index).is_klass()) {
           Klass* klass = cpool->resolved_klass_at(holder_index);
-          if (!Klass::cast(klass)->oop_is_instance())
+          if (!klass->oop_is_instance())
             klass = SystemDictionary::Object_klass();
           return InstanceKlass::cast(klass)->method_at_vtable(f2_as_index());
         }
@@ -421,7 +417,7 @@
         if (!(*trace_name_printed)) {
           // RC_TRACE_MESG macro has an embedded ResourceMark
           RC_TRACE_MESG(("adjust: name=%s",
-            Klass::cast(old_method->method_holder())->external_name()));
+            old_method->method_holder()->external_name()));
           *trace_name_printed = true;
         }
         // RC_TRACE macro has an embedded ResourceMark
@@ -449,7 +445,7 @@
       if (!(*trace_name_printed)) {
         // RC_TRACE_MESG macro has an embedded ResourceMark
         RC_TRACE_MESG(("adjust: name=%s",
-          Klass::cast(old_method->method_holder())->external_name()));
+          old_method->method_holder()->external_name()));
         *trace_name_printed = true;
       }
       // RC_TRACE macro has an embedded ResourceMark
--- a/src/share/vm/oops/cpCache.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/cpCache.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -117,6 +117,8 @@
 // The fields are volatile so that they are stored in the order written in the
 // source code.  The _indices field with the bytecode must be written last.
 
+class CallInfo;
+
 class ConstantPoolCacheEntry VALUE_OBJ_CLASS_SPEC {
   friend class VMStructs;
   friend class constantPoolCacheKlass;
@@ -223,18 +225,12 @@
 
   void set_method_handle(
     constantPoolHandle cpool,                    // holding constant pool (required for locking)
-    methodHandle method,                         // adapter for invokeExact, etc.
-    Handle appendix,                             // stored in refs[f2+0]; could be a java.lang.invoke.MethodType
-    Handle method_type,                          // stored in refs[f2+1]; is a java.lang.invoke.MethodType
-    objArrayHandle resolved_references
+    const CallInfo &call_info                    // Call link information
   );
 
   void set_dynamic_call(
     constantPoolHandle cpool,                    // holding constant pool (required for locking)
-    methodHandle method,                         // adapter for this call site
-    Handle appendix,                             // stored in refs[f2+0]; could be a java.lang.invoke.CallSite
-    Handle method_type,                          // stored in refs[f2+1]; is a java.lang.invoke.MethodType
-    objArrayHandle resolved_references
+    const CallInfo &call_info                    // Call link information
   );
 
   // Common code for invokedynamic and MH invocations.
@@ -255,10 +251,7 @@
   void set_method_handle_common(
     constantPoolHandle cpool,                    // holding constant pool (required for locking)
     Bytecodes::Code invoke_code,                 // _invokehandle or _invokedynamic
-    methodHandle adapter,                        // invoker method (f1)
-    Handle appendix,                             // appendix such as CallSite, MethodType, etc. (refs[f2+0])
-    Handle method_type,                          // MethodType (refs[f2+1])
-    objArrayHandle resolved_references
+    const CallInfo &call_info                    // Call link information
   );
 
   // invokedynamic and invokehandle call sites have two entries in the
--- a/src/share/vm/oops/fieldInfo.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/fieldInfo.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -43,14 +43,29 @@
  public:
   // fields
   // Field info extracted from the class file and stored
-  // as an array of 7 shorts
+  // as an array of 6 shorts.
+
+#define FIELDINFO_TAG_SIZE             2
+#define FIELDINFO_TAG_BLANK            0
+#define FIELDINFO_TAG_OFFSET           1
+#define FIELDINFO_TAG_TYPE_PLAIN       2
+#define FIELDINFO_TAG_TYPE_CONTENDED   3
+#define FIELDINFO_TAG_MASK             3
+
+  // Packed field has the tag, and can be either of:
+  //    hi bits <--------------------------- lo bits
+  //   |---------high---------|---------low---------|
+  //    ..........................................00  - blank
+  //    [------------------offset----------------]01  - real field offset
+  //    ......................[-------type-------]10  - plain field with type
+  //    [--contention_group--][-------type-------]11  - contended field with type and contention group
   enum FieldOffset {
     access_flags_offset      = 0,
     name_index_offset        = 1,
     signature_index_offset   = 2,
     initval_index_offset     = 3,
-    low_offset               = 4,
-    high_offset              = 5,
+    low_packed_offset        = 4,
+    high_packed_offset       = 5,
     field_slots              = 6
   };
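+
+  // Worked example (illustrative): a real field offset of 12 is stored in
+  // the packed shorts as (12 << FIELDINFO_TAG_SIZE) | FIELDINFO_TAG_OFFSET
+  // = 0x31, and recovered by shifting the two tag bits back out:
+  // 0x31 >> 2 == 12.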
 
@@ -76,17 +91,90 @@
   void initialize(u2 access_flags,
                   u2 name_index,
                   u2 signature_index,
-                  u2 initval_index,
-                  u4 offset) {
+                  u2 initval_index) {
     _shorts[access_flags_offset] = access_flags;
     _shorts[name_index_offset] = name_index;
     _shorts[signature_index_offset] = signature_index;
     _shorts[initval_index_offset] = initval_index;
-    set_offset(offset);
+    _shorts[low_packed_offset] = 0;
+    _shorts[high_packed_offset] = 0;
   }
 
   u2 access_flags() const                        { return _shorts[access_flags_offset];            }
-  u4 offset() const                              { return build_int_from_shorts(_shorts[low_offset], _shorts[high_offset]); }
+  u4 offset() const {
+    u2 lo = _shorts[low_packed_offset];
+    switch(lo & FIELDINFO_TAG_MASK) {
+      case FIELDINFO_TAG_OFFSET:
+        return build_int_from_shorts(_shorts[low_packed_offset], _shorts[high_packed_offset]) >> FIELDINFO_TAG_SIZE;
+#ifndef PRODUCT
+      case FIELDINFO_TAG_TYPE_PLAIN:
+        ShouldNotReachHere2("Asking offset for the plain type field");
+      case FIELDINFO_TAG_TYPE_CONTENDED:
+        ShouldNotReachHere2("Asking offset for the contended type field");
+      case FIELDINFO_TAG_BLANK:
+        ShouldNotReachHere2("Asking offset for the blank field");
+#endif
+    }
+    ShouldNotReachHere();
+    return 0;
+  }
+
+  bool is_contended() const {
+    u2 lo = _shorts[low_packed_offset];
+    switch(lo & FIELDINFO_TAG_MASK) {
+      case FIELDINFO_TAG_TYPE_PLAIN:
+        return false;
+      case FIELDINFO_TAG_TYPE_CONTENDED:
+        return true;
+#ifndef PRODUCT
+      case FIELDINFO_TAG_OFFSET:
+        ShouldNotReachHere2("Asking contended flag for the field with offset");
+      case FIELDINFO_TAG_BLANK:
+        ShouldNotReachHere2("Asking contended flag for the blank field");
+#endif
+    }
+    ShouldNotReachHere();
+    return false;
+  }
+
+  u2 contended_group() const {
+    u2 lo = _shorts[low_packed_offset];
+    switch(lo & FIELDINFO_TAG_MASK) {
+      case FIELDINFO_TAG_TYPE_PLAIN:
+        return 0;
+      case FIELDINFO_TAG_TYPE_CONTENDED:
+        return _shorts[high_packed_offset];
+#ifndef PRODUCT
+      case FIELDINFO_TAG_OFFSET:
+        ShouldNotReachHere2("Asking the contended group for the field with offset");
+      case FIELDINFO_TAG_BLANK:
+        ShouldNotReachHere2("Asking the contended group for the blank field");
+#endif
+    }
+    ShouldNotReachHere();
+    return 0;
+  }
+
+  u2 allocation_type() const {
+    u2 lo = _shorts[low_packed_offset];
+    switch(lo & FIELDINFO_TAG_MASK) {
+      case FIELDINFO_TAG_TYPE_PLAIN:
+      case FIELDINFO_TAG_TYPE_CONTENDED:
+        return (lo >> FIELDINFO_TAG_SIZE);
+#ifndef PRODUCT
+      case FIELDINFO_TAG_OFFSET:
+        ShouldNotReachHere2("Asking the field type for field with offset");
+      case FIELDINFO_TAG_BLANK:
+        ShouldNotReachHere2("Asking the field type for the blank field");
+#endif
+    }
+    ShouldNotReachHere();
+    return 0;
+  }
+
+  bool is_offset_set() const {
+    return (_shorts[low_packed_offset] & FIELDINFO_TAG_MASK) == FIELDINFO_TAG_OFFSET;
+  }
 
   Symbol* name(constantPoolHandle cp) const {
     int index = name_index();
@@ -106,8 +194,46 @@
 
   void set_access_flags(u2 val)                  { _shorts[access_flags_offset] = val;             }
   void set_offset(u4 val)                        {
-    _shorts[low_offset] = extract_low_short_from_int(val);
-    _shorts[high_offset] = extract_high_short_from_int(val);
+    val = val << FIELDINFO_TAG_SIZE; // make room for tag
+    _shorts[low_packed_offset] = extract_low_short_from_int(val) | FIELDINFO_TAG_OFFSET;
+    _shorts[high_packed_offset] = extract_high_short_from_int(val);
+  }
+
+  void set_allocation_type(int type) {
+    u2 lo = _shorts[low_packed_offset];
+    switch(lo & FIELDINFO_TAG_MASK) {
+      case FIELDINFO_TAG_BLANK:
+        _shorts[low_packed_offset] = ((type << FIELDINFO_TAG_SIZE)) & 0xFFFF;
+        _shorts[low_packed_offset] &= ~FIELDINFO_TAG_MASK;
+        _shorts[low_packed_offset] |= FIELDINFO_TAG_TYPE_PLAIN;
+        return;
+#ifndef PRODUCT
+      case FIELDINFO_TAG_TYPE_PLAIN:
+      case FIELDINFO_TAG_TYPE_CONTENDED:
+      case FIELDINFO_TAG_OFFSET:
+        ShouldNotReachHere2("Setting the field type with overwriting");
+#endif
+    }
+    ShouldNotReachHere();
+  }
+
+  void set_contended_group(u2 val) {
+    u2 lo = _shorts[low_packed_offset];
+    switch(lo & FIELDINFO_TAG_MASK) {
+      case FIELDINFO_TAG_TYPE_PLAIN:
+        _shorts[low_packed_offset] |= FIELDINFO_TAG_TYPE_CONTENDED;
+        _shorts[high_packed_offset] = val;
+        return;
+#ifndef PRODUCT
+      case FIELDINFO_TAG_TYPE_CONTENDED:
+        ShouldNotReachHere2("Overwriting contended group");
+      case FIELDINFO_TAG_BLANK:
+        ShouldNotReachHere2("Setting contended group for the blank field");
+      case FIELDINFO_TAG_OFFSET:
+        ShouldNotReachHere2("Setting contended group for field with offset");
+#endif
+    }
+    ShouldNotReachHere();
   }
 
   bool is_internal() const {
--- a/src/share/vm/oops/fieldStreams.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/fieldStreams.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -160,9 +160,26 @@
     return field()->offset();
   }
 
+  int allocation_type() const {
+    return field()->allocation_type();
+  }
+
   void set_offset(int offset) {
     field()->set_offset(offset);
   }
+
+  bool is_offset_set() const {
+    return field()->is_offset_set();
+  }
+
+  bool is_contended() const {
+    return field()->is_contended();
+  }
+
+  int contended_group() const {
+    return field()->contended_group();
+  }
+
 };
 
 // Iterate over only the internal fields
--- a/src/share/vm/oops/instanceKlass.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/instanceKlass.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -47,24 +47,14 @@
 #include "oops/symbol.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "prims/jvmtiRedefineClassesTrace.hpp"
+#include "prims/methodComparator.hpp"
 #include "runtime/fieldDescriptor.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/thread.inline.hpp"
 #include "services/threadService.hpp"
 #include "utilities/dtrace.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 #ifndef SERIALGC
 #include "gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
@@ -171,6 +161,8 @@
 
 #endif //  ndef DTRACE_ENABLED
 
+volatile int InstanceKlass::_total_instanceKlass_count = 0;
+
 Klass* InstanceKlass::allocate_instance_klass(ClassLoaderData* loader_data,
                                                 int vtable_len,
                                                 int itable_len,
@@ -214,6 +206,7 @@
         access_flags, !host_klass.is_null());
   }
 
+  Atomic::inc(&_total_instanceKlass_count);
   return ik;
 }
 
@@ -372,6 +365,9 @@
   set_protection_domain(NULL);
   set_signers(NULL);
   set_init_lock(NULL);
+
+  // We should deallocate the Annotations instance
+  MetadataFactory::free_metadata(loader_data, annotations());
   set_annotations(NULL);
 }
 
@@ -610,7 +606,7 @@
       }
 
       // relocate jsrs and link methods after they are all rewritten
-      this_oop->relocate_and_link_methods(CHECK_false);
+      this_oop->link_methods(CHECK_false);
 
       // Initialize the vtable and interface table after
       // methods have been rewritten since rewrite may
@@ -658,10 +654,31 @@
 // Now relocate and link method entry points after class is rewritten.
 // This is outside is_rewritten flag. In case of an exception, it can be
 // executed more than once.
-void InstanceKlass::relocate_and_link_methods(TRAPS) {
-  assert(is_loaded(), "must be loaded");
-  instanceKlassHandle this_oop(THREAD, this);
-  Rewriter::relocate_and_link(this_oop, CHECK);
+void InstanceKlass::link_methods(TRAPS) {
+  int len = methods()->length();
+  for (int i = len-1; i >= 0; i--) {
+    methodHandle m(THREAD, methods()->at(i));
+
+    // Set up method entry points for compiler and interpreter.
+    m->link_method(m, CHECK);
+
+    // This is for JVMTI and unrelated to the relocator, but it is the last thing we do here
+#ifdef ASSERT
+    if (StressMethodComparator) {
+      ResourceMark rm(THREAD);
+      static int nmc = 0;
+      for (int j = i; j >= 0 && j >= i-4; j--) {
+        if ((++nmc % 1000) == 0)  tty->print_cr("Have run MethodComparator %d times...", nmc);
+        bool z = MethodComparator::methods_EMCP(m(),
+                   methods()->at(j));
+        if (j == i && !z) {
+          tty->print("MethodComparator FAIL: "); m->print(); m->print_codes();
+          assert(z, "method must compare equal to itself");
+        }
+      }
+    }
+#endif //ASSERT
+  }
 }
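+
+// Illustrative note: in debug builds, -XX:+StressMethodComparator makes the
+// loop above additionally compare each method against itself and up to four
+// of its predecessors, asserting that MethodComparator::methods_EMCP is at
+// least reflexive.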
 
 
@@ -727,8 +744,8 @@
 
   // Step 7
   Klass* super_klass = this_oop->super();
-  if (super_klass != NULL && !this_oop->is_interface() && Klass::cast(super_klass)->should_be_initialized()) {
-    Klass::cast(super_klass)->initialize(THREAD);
+  if (super_klass != NULL && !this_oop->is_interface() && super_klass->should_be_initialized()) {
+    super_klass->initialize(THREAD);
 
     if (HAS_PENDING_EXCEPTION) {
       Handle e(THREAD, PENDING_EXCEPTION);
@@ -743,6 +760,35 @@
     }
   }
 
+  if (this_oop->has_default_methods()) {
+    // Step 7.5: initialize any interfaces which have default methods
+    for (int i = 0; i < this_oop->local_interfaces()->length(); ++i) {
+      Klass* iface = this_oop->local_interfaces()->at(i);
+      InstanceKlass* ik = InstanceKlass::cast(iface);
+      if (ik->has_default_methods() && ik->should_be_initialized()) {
+        ik->initialize(THREAD);
+
+        if (HAS_PENDING_EXCEPTION) {
+          Handle e(THREAD, PENDING_EXCEPTION);
+          CLEAR_PENDING_EXCEPTION;
+          {
+            EXCEPTION_MARK;
+            // Locks object, set state, and notify all waiting threads
+            this_oop->set_initialization_state_and_notify(
+                initialization_error, THREAD);
+
+            // ignore any exception thrown, interface initialization error is
+            // thrown below
+            CLEAR_PENDING_EXCEPTION;
+          }
+          DTRACE_CLASSINIT_PROBE_WAIT(
+              super__failed, InstanceKlass::cast(this_oop()), -1, wait);
+          THROW_OOP(e());
+        }
+      }
+    }
+  }
+
   // Step 8
   {
     assert(THREAD->is_Java_thread(), "non-JavaThread in initialize_impl");
@@ -895,7 +941,7 @@
 }
 
 bool InstanceKlass::compute_is_subtype_of(Klass* k) {
-  if (Klass::cast(k)->is_interface()) {
+  if (k->is_interface()) {
     return implements_interface(k);
   } else {
     return Klass::compute_is_subtype_of(k);
@@ -904,7 +950,7 @@
 
 bool InstanceKlass::implements_interface(Klass* k) const {
   if (this == k) return true;
-  assert(Klass::cast(k)->is_interface(), "should be an interface class");
+  assert(k->is_interface(), "should be an interface class");
   for (int i = 0; i < transitive_interfaces()->length(); i++) {
     if (transitive_interfaces()->at(i) == k) {
       return true;
@@ -1023,6 +1069,13 @@
 }
 
 void InstanceKlass::call_class_initializer_impl(instanceKlassHandle this_oop, TRAPS) {
+  if (ReplayCompiles &&
+      (ReplaySuppressInitializers == 1 ||
+       (ReplaySuppressInitializers >= 2 && this_oop->class_loader() != NULL))) {
+    // Hide the existence of the initializer for the purpose of replaying the compile
+    return;
+  }
+
   methodHandle h_method(THREAD, this_oop->class_initializer());
   assert(!this_oop->is_initialized(), "we cannot initialize twice");
   if (TraceClassInitialization) {
@@ -1071,7 +1124,7 @@
   const int n = local_interfaces()->length();
   for (int i = 0; i < n; i++) {
     Klass* intf1 = local_interfaces()->at(i);
-    assert(Klass::cast(intf1)->is_interface(), "just checking type");
+    assert(intf1->is_interface(), "just checking type");
     // search for field in current interface
     if (InstanceKlass::cast(intf1)->find_local_field(name, sig, fd)) {
       assert(fd->is_static(), "interface field must be static");
@@ -1142,7 +1195,7 @@
     if (InstanceKlass::cast(klass)->find_local_field_from_offset(offset, is_static, fd)) {
       return true;
     }
-    klass = Klass::cast(klass)->super();
+    klass = klass->super();
   }
   return false;
 }
@@ -1252,11 +1305,7 @@
 }
 #endif
 
-Method* InstanceKlass::find_method(Symbol* name, Symbol* signature) const {
-  return InstanceKlass::find_method(methods(), name, signature);
-}
-
-Method* InstanceKlass::find_method(Array<Method*>* methods, Symbol* name, Symbol* signature) {
+static int binary_search(Array<Method*>* methods, Symbol* name) {
   int len = methods->length();
   // methods are sorted, so do binary search
   int l = 0;
@@ -1267,43 +1316,70 @@
     assert(m->is_method(), "must be method");
     int res = m->name()->fast_compare(name);
     if (res == 0) {
-      // found matching name; do linear search to find matching signature
-      // first, quick check for common case
-      if (m->signature() == signature) return m;
-      // search downwards through overloaded methods
-      int i;
-      for (i = mid - 1; i >= l; i--) {
-        Method* m = methods->at(i);
-        assert(m->is_method(), "must be method");
-        if (m->name() != name) break;
-        if (m->signature() == signature) return m;
-      }
-      // search upwards
-      for (i = mid + 1; i <= h; i++) {
-        Method* m = methods->at(i);
-        assert(m->is_method(), "must be method");
-        if (m->name() != name) break;
-        if (m->signature() == signature) return m;
-      }
-      // not found
-#ifdef ASSERT
-      int index = linear_search(methods, name, signature);
-      assert(index == -1, err_msg("binary search should have found entry %d", index));
-#endif
-      return NULL;
+      return mid;
     } else if (res < 0) {
       l = mid + 1;
     } else {
       h = mid - 1;
     }
   }
+  return -1;
+}
+
+Method* InstanceKlass::find_method(Symbol* name, Symbol* signature) const {
+  return InstanceKlass::find_method(methods(), name, signature);
+}
+
+Method* InstanceKlass::find_method(
+    Array<Method*>* methods, Symbol* name, Symbol* signature) {
+  int hit = binary_search(methods, name);
+  if (hit != -1) {
+    Method* m = methods->at(hit);
+    // Do linear search to find matching signature.  First, quick check
+    // for common case
+    if (m->signature() == signature) return m;
+    // search downwards through overloaded methods
+    int i;
+    for (i = hit - 1; i >= 0; --i) {
+      Method* m = methods->at(i);
+      assert(m->is_method(), "must be method");
+      if (m->name() != name) break;
+      if (m->signature() == signature) return m;
+    }
+    // search upwards
+    for (i = hit + 1; i < methods->length(); ++i) {
+      Method* m = methods->at(i);
+      assert(m->is_method(), "must be method");
+      if (m->name() != name) break;
+      if (m->signature() == signature) return m;
+    }
+    // not found
 #ifdef ASSERT
-  int index = linear_search(methods, name, signature);
-  assert(index == -1, err_msg("binary search should have found entry %d", index));
+    int index = linear_search(methods, name, signature);
+    assert(index == -1, err_msg("binary search should have found entry %d", index));
 #endif
+  }
   return NULL;
 }
 
+int InstanceKlass::find_method_by_name(Symbol* name, int* end) {
+  return find_method_by_name(methods(), name, end);
+}
+
+int InstanceKlass::find_method_by_name(
+    Array<Method*>* methods, Symbol* name, int* end_ptr) {
+  assert(end_ptr != NULL, "just checking");
+  int start = binary_search(methods, name);
+  int end = start + 1;
+  if (start != -1) {
+    while (start - 1 >= 0 && (methods->at(start - 1))->name() == name) --start;
+    while (end < methods->length() && (methods->at(end))->name() == name) ++end;
+    *end_ptr = end;
+    return start;
+  }
+  return -1;
+}
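The find_method_by_name overloads above expose the name bucket that binary_search locates: the return value is the index of the first method with the given name, and *end_ptr is the first index past the last one. A hedged usage sketch (ik and name are placeholders), iterating every overload of one name:

    int end = 0;
    int start = ik->find_method_by_name(name, &end);
    if (start != -1) {
      for (int i = start; i < end; i++) {
        Method* m = ik->methods()->at(i);
        // every m here has m->name() == name, each with a distinct signature
      }
    }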
+
 Method* InstanceKlass::uncached_lookup_method(Symbol* name, Symbol* signature) const {
   Klass* klass = const_cast<InstanceKlass*>(this);
   while (klass != NULL) {
@@ -2258,6 +2334,9 @@
   if (_array_name != NULL)  _array_name->decrement_refcount();
   if (_source_file_name != NULL) _source_file_name->decrement_refcount();
   if (_source_debug_extension != NULL) FREE_C_HEAP_ARRAY(char, _source_debug_extension, mtClass);
+
+  assert(_total_instanceKlass_count >= 1, "Sanity check");
+  Atomic::dec(&_total_instanceKlass_count);
 }
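The new Atomic::dec above balances an Atomic::inc that this changeset adds on the InstanceKlass allocation path (not shown in this hunk), keeping _total_instanceKlass_count as a live count for number_of_instance_classes(). A sketch of the assumed pairing:

    // Allocation side (assumed, elsewhere in this changeset):
    Atomic::inc(&_total_instanceKlass_count);
    // Deallocation side (this hunk, in the destructor):
    assert(_total_instanceKlass_count >= 1, "Sanity check");
    Atomic::dec(&_total_instanceKlass_count);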
 
 void InstanceKlass::set_source_file_name(Symbol* n) {
@@ -2307,19 +2386,19 @@
 bool InstanceKlass::is_same_class_package(Klass* class2) {
   Klass* class1 = this;
   oop classloader1 = InstanceKlass::cast(class1)->class_loader();
-  Symbol* classname1 = Klass::cast(class1)->name();
-
-  if (Klass::cast(class2)->oop_is_objArray()) {
+  Symbol* classname1 = class1->name();
+
+  if (class2->oop_is_objArray()) {
     class2 = ObjArrayKlass::cast(class2)->bottom_klass();
   }
   oop classloader2;
-  if (Klass::cast(class2)->oop_is_instance()) {
+  if (class2->oop_is_instance()) {
     classloader2 = InstanceKlass::cast(class2)->class_loader();
   } else {
-    assert(Klass::cast(class2)->oop_is_typeArray(), "should be type array");
+    assert(class2->oop_is_typeArray(), "should be type array");
     classloader2 = NULL;
   }
-  Symbol* classname2 = Klass::cast(class2)->name();
+  Symbol* classname2 = class2->name();
 
   return InstanceKlass::is_same_class_package(classloader1, classname1,
                                               classloader2, classname2);
@@ -2328,7 +2407,7 @@
 bool InstanceKlass::is_same_class_package(oop classloader2, Symbol* classname2) {
   Klass* class1 = this;
   oop classloader1 = InstanceKlass::cast(class1)->class_loader();
-  Symbol* classname1 = Klass::cast(class1)->name();
+  Symbol* classname1 = class1->name();
 
   return InstanceKlass::is_same_class_package(classloader1, classname1,
                                               classloader2, classname2);
@@ -2419,7 +2498,7 @@
 bool InstanceKlass::is_same_package_member_impl(instanceKlassHandle class1,
                                                 Klass* class2_oop, TRAPS) {
   if (class2_oop == class1())                       return true;
-  if (!Klass::cast(class2_oop)->oop_is_instance())  return false;
+  if (!class2_oop->oop_is_instance())  return false;
   instanceKlassHandle class2(THREAD, class2_oop);
 
   // must be in same package before we try anything else
@@ -2811,11 +2890,7 @@
     st->print(BULLET"fake entry for mirror: ");
     mirrored_klass->print_value_on_maybe_null(st);
     st->cr();
-    st->print(BULLET"fake entry resolved_constructor: ");
-    Method* ctor = java_lang_Class::resolved_constructor(obj);
-    ctor->print_value_on_maybe_null(st);
     Klass* array_klass = java_lang_Class::array_klass(obj);
-    st->cr();
     st->print(BULLET"fake entry for array: ");
     array_klass->print_value_on_maybe_null(st);
     st->cr();
@@ -2952,7 +3027,7 @@
   if (im != NULL) {
     guarantee(is_interface(), "only interfaces should have implementor set");
     guarantee(im->is_klass(), "should be klass");
-    guarantee(!Klass::cast(im)->is_interface() || im == this,
+    guarantee(!im->is_interface() || im == this,
       "implementors cannot be interfaces");
   }
 
@@ -2961,7 +3036,7 @@
     Array<Klass*>* local_interfaces = this->local_interfaces();
     for (int j = 0; j < local_interfaces->length(); j++) {
       Klass* e = local_interfaces->at(j);
-      guarantee(e->is_klass() && Klass::cast(e)->is_interface(), "invalid local interface");
+      guarantee(e->is_klass() && e->is_interface(), "invalid local interface");
     }
   }
 
@@ -2970,7 +3045,7 @@
     Array<Klass*>* transitive_interfaces = this->transitive_interfaces();
     for (int j = 0; j < transitive_interfaces->length(); j++) {
       Klass* e = transitive_interfaces->at(j);
-      guarantee(e->is_klass() && Klass::cast(e)->is_interface(), "invalid transitive interface");
+      guarantee(e->is_klass() && e->is_interface(), "invalid transitive interface");
     }
   }
 
--- a/src/share/vm/oops/instanceKlass.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/instanceKlass.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,6 +31,7 @@
 #include "oops/fieldInfo.hpp"
 #include "oops/instanceOop.hpp"
 #include "oops/klassVtable.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/handles.hpp"
 #include "runtime/os.hpp"
 #include "utilities/accessFlags.hpp"
@@ -133,6 +134,7 @@
 class InstanceKlass: public Klass {
   friend class VMStructs;
   friend class ClassFileParser;
+  friend class CompileReplay;
 
  protected:
   // Constructor
@@ -169,6 +171,11 @@
     initialization_error                // error happened during initialization
   };
 
+  static int number_of_instance_classes() { return _total_instanceKlass_count; }
+
+ private:
+  static volatile int _total_instanceKlass_count;
+
  protected:
   // Protection domain.
   oop             _protection_domain;
@@ -218,12 +225,17 @@
   u2              _java_fields_count;    // The number of declared Java fields
   int             _nonstatic_oop_map_size;// size in words of nonstatic oop map blocks
 
+  // _is_marked_dependent can be set concurrently, thus cannot be part of the
+  // _misc_flags.
   bool            _is_marked_dependent;  // used for marking during flushing and deoptimization
+
   enum {
     _misc_rewritten            = 1 << 0, // methods rewritten.
     _misc_has_nonstatic_fields = 1 << 1, // for sizing with UseCompressedOops
     _misc_should_verify_class  = 1 << 2, // allow caching of preverification
-    _misc_is_anonymous         = 1 << 3  // has embedded _inner_classes field
+    _misc_is_anonymous         = 1 << 3, // has embedded _inner_classes field
+    _misc_is_contended         = 1 << 4, // marked with contended annotation
+    _misc_has_default_methods  = 1 << 5  // class, superclass, or implemented interfaces have default methods
   };
   u2              _misc_flags;
   u2              _minor_version;        // minor version number of class file
@@ -245,7 +257,7 @@
   unsigned char * _cached_class_file_bytes;       // JVMTI: cached class file, before retransformable agent modified it in CFLH
   jint            _cached_class_file_len;         // JVMTI: length of above
   JvmtiCachedClassFieldMap* _jvmti_cached_class_field_map;  // JVMTI: used during heap iteration
-  volatile u2     _idnum_allocated_count;         // JNI/JVMTI: increments with the addition of methods, old ids don't change
+
   // Method array.
   Array<Method*>* _methods;
   // Interface (Klass*s) this class declares locally to implement.
@@ -269,6 +281,8 @@
   //     ...
   Array<u2>*      _fields;
 
+  volatile u2     _idnum_allocated_count;         // JNI/JVMTI: increments with the addition of methods, old ids don't change
+
   // Class states are defined as ClassState (see above).
   // Place the _init_state here to utilize the unused 2-byte after
   // _idnum_allocated_count.
@@ -449,7 +463,7 @@
   bool link_class_or_fail(TRAPS); // returns false on failure
   void unlink_class();
   void rewrite_class(TRAPS);
-  void relocate_and_link_methods(TRAPS);
+  void link_methods(TRAPS);
   Method* class_initializer();
 
   // set the class to initialized if no static initializer is present
@@ -492,6 +506,13 @@
   // (returns NULL if not found)
   Method* lookup_method_in_all_interfaces(Symbol* name, Symbol* signature) const;
 
+  // Find method indices by name.  If a method with the specified name is
+  // found, the index of the first such method is returned, and 'end' is
+  // filled in with the index of the first non-matching method.  If no
+  // method is found, -1 is returned.
+  int find_method_by_name(Symbol* name, int* end);
+  static int find_method_by_name(Array<Method*>* methods, Symbol* name, int* end);
+
   // constant pool
   ConstantPool* constants() const        { return _constants; }
   void set_constants(ConstantPool* c)    { _constants = c; }
@@ -526,6 +547,23 @@
     }
   }
 
+  // Oop that keeps the metadata for this class from being unloaded
+  // when the metadata is referenced from other places, like nmethods.
+  oop klass_holder() const {
+    return is_anonymous() ? java_mirror() : class_loader();
+  }
+
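klass_holder() gives code that embeds metadata pointers (nmethods in particular) a single oop to keep alive: for an ordinary class that is the class loader, but an anonymous class can be unloaded independently of its loader, so its mirror is the holder. A hedged sketch of the intended consumer side (the nmethod bookkeeping named here is assumed, not shown in this changeset):

    // Keep the klass from being unloaded while a compiled method uses it:
    oop holder = ik->klass_holder();  // mirror if anonymous, else class loader
    // record 'holder' in the nmethod's oop section so the GC keeps it alive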
+  bool is_contended() const                {
+    return (_misc_flags & _misc_is_contended) != 0;
+  }
+  void set_is_contended(bool value)        {
+    if (value) {
+      _misc_flags |= _misc_is_contended;
+    } else {
+      _misc_flags &= ~_misc_is_contended;
+    }
+  }
+
   // signers
   objArrayOop signers() const              { return _signers; }
   void set_signers(objArrayOop s)          { klass_oop_store((oop*)&_signers, s); }
@@ -592,6 +630,17 @@
     return _jvmti_cached_class_field_map;
   }
 
+  bool has_default_methods() const {
+    return (_misc_flags & _misc_has_default_methods) != 0;
+  }
+  void set_has_default_methods(bool b) {
+    if (b) {
+      _misc_flags |= _misc_has_default_methods;
+    } else {
+      _misc_flags &= ~_misc_has_default_methods;
+    }
+  }
+
   // for adding methods, ConstMethod::UNSET_IDNUM means no more ids available
   inline u2 next_method_idnum();
   void set_initial_method_idnum(u2 value)             { _idnum_allocated_count = value; }
@@ -636,6 +685,10 @@
     if (annotations() == NULL) return NULL;
     return annotations()->fields_annotations();
   }
+  Annotations* type_annotations() const {
+    if (annotations() == NULL) return NULL;
+    return annotations()->type_annotations();
+  }
 
   // allocation
   instanceOop allocate_instance(TRAPS);
@@ -728,7 +781,6 @@
   GrowableArray<Klass*>* compute_secondary_supers(int num_extra_slots);
   bool compute_is_subtype_of(Klass* k);
   bool can_be_primary_super_slow() const;
-  Klass* java_super() const              { return super(); }
   int oop_size(oop obj)  const             { return size_helper(); }
   bool oop_is_instance_slow() const        { return true; }
 
@@ -750,6 +802,10 @@
     return (InstanceKlass*) k;
   }
 
+  InstanceKlass* java_super() const {
+    return (super() == NULL) ? NULL : cast(super());
+  }
+
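Overriding java_super() here with an InstanceKlass* return type lets callers walk the superclass chain without the InstanceKlass::cast() calls this changeset removes elsewhere. A small usage sketch:

    for (InstanceKlass* s = ik->java_super(); s != NULL; s = s->java_super()) {
      // s is already typed as InstanceKlass*; no cast needed
    }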
   // Sizing (in words)
   static int header_size()            { return align_object_offset(sizeof(InstanceKlass)/HeapWordSize); }
   static int size(int vtable_length, int itable_length,
--- a/src/share/vm/oops/klass.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/klass.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -56,7 +56,7 @@
 
   while (t != NULL) {
     if (t == k) return true;
-    t = Klass::cast(t)->super();
+    t = t->super();
   }
   return false;
 }
@@ -243,16 +243,16 @@
       juint j = super_depth();
       assert(j == my_depth, "computed accessor gets right answer");
       Klass* t = this;
-      while (!Klass::cast(t)->can_be_primary_super()) {
-        t = Klass::cast(t)->super();
-        j = Klass::cast(t)->super_depth();
+      while (!t->can_be_primary_super()) {
+        t = t->super();
+        j = t->super_depth();
       }
       for (juint j1 = j+1; j1 < primary_super_limit(); j1++) {
         assert(primary_super_of_depth(j1) == NULL, "super list padding");
       }
       while (t != NULL) {
         assert(primary_super_of_depth(j) == t, "super list initialization");
-        t = Klass::cast(t)->super();
+        t = t->super();
         --j;
       }
       assert(j == (juint)-1, "correct depth count");
@@ -333,7 +333,7 @@
 
 
 Klass* Klass::subklass() const {
-  return _subklass == NULL ? NULL : Klass::cast(_subklass);
+  return _subklass == NULL ? NULL : _subklass;
 }
 
 InstanceKlass* Klass::superklass() const {
@@ -342,7 +342,7 @@
 }
 
 Klass* Klass::next_sibling() const {
-  return _next_sibling == NULL ? NULL : Klass::cast(_next_sibling);
+  return _next_sibling == NULL ? NULL : _next_sibling;
 }
 
 void Klass::set_subklass(Klass* s) {
@@ -373,29 +373,22 @@
   debug_only(verify();)
 }
 
-void Klass::remove_from_sibling_list() {
-  // remove receiver from sibling list
-  InstanceKlass* super = superklass();
-  assert(super != NULL || this == SystemDictionary::Object_klass(), "should have super");
-  if (super == NULL) return;        // special case: class Object
-  if (super->subklass() == this) {
-    // first subklass
-    super->set_subklass(_next_sibling);
-  } else {
-    Klass* sib = super->subklass();
-    while (sib->next_sibling() != this) {
-      sib = sib->next_sibling();
-    };
-    sib->set_next_sibling(_next_sibling);
-  }
-}
-
 bool Klass::is_loader_alive(BoolObjectClosure* is_alive) {
   assert(is_metadata(), "p is not meta-data");
   assert(ClassLoaderDataGraph::contains((address)this), "is in the metaspace");
+
+#ifdef ASSERT
   // The class is alive iff the class loader is alive.
   oop loader = class_loader();
-  return (loader == NULL) || is_alive->do_object_b(loader);
+  bool loader_alive = (loader == NULL) || is_alive->do_object_b(loader);
+#endif // ASSERT
+
+  // The class is alive if its mirror is alive (which should be marked if the
+  // loader is alive) unless it is an anonymous class.
+  bool mirror_alive = is_alive->do_object_b(java_mirror());
+  assert(!mirror_alive || loader_alive, "loader must be alive if the mirror is"
+                        " but not the other way around with anonymous classes");
+  return mirror_alive;
 }
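is_loader_alive() now keys liveness off the mirror rather than the loader oop, which is what allows an anonymous class to be unloaded while its loader survives. The closure argument is collector-specific; a minimal sketch of its assumed shape (is_marked() is hypothetical):

    // Sketch: a collector supplies something like this, where do_object_b
    // returns true iff the object was found live in the current cycle.
    class AssumedIsAliveClosure : public BoolObjectClosure {
     public:
      virtual bool do_object_b(oop obj) { return is_marked(obj); }
    };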
 
 void Klass::clean_weak_klass_links(BoolObjectClosure* is_alive) {
@@ -416,10 +409,10 @@
     Klass* sub = current->subklass_oop();
     while (sub != NULL && !sub->is_loader_alive(is_alive)) {
 #ifndef PRODUCT
-        if (TraceClassUnloading && WizardMode) {
-          ResourceMark rm;
+      if (TraceClassUnloading && WizardMode) {
+        ResourceMark rm;
         tty->print_cr("[Unlinking class (subclass) %s]", sub->external_name());
-        }
+      }
 #endif
       sub = sub->next_sibling_oop();
     }
@@ -431,16 +424,16 @@
     // Find and set the first alive sibling
     Klass* sibling = current->next_sibling_oop();
     while (sibling != NULL && !sibling->is_loader_alive(is_alive)) {
-          if (TraceClassUnloading && WizardMode) {
-            ResourceMark rm;
+      if (TraceClassUnloading && WizardMode) {
+        ResourceMark rm;
         tty->print_cr("[Unlinking class (sibling) %s]", sibling->external_name());
-          }
+      }
       sibling = sibling->next_sibling_oop();
-      }
+    }
     current->set_next_sibling(sibling);
     if (sibling != NULL) {
       stack.push(sibling);
-}
+    }
 
     // Clean the implementors list and method data.
     if (current->oop_is_instance()) {
@@ -554,7 +547,11 @@
     InstanceKlass* ik = (InstanceKlass*) this;
     if (ik->is_anonymous()) {
       assert(EnableInvokeDynamic, "");
-      intptr_t hash = ik->java_mirror()->identity_hash();
+      intptr_t hash = 0;
+      if (ik->java_mirror() != NULL) {
+        // The java_mirror may not have been created yet; use 0 as the hash.
+        hash = ik->java_mirror()->identity_hash();
+      }
       char     hash_buf[40];
       sprintf(hash_buf, "/" UINTX_FORMAT, (uintx)hash);
       size_t   hash_len = strlen(hash_buf);
--- a/src/share/vm/oops/klass.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/klass.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -267,7 +267,6 @@
   Klass* subklass() const;
   Klass* next_sibling() const;
   void append_to_sibling_list();           // add newly created receiver to superklass' subklass list
-  void remove_from_sibling_list();         // remove receiver from sibling list
 
   void set_next_link(Klass* k) { _next_link = k; }
   Klass* next_link() const { return _next_link; }   // The next klass defined by the class loader.
@@ -422,12 +421,6 @@
   // if not, throw either an Error or an Exception.
   virtual void check_valid_for_instantiation(bool throwError, TRAPS);
 
-  // Casting
-  static Klass* cast(Klass* k) {
-    assert(k->is_klass(), "cast to Klass");
-    return k;
-  }
-
   // array copying
   virtual void  copy_array(arrayOop s, int src_pos, arrayOop d, int dst_pos, int length, TRAPS);
 
@@ -458,6 +451,8 @@
 
   oop class_loader() const;
 
+  virtual oop klass_holder() const      { return class_loader(); }
+
  protected:
   virtual Klass* array_klass_impl(bool or_null, int rank, TRAPS);
   virtual Klass* array_klass_impl(bool or_null, TRAPS);
@@ -587,8 +582,8 @@
   // garbage collection support
   virtual void oops_do(OopClosure* cl);
 
-  // Checks if the class loader is alive.
-  // Iff the class loader is alive the Klass is considered alive.
+  // Iff the class loader (or mirror for anonymous classes) is alive the
+  // Klass is considered alive.
   // The is_alive closure passed in depends on the Garbage Collector used.
   bool is_loader_alive(BoolObjectClosure* is_alive);
 
--- a/src/share/vm/oops/klassVtable.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/klassVtable.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -54,22 +54,16 @@
 // the same name and signature as m), then m is a Miranda method which is
 // entered as a public abstract method in C's vtable.  From then on it should
 // be treated as any other public method in C for method override purposes.
-void klassVtable::compute_vtable_size_and_num_mirandas(int &vtable_length,
-                                                       int &num_miranda_methods,
-                                                       Klass* super,
-                                                       Array<Method*>* methods,
-                                                       AccessFlags class_flags,
-                                                       Handle classloader,
-                                                       Symbol* classname,
-                                                       Array<Klass*>* local_interfaces,
-                                                       TRAPS
-                                                       ) {
-
+void klassVtable::compute_vtable_size_and_num_mirandas(
+    int* vtable_length_ret, int* num_new_mirandas,
+    GrowableArray<Method*>* all_mirandas, Klass* super,
+    Array<Method*>* methods, AccessFlags class_flags,
+    Handle classloader, Symbol* classname, Array<Klass*>* local_interfaces,
+    TRAPS) {
   No_Safepoint_Verifier nsv;
 
   // set up default result values
-  vtable_length = 0;
-  num_miranda_methods = 0;
+  int vtable_length = 0;
 
   // start off with super's vtable length
   InstanceKlass* sk = (InstanceKlass*)super;
@@ -86,9 +80,12 @@
     }
   }
 
+  GrowableArray<Method*> new_mirandas(20);
   // compute the number of miranda methods that must be added to the end
-  num_miranda_methods = get_num_mirandas(super, methods, local_interfaces);
-  vtable_length += (num_miranda_methods * vtableEntry::size());
+  get_mirandas(&new_mirandas, all_mirandas, super, methods, local_interfaces);
+  *num_new_mirandas = new_mirandas.length();
+
+  vtable_length += *num_new_mirandas * vtableEntry::size();
 
   if (Universe::is_bootstrapping() && vtable_length == 0) {
     // array classes don't have their superclass set correctly during
@@ -109,6 +106,8 @@
          "bad vtable size for class Object");
   assert(vtable_length % vtableEntry::size() == 0, "bad vtable length");
   assert(vtable_length >= Universe::base_vtable_size(), "vtable too small");
+
+  *vtable_length_ret = vtable_length;
 }
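The signature change above replaces the int& out-parameters with pointers and adds an optional all_mirandas sink, so call sites now spell out their outputs explicitly. A hedged call sketch (argument names are placeholders for whatever the class file parser has in hand):

    int vtable_length = 0;
    int num_new_mirandas = 0;
    GrowableArray<Method*> all_mirandas(20);
    klassVtable::compute_vtable_size_and_num_mirandas(
        &vtable_length, &num_new_mirandas, &all_mirandas, super, methods,
        class_flags, classloader, classname, local_interfaces, CHECK);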
 
 int klassVtable::index_of(Method* m, int len) const {
@@ -191,7 +190,7 @@
     }
 
     // add miranda methods; it will also update the value of initialized
-    fill_in_mirandas(initialized);
+    fill_in_mirandas(&initialized);
 
     // In class hierarchies where the accessibility is not increasing (i.e., going from private ->
     // package_private -> publicprotected), the vtable might actually be smaller than our initial
@@ -249,6 +248,11 @@
   return superk;
 }
 
+// Methods that are "effectively" final don't need vtable entries.
+bool method_is_effectively_final(
+    AccessFlags klass_flags, methodHandle target) {
+  return target->is_final() || (klass_flags.is_final() && !target->is_overpass());
+}
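method_is_effectively_final() widens the old is_final() test: in a final class every method is final in effect, except an overpass (a synthetic bridge generated for default-method resolution), which must still get its own vtable entry. Illustrative outcomes under this rule (a sketch, not exhaustive):

    // target->is_final()                 -> true  (explicitly final)
    // final class, regular method        -> true  (effectively final)
    // final class, overpass method       -> false (still needs a vtable entry)
    // non-final class, non-final method  -> false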
 
 // Update child's copy of super vtable for overrides
 // OR return true if a new vtable entry is required
@@ -269,7 +273,7 @@
     return false;
   }
 
-  if (klass->is_final() || target_method()->is_final()) {
+  if (method_is_effectively_final(klass->access_flags(), target_method)) {
     // a final method never needs a new entry; final methods can be statically
     // resolved and they have to be present in the vtable only if they override
     // a super's method, in which case they re-use its entry
@@ -303,7 +307,7 @@
     if (super_method->name() == name && super_method->signature() == signature) {
 
       // get super_klass for method_holder for the found method
-      InstanceKlass* super_klass =  InstanceKlass::cast(super_method->method_holder());
+      InstanceKlass* super_klass = super_method->method_holder();
 
       if ((super_klass->is_override(super_method, target_loader, target_classname, THREAD)) ||
       ((klass->major_version() >= VTABLE_TRANSITIVE_OVERRIDE_VERSION)
@@ -406,7 +410,8 @@
                                          Symbol* classname,
                                          AccessFlags class_flags,
                                          TRAPS) {
-  if ((class_flags.is_final() || target_method()->is_final()) ||
+
+  if (method_is_effectively_final(class_flags, target_method) ||
       // a final method never needs a new entry; final methods can be statically
       // resolved and they have to be present in the vtable only if they override
       // a super's method, in which case they re-use its entry
@@ -447,7 +452,7 @@
     }
     // get the class holding the matching method
     // make sure you use that class for is_override
-    InstanceKlass* superk = InstanceKlass::cast(super_method->method_holder());
+    InstanceKlass* superk = super_method->method_holder();
     // we want only instance method matches
     // pretend private methods are not in the super vtable
     // since we do override around them: e.g. a.m pub/b.m private/c.m pub,
@@ -502,7 +507,7 @@
 
   // miranda methods are interface methods in a class's vtable
   if (mhk->is_interface()) {
-    assert(m->is_public() && m->is_abstract(), "should be public and abstract");
+    assert(m->is_public(), "should be public");
     assert(ik()->implements_interface(method_holder) , "this class should implement the interface");
     assert(is_miranda(m, ik()->methods(), ik()->super()), "should be a miranda_method");
     return true;
@@ -532,19 +537,19 @@
   return false;
 }
 
-void klassVtable::add_new_mirandas_to_list(GrowableArray<Method*>* list_of_current_mirandas,
-                                           Array<Method*>* current_interface_methods,
-                                           Array<Method*>* class_methods,
-                                           Klass* super) {
+void klassVtable::add_new_mirandas_to_lists(
+    GrowableArray<Method*>* new_mirandas, GrowableArray<Method*>* all_mirandas,
+    Array<Method*>* current_interface_methods, Array<Method*>* class_methods,
+    Klass* super) {
   // iterate through the current interface's methods to see if each is a miranda
   int num_methods = current_interface_methods->length();
   for (int i = 0; i < num_methods; i++) {
     Method* im = current_interface_methods->at(i);
     bool is_duplicate = false;
-    int num_of_current_mirandas = list_of_current_mirandas->length();
+    int num_of_current_mirandas = new_mirandas->length();
     // check for duplicate mirandas in different interfaces we implement
     for (int j = 0; j < num_of_current_mirandas; j++) {
-      Method* miranda = list_of_current_mirandas->at(j);
+      Method* miranda = new_mirandas->at(j);
       if ((im->name() == miranda->name()) &&
           (im->signature() == miranda->signature())) {
         is_duplicate = true;
@@ -557,51 +562,47 @@
         InstanceKlass *sk = InstanceKlass::cast(super);
         // check if it is a duplicate of a super's miranda
         if (sk->lookup_method_in_all_interfaces(im->name(), im->signature()) == NULL) {
-          list_of_current_mirandas->append(im);
+          new_mirandas->append(im);
+        }
+        if (all_mirandas != NULL) {
+          all_mirandas->append(im);
         }
       }
     }
   }
 }
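add_new_mirandas_to_lists separates two consumers: new_mirandas receives only mirandas not already satisfied by one of the super's interfaces (these get fresh vtable slots), while all_mirandas, when non-NULL, collects every non-duplicate miranda for callers that need the full set. Sketches of both call shapes, taken from this changeset:

    // Sizing (wants the full set as well):
    get_mirandas(&new_mirandas, all_mirandas, super, methods, local_interfaces);
    // Vtable fill (only new slots matter):
    get_mirandas(&mirandas, NULL, ik()->super(), ik()->methods(),
                 ik()->local_interfaces());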
 
-void klassVtable::get_mirandas(GrowableArray<Method*>* mirandas,
+void klassVtable::get_mirandas(GrowableArray<Method*>* new_mirandas,
+                               GrowableArray<Method*>* all_mirandas,
                                Klass* super, Array<Method*>* class_methods,
                                Array<Klass*>* local_interfaces) {
-  assert((mirandas->length() == 0) , "current mirandas must be 0");
+  assert(new_mirandas->length() == 0, "current mirandas must be 0");
 
   // iterate thru the local interfaces looking for a miranda
   int num_local_ifs = local_interfaces->length();
   for (int i = 0; i < num_local_ifs; i++) {
     InstanceKlass *ik = InstanceKlass::cast(local_interfaces->at(i));
-    add_new_mirandas_to_list(mirandas, ik->methods(), class_methods, super);
+    add_new_mirandas_to_lists(new_mirandas, all_mirandas,
+                              ik->methods(), class_methods, super);
     // iterate thru each local's super interfaces
     Array<Klass*>* super_ifs = ik->transitive_interfaces();
     int num_super_ifs = super_ifs->length();
     for (int j = 0; j < num_super_ifs; j++) {
       InstanceKlass *sik = InstanceKlass::cast(super_ifs->at(j));
-      add_new_mirandas_to_list(mirandas, sik->methods(), class_methods, super);
+      add_new_mirandas_to_lists(new_mirandas, all_mirandas,
+                                sik->methods(), class_methods, super);
     }
   }
 }
 
-// get number of mirandas
-int klassVtable::get_num_mirandas(Klass* super, Array<Method*>* class_methods, Array<Klass*>* local_interfaces) {
-  ResourceMark rm;
-  GrowableArray<Method*>* mirandas = new GrowableArray<Method*>(20);
-  get_mirandas(mirandas, super, class_methods, local_interfaces);
-  return mirandas->length();
-}
-
 // fill in mirandas
-void klassVtable::fill_in_mirandas(int& initialized) {
-  ResourceMark rm;
-  GrowableArray<Method*>* mirandas = new GrowableArray<Method*>(20);
-  InstanceKlass *this_ik = ik();
-  get_mirandas(mirandas, this_ik->super(), this_ik->methods(), this_ik->local_interfaces());
-  int num_mirandas = mirandas->length();
-  for (int i = 0; i < num_mirandas; i++) {
-    put_method_at(mirandas->at(i), initialized);
-    initialized++;
+void klassVtable::fill_in_mirandas(int* initialized) {
+  GrowableArray<Method*> mirandas(20);
+  get_mirandas(&mirandas, NULL, ik()->super(), ik()->methods(),
+               ik()->local_interfaces());
+  for (int i = 0; i < mirandas.length(); i++) {
+    put_method_at(mirandas.at(i), *initialized);
+    ++(*initialized);
   }
 }
 
@@ -629,7 +630,7 @@
           if (!(*trace_name_printed)) {
             // RC_TRACE_MESG macro has an embedded ResourceMark
             RC_TRACE_MESG(("adjust: name=%s",
-                           Klass::cast(old_method->method_holder())->external_name()));
+                           old_method->method_holder()->external_name()));
             *trace_name_printed = true;
           }
           // RC_TRACE macro has an embedded ResourceMark
@@ -744,8 +745,8 @@
     Method* target = klass->uncached_lookup_method(method_name, method_signature);
     while (target != NULL && target->is_static()) {
       // continue with recursive lookup through the superclass
-      Klass* super = Klass::cast(target->method_holder())->super();
-      target = (super == NULL) ? (Method*)NULL : Klass::cast(super)->uncached_lookup_method(method_name, method_signature);
+      Klass* super = target->method_holder()->super();
+      target = (super == NULL) ? (Method*)NULL : super->uncached_lookup_method(method_name, method_signature);
     }
     if (target == NULL || !target->is_public() || target->is_abstract()) {
       // Entry does not resolve. Leave it empty.
@@ -754,7 +755,7 @@
       // if checkconstraints requested
       methodHandle  target_h (THREAD, target); // preserve across gc
       if (checkconstraints) {
-        Handle method_holder_loader (THREAD, InstanceKlass::cast(target->method_holder())->class_loader());
+        Handle method_holder_loader (THREAD, target->method_holder()->class_loader());
         if (method_holder_loader() != interface_loader()) {
           ResourceMark rm(THREAD);
           char* failed_type_name =
@@ -824,7 +825,7 @@
           if (!(*trace_name_printed)) {
             // RC_TRACE_MESG macro has an embedded ResourceMark
             RC_TRACE_MESG(("adjust: name=%s",
-              Klass::cast(old_method->method_holder())->external_name()));
+              old_method->method_holder()->external_name()));
             *trace_name_printed = true;
           }
           // RC_TRACE macro has an embedded ResourceMark
@@ -851,7 +852,7 @@
   // Handle array argument
   for(int i = 0; i < transitive_intf->length(); i++) {
     Klass* intf = transitive_intf->at(i);
-    assert(Klass::cast(intf)->is_interface(), "sanity check");
+    assert(intf->is_interface(), "sanity check");
 
     // Find no. of methods excluding a <clinit>
     int method_count = InstanceKlass::cast(intf)->methods()->length();
@@ -959,9 +960,9 @@
 
 // m must be a method in an interface
 int klassItable::compute_itable_index(Method* m) {
-  Klass* intf = m->method_holder();
-  assert(InstanceKlass::cast(intf)->is_interface(), "sanity check");
-  Array<Method*>* methods = InstanceKlass::cast(intf)->methods();
+  InstanceKlass* intf = m->method_holder();
+  assert(intf->is_interface(), "sanity check");
+  Array<Method*>* methods = intf->methods();
   int index = 0;
   while(methods->at(index) != m) {
     index++;
--- a/src/share/vm/oops/klassVtable.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/klassVtable.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -84,11 +84,11 @@
   bool is_initialized();
 
   // computes vtable length (in words) and the number of miranda methods
-  static void compute_vtable_size_and_num_mirandas(int &vtable_length, int &num_miranda_methods,
-                                                   Klass* super, Array<Method*>* methods,
-                                                   AccessFlags class_flags, Handle classloader,
-                                                   Symbol* classname, Array<Klass*>* local_interfaces,
-                                                   TRAPS);
+  static void compute_vtable_size_and_num_mirandas(
+      int* vtable_length, int* num_new_mirandas,
+      GrowableArray<Method*>* all_mirandas, Klass* super,
+      Array<Method*>* methods, AccessFlags class_flags, Handle classloader,
+      Symbol* classname, Array<Klass*>* local_interfaces, TRAPS);
 
   // RedefineClasses() API support:
   // If any entry of this vtable points to any of old_methods,
@@ -125,12 +125,17 @@
 
   // support for miranda methods
   bool is_miranda_entry_at(int i);
-  void fill_in_mirandas(int& initialized);
+  void fill_in_mirandas(int* initialized);
   static bool is_miranda(Method* m, Array<Method*>* class_methods, Klass* super);
-  static void add_new_mirandas_to_list(GrowableArray<Method*>* list_of_current_mirandas, Array<Method*>* current_interface_methods, Array<Method*>* class_methods, Klass* super);
-  static void get_mirandas(GrowableArray<Method*>* mirandas, Klass* super, Array<Method*>* class_methods, Array<Klass*>* local_interfaces);
-  static int get_num_mirandas(Klass* super, Array<Method*>* class_methods, Array<Klass*>* local_interfaces);
-
+  static void add_new_mirandas_to_lists(
+      GrowableArray<Method*>* new_mirandas,
+      GrowableArray<Method*>* all_mirandas,
+      Array<Method*>* current_interface_methods, Array<Method*>* class_methods,
+      Klass* super);
+  static void get_mirandas(
+      GrowableArray<Method*>* new_mirandas,
+      GrowableArray<Method*>* all_mirandas, Klass* super,
+      Array<Method*>* class_methods, Array<Klass*>* local_interfaces);
 
   void verify_against(outputStream* st, klassVtable* vt, int index);
   inline InstanceKlass* ik() const;
--- a/src/share/vm/oops/markOop.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/markOop.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,19 +24,7 @@
 
 #include "precompiled.hpp"
 #include "oops/markOop.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
-
+#include "runtime/thread.inline.hpp"
 
 void markOopDesc::print_on(outputStream* st) const {
   if (is_locked()) {
--- a/src/share/vm/oops/method.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/method.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -35,6 +35,7 @@
 #include "memory/generation.hpp"
 #include "memory/metadataFactory.hpp"
 #include "memory/oopFactory.hpp"
+#include "oops/constMethod.hpp"
 #include "oops/methodData.hpp"
 #include "oops/method.hpp"
 #include "oops/oop.inline.hpp"
@@ -57,22 +58,28 @@
 // Implementation of Method
 
 Method* Method::allocate(ClassLoaderData* loader_data,
-                            int byte_code_size,
-                            AccessFlags access_flags,
-                            int compressed_line_number_size,
-                            int localvariable_table_length,
-                            int exception_table_length,
-                            int checked_exceptions_length,
-                            TRAPS) {
+                         int byte_code_size,
+                         AccessFlags access_flags,
+                         int compressed_line_number_size,
+                         int localvariable_table_length,
+                         int exception_table_length,
+                         int checked_exceptions_length,
+                         int method_parameters_length,
+                         u2  generic_signature_index,
+                         ConstMethod::MethodType method_type,
+                         TRAPS) {
   assert(!access_flags.is_native() || byte_code_size == 0,
          "native methods should not contain byte codes");
   ConstMethod* cm = ConstMethod::allocate(loader_data,
-                                      byte_code_size,
-                                      compressed_line_number_size,
-                                      localvariable_table_length,
-                                      exception_table_length,
-                                      checked_exceptions_length,
-                                      CHECK_NULL);
+                                          byte_code_size,
+                                          compressed_line_number_size,
+                                          localvariable_table_length,
+                                          exception_table_length,
+                                          checked_exceptions_length,
+                                          method_parameters_length,
+                                          generic_signature_index,
+                                          method_type,
+                                          CHECK_NULL);
 
   int size = Method::size(access_flags.is_native());
 
@@ -149,11 +156,11 @@
 }
 
 char* Method::name_and_sig_as_C_string() const {
-  return name_and_sig_as_C_string(Klass::cast(constants()->pool_holder()), name(), signature());
+  return name_and_sig_as_C_string(constants()->pool_holder(), name(), signature());
 }
 
 char* Method::name_and_sig_as_C_string(char* buf, int size) const {
-  return name_and_sig_as_C_string(Klass::cast(constants()->pool_holder()), name(), signature(), buf, size);
+  return name_and_sig_as_C_string(constants()->pool_holder(), name(), signature(), buf, size);
 }
 
 char* Method::name_and_sig_as_C_string(Klass* klass, Symbol* method_name, Symbol* signature) {
@@ -187,16 +194,16 @@
   return buf;
 }
 
-int  Method::fast_exception_handler_bci_for(KlassHandle ex_klass, int throw_bci, TRAPS) {
+int Method::fast_exception_handler_bci_for(methodHandle mh, KlassHandle ex_klass, int throw_bci, TRAPS) {
   // exception table holds quadruple entries of the form (beg_bci, end_bci, handler_bci, klass_index)
   // access exception table
-  ExceptionTable table(this);
+  ExceptionTable table(mh());
   int length = table.length();
   // iterate through all entries sequentially
-  constantPoolHandle pool(THREAD, constants());
+  constantPoolHandle pool(THREAD, mh->constants());
   for (int i = 0; i < length; i ++) {
     //reacquire the table in case a GC happened
-    ExceptionTable table(this);
+    ExceptionTable table(mh());
     int beg_bci = table.start_pc(i);
     int end_bci = table.end_pc(i);
     assert(beg_bci <= end_bci, "inconsistent exception table");
@@ -240,12 +247,12 @@
       warning("oopmap should only be accessed by the "
               "VM, GC task or CMS threads (or during debugging)");
       InterpreterOopMap local_mask;
-      InstanceKlass::cast(method_holder())->mask_for(h_this, bci, &local_mask);
+      method_holder()->mask_for(h_this, bci, &local_mask);
       local_mask.print();
     }
   }
 #endif
-  InstanceKlass::cast(method_holder())->mask_for(h_this, bci, mask);
+  method_holder()->mask_for(h_this, bci, mask);
   return;
 }
 
@@ -520,7 +527,7 @@
 bool Method::is_final_method() const {
   // %%% Should return true for private methods also,
   // since there is no way to override them.
-  return is_final() || Klass::cast(method_holder())->is_final();
+  return is_final() || method_holder()->is_final();
 }
 
 
@@ -552,7 +559,7 @@
 
 bool Method::has_valid_initializer_flags() const {
   return (is_static() ||
-          InstanceKlass::cast(method_holder())->major_version() < 51);
+          method_holder()->major_version() < 51);
 }
 
 bool Method::is_static_initializer() const {
@@ -575,8 +582,8 @@
     for (int i = 0; i < length; i++) {
       CheckedExceptionElement* table = h_this->checked_exceptions_start(); // recompute on each iteration, not gc safe
       Klass* k = h_this->constants()->klass_at(table[i].class_cp_index, CHECK_(objArrayHandle()));
-      assert(Klass::cast(k)->is_subclass_of(SystemDictionary::Throwable_klass()), "invalid exception class");
-      mirrors->obj_at_put(i, Klass::cast(k)->java_mirror());
+      assert(k->is_subclass_of(SystemDictionary::Throwable_klass()), "invalid exception class");
+      mirrors->obj_at_put(i, k->java_mirror());
     }
     return mirrors;
   }
@@ -614,8 +621,8 @@
   if( constants()->tag_at(klass_index).is_unresolved_klass() ) {
     Thread *thread = Thread::current();
     Symbol* klass_name = constants()->klass_name_at(klass_index);
-    Handle loader(thread, InstanceKlass::cast(method_holder())->class_loader());
-    Handle prot  (thread, Klass::cast(method_holder())->protection_domain());
+    Handle loader(thread, method_holder()->class_loader());
+    Handle prot  (thread, method_holder()->protection_domain());
     return SystemDictionary::find(klass_name, loader, prot, thread) != NULL;
   } else {
     return true;
@@ -932,7 +939,7 @@
 
   // If method is an interface, we skip it - except if it
   // is a miranda method
-  if (InstanceKlass::cast(method_holder())->is_interface()) {
+  if (method_holder()->is_interface()) {
     // Check that method is not a miranda method
     if (ik->lookup_method(name(), signature()) == NULL) {
       // No implementation exist - so miranda method
@@ -1017,7 +1024,7 @@
     ConstantPool* cp_oop = ConstantPool::allocate(loader_data, cp_length, CHECK_(empty));
     cp = constantPoolHandle(THREAD, cp_oop);
   }
-  cp->set_pool_holder(holder());
+  cp->set_pool_holder(InstanceKlass::cast(holder()));
   cp->symbol_at_put(_imcp_invoke_name,       name);
   cp->symbol_at_put(_imcp_invoke_signature,  signature);
   cp->set_preresolution();
@@ -1030,8 +1037,10 @@
 
   methodHandle m;
   {
-    Method* m_oop = Method::allocate(loader_data, 0, accessFlags_from(flags_bits),
-                                              0, 0, 0, 0, CHECK_(empty));
+    Method* m_oop = Method::allocate(loader_data, 0,
+                                     accessFlags_from(flags_bits),
+                                     0, 0, 0, 0, 0, 0,
+                                     ConstMethod::NORMAL, CHECK_(empty));
     m = methodHandle(THREAD, m_oop);
   }
   m->set_constants(cp());
@@ -1064,8 +1073,8 @@
 }
 
 Klass* Method::check_non_bcp_klass(Klass* klass) {
-  if (klass != NULL && Klass::cast(klass)->class_loader() != NULL) {
-    if (Klass::cast(klass)->oop_is_objArray())
+  if (klass != NULL && klass->class_loader() != NULL) {
+    if (klass->oop_is_objArray())
       klass = ObjArrayKlass::cast(klass)->bottom_klass();
     return klass;
   }
@@ -1079,19 +1088,24 @@
   assert(!m->is_native(), "cannot rewrite native methods");
   // Allocate new Method*
   AccessFlags flags = m->access_flags();
+  u2  generic_signature_index = m->generic_signature_index();
   int checked_exceptions_len = m->checked_exceptions_length();
   int localvariable_len = m->localvariable_table_length();
   int exception_table_len = m->exception_table_length();
+  int method_parameters_len = m->method_parameters_length();
 
-  ClassLoaderData* loader_data = m()->method_holder()->class_loader_data();
+  ClassLoaderData* loader_data = m->method_holder()->class_loader_data();
   Method* newm_oop = Method::allocate(loader_data,
-                                               new_code_length,
-                                              flags,
-                                              new_compressed_linenumber_size,
-                                              localvariable_len,
-                                              exception_table_len,
-                                              checked_exceptions_len,
-                                              CHECK_(methodHandle()));
+                                      new_code_length,
+                                      flags,
+                                      new_compressed_linenumber_size,
+                                      localvariable_len,
+                                      exception_table_len,
+                                      checked_exceptions_len,
+                                      method_parameters_len,
+                                      generic_signature_index,
+                                      m->method_type(),
+                                      CHECK_(methodHandle()));
   methodHandle newm (THREAD, newm_oop);
   int new_method_size = newm->method_size();
 
@@ -1155,8 +1169,12 @@
 vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) {
   // if loader is not the default loader (i.e., != NULL), we can't know the intrinsics
   // because we are not loading from core libraries
-  if (InstanceKlass::cast(holder)->class_loader() != NULL)
+  // exception: the AES intrinsics come from lib/ext/sunjce_provider.jar
+  // which is not loaded by the default class loader, so we check for its loader here
+  if ((InstanceKlass::cast(holder)->class_loader() != NULL) &&
+       InstanceKlass::cast(holder)->class_loader()->klass()->name() != vmSymbols::sun_misc_Launcher_ExtClassLoader()) {
     return vmSymbols::NO_SID;   // regardless of name, no intrinsics here
+  }
 
   // see if the klass name is well-known:
   Symbol* klass_name = InstanceKlass::cast(holder)->name();
@@ -1229,8 +1247,8 @@
     return false;
   }
   bool sig_is_loaded = true;
-  Handle class_loader(THREAD, InstanceKlass::cast(m->method_holder())->class_loader());
-  Handle protection_domain(THREAD, Klass::cast(m->method_holder())->protection_domain());
+  Handle class_loader(THREAD, m->method_holder()->class_loader());
+  Handle protection_domain(THREAD, m->method_holder()->protection_domain());
   ResourceMark rm(THREAD);
   Symbol*  signature = m->signature();
   for(SignatureStream ss(signature); !ss.is_done(); ss.next()) {
@@ -1256,8 +1274,8 @@
 }
 
 bool Method::has_unloaded_classes_in_signature(methodHandle m, TRAPS) {
-  Handle class_loader(THREAD, InstanceKlass::cast(m->method_holder())->class_loader());
-  Handle protection_domain(THREAD, Klass::cast(m->method_holder())->protection_domain());
+  Handle class_loader(THREAD, m->method_holder()->class_loader());
+  Handle protection_domain(THREAD, m->method_holder()->protection_domain());
   ResourceMark rm(THREAD);
   Symbol*  signature = m->signature();
   for(SignatureStream ss(signature); !ss.is_done(); ss.next()) {
@@ -1319,13 +1337,15 @@
                                  Array<AnnotationArray*>* methods_annotations,
                                  Array<AnnotationArray*>* methods_parameter_annotations,
                                  Array<AnnotationArray*>* methods_default_annotations,
+                                 Array<AnnotationArray*>* methods_type_annotations,
                                  bool idempotent) {
   int length = methods->length();
   if (length > 1) {
     bool do_annotations = false;
     if (methods_annotations != NULL ||
         methods_parameter_annotations != NULL ||
-        methods_default_annotations != NULL) {
+        methods_default_annotations != NULL ||
+        methods_type_annotations != NULL) {
       do_annotations = true;
     }
     if (do_annotations) {
@@ -1344,6 +1364,7 @@
     assert(methods_annotations == NULL           || methods_annotations->length() == methods->length(), "");
     assert(methods_parameter_annotations == NULL || methods_parameter_annotations->length() == methods->length(), "");
     assert(methods_default_annotations == NULL   || methods_default_annotations->length() == methods->length(), "");
+    assert(methods_type_annotations == NULL      || methods_type_annotations->length() == methods->length(), "");
     if (do_annotations) {
       ResourceMark rm;
       // Allocate temporary storage
@@ -1351,6 +1372,7 @@
       reorder_based_on_method_index(methods, methods_annotations, temp_array);
       reorder_based_on_method_index(methods, methods_parameter_annotations, temp_array);
       reorder_based_on_method_index(methods, methods_default_annotations, temp_array);
+      reorder_based_on_method_index(methods, methods_type_annotations, temp_array);
     }
 
     // Reset method ordering
@@ -1464,7 +1486,7 @@
 
 
 Bytecodes::Code Method::orig_bytecode_at(int bci) const {
-  BreakpointInfo* bp = InstanceKlass::cast(method_holder())->breakpoints();
+  BreakpointInfo* bp = method_holder()->breakpoints();
   for (; bp != NULL; bp = bp->next()) {
     if (bp->match(this, bci)) {
       return bp->orig_bytecode();
@@ -1476,7 +1498,7 @@
 
 void Method::set_orig_bytecode_at(int bci, Bytecodes::Code code) {
   assert(code != Bytecodes::_breakpoint, "cannot patch breakpoints this way");
-  BreakpointInfo* bp = InstanceKlass::cast(method_holder())->breakpoints();
+  BreakpointInfo* bp = method_holder()->breakpoints();
   for (; bp != NULL; bp = bp->next()) {
     if (bp->match(this, bci)) {
       bp->set_orig_bytecode(code);
@@ -1486,7 +1508,7 @@
 }
 
 void Method::set_breakpoint(int bci) {
-  InstanceKlass* ik = InstanceKlass::cast(method_holder());
+  InstanceKlass* ik = method_holder();
   BreakpointInfo *bp = new BreakpointInfo(this, bci);
   bp->set_next(ik->breakpoints());
   ik->set_breakpoints(bp);
@@ -1495,7 +1517,7 @@
 }
 
 static void clear_matches(Method* m, int bci) {
-  InstanceKlass* ik = InstanceKlass::cast(m->method_holder());
+  InstanceKlass* ik = m->method_holder();
   BreakpointInfo* prev_bp = NULL;
   BreakpointInfo* next_bp;
   for (BreakpointInfo* bp = ik->breakpoints(); bp != NULL; bp = next_bp) {
@@ -1778,7 +1800,7 @@
 bool Method::is_method_id(jmethodID mid) {
   Method* m = resolve_jmethod_id(mid);
   assert(m != NULL, "should be called with non-null method");
-  InstanceKlass* ik = InstanceKlass::cast(m->method_holder());
+  InstanceKlass* ik = m->method_holder();
   ClassLoaderData* cld = ik->class_loader_data();
   if (cld->jmethod_ids() == NULL) return false;
   return (cld->jmethod_ids()->contains((Method**)mid));
@@ -1806,6 +1828,23 @@
   loader_data->jmethod_ids()->clear_all_methods();
 }
 
+
+// Check that this pointer is valid by checking that the vtbl pointer matches
+bool Method::is_valid_method() const {
+  if (this == NULL) {
+    return false;
+  } else if (!is_metaspace_object()) {
+    return false;
+  } else {
+    Method m;
+    // This assumes that the vtbl pointer is the first word of a C++ object.
+    // This assumption is also made by patch_klass_vtables() in metaspaceShared.cpp.
+    void* vtbl2 = dereference_vptr((void*)&m);
+    void* this_vtbl = dereference_vptr((void*)this);
+    return vtbl2 == this_vtbl;
+  }
+}
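is_valid_method() relies on every Method sharing one vtbl pointer, planted at offset 0 of the object by the C++ compiler, so a stack-allocated Method supplies the expected value to compare against. A sketch of the helper's presumed effect (dereference_vptr is declared elsewhere in HotSpot; this is an assumption about its behavior, not its actual definition):

    // Presumed effect: read the first word of a polymorphic C++ object.
    static inline void* dereference_vptr(const void* addr) {
      return *(void**) addr;
    }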
+
 #ifndef PRODUCT
 void Method::print_jmethod_ids(ClassLoaderData* loader_data, outputStream* out) {
   out->print_cr("jni_method_id count = %d", loader_data->jmethod_ids()->count_methods());
@@ -1927,7 +1966,7 @@
   guarantee(constMethod()->is_metadata(), "should be metadata");
   MethodData* md = method_data();
   guarantee(md == NULL ||
-      md->is_metadata(), "should be in permspace");
+      md->is_metadata(), "should be metadata");
   guarantee(md == NULL ||
       md->is_methodData(), "should be method data");
 }
--- a/src/share/vm/oops/method.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/method.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,7 +30,6 @@
 #include "compiler/oopMap.hpp"
 #include "interpreter/invocationCounter.hpp"
 #include "oops/annotations.hpp"
-#include "oops/constMethod.hpp"
 #include "oops/constantPool.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/oop.hpp"
@@ -74,12 +73,9 @@
 // |------------------------------------------------------|
 // | result_index (C++ interpreter only)                  |
 // |------------------------------------------------------|
-// | method_size             | max_stack                  |
-// | max_locals              | size_of_parameters         |
+// | method_size             | intrinsic_id  |   flags    |
 // |------------------------------------------------------|
-// |intrinsic_id|   flags    |  throwout_count            |
-// |------------------------------------------------------|
-// | num_breakpoints         |  (unused)                  |
+// | throwout_count          |   num_breakpoints          |
 // |------------------------------------------------------|
 // | invocation_counter                                   |
 // | backedge_counter                                     |
@@ -104,6 +100,7 @@
 class LocalVariableTableElement;
 class AdapterHandlerEntry;
 class MethodData;
+class ConstMethod;
 
 class Method : public Metadata {
  friend class VMStructs;
@@ -118,9 +115,6 @@
   int               _result_index;               // C++ interpreter needs for converting results to/from stack
 #endif
   u2                _method_size;                // size of this object
-  u2                _max_stack;                  // Maximum number of entries on the expression stack
-  u2                _max_locals;                 // Number of local variables used by this method
-  u2                _size_of_parameters;         // size of the parameter block (receiver + arguments) in words
   u1                _intrinsic_id;               // vmSymbols::intrinsic_id (0 == _none)
   u1                _jfr_towrite  : 1,           // Flags
                     _force_inline : 1,
@@ -158,16 +152,21 @@
   // Constructor
   Method(ConstMethod* xconst, AccessFlags access_flags, int size);
  public:
+
   static Method* allocate(ClassLoaderData* loader_data,
-                            int byte_code_size,
-                            AccessFlags access_flags,
-                            int compressed_line_number_size,
-                            int localvariable_table_length,
-                            int exception_table_length,
-                            int checked_exceptions_length,
-                            TRAPS);
+                          int byte_code_size,
+                          AccessFlags access_flags,
+                          int compressed_line_number_size,
+                          int localvariable_table_length,
+                          int exception_table_length,
+                          int checked_exceptions_length,
+                          int method_parameters_length,
+                          u2 generic_signature_index,
+                          ConstMethod::MethodType method_type,
+                          TRAPS);
 
-  Method() { assert(DumpSharedSpaces || UseSharedSpaces, "only for CDS"); }
+  // CDS and vtbl checking can create an empty Method to get the vtbl pointer.
+  Method() {}
 
   // The Method vtable is restored by this call when the Method is in the
   // shared archive.  See patch_klass_vtables() in metaspaceShared.cpp for
@@ -207,26 +206,33 @@
 
   // annotations support
   AnnotationArray* annotations() const           {
-    InstanceKlass* ik = InstanceKlass::cast(method_holder());
+    InstanceKlass* ik = method_holder();
     if (ik->annotations() == NULL) {
       return NULL;
     }
     return ik->annotations()->get_method_annotations_of(method_idnum());
   }
   AnnotationArray* parameter_annotations() const {
-    InstanceKlass* ik = InstanceKlass::cast(method_holder());
+    InstanceKlass* ik = method_holder();
     if (ik->annotations() == NULL) {
       return NULL;
     }
     return ik->annotations()->get_method_parameter_annotations_of(method_idnum());
   }
   AnnotationArray* annotation_default() const    {
-    InstanceKlass* ik = InstanceKlass::cast(method_holder());
+    InstanceKlass* ik = method_holder();
     if (ik->annotations() == NULL) {
       return NULL;
     }
     return ik->annotations()->get_method_default_annotations_of(method_idnum());
   }
+  AnnotationArray* type_annotations() const {
+    InstanceKlass* ik = method_holder();
+    Annotations* type_annos = ik->type_annotations();
+    if (type_annos == NULL)
+      return NULL;
+    return type_annos->get_method_annotations_of(method_idnum());
+  }
 
 #ifdef CC_INTERP
   void set_result_index(BasicType type);
@@ -286,13 +292,13 @@
 
   // max stack
   // return original max stack size for method verification
-  int  verifier_max_stack() const                { return _max_stack; }
-  int           max_stack() const                { return _max_stack + extra_stack_entries(); }
-  void      set_max_stack(int size)              {        _max_stack = size; }
+  int  verifier_max_stack() const                { return constMethod()->max_stack(); }
+  int           max_stack() const                { return constMethod()->max_stack() + extra_stack_entries(); }
+  void      set_max_stack(int size)              {        constMethod()->set_max_stack(size); }
 
   // max locals
-  int  max_locals() const                        { return _max_locals; }
-  void set_max_locals(int size)                  { _max_locals = size; }
+  int  max_locals() const                        { return constMethod()->max_locals(); }
+  void set_max_locals(int size)                  { constMethod()->set_max_locals(size); }
 
   int highest_comp_level() const;
   void set_highest_comp_level(int level);
@@ -310,7 +316,8 @@
   void set_interpreter_throwout_count(int count) { _interpreter_throwout_count = count; }
 
   // size of parameters
-  int  size_of_parameters() const                { return _size_of_parameters; }
+  int  size_of_parameters() const                { return constMethod()->size_of_parameters(); }
+  void set_size_of_parameters(int size)          { constMethod()->set_size_of_parameters(size); }
 
   bool has_stackmap_table() const {
     return constMethod()->has_stackmap_table();
@@ -344,7 +351,7 @@
   // exception handler which caused the exception to be thrown, which
   // is needed for proper retries. See, for example,
   // InterpreterRuntime::exception_handler_for_exception.
-  int fast_exception_handler_bci_for(KlassHandle ex_klass, int throw_bci, TRAPS);
+  static int fast_exception_handler_bci_for(methodHandle mh, KlassHandle ex_klass, int throw_bci, TRAPS);
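+  // Now static and handle-based, presumably so the methodHandle keeps the Method reachable across safepoints.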
 
   // method data access
   MethodData* method_data() const              {
@@ -474,6 +481,12 @@
   void print_codes_on(outputStream* st) const                      PRODUCT_RETURN;
   void print_codes_on(int from, int to, outputStream* st) const    PRODUCT_RETURN;
 
+  // method parameters
+  int method_parameters_length() const
+                         { return constMethod()->method_parameters_length(); }
+  MethodParametersElement* method_parameters_start() const
+                          { return constMethod()->method_parameters_start(); }
+
   // checked exceptions
   int checked_exceptions_length() const
                          { return constMethod()->checked_exceptions_length(); }
@@ -494,7 +507,7 @@
                        { return constMethod()->compressed_linenumber_table(); }
 
   // method holder (the Klass* holding this method)
-  Klass* method_holder() const                 { return constants()->pool_holder(); }
+  InstanceKlass* method_holder() const         { return constants()->pool_holder(); }
 
   void compute_size_of_parameters(Thread *thread); // word size of parameters (receiver if any + arguments)
   Symbol* klass_name() const;                    // returns the name of the method holder
@@ -587,8 +600,6 @@
 #ifdef CC_INTERP
   static ByteSize result_index_offset()          { return byte_offset_of(Method, _result_index ); }
 #endif /* CC_INTERP */
-  static ByteSize size_of_locals_offset()        { return byte_offset_of(Method, _max_locals        ); }
-  static ByteSize size_of_parameters_offset()    { return byte_offset_of(Method, _size_of_parameters); }
   static ByteSize from_compiled_offset()         { return byte_offset_of(Method, _from_compiled_entry); }
   static ByteSize code_offset()                  { return byte_offset_of(Method, _code); }
   static ByteSize invocation_counter_offset()    { return byte_offset_of(Method, _invocation_counter); }
@@ -604,7 +615,6 @@
   static ByteSize from_interpreted_offset()      { return byte_offset_of(Method, _from_interpreted_entry ); }
   static ByteSize interpreter_entry_offset()     { return byte_offset_of(Method, _i2i_entry ); }
   static ByteSize signature_handler_offset()     { return in_ByteSize(sizeof(Method) + wordSize);      }
-  static ByteSize max_stack_offset()             { return byte_offset_of(Method, _max_stack         ); }
 
   // for code generation
   static int method_data_offset_in_bytes()       { return offset_of(Method, _method_data); }
@@ -695,18 +705,18 @@
 
   // Get this method's jmethodID -- allocate if it doesn't exist
   jmethodID jmethod_id()                            { methodHandle this_h(this);
-                                                      return InstanceKlass::get_jmethod_id(InstanceKlass::cast(method_holder()), this_h); }
+                                                      return InstanceKlass::get_jmethod_id(method_holder(), this_h); }
 
   // Lookup the jmethodID for this method.  Return NULL if not found.
   // NOTE that this function can be called from a signal handler
   // (see AsyncGetCallTrace support for Forte Analyzer) and this
   // needs to be async-safe. No allocation should be done and
   // so handles are not used to avoid deadlock.
-  jmethodID find_jmethod_id_or_null()               { return InstanceKlass::cast(method_holder())->jmethod_id_or_null(this); }
+  jmethodID find_jmethod_id_or_null()               { return method_holder()->jmethod_id_or_null(this); }
 
   // JNI static invoke cached itable index accessors
-  int cached_itable_index()                         { return InstanceKlass::cast(method_holder())->cached_itable_index(method_idnum()); }
-  void set_cached_itable_index(int index)           { InstanceKlass::cast(method_holder())->set_cached_itable_index(method_idnum(), index); }
+  int cached_itable_index()                         { return method_holder()->cached_itable_index(method_idnum()); }
+  void set_cached_itable_index(int index)           { method_holder()->set_cached_itable_index(method_idnum(), index); }
 
   // Support for inlining of intrinsic methods
   vmIntrinsics::ID intrinsic_id() const          { return (vmIntrinsics::ID) _intrinsic_id;           }
@@ -725,14 +735,18 @@
   void set_dont_inline(bool x)  {        _dont_inline = x;  }
   bool  is_hidden()             { return _hidden;           }
   void set_hidden(bool x)       {        _hidden = x;       }
+  ConstMethod::MethodType method_type() const {
+    return _constMethod->method_type();
+  }
+  bool is_overpass() const { return method_type() == ConstMethod::OVERPASS; }
 
   // On-stack replacement support
   bool has_osr_nmethod(int level, bool match_level) {
-   return InstanceKlass::cast(method_holder())->lookup_osr_nmethod(this, InvocationEntryBci, level, match_level) != NULL;
+   return method_holder()->lookup_osr_nmethod(this, InvocationEntryBci, level, match_level) != NULL;
   }
 
   nmethod* lookup_osr_nmethod_for(int bci, int level, bool match_level) {
-    return InstanceKlass::cast(method_holder())->lookup_osr_nmethod(this, bci, level, match_level);
+    return method_holder()->lookup_osr_nmethod(this, bci, level, match_level);
   }
 
   // Inline cache support
@@ -790,11 +804,9 @@
                            Array<AnnotationArray*>* methods_annotations,
                            Array<AnnotationArray*>* methods_parameter_annotations,
                            Array<AnnotationArray*>* methods_default_annotations,
+                           Array<AnnotationArray*>* methods_type_annotations,
                            bool idempotent = false);
 
-  // size of parameters
-  void set_size_of_parameters(int size)          { _size_of_parameters = size; }
-
   // Deallocation function for redefine classes or if an error occurs
   void deallocate_contents(ClassLoaderData* loader_data);
 
@@ -806,6 +818,9 @@
 
   const char* internal_name() const { return "{method}"; }
 
+  // Check for valid method pointer
+  bool is_valid_method() const;
+
   // Verify
   void verify() { verify_on(tty); }
   void verify_on(outputStream* st);
--- a/src/share/vm/oops/objArrayKlass.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/objArrayKlass.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -81,18 +81,13 @@
       Array<Klass*>* element_supers = element_klass->secondary_supers();
       for( int i = element_supers->length()-1; i >= 0; i-- ) {
         Klass* elem_super = element_supers->at(i);
-        if (Klass::cast(elem_super)->array_klass_or_null() == NULL) {
+        if (elem_super->array_klass_or_null() == NULL) {
           supers_exist = false;
           break;
         }
       }
       if (!supers_exist) {
         // Oops.  Not allocated yet.  Back out, allocate it, and retry.
-#ifndef PRODUCT
-        if (WizardMode) {
-          tty->print_cr("Must retry array klass creation for depth %d",n);
-        }
-#endif
         KlassHandle ek;
         {
           MutexUnlocker mu(MultiArray_lock);
@@ -172,7 +167,7 @@
   } else {
     bk = element_klass();
   }
-  assert(bk != NULL && (Klass::cast(bk)->oop_is_instance() || Klass::cast(bk)->oop_is_typeArray()), "invalid bottom klass");
+  assert(bk != NULL && (bk->oop_is_instance() || bk->oop_is_typeArray()), "invalid bottom klass");
   this->set_bottom_klass(bk);
   this->set_class_loader_data(bk->class_loader_data());
 
@@ -254,7 +249,7 @@
     // We have to make sure all elements conform to the destination array
     Klass* bound = ObjArrayKlass::cast(d->klass())->element_klass();
     Klass* stype = ObjArrayKlass::cast(s->klass())->element_klass();
-    if (stype == bound || Klass::cast(stype)->is_subtype_of(bound)) {
+    if (stype == bound || stype->is_subtype_of(bound)) {
       // elements are guaranteed to be subtypes, so no check necessary
       bs->write_ref_array_pre(dst, length);
       Copy::conjoint_oops_atomic(src, dst, length);
@@ -271,7 +266,7 @@
         oop new_val = element_is_null ? oop(NULL)
                                       : oopDesc::decode_heap_oop_not_null(element);
         if (element_is_null ||
-            Klass::cast((new_val->klass()))->is_subtype_of(bound)) {
+            (new_val->klass())->is_subtype_of(bound)) {
           bs->write_ref_field_pre(p, new_val);
           *p = *from;
         } else {
@@ -381,7 +376,7 @@
 
 GrowableArray<Klass*>* ObjArrayKlass::compute_secondary_supers(int num_extra_slots) {
   // interfaces = { cloneable_klass, serializable_klass, elemSuper[], ... };
-  Array<Klass*>* elem_supers = Klass::cast(element_klass())->secondary_supers();
+  Array<Klass*>* elem_supers = element_klass()->secondary_supers();
   int num_elem_supers = elem_supers == NULL ? 0 : elem_supers->length();
   int num_secondaries = num_extra_slots + 2 + num_elem_supers;
   if (num_secondaries == 2) {
@@ -411,7 +406,7 @@
 }
 
 void ObjArrayKlass::initialize(TRAPS) {
-  Klass::cast(bottom_klass())->initialize(THREAD);  // dispatches to either InstanceKlass or TypeArrayKlass
+  bottom_klass()->initialize(THREAD);  // dispatches to either InstanceKlass or TypeArrayKlass
 }
 
 #define ObjArrayKlass_SPECIALIZED_OOP_ITERATE(T, a, p, do_oop) \
@@ -607,7 +602,7 @@
     return JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC;
   }
   // Return the flags of the bottom element type.
-  jint element_flags = Klass::cast(bottom_klass())->compute_modifier_flags(CHECK_0);
+  jint element_flags = bottom_klass()->compute_modifier_flags(CHECK_0);
 
   return (element_flags & (JVM_ACC_PUBLIC | JVM_ACC_PRIVATE | JVM_ACC_PROTECTED))
                         | (JVM_ACC_ABSTRACT | JVM_ACC_FINAL);
@@ -686,7 +681,7 @@
   guarantee(element_klass()->is_klass(), "should be klass");
   guarantee(bottom_klass()->is_metadata(), "should be in metaspace");
   guarantee(bottom_klass()->is_klass(), "should be klass");
-  Klass* bk = Klass::cast(bottom_klass());
+  Klass* bk = bottom_klass();
   guarantee(bk->oop_is_instance() || bk->oop_is_typeArray(),  "invalid bottom klass");
 }
 
--- a/src/share/vm/oops/objArrayKlass.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/objArrayKlass.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -74,7 +74,7 @@
   void  copy_array(arrayOop s, int src_pos, arrayOop d, int dst_pos, int length, TRAPS);
 
   // Compute protection domain
-  oop protection_domain() { return Klass::cast(bottom_klass())->protection_domain(); }
+  oop protection_domain() { return bottom_klass()->protection_domain(); }
 
  private:
   // Either oop or narrowOop depending on UseCompressedOops.
--- a/src/share/vm/oops/oop.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/oop.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,19 +27,8 @@
 #include "classfile/javaClasses.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/copy.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 bool always_do_update_barrier = false;
 
--- a/src/share/vm/oops/oopsHierarchy.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/oopsHierarchy.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,20 +26,8 @@
 #include "gc_interface/collectedHeap.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "oops/oopsHierarchy.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/globalDefinitions.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 #ifdef CHECK_UNHANDLED_OOPS
 
--- a/src/share/vm/oops/symbol.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/symbol.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -153,17 +153,15 @@
 
 void Symbol::print_symbol_on(outputStream* st) const {
   st = st ? st : tty;
-  int length = UTF8::unicode_length((const char*)bytes(), utf8_length());
-  const char *ptr = (const char *)bytes();
-  jchar value;
-  for (int index = 0; index < length; index++) {
-    ptr = UTF8::next(ptr, &value);
-    if (value >= 32 && value < 127 || value == '\'' || value == '\\') {
-      st->put(value);
-    } else {
-      st->print("\\u%04x", value);
-    }
-  }
+  st->print("%s", as_quoted_ascii());
+}
+
+char* Symbol::as_quoted_ascii() const {
+  const char *ptr = (const char *)&_body[0];
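+  // Measure the quoted form first so the resource buffer is sized exactly.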
+  int quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
+  char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
+  UTF8::as_quoted_ascii(ptr, result, quoted_length + 1);
+  return result;
 }
 
 jchar* Symbol::as_unicode(int& length) const {
--- a/src/share/vm/oops/symbol.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/oops/symbol.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -189,6 +189,8 @@
   // Use buf if needed buffer length is <= size.
   char* as_C_string_flexible_buffer(Thread* t, char* buf, int size) const;
 
+  // Returns the symbol's contents as a quoted-ASCII (escaped) string.
+  char* as_quoted_ascii() const;
 
   // Returns a null terminated utf8 string in a resource array
   char* as_utf8() const { return as_C_string(); }
--- a/src/share/vm/opto/addnode.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/addnode.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -189,6 +189,11 @@
       set_req(1, addx);
       set_req(2, a22);
       progress = this;
+      PhaseIterGVN *igvn = phase->is_IterGVN();
+      if (add2->outcnt() == 0 && igvn) {
+        // add2 is now disconnected; enqueue it so IGVN can reclaim it.
+        igvn->_worklist.push(add2);
+      }
     }
   }
 
@@ -624,6 +629,11 @@
     if( t22->singleton() && (t22 != Type::TOP) ) {  // Right input is an add of a constant?
       set_req(Address, phase->transform(new (phase->C) AddPNode(in(Base),in(Address),add->in(1))));
       set_req(Offset, add->in(2));
+      PhaseIterGVN *igvn = phase->is_IterGVN();
+      if (add->outcnt() == 0 && igvn) {
+        // add is now disconnected; enqueue it so IGVN can reclaim it.
+        igvn->_worklist.push((Node*)add);
+      }
       return this;              // Made progress
     }
   }
--- a/src/share/vm/opto/block.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/block.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -292,7 +292,7 @@
   void needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs);
   bool schedule_local(PhaseCFG *cfg, Matcher &m, GrowableArray<int> &ready_cnt, VectorSet &next_call);
   // Cleanup if any code lands between a Call and its Catch
-  void call_catch_cleanup(Block_Array &bbs);
+  void call_catch_cleanup(Block_Array &bbs, Compile *C);
   // Detect implicit-null-check opportunities.  Basically, find NULL checks
   // with suitable memory ops nearby.  Use the memory op to do the NULL check.
   // I can generate a memory op if there is not one nearby.
--- a/src/share/vm/opto/bytecodeInfo.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/bytecodeInfo.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "ci/ciReplay.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
 #include "compiler/compileBroker.hpp"
@@ -45,7 +46,8 @@
   _method(callee),
   _site_invoke_ratio(site_invoke_ratio),
   _max_inline_level(max_inline_level),
-  _count_inline_bcs(method()->code_size_for_inlining())
+  _count_inline_bcs(method()->code_size_for_inlining()),
+  _subtrees(c->comp_arena(), 2, 0, NULL)
 {
   NOT_PRODUCT(_count_inlines = 0;)
   if (_caller_jvms != NULL) {
@@ -150,7 +152,7 @@
   } else {
     // Not hot.  Check for medium-sized pre-existing nmethod at cold sites.
     if (callee_method->has_compiled_code() &&
-        callee_method->instructions_size(CompLevel_full_optimization) > inline_small_code_size)
+        callee_method->instructions_size() > inline_small_code_size)
       return "already compiled into a medium method";
   }
   if (size > max_inline_size) {
@@ -192,7 +194,7 @@
     }
 
     if (callee_method->has_compiled_code() &&
-        callee_method->instructions_size(CompLevel_full_optimization) > InlineSmallCode) {
+        callee_method->instructions_size() > InlineSmallCode) {
       wci_result->set_profit(wci_result->profit() * 0.1);
       // %%% adjust wci_result->size()?
     }
@@ -208,16 +210,18 @@
   if ( callee_method->dont_inline())                        return "don't inline by annotation";
   if ( callee_method->has_unloaded_classes_in_signature())  return "unloaded signature classes";
 
-  if (callee_method->force_inline() || callee_method->should_inline()) {
+  if (callee_method->should_inline()) {
     // ignore heuristic controls on inlining
     return NULL;
   }
 
   // Now perform checks which are heuristic
 
-  if (callee_method->has_compiled_code() &&
-      callee_method->instructions_size(CompLevel_full_optimization) > InlineSmallCode) {
+  if (!callee_method->force_inline()) {
+    if (callee_method->has_compiled_code() &&
+        callee_method->instructions_size() > InlineSmallCode) {
     return "already compiled into a big method";
+    }
   }
 
   // don't inline exception code unless the top method belongs to an
@@ -235,6 +239,12 @@
     return "disallowed by CompilerOracle";
   }
 
+#ifndef PRODUCT
+  if (ciReplay::should_not_inline(callee_method)) {
+    return "disallowed by ciReplay";
+  }
+#endif
+
   if (UseStringCache) {
     // Do not inline StringCache::profile() method used only at the beginning.
     if (callee_method->name() == ciSymbol::profile_name() &&
@@ -270,12 +280,15 @@
 //-----------------------------try_to_inline-----------------------------------
 // return NULL if ok, reason for not inlining otherwise
 // Relocated from "InliningClosure::try_to_inline"
-const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) {
-
+const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result, bool& should_delay) {
   // Old algorithm had funny accumulating BC-size counters
   if (UseOldInlining && ClipInlining
       && (int)count_inline_bcs() >= DesiredMethodLimit) {
-    return "size > DesiredMethodLimit";
+    if (!callee_method->force_inline() || !IncrementalInline) {
+      return "size > DesiredMethodLimit";
+    } else if (!C->inlining_incrementally()) {
+      should_delay = true;
+    }
   }
 
   const char *msg = NULL;
@@ -296,8 +309,13 @@
   if (callee_method->code_size() > MaxTrivialSize) {
 
     // don't inline into giant methods
-    if (C->unique() > (uint)NodeCountInliningCutoff) {
-      return "NodeCountInliningCutoff";
+    if (C->over_inlining_cutoff()) {
+      if ((!callee_method->force_inline() && !caller_method->is_compiled_lambda_form())
+          || !IncrementalInline) {
+        return "NodeCountInliningCutoff";
+      } else {
+        should_delay = true;
+      }
     }
 
     if ((!UseInterpreter || CompileTheWorld) &&
@@ -316,7 +334,11 @@
     return "not an accessor";
   }
   if (inline_level() > _max_inline_level) {
-    return "inlining too deep";
+    if (!callee_method->force_inline() || !IncrementalInline) {
+      return "inlining too deep";
+    } else if (!C->inlining_incrementally()) {
+      should_delay = true;
+    }
   }
 
   // detect direct and indirect recursive inlining
@@ -341,7 +363,11 @@
 
   if (UseOldInlining && ClipInlining
       && (int)count_inline_bcs() + size >= DesiredMethodLimit) {
-    return "size > DesiredMethodLimit";
+    if (!callee_method->force_inline() || !IncrementalInline) {
+      return "size > DesiredMethodLimit";
+    } else if (!C->inlining_incrementally()) {
+      should_delay = true;
+    }
   }
 
   // ok, inline this method
@@ -396,7 +422,7 @@
 //------------------------------print_inlining---------------------------------
 // Really, the failure_msg can be a success message also.
 void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const {
-  CompileTask::print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
+  C->print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
   if (callee_method == NULL)  tty->print(" callee not monotonic or profiled");
   if (Verbose && callee_method) {
     const InlineTree *top = this;
@@ -406,8 +432,9 @@
 }
 
 //------------------------------ok_to_inline-----------------------------------
-WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci) {
+WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci, bool& should_delay) {
   assert(callee_method != NULL, "caller checks for optimized virtual!");
+  assert(!should_delay, "should be initialized to false");
 #ifdef ASSERT
   // Make sure the incoming jvms has the same information content as me.
   // This means that we can eventually make this whole class AllStatic.
@@ -437,7 +464,7 @@
 
   // Check if inlining policy says no.
   WarmCallInfo wci = *(initial_wci);
-  failure_msg = try_to_inline(callee_method, caller_method, caller_bci, profile, &wci);
+  failure_msg = try_to_inline(callee_method, caller_method, caller_bci, profile, &wci, should_delay);
   if (failure_msg != NULL && C->log() != NULL) {
     C->log()->inline_fail(failure_msg);
   }
--- a/src/share/vm/opto/c2_globals.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/c2_globals.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -115,6 +115,12 @@
   notproduct(bool, VerifyOpto, false,                                       \
           "Apply more time consuming verification during compilation")      \
                                                                             \
+  notproduct(bool, VerifyIdealNodeCount, false,                             \
+          "Verify that tracked dead ideal node count is accurate")          \
+                                                                            \
+  notproduct(bool, PrintIdealNodeCount, false,                              \
+          "Print liveness counts of ideal nodes")                           \
+                                                                            \
   notproduct(bool, VerifyOptoOopOffsets, false,                             \
           "Check types of base addresses in field references")              \
                                                                             \
@@ -439,6 +445,9 @@
   product(bool, DoEscapeAnalysis, true,                                     \
           "Perform escape analysis")                                        \
                                                                             \
+  develop(bool, ExitEscapeAnalysisOnTimeout, true,                          \
+          "Exit or throw assert in EA when it reaches time limit")          \
+                                                                            \
   notproduct(bool, PrintEscapeAnalysis, false,                              \
           "Print the results of escape analysis")                           \
                                                                             \
@@ -507,6 +516,9 @@
   develop(bool, SpecialArraysEquals, true,                                  \
           "special version of Arrays.equals(char[],char[])")                \
                                                                             \
+  product(bool, SpecialEncodeISOArray, true,                                \
+          "special version of ISO_8859_1$Encoder.encodeISOArray")           \
+                                                                            \
   develop(bool, BailoutToInterpreterForThrows, false,                       \
           "Compiled methods which throws/catches exceptions will be "       \
           "deopt and intp.")                                                \
@@ -597,6 +609,16 @@
                                                                             \
   develop(bool, VerifyAliases, false,                                       \
           "perform extra checks on the results of alias analysis")          \
+                                                                            \
+  product(bool, IncrementalInline, true,                                    \
+          "do post parse inlining")                                         \
+                                                                            \
+  develop(bool, AlwaysIncrementalInline, false,                             \
+          "do all inlining incrementally")                                  \
+                                                                            \
+  product(intx, LiveNodeCountInliningCutoff, 20000,                         \
+          "max number of live nodes in a method")                           \
+
 
 C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
 
--- a/src/share/vm/opto/callGenerator.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/callGenerator.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -139,7 +139,7 @@
   if (!is_static) {
     // Make an explicit receiver null_check as part of this call.
     // Since we share a map with the caller, his JVMS gets adjusted.
-    kit.null_check_receiver(method());
+    kit.null_check_receiver_before_call(method());
     if (kit.stopped()) {
       // And dump it back to the caller, decorated with any exceptions:
       return kit.transfer_exceptions_into_jvms();
@@ -207,7 +207,7 @@
        >= (uint)ImplicitNullCheckThreshold))) {
     // Make an explicit receiver null_check as part of this call.
     // Since we share a map with the caller, his JVMS gets adjusted.
-    receiver = kit.null_check_receiver(method());
+    receiver = kit.null_check_receiver_before_call(method());
     if (kit.stopped()) {
       // And dump it back to the caller, decorated with any exceptions:
       return kit.transfer_exceptions_into_jvms();
@@ -262,8 +262,11 @@
 
 // Allow inlining decisions to be delayed
 class LateInlineCallGenerator : public DirectCallGenerator {
+ protected:
   CallGenerator* _inline_cg;
 
+  virtual bool do_late_inline_check(JVMState* jvms) { return true; }
+
  public:
   LateInlineCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
     DirectCallGenerator(method, true), _inline_cg(inline_cg) {}
@@ -274,9 +277,14 @@
   virtual void do_late_inline();
 
   virtual JVMState* generate(JVMState* jvms) {
+    Compile *C = Compile::current();
+    C->print_inlining_skip(this);
+
     // Record that this call site should be revisited once the main
     // parse is finished.
-    Compile::current()->add_late_inline(this);
+    if (!is_mh_late_inline()) {
+      C->add_late_inline(this);
+    }
 
     // Emit the CallStaticJava and request separate projections so
     // that the late inlining logic can distinguish between fall
@@ -285,15 +293,33 @@
     return DirectCallGenerator::generate(jvms);
   }
 
+  virtual void print_inlining_late(const char* msg) {
+    CallNode* call = call_node();
+    Compile* C = Compile::current();
+    C->print_inlining_insert(this);
+    C->print_inlining(method(), call->jvms()->depth()-1, call->jvms()->bci(), msg);
+  }
+
 };
 
-
 void LateInlineCallGenerator::do_late_inline() {
   // Can't inline it
   if (call_node() == NULL || call_node()->outcnt() == 0 ||
       call_node()->in(0) == NULL || call_node()->in(0)->is_top())
     return;
 
+  for (int i1 = 0; i1 < method()->arg_size(); i1++) {
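+    // An argument that folded to top means this call path is dead; skip late inlining.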
+    if (call_node()->in(TypeFunc::Parms + i1)->is_top()) {
+      assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing");
+      return;
+    }
+  }
+
+  if (call_node()->in(TypeFunc::Memory)->is_top()) {
+    assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing");
+    return;
+  }
+
   CallStaticJavaNode* call = call_node();
 
   // Make a clone of the JVMState that appropriate to use for driving a parse
@@ -307,7 +333,9 @@
 
   // Make sure the state is a MergeMem for parsing.
   if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
-    map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory)));
+    Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory));
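+    // Record a bottom type for the new MergeMem in the initial GVN so it is not left untyped.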
+    C->initial_gvn()->set_type_bottom(mem);
+    map->set_req(TypeFunc::Memory, mem);
   }
 
   // Make enough space for the expression stack and transfer the incoming arguments
@@ -320,6 +348,13 @@
     }
   }
 
+  if (!do_late_inline_check(jvms)) {
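+    // Inlining is still not possible; detach the cloned map so its inputs can be reclaimed.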
+    map->disconnect_inputs(NULL, C);
+    return;
+  }
+
+  C->print_inlining_insert(this);
+
   CompileLog* log = C->log();
   if (log != NULL) {
     log->head("late_inline method='%d'", log->identify(method()));
@@ -354,6 +389,10 @@
     result = (result_size == 1) ? kit.pop() : kit.pop_pair();
   }
 
+  C->set_has_loops(C->has_loops() || _inline_cg->method()->has_loops());
+  C->env()->notice_inlined_method(_inline_cg->method());
+  C->set_inlining_progress(true);
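+  // The progress flag tells the incremental-inlining driver to run another pass.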
+
   kit.replace_call(call, result);
 }
 
@@ -362,6 +401,83 @@
   return new LateInlineCallGenerator(method, inline_cg);
 }
 
+class LateInlineMHCallGenerator : public LateInlineCallGenerator {
+  ciMethod* _caller;
+  int _attempt;
+  bool _input_not_const;
+
+  virtual bool do_late_inline_check(JVMState* jvms);
+  virtual bool already_attempted() const { return _attempt > 0; }
+
+ public:
+  LateInlineMHCallGenerator(ciMethod* caller, ciMethod* callee, bool input_not_const) :
+    LateInlineCallGenerator(callee, NULL), _caller(caller), _attempt(0), _input_not_const(input_not_const) {}
+
+  virtual bool is_mh_late_inline() const { return true; }
+
+  virtual JVMState* generate(JVMState* jvms) {
+    JVMState* new_jvms = LateInlineCallGenerator::generate(jvms);
+    if (_input_not_const) {
+      // Inlining isn't possible yet, so there is no need to enqueue right now.
+      call_node()->set_generator(this);
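+      // CallNode::Ideal re-enqueues this generator if the receiver later folds to a constant.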
+    } else {
+      Compile::current()->add_late_inline(this);
+    }
+    return new_jvms;
+  }
+
+  virtual void print_inlining_late(const char* msg) {
+    if (!_input_not_const) return;
+    LateInlineCallGenerator::print_inlining_late(msg);
+  }
+};
+
+bool LateInlineMHCallGenerator::do_late_inline_check(JVMState* jvms) {
+
+  CallGenerator* cg = for_method_handle_inline(jvms, _caller, method(), _input_not_const);
+
+  if (!_input_not_const) {
+    _attempt++;
+  }
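+  // _attempt feeds already_attempted(), which CallNode::Ideal checks before re-enqueueing this site.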
+
+  if (cg != NULL) {
+    assert(!cg->is_late_inline() && cg->is_inline(), "we're doing late inlining");
+    _inline_cg = cg;
+    Compile::current()->dec_number_of_mh_late_inlines();
+    return true;
+  }
+
+  call_node()->set_generator(this);
+  return false;
+}
+
+CallGenerator* CallGenerator::for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const) {
+  Compile::current()->inc_number_of_mh_late_inlines();
+  CallGenerator* cg = new LateInlineMHCallGenerator(caller, callee, input_not_const);
+  return cg;
+}
+
+class LateInlineStringCallGenerator : public LateInlineCallGenerator {
+
+ public:
+  LateInlineStringCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
+    LateInlineCallGenerator(method, inline_cg) {}
+
+  virtual JVMState* generate(JVMState* jvms) {
+    Compile *C = Compile::current();
+    C->print_inlining_skip(this);
+
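+    // String-concat calls go on their own queue, separate from regular late inlines.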
+    C->add_string_late_inline(this);
+
+    JVMState* new_jvms =  DirectCallGenerator::generate(jvms);
+    return new_jvms;
+  }
+};
+
+CallGenerator* CallGenerator::for_string_late_inline(ciMethod* method, CallGenerator* inline_cg) {
+  return new LateInlineStringCallGenerator(method, inline_cg);
+}
+
 
 //---------------------------WarmCallGenerator--------------------------------
 // Internal class which handles initial deferral of inlining decisions.
@@ -491,7 +607,7 @@
               jvms->bci(), log->identify(_predicted_receiver));
   }
 
-  receiver = kit.null_check_receiver(method());
+  receiver = kit.null_check_receiver_before_call(method());
   if (kit.stopped()) {
     return kit.transfer_exceptions_into_jvms();
   }
@@ -580,35 +696,53 @@
 }
 
 
-CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee) {
+CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden) {
   assert(callee->is_method_handle_intrinsic() ||
          callee->is_compiled_lambda_form(), "for_method_handle_call mismatch");
-  CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee);
-  if (cg != NULL)
-    return cg;
-  return CallGenerator::for_direct_call(callee);
+  bool input_not_const;
+  CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee, input_not_const);
+  Compile* C = Compile::current();
+  if (cg != NULL) {
+    if (!delayed_forbidden && AlwaysIncrementalInline) {
+      return CallGenerator::for_late_inline(callee, cg);
+    } else {
+      return cg;
+    }
+  }
+  int bci = jvms->bci();
+  ciCallProfile profile = caller->call_profile_at_bci(bci);
+  int call_site_count = caller->scale_count(profile.count());
+
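+  // Only defer sites that were actually reached; cold sites get a plain direct call.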
+  if (IncrementalInline && call_site_count > 0 &&
+      (input_not_const || !C->inlining_incrementally() || C->over_inlining_cutoff())) {
+    return CallGenerator::for_mh_late_inline(caller, callee, input_not_const);
+  } else {
+    // Out-of-line call.
+    return CallGenerator::for_direct_call(callee);
+  }
 }
 
-CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee) {
+CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const) {
   GraphKit kit(jvms);
   PhaseGVN& gvn = kit.gvn();
   Compile* C = kit.C;
   vmIntrinsics::ID iid = callee->intrinsic_id();
+  input_not_const = true;
   switch (iid) {
   case vmIntrinsics::_invokeBasic:
     {
-      // get MethodHandle receiver
+      // Get MethodHandle receiver:
       Node* receiver = kit.argument(0);
       if (receiver->Opcode() == Op_ConP) {
+        input_not_const = false;
         const TypeOopPtr* oop_ptr = receiver->bottom_type()->is_oopptr();
         ciMethod* target = oop_ptr->const_oop()->as_method_handle()->get_vmtarget();
         guarantee(!target->is_method_handle_intrinsic(), "should not happen");  // XXX remove
         const int vtable_index = Method::invalid_vtable_index;
-        CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS);
+        CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS, true, true);
+        assert(!cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here");
         if (cg != NULL && cg->is_inline())
           return cg;
-      } else {
-        if (PrintInlining)  CompileTask::print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
       }
     }
     break;
@@ -618,9 +752,10 @@
   case vmIntrinsics::_linkToSpecial:
   case vmIntrinsics::_linkToInterface:
     {
-      // pop MemberName argument
+      // Get MemberName argument:
       Node* member_name = kit.argument(callee->arg_size() - 1);
       if (member_name->Opcode() == Op_ConP) {
+        input_not_const = false;
         const TypeOopPtr* oop_ptr = member_name->bottom_type()->is_oopptr();
         ciMethod* target = oop_ptr->const_oop()->as_member_name()->get_vmtarget();
 
@@ -653,9 +788,25 @@
             }
           }
         }
-        const int vtable_index = Method::invalid_vtable_index;
-        const bool call_is_virtual = target->is_abstract();  // FIXME workaround
-        CallGenerator* cg = C->call_generator(target, vtable_index, call_is_virtual, jvms, true, PROB_ALWAYS);
+
+        // Try to get the most accurate receiver type
+        const bool is_virtual              = (iid == vmIntrinsics::_linkToVirtual);
+        const bool is_virtual_or_interface = (is_virtual || iid == vmIntrinsics::_linkToInterface);
+        int  vtable_index       = Method::invalid_vtable_index;
+        bool call_does_dispatch = false;
+
+        if (is_virtual_or_interface) {
+          ciInstanceKlass* klass = target->holder();
+          Node*             receiver_node = kit.argument(0);
+          const TypeOopPtr* receiver_type = gvn.type(receiver_node)->isa_oopptr();
+          // call_does_dispatch and vtable_index are out-parameters.  They might be changed.
+          target = C->optimize_virtual_call(caller, jvms->bci(), klass, target, receiver_type,
+                                            is_virtual,
+                                            call_does_dispatch, vtable_index);  // out-parameters
+        }
+
+        CallGenerator* cg = C->call_generator(target, vtable_index, call_does_dispatch, jvms, true, PROB_ALWAYS, true, true);
+        assert(!cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here");
         if (cg != NULL && cg->is_inline())
           return cg;
       }
@@ -670,6 +821,129 @@
 }
 
 
+//------------------------PredictedIntrinsicGenerator------------------------------
+// Internal class which handles all predicted Intrinsic calls.
+class PredictedIntrinsicGenerator : public CallGenerator {
+  CallGenerator* _intrinsic;
+  CallGenerator* _cg;
+
+public:
+  PredictedIntrinsicGenerator(CallGenerator* intrinsic,
+                              CallGenerator* cg)
+    : CallGenerator(cg->method())
+  {
+    _intrinsic = intrinsic;
+    _cg        = cg;
+  }
+
+  virtual bool      is_virtual()   const    { return true; }
+  virtual bool      is_inlined()   const    { return true; }
+  virtual bool      is_intrinsic() const    { return true; }
+
+  virtual JVMState* generate(JVMState* jvms);
+};
+
+
+CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic,
+                                                      CallGenerator* cg) {
+  return new PredictedIntrinsicGenerator(intrinsic, cg);
+}
+
+
+JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms) {
+  GraphKit kit(jvms);
+  PhaseGVN& gvn = kit.gvn();
+
+  CompileLog* log = kit.C->log();
+  if (log != NULL) {
+    log->elem("predicted_intrinsic bci='%d' method='%d'",
+              jvms->bci(), log->identify(method()));
+  }
+
+  Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms());
+  if (kit.failing())
+    return NULL;  // might happen because of NodeCountInliningCutoff
+
+  SafePointNode* slow_map = NULL;
+  JVMState* slow_jvms;
+  if (slow_ctl != NULL) {
+    PreserveJVMState pjvms(&kit);
+    kit.set_control(slow_ctl);
+    if (!kit.stopped()) {
+      slow_jvms = _cg->generate(kit.sync_jvms());
+      if (kit.failing())
+        return NULL;  // might happen because of NodeCountInliningCutoff
+      assert(slow_jvms != NULL, "must be");
+      kit.add_exception_states_from(slow_jvms);
+      kit.set_map(slow_jvms->map());
+      if (!kit.stopped())
+        slow_map = kit.stop();
+    }
+  }
+
+  if (kit.stopped()) {
+    // Predicate is always false.
+    kit.set_jvms(slow_jvms);
+    return kit.transfer_exceptions_into_jvms();
+  }
+
+  // Generate intrinsic code:
+  JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms());
+  if (new_jvms == NULL) {
+    // Intrinsic failed, so use slow code or make a direct call.
+    if (slow_map == NULL) {
+      CallGenerator* cg = CallGenerator::for_direct_call(method());
+      new_jvms = cg->generate(kit.sync_jvms());
+    } else {
+      kit.set_jvms(slow_jvms);
+      return kit.transfer_exceptions_into_jvms();
+    }
+  }
+  kit.add_exception_states_from(new_jvms);
+  kit.set_jvms(new_jvms);
+
+  // Need to merge slow and fast?
+  if (slow_map == NULL) {
+    // The fast path is the only path remaining.
+    return kit.transfer_exceptions_into_jvms();
+  }
+
+  if (kit.stopped()) {
+    // Intrinsic method threw an exception, so it's just the slow path after all.
+    kit.set_jvms(slow_jvms);
+    return kit.transfer_exceptions_into_jvms();
+  }
+
+  // Finish the diamond.
+  kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
+  RegionNode* region = new (kit.C) RegionNode(3);
+  region->init_req(1, kit.control());
+  region->init_req(2, slow_map->control());
+  kit.set_control(gvn.transform(region));
+  Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
+  iophi->set_req(2, slow_map->i_o());
+  kit.set_i_o(gvn.transform(iophi));
+  kit.merge_memory(slow_map->merged_memory(), region, 2);
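+  // Memory and i_o are merged above; the loop below inserts phis for any differing locals and stack slots.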
+  uint tos = kit.jvms()->stkoff() + kit.sp();
+  uint limit = slow_map->req();
+  for (uint i = TypeFunc::Parms; i < limit; i++) {
+    // Skip unused stack slots; fast-forward to monoff().
+    if (i == tos) {
+      i = kit.jvms()->monoff();
+      if (i >= limit) break;
+    }
+    Node* m = kit.map()->in(i);
+    Node* n = slow_map->in(i);
+    if (m != n) {
+      const Type* t = gvn.type(m)->meet(gvn.type(n));
+      Node* phi = PhiNode::make(region, m, t);
+      phi->set_req(2, n);
+      kit.map()->set_req(i, gvn.transform(phi));
+    }
+  }
+  return kit.transfer_exceptions_into_jvms();
+}
+
 //-------------------------UncommonTrapCallGenerator-----------------------------
 // Internal class which handles all out-of-line calls checking receiver type.
 class UncommonTrapCallGenerator : public CallGenerator {
--- a/src/share/vm/opto/callGenerator.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/callGenerator.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -68,6 +68,12 @@
 
   // is_late_inline: supports conversion of call into an inline
   virtual bool      is_late_inline() const      { return false; }
+  // Same as is_late_inline, but for method handle calls.
+  virtual bool      is_mh_late_inline() const   { return false; }
+
+  // For method handle calls: have we already tried inlining the call?
+  virtual bool      already_attempted() const   { ShouldNotReachHere(); return false; }
+
   // Replace the call with an inline version of the code
   virtual void do_late_inline() { ShouldNotReachHere(); }
 
@@ -112,11 +118,13 @@
   static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index);  // virtual, interface
   static CallGenerator* for_dynamic_call(ciMethod* m);   // invokedynamic
 
-  static CallGenerator* for_method_handle_call(  JVMState* jvms, ciMethod* caller, ciMethod* callee);
-  static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee);
+  static CallGenerator* for_method_handle_call(  JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden);
+  static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const);
 
   // How to replace a direct call with an inline version
   static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
+  static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const);
+  static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg);
 
   // How to make a call but defer the decision whether to inline or not.
   static CallGenerator* for_warm_call(WarmCallInfo* ci,
@@ -143,10 +151,15 @@
   // Registry for intrinsics:
   static CallGenerator* for_intrinsic(ciMethod* m);
   static void register_intrinsic(ciMethod* m, CallGenerator* cg);
+  static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic,
+                                                CallGenerator* cg);
+  virtual Node* generate_predicate(JVMState* jvms) { return NULL; }
 
-  static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
+  virtual void print_inlining_late(const char* msg) { ShouldNotReachHere(); }
+
+  static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) {
     if (PrintInlining)
-      CompileTask::print_inlining(callee, inline_level, bci, msg);
+      C->print_inlining(callee, inline_level, bci, msg);
   }
 };
 
--- a/src/share/vm/opto/callnode.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/callnode.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "ci/bcEscapeAnalyzer.hpp"
 #include "compiler/oopMap.hpp"
+#include "opto/callGenerator.hpp"
 #include "opto/callnode.hpp"
 #include "opto/escape.hpp"
 #include "opto/locknode.hpp"
@@ -164,13 +165,13 @@
 
 
 #ifndef PRODUCT
-void ReturnNode::dump_req() const {
+void ReturnNode::dump_req(outputStream *st) const {
   // Dump the required inputs, enclosed in '(' and ')'
   uint i;                       // Exit value of loop
-  for( i=0; i<req(); i++ ) {    // For all required inputs
-    if( i == TypeFunc::Parms ) tty->print("returns");
-    if( in(i) ) tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
-    else tty->print("_ ");
+  for (i = 0; i < req(); i++) {    // For all required inputs
+    if (i == TypeFunc::Parms) st->print("returns");
+    if (in(i)) st->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+    else st->print("_ ");
   }
 }
 #endif
@@ -207,13 +208,13 @@
 }
 
 #ifndef PRODUCT
-void RethrowNode::dump_req() const {
+void RethrowNode::dump_req(outputStream *st) const {
   // Dump the required inputs, enclosed in '(' and ')'
   uint i;                       // Exit value of loop
-  for( i=0; i<req(); i++ ) {    // For all required inputs
-    if( i == TypeFunc::Parms ) tty->print("exception");
-    if( in(i) ) tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
-    else tty->print("_ ");
+  for (i = 0; i < req(); i++) {    // For all required inputs
+    if (i == TypeFunc::Parms) st->print("exception");
+    if (in(i)) st->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+    else st->print("_ ");
   }
 }
 #endif
@@ -329,7 +330,8 @@
     st->print(" %s%d]=#ScObj" INT32_FORMAT, msg, i, sco_n);
     return;
   }
-  if( OptoReg::is_valid(regalloc->get_reg_first(n))) { // Check for undefined
+  if (regalloc->node_regs_max_index() > 0 &&
+      OptoReg::is_valid(regalloc->get_reg_first(n))) { // Check for undefined
     char buf[50];
     regalloc->dump_register(n,buf);
     st->print(" %s%d]=%s",msg,i,buf);
@@ -380,7 +382,7 @@
 //------------------------------format-----------------------------------------
 void JVMState::format(PhaseRegAlloc *regalloc, const Node *n, outputStream* st) const {
   st->print("        #");
-  if( _method ) {
+  if (_method) {
     _method->print_short_name(st);
     st->print(" @ bci:%d ",_bci);
   } else {
@@ -392,21 +394,22 @@
     MachSafePointNode *mcall = n->as_MachSafePoint();
     uint i;
     // Print locals
-    for( i = 0; i < (uint)loc_size(); i++ )
-      format_helper( regalloc, st, mcall->local(this, i), "L[", i, &scobjs );
+    for (i = 0; i < (uint)loc_size(); i++)
+      format_helper(regalloc, st, mcall->local(this, i), "L[", i, &scobjs);
     // Print stack
     for (i = 0; i < (uint)stk_size(); i++) {
       if ((uint)(_stkoff + i) >= mcall->len())
         st->print(" oob ");
       else
-       format_helper( regalloc, st, mcall->stack(this, i), "STK[", i, &scobjs );
+       format_helper(regalloc, st, mcall->stack(this, i), "STK[", i, &scobjs);
     }
     for (i = 0; (int)i < nof_monitors(); i++) {
       Node *box = mcall->monitor_box(this, i);
       Node *obj = mcall->monitor_obj(this, i);
-      if ( OptoReg::is_valid(regalloc->get_reg_first(box)) ) {
+      if (regalloc->node_regs_max_index() > 0 &&
+          OptoReg::is_valid(regalloc->get_reg_first(box))) {
         box = BoxLockNode::box_node(box);
-        format_helper( regalloc, st, box, "MON-BOX[", i, &scobjs );
+        format_helper(regalloc, st, box, "MON-BOX[", i, &scobjs);
       } else {
         OptoReg::Name box_reg = BoxLockNode::reg(box);
         st->print(" MON-BOX%d=%s+%d",
@@ -419,7 +422,7 @@
         if (BoxLockNode::box_node(box)->is_eliminated())
           obj_msg = "MON-OBJ(LOCK ELIMINATED)[";
       }
-      format_helper( regalloc, st, obj, obj_msg, i, &scobjs );
+      format_helper(regalloc, st, obj, obj_msg, i, &scobjs);
     }
 
     for (i = 0; i < (uint)scobjs.length(); i++) {
@@ -462,9 +465,9 @@
           st->print(" [");
           cifield = iklass->nonstatic_field_at(0);
           cifield->print_name_on(st);
-          format_helper( regalloc, st, fld_node, ":", 0, &scobjs );
+          format_helper(regalloc, st, fld_node, ":", 0, &scobjs);
         } else {
-          format_helper( regalloc, st, fld_node, "[", 0, &scobjs );
+          format_helper(regalloc, st, fld_node, "[", 0, &scobjs);
         }
         for (uint j = 1; j < nf; j++) {
           fld_node = mcall->in(first_ind+j);
@@ -472,9 +475,9 @@
             st->print(", [");
             cifield = iklass->nonstatic_field_at(j);
             cifield->print_name_on(st);
-            format_helper( regalloc, st, fld_node, ":", j, &scobjs );
+            format_helper(regalloc, st, fld_node, ":", j, &scobjs);
           } else {
-            format_helper( regalloc, st, fld_node, ", [", j, &scobjs );
+            format_helper(regalloc, st, fld_node, ", [", j, &scobjs);
           }
         }
       }
@@ -482,7 +485,7 @@
     }
   }
   st->print_cr("");
-  if (caller() != NULL)  caller()->format(regalloc, n, st);
+  if (caller() != NULL) caller()->format(regalloc, n, st);
 }
 
 
@@ -585,15 +588,15 @@
 uint CallNode::cmp( const Node &n ) const
 { return _tf == ((CallNode&)n)._tf && _jvms == ((CallNode&)n)._jvms; }
 #ifndef PRODUCT
-void CallNode::dump_req() const {
+void CallNode::dump_req(outputStream *st) const {
   // Dump the required inputs, enclosed in '(' and ')'
   uint i;                       // Exit value of loop
-  for( i=0; i<req(); i++ ) {    // For all required inputs
-    if( i == TypeFunc::Parms ) tty->print("(");
-    if( in(i) ) tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
-    else tty->print("_ ");
+  for (i = 0; i < req(); i++) {    // For all required inputs
+    if (i == TypeFunc::Parms) st->print("(");
+    if (in(i)) st->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+    else st->print("_ ");
   }
-  tty->print(")");
+  st->print(")");
 }
 
 void CallNode::dump_spec(outputStream *st) const {
@@ -751,7 +754,7 @@
         projs->fallthrough_ioproj = pn;
       for (DUIterator j = pn->outs(); pn->has_out(j); j++) {
         Node* e = pn->out(j);
-        if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj()) {
+        if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj() && e->outcnt() > 0) {
           assert(projs->exobj == NULL, "only one");
           projs->exobj = e;
         }
@@ -775,16 +778,38 @@
   // and the exception object may not exist if an exception handler
   // swallows the exception but all the other must exist and be found.
   assert(projs->fallthrough_proj      != NULL, "must be found");
-  assert(projs->fallthrough_catchproj != NULL, "must be found");
-  assert(projs->fallthrough_memproj   != NULL, "must be found");
-  assert(projs->fallthrough_ioproj    != NULL, "must be found");
-  assert(projs->catchall_catchproj    != NULL, "must be found");
+  assert(Compile::current()->inlining_incrementally() || projs->fallthrough_catchproj != NULL, "must be found");
+  assert(Compile::current()->inlining_incrementally() || projs->fallthrough_memproj   != NULL, "must be found");
+  assert(Compile::current()->inlining_incrementally() || projs->fallthrough_ioproj    != NULL, "must be found");
+  assert(Compile::current()->inlining_incrementally() || projs->catchall_catchproj    != NULL, "must be found");
   if (separate_io_proj) {
-    assert(projs->catchall_memproj      != NULL, "must be found");
-    assert(projs->catchall_ioproj       != NULL, "must be found");
+    assert(Compile::current()->inlining_incrementally() || projs->catchall_memproj    != NULL, "must be found");
+    assert(Compile::current()->inlining_incrementally() || projs->catchall_ioproj     != NULL, "must be found");
   }
 }
 
+Node *CallNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  CallGenerator* cg = generator();
+  if (can_reshape && cg != NULL && cg->is_mh_late_inline() && !cg->already_attempted()) {
+    // Check whether this MethodHandle call becomes a candidate for inlining
+    ciMethod* callee = cg->method();
+    vmIntrinsics::ID iid = callee->intrinsic_id();
+    if (iid == vmIntrinsics::_invokeBasic) {
+      if (in(TypeFunc::Parms)->Opcode() == Op_ConP) {
+        phase->C->prepend_late_inline(cg);
+        set_generator(NULL);
+      }
+    } else {
+      assert(callee->has_member_arg(), "wrong type of call?");
+      if (in(TypeFunc::Parms + callee->arg_size() - 1)->Opcode() == Op_ConP) {
+        phase->C->prepend_late_inline(cg);
+        set_generator(NULL);
+      }
+    }
+  }
+  return SafePointNode::Ideal(phase, can_reshape);
+}
+
 
 //=============================================================================
 uint CallJavaNode::size_of() const { return sizeof(*this); }
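
The new CallNode::Ideal hook above re-queues a MethodHandle linker call for
late inlining once its target argument has constant-folded. A minimal sketch
of that check, mirroring the names in the hunk (illustrative only, not part
of the patch):

    // For MethodHandle.invokeBasic the MethodHandle receiver is the first
    // parameter; for the linkTo* intrinsics the trailing MemberName argument
    // carries the target. Either one folding to a constant (Op_ConP) makes
    // the call a late-inlining candidate.
    static bool is_late_inline_candidate(CallNode* call, CallGenerator* cg) {
      ciMethod* callee = cg->method();
      int arg_idx = (callee->intrinsic_id() == vmIntrinsics::_invokeBasic)
                    ? TypeFunc::Parms                            // MH receiver
                    : TypeFunc::Parms + callee->arg_size() - 1;  // MemberName
      return call->in(arg_idx)->Opcode() == Op_ConP;
    }
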
--- a/src/share/vm/opto/callnode.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/callnode.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -126,7 +126,7 @@
   virtual uint ideal_reg() const { return NotAMachineReg; }
   virtual uint match_edge(uint idx) const;
 #ifndef PRODUCT
-  virtual void dump_req() const;
+  virtual void dump_req(outputStream *st = tty) const;
 #endif
 };
 
@@ -147,7 +147,7 @@
   virtual uint match_edge(uint idx) const;
   virtual uint ideal_reg() const { return NotAMachineReg; }
 #ifndef PRODUCT
-  virtual void dump_req() const;
+  virtual void dump_req(outputStream *st = tty) const;
 #endif
 };
 
@@ -344,17 +344,26 @@
   OopMap *oop_map() const { return _oop_map; }
   void set_oop_map(OopMap *om) { _oop_map = om; }
 
+ private:
+  void verify_input(JVMState* jvms, uint idx) const {
+    assert(verify_jvms(jvms), "jvms must match");
+    Node* n = in(idx);
+    assert((!n->bottom_type()->isa_long() && !n->bottom_type()->isa_double()) ||
+           in(idx + 1)->is_top(), "2nd half of long/double");
+  }
+
+ public:
   // Functionality from old debug nodes which has changed
   Node *local(JVMState* jvms, uint idx) const {
-    assert(verify_jvms(jvms), "jvms must match");
+    verify_input(jvms, jvms->locoff() + idx);
     return in(jvms->locoff() + idx);
   }
   Node *stack(JVMState* jvms, uint idx) const {
-    assert(verify_jvms(jvms), "jvms must match");
+    verify_input(jvms, jvms->stkoff() + idx);
     return in(jvms->stkoff() + idx);
   }
   Node *argument(JVMState* jvms, uint idx) const {
-    assert(verify_jvms(jvms), "jvms must match");
+    verify_input(jvms, jvms->argoff() + idx);
     return in(jvms->argoff() + idx);
   }
   Node *monitor_box(JVMState* jvms, uint idx) const {
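
For context: longs and doubles occupy two consecutive slots in the JVM state,
and the ideal graph represents the second half as top. A hedged restatement of
the invariant verify_input enforces (illustrative, not part of the patch):

    // Assumed layout for a two-slot local at index i:
    //   in(locoff + i)     -> the node holding the long/double value
    //   in(locoff + i + 1) -> top(), placeholder for the second half
    Node* v = sfpt->in(jvms->locoff() + i);
    assert((!v->bottom_type()->isa_long() && !v->bottom_type()->isa_double()) ||
           sfpt->in(jvms->locoff() + i + 1)->is_top(), "2nd half must be top");
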
@@ -498,6 +507,7 @@
   Node* exobj;
 };
 
+class CallGenerator;
 
 //------------------------------CallNode---------------------------------------
 // Call nodes now subsume the function of debug nodes at callsites, so they
@@ -508,26 +518,31 @@
   const TypeFunc *_tf;        // Function type
   address      _entry_point;  // Address of method being called
   float        _cnt;          // Estimate of number of times called
+  CallGenerator* _generator;  // corresponding CallGenerator for some late inline calls
 
   CallNode(const TypeFunc* tf, address addr, const TypePtr* adr_type)
     : SafePointNode(tf->domain()->cnt(), NULL, adr_type),
       _tf(tf),
       _entry_point(addr),
-      _cnt(COUNT_UNKNOWN)
+      _cnt(COUNT_UNKNOWN),
+      _generator(NULL)
   {
     init_class_id(Class_Call);
   }
 
-  const TypeFunc* tf()        const { return _tf; }
-  const address entry_point() const { return _entry_point; }
-  const float   cnt()         const { return _cnt; }
+  const TypeFunc* tf()         const { return _tf; }
+  const address  entry_point() const { return _entry_point; }
+  const float    cnt()         const { return _cnt; }
+  CallGenerator* generator()   const { return _generator; }
 
-  void set_tf(const TypeFunc* tf) { _tf = tf; }
-  void set_entry_point(address p) { _entry_point = p; }
-  void set_cnt(float c)           { _cnt = c; }
+  void set_tf(const TypeFunc* tf)       { _tf = tf; }
+  void set_entry_point(address p)       { _entry_point = p; }
+  void set_cnt(float c)                 { _cnt = c; }
+  void set_generator(CallGenerator* cg) { _generator = cg; }
 
   virtual const Type *bottom_type() const;
   virtual const Type *Value( PhaseTransform *phase ) const;
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual Node *Identity( PhaseTransform *phase ) { return this; }
   virtual uint        cmp( const Node &n ) const;
   virtual uint        size_of() const = 0;
@@ -564,7 +579,7 @@
   virtual uint match_edge(uint idx) const;
 
 #ifndef PRODUCT
-  virtual void        dump_req()  const;
+  virtual void        dump_req(outputStream *st = tty) const;
   virtual void        dump_spec(outputStream *st) const;
 #endif
 };
--- a/src/share/vm/opto/cfgnode.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/cfgnode.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -363,6 +363,49 @@
   return true; // The Region node is unreachable - it is dead.
 }
 
+bool RegionNode::try_clean_mem_phi(PhaseGVN *phase) {
+  // Incremental inlining + PhaseStringOpts sometimes produce:
+  //
+  // cmpP with 1 top input
+  //           |
+  //          If
+  //         /  \
+  //   IfFalse  IfTrue  /- Some Node
+  //         \  /      /    /
+  //        Region    / /-MergeMem
+  //             \---Phi
+  //
+  //
+  // PhaseStringOpts expects the Region to go away and be replaced by
+  // the If's control input, but because there's still a Phi, the
+  // Region stays in the graph. The top input from the cmpP is
+  // propagated forward, and a useful subgraph goes away. The code
+  // below replaces the Phi with the MergeMem so that the Region
+  // is simplified.
+
+  PhiNode* phi = has_unique_phi();
+  if (phi && phi->type() == Type::MEMORY && req() == 3 && phi->is_diamond_phi(true)) {
+    MergeMemNode* m = NULL;
+    assert(phi->req() == 3, "same as region");
+    for (uint i = 1; i < 3; ++i) {
+      Node *mem = phi->in(i);
+      if (mem && mem->is_MergeMem() && in(i)->outcnt() == 1) {
+        // Nothing is control-dependent on path #i except the region itself.
+        m = mem->as_MergeMem();
+        uint j = 3 - i;
+        Node* other = phi->in(j);
+        if (other && other == m->base_memory()) {
+          // m is a successor memory to other, and is not pinned inside the diamond, so push it out.
+          // This will allow the diamond to collapse completely.
+          phase->is_IterGVN()->replace_node(phi, m);
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
 //------------------------------Ideal------------------------------------------
 // Return a node which is more "ideal" than the current node.  Must preserve
 // the CFG, but we can still strip out dead paths.
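
In short, try_clean_mem_phi recognizes a memory Phi whose two inputs are a
MergeMem and that MergeMem's own base memory; such a Phi contributes nothing,
so replacing it with the MergeMem lets Region::Ideal fold the diamond. A
compressed restatement under the diff's own naming (illustrative only):

    // phi->in(i) is a MergeMem m and phi->in(3 - i) == m->base_memory():
    // every memory slice either comes from m or from m's base memory, so
    // the Phi is equivalent to m itself.
    phase->is_IterGVN()->replace_node(phi, m);  // the diamond then collapses
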
@@ -375,6 +418,10 @@
   bool has_phis = false;
   if (can_reshape) {            // Need DU info to check for Phi users
     has_phis = (has_phi() != NULL);       // Cache result
+    if (has_phis && try_clean_mem_phi(phase)) {
+      has_phis = false;
+    }
+
     if (!has_phis) {            // No Phi users?  Nothing merging?
       for (uint i = 1; i < req()-1; i++) {
         Node *if1 = in(i);
@@ -1005,7 +1052,9 @@
 //------------------------------is_diamond_phi---------------------------------
 // Does this Phi represent a simple well-shaped diamond merge?  Return the
 // index of the true path or 0 otherwise.
-int PhiNode::is_diamond_phi() const {
+// If check_control_only is true, do not inspect the If node at the
+// top, and return -1 (not an edge number) on success.
+int PhiNode::is_diamond_phi(bool check_control_only) const {
   // Check for a 2-path merge
   Node *region = in(0);
   if( !region ) return 0;
@@ -1018,6 +1067,7 @@
   Node *iff = ifp1->in(0);
   if( !iff || !iff->is_If() ) return 0;
   if( iff != ifp2->in(0) ) return 0;
+  if (check_control_only)  return -1;
   // Check for a proper bool/cmp
   const Node *b = iff->in(1);
   if( !b->is_Bool() ) return 0;
@@ -1566,6 +1616,10 @@
     Node* n = in(j);            // Get the input
     if (rc == NULL || phase->type(rc) == Type::TOP) {
       if (n != top) {           // Not already top?
+        PhaseIterGVN *igvn = phase->is_IterGVN();
+        if (can_reshape && igvn != NULL) {
+          igvn->_worklist.push(r);
+        }
         set_req(j, top);        // Nuke it down
         progress = this;        // Record progress
       }
--- a/src/share/vm/opto/cfgnode.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/cfgnode.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -95,6 +95,7 @@
   virtual Node *Identity( PhaseTransform *phase );
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual const RegMask &out_RegMask() const;
+  bool try_clean_mem_phi(PhaseGVN *phase);
 };
 
 //------------------------------JProjNode--------------------------------------
@@ -181,7 +182,7 @@
   LoopSafety simple_data_loop_check(Node *in) const;
   // Is it unsafe data loop? It becomes a dead loop if this phi node removed.
   bool is_unsafe_data_reference(Node *in) const;
-  int  is_diamond_phi() const;
+  int  is_diamond_phi(bool check_control_only = false) const;
   virtual int Opcode() const;
   virtual bool pinned() const { return in(0) != 0; }
   virtual const TypePtr *adr_type() const { verify_adr_type(true); return _adr_type; }
--- a/src/share/vm/opto/chaitin.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/chaitin.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1495,7 +1495,7 @@
             cisc->ins_req(1,src);         // Requires a memory edge
           }
           b->_nodes.map(j,cisc);          // Insert into basic block
-          n->subsume_by(cisc); // Correct graph
+          n->subsume_by(cisc, C); // Correct graph
           //
           ++_used_cisc_instructions;
 #ifndef PRODUCT
--- a/src/share/vm/opto/classes.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/classes.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -127,6 +127,7 @@
 macro(DivMod)
 macro(DivModI)
 macro(DivModL)
+macro(EncodeISOArray)
 macro(EncodeP)
 macro(EncodePKlass)
 macro(ExpD)
--- a/src/share/vm/opto/compile.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/compile.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,11 +23,13 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "code/exceptionHandlerTable.hpp"
 #include "code/nmethod.hpp"
 #include "compiler/compileLog.hpp"
+#include "compiler/disassembler.hpp"
 #include "compiler/oopMap.hpp"
 #include "opto/addnode.hpp"
 #include "opto/block.hpp"
@@ -134,7 +136,7 @@
 
 void Compile::register_intrinsic(CallGenerator* cg) {
   if (_intrinsics == NULL) {
-    _intrinsics = new GrowableArray<CallGenerator*>(60);
+    _intrinsics = new (comp_arena()) GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL);
   }
   // This code is stolen from ciObjectFactory::insert.
   // Really, GrowableArray should have methods for
@@ -316,7 +318,12 @@
 }
 
 
-
+static inline bool not_a_node(const Node* n) {
+  if (n == NULL)                   return true;
+  if (((intptr_t)n & 1) != 0)      return true;  // uninitialized, etc.
+  if (*(address*)n == badAddress)  return true;  // killed by Node::destruct
+  return false;
+}
 
 // Identify all nodes that are reachable from below, useful.
 // Use breadth-first pass that records state in a Unique_Node_List,
@@ -337,12 +344,42 @@
     uint max = n->len();
     for( uint i = 0; i < max; ++i ) {
       Node *m = n->in(i);
-      if( m == NULL ) continue;
+      if (not_a_node(m))  continue;
       useful.push(m);
     }
   }
 }
 
+// Update dead_node_list with any missing dead nodes, using the useful
+// list. All nodes not on the useful list are considered dead.
+void Compile::update_dead_node_list(Unique_Node_List &useful) {
+  uint max_idx = unique();
+  VectorSet& useful_node_set = useful.member_set();
+
+  for (uint node_idx = 0; node_idx < max_idx; node_idx++) {
+    // If node with index node_idx is not in useful set,
+    // mark it as dead in dead node list.
+    if (!useful_node_set.test(node_idx)) {
+      record_dead_node(node_idx);
+    }
+  }
+}
+
+void Compile::remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful) {
+  int shift = 0;
+  for (int i = 0; i < inlines->length(); i++) {
+    CallGenerator* cg = inlines->at(i);
+    CallNode* call = cg->call_node();
+    if (shift > 0) {
+      inlines->at_put(i-shift, cg);
+    }
+    if (!useful.member(call)) {
+      shift++;
+    }
+  }
+  inlines->trunc_to(inlines->length()-shift);
+}
+
 // Disconnect all useless nodes by disconnecting those at the boundary.
 void Compile::remove_useless_nodes(Unique_Node_List &useful) {
   uint next = 0;
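
The remove_useless_late_inlines loop above is a stable in-place compaction:
surviving entries slide left by the running count of removed ones, then the
array is truncated. The same idiom in a self-contained form (plain C++,
hypothetical names, for illustration):

    #include <vector>

    // Keep only elements satisfying keep(); preserves order, runs in O(n).
    template <typename T, typename Pred>
    void compact(std::vector<T>& v, Pred keep) {
      int shift = 0;
      for (size_t i = 0; i < v.size(); i++) {
        T cur = v[i];
        if (shift > 0) v[i - shift] = cur;  // slide the current entry left
        if (!keep(cur)) shift++;            // drop it: next survivor overwrites
      }
      v.resize(v.size() - shift);
    }
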
@@ -372,6 +409,9 @@
       remove_macro_node(n);
     }
   }
+  // clean up the late inline lists
+  remove_useless_late_inlines(&_string_late_inlines, useful);
+  remove_useless_late_inlines(&_late_inlines, useful);
   debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
 }
 
@@ -582,11 +622,21 @@
                   _inner_loops(0),
                   _scratch_const_size(-1),
                   _in_scratch_emit_size(false),
+                  _dead_node_list(comp_arena()),
+                  _dead_node_count(0),
 #ifndef PRODUCT
                   _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
                   _printer(IdealGraphPrinter::printer()),
 #endif
-                  _congraph(NULL) {
+                  _congraph(NULL),
+                  _late_inlines(comp_arena(), 2, 0, NULL),
+                  _string_late_inlines(comp_arena(), 2, 0, NULL),
+                  _late_inlines_pos(0),
+                  _number_of_mh_late_inlines(0),
+                  _inlining_progress(false),
+                  _inlining_incrementally(false),
+                  _print_inlining_list(NULL),
+                  _print_inlining(0) {
   C = this;
 
   CompileWrapper cw(this);
@@ -642,6 +692,9 @@
   PhaseGVN gvn(node_arena(), estimated_size);
   set_initial_gvn(&gvn);
 
+  if (PrintInlining || PrintIntrinsics NOT_PRODUCT( || PrintOptoInlining)) {
+    _print_inlining_list = new (comp_arena()) GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());
+  }
   { // Scope for timing the parser
     TracePhase t3("parse", &_t_parser, true);
 
@@ -708,28 +761,13 @@
       rethrow_exceptions(kit.transfer_exceptions_into_jvms());
     }
 
-    if (!failing() && has_stringbuilder()) {
-      {
-        // remove useless nodes to make the usage analysis simpler
-        ResourceMark rm;
-        PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
-      }
-
-      {
-        ResourceMark rm;
-        print_method("Before StringOpts", 3);
-        PhaseStringOpts pso(initial_gvn(), &for_igvn);
-        print_method("After StringOpts", 3);
-      }
-
-      // now inline anything that we skipped the first time around
-      while (_late_inlines.length() > 0) {
-        CallGenerator* cg = _late_inlines.pop();
-        cg->do_late_inline();
-        if (failing())  return;
-      }
+    assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off");
+
+    if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) {
+      inline_string_calls(true);
     }
-    assert(_late_inlines.length() == 0, "should have been processed");
+
+    if (failing())  return;
 
     print_method("Before RemoveUseless", 3);
 
@@ -873,7 +911,14 @@
     _trace_opto_output(TraceOptoOutput),
     _printer(NULL),
 #endif
-    _congraph(NULL) {
+    _dead_node_list(comp_arena()),
+    _dead_node_count(0),
+    _congraph(NULL),
+    _number_of_mh_late_inlines(0),
+    _inlining_progress(false),
+    _inlining_incrementally(false),
+    _print_inlining_list(NULL),
+    _print_inlining(0) {
   C = this;
 
 #ifndef PRODUCT
@@ -1069,6 +1114,72 @@
   assert(_top == NULL || top()->is_top(), "");
 }
 
+#ifdef ASSERT
+uint Compile::count_live_nodes_by_graph_walk() {
+  Unique_Node_List useful(comp_arena());
+  // Get useful node list by walking the graph.
+  identify_useful_nodes(useful);
+  return useful.size();
+}
+
+void Compile::print_missing_nodes() {
+
+  // Return if CompileLog is NULL and PrintIdealNodeCount is false.
+  if ((_log == NULL) && (! PrintIdealNodeCount)) {
+    return;
+  }
+
+  // This is an expensive function. It is executed only when the user
+  // specifies the VerifyIdealNodeCount option or otherwise accepts the
+  // additional work needed to identify reachable nodes by walking the
+  // flow graph and to find the missing ones using _dead_node_list.
+
+  Unique_Node_List useful(comp_arena());
+  // Get useful node list by walking the graph.
+  identify_useful_nodes(useful);
+
+  uint l_nodes = C->live_nodes();
+  uint l_nodes_by_walk = useful.size();
+
+  if (l_nodes != l_nodes_by_walk) {
+    if (_log != NULL) {
+      _log->begin_head("mismatched_nodes count='%d'", abs((int) (l_nodes - l_nodes_by_walk)));
+      _log->stamp();
+      _log->end_head();
+    }
+    VectorSet& useful_member_set = useful.member_set();
+    int last_idx = l_nodes_by_walk;
+    for (int i = 0; i < last_idx; i++) {
+      if (useful_member_set.test(i)) {
+        if (_dead_node_list.test(i)) {
+          if (_log != NULL) {
+            _log->elem("mismatched_node_info node_idx='%d' type='both live and dead'", i);
+          }
+          if (PrintIdealNodeCount) {
+            // Print the log message to tty
+            tty->print_cr("mismatched_node idx='%d' type='both live and dead'", i);
+            useful.at(i)->dump();
+          }
+        }
+      }
+      else if (! _dead_node_list.test(i)) {
+        if (_log != NULL) {
+          _log->elem("mismatched_node_info node_idx='%d' type='neither live nor dead'", i);
+        }
+        if (PrintIdealNodeCount) {
+          // Print the log message to tty
+          tty->print_cr("mismatched_node idx='%d' type='neither live nor dead'", i);
+        }
+      }
+    }
+    if (_log != NULL) {
+      _log->tail("mismatched_nodes");
+    }
+  }
+}
+#endif
+
 #ifndef PRODUCT
 void Compile::verify_top(Node* tn) const {
   if (tn != NULL) {
@@ -1660,6 +1771,124 @@
   assert(predicate_count()==0, "should be clean!");
 }
 
+// StringOpts and late inlining of string methods
+void Compile::inline_string_calls(bool parse_time) {
+  {
+    // remove useless nodes to make the usage analysis simpler
+    ResourceMark rm;
+    PhaseRemoveUseless pru(initial_gvn(), for_igvn());
+  }
+
+  {
+    ResourceMark rm;
+    print_method("Before StringOpts", 3);
+    PhaseStringOpts pso(initial_gvn(), for_igvn());
+    print_method("After StringOpts", 3);
+  }
+
+  // now inline anything that we skipped the first time around
+  if (!parse_time) {
+    _late_inlines_pos = _late_inlines.length();
+  }
+
+  while (_string_late_inlines.length() > 0) {
+    CallGenerator* cg = _string_late_inlines.pop();
+    cg->do_late_inline();
+    if (failing())  return;
+  }
+  _string_late_inlines.trunc_to(0);
+}
+
+void Compile::inline_incrementally_one(PhaseIterGVN& igvn) {
+  assert(IncrementalInline, "incremental inlining should be on");
+  PhaseGVN* gvn = initial_gvn();
+
+  set_inlining_progress(false);
+  for_igvn()->clear();
+  gvn->replace_with(&igvn);
+
+  int i = 0;
+
+  for (; i < _late_inlines.length() && !inlining_progress(); i++) {
+    CallGenerator* cg = _late_inlines.at(i);
+    _late_inlines_pos = i+1;
+    cg->do_late_inline();
+    if (failing())  return;
+  }
+  int j = 0;
+  for (; i < _late_inlines.length(); i++, j++) {
+    _late_inlines.at_put(j, _late_inlines.at(i));
+  }
+  _late_inlines.trunc_to(j);
+
+  {
+    ResourceMark rm;
+    PhaseRemoveUseless pru(C->initial_gvn(), C->for_igvn());
+  }
+
+  igvn = PhaseIterGVN(gvn);
+}
+
+// Perform incremental inlining until bound on number of live nodes is reached
+void Compile::inline_incrementally(PhaseIterGVN& igvn) {
+  PhaseGVN* gvn = initial_gvn();
+
+  set_inlining_incrementally(true);
+  set_inlining_progress(true);
+  uint low_live_nodes = 0;
+
+  while (inlining_progress() && _late_inlines.length() > 0) {
+
+    if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
+      if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
+        // PhaseIdealLoop is expensive, so we only try it once we are
+        // over the node budget, and we only try it again if the
+        // previous run helped get the node count down significantly
+        PhaseIdealLoop ideal_loop( igvn, false, true );
+        if (failing())  return;
+        low_live_nodes = live_nodes();
+        _major_progress = true;
+      }
+
+      if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
+        break;
+      }
+    }
+
+    inline_incrementally_one(igvn);
+
+    if (failing())  return;
+
+    igvn.optimize();
+
+    if (failing())  return;
+  }
+
+  assert( igvn._worklist.size() == 0, "should be done with igvn" );
+
+  if (_string_late_inlines.length() > 0) {
+    assert(has_stringbuilder(), "inconsistent");
+    for_igvn()->clear();
+    initial_gvn()->replace_with(&igvn);
+
+    inline_string_calls(false);
+
+    if (failing())  return;
+
+    {
+      ResourceMark rm;
+      PhaseRemoveUseless pru(initial_gvn(), for_igvn());
+    }
+
+    igvn = PhaseIterGVN(gvn);
+
+    igvn.optimize();
+  }
+
+  set_inlining_incrementally(false);
+}
+
+
 //------------------------------Optimize---------------------------------------
 // Given a graph, optimize it.
 void Compile::Optimize() {
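
To make the 8/10 guard in inline_incrementally concrete (the figure below is
hypothetical, not a default set by this change): with LiveNodeCountInliningCutoff
at 40000, PhaseIdealLoop is re-run only while a previous pass has brought the
live node count under 32000, and incremental inlining stops if the count is
still above 40000 afterwards.

    // Illustrative restatement of the guard, with a hypothetical cutoff:
    const unsigned cutoff = 40000;                 // LiveNodeCountInliningCutoff
    bool retry_loop_opts(unsigned low_live_nodes)  // re-run PhaseIdealLoop?
      { return low_live_nodes < cutoff * 8 / 10; } // i.e. below 32000
    bool over_budget(unsigned live_nodes)          // stop inlining?
      { return live_nodes > cutoff; }
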
@@ -1692,6 +1921,12 @@
 
   if (failing())  return;
 
+  inline_incrementally(igvn);
+
+  print_method("Incremental Inline", 2);
+
+  if (failing())  return;
+
   // Perform escape analysis
   if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
     if (has_loops()) {
@@ -1814,6 +2049,7 @@
 
  } // (End scope of igvn; run destructor if necessary for asserts.)
 
+  dump_inlining();
   // A method with only infinite loops has no edges entering loops from root
   {
     NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
@@ -2087,7 +2323,7 @@
 
 // Eliminate trivially redundant StoreCMs and accumulate their
 // precedence edges.
-static void eliminate_redundant_card_marks(Node* n) {
+void Compile::eliminate_redundant_card_marks(Node* n) {
   assert(n->Opcode() == Op_StoreCM, "expected StoreCM");
   if (n->in(MemNode::Address)->outcnt() > 1) {
     // There are multiple users of the same address so it might be
@@ -2122,7 +2358,7 @@
         // Eliminate the previous StoreCM
         prev->set_req(MemNode::Memory, mem->in(MemNode::Memory));
         assert(mem->outcnt() == 0, "should be dead");
-        mem->disconnect_inputs(NULL);
+        mem->disconnect_inputs(NULL, this);
       } else {
         prev = mem;
       }
@@ -2133,7 +2369,7 @@
 
 //------------------------------final_graph_reshaping_impl----------------------
 // Implement items 1-5 from final_graph_reshaping below.
-static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
+void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
 
   if ( n->outcnt() == 0 ) return; // dead node
   uint nop = n->Opcode();
@@ -2163,8 +2399,7 @@
 
 #ifdef ASSERT
   if( n->is_Mem() ) {
-    Compile* C = Compile::current();
-    int alias_idx = C->get_alias_index(n->as_Mem()->adr_type());
+    int alias_idx = get_alias_index(n->as_Mem()->adr_type());
     assert( n->in(0) != NULL || alias_idx != Compile::AliasIdxRaw ||
             // oop will be recorded in oop map if load crosses safepoint
             n->is_Load() && (n->as_Load()->bottom_type()->isa_oopptr() ||
@@ -2213,7 +2448,7 @@
     break;
   case Op_Opaque1:              // Remove Opaque Nodes before matching
   case Op_Opaque2:              // Remove Opaque Nodes before matching
-    n->subsume_by(n->in(1));
+    n->subsume_by(n->in(1), this);
     break;
   case Op_CallStaticJava:
   case Op_CallJava:
@@ -2337,8 +2572,7 @@
         int op = t->isa_oopptr() ? Op_ConN : Op_ConNKlass;
 
         // Look for existing ConN node of the same exact type.
-        Compile* C = Compile::current();
-        Node* r  = C->root();
+        Node* r  = root();
         uint cnt = r->outcnt();
         for (uint i = 0; i < cnt; i++) {
           Node* m = r->raw_out(i);
@@ -2352,14 +2586,14 @@
           // Decode a narrow oop to match address
           // [R12 + narrow_oop_reg<<3 + offset]
           if (t->isa_oopptr()) {
-            nn = new (C) DecodeNNode(nn, t);
+            nn = new (this) DecodeNNode(nn, t);
           } else {
-            nn = new (C) DecodeNKlassNode(nn, t);
+            nn = new (this) DecodeNKlassNode(nn, t);
           }
           n->set_req(AddPNode::Base, nn);
           n->set_req(AddPNode::Address, nn);
           if (addp->outcnt() == 0) {
-            addp->disconnect_inputs(NULL);
+            addp->disconnect_inputs(NULL, this);
           }
         }
       }
@@ -2371,7 +2605,6 @@
 #ifdef _LP64
   case Op_CastPP:
     if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {
-      Compile* C = Compile::current();
       Node* in1 = n->in(1);
       const Type* t = n->bottom_type();
       Node* new_in1 = in1->clone();
@@ -2400,9 +2633,9 @@
         new_in1->set_req(0, n->in(0));
       }
 
-      n->subsume_by(new_in1);
+      n->subsume_by(new_in1, this);
       if (in1->outcnt() == 0) {
-        in1->disconnect_inputs(NULL);
+        in1->disconnect_inputs(NULL, this);
       }
     }
     break;
@@ -2419,7 +2652,6 @@
       }
       assert(in1->is_DecodeNarrowPtr(), "sanity");
 
-      Compile* C = Compile::current();
       Node* new_in2 = NULL;
       if (in2->is_DecodeNarrowPtr()) {
         assert(in2->Opcode() == in1->Opcode(), "must be same node type");
@@ -2432,7 +2664,7 @@
           // oops implicit null check is not generated.
           // This will allow to generate normal oop implicit null check.
           if (Matcher::gen_narrow_oop_implicit_null_checks())
-            new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
+            new_in2 = ConNode::make(this, TypeNarrowOop::NULL_PTR);
           //
           // This transformation together with CastPP transformation above
           // will generated code for implicit NULL checks for compressed oops.
@@ -2471,19 +2703,19 @@
           //    NullCheck base_reg
           //
         } else if (t->isa_oopptr()) {
-          new_in2 = ConNode::make(C, t->make_narrowoop());
+          new_in2 = ConNode::make(this, t->make_narrowoop());
         } else if (t->isa_klassptr()) {
-          new_in2 = ConNode::make(C, t->make_narrowklass());
+          new_in2 = ConNode::make(this, t->make_narrowklass());
         }
       }
       if (new_in2 != NULL) {
-        Node* cmpN = new (C) CmpNNode(in1->in(1), new_in2);
-        n->subsume_by( cmpN );
+        Node* cmpN = new (this) CmpNNode(in1->in(1), new_in2);
+        n->subsume_by(cmpN, this);
         if (in1->outcnt() == 0) {
-          in1->disconnect_inputs(NULL);
+          in1->disconnect_inputs(NULL, this);
         }
         if (in2->outcnt() == 0) {
-          in2->disconnect_inputs(NULL);
+          in2->disconnect_inputs(NULL, this);
         }
       }
     }
@@ -2501,21 +2733,20 @@
   case Op_EncodePKlass: {
     Node* in1 = n->in(1);
     if (in1->is_DecodeNarrowPtr()) {
-      n->subsume_by(in1->in(1));
+      n->subsume_by(in1->in(1), this);
     } else if (in1->Opcode() == Op_ConP) {
-      Compile* C = Compile::current();
       const Type* t = in1->bottom_type();
       if (t == TypePtr::NULL_PTR) {
         assert(t->isa_oopptr(), "null klass?");
-        n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR));
+        n->subsume_by(ConNode::make(this, TypeNarrowOop::NULL_PTR), this);
       } else if (t->isa_oopptr()) {
-        n->subsume_by(ConNode::make(C, t->make_narrowoop()));
+        n->subsume_by(ConNode::make(this, t->make_narrowoop()), this);
       } else if (t->isa_klassptr()) {
-        n->subsume_by(ConNode::make(C, t->make_narrowklass()));
+        n->subsume_by(ConNode::make(this, t->make_narrowklass()), this);
       }
     }
     if (in1->outcnt() == 0) {
-      in1->disconnect_inputs(NULL);
+      in1->disconnect_inputs(NULL, this);
     }
     break;
   }
@@ -2538,7 +2769,7 @@
           }
         }
         assert(proj != NULL, "must be found");
-        p->subsume_by(proj);
+        p->subsume_by(proj, this);
       }
     }
     break;
@@ -2558,7 +2789,7 @@
           unique_in = NULL;
       }
       if (unique_in != NULL) {
-        n->subsume_by(unique_in);
+        n->subsume_by(unique_in, this);
       }
     }
     break;
@@ -2571,16 +2802,15 @@
       Node* d = n->find_similar(Op_DivI);
       if (d) {
         // Replace them with a fused divmod if supported
-        Compile* C = Compile::current();
         if (Matcher::has_match_rule(Op_DivModI)) {
-          DivModINode* divmod = DivModINode::make(C, n);
-          d->subsume_by(divmod->div_proj());
-          n->subsume_by(divmod->mod_proj());
+          DivModINode* divmod = DivModINode::make(this, n);
+          d->subsume_by(divmod->div_proj(), this);
+          n->subsume_by(divmod->mod_proj(), this);
         } else {
           // replace a%b with a-((a/b)*b)
-          Node* mult = new (C) MulINode(d, d->in(2));
-          Node* sub  = new (C) SubINode(d->in(1), mult);
-          n->subsume_by( sub );
+          Node* mult = new (this) MulINode(d, d->in(2));
+          Node* sub  = new (this) SubINode(d->in(1), mult);
+          n->subsume_by(sub, this);
         }
       }
     }
@@ -2592,16 +2822,15 @@
       Node* d = n->find_similar(Op_DivL);
       if (d) {
         // Replace them with a fused divmod if supported
-        Compile* C = Compile::current();
         if (Matcher::has_match_rule(Op_DivModL)) {
-          DivModLNode* divmod = DivModLNode::make(C, n);
-          d->subsume_by(divmod->div_proj());
-          n->subsume_by(divmod->mod_proj());
+          DivModLNode* divmod = DivModLNode::make(this, n);
+          d->subsume_by(divmod->div_proj(), this);
+          n->subsume_by(divmod->mod_proj(), this);
         } else {
           // replace a%b with a-((a/b)*b)
-          Node* mult = new (C) MulLNode(d, d->in(2));
-          Node* sub  = new (C) SubLNode(d->in(1), mult);
-          n->subsume_by( sub );
+          Node* mult = new (this) MulLNode(d, d->in(2));
+          Node* sub  = new (this) SubLNode(d->in(1), mult);
+          n->subsume_by(sub, this);
         }
       }
     }
@@ -2620,8 +2849,8 @@
     if (n->req()-1 > 2) {
       // Replace many operand PackNodes with a binary tree for matching
       PackNode* p = (PackNode*) n;
-      Node* btp = p->binary_tree_pack(Compile::current(), 1, n->req());
-      n->subsume_by(btp);
+      Node* btp = p->binary_tree_pack(this, 1, n->req());
+      n->subsume_by(btp, this);
     }
     break;
   case Op_Loop:
@@ -2645,18 +2874,16 @@
       if (t != NULL && t->is_con()) {
         juint shift = t->get_con();
         if (shift > mask) { // Unsigned cmp
-          Compile* C = Compile::current();
-          n->set_req(2, ConNode::make(C, TypeInt::make(shift & mask)));
+          n->set_req(2, ConNode::make(this, TypeInt::make(shift & mask)));
         }
       } else {
         if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
-          Compile* C = Compile::current();
-          Node* shift = new (C) AndINode(in2, ConNode::make(C, TypeInt::make(mask)));
+          Node* shift = new (this) AndINode(in2, ConNode::make(this, TypeInt::make(mask)));
           n->set_req(2, shift);
         }
       }
       if (in2->outcnt() == 0) { // Remove dead node
-        in2->disconnect_inputs(NULL);
+        in2->disconnect_inputs(NULL, this);
       }
     }
     break;
@@ -2674,7 +2901,7 @@
 //------------------------------final_graph_reshaping_walk---------------------
 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
 // requires that the walk visits a node's inputs before visiting the node.
-static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {
+void Compile::final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {
   ResourceArea *area = Thread::current()->resource_area();
   Unique_Node_List sfpt(area);
 
@@ -2741,7 +2968,7 @@
           n->set_req(j, in->in(1));
         }
         if (in->outcnt() == 0) {
-          in->disconnect_inputs(NULL);
+          in->disconnect_inputs(NULL, this);
         }
       }
     }
@@ -3014,7 +3241,8 @@
 }
 
 Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
-  : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false)
+  : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false),
+    _phase_name(name), _dolog(dolog)
 {
   if (dolog) {
     C = Compile::current();
@@ -3024,15 +3252,34 @@
     _log = NULL;
   }
   if (_log != NULL) {
-    _log->begin_head("phase name='%s' nodes='%d'", name, C->unique());
+    _log->begin_head("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes());
     _log->stamp();
     _log->end_head();
   }
 }
 
 Compile::TracePhase::~TracePhase() {
+
+  C = Compile::current();
+  if (_dolog) {
+    _log = C->log();
+  } else {
+    _log = NULL;
+  }
+
+#ifdef ASSERT
+  if (PrintIdealNodeCount) {
+    tty->print_cr("phase name='%s' nodes='%d' live='%d' live_graph_walk='%d'",
+                  _phase_name, C->unique(), C->live_nodes(), C->count_live_nodes_by_graph_walk());
+  }
+
+  if (VerifyIdealNodeCount) {
+    Compile::current()->print_missing_nodes();
+  }
+#endif
+
   if (_log != NULL) {
-    _log->done("phase nodes='%d'", C->unique());
+    _log->done("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes());
   }
 }
 
@@ -3047,9 +3294,9 @@
   case T_LONG:
   case T_DOUBLE:  return (_v._value.j == other._v._value.j);
   case T_OBJECT:
-  case T_METADATA: return (_v._metadata == other._v._metadata);
   case T_ADDRESS: return (_v._value.l == other._v._value.l);
   case T_VOID:    return (_v._value.l == other._v._value.l);  // jump-table entries
+  case T_METADATA: return (_v._metadata == other._v._metadata);
   default: ShouldNotReachHere();
   }
   return false;
@@ -3248,3 +3495,33 @@
     cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
   }
 }
+
+void Compile::dump_inlining() {
+  if (PrintInlining || PrintIntrinsics NOT_PRODUCT( || PrintOptoInlining)) {
+    // Print inlining message for candidates that we couldn't inline
+    // for lack of space or a non-constant receiver
+    for (int i = 0; i < _late_inlines.length(); i++) {
+      CallGenerator* cg = _late_inlines.at(i);
+      cg->print_inlining_late("live nodes > LiveNodeCountInliningCutoff");
+    }
+    Unique_Node_List useful;
+    useful.push(root());
+    for (uint next = 0; next < useful.size(); ++next) {
+      Node* n  = useful.at(next);
+      if (n->is_Call() && n->as_Call()->generator() != NULL && n->as_Call()->generator()->call_node() == n) {
+        CallNode* call = n->as_Call();
+        CallGenerator* cg = call->generator();
+        cg->print_inlining_late("receiver not constant");
+      }
+      uint max = n->len();
+      for ( uint i = 0; i < max; ++i ) {
+        Node *m = n->in(i);
+        if ( m == NULL ) continue;
+        useful.push(m);
+      }
+    }
+    for (int i = 0; i < _print_inlining_list->length(); i++) {
+      tty->print("%s", _print_inlining_list->at(i).ss()->as_string());
+    }
+  }
+}
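
The graph walk in dump_inlining depends on Unique_Node_List acting as a
de-duplicating worklist: pushing an already-recorded node is a no-op, so each
reachable node is visited exactly once even though the loop keeps pushing
inputs. A generic sketch of that traversal pattern (illustrative, with standard
containers instead of VM types):

    #include <unordered_set>
    #include <vector>

    // Visit every node reachable from root exactly once.
    template <typename Node, typename Inputs, typename Visit>
    void walk_unique(Node* root, Inputs inputs_of, Visit visit) {
      std::vector<Node*> list{root};
      std::unordered_set<Node*> seen{root};
      for (size_t next = 0; next < list.size(); ++next) {  // grows as we walk
        Node* n = list[next];
        visit(n);
        for (Node* m : inputs_of(n)) {
          if (m != nullptr && seen.insert(m).second)       // first time seen?
            list.push_back(m);
        }
      }
    }
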
--- a/src/share/vm/opto/compile.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/compile.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,6 +30,7 @@
 #include "code/debugInfoRec.hpp"
 #include "code/exceptionHandlerTable.hpp"
 #include "compiler/compilerOracle.hpp"
+#include "compiler/compileBroker.hpp"
 #include "libadt/dict.hpp"
 #include "libadt/port.hpp"
 #include "libadt/vectset.hpp"
@@ -71,10 +72,13 @@
 class JVMState;
 class TypeData;
 class TypePtr;
+class TypeOopPtr;
 class TypeFunc;
 class Unique_Node_List;
 class nmethod;
 class WarmCallInfo;
+class Node_Stack;
+struct Final_Reshape_Counts;
 
 //------------------------------Compile----------------------------------------
 // This class defines a top-level Compiler invocation.
@@ -98,6 +102,8 @@
    private:
     Compile*    C;
     CompileLog* _log;
+    const char* _phase_name;
+    bool _dolog;
    public:
     TracePhase(const char* name, elapsedTimer* accumulator, bool dolog);
     ~TracePhase();
@@ -149,7 +155,7 @@
   private:
     BasicType _type;
     union {
-    jvalue    _value;
+      jvalue    _value;
       Metadata* _metadata;
     } _v;
     int       _offset;         // offset of this constant (in bytes) relative to the constant table base.
@@ -275,6 +281,8 @@
   int                   _orig_pc_slot_offset_in_bytes;
 
   int                   _major_progress;        // Count of something big happening
+  bool                  _inlining_progress;     // progress doing incremental inlining?
+  bool                  _inlining_incrementally;// Are we doing incremental inlining (post parse)
   bool                  _has_loops;             // True if the method _may_ have some loops
   bool                  _has_split_ifs;         // True if the method _may_ have some split-if
   bool                  _has_unsafe_access;     // True if the method _may_ produce faults in unsafe loads or stores.
@@ -313,6 +321,9 @@
 
   // Node management
   uint                  _unique;                // Counter for unique Node indices
+  VectorSet             _dead_node_list;        // Set of dead nodes
+  uint                  _dead_node_count;       // Number of dead nodes; VectorSet::Size() is O(N).
+                                                // So use this to keep count and make the call O(1).
   debug_only(static int _debug_idx;)            // Monotonic counter (not reset), use -XX:BreakAtNode=<idx>
   Arena                 _node_arena;            // Arena for new-space Nodes
   Arena                 _old_arena;             // Arena for old-space Nodes, lifetime during xform
@@ -359,9 +370,69 @@
   Unique_Node_List*     _for_igvn;              // Initial work-list for next round of Iterative GVN
   WarmCallInfo*         _warm_calls;            // Sorted work-list for heat-based inlining.
 
-  GrowableArray<CallGenerator*> _late_inlines;  // List of CallGenerators to be revisited after
-                                                // main parsing has finished.
+  GrowableArray<CallGenerator*> _late_inlines;        // List of CallGenerators to be revisited after
+                                                      // main parsing has finished.
+  GrowableArray<CallGenerator*> _string_late_inlines; // same but for string operations
+
+  int                           _late_inlines_pos;    // Where in the queue should the next late inlining candidate go (emulate depth-first inlining)
+  uint                          _number_of_mh_late_inlines; // number of method handle late inlining still pending
+
+
+  // Inlining may not happen in parse order which would make
+  // PrintInlining output confusing. Keep track of PrintInlining
+  // pieces in order.
+  class PrintInliningBuffer : public ResourceObj {
+   private:
+    CallGenerator* _cg;
+    stringStream* _ss;
+
+   public:
+    PrintInliningBuffer()
+      : _cg(NULL) { _ss = new stringStream(); }
+
+    stringStream* ss() const { return _ss; }
+    CallGenerator* cg() const { return _cg; }
+    void set_cg(CallGenerator* cg) { _cg = cg; }
+  };
+
+  GrowableArray<PrintInliningBuffer>* _print_inlining_list;
+  int _print_inlining;
+
+ public:
 
+  outputStream* print_inlining_stream() const {
+    return _print_inlining_list->at(_print_inlining).ss();
+  }
+
+  void print_inlining_skip(CallGenerator* cg) {
+    if (PrintInlining) {
+      _print_inlining_list->at(_print_inlining).set_cg(cg);
+      _print_inlining++;
+      _print_inlining_list->insert_before(_print_inlining, PrintInliningBuffer());
+    }
+  }
+
+  void print_inlining_insert(CallGenerator* cg) {
+    if (PrintInlining) {
+      for (int i = 0; i < _print_inlining_list->length(); i++) {
+        if (_print_inlining_list->at(i).cg() == cg) {
+          _print_inlining_list->insert_before(i+1, PrintInliningBuffer());
+          _print_inlining = i+1;
+          _print_inlining_list->at(i).set_cg(NULL);
+          return;
+        }
+      }
+      ShouldNotReachHere();
+    }
+  }
+
+  void print_inlining(ciMethod* method, int inline_level, int bci, const char* msg = NULL) {
+    stringStream ss;
+    CompileTask::print_inlining(&ss, method, inline_level, bci, msg);
+    print_inlining_stream()->print(ss.as_string());
+  }
+
+ private:
   // Matching, CFG layout, allocation, code generation
   PhaseCFG*             _cfg;                   // Results of CFG finding
   bool                  _select_24_bit_instr;   // We selected an instruction with a 24-bit result
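
The skip/insert pair above keeps PrintInlining output in parse order even
though late inlining happens out of order. A worked sequence (call sites A and
B are hypothetical):

    // parse A   -> messages accumulate in buffer 0
    // A deferred: print_inlining_skip(cgA) tags buffer 0 with cgA and
    //             opens buffer 1 as the current stream
    // parse B   -> messages accumulate in buffer 1
    // cgA inlined later: print_inlining_insert(cgA) opens a fresh buffer
    //             right after A's tagged buffer and writes there, so A's
    //             late messages still print before B's in dump_inlining().
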
@@ -428,6 +499,10 @@
   int               fixed_slots() const         { assert(_fixed_slots >= 0, "");         return _fixed_slots; }
   void          set_fixed_slots(int n)          { _fixed_slots = n; }
   int               major_progress() const      { return _major_progress; }
+  void          set_inlining_progress(bool z)   { _inlining_progress = z; }
+  int               inlining_progress() const   { return _inlining_progress; }
+  void          set_inlining_incrementally(bool z) { _inlining_incrementally = z; }
+  int               inlining_incrementally() const { return _inlining_incrementally; }
   void          set_major_progress()            { _major_progress++; }
   void        clear_major_progress()            { _major_progress = 0; }
   int               num_loop_opts() const       { return _num_loop_opts; }
@@ -534,7 +609,7 @@
   ciEnv*            env() const                 { return _env; }
   CompileLog*       log() const                 { return _log; }
   bool              failing() const             { return _env->failing() || _failure_reason != NULL; }
-  const char* failure_reason() { return _failure_reason; }
+  const char*       failure_reason() { return _failure_reason; }
   bool              failure_reason_is(const char* r) { return (r==_failure_reason) || (r!=NULL && _failure_reason!=NULL && strcmp(r, _failure_reason)==0); }
 
   void record_failure(const char* reason);
@@ -549,7 +624,7 @@
     record_method_not_compilable(reason, true);
   }
   bool check_node_count(uint margin, const char* reason) {
-    if (unique() + margin > (uint)MaxNodeLimit) {
+    if (live_nodes() + margin > (uint)MaxNodeLimit) {
       record_method_not_compilable(reason);
       return true;
     } else {
@@ -558,25 +633,41 @@
   }
 
   // Node management
-  uint              unique() const              { return _unique; }
-  uint         next_unique()                    { return _unique++; }
-  void          set_unique(uint i)              { _unique = i; }
-  static int        debug_idx()                 { return debug_only(_debug_idx)+0; }
-  static void   set_debug_idx(int i)            { debug_only(_debug_idx = i); }
-  Arena*            node_arena()                { return &_node_arena; }
-  Arena*            old_arena()                 { return &_old_arena; }
-  RootNode*         root() const                { return _root; }
-  void          set_root(RootNode* r)           { _root = r; }
-  StartNode*        start() const;              // (Derived from root.)
+  uint         unique() const              { return _unique; }
+  uint         next_unique()               { return _unique++; }
+  void         set_unique(uint i)          { _unique = i; }
+  static int   debug_idx()                 { return debug_only(_debug_idx)+0; }
+  static void  set_debug_idx(int i)        { debug_only(_debug_idx = i); }
+  Arena*       node_arena()                { return &_node_arena; }
+  Arena*       old_arena()                 { return &_old_arena; }
+  RootNode*    root() const                { return _root; }
+  void         set_root(RootNode* r)       { _root = r; }
+  StartNode*   start() const;              // (Derived from root.)
   void         init_start(StartNode* s);
-  Node*             immutable_memory();
+  Node*        immutable_memory();
 
-  Node*             recent_alloc_ctl() const    { return _recent_alloc_ctl; }
-  Node*             recent_alloc_obj() const    { return _recent_alloc_obj; }
-  void          set_recent_alloc(Node* ctl, Node* obj) {
+  Node*        recent_alloc_ctl() const    { return _recent_alloc_ctl; }
+  Node*        recent_alloc_obj() const    { return _recent_alloc_obj; }
+  void         set_recent_alloc(Node* ctl, Node* obj) {
                                                   _recent_alloc_ctl = ctl;
                                                   _recent_alloc_obj = obj;
-                                                }
+                                           }
+  void         record_dead_node(uint idx)  { if (_dead_node_list.test_set(idx)) return;
+                                             _dead_node_count++;
+                                           }
+  uint         dead_node_count()           { return _dead_node_count; }
+  void         reset_dead_node_list()      { _dead_node_list.Reset();
+                                             _dead_node_count = 0;
+                                           }
+  uint         live_nodes() const          {
+    int val = _unique - _dead_node_count;
+    assert(val >= 0, err_msg_res("number of created nodes %d is less than number of tracked dead nodes %d", _unique, _dead_node_count));
+    return (uint) val;
+  }
+#ifdef ASSERT
+  uint         count_live_nodes_by_graph_walk();
+  void         print_missing_nodes();
+#endif
 
   // Constant table
   ConstantTable&   constant_table() { return _constant_table; }
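
A quick worked example of the new accounting (figures hypothetical): if 1200
node indices have ever been allocated and 150 of them were recorded dead,
live_nodes() reports 1050, and check_node_count() now compares that live
figure, not the raw allocation count, against MaxNodeLimit.

    // Hypothetical figures, illustration only:
    //   _unique          = 1200   (indices ever handed out)
    //   _dead_node_count = 150    (recorded via record_dead_node)
    unsigned live = 1200 - 150;    // == live_nodes() == 1050
    // check_node_count(margin, r): fails when 1050 + margin > MaxNodeLimit
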
@@ -650,9 +741,17 @@
 
   // Decide how to build a call.
   // The profile factor is a discount to apply to this site's interp. profile.
-  CallGenerator*    call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor, bool allow_intrinsics = true);
+  CallGenerator*    call_generator(ciMethod* call_method, int vtable_index, bool call_does_dispatch, JVMState* jvms, bool allow_inline, float profile_factor, bool allow_intrinsics = true, bool delayed_forbidden = false);
   bool should_delay_inlining(ciMethod* call_method, JVMState* jvms);
 
+  // Helper functions to identify inlining potential at call-site
+  ciMethod* optimize_virtual_call(ciMethod* caller, int bci, ciInstanceKlass* klass,
+                                  ciMethod* callee, const TypeOopPtr* receiver_type,
+                                  bool is_virtual,
+                                  bool &call_does_dispatch, int &vtable_index);
+  ciMethod* optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
+                              ciMethod* callee, const TypeOopPtr* receiver_type);
+
   // Report if there were too many traps at a current method and bci.
   // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded.
   // If there is no MDO at all, report no trap unless told to assume it.
@@ -678,14 +777,46 @@
 
 
   void              identify_useful_nodes(Unique_Node_List &useful);
-  void              remove_useless_nodes  (Unique_Node_List &useful);
+  void              update_dead_node_list(Unique_Node_List &useful);
+  void              remove_useless_nodes (Unique_Node_List &useful);
 
   WarmCallInfo*     warm_calls() const          { return _warm_calls; }
   void          set_warm_calls(WarmCallInfo* l) { _warm_calls = l; }
   WarmCallInfo* pop_warm_call();
 
   // Record this CallGenerator for inlining at the end of parsing.
-  void              add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); }
+  void              add_late_inline(CallGenerator* cg)        {
+    _late_inlines.insert_before(_late_inlines_pos, cg);
+    _late_inlines_pos++;
+  }
+
+  void              prepend_late_inline(CallGenerator* cg)    {
+    _late_inlines.insert_before(0, cg);
+  }
+
+  void              add_string_late_inline(CallGenerator* cg) {
+    _string_late_inlines.push(cg);
+  }
+
+  void remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful);
+
+  void dump_inlining();
+
+  bool over_inlining_cutoff() const {
+    if (!inlining_incrementally()) {
+      return unique() > (uint)NodeCountInliningCutoff;
+    } else {
+      return live_nodes() > (uint)LiveNodeCountInliningCutoff;
+    }
+  }
+
+  void inc_number_of_mh_late_inlines() { _number_of_mh_late_inlines++; }
+  void dec_number_of_mh_late_inlines() { assert(_number_of_mh_late_inlines > 0, "_number_of_mh_late_inlines < 0 !"); _number_of_mh_late_inlines--; }
+  bool has_mh_late_inlines() const     { return _number_of_mh_late_inlines > 0; }
+
+  void inline_incrementally_one(PhaseIterGVN& igvn);
+  void inline_incrementally(PhaseIterGVN& igvn);
+  void inline_string_calls(bool parse_time);
 
   // Matching, CFG layout, allocation, code generation
   PhaseCFG*         cfg()                       { return _cfg; }
@@ -892,6 +1023,11 @@
   static juint  _intrinsic_hist_count[vmIntrinsics::ID_LIMIT];
   static jubyte _intrinsic_hist_flags[vmIntrinsics::ID_LIMIT];
 #endif
+  // Helpers called only by the public function final_graph_reshaping;
+  // they need not be public since they are not called elsewhere.
+  void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc);
+  void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc );
+  void eliminate_redundant_card_marks(Node* n);
 
  public:
 
--- a/src/share/vm/opto/doCall.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/doCall.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -40,25 +40,30 @@
 #include "prims/nativeLookup.hpp"
 #include "runtime/sharedRuntime.hpp"
 
-void trace_type_profile(ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
+void trace_type_profile(Compile* C, ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
   if (TraceTypeProfile || PrintInlining NOT_PRODUCT(|| PrintOptoInlining)) {
+    outputStream* out = tty;
     if (!PrintInlining) {
       if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) {
         method->print_short_name();
         tty->cr();
       }
       CompileTask::print_inlining(prof_method, depth, bci);
+    } else {
+      out = C->print_inlining_stream();
     }
-    CompileTask::print_inline_indent(depth);
-    tty->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count);
-    prof_klass->name()->print_symbol();
-    tty->cr();
+    CompileTask::print_inline_indent(depth, out);
+    out->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count);
+    stringStream ss;
+    prof_klass->name()->print_symbol_on(&ss);
+    out->print(ss.as_string());
+    out->cr();
   }
 }
 
-CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool call_is_virtual,
+CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool call_does_dispatch,
                                        JVMState* jvms, bool allow_inline,
-                                       float prof_factor, bool allow_intrinsics) {
+                                       float prof_factor, bool allow_intrinsics, bool delayed_forbidden) {
   ciMethod*       caller   = jvms->method();
   int             bci      = jvms->bci();
   Bytecodes::Code bytecode = caller->java_code_at_bci(bci);
@@ -77,7 +82,7 @@
   // See how many times this site has been invoked.
   int site_count = profile.count();
   int receiver_count = -1;
-  if (call_is_virtual && UseTypeProfile && profile.has_receiver(0)) {
+  if (call_does_dispatch && UseTypeProfile && profile.has_receiver(0)) {
     // Receivers in the profile structure are ordered by call counts
     // so that the most called (major) receiver is profile.receiver(0).
     receiver_count = profile.receiver_count(0);
@@ -89,7 +94,7 @@
     int r2id = (rid != -1 && profile.has_receiver(1))? log->identify(profile.receiver(1)):-1;
     log->begin_elem("call method='%d' count='%d' prof_factor='%g'",
                     log->identify(callee), site_count, prof_factor);
-    if (call_is_virtual)  log->print(" virtual='1'");
+    if (call_does_dispatch)  log->print(" virtual='1'");
     if (allow_inline)     log->print(" inline='1'");
     if (receiver_count >= 0) {
       log->print(" receiver='%d' receiver_count='%d'", rid, receiver_count);
@@ -106,8 +111,18 @@
   // We do this before the strict f.p. check below because the
   // intrinsics handle strict f.p. correctly.
   if (allow_inline && allow_intrinsics) {
-    CallGenerator* cg = find_intrinsic(callee, call_is_virtual);
-    if (cg != NULL)  return cg;
+    CallGenerator* cg = find_intrinsic(callee, call_does_dispatch);
+    if (cg != NULL) {
+      if (cg->is_predicted()) {
+        // Code without intrinsic but, hopefully, inlined.
+        CallGenerator* inline_cg = this->call_generator(callee,
+              vtable_index, call_does_dispatch, jvms, allow_inline, prof_factor, false);
+        if (inline_cg != NULL) {
+          cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg);
+        }
+      }
+      return cg;
+    }
   }
 
   // Do method handle calls.
@@ -115,7 +130,9 @@
   // MethodHandle.invoke* are native methods which obviously don't
   // have bytecodes and so normal inlining fails.
   if (callee->is_method_handle_intrinsic()) {
-    return CallGenerator::for_method_handle_call(jvms, caller, callee);
+    CallGenerator* cg = CallGenerator::for_method_handle_call(jvms, caller, callee, delayed_forbidden);
+    assert(cg == NULL || !delayed_forbidden || !cg->is_late_inline() || cg->is_mh_late_inline(), "unexpected CallGenerator");
+    return cg;
   }
 
   // Do not inline strict fp into non-strict code, or the reverse
@@ -132,7 +149,7 @@
     float expected_uses = past_uses;
 
     // Try inlining a bytecoded method:
-    if (!call_is_virtual) {
+    if (!call_does_dispatch) {
       InlineTree* ilt;
       if (UseOldInlining) {
         ilt = InlineTree::find_subtree_from_root(this->ilt(), jvms->caller(), jvms->method());
@@ -146,32 +163,39 @@
       WarmCallInfo scratch_ci;
       if (!UseOldInlining)
         scratch_ci.init(jvms, callee, profile, prof_factor);
-      WarmCallInfo* ci = ilt->ok_to_inline(callee, jvms, profile, &scratch_ci);
+      bool should_delay = false;
+      WarmCallInfo* ci = ilt->ok_to_inline(callee, jvms, profile, &scratch_ci, should_delay);
       assert(ci != &scratch_ci, "do not let this pointer escape");
       bool allow_inline   = (ci != NULL && !ci->is_cold());
       bool require_inline = (allow_inline && ci->is_hot());
 
       if (allow_inline) {
         CallGenerator* cg = CallGenerator::for_inline(callee, expected_uses);
-        if (require_inline && cg != NULL && should_delay_inlining(callee, jvms)) {
+
+        if (require_inline && cg != NULL) {
           // Delay the inlining of this method to give us the
           // opportunity to perform some high level optimizations
           // first.
-          return CallGenerator::for_late_inline(callee, cg);
+          if (should_delay_inlining(callee, jvms)) {
+            assert(!delayed_forbidden, "strange");
+            return CallGenerator::for_string_late_inline(callee, cg);
+          } else if ((should_delay || AlwaysIncrementalInline) && !delayed_forbidden) {
+            return CallGenerator::for_late_inline(callee, cg);
+          }
         }
-        if (cg == NULL) {
+        if (cg == NULL || should_delay) {
           // Fall through.
         } else if (require_inline || !InlineWarmCalls) {
           return cg;
         } else {
-          CallGenerator* cold_cg = call_generator(callee, vtable_index, call_is_virtual, jvms, false, prof_factor);
+          CallGenerator* cold_cg = call_generator(callee, vtable_index, call_does_dispatch, jvms, false, prof_factor);
           return CallGenerator::for_warm_call(ci, cold_cg, cg);
         }
       }
     }
 
     // Try using the type profile.
-    if (call_is_virtual && site_count > 0 && receiver_count > 0) {
+    if (call_does_dispatch && site_count > 0 && receiver_count > 0) {
       // The major receiver's count >= TypeProfileMajorReceiverPercent of site_count.
       bool have_major_receiver = (100.*profile.receiver_prob(0) >= (float)TypeProfileMajorReceiverPercent);
       ciMethod* receiver_method = NULL;
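
With the new should_delay flag (and AlwaysIncrementalInline), an inlining decision can be recorded now but expanded only after the high-level optimizations have run. A minimal sketch of that defer-then-expand structure; the std::function worklist is an illustrative stand-in for the compiler's list of late-inline call generators:

    #include <cstdio>
    #include <functional>
    #include <vector>

    int main() {
      // Stand-in for Compile's list of late-inline call generators.
      std::vector<std::function<void()>> late_inlines;

      // Parse phase: instead of inlining now, queue the expansion.
      int call_site_bci = 42;
      late_inlines.push_back([call_site_bci] {
        std::printf("expanding deferred inline at bci %d\n", call_site_bci);
      });

      std::printf("running high-level optimizations first\n");

      // Incremental-inlining phase: drain the queue.
      for (auto& expand : late_inlines) expand();
      return 0;
    }
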
@@ -185,7 +209,7 @@
       if (receiver_method != NULL) {
         // The single majority receiver sufficiently outweighs the minority.
         CallGenerator* hit_cg = this->call_generator(receiver_method,
-              vtable_index, !call_is_virtual, jvms, allow_inline, prof_factor);
+              vtable_index, !call_does_dispatch, jvms, allow_inline, prof_factor);
         if (hit_cg != NULL) {
           // Look up second receiver.
           CallGenerator* next_hit_cg = NULL;
@@ -195,7 +219,7 @@
                                                                profile.receiver(1));
             if (next_receiver_method != NULL) {
               next_hit_cg = this->call_generator(next_receiver_method,
-                                  vtable_index, !call_is_virtual, jvms,
+                                  vtable_index, !call_does_dispatch, jvms,
                                   allow_inline, prof_factor);
               if (next_hit_cg != NULL && !next_hit_cg->is_inline() &&
                   have_major_receiver && UseOnlyInlinedBimorphic) {
@@ -223,13 +247,13 @@
           }
           if (miss_cg != NULL) {
             if (next_hit_cg != NULL) {
-              trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1));
+              trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1));
               // We don't need to record dependency on a receiver here and below.
               // Whenever we inline, the dependency is added by Parse::Parse().
               miss_cg = CallGenerator::for_predicted_call(profile.receiver(1), miss_cg, next_hit_cg, PROB_MAX);
             }
             if (miss_cg != NULL) {
-              trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count);
+              trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count);
               CallGenerator* cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0));
               if (cg != NULL)  return cg;
             }
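
The predicted-call generators assembled here expand into a receiver-class cascade: guard on the majority profiled receiver, then (when bimorphic) on the second one, and only fall back to a genuine virtual dispatch on a profile miss. A self-contained sketch of that shape, with illustrative class ids and handlers:

    #include <cstdio>

    enum KlassId { KLASS_A = 1, KLASS_B = 2, KLASS_OTHER = 3 };

    static int inlined_for_A(int x) { return x + 1; }  // hit_cg, inlined
    static int inlined_for_B(int x) { return x + 2; }  // next_hit_cg, inlined
    static int virtual_call(int x)  { return x + 3; }  // miss_cg, real v-call

    // Shape of a bimorphic predicted call: two guarded inlined bodies,
    // virtual dispatch only on the unprofiled path.
    static int bimorphic_call(KlassId receiver_klass, int x) {
      if (receiver_klass == KLASS_A) return inlined_for_A(x);  // majority receiver
      if (receiver_klass == KLASS_B) return inlined_for_B(x);  // second receiver
      return virtual_call(x);                                  // profile miss
    }

    int main() {
      std::printf("%d %d %d\n",
                  bimorphic_call(KLASS_A, 0),
                  bimorphic_call(KLASS_B, 0),
                  bimorphic_call(KLASS_OTHER, 0));
      return 0;
    }
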
@@ -241,7 +265,7 @@
 
   // There was no special inlining tactic, or it bailed out.
   // Use a more generic tactic, like a simple call.
-  if (call_is_virtual) {
+  if (call_does_dispatch) {
     return CallGenerator::for_virtual_call(callee, vtable_index);
   } else {
     // Class Hierarchy Analysis or Type Profile reveals a unique target,
@@ -324,7 +348,7 @@
     return true;
   }
 
-  assert(dest_method->will_link(method()->holder(), klass, bc()), "dest_method: typeflow responsibility");
+  assert(dest_method->is_loaded(), "dest_method: typeflow responsibility");
   return false;
 }
 
@@ -340,7 +364,7 @@
   // Set frequently used booleans
   const bool is_virtual = bc() == Bytecodes::_invokevirtual;
   const bool is_virtual_or_interface = is_virtual || bc() == Bytecodes::_invokeinterface;
-  const bool has_receiver = is_virtual_or_interface || bc() == Bytecodes::_invokespecial;
+  const bool has_receiver = Bytecodes::has_receiver(bc());
 
   // Find target being called
   bool             will_link;
@@ -370,7 +394,10 @@
   // Note:  In the absence of miranda methods, an abstract class K can perform
   // an invokevirtual directly on an interface method I.m if K implements I.
 
+  // orig_callee is the resolved callee whose signature includes the
+  // appendix argument.
   const int nargs = orig_callee->arg_size();
+  const bool is_signature_polymorphic = MethodHandles::is_signature_polymorphic(orig_callee->intrinsic_id());
 
   // Push appendix argument (MethodType, CallSite, etc.), if one.
   if (iter().has_appendix()) {
@@ -387,25 +414,18 @@
   // Then we may introduce a run-time check and inline on the path where it succeeds.
   // The other path may uncommon_trap, check for another receiver, or do a v-call.
 
-  // Choose call strategy.
-  bool call_is_virtual = is_virtual_or_interface;
-  int vtable_index = Method::invalid_vtable_index;
-  ciMethod* callee = orig_callee;
+  // Try to get the most accurate receiver type
+  ciMethod* callee             = orig_callee;
+  int       vtable_index       = Method::invalid_vtable_index;
+  bool      call_does_dispatch = false;
 
-  // Try to get the most accurate receiver type
   if (is_virtual_or_interface) {
     Node*             receiver_node = stack(sp() - nargs);
     const TypeOopPtr* receiver_type = _gvn.type(receiver_node)->isa_oopptr();
-    ciMethod* optimized_virtual_method = optimize_inlining(method(), bci(), klass, orig_callee, receiver_type);
-
-    // Have the call been sufficiently improved such that it is no longer a virtual?
-    if (optimized_virtual_method != NULL) {
-      callee          = optimized_virtual_method;
-      call_is_virtual = false;
-    } else if (!UseInlineCaches && is_virtual && callee->is_loaded()) {
-      // We can make a vtable call at this site
-      vtable_index = callee->resolve_vtable_index(method()->holder(), klass);
-    }
+    // call_does_dispatch and vtable_index are out-parameters.  They might be changed.
+    callee = C->optimize_virtual_call(method(), bci(), klass, orig_callee, receiver_type,
+                                      is_virtual,
+                                      call_does_dispatch, vtable_index);  // out-parameters
   }
 
   // Note:  It's OK to try to inline a virtual call.
@@ -421,7 +441,7 @@
   // Decide call tactic.
   // This call checks with CHA, the interpreter profile, intrinsics table, etc.
   // It decides whether inlining is desirable or not.
-  CallGenerator* cg = C->call_generator(callee, vtable_index, call_is_virtual, jvms, try_inline, prof_factor());
+  CallGenerator* cg = C->call_generator(callee, vtable_index, call_does_dispatch, jvms, try_inline, prof_factor());
 
   // NOTE:  Don't use orig_callee and callee after this point!  Use cg->method() instead.
   orig_callee = callee = NULL;
@@ -461,7 +481,7 @@
     // the call site, perhaps because it did not match a pattern the
     // intrinsic was expecting to optimize. Should always be possible to
     // get a normal java call that may inline in that case
-    cg = C->call_generator(cg->method(), vtable_index, call_is_virtual, jvms, try_inline, prof_factor(), /* allow_intrinsics= */ false);
+    cg = C->call_generator(cg->method(), vtable_index, call_does_dispatch, jvms, try_inline, prof_factor(), /* allow_intrinsics= */ false);
     if ((new_jvms = cg->generate(jvms)) == NULL) {
       guarantee(failing(), "call failed to generate:  calls should work");
       return;
@@ -496,55 +516,50 @@
     round_double_result(cg->method());
 
     ciType* rtype = cg->method()->return_type();
-    if (Bytecodes::has_optional_appendix(iter().cur_bc_raw())) {
+    ciType* ctype = declared_signature->return_type();
+
+    if (Bytecodes::has_optional_appendix(iter().cur_bc_raw()) || is_signature_polymorphic) {
       // Be careful here with return types.
-      ciType* ctype = declared_signature->return_type();
       if (ctype != rtype) {
         BasicType rt = rtype->basic_type();
         BasicType ct = ctype->basic_type();
-        Node* retnode = peek();
         if (ct == T_VOID) {
           // It's OK for a method to return a value that is discarded.
           // The discarding does not require any special action from the caller.
           // The Java code knows this, at VerifyType.isNullConversion.
           pop_node(rt);  // whatever it was, pop it
-          retnode = top();
         } else if (rt == T_INT || is_subword_type(rt)) {
-          // FIXME: This logic should be factored out.
-          if (ct == T_BOOLEAN) {
-            retnode = _gvn.transform( new (C) AndINode(retnode, intcon(0x1)) );
-          } else if (ct == T_CHAR) {
-            retnode = _gvn.transform( new (C) AndINode(retnode, intcon(0xFFFF)) );
-          } else if (ct == T_BYTE) {
-            retnode = _gvn.transform( new (C) LShiftINode(retnode, intcon(24)) );
-            retnode = _gvn.transform( new (C) RShiftINode(retnode, intcon(24)) );
-          } else if (ct == T_SHORT) {
-            retnode = _gvn.transform( new (C) LShiftINode(retnode, intcon(16)) );
-            retnode = _gvn.transform( new (C) RShiftINode(retnode, intcon(16)) );
-          } else {
-            assert(ct == T_INT, err_msg_res("rt=%s, ct=%s", type2name(rt), type2name(ct)));
-          }
+          // Nothing.  These cases are handled in lambda form bytecode.
+          assert(ct == T_INT || is_subword_type(ct), err_msg_res("must match: rt=%s, ct=%s", type2name(rt), type2name(ct)));
         } else if (rt == T_OBJECT || rt == T_ARRAY) {
           assert(ct == T_OBJECT || ct == T_ARRAY, err_msg_res("rt=%s, ct=%s", type2name(rt), type2name(ct)));
           if (ctype->is_loaded()) {
             const TypeOopPtr* arg_type = TypeOopPtr::make_from_klass(rtype->as_klass());
             const Type*       sig_type = TypeOopPtr::make_from_klass(ctype->as_klass());
             if (arg_type != NULL && !arg_type->higher_equal(sig_type)) {
+              Node* retnode = pop();
               Node* cast_obj = _gvn.transform(new (C) CheckCastPPNode(control(), retnode, sig_type));
-              pop();
               push(cast_obj);
             }
           }
         } else {
-          assert(ct == rt, err_msg("unexpected mismatch rt=%d, ct=%d", rt, ct));
+          assert(rt == ct, err_msg_res("unexpected mismatch: rt=%s, ct=%s", type2name(rt), type2name(ct)));
           // push a zero; it's better than getting an oop/int mismatch
-          retnode = pop_node(rt);
-          retnode = zerocon(ct);
+          pop_node(rt);
+          Node* retnode = zerocon(ct);
           push_node(ct, retnode);
         }
         // Now that the value is well-behaved, continue with the call-site type.
         rtype = ctype;
       }
+    } else {
+      // Symbolic resolution enforces that the types are the same.
+      // NOTE: We must relax the assert for unloaded types because two
+      // different ciType instances of the same unloaded class type
+      // can appear to be "loaded" by different loaders (depending on
+      // the accessing class).
+      assert(!rtype->is_loaded() || !ctype->is_loaded() || rtype == ctype,
+             err_msg_res("mismatched return types: rtype=%s, ctype=%s", rtype->name(), ctype->name()));
     }
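
The deleted block above normalized a raw int return value to the call site's declared subword type; after this change the lambda-form bytecode performs the narrowing, so C2 merely asserts. For reference, plain C++ equivalents of what the removed AndI/LShiftI/RShiftI node patterns computed:

    #include <cstdint>
    #include <cstdio>

    // AndI masks for boolean/char; sign-extending casts stand in for the
    // LShiftI/RShiftI pairs the old block built for byte/short.
    static int32_t to_boolean(int32_t v) { return v & 0x1; }
    static int32_t to_char(int32_t v)    { return v & 0xFFFF; }
    static int32_t to_byte(int32_t v)    { return (int8_t)v; }   // sign-extend low 8 bits
    static int32_t to_short(int32_t v)   { return (int16_t)v; }  // sign-extend low 16 bits

    int main() {
      int32_t raw = 0x1234D681;
      std::printf("%d %d %d %d\n",
                  to_boolean(raw), to_char(raw), to_byte(raw), to_short(raw));
      return 0;
    }
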
 
     // If the return type of the method is not loaded, assert that the
@@ -562,7 +577,7 @@
       }
       // If there is going to be a trap, put it at the next bytecode:
       set_bci(iter().next_bci());
-      do_null_assert(peek(), T_OBJECT);
+      null_assert(peek());
       set_bci(iter().cur_bci()); // put it back
     }
   }
@@ -862,17 +877,39 @@
 #endif //PRODUCT
 
 
+ciMethod* Compile::optimize_virtual_call(ciMethod* caller, int bci, ciInstanceKlass* klass,
+                                         ciMethod* callee, const TypeOopPtr* receiver_type,
+                                         bool is_virtual,
+                                         bool& call_does_dispatch, int& vtable_index) {
+  // Set default values for out-parameters.
+  call_does_dispatch = true;
+  vtable_index       = Method::invalid_vtable_index;
+
+  // Choose call strategy.
+  ciMethod* optimized_virtual_method = optimize_inlining(caller, bci, klass, callee, receiver_type);
+
+  // Has the call been sufficiently improved such that it is no longer virtual?
+  if (optimized_virtual_method != NULL) {
+    callee             = optimized_virtual_method;
+    call_does_dispatch = false;
+  } else if (!UseInlineCaches && is_virtual && callee->is_loaded()) {
+    // We can make a vtable call at this site
+    vtable_index = callee->resolve_vtable_index(caller->holder(), klass);
+  }
+  return callee;
+}
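
The new Compile::optimize_virtual_call reports its verdict through reference out-parameters, setting conservative defaults before refining them. A small standalone model of that contract (names and values are illustrative):

    #include <cstdio>

    static const int invalid_vtable_index = -1;

    // Models the contract: conservative defaults first, then refine.
    // Returns the (possibly improved) callee id.
    static int optimize_call(int callee, bool statically_bindable,
                             bool& call_does_dispatch, int& vtable_index) {
      call_does_dispatch = true;                // default: real dispatch
      vtable_index       = invalid_vtable_index;
      if (statically_bindable) {
        call_does_dispatch = false;             // devirtualized
      } else {
        vtable_index = 7;                       // pretend resolved vtable slot
      }
      return callee;
    }

    int main() {
      bool dispatches; int vidx;
      optimize_call(1, true, dispatches, vidx);
      std::printf("dispatch=%d vtable=%d\n", dispatches, vidx);
      return 0;
    }
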
+
 // Identify possible target method and inlining style
-ciMethod* Parse::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
-                                   ciMethod *dest_method, const TypeOopPtr* receiver_type) {
+ciMethod* Compile::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
+                                     ciMethod* callee, const TypeOopPtr* receiver_type) {
   // only use for virtual or interface calls
 
   // If it is obviously final, do not bother to call find_monomorphic_target,
   // because the class hierarchy checks are not needed, and may fail due to
   // incompletely loaded classes.  Since we do our own class loading checks
   // in this module, we may confidently bind to any method.
-  if (dest_method->can_be_statically_bound()) {
-    return dest_method;
+  if (callee->can_be_statically_bound()) {
+    return callee;
   }
 
   // Attempt to improve the receiver
@@ -881,8 +918,8 @@
   if (receiver_type != NULL) {
     // Array methods are all inherited from Object, and are monomorphic.
     if (receiver_type->isa_aryptr() &&
-        dest_method->holder() == env()->Object_klass()) {
-      return dest_method;
+        callee->holder() == env()->Object_klass()) {
+      return callee;
     }
 
     // All other interesting cases are instance klasses.
@@ -902,7 +939,7 @@
   }
 
   ciInstanceKlass*   calling_klass = caller->holder();
-  ciMethod* cha_monomorphic_target = dest_method->find_monomorphic_target(calling_klass, klass, actual_receiver);
+  ciMethod* cha_monomorphic_target = callee->find_monomorphic_target(calling_klass, klass, actual_receiver);
   if (cha_monomorphic_target != NULL) {
     assert(!cha_monomorphic_target->is_abstract(), "");
     // Look at the method-receiver type.  Does it add "too much information"?
@@ -920,10 +957,10 @@
         cha_monomorphic_target->print();
         tty->cr();
       }
-      if (C->log() != NULL) {
-        C->log()->elem("missed_CHA_opportunity klass='%d' method='%d'",
-                       C->log()->identify(klass),
-                       C->log()->identify(cha_monomorphic_target));
+      if (log() != NULL) {
+        log()->elem("missed_CHA_opportunity klass='%d' method='%d'",
+                       log()->identify(klass),
+                       log()->identify(cha_monomorphic_target));
       }
       cha_monomorphic_target = NULL;
     }
@@ -935,7 +972,7 @@
     // by dynamic class loading.  Be sure to test the "static" receiver
     // dest_method here, as opposed to the actual receiver, which may
     // falsely lead us to believe that the receiver is final or private.
-    C->dependencies()->assert_unique_concrete_method(actual_receiver, cha_monomorphic_target);
+    dependencies()->assert_unique_concrete_method(actual_receiver, cha_monomorphic_target);
     return cha_monomorphic_target;
   }
 
@@ -944,7 +981,7 @@
   if (actual_receiver_is_exact) {
     // In case of evolution, there is a dependence on every inlined method, since each
     // such method can be changed when its class is redefined.
-    ciMethod* exact_method = dest_method->resolve_invoke(calling_klass, actual_receiver);
+    ciMethod* exact_method = callee->resolve_invoke(calling_klass, actual_receiver);
     if (exact_method != NULL) {
 #ifndef PRODUCT
       if (PrintOpto) {
--- a/src/share/vm/opto/escape.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/escape.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -523,7 +523,8 @@
     case Op_AryEq:
     case Op_StrComp:
     case Op_StrEquals:
-    case Op_StrIndexOf: {
+    case Op_StrIndexOf:
+    case Op_EncodeISOArray: {
       add_local_var(n, PointsToNode::ArgEscape);
       delayed_worklist->push(n); // Process it later.
       break;
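
Registering Op_EncodeISOArray here classifies the arrays passed to the intrinsic as ArgEscape: they do not escape globally, but they are no longer scalar replaceable. A toy model of the escape-state lattice the connection graph computes, where joining evidence can only raise a node's state:

    #include <algorithm>
    #include <cstdio>

    // Ordered escape lattice, weakest to strongest, as in C2's EA.
    enum EscapeState { NoEscape = 0, ArgEscape = 1, GlobalEscape = 2 };

    // Joining new evidence about a node can only raise its state.
    static EscapeState join(EscapeState a, EscapeState b) {
      return std::max(a, b);
    }

    int main() {
      EscapeState arr = NoEscape;
      arr = join(arr, ArgEscape);   // passed to a string intrinsic
      std::printf("state=%d scalar_replaceable=%d\n", arr, arr == NoEscape);
      return 0;
    }
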
@@ -701,7 +702,8 @@
     case Op_AryEq:
     case Op_StrComp:
     case Op_StrEquals:
-    case Op_StrIndexOf: {
+    case Op_StrIndexOf:
+    case Op_EncodeISOArray: {
       // char[] arrays passed to string intrinsic do not escape but
       // they are not scalar replaceable. Adjust escape state for them.
       // Start from in(2) edge since in(1) is memory edge.
@@ -893,12 +895,16 @@
                                        arg_has_oops && (i > TypeFunc::Parms);
 #ifdef ASSERT
           if (!(is_arraycopy ||
-                call->as_CallLeaf()->_name != NULL &&
-                (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
-                 strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ))
-          ) {
+                (call->as_CallLeaf()->_name != NULL &&
+                 (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0)
+                  ))) {
             call->dump();
-            assert(false, "EA: unexpected CallLeaf");
+            fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
           }
 #endif
           // Always process arraycopy's destination object since
@@ -1080,7 +1086,7 @@
       C->log()->text("%s", (iterations >= CG_BUILD_ITER_LIMIT) ? "iterations" : "time");
       C->log()->end_elem(" limit'");
     }
-    assert(false, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
+    assert(ExitEscapeAnalysisOnTimeout, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
            time.seconds(), iterations, nodes_size(), ptnodes_worklist.length()));
     // Possible infinite build_connection_graph loop,
     // bailout (no changes to ideal graph were made).
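
The assert now tolerates a timeout under ExitEscapeAnalysisOnTimeout rather than failing outright: when the connection-graph build exceeds its iteration budget, the analysis bails out and the compilation retries without EA. The general shape, as a budget-bounded worklist fixpoint (limit and work items are illustrative):

    #include <cstdio>
    #include <deque>

    int main() {
      const int iteration_limit = 20;          // stand-in for CG_BUILD_ITER_LIMIT
      std::deque<int> worklist = {0};
      int iterations = 0;
      bool timed_out = false;

      while (!worklist.empty()) {
        if (++iterations > iteration_limit) {  // budget exhausted:
          timed_out = true;                    // bail out, graph unchanged
          break;
        }
        int n = worklist.front(); worklist.pop_front();
        if (n < 5) worklist.push_back(n + 1);  // fake propagation of new edges
      }
      if (timed_out) std::printf("bailed out; retry without EA\n");
      else           std::printf("fixpoint after %d iterations\n", iterations);
      return 0;
    }
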
@@ -1382,12 +1388,12 @@
     // Non-escaped allocation returned from Java or runtime call have
     // unknown values in fields.
     for (EdgeIterator i(pta); i.has_next(); i.next()) {
-      PointsToNode* ptn = i.get();
-      if (ptn->is_Field() && ptn->as_Field()->is_oop()) {
-        if (add_edge(ptn, phantom_obj)) {
+      PointsToNode* field = i.get();
+      if (field->is_Field() && field->as_Field()->is_oop()) {
+        if (add_edge(field, phantom_obj)) {
           // New edge was added
           new_edges++;
-          add_field_uses_to_worklist(ptn->as_Field());
+          add_field_uses_to_worklist(field->as_Field());
         }
       }
     }
@@ -1409,30 +1415,30 @@
   // captured by Initialize node.
   //
   for (EdgeIterator i(pta); i.has_next(); i.next()) {
-    PointsToNode* ptn = i.get(); // Field (AddP)
-    if (!ptn->is_Field() || !ptn->as_Field()->is_oop())
+    PointsToNode* field = i.get(); // Field (AddP)
+    if (!field->is_Field() || !field->as_Field()->is_oop())
       continue; // Not oop field
-    int offset = ptn->as_Field()->offset();
+    int offset = field->as_Field()->offset();
     if (offset == Type::OffsetBot) {
       if (!visited_bottom_offset) {
         // OffsetBot is used to reference array's element,
         // always add reference to NULL to all Field nodes since we don't
        // know which element is referenced.
-        if (add_edge(ptn, null_obj)) {
+        if (add_edge(field, null_obj)) {
           // New edge was added
           new_edges++;
-          add_field_uses_to_worklist(ptn->as_Field());
+          add_field_uses_to_worklist(field->as_Field());
           visited_bottom_offset = true;
         }
       }
     } else {
       // Check only oop fields.
-      const Type* adr_type = ptn->ideal_node()->as_AddP()->bottom_type();
+      const Type* adr_type = field->ideal_node()->as_AddP()->bottom_type();
       if (adr_type->isa_rawptr()) {
 #ifdef ASSERT
         // Raw pointers are used for initializing stores so skip it
         // since it should be recorded already
-        Node* base = get_addp_base(ptn->ideal_node());
+        Node* base = get_addp_base(field->ideal_node());
         assert(adr_type->isa_rawptr() && base->is_Proj() &&
                (base->in(0) == alloc),"unexpected pointer type");
 #endif
@@ -1442,10 +1448,54 @@
         offsets_worklist.append(offset);
         Node* value = NULL;
         if (ini != NULL) {
-          BasicType ft = UseCompressedOops ? T_NARROWOOP : T_OBJECT;
-          Node* store = ini->find_captured_store(offset, type2aelembytes(ft), phase);
-          if (store != NULL && store->is_Store()) {
+          // StoreP::memory_type() == T_ADDRESS
+          BasicType ft = UseCompressedOops ? T_NARROWOOP : T_ADDRESS;
+          Node* store = ini->find_captured_store(offset, type2aelembytes(ft, true), phase);
+          // Make sure initializing store has the same type as this AddP.
+          // This AddP may reference a non-existing field because it is on a
+          // dead branch of a bimorphic call which has not been eliminated yet.
+          if (store != NULL && store->is_Store() &&
+              store->as_Store()->memory_type() == ft) {
             value = store->in(MemNode::ValueIn);
+#ifdef ASSERT
+            if (VerifyConnectionGraph) {
+              // Verify that AddP already points to all objects the value points to.
+              PointsToNode* val = ptnode_adr(value->_idx);
+              assert((val != NULL), "should be processed already");
+              PointsToNode* missed_obj = NULL;
+              if (val->is_JavaObject()) {
+                if (!field->points_to(val->as_JavaObject())) {
+                  missed_obj = val;
+                }
+              } else {
+                if (!val->is_LocalVar() || (val->edge_count() == 0)) {
+                  tty->print_cr("----------init store has invalid value -----");
+                  store->dump();
+                  val->dump();
+                  assert(val->is_LocalVar() && (val->edge_count() > 0), "should be processed already");
+                }
+                for (EdgeIterator j(val); j.has_next(); j.next()) {
+                  PointsToNode* obj = j.get();
+                  if (obj->is_JavaObject()) {
+                    if (!field->points_to(obj->as_JavaObject())) {
+                      missed_obj = obj;
+                      break;
+                    }
+                  }
+                }
+              }
+              if (missed_obj != NULL) {
+                tty->print_cr("----------field---------------------------------");
+                field->dump();
+                tty->print_cr("----------missed reference to object-----------");
+                missed_obj->dump();
+                tty->print_cr("----------object referenced by init store -----");
+                store->dump();
+                val->dump();
+                assert(!field->points_to(missed_obj->as_JavaObject()), "missed JavaObject reference");
+              }
+            }
+#endif
           } else {
             // There could be initializing stores which follow allocation.
             // For example, a volatile field store is not collected
@@ -1458,10 +1508,10 @@
         }
         if (value == NULL) {
           // A field's initializing value was not recorded. Add NULL.
-          if (add_edge(ptn, null_obj)) {
+          if (add_edge(field, null_obj)) {
             // New edge was added
             new_edges++;
-            add_field_uses_to_worklist(ptn->as_Field());
+            add_field_uses_to_worklist(field->as_Field());
           }
         }
       }
@@ -1603,7 +1653,26 @@
       }
       // Verify that all fields have initializing values.
       if (field->edge_count() == 0) {
+        tty->print_cr("----------field does not have references----------");
         field->dump();
+        for (BaseIterator i(field); i.has_next(); i.next()) {
+          PointsToNode* base = i.get();
+          tty->print_cr("----------field has next base---------------------");
+          base->dump();
+          if (base->is_JavaObject() && (base != phantom_obj) && (base != null_obj)) {
+            tty->print_cr("----------base has fields-------------------------");
+            for (EdgeIterator j(base); j.has_next(); j.next()) {
+              j.get()->dump();
+            }
+            tty->print_cr("----------base has references---------------------");
+            for (UseIterator j(base); j.has_next(); j.next()) {
+              j.get()->dump();
+            }
+          }
+        }
+        for (UseIterator i(field); i.has_next(); i.next()) {
+          i.get()->dump();
+        }
         assert(field->edge_count() > 0, "sanity");
       }
     }
@@ -1963,7 +2032,7 @@
   if (is_JavaObject()) {
     return (this == ptn);
   }
-  assert(is_LocalVar(), "sanity");
+  assert(is_LocalVar() || is_Field(), "sanity");
   for (EdgeIterator i(this); i.has_next(); i.next()) {
     if (i.get() == ptn)
       return true;
@@ -2253,7 +2322,7 @@
       }
     }
   }
-  if ((int)C->unique() + 2*NodeLimitFudgeFactor > MaxNodeLimit) {
+  if ((int) (C->live_nodes() + 2*NodeLimitFudgeFactor) > MaxNodeLimit) {
     if (C->do_escape_analysis() == true && !C->failing()) {
       // Retry compilation without escape analysis.
       // If this is the first failure, the sentinel string will "stick"
@@ -2514,15 +2583,22 @@
       }
       // Otherwise skip it (the call updated 'result' value).
     } else if (result->Opcode() == Op_SCMemProj) {
-      assert(result->in(0)->is_LoadStore(), "sanity");
-      const Type *at = igvn->type(result->in(0)->in(MemNode::Address));
+      Node* mem = result->in(0);
+      Node* adr = NULL;
+      if (mem->is_LoadStore()) {
+        adr = mem->in(MemNode::Address);
+      } else {
+        assert(mem->Opcode() == Op_EncodeISOArray, "sanity");
+        adr = mem->in(3); // Memory edge corresponds to destination array
+      }
+      const Type *at = igvn->type(adr);
       if (at != Type::TOP) {
         assert (at->isa_ptr() != NULL, "pointer type required.");
         int idx = C->get_alias_index(at->is_ptr());
         assert(idx != alias_idx, "Object is not scalar replaceable if a LoadStore node access its field");
         break;
       }
-      result = result->in(0)->in(MemNode::Memory);
+      result = mem->in(MemNode::Memory);
     }
   }
   if (result->is_Phi()) {
@@ -2860,6 +2936,11 @@
         if (m->is_MergeMem()) {
           assert(_mergemem_worklist.contains(m->as_MergeMem()), "EA: missing MergeMem node in the worklist");
         }
+      } else if (use->Opcode() == Op_EncodeISOArray) {
+        if (use->in(MemNode::Memory) == n || use->in(3) == n) {
+          // EncodeISOArray overwrites destination array
+          memnode_worklist.append_if_missing(use);
+        }
       } else {
         uint op = use->Opcode();
         if (!(op == Op_CmpP || op == Op_Conv2B ||
@@ -2895,6 +2976,16 @@
       n = n->as_MemBar()->proj_out(TypeFunc::Memory);
       if (n == NULL)
         continue;
+    } else if (n->Opcode() == Op_EncodeISOArray) {
+      // get the memory projection
+      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+        Node *use = n->fast_out(i);
+        if (use->Opcode() == Op_SCMemProj) {
+          n = use;
+          break;
+        }
+      }
+      assert(n->Opcode() == Op_SCMemProj, "memory projection required");
     } else {
       assert(n->is_Mem(), "memory node required.");
       Node *addr = n->in(MemNode::Address);
@@ -2932,7 +3023,7 @@
       Node *use = n->fast_out(i);
       if (use->is_Phi() || use->is_ClearArray()) {
         memnode_worklist.append_if_missing(use);
-      } else if(use->is_Mem() && use->in(MemNode::Memory) == n) {
+      } else if (use->is_Mem() && use->in(MemNode::Memory) == n) {
         if (use->Opcode() == Op_StoreCM) // Ignore cardmark stores
           continue;
         memnode_worklist.append_if_missing(use);
@@ -2943,6 +3034,11 @@
         assert(use->in(MemNode::Memory) != n, "EA: missing memory path");
       } else if (use->is_MergeMem()) {
         assert(_mergemem_worklist.contains(use->as_MergeMem()), "EA: missing MergeMem node in the worklist");
+      } else if (use->Opcode() == Op_EncodeISOArray) {
+        if (use->in(MemNode::Memory) == n || use->in(3) == n) {
+          // EncodeISOArray overwrites destination array
+          memnode_worklist.append_if_missing(use);
+        }
       } else {
         uint op = use->Opcode();
         if (!(op == Op_StoreCM ||
@@ -3123,10 +3219,14 @@
     EscapeState fields_es = fields_escape_state();
     tty->print("%s(%s) ", esc_names[(int)es], esc_names[(int)fields_es]);
     if (nt == PointsToNode::JavaObject && !this->scalar_replaceable())
-      tty->print("NSR");
+      tty->print("NSR ");
   }
   if (is_Field()) {
     FieldNode* f = (FieldNode*)this;
+    if (f->is_oop())
+      tty->print("oop ");
+    if (f->offset() > 0)
+      tty->print("+%d ", f->offset());
     tty->print("(");
     for (BaseIterator i(f); i.has_next(); i.next()) {
       PointsToNode* b = i.get();
--- a/src/share/vm/opto/gcm.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/gcm.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1359,7 +1359,7 @@
   // If we inserted any instructions between a Call and his CatchNode,
   // clone the instructions on all paths below the Catch.
   for( i=0; i < _num_blocks; i++ )
-    _blocks[i]->call_catch_cleanup(_bbs);
+    _blocks[i]->call_catch_cleanup(_bbs, C);
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
--- a/src/share/vm/opto/graphKit.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/graphKit.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -93,6 +93,16 @@
   return jvms;
 }
 
+//--------------------------------sync_jvms_for_reexecute---------------------
+// Make sure our current jvms agrees with our parse state.  This version
+// uses the reexecute_sp for reexecuting bytecodes.
+JVMState* GraphKit::sync_jvms_for_reexecute() {
+  JVMState* jvms = this->jvms();
+  jvms->set_bci(bci());          // Record the new bci in the JVMState
+  jvms->set_sp(reexecute_sp());  // Record the new sp in the JVMState
+  return jvms;
+}
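
sync_jvms_for_reexecute records the current bci together with the reexecution stack pointer, so a deoptimization at this state makes the interpreter re-run the whole bytecode with its arguments back on the stack. A tiny model of the recorded state (field names are illustrative, not HotSpot's JVMState):

    #include <cstdio>

    struct JVMStateModel {       // illustrative, not HotSpot's JVMState
      int bci;
      int sp;
    };

    int main() {
      const int nargs = 2;
      int sp = 5;                // stack after the args were consumed
      // For reexecution, record sp as if the arguments were still pushed.
      JVMStateModel jvms = { /*bci*/ 17, /*sp*/ sp + nargs };
      std::printf("deopt will re-run bci %d with sp %d\n", jvms.bci, jvms.sp);
      return 0;
    }
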
+
 #ifdef ASSERT
 bool GraphKit::jvms_in_sync() const {
   Parse* parse = is_Parse();
@@ -143,7 +153,7 @@
 void GraphKit::stop_and_kill_map() {
   SafePointNode* dead_map = stop();
   if (dead_map != NULL) {
-    dead_map->disconnect_inputs(NULL); // Mark the map as killed.
+    dead_map->disconnect_inputs(NULL, C); // Mark the map as killed.
     assert(dead_map->is_killed(), "must be so marked");
   }
 }
@@ -826,7 +836,16 @@
   // Walk the inline list to fill in the correct set of JVMState's
   // Also fill in the associated edges for each JVMState.
 
-  JVMState* youngest_jvms = sync_jvms();
+  // If the bytecode needs to be reexecuted, we need to put
+  // the arguments back on the stack.
+  const bool should_reexecute = jvms()->should_reexecute();
+  JVMState* youngest_jvms = should_reexecute ? sync_jvms_for_reexecute() : sync_jvms();
+
+  // NOTE: set_bci (called from sync_jvms) might reset the reexecute bit to
+  // undefined if the bci is different.  This is normal for Parse but it
+  // should not happen for LibraryCallKit because only one bci is processed.
+  assert(!is_LibraryCallKit() || (jvms()->should_reexecute() == should_reexecute),
+         "in LibraryCallKit the reexecute bit should not change");
 
   // If we are guaranteed to throw, we can prune everything but the
   // input to the current bytecode.
@@ -860,7 +879,7 @@
   }
 
   // Presize the call:
-  debug_only(uint non_debug_edges = call->req());
+  DEBUG_ONLY(uint non_debug_edges = call->req());
   call->add_req_batch(top(), youngest_jvms->debug_depth());
   assert(call->req() == non_debug_edges + youngest_jvms->debug_depth(), "");
 
@@ -965,7 +984,7 @@
   assert(call->jvms()->debug_depth() == call->req() - non_debug_edges, "");
 }
 
-bool GraphKit::compute_stack_effects(int& inputs, int& depth, bool for_parse) {
+bool GraphKit::compute_stack_effects(int& inputs, int& depth) {
   Bytecodes::Code code = java_bc();
   if (code == Bytecodes::_wide) {
     code = method()->java_code_at_bci(bci() + 1);
@@ -1005,14 +1024,11 @@
   case Bytecodes::_getfield:
   case Bytecodes::_putfield:
     {
+      bool ignored_will_link;
+      ciField* field = method()->get_field_at_bci(bci(), ignored_will_link);
+      int      size  = field->type()->size();
       bool is_get = (depth >= 0), is_static = (depth & 1);
-      ciBytecodeStream iter(method());
-      iter.reset_to_bci(bci());
-      iter.next();
-      bool ignored_will_link;
-      ciField* field = iter.get_field(ignored_will_link);
-      int      size  = field->type()->size();
-      inputs  = (is_static ? 0 : 1);
+      inputs = (is_static ? 0 : 1);
       if (is_get) {
         depth = size - inputs;
       } else {
@@ -1028,26 +1044,11 @@
   case Bytecodes::_invokedynamic:
   case Bytecodes::_invokeinterface:
     {
-      ciBytecodeStream iter(method());
-      iter.reset_to_bci(bci());
-      iter.next();
       bool ignored_will_link;
       ciSignature* declared_signature = NULL;
-      ciMethod* callee = iter.get_method(ignored_will_link, &declared_signature);
+      ciMethod* ignored_callee = method()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
       assert(declared_signature != NULL, "cannot be null");
-      // (Do not use ciMethod::arg_size(), because
-      // it might be an unloaded method, which doesn't
-      // know whether it is static or not.)
-      if (for_parse) {
-        // Case 1: When called from parse we are *before* the invoke (in the
-        //         caller) and need to to adjust the inputs by an appendix
-        //         argument that will be pushed implicitly.
-        inputs = callee->invoke_arg_size(code) - (iter.has_appendix() ? 1 : 0);
-      } else {
-        // Case 2: Here we are *after* the invoke (in the callee) and need to
-        //         remove any appendix arguments that were popped.
-        inputs = callee->invoke_arg_size(code) - (callee->has_member_arg() ? 1 : 0);
-      }
+      inputs   = declared_signature->arg_size_for_bc(code);
       int size = declared_signature->return_type()->size();
       depth = size - inputs;
     }
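
The invoke case now takes the consumed slot count directly from the declared signature via arg_size_for_bc, so the net stack effect is simply return size minus inputs. A worked standalone sketch of that arithmetic (the helper below is illustrative):

    #include <cstdio>

    // inputs = slots the invoke pops (receiver + declared arg slots),
    // depth  = net stack change = return slots - inputs.
    static void stack_effects(int arg_slots, bool has_receiver, int ret_slots,
                              int& inputs, int& depth) {
      inputs = arg_slots + (has_receiver ? 1 : 0);
      depth  = ret_slots - inputs;
    }

    int main() {
      int inputs, depth;
      // e.g. long f(int, double): 3 arg slots, virtual, returns 2 slots
      stack_effects(3, true, 2, inputs, depth);
      std::printf("inputs=%d depth=%d\n", inputs, depth);  // inputs=4 depth=-2
      return 0;
    }
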
@@ -1178,7 +1179,7 @@
   Node *chk = NULL;
   switch(type) {
     case T_LONG   : chk = new (C) CmpLNode(value, _gvn.zerocon(T_LONG)); break;
-    case T_INT    : chk = new (C) CmpINode( value, _gvn.intcon(0)); break;
+    case T_INT    : chk = new (C) CmpINode(value, _gvn.intcon(0)); break;
     case T_ARRAY  : // fall through
       type = T_OBJECT;  // simplify further tests
     case T_OBJECT : {
@@ -1229,7 +1230,8 @@
       break;
     }
 
-    default      : ShouldNotReachHere();
+    default:
+      fatal(err_msg_res("unexpected type: %s", type2name(type)));
   }
   assert(chk != NULL, "sanity check");
   chk = _gvn.transform(chk);
@@ -1769,11 +1771,21 @@
   CallProjections callprojs;
   call->extract_projections(&callprojs, true);
 
+  Node* init_mem = call->in(TypeFunc::Memory);
+  Node* final_mem = final_state->in(TypeFunc::Memory);
+  Node* final_ctl = final_state->in(TypeFunc::Control);
+  Node* final_io = final_state->in(TypeFunc::I_O);
+
   // Replace all the old call edges with the edges from the inlining result
-  C->gvn_replace_by(callprojs.fallthrough_catchproj, final_state->in(TypeFunc::Control));
-  C->gvn_replace_by(callprojs.fallthrough_memproj,   final_state->in(TypeFunc::Memory));
-  C->gvn_replace_by(callprojs.fallthrough_ioproj,    final_state->in(TypeFunc::I_O));
-  Node* final_mem = final_state->in(TypeFunc::Memory);
+  if (callprojs.fallthrough_catchproj != NULL) {
+    C->gvn_replace_by(callprojs.fallthrough_catchproj, final_ctl);
+  }
+  if (callprojs.fallthrough_memproj != NULL) {
+    C->gvn_replace_by(callprojs.fallthrough_memproj,   final_mem);
+  }
+  if (callprojs.fallthrough_ioproj != NULL) {
+    C->gvn_replace_by(callprojs.fallthrough_ioproj,    final_io);
+  }
 
   // Replace the result with the new result if it exists and is used
   if (callprojs.resproj != NULL && result != NULL) {
@@ -1782,10 +1794,15 @@
 
   if (ejvms == NULL) {
     // No exception edges to simply kill off those paths
-    C->gvn_replace_by(callprojs.catchall_catchproj, C->top());
-    C->gvn_replace_by(callprojs.catchall_memproj,   C->top());
-    C->gvn_replace_by(callprojs.catchall_ioproj,    C->top());
-
+    if (callprojs.catchall_catchproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_catchproj, C->top());
+    }
+    if (callprojs.catchall_memproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_memproj,   C->top());
+    }
+    if (callprojs.catchall_ioproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_ioproj,    C->top());
+    }
     // Replace the old exception object with top
     if (callprojs.exobj != NULL) {
       C->gvn_replace_by(callprojs.exobj, C->top());
@@ -1797,10 +1814,15 @@
     SafePointNode* ex_map = ekit.combine_and_pop_all_exception_states();
 
     Node* ex_oop = ekit.use_exception_state(ex_map);
-
-    C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control());
-    C->gvn_replace_by(callprojs.catchall_memproj,   ekit.reset_memory());
-    C->gvn_replace_by(callprojs.catchall_ioproj,    ekit.i_o());
+    if (callprojs.catchall_catchproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control());
+    }
+    if (callprojs.catchall_memproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_memproj,   ekit.reset_memory());
+    }
+    if (callprojs.catchall_ioproj != NULL) {
+      C->gvn_replace_by(callprojs.catchall_ioproj,    ekit.i_o());
+    }
 
     // Replace the old exception object with the newly created one
     if (callprojs.exobj != NULL) {
@@ -1809,7 +1831,7 @@
   }
 
   // Disconnect the call from the graph
-  call->disconnect_inputs(NULL);
+  call->disconnect_inputs(NULL, C);
   C->gvn_replace_by(call, C->top());
 
   // Clean up any MergeMems that feed other MergeMems since the
@@ -1861,15 +1883,17 @@
   // occurs here, the runtime will make sure an MDO exists.  There is
   // no need to call method()->ensure_method_data() at this point.
 
+  // Set the stack pointer to the right value for reexecution:
+  set_sp(reexecute_sp());
+
 #ifdef ASSERT
   if (!must_throw) {
     // Make sure the stack has at least enough depth to execute
     // the current bytecode.
-    int inputs, ignore;
-    if (compute_stack_effects(inputs, ignore)) {
-      assert(sp() >= inputs, "must have enough JVMS stack to execute");
-      // It is a frequent error in library_call.cpp to issue an
-      // uncommon trap with the _sp value already popped.
+    int inputs, ignored_depth;
+    if (compute_stack_effects(inputs, ignored_depth)) {
+      assert(sp() >= inputs, err_msg_res("must have enough JVMS stack to execute %s: sp=%d, inputs=%d",
+             Bytecodes::name(java_bc()), sp(), inputs));
     }
   }
 #endif
@@ -1900,7 +1924,8 @@
   case Deoptimization::Action_make_not_compilable:
     break;
   default:
-    assert(false, "bad action");
+    fatal(err_msg_res("unknown action %d: %s", action, Deoptimization::trap_action_name(action)));
+    break;
 #endif
   }
 
@@ -2667,7 +2692,7 @@
       case SSC_always_false:
         // It needs a null check because a null will *pass* the cast check.
         // A non-null value will always produce an exception.
-        return do_null_assert(obj, T_OBJECT);
+        return null_assert(obj);
       }
     }
   }
@@ -2786,7 +2811,7 @@
   mb->init_req(TypeFunc::Control, control());
   mb->init_req(TypeFunc::Memory,  reset_memory());
   Node* membar = _gvn.transform(mb);
-  set_control(_gvn.transform(new (C) ProjNode(membar,TypeFunc::Control) ));
+  set_control(_gvn.transform(new (C) ProjNode(membar, TypeFunc::Control)));
   set_all_memory_call(membar);
   return membar;
 }
@@ -2975,7 +3000,7 @@
   set_control( _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Control) ) );
   // create memory projection for i_o
   set_memory ( _gvn.transform( new (C) ProjNode(allocx, TypeFunc::Memory, true) ), rawidx );
-  make_slow_call_ex(allocx, env()->OutOfMemoryError_klass(), true);
+  make_slow_call_ex(allocx, env()->Throwable_klass(), true);
 
   // create a memory projection as for the normal control path
   Node* malloc = _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Memory));
@@ -3148,7 +3173,7 @@
     Node* cmp_lh = _gvn.transform( new(C) CmpINode(layout_val, intcon(layout_con)) );
     Node* bol_lh = _gvn.transform( new(C) BoolNode(cmp_lh, BoolTest::eq) );
     { BuildCutout unless(this, bol_lh, PROB_MAX);
-      _sp += nargs;
+      inc_sp(nargs);
       uncommon_trap(Deoptimization::Reason_class_check,
                     Deoptimization::Action_maybe_recompile);
     }
@@ -3391,7 +3416,7 @@
   {
     PreserveJVMState pjvms(this);
     set_control(iffalse);
-    _sp += nargs;
+    inc_sp(nargs);
     uncommon_trap(reason, Deoptimization::Action_maybe_recompile);
   }
   Node* iftrue = _gvn.transform(new (C) IfTrueNode(iff));
--- a/src/share/vm/opto/graphKit.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/graphKit.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -41,6 +41,7 @@
 class FastLockNode;
 class FastUnlockNode;
 class IdealKit;
+class LibraryCallKit;
 class Parse;
 class RootNode;
 
@@ -60,11 +61,13 @@
   PhaseGVN         &_gvn;       // Some optimizations while parsing
   SafePointNode*    _map;       // Parser map from JVM to Nodes
   SafePointNode*    _exceptions;// Parser map(s) for exception state(s)
-  int               _sp;        // JVM Expression Stack Pointer
   int               _bci;       // JVM Bytecode Pointer
   ciMethod*         _method;    // JVM Current Method
 
  private:
+  int               _sp;        // JVM Expression Stack Pointer; don't modify directly!
+
+ private:
   SafePointNode*     map_not_null() const {
     assert(_map != NULL, "must call stopped() to test for reset compiler map");
     return _map;
@@ -80,7 +83,8 @@
   }
 #endif
 
-  virtual Parse* is_Parse() const { return NULL; }
+  virtual Parse*          is_Parse()          const { return NULL; }
+  virtual LibraryCallKit* is_LibraryCallKit() const { return NULL; }
 
   ciEnv*        env()           const { return _env; }
   PhaseGVN&     gvn()           const { return _gvn; }
@@ -141,7 +145,7 @@
                                         _bci = jvms->bci();
                                         _method = jvms->has_method() ? jvms->method() : NULL; }
   void set_map(SafePointNode* m)      { _map = m; debug_only(verify_map()); }
-  void set_sp(int i)                  { assert(i >= 0, "must be non-negative"); _sp = i; }
+  void set_sp(int sp)                 { assert(sp >= 0, err_msg_res("sp must be non-negative: %d", sp)); _sp = sp; }
   void clean_stack(int from_sp); // clear garbage beyond from_sp to top
 
   void inc_sp(int i)                  { set_sp(sp() + i); }
@@ -149,7 +153,9 @@
   void set_bci(int bci)               { _bci = bci; }
 
   // Make sure jvms has current bci & sp.
-  JVMState* sync_jvms()     const;
+  JVMState* sync_jvms() const;
+  JVMState* sync_jvms_for_reexecute();
+
 #ifdef ASSERT
   // Make sure JVMS has an updated copy of bci and sp.
   // Also sanity-check method, depth, and monitor depth.
@@ -286,7 +292,7 @@
   // How many stack inputs does the current BC consume?
   // And, how does the stack change after the bytecode?
   // Returns false if unknown.
-  bool compute_stack_effects(int& inputs, int& depth, bool for_parse = false);
+  bool compute_stack_effects(int& inputs, int& depth);
 
   // Add a fixed offset to a pointer
   Node* basic_plus_adr(Node* base, Node* ptr, intptr_t offset) {
@@ -337,20 +343,37 @@
   Node* load_object_klass(Node* object);
   // Find out the length of an array.
   Node* load_array_length(Node* array);
+
+
   // Helper function to do a NULL pointer check or ZERO check based on type.
-  Node* null_check_common(Node* value, BasicType type,
-                          bool assert_null, Node* *null_control);
   // Throw an exception if a given value is null.
   // Return the value cast to not-null.
   // Be clever about equivalent dominating null checks.
-  Node* do_null_check(Node* value, BasicType type) {
-    return null_check_common(value, type, false, NULL);
+  Node* null_check_common(Node* value, BasicType type,
+                          bool assert_null = false, Node* *null_control = NULL);
+  Node* null_check(Node* value, BasicType type = T_OBJECT) {
+    return null_check_common(value, type);
+  }
+  Node* null_check_receiver() {
+    assert(argument(0)->bottom_type()->isa_ptr(), "must be");
+    return null_check(argument(0));
+  }
+  Node* zero_check_int(Node* value) {
+    assert(value->bottom_type()->basic_type() == T_INT,
+        err_msg_res("wrong type: %s", type2name(value->bottom_type()->basic_type())));
+    return null_check_common(value, T_INT);
+  }
+  Node* zero_check_long(Node* value) {
+    assert(value->bottom_type()->basic_type() == T_LONG,
+        err_msg_res("wrong type: %s", type2name(value->bottom_type()->basic_type())));
+    return null_check_common(value, T_LONG);
   }
   // Throw an uncommon trap if a given value is __not__ null.
   // Return the value cast to null, and be clever about dominating checks.
-  Node* do_null_assert(Node* value, BasicType type) {
-    return null_check_common(value, type, true, NULL);
+  Node* null_assert(Node* value, BasicType type = T_OBJECT) {
+    return null_check_common(value, type, true);
   }
+
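
do_null_check and do_null_assert collapse into a single null_check_common with defaulted assert_null and null_control parameters, plus thin typed wrappers (null_check, null_assert, zero_check_int, zero_check_long). The wrappers-over-one-worker shape, modeled standalone on ints:

    #include <cassert>
    #include <cstdio>

    // One worker with defaulted modes; thin wrappers name each use.
    static int check_common(int value, bool assert_zero = false) {
      if (assert_zero) { assert(value == 0); return 0; }
      assert(value != 0);
      return value;
    }
    static int null_check(int v)  { return check_common(v); }
    static int null_assert(int v) { return check_common(v, /*assert_zero=*/true); }

    int main() {
      std::printf("%d %d\n", null_check(3), null_assert(0));
      return 0;
    }
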
   // Null check oop.  Return null-path control into (*null_control).
   // Return a cast-not-null node which depends on the not-null control.
   // If never_see_null, use an uncommon trap (*null_control sees a top).
@@ -371,9 +394,9 @@
   // Replace all occurrences of one node by another.
   void replace_in_map(Node* old, Node* neww);
 
-  void  push(Node* n)     { map_not_null();        _map->set_stack(_map->_jvms,   _sp++, n); }
-  Node* pop()             { map_not_null(); return _map->stack(    _map->_jvms, --_sp); }
-  Node* peek(int off = 0) { map_not_null(); return _map->stack(    _map->_jvms,   _sp - off - 1); }
+  void  push(Node* n)     { map_not_null();        _map->set_stack(_map->_jvms,   _sp++        , n); }
+  Node* pop()             { map_not_null(); return _map->stack(    _map->_jvms, --_sp             ); }
+  Node* peek(int off = 0) { map_not_null(); return _map->stack(    _map->_jvms,   _sp - off - 1   ); }
 
   void push_pair(Node* ldval) {
     push(ldval);
@@ -580,19 +603,15 @@
 
   //---------- help for generating calls --------------
 
-  // Do a null check on the receiver, which is in argument(0).
-  Node* null_check_receiver(ciMethod* callee) {
+  // Do a null check on the receiver as it would happen before the call to
+  // callee (with all arguments still on the stack).
+  Node* null_check_receiver_before_call(ciMethod* callee) {
     assert(!callee->is_static(), "must be a virtual method");
-    int nargs = 1 + callee->signature()->size();
-    // Null check on self without removing any arguments.  The argument
-    // null check technically happens in the wrong place, which can lead to
-    // invalid stack traces when the primitive is inlined into a method
-    // which handles NullPointerExceptions.
-    Node* receiver = argument(0);
-    _sp += nargs;
-    receiver = do_null_check(receiver, T_OBJECT);
-    _sp -= nargs;
-    return receiver;
+    const int nargs = callee->arg_size();
+    inc_sp(nargs);
+    Node* n = null_check_receiver();
+    dec_sp(nargs);
+    return n;
   }
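
null_check_receiver_before_call brackets the receiver check with inc_sp/dec_sp for the callee's argument count, so any trap taken on a null receiver records a state in which the arguments are still on the stack. The bracket, modeled with an illustrative mini-kit:

    #include <cstdio>

    struct Kit {                       // illustrative mini-GraphKit
      int sp = 0;
      void inc_sp(int n) { sp += n; }
      void dec_sp(int n) { sp -= n; }
      void null_check_receiver() {     // any trap taken here records this->sp
        std::printf("checking receiver with sp=%d\n", sp);
      }
    };

    int main() {
      Kit kit; kit.sp = 3;
      const int nargs = 2;             // callee->arg_size() stand-in
      kit.inc_sp(nargs);               // "push" args back for the trap state
      kit.null_check_receiver();
      kit.dec_sp(nargs);               // restore the parser's real sp
      std::printf("sp restored to %d\n", kit.sp);
      return 0;
    }
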
 
   // Fill in argument edges for the call from argument(0), argument(1), ...
@@ -645,6 +664,9 @@
                   klass, reason_string, must_throw, keep_exact_action);
   }
 
+  // SP when bytecode needs to be reexecuted.
+  virtual int reexecute_sp() { return sp(); }
+
   // Report if there were too many traps at the current method and bci.
   // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded.
   // If there is no MDO at all, report no trap unless told to assume it.
--- a/src/share/vm/opto/idealGraphPrinter.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/idealGraphPrinter.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -547,7 +547,7 @@
 
         // max. 2 chars allowed
         if (value >= -9 && value <= 99) {
-          sprintf(buffer, INT64_FORMAT, value);
+          sprintf(buffer, JLONG_FORMAT, value);
           print_prop(short_name, buffer);
         } else {
           print_prop(short_name, "L");
--- a/src/share/vm/opto/ifg.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/ifg.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -573,7 +573,7 @@
               (n2lidx(def) && !liveout.member(n2lidx(def)) ) ) {
             b->_nodes.remove(j - 1);
             if( lrgs(r)._def == n ) lrgs(r)._def = 0;
-            n->disconnect_inputs(NULL);
+            n->disconnect_inputs(NULL, C);
             _cfg._bbs.map(n->_idx,NULL);
             n->replace_by(C->top());
             // Since yanking a Node from block, high pressure moves up one
--- a/src/share/vm/opto/lcm.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/lcm.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -175,6 +175,7 @@
     case Op_StrEquals:
     case Op_StrIndexOf:
     case Op_AryEq:
+    case Op_EncodeISOArray:
       // Not a legit memory op for implicit null check regardless of
       // embedded loads
       continue;
@@ -1006,7 +1007,7 @@
 //------------------------------call_catch_cleanup-----------------------------
 // If we inserted any instructions between a Call and his CatchNode,
 // clone the instructions on all paths below the Catch.
-void Block::call_catch_cleanup(Block_Array &bbs) {
+void Block::call_catch_cleanup(Block_Array &bbs, Compile* C) {
 
   // End of region to clone
   uint end = end_idx();
@@ -1068,7 +1069,7 @@
 
   // Remove the now-dead cloned ops
   for(uint i3 = beg; i3 < end; i3++ ) {
-    _nodes[beg]->disconnect_inputs(NULL);
+    _nodes[beg]->disconnect_inputs(NULL, C);
     _nodes.remove(beg);
   }
 
@@ -1081,7 +1082,7 @@
       Node *n = sb->_nodes[j];
       if (n->outcnt() == 0 &&
           (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
-        n->disconnect_inputs(NULL);
+        n->disconnect_inputs(NULL, C);
         sb->_nodes.remove(j);
         new_cnt--;
       }
--- a/src/share/vm/opto/library_call.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/library_call.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -44,18 +44,22 @@
  public:
  private:
   bool             _is_virtual;
+  bool             _is_predicted;
   vmIntrinsics::ID _intrinsic_id;
 
  public:
-  LibraryIntrinsic(ciMethod* m, bool is_virtual, vmIntrinsics::ID id)
+  LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, vmIntrinsics::ID id)
     : InlineCallGenerator(m),
       _is_virtual(is_virtual),
+      _is_predicted(is_predicted),
       _intrinsic_id(id)
   {
   }
   virtual bool is_intrinsic() const { return true; }
   virtual bool is_virtual()   const { return _is_virtual; }
+  virtual bool is_predicted()   const { return _is_predicted; }
   virtual JVMState* generate(JVMState* jvms);
+  virtual Node* generate_predicate(JVMState* jvms);
   vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
 };
 
@@ -63,29 +67,64 @@
 // Local helper class for LibraryIntrinsic:
 class LibraryCallKit : public GraphKit {
  private:
-  LibraryIntrinsic* _intrinsic;   // the library intrinsic being called
+  LibraryIntrinsic* _intrinsic;     // the library intrinsic being called
+  Node*             _result;        // the result node, if any
+  int               _reexecute_sp;  // the stack pointer when the bytecode needs to be reexecuted
 
   const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr = false);
 
  public:
-  LibraryCallKit(JVMState* caller, LibraryIntrinsic* intrinsic)
-    : GraphKit(caller),
-      _intrinsic(intrinsic)
+  LibraryCallKit(JVMState* jvms, LibraryIntrinsic* intrinsic)
+    : GraphKit(jvms),
+      _intrinsic(intrinsic),
+      _result(NULL)
   {
+    // Check if this is a root compile.  In that case we don't have a caller.
+    if (!jvms->has_method()) {
+      _reexecute_sp = sp();
+    } else {
+      // Find out how many arguments the interpreter needs when deoptimizing
+      // and save the stack pointer value so it can be used by uncommon_trap.
+      // We find the argument count by looking at the declared signature.
+      bool ignored_will_link;
+      ciSignature* declared_signature = NULL;
+      ciMethod* ignored_callee = caller()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
+      const int nargs = declared_signature->arg_size_for_bc(caller()->java_code_at_bci(bci()));
+      _reexecute_sp = sp() + nargs;  // "push" arguments back on stack
+    }
   }
 
+  virtual LibraryCallKit* is_LibraryCallKit() const { return (LibraryCallKit*)this; }
+
   ciMethod*         caller()    const    { return jvms()->method(); }
   int               bci()       const    { return jvms()->bci(); }
   LibraryIntrinsic* intrinsic() const    { return _intrinsic; }
   vmIntrinsics::ID  intrinsic_id() const { return _intrinsic->intrinsic_id(); }
   ciMethod*         callee()    const    { return _intrinsic->method(); }
-  ciSignature*      signature() const    { return callee()->signature(); }
-  int               arg_size()  const    { return callee()->arg_size(); }
 
   bool try_to_inline();
+  Node* try_to_predicate();
+
+  void push_result() {
+    // Push the result onto the stack.
+    if (!stopped() && result() != NULL) {
+      BasicType bt = result()->bottom_type()->basic_type();
+      push_node(bt, result());
+    }
+  }
+
+ private:
+  void fatal_unexpected_iid(vmIntrinsics::ID iid) {
+    fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
+  }
+
+  void  set_result(Node* n) { assert(_result == NULL, "only set once"); _result = n; }
+  void  set_result(RegionNode* region, PhiNode* value);
+  Node*     result() { return _result; }
+
+  virtual int reexecute_sp() { return _reexecute_sp; }
 
   // Helper functions to inline natives
-  void push_result(RegionNode* region, PhiNode* value);
   Node* generate_guard(Node* test, RegionNode* region, float true_prob);
   Node* generate_slow_guard(Node* test, RegionNode* region);
   Node* generate_fair_guard(Node* test, RegionNode* region);
@@ -103,21 +142,19 @@
                               bool disjoint_bases, const char* &name, bool dest_uninitialized);
   Node* load_mirror_from_klass(Node* klass);
   Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
-                                      int nargs,
                                       RegionNode* region, int null_path,
                                       int offset);
-  Node* load_klass_from_mirror(Node* mirror, bool never_see_null, int nargs,
+  Node* load_klass_from_mirror(Node* mirror, bool never_see_null,
                                RegionNode* region, int null_path) {
     int offset = java_lang_Class::klass_offset_in_bytes();
-    return load_klass_from_mirror_common(mirror, never_see_null, nargs,
+    return load_klass_from_mirror_common(mirror, never_see_null,
                                          region, null_path,
                                          offset);
   }
   Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null,
-                                     int nargs,
                                      RegionNode* region, int null_path) {
     int offset = java_lang_Class::array_klass_offset_in_bytes();
-    return load_klass_from_mirror_common(mirror, never_see_null, nargs,
+    return load_klass_from_mirror_common(mirror, never_see_null,
                                          region, null_path,
                                          offset);
   }
@@ -148,6 +185,7 @@
   CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
     return generate_method_call(method_id, true, false);
   }
+  Node* load_field_from_object(Node* fromObj, const char* fieldName, const char* fieldTypeString, bool is_exact, bool is_static);
 
   Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2);
   Node* make_string_method_node(int opcode, Node* str1, Node* str2);
@@ -155,16 +193,14 @@
   bool inline_string_indexOf();
   Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i);
   bool inline_string_equals();
-  Node* pop_math_arg();
+  Node* round_double_node(Node* n);
   bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
   bool inline_math_native(vmIntrinsics::ID id);
   bool inline_trig(vmIntrinsics::ID id);
-  bool inline_trans(vmIntrinsics::ID id);
-  bool inline_abs(vmIntrinsics::ID id);
-  bool inline_sqrt(vmIntrinsics::ID id);
+  bool inline_math(vmIntrinsics::ID id);
+  bool inline_exp();
+  bool inline_pow();
   void finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
-  bool inline_pow(vmIntrinsics::ID id);
-  bool inline_exp(vmIntrinsics::ID id);
   bool inline_min_max(vmIntrinsics::ID id);
   Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
   // This returns Type::AnyPtr, RawPtr, or OopPtr.
@@ -173,7 +209,7 @@
   // Helper for inline_unsafe_access.
   // Generates the guards that check whether the result of
   // Unsafe.getObject should be recorded in an SATB log buffer.
-  void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, int nargs, bool need_mem_bar);
+  void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar);
   bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
   bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
   bool inline_unsafe_allocate();
@@ -246,13 +282,15 @@
   typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind;
   bool inline_unsafe_load_store(BasicType type,  LoadStoreKind kind);
   bool inline_unsafe_ordered_store(BasicType type);
+  bool inline_unsafe_fence(vmIntrinsics::ID id);
   bool inline_fp_conversions(vmIntrinsics::ID id);
-  bool inline_numberOfLeadingZeros(vmIntrinsics::ID id);
-  bool inline_numberOfTrailingZeros(vmIntrinsics::ID id);
-  bool inline_bitCount(vmIntrinsics::ID id);
-  bool inline_reverseBytes(vmIntrinsics::ID id);
-
+  bool inline_number_methods(vmIntrinsics::ID id);
   bool inline_reference_get();
+  bool inline_aescrypt_Block(vmIntrinsics::ID id);
+  bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
+  Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
+  Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
+  bool inline_encodeISOArray();
 };
 
 
@@ -298,6 +336,9 @@
     case vmIntrinsics::_getAndSetInt:
     case vmIntrinsics::_getAndSetLong:
     case vmIntrinsics::_getAndSetObject:
+    case vmIntrinsics::_loadFence:
+    case vmIntrinsics::_storeFence:
+    case vmIntrinsics::_fullFence:
       break;  // InlineNatives does not control String.compareTo
     case vmIntrinsics::_Reference_get:
       break;  // InlineNatives does not control Reference.get
@@ -306,18 +347,23 @@
     }
   }
 
+  bool is_predicted = false;
+
   switch (id) {
   case vmIntrinsics::_compareTo:
     if (!SpecialStringCompareTo)  return NULL;
+    if (!Matcher::match_rule_supported(Op_StrComp))  return NULL;
     break;
   case vmIntrinsics::_indexOf:
     if (!SpecialStringIndexOf)  return NULL;
     break;
   case vmIntrinsics::_equals:
     if (!SpecialStringEquals)  return NULL;
+    if (!Matcher::match_rule_supported(Op_StrEquals))  return NULL;
     break;
   case vmIntrinsics::_equalsC:
     if (!SpecialArraysEquals)  return NULL;
+    if (!Matcher::match_rule_supported(Op_AryEq))  return NULL;
     break;
   case vmIntrinsics::_arraycopy:
     if (!InlineArrayCopy)  return NULL;
@@ -336,6 +382,10 @@
     // These also use the arraycopy intrinsic mechanism:
     if (!InlineArrayCopy)  return NULL;
     break;
+  case vmIntrinsics::_encodeISOArray:
+    if (!SpecialEncodeISOArray)  return NULL;
+    if (!Matcher::match_rule_supported(Op_EncodeISOArray))  return NULL;
+    break;
   case vmIntrinsics::_checkIndex:
     // We do not intrinsify this.  The optimizer does fine with it.
     return NULL;
@@ -370,6 +420,19 @@
     if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return NULL;
     break;
 
+  case vmIntrinsics::_reverseBytes_c:
+    if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return NULL;
+    break;
+  case vmIntrinsics::_reverseBytes_s:
+    if (!Matcher::match_rule_supported(Op_ReverseBytesS))  return NULL;
+    break;
+  case vmIntrinsics::_reverseBytes_i:
+    if (!Matcher::match_rule_supported(Op_ReverseBytesI))  return NULL;
+    break;
+  case vmIntrinsics::_reverseBytes_l:
+    if (!Matcher::match_rule_supported(Op_ReverseBytesL))  return NULL;
+    break;
+
   case vmIntrinsics::_Reference_get:
     // Use the intrinsic version of Reference.get() so that the value in
     // the referent field can be registered by the G1 pre-barrier code.
@@ -413,6 +476,18 @@
     break;
 #endif
 
+  case vmIntrinsics::_aescrypt_encryptBlock:
+  case vmIntrinsics::_aescrypt_decryptBlock:
+    if (!UseAESIntrinsics) return NULL;
+    break;
+
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    if (!UseAESIntrinsics) return NULL;
+    // These two require the predicated logic.
+    is_predicted = true;
+    break;
+
  default:
     assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
     assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
@@ -444,7 +519,7 @@
     if (!InlineUnsafeOps)  return NULL;
   }
 
-  return new LibraryIntrinsic(m, is_virtual, (vmIntrinsics::ID) id);
+  return new LibraryIntrinsic(m, is_virtual, is_predicted, (vmIntrinsics::ID) id);
 }
 
 //----------------------register_library_intrinsics-----------------------
@@ -464,10 +539,13 @@
     tty->print_cr("Intrinsic %s", str);
   }
 #endif
-
+  ciMethod* callee = kit.callee();
+  const int bci    = kit.bci();
+
+  // Try to inline the intrinsic.
   if (kit.try_to_inline()) {
     if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
-      CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
+      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
     }
     C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
     if (C->log()) {
@@ -476,6 +554,8 @@
                      (is_virtual() ? " virtual='1'" : ""),
                      C->unique() - nodes);
     }
+    // Push the result from the inlined method onto the stack.
+    kit.push_result();
     return kit.transfer_exceptions_into_jvms();
   }
 
@@ -484,12 +564,59 @@
     if (jvms->has_method()) {
       // Not a root compile.
       const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)";
-      CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg);
+      C->print_inlining(callee, jvms->depth() - 1, bci, msg);
     } else {
       // Root compile
       tty->print("Did not generate intrinsic %s%s at bci:%d in",
                vmIntrinsics::name_at(intrinsic_id()),
-               (is_virtual() ? " (virtual)" : ""), kit.bci());
+               (is_virtual() ? " (virtual)" : ""), bci);
+    }
+  }
+  C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
+  return NULL;
+}
+
+Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
+  LibraryCallKit kit(jvms, this);
+  Compile* C = kit.C;
+  int nodes = C->unique();
+#ifndef PRODUCT
+  assert(is_predicted(), "sanity");
+  if ((PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) && Verbose) {
+    char buf[1000];
+    const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
+    tty->print_cr("Predicate for intrinsic %s", str);
+  }
+#endif
+  ciMethod* callee = kit.callee();
+  const int bci    = kit.bci();
+
+  Node* slow_ctl = kit.try_to_predicate();
+  if (!kit.failing()) {
+    if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
+      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
+    }
+    C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
+    if (C->log()) {
+      C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'",
+                     vmIntrinsics::name_at(intrinsic_id()),
+                     (is_virtual() ? " virtual='1'" : ""),
+                     C->unique() - nodes);
+    }
+    return slow_ctl; // Could be NULL if the check folds.
+  }
+
+  // The intrinsic bailed out
+  if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
+    if (jvms->has_method()) {
+      // Not a root compile.
+      const char* msg = "failed to generate predicate for intrinsic";
+      C->print_inlining(kit.callee(), jvms->depth() - 1, bci, msg);
+    } else {
+      // Root compile
+      C->print_inlining_stream()->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
+                                        vmIntrinsics::name_at(intrinsic_id()),
+                                        (is_virtual() ? " (virtual)" : ""), bci);
     }
   }
   C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
@@ -501,6 +628,7 @@
   const bool is_store       = true;
   const bool is_native_ptr  = true;
   const bool is_static      = true;
+  const bool is_volatile    = true;
 
   if (!jvms()->has_method()) {
     // Root JVMState has a null method.
@@ -510,13 +638,11 @@
   }
   assert(merged_memory(), "");
 
+
   switch (intrinsic_id()) {
-  case vmIntrinsics::_hashCode:
-    return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
-  case vmIntrinsics::_identityHashCode:
-    return inline_native_hashcode(/*!virtual*/ false, is_static);
-  case vmIntrinsics::_getClass:
-    return inline_native_getClass();
+  case vmIntrinsics::_hashCode:                 return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
+  case vmIntrinsics::_identityHashCode:         return inline_native_hashcode(/*!virtual*/ false,         is_static);
+  case vmIntrinsics::_getClass:                 return inline_native_getClass();
 
   case vmIntrinsics::_dsin:
   case vmIntrinsics::_dcos:
@@ -527,203 +653,118 @@
   case vmIntrinsics::_dexp:
   case vmIntrinsics::_dlog:
   case vmIntrinsics::_dlog10:
-  case vmIntrinsics::_dpow:
-    return inline_math_native(intrinsic_id());
+  case vmIntrinsics::_dpow:                     return inline_math_native(intrinsic_id());
 
   case vmIntrinsics::_min:
-  case vmIntrinsics::_max:
-    return inline_min_max(intrinsic_id());
-
-  case vmIntrinsics::_arraycopy:
-    return inline_arraycopy();
-
-  case vmIntrinsics::_compareTo:
-    return inline_string_compareTo();
-  case vmIntrinsics::_indexOf:
-    return inline_string_indexOf();
-  case vmIntrinsics::_equals:
-    return inline_string_equals();
-
-  case vmIntrinsics::_getObject:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false);
-  case vmIntrinsics::_getBoolean:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, false);
-  case vmIntrinsics::_getByte:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, false);
-  case vmIntrinsics::_getShort:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, false);
-  case vmIntrinsics::_getChar:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, false);
-  case vmIntrinsics::_getInt:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, false);
-  case vmIntrinsics::_getLong:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, false);
-  case vmIntrinsics::_getFloat:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, false);
-  case vmIntrinsics::_getDouble:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, false);
-
-  case vmIntrinsics::_putObject:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, false);
-  case vmIntrinsics::_putBoolean:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, false);
-  case vmIntrinsics::_putByte:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, false);
-  case vmIntrinsics::_putShort:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, false);
-  case vmIntrinsics::_putChar:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, false);
-  case vmIntrinsics::_putInt:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_INT, false);
-  case vmIntrinsics::_putLong:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, false);
-  case vmIntrinsics::_putFloat:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, false);
-  case vmIntrinsics::_putDouble:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, false);
-
-  case vmIntrinsics::_getByte_raw:
-    return inline_unsafe_access(is_native_ptr, !is_store, T_BYTE, false);
-  case vmIntrinsics::_getShort_raw:
-    return inline_unsafe_access(is_native_ptr, !is_store, T_SHORT, false);
-  case vmIntrinsics::_getChar_raw:
-    return inline_unsafe_access(is_native_ptr, !is_store, T_CHAR, false);
-  case vmIntrinsics::_getInt_raw:
-    return inline_unsafe_access(is_native_ptr, !is_store, T_INT, false);
-  case vmIntrinsics::_getLong_raw:
-    return inline_unsafe_access(is_native_ptr, !is_store, T_LONG, false);
-  case vmIntrinsics::_getFloat_raw:
-    return inline_unsafe_access(is_native_ptr, !is_store, T_FLOAT, false);
-  case vmIntrinsics::_getDouble_raw:
-    return inline_unsafe_access(is_native_ptr, !is_store, T_DOUBLE, false);
-  case vmIntrinsics::_getAddress_raw:
-    return inline_unsafe_access(is_native_ptr, !is_store, T_ADDRESS, false);
-
-  case vmIntrinsics::_putByte_raw:
-    return inline_unsafe_access(is_native_ptr, is_store, T_BYTE, false);
-  case vmIntrinsics::_putShort_raw:
-    return inline_unsafe_access(is_native_ptr, is_store, T_SHORT, false);
-  case vmIntrinsics::_putChar_raw:
-    return inline_unsafe_access(is_native_ptr, is_store, T_CHAR, false);
-  case vmIntrinsics::_putInt_raw:
-    return inline_unsafe_access(is_native_ptr, is_store, T_INT, false);
-  case vmIntrinsics::_putLong_raw:
-    return inline_unsafe_access(is_native_ptr, is_store, T_LONG, false);
-  case vmIntrinsics::_putFloat_raw:
-    return inline_unsafe_access(is_native_ptr, is_store, T_FLOAT, false);
-  case vmIntrinsics::_putDouble_raw:
-    return inline_unsafe_access(is_native_ptr, is_store, T_DOUBLE, false);
-  case vmIntrinsics::_putAddress_raw:
-    return inline_unsafe_access(is_native_ptr, is_store, T_ADDRESS, false);
-
-  case vmIntrinsics::_getObjectVolatile:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, true);
-  case vmIntrinsics::_getBooleanVolatile:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, true);
-  case vmIntrinsics::_getByteVolatile:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, true);
-  case vmIntrinsics::_getShortVolatile:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, true);
-  case vmIntrinsics::_getCharVolatile:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, true);
-  case vmIntrinsics::_getIntVolatile:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, true);
-  case vmIntrinsics::_getLongVolatile:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, true);
-  case vmIntrinsics::_getFloatVolatile:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, true);
-  case vmIntrinsics::_getDoubleVolatile:
-    return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, true);
-
-  case vmIntrinsics::_putObjectVolatile:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, true);
-  case vmIntrinsics::_putBooleanVolatile:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, true);
-  case vmIntrinsics::_putByteVolatile:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, true);
-  case vmIntrinsics::_putShortVolatile:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, true);
-  case vmIntrinsics::_putCharVolatile:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, true);
-  case vmIntrinsics::_putIntVolatile:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_INT, true);
-  case vmIntrinsics::_putLongVolatile:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, true);
-  case vmIntrinsics::_putFloatVolatile:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, true);
-  case vmIntrinsics::_putDoubleVolatile:
-    return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, true);
-
-  case vmIntrinsics::_prefetchRead:
-    return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static);
-  case vmIntrinsics::_prefetchWrite:
-    return inline_unsafe_prefetch(!is_native_ptr, is_store, !is_static);
-  case vmIntrinsics::_prefetchReadStatic:
-    return inline_unsafe_prefetch(!is_native_ptr, !is_store, is_static);
-  case vmIntrinsics::_prefetchWriteStatic:
-    return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static);
-
-  case vmIntrinsics::_compareAndSwapObject:
-    return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg);
-  case vmIntrinsics::_compareAndSwapInt:
-    return inline_unsafe_load_store(T_INT, LS_cmpxchg);
-  case vmIntrinsics::_compareAndSwapLong:
-    return inline_unsafe_load_store(T_LONG, LS_cmpxchg);
-
-  case vmIntrinsics::_putOrderedObject:
-    return inline_unsafe_ordered_store(T_OBJECT);
-  case vmIntrinsics::_putOrderedInt:
-    return inline_unsafe_ordered_store(T_INT);
-  case vmIntrinsics::_putOrderedLong:
-    return inline_unsafe_ordered_store(T_LONG);
-
-  case vmIntrinsics::_getAndAddInt:
-    return inline_unsafe_load_store(T_INT, LS_xadd);
-  case vmIntrinsics::_getAndAddLong:
-    return inline_unsafe_load_store(T_LONG, LS_xadd);
-  case vmIntrinsics::_getAndSetInt:
-    return inline_unsafe_load_store(T_INT, LS_xchg);
-  case vmIntrinsics::_getAndSetLong:
-    return inline_unsafe_load_store(T_LONG, LS_xchg);
-  case vmIntrinsics::_getAndSetObject:
-    return inline_unsafe_load_store(T_OBJECT, LS_xchg);
-
-  case vmIntrinsics::_currentThread:
-    return inline_native_currentThread();
-  case vmIntrinsics::_isInterrupted:
-    return inline_native_isInterrupted();
+  case vmIntrinsics::_max:                      return inline_min_max(intrinsic_id());
+
+  case vmIntrinsics::_arraycopy:                return inline_arraycopy();
+
+  case vmIntrinsics::_compareTo:                return inline_string_compareTo();
+  case vmIntrinsics::_indexOf:                  return inline_string_indexOf();
+  case vmIntrinsics::_equals:                   return inline_string_equals();
+
+  case vmIntrinsics::_getObject:                return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,  !is_volatile);
+  case vmIntrinsics::_getBoolean:               return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile);
+  case vmIntrinsics::_getByte:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,    !is_volatile);
+  case vmIntrinsics::_getShort:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile);
+  case vmIntrinsics::_getChar:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile);
+  case vmIntrinsics::_getInt:                   return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile);
+  case vmIntrinsics::_getLong:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile);
+  case vmIntrinsics::_getFloat:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,   !is_volatile);
+  case vmIntrinsics::_getDouble:                return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,  !is_volatile);
+
+  case vmIntrinsics::_putObject:                return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,  !is_volatile);
+  case vmIntrinsics::_putBoolean:               return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN, !is_volatile);
+  case vmIntrinsics::_putByte:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,    !is_volatile);
+  case vmIntrinsics::_putShort:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile);
+  case vmIntrinsics::_putChar:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile);
+  case vmIntrinsics::_putInt:                   return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile);
+  case vmIntrinsics::_putLong:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile);
+  case vmIntrinsics::_putFloat:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,   !is_volatile);
+  case vmIntrinsics::_putDouble:                return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,  !is_volatile);
+
+  case vmIntrinsics::_getByte_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE,    !is_volatile);
+  case vmIntrinsics::_getShort_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT,   !is_volatile);
+  case vmIntrinsics::_getChar_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR,    !is_volatile);
+  case vmIntrinsics::_getInt_raw:               return inline_unsafe_access( is_native_ptr, !is_store, T_INT,     !is_volatile);
+  case vmIntrinsics::_getLong_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_LONG,    !is_volatile);
+  case vmIntrinsics::_getFloat_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT,   !is_volatile);
+  case vmIntrinsics::_getDouble_raw:            return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE,  !is_volatile);
+  case vmIntrinsics::_getAddress_raw:           return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile);
+
+  case vmIntrinsics::_putByte_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_BYTE,    !is_volatile);
+  case vmIntrinsics::_putShort_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_SHORT,   !is_volatile);
+  case vmIntrinsics::_putChar_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_CHAR,    !is_volatile);
+  case vmIntrinsics::_putInt_raw:               return inline_unsafe_access( is_native_ptr,  is_store, T_INT,     !is_volatile);
+  case vmIntrinsics::_putLong_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_LONG,    !is_volatile);
+  case vmIntrinsics::_putFloat_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_FLOAT,   !is_volatile);
+  case vmIntrinsics::_putDouble_raw:            return inline_unsafe_access( is_native_ptr,  is_store, T_DOUBLE,  !is_volatile);
+  case vmIntrinsics::_putAddress_raw:           return inline_unsafe_access( is_native_ptr,  is_store, T_ADDRESS, !is_volatile);
+
+  case vmIntrinsics::_getObjectVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   is_volatile);
+  case vmIntrinsics::_getBooleanVolatile:       return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  is_volatile);
+  case vmIntrinsics::_getByteVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     is_volatile);
+  case vmIntrinsics::_getShortVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    is_volatile);
+  case vmIntrinsics::_getCharVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     is_volatile);
+  case vmIntrinsics::_getIntVolatile:           return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      is_volatile);
+  case vmIntrinsics::_getLongVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     is_volatile);
+  case vmIntrinsics::_getFloatVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    is_volatile);
+  case vmIntrinsics::_getDoubleVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   is_volatile);
+
+  case vmIntrinsics::_putObjectVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   is_volatile);
+  case vmIntrinsics::_putBooleanVolatile:       return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  is_volatile);
+  case vmIntrinsics::_putByteVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     is_volatile);
+  case vmIntrinsics::_putShortVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    is_volatile);
+  case vmIntrinsics::_putCharVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     is_volatile);
+  case vmIntrinsics::_putIntVolatile:           return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      is_volatile);
+  case vmIntrinsics::_putLongVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     is_volatile);
+  case vmIntrinsics::_putFloatVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    is_volatile);
+  case vmIntrinsics::_putDoubleVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   is_volatile);
+
+  case vmIntrinsics::_prefetchRead:             return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static);
+  case vmIntrinsics::_prefetchWrite:            return inline_unsafe_prefetch(!is_native_ptr,  is_store, !is_static);
+  case vmIntrinsics::_prefetchReadStatic:       return inline_unsafe_prefetch(!is_native_ptr, !is_store,  is_static);
+  case vmIntrinsics::_prefetchWriteStatic:      return inline_unsafe_prefetch(!is_native_ptr,  is_store,  is_static);
+
+  case vmIntrinsics::_compareAndSwapObject:     return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg);
+  case vmIntrinsics::_compareAndSwapInt:        return inline_unsafe_load_store(T_INT,    LS_cmpxchg);
+  case vmIntrinsics::_compareAndSwapLong:       return inline_unsafe_load_store(T_LONG,   LS_cmpxchg);
+
+  case vmIntrinsics::_putOrderedObject:         return inline_unsafe_ordered_store(T_OBJECT);
+  case vmIntrinsics::_putOrderedInt:            return inline_unsafe_ordered_store(T_INT);
+  case vmIntrinsics::_putOrderedLong:           return inline_unsafe_ordered_store(T_LONG);
+
+  case vmIntrinsics::_getAndAddInt:             return inline_unsafe_load_store(T_INT,    LS_xadd);
+  case vmIntrinsics::_getAndAddLong:            return inline_unsafe_load_store(T_LONG,   LS_xadd);
+  case vmIntrinsics::_getAndSetInt:             return inline_unsafe_load_store(T_INT,    LS_xchg);
+  case vmIntrinsics::_getAndSetLong:            return inline_unsafe_load_store(T_LONG,   LS_xchg);
+  case vmIntrinsics::_getAndSetObject:          return inline_unsafe_load_store(T_OBJECT, LS_xchg);
+
+  case vmIntrinsics::_loadFence:
+  case vmIntrinsics::_storeFence:
+  case vmIntrinsics::_fullFence:                return inline_unsafe_fence(intrinsic_id());
+
+  case vmIntrinsics::_currentThread:            return inline_native_currentThread();
+  case vmIntrinsics::_isInterrupted:            return inline_native_isInterrupted();
 
 #ifdef TRACE_HAVE_INTRINSICS
-  case vmIntrinsics::_classID:
-    return inline_native_classID();
-  case vmIntrinsics::_threadID:
-    return inline_native_threadID();
-  case vmIntrinsics::_counterTime:
-    return inline_native_time_funcs(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), "counterTime");
+  case vmIntrinsics::_classID:                  return inline_native_classID();
+  case vmIntrinsics::_threadID:                 return inline_native_threadID();
+  case vmIntrinsics::_counterTime:              return inline_native_time_funcs(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), "counterTime");
 #endif
-  case vmIntrinsics::_currentTimeMillis:
-    return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeMillis), "currentTimeMillis");
-  case vmIntrinsics::_nanoTime:
-    return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime");
-  case vmIntrinsics::_allocateInstance:
-    return inline_unsafe_allocate();
-  case vmIntrinsics::_copyMemory:
-    return inline_unsafe_copyMemory();
-  case vmIntrinsics::_newArray:
-    return inline_native_newArray();
-  case vmIntrinsics::_getLength:
-    return inline_native_getLength();
-  case vmIntrinsics::_copyOf:
-    return inline_array_copyOf(false);
-  case vmIntrinsics::_copyOfRange:
-    return inline_array_copyOf(true);
-  case vmIntrinsics::_equalsC:
-    return inline_array_equals();
-  case vmIntrinsics::_clone:
-    return inline_native_clone(intrinsic()->is_virtual());
-
-  case vmIntrinsics::_isAssignableFrom:
-    return inline_native_subtype_check();
+  case vmIntrinsics::_currentTimeMillis:        return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeMillis), "currentTimeMillis");
+  case vmIntrinsics::_nanoTime:                 return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime");
+  case vmIntrinsics::_allocateInstance:         return inline_unsafe_allocate();
+  case vmIntrinsics::_copyMemory:               return inline_unsafe_copyMemory();
+  case vmIntrinsics::_newArray:                 return inline_native_newArray();
+  case vmIntrinsics::_getLength:                return inline_native_getLength();
+  case vmIntrinsics::_copyOf:                   return inline_array_copyOf(false);
+  case vmIntrinsics::_copyOfRange:              return inline_array_copyOf(true);
+  case vmIntrinsics::_equalsC:                  return inline_array_equals();
+  case vmIntrinsics::_clone:                    return inline_native_clone(intrinsic()->is_virtual());
+
+  case vmIntrinsics::_isAssignableFrom:         return inline_native_subtype_check();
 
   case vmIntrinsics::_isInstance:
   case vmIntrinsics::_getModifiers:
@@ -732,40 +773,39 @@
   case vmIntrinsics::_isPrimitive:
   case vmIntrinsics::_getSuperclass:
   case vmIntrinsics::_getComponentType:
-  case vmIntrinsics::_getClassAccessFlags:
-    return inline_native_Class_query(intrinsic_id());
+  case vmIntrinsics::_getClassAccessFlags:      return inline_native_Class_query(intrinsic_id());
 
   case vmIntrinsics::_floatToRawIntBits:
   case vmIntrinsics::_floatToIntBits:
   case vmIntrinsics::_intBitsToFloat:
   case vmIntrinsics::_doubleToRawLongBits:
   case vmIntrinsics::_doubleToLongBits:
-  case vmIntrinsics::_longBitsToDouble:
-    return inline_fp_conversions(intrinsic_id());
+  case vmIntrinsics::_longBitsToDouble:         return inline_fp_conversions(intrinsic_id());
 
   case vmIntrinsics::_numberOfLeadingZeros_i:
   case vmIntrinsics::_numberOfLeadingZeros_l:
-    return inline_numberOfLeadingZeros(intrinsic_id());
-
   case vmIntrinsics::_numberOfTrailingZeros_i:
   case vmIntrinsics::_numberOfTrailingZeros_l:
-    return inline_numberOfTrailingZeros(intrinsic_id());
-
   case vmIntrinsics::_bitCount_i:
   case vmIntrinsics::_bitCount_l:
-    return inline_bitCount(intrinsic_id());
-
   case vmIntrinsics::_reverseBytes_i:
   case vmIntrinsics::_reverseBytes_l:
   case vmIntrinsics::_reverseBytes_s:
-  case vmIntrinsics::_reverseBytes_c:
-    return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
-
-  case vmIntrinsics::_getCallerClass:
-    return inline_native_Reflection_getCallerClass();
-
-  case vmIntrinsics::_Reference_get:
-    return inline_reference_get();
+  case vmIntrinsics::_reverseBytes_c:           return inline_number_methods(intrinsic_id());
+
+  case vmIntrinsics::_getCallerClass:           return inline_native_Reflection_getCallerClass();
+
+  case vmIntrinsics::_Reference_get:            return inline_reference_get();
+
+  case vmIntrinsics::_aescrypt_encryptBlock:
+  case vmIntrinsics::_aescrypt_decryptBlock:    return inline_aescrypt_Block(intrinsic_id());
+
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
+
+  case vmIntrinsics::_encodeISOArray:
+    return inline_encodeISOArray();
 
   default:
     // If you get here, it may be that someone has added a new intrinsic
@@ -780,13 +820,43 @@
   }
 }
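
The long run of cases above leans on the local "const bool" names declared at
the top of try_to_inline() (is_store, is_native_ptr, is_static, is_volatile),
so each call site reads as a row of named flags rather than bare booleans. A
minimal standalone sketch of the idiom, independent of HotSpot:

    #include <cstdio>
    // Named-flag idiom: negating a const bool documents the 'false' case
    // at the call site instead of passing a bare 'false'.
    static void access(bool is_store, bool is_volatile) {
      std::printf("%s, %s\n", is_store ? "store" : "load",
                  is_volatile ? "volatile" : "plain");
    }
    int main() {
      const bool is_store    = true;
      const bool is_volatile = true;
      access(!is_store,  is_volatile);   // prints: load, volatile
      access( is_store, !is_volatile);   // prints: store, plain
      return 0;
    }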
 
-//------------------------------push_result------------------------------
+Node* LibraryCallKit::try_to_predicate() {
+  if (!jvms()->has_method()) {
+    // Root JVMState has a null method.
+    assert(map()->memory()->Opcode() == Op_Parm, "");
+    // Insert the memory aliasing node
+    set_all_memory(reset_memory());
+  }
+  assert(merged_memory(), "");
+
+  switch (intrinsic_id()) {
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+    return inline_cipherBlockChaining_AESCrypt_predicate(false);
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    return inline_cipherBlockChaining_AESCrypt_predicate(true);
+
+  default:
+    // If you get here, it may be that someone has added a new intrinsic
+    // to the list in vmSymbols.hpp without implementing it here.
+#ifndef PRODUCT
+    if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
+      tty->print_cr("*** Warning: Unimplemented predicate for intrinsic %s(%d)",
+                    vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
+    }
+#endif
+    Node* slow_ctl = control();
+    set_control(top()); // No fast path intrinsic
+    return slow_ctl;
+  }
+}
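
Taken together, generate_predicate() and try_to_predicate() implement the
is_predicted protocol used by the two CBC AES intrinsics: the predicate is
parsed first, and the control projection it returns is where the ordinary
Java call must be generated. A rough sketch of how a caller is expected to
wire this up (illustrative comments only; the real logic lives in the
call-generator code, and the helper shape below is not an actual HotSpot
method):

    // Hypothetical caller of a predicated intrinsic:
    //   Node* slow_ctl = intrinsic->generate_predicate(jvms);
    //   if (slow_ctl != NULL) {
    //     ... generate the regular (non-intrinsic) call on slow_ctl ...
    //   }
    //   // The remaining control falls through to the intrinsic fast path,
    //   // parsed via the usual generate(jvms) entry point.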
+
+//------------------------------set_result-------------------------------
 // Helper function for finishing intrinsics.
-void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) {
+void LibraryCallKit::set_result(RegionNode* region, PhiNode* value) {
   record_for_igvn(region);
   set_control(_gvn.transform(region));
-  BasicType value_type = value->type()->basic_type();
-  push_node(value_type, _gvn.transform(value));
+  set_result( _gvn.transform(value));
+  assert(value->type()->basic_type() == result()->bottom_type()->basic_type(), "sanity");
 }
 
 //------------------------------generate_guard---------------------------
@@ -975,7 +1045,6 @@
 // to Int nodes containing the lengths of str1 and str2.
 //
 Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2) {
-
   Node* result = NULL;
   switch (opcode) {
   case Op_StrIndexOf:
@@ -1002,51 +1071,23 @@
 }
 
 //------------------------------inline_string_compareTo------------------------
+// public int java.lang.String.compareTo(String anotherString);
 bool LibraryCallKit::inline_string_compareTo() {
-
-  if (!Matcher::has_match_rule(Op_StrComp)) return false;
-
-  _sp += 2;
-  Node *argument = pop();  // pop non-receiver first:  it was pushed second
-  Node *receiver = pop();
-
-  // Null check on self without removing any arguments.  The argument
-  // null check technically happens in the wrong place, which can lead to
-  // invalid stack traces when string compare is inlined into a method
-  // which handles NullPointerExceptions.
-  _sp += 2;
-  receiver = do_null_check(receiver, T_OBJECT);
-  argument = do_null_check(argument, T_OBJECT);
-  _sp -= 2;
+  Node* receiver = null_check(argument(0));
+  Node* arg      = null_check(argument(1));
   if (stopped()) {
     return true;
   }
-
-  Node* compare = make_string_method_node(Op_StrComp, receiver, argument);
-  push(compare);
+  set_result(make_string_method_node(Op_StrComp, receiver, arg));
   return true;
 }
 
 //------------------------------inline_string_equals------------------------
 bool LibraryCallKit::inline_string_equals() {
-
-  if (!Matcher::has_match_rule(Op_StrEquals)) return false;
-
-  int nargs = 2;
-  _sp += nargs;
-  Node* argument = pop();  // pop non-receiver first:  it was pushed second
-  Node* receiver = pop();
-
-  // Null check on self without removing any arguments.  The argument
-  // null check technically happens in the wrong place, which can lead to
-  // invalid stack traces when string compare is inlined into a method
-  // which handles NullPointerExceptions.
-  _sp += nargs;
-  receiver = do_null_check(receiver, T_OBJECT);
-  //should not do null check for argument for String.equals(), because spec
-  //allows to specify NULL as argument.
-  _sp -= nargs;
-
+  Node* receiver = null_check_receiver();
+  // NOTE: Do not null check the argument of String.equals(); the spec
+  // allows NULL as an argument.
+  Node* argument = this->argument(1);
   if (stopped()) {
     return true;
   }
@@ -1070,9 +1111,7 @@
   ciInstanceKlass* klass = env()->String_klass();
 
   if (!stopped()) {
-    _sp += nargs;          // gen_instanceof might do an uncommon trap
     Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass)));
-    _sp -= nargs;
     Node* cmp  = _gvn.transform(new (C) CmpINode(inst, intcon(1)));
     Node* bol  = _gvn.transform(new (C) BoolNode(cmp, BoolTest::ne));
 
@@ -1104,7 +1143,7 @@
     Node* receiver_cnt  = load_String_length(no_ctrl, receiver);
 
     // Get start addr of argument
-    Node* argument_val   = load_String_value(no_ctrl, argument);
+    Node* argument_val    = load_String_value(no_ctrl, argument);
     Node* argument_offset = load_String_offset(no_ctrl, argument);
     Node* argument_start = array_element_address(argument_val, argument_offset, T_CHAR);
 
@@ -1133,24 +1172,15 @@
   set_control(_gvn.transform(region));
   record_for_igvn(region);
 
-  push(_gvn.transform(phi));
-
+  set_result(_gvn.transform(phi));
   return true;
 }
 
 //------------------------------inline_array_equals----------------------------
 bool LibraryCallKit::inline_array_equals() {
-
-  if (!Matcher::has_match_rule(Op_AryEq)) return false;
-
-  _sp += 2;
-  Node *argument2 = pop();
-  Node *argument1 = pop();
-
-  Node* equals =
-    _gvn.transform(new (C) AryEqNode(control(), memory(TypeAryPtr::CHARS),
-                                        argument1, argument2) );
-  push(equals);
+  Node* arg1 = argument(0);
+  Node* arg2 = argument(1);
+  set_result(_gvn.transform(new (C) AryEqNode(control(), memory(TypeAryPtr::CHARS), arg1, arg2)));
   return true;
 }
 
@@ -1222,7 +1252,7 @@
   float likely   = PROB_LIKELY(0.9);
   float unlikely = PROB_UNLIKELY(0.9);
 
-  const int nargs = 2; // number of arguments to push back for uncommon trap in predicate
+  const int nargs = 0; // no arguments to push back for uncommon trap in predicate
 
   Node* source        = load_String_value(no_ctrl, string_object);
   Node* sourceOffset  = load_String_offset(no_ctrl, string_object);
@@ -1293,10 +1323,8 @@
 
 //------------------------------inline_string_indexOf------------------------
 bool LibraryCallKit::inline_string_indexOf() {
-
-  _sp += 2;
-  Node *argument = pop();  // pop non-receiver first:  it was pushed second
-  Node *receiver = pop();
+  Node* receiver = argument(0);
+  Node* arg      = argument(1);
 
   Node* result;
   // Disable the use of pcmpestri until it can be guaranteed that
@@ -1306,15 +1334,8 @@
     // Generate SSE4.2 version of indexOf
     // We currently only have match rules that use SSE4.2
 
-    // Null check on self without removing any arguments.  The argument
-    // null check technically happens in the wrong place, which can lead to
-    // invalid stack traces when string compare is inlined into a method
-    // which handles NullPointerExceptions.
-    _sp += 2;
-    receiver = do_null_check(receiver, T_OBJECT);
-    argument = do_null_check(argument, T_OBJECT);
-    _sp -= 2;
-
+    receiver = null_check(receiver);
+    arg      = null_check(arg);
     if (stopped()) {
       return true;
     }
@@ -1336,12 +1357,12 @@
     Node* source_cnt  = load_String_length(no_ctrl, receiver);
 
     // Get start addr of substring
-    Node* substr = load_String_value(no_ctrl, argument);
-    Node* substr_offset = load_String_offset(no_ctrl, argument);
+    Node* substr = load_String_value(no_ctrl, arg);
+    Node* substr_offset = load_String_offset(no_ctrl, arg);
     Node* substr_start = array_element_address(substr, substr_offset, T_CHAR);
 
     // Get length of source string
-    Node* substr_cnt  = load_String_length(no_ctrl, argument);
+    Node* substr_cnt  = load_String_length(no_ctrl, arg);
 
     // Check for substr count > string count
     Node* cmp = _gvn.transform( new(C) CmpINode(substr_cnt, source_cnt) );
@@ -1374,10 +1395,10 @@
 
   } else { // Use LibraryCallKit::string_indexOf
     // don't intrinsify if argument isn't a constant string.
-    if (!argument->is_Con()) {
+    if (!arg->is_Con()) {
      return false;
     }
-    const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr();
+    const TypeOopPtr* str_type = _gvn.type(arg)->isa_oopptr();
     if (str_type == NULL) {
       return false;
     }
@@ -1408,21 +1429,15 @@
      return false;
     }
 
-    // Null check on self without removing any arguments.  The argument
-    // null check technically happens in the wrong place, which can lead to
-    // invalid stack traces when string compare is inlined into a method
-    // which handles NullPointerExceptions.
-    _sp += 2;
-    receiver = do_null_check(receiver, T_OBJECT);
-    // No null check on the argument is needed since it's a constant String oop.
-    _sp -= 2;
+    receiver = null_check(receiver, T_OBJECT);
+    // NOTE: No null check on the argument is needed since it's a constant String oop.
     if (stopped()) {
       return true;
     }
 
    // An empty string as a pattern always returns 0 (match at beginning of string).
     if (c == 0) {
-      push(intcon(0));
+      set_result(intcon(0));
       return true;
     }
 
@@ -1445,47 +1460,54 @@
 
     result = string_indexOf(receiver, pat, o, cache, md2);
   }
-
-  push(result);
+  set_result(result);
   return true;
 }
 
-//--------------------------pop_math_arg--------------------------------
-// Pop a double argument to a math function from the stack
-// rounding it if necessary.
-Node * LibraryCallKit::pop_math_arg() {
-  Node *arg = pop_pair();
-  if( Matcher::strict_fp_requires_explicit_rounding && UseSSE<=1 )
-    arg = _gvn.transform( new (C) RoundDoubleNode(0, arg) );
-  return arg;
+//--------------------------round_double_node--------------------------------
+// Round a double node if necessary.
+Node* LibraryCallKit::round_double_node(Node* n) {
+  if (Matcher::strict_fp_requires_explicit_rounding && UseSSE <= 1)
+    n = _gvn.transform(new (C) RoundDoubleNode(0, n));
+  return n;
+}
+
+//------------------------------inline_math-----------------------------------
+// public static double Math.abs(double)
+// public static double Math.sqrt(double)
+// public static double Math.log(double)
+// public static double Math.log10(double)
+bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
+  Node* arg = round_double_node(argument(0));
+  Node* n;
+  switch (id) {
+  case vmIntrinsics::_dabs:   n = new (C) AbsDNode(    arg);  break;
+  case vmIntrinsics::_dsqrt:  n = new (C) SqrtDNode(0, arg);  break;
+  case vmIntrinsics::_dlog:   n = new (C) LogDNode(    arg);  break;
+  case vmIntrinsics::_dlog10: n = new (C) Log10DNode(  arg);  break;
+  default:  fatal_unexpected_iid(id);  break;
+  }
+  set_result(_gvn.transform(n));
+  return true;
 }
 
 //------------------------------inline_trig----------------------------------
 // Inline sin/cos/tan instructions, if possible.  If rounding is required, do
 // argument reduction which will turn into a fast/slow diamond.
 bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
-  _sp += arg_size();            // restore stack pointer
-  Node* arg = pop_math_arg();
-  Node* trig = NULL;
+  Node* arg = round_double_node(argument(0));
+  Node* n = NULL;
 
   switch (id) {
-  case vmIntrinsics::_dsin:
-    trig = _gvn.transform((Node*)new (C) SinDNode(arg));
-    break;
-  case vmIntrinsics::_dcos:
-    trig = _gvn.transform((Node*)new (C) CosDNode(arg));
-    break;
-  case vmIntrinsics::_dtan:
-    trig = _gvn.transform((Node*)new (C) TanDNode(arg));
-    break;
-  default:
-    assert(false, "bad intrinsic was passed in");
-    return false;
+  case vmIntrinsics::_dsin:  n = new (C) SinDNode(arg);  break;
+  case vmIntrinsics::_dcos:  n = new (C) CosDNode(arg);  break;
+  case vmIntrinsics::_dtan:  n = new (C) TanDNode(arg);  break;
+  default:  fatal_unexpected_iid(id);  break;
   }
+  n = _gvn.transform(n);
 
   // Rounding required?  Check for argument reduction!
-  if( Matcher::strict_fp_requires_explicit_rounding ) {
-
+  if (Matcher::strict_fp_requires_explicit_rounding) {
     static const double     pi_4 =  0.7853981633974483;
     static const double neg_pi_4 = -0.7853981633974483;
     // pi/2 in 80-bit extended precision
@@ -1520,8 +1542,8 @@
     // probably do the math inside the SIN encoding.
 
     // Make the merge point
-    RegionNode *r = new (C) RegionNode(3);
-    Node *phi = new (C) PhiNode(r,Type::DOUBLE);
+    RegionNode* r = new (C) RegionNode(3);
+    Node* phi = new (C) PhiNode(r, Type::DOUBLE);
 
     // Flatten arg so we need only 1 test
     Node *abs = _gvn.transform(new (C) AbsDNode(arg));
@@ -1536,7 +1558,7 @@
     set_control(opt_iff(r,iff));
 
     // Set fast path result
-    phi->init_req(2,trig);
+    phi->init_req(2, n);
 
     // Slow path - non-blocking leaf call
     Node* call = NULL;
@@ -1558,37 +1580,18 @@
       break;
     }
     assert(control()->in(0) == call, "");
-    Node* slow_result = _gvn.transform(new (C) ProjNode(call,TypeFunc::Parms));
-    r->init_req(1,control());
-    phi->init_req(1,slow_result);
+    Node* slow_result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
+    r->init_req(1, control());
+    phi->init_req(1, slow_result);
 
     // Post-merge
     set_control(_gvn.transform(r));
     record_for_igvn(r);
-    trig = _gvn.transform(phi);
+    n = _gvn.transform(phi);
 
     C->set_has_split_ifs(true); // Has chance for split-if optimization
   }
-  // Push result back on JVM stack
-  push_pair(trig);
-  return true;
-}
-
-//------------------------------inline_sqrt-------------------------------------
-// Inline square root instruction, if possible.
-bool LibraryCallKit::inline_sqrt(vmIntrinsics::ID id) {
-  assert(id == vmIntrinsics::_dsqrt, "Not square root");
-  _sp += arg_size();        // restore stack pointer
-  push_pair(_gvn.transform(new (C) SqrtDNode(0, pop_math_arg())));
-  return true;
-}
-
-//------------------------------inline_abs-------------------------------------
-// Inline absolute value instruction, if possible.
-bool LibraryCallKit::inline_abs(vmIntrinsics::ID id) {
-  assert(id == vmIntrinsics::_dabs, "Not absolute value");
-  _sp += arg_size();        // restore stack pointer
-  push_pair(_gvn.transform(new (C) AbsDNode(pop_math_arg())));
+  set_result(n);
   return true;
 }
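
The guard built above is simply |arg| <= pi/4: inside that range the hardware
sin/cos/tan result is used directly, outside it the leaf runtime call does the
argument reduction. A standalone illustration of the same split, assuming
nothing beyond the C math library:

    #include <cmath>
    #include <cstdio>
    int main() {
      const double pi_4 = 0.7853981633974483;
      const double xs[] = { 0.5, 100.0 };
      for (double x : xs) {
        bool fast = std::fabs(x) <= pi_4;  // mirrors the AbsD/CmpD guard
        std::printf("sin(%g) via %s path = %g\n",
                    x, fast ? "fast" : "slow", std::sin(x));
      }
      return 0;
    }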
 
@@ -1597,24 +1600,18 @@
   // result = result.isNaN() ? funcAddr() : result;
   // Check for NaN by testing result != result; if it is NaN, either trap
   // or go to the runtime.
-  Node* cmpisnan = _gvn.transform(new (C) CmpDNode(result,result));
+  Node* cmpisnan = _gvn.transform(new (C) CmpDNode(result, result));
   // Build the boolean node
-  Node* bolisnum = _gvn.transform( new (C) BoolNode(cmpisnan, BoolTest::eq) );
+  Node* bolisnum = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::eq));
 
   if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
-    {
-      BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
-      // End the current control-flow path
-      push_pair(x);
-      if (y != NULL) {
-        push_pair(y);
-      }
+    { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
       // The pow or exp intrinsic returned a NaN, which requires a call
       // to the runtime.  Recompile with the runtime call.
       uncommon_trap(Deoptimization::Reason_intrinsic,
                     Deoptimization::Action_make_not_entrant);
     }
-    push_pair(result);
+    set_result(result);
   } else {
     // If this inlining ever returned NaN in the past, we compile a call
     // to the runtime to properly handle corner cases
@@ -1624,7 +1621,7 @@
     Node* if_fast = _gvn.transform( new (C) IfTrueNode(iff) );
 
     if (!if_slow->is_top()) {
-      RegionNode* result_region = new(C) RegionNode(3);
+      RegionNode* result_region = new (C) RegionNode(3);
       PhiNode*    result_val = new (C) PhiNode(result_region, Type::DOUBLE);
 
       result_region->init_req(1, if_fast);
@@ -1644,9 +1641,9 @@
 
       result_region->init_req(2, control());
       result_val->init_req(2, value);
-      push_result(result_region, result_val);
+      set_result(result_region, result_val);
     } else {
-      push_pair(result);
+      set_result(result);
     }
   }
 }
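
The NaN test above relies on the IEEE 754 rule that NaN compares unequal to
itself, which is exactly what CmpD + BoolTest::eq on (result, result) encodes.
A self-contained check of that property:

    #include <cassert>
    #include <cmath>
    int main() {
      double nan = std::nan("");
      assert(nan != nan);        // only NaN is unequal to itself
      assert(!(1.0 != 1.0));     // ordinary doubles compare equal
      return 0;
    }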
@@ -1654,25 +1651,19 @@
 //------------------------------inline_exp-------------------------------------
 // Inline exp instructions, if possible.  The Intel hardware only misses
 // really odd corner cases (+/- Infinity).  Just uncommon-trap them.
-bool LibraryCallKit::inline_exp(vmIntrinsics::ID id) {
-  assert(id == vmIntrinsics::_dexp, "Not exp");
-
-  _sp += arg_size();        // restore stack pointer
-  Node *x = pop_math_arg();
-  Node *result = _gvn.transform(new (C) ExpDNode(0,x));
-
-  finish_pow_exp(result, x, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
+bool LibraryCallKit::inline_exp() {
+  Node* arg = round_double_node(argument(0));
+  Node* n   = _gvn.transform(new (C) ExpDNode(0, arg));
+
+  finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
 
   C->set_has_split_ifs(true); // Has chance for split-if optimization
-
   return true;
 }
 
 //------------------------------inline_pow-------------------------------------
 // Inline power instructions, if possible.
-bool LibraryCallKit::inline_pow(vmIntrinsics::ID id) {
-  assert(id == vmIntrinsics::_dpow, "Not pow");
-
+bool LibraryCallKit::inline_pow() {
   // Pseudocode for pow
   // if (x <= 0.0) {
   //   long longy = (long)y;
@@ -1690,15 +1681,14 @@
   // }
   // return result;
 
-  _sp += arg_size();        // restore stack pointer
-  Node* y = pop_math_arg();
-  Node* x = pop_math_arg();
+  Node* x = round_double_node(argument(0));
+  Node* y = round_double_node(argument(2));
 
   Node* result = NULL;
 
   if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
     // Short form: skip the fancy tests and just check for NaN result.
-    result = _gvn.transform( new (C) PowDNode(0, x, y) );
+    result = _gvn.transform(new (C) PowDNode(0, x, y));
   } else {
     // If this inlining ever returned NaN in the past, include all
     // checks + call to the runtime.
@@ -1816,55 +1806,23 @@
     // Post merge
     set_control(_gvn.transform(r));
     record_for_igvn(r);
-    result=_gvn.transform(phi);
+    result = _gvn.transform(phi);
   }
 
   finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
 
   C->set_has_split_ifs(true); // Has chance for split-if optimization
-
-  return true;
-}
-
-//------------------------------inline_trans-------------------------------------
-// Inline transcendental instructions, if possible.  The Intel hardware gets
-// these right, no funny corner cases missed.
-bool LibraryCallKit::inline_trans(vmIntrinsics::ID id) {
-  _sp += arg_size();        // restore stack pointer
-  Node* arg = pop_math_arg();
-  Node* trans = NULL;
-
-  switch (id) {
-  case vmIntrinsics::_dlog:
-    trans = _gvn.transform((Node*)new (C) LogDNode(arg));
-    break;
-  case vmIntrinsics::_dlog10:
-    trans = _gvn.transform((Node*)new (C) Log10DNode(arg));
-    break;
-  default:
-    assert(false, "bad intrinsic was passed in");
-    return false;
-  }
-
-  // Push result back on JVM stack
-  push_pair(trans);
   return true;
 }
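
For reference, the pseudocode at the head of inline_pow() matches the Java
semantics of Math.pow for a non-positive base: when y has an integral value
the result's sign follows the parity of (long)y, otherwise the result is NaN.
A plain C++ transcription under that assumption (illustrative only, not the
IR the intrinsic actually builds):

    #include <cmath>
    // pow for x <= 0: |x|^y with the sign given by the parity of (long)y,
    // NaN when y is not an integral value.
    double pow_sketch(double x, double y) {
      if (x <= 0.0) {
        long longy = (long)y;
        if ((double)longy == y) {                  // y is integral
          double r = std::pow(std::fabs(x), y);
          return (longy & 1) ? -r : r;             // odd exponent: negate
        }
        return std::nan("");                       // non-integral y
      }
      return std::pow(x, y);
    }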
 
 //------------------------------runtime_math-----------------------------
 bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) {
-  Node* a = NULL;
-  Node* b = NULL;
-
   assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(),
          "must be (DD)D or (D)D type");
 
   // Inputs
-  _sp += arg_size();        // restore stack pointer
-  if (call_type == OptoRuntime::Math_DD_D_Type()) {
-    b = pop_math_arg();
-  }
-  a = pop_math_arg();
+  Node* a = round_double_node(argument(0));
+  Node* b = (call_type == OptoRuntime::Math_DD_D_Type()) ? round_double_node(argument(2)) : NULL;
 
   const TypePtr* no_memory_effects = NULL;
   Node* trig = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName,
@@ -1876,43 +1834,43 @@
   assert(value_top == top(), "second value must be top");
 #endif
 
-  push_pair(value);
+  set_result(value);
   return true;
 }
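
Note the argument indices in the rewritten math intrinsics: argument() counts
JVM stack slots, and a double occupies two slots, so the second double of a
(DD)D signature is argument(2), not argument(1). Sketched for Math.pow(x, y),
assuming the usual JVM slot layout:

    // Slot layout for (DD)D, e.g. Math.pow(double x, double y):
    //   slot 0: x            -> argument(0)
    //   slot 1: (second half of x)
    //   slot 2: y            -> argument(2)
    //   slot 3: (second half of y)
    enum PowSlots { kSlotX = 0, kSlotXHalf = 1, kSlotY = 2, kSlotYHalf = 3 };
    static_assert(kSlotY == 2, "second double starts at slot 2");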
 
 //------------------------------inline_math_native-----------------------------
 bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
+#define FN_PTR(f) CAST_FROM_FN_PTR(address, f)
   switch (id) {
     // These intrinsics are not properly supported on all hardware
-  case vmIntrinsics::_dcos: return Matcher::has_match_rule(Op_CosD) ? inline_trig(id) :
-    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dcos), "COS");
-  case vmIntrinsics::_dsin: return Matcher::has_match_rule(Op_SinD) ? inline_trig(id) :
-    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dsin), "SIN");
-  case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) :
-    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dtan), "TAN");
-
-  case vmIntrinsics::_dlog:   return Matcher::has_match_rule(Op_LogD) ? inline_trans(id) :
-    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog), "LOG");
-  case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_trans(id) :
-    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), "LOG10");
+  case vmIntrinsics::_dcos:   return Matcher::has_match_rule(Op_CosD)   ? inline_trig(id) :
+    runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos),   "COS");
+  case vmIntrinsics::_dsin:   return Matcher::has_match_rule(Op_SinD)   ? inline_trig(id) :
+    runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dsin),   "SIN");
+  case vmIntrinsics::_dtan:   return Matcher::has_match_rule(Op_TanD)   ? inline_trig(id) :
+    runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan),   "TAN");
+
+  case vmIntrinsics::_dlog:   return Matcher::has_match_rule(Op_LogD)   ? inline_math(id) :
+    runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog),   "LOG");
+  case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_math(id) :
+    runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
 
     // These intrinsics are supported on all hardware
-  case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_sqrt(id) : false;
-  case vmIntrinsics::_dabs:  return Matcher::has_match_rule(Op_AbsD)  ? inline_abs(id)  : false;
-
-  case vmIntrinsics::_dexp:  return
-    Matcher::has_match_rule(Op_ExpD) ? inline_exp(id) :
-    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
-  case vmIntrinsics::_dpow:  return
-    Matcher::has_match_rule(Op_PowD) ? inline_pow(id) :
-    runtime_math(OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
+  case vmIntrinsics::_dsqrt:  return Matcher::has_match_rule(Op_SqrtD)  ? inline_math(id) : false;
+  case vmIntrinsics::_dabs:   return Matcher::has_match_rule(Op_AbsD)   ? inline_math(id) : false;
+
+  case vmIntrinsics::_dexp:   return Matcher::has_match_rule(Op_ExpD)   ? inline_exp()    :
+    runtime_math(OptoRuntime::Math_D_D_Type(),  FN_PTR(SharedRuntime::dexp),  "EXP");
+  case vmIntrinsics::_dpow:   return Matcher::has_match_rule(Op_PowD)   ? inline_pow()    :
+    runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow),  "POW");
+#undef FN_PTR
 
    // These intrinsics are not yet correctly implemented
   case vmIntrinsics::_datan2:
     return false;
 
   default:
-    ShouldNotReachHere();
+    fatal_unexpected_iid(id);
     return false;
   }
 }
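
The dispatch pattern above is the one to keep in mind for the rest of this file: each ID first asks the platform Matcher for a match rule and inlines an ideal-graph node if one exists, otherwise it falls back to a leaf runtime call into SharedRuntime. A minimal Java view of the call sites involved (hedged: whether any given call inlines is a per-platform decision, invisible from Java):

    // Java entry points behind the vmIntrinsics IDs dispatched above.
    public class MathIntrinsicsDemo {
        public static void main(String[] args) {
            double x = 0.5;
            System.out.println(Math.sin(x));       // _dsin:   Op_SinD   or SharedRuntime::dsin
            System.out.println(Math.log10(x));     // _dlog10: Op_Log10D or SharedRuntime::dlog10
            System.out.println(Math.pow(x, 3.0));  // _dpow:   Op_PowD   or SharedRuntime::dpow
            System.out.println(Math.sqrt(x));      // _dsqrt:  Op_SqrtD  or no intrinsic at all
        }
    }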
@@ -1927,8 +1885,7 @@
 
 //----------------------------inline_min_max-----------------------------------
 bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
-  push(generate_min_max(id, argument(0), argument(1)));
-
+  set_result(generate_min_max(id, argument(0), argument(1)));
   return true;
 }
 
@@ -2151,99 +2108,37 @@
   }
 }
 
-//-------------------inline_numberOfLeadingZeros_int/long-----------------------
-// inline int Integer.numberOfLeadingZeros(int)
-// inline int Long.numberOfLeadingZeros(long)
-bool LibraryCallKit::inline_numberOfLeadingZeros(vmIntrinsics::ID id) {
-  assert(id == vmIntrinsics::_numberOfLeadingZeros_i || id == vmIntrinsics::_numberOfLeadingZeros_l, "not numberOfLeadingZeros");
-  if (id == vmIntrinsics::_numberOfLeadingZeros_i && !Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false;
-  if (id == vmIntrinsics::_numberOfLeadingZeros_l && !Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false;
-  _sp += arg_size();  // restore stack pointer
+//--------------------------inline_number_methods-----------------------------
+// inline int     Integer.numberOfLeadingZeros(int)
+// inline int        Long.numberOfLeadingZeros(long)
+//
+// inline int     Integer.numberOfTrailingZeros(int)
+// inline int        Long.numberOfTrailingZeros(long)
+//
+// inline int     Integer.bitCount(int)
+// inline int        Long.bitCount(long)
+//
+// inline char  Character.reverseBytes(char)
+// inline short     Short.reverseBytes(short)
+// inline int     Integer.reverseBytes(int)
+// inline long       Long.reverseBytes(long)
+bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) {
+  Node* arg = argument(0);
+  Node* n;
   switch (id) {
-  case vmIntrinsics::_numberOfLeadingZeros_i:
-    push(_gvn.transform(new (C) CountLeadingZerosINode(pop())));
-    break;
-  case vmIntrinsics::_numberOfLeadingZeros_l:
-    push(_gvn.transform(new (C) CountLeadingZerosLNode(pop_pair())));
-    break;
-  default:
-    ShouldNotReachHere();
-  }
-  return true;
-}
-
-//-------------------inline_numberOfTrailingZeros_int/long----------------------
-// inline int Integer.numberOfTrailingZeros(int)
-// inline int Long.numberOfTrailingZeros(long)
-bool LibraryCallKit::inline_numberOfTrailingZeros(vmIntrinsics::ID id) {
-  assert(id == vmIntrinsics::_numberOfTrailingZeros_i || id == vmIntrinsics::_numberOfTrailingZeros_l, "not numberOfTrailingZeros");
-  if (id == vmIntrinsics::_numberOfTrailingZeros_i && !Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false;
-  if (id == vmIntrinsics::_numberOfTrailingZeros_l && !Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false;
-  _sp += arg_size();  // restore stack pointer
-  switch (id) {
-  case vmIntrinsics::_numberOfTrailingZeros_i:
-    push(_gvn.transform(new (C) CountTrailingZerosINode(pop())));
-    break;
-  case vmIntrinsics::_numberOfTrailingZeros_l:
-    push(_gvn.transform(new (C) CountTrailingZerosLNode(pop_pair())));
-    break;
-  default:
-    ShouldNotReachHere();
+  case vmIntrinsics::_numberOfLeadingZeros_i:   n = new (C) CountLeadingZerosINode( arg);  break;
+  case vmIntrinsics::_numberOfLeadingZeros_l:   n = new (C) CountLeadingZerosLNode( arg);  break;
+  case vmIntrinsics::_numberOfTrailingZeros_i:  n = new (C) CountTrailingZerosINode(arg);  break;
+  case vmIntrinsics::_numberOfTrailingZeros_l:  n = new (C) CountTrailingZerosLNode(arg);  break;
+  case vmIntrinsics::_bitCount_i:               n = new (C) PopCountINode(          arg);  break;
+  case vmIntrinsics::_bitCount_l:               n = new (C) PopCountLNode(          arg);  break;
+  case vmIntrinsics::_reverseBytes_c:           n = new (C) ReverseBytesUSNode(0,   arg);  break;
+  case vmIntrinsics::_reverseBytes_s:           n = new (C) ReverseBytesSNode( 0,   arg);  break;
+  case vmIntrinsics::_reverseBytes_i:           n = new (C) ReverseBytesINode( 0,   arg);  break;
+  case vmIntrinsics::_reverseBytes_l:           n = new (C) ReverseBytesLNode( 0,   arg);  break;
+  default:  fatal_unexpected_iid(id);  break;
   }
-  return true;
-}
-
-//----------------------------inline_bitCount_int/long-----------------------
-// inline int Integer.bitCount(int)
-// inline int Long.bitCount(long)
-bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) {
-  assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount");
-  if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false;
-  if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false;
-  _sp += arg_size();  // restore stack pointer
-  switch (id) {
-  case vmIntrinsics::_bitCount_i:
-    push(_gvn.transform(new (C) PopCountINode(pop())));
-    break;
-  case vmIntrinsics::_bitCount_l:
-    push(_gvn.transform(new (C) PopCountLNode(pop_pair())));
-    break;
-  default:
-    ShouldNotReachHere();
-  }
-  return true;
-}
-
-//----------------------------inline_reverseBytes_int/long/char/short-------------------
-// inline Integer.reverseBytes(int)
-// inline Long.reverseBytes(long)
-// inline Character.reverseBytes(char)
-// inline Short.reverseBytes(short)
-bool LibraryCallKit::inline_reverseBytes(vmIntrinsics::ID id) {
-  assert(id == vmIntrinsics::_reverseBytes_i || id == vmIntrinsics::_reverseBytes_l ||
-         id == vmIntrinsics::_reverseBytes_c || id == vmIntrinsics::_reverseBytes_s,
-         "not reverse Bytes");
-  if (id == vmIntrinsics::_reverseBytes_i && !Matcher::has_match_rule(Op_ReverseBytesI))  return false;
-  if (id == vmIntrinsics::_reverseBytes_l && !Matcher::has_match_rule(Op_ReverseBytesL))  return false;
-  if (id == vmIntrinsics::_reverseBytes_c && !Matcher::has_match_rule(Op_ReverseBytesUS)) return false;
-  if (id == vmIntrinsics::_reverseBytes_s && !Matcher::has_match_rule(Op_ReverseBytesS))  return false;
-  _sp += arg_size();  // restore stack pointer
-  switch (id) {
-  case vmIntrinsics::_reverseBytes_i:
-    push(_gvn.transform(new (C) ReverseBytesINode(0, pop())));
-    break;
-  case vmIntrinsics::_reverseBytes_l:
-    push_pair(_gvn.transform(new (C) ReverseBytesLNode(0, pop_pair())));
-    break;
-  case vmIntrinsics::_reverseBytes_c:
-    push(_gvn.transform(new (C) ReverseBytesUSNode(0, pop())));
-    break;
-  case vmIntrinsics::_reverseBytes_s:
-    push(_gvn.transform(new (C) ReverseBytesSNode(0, pop())));
-    break;
-  default:
-    ;
-  }
+  set_result(_gvn.transform(n));
   return true;
 }
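
Since the ten merged intrinsics all share the same one-argument, transform-and-set_result shape, a quick Java check of the methods they back may help (expected outputs in comments, per the JDK javadoc):

    public class BitIntrinsicsDemo {
        public static void main(String[] args) {
            System.out.println(Integer.numberOfLeadingZeros(1));    // 31 (CountLeadingZerosI)
            System.out.println(Long.numberOfTrailingZeros(8L));     // 3  (CountTrailingZerosL)
            System.out.println(Integer.bitCount(0xF0F0));           // 8  (PopCountI)
            System.out.println(Integer.toHexString(
                Integer.reverseBytes(0x12345678)));                 // 78563412 (ReverseBytesI)
            System.out.println(Integer.toHexString(
                Character.reverseBytes('\u1234')));                 // 3412 (ReverseBytesUS)
        }
    }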
 
@@ -2253,7 +2148,7 @@
 
 // Helper that guards and inserts a pre-barrier.
 void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset,
-                                        Node* pre_val, int nargs, bool need_mem_bar) {
+                                        Node* pre_val, bool need_mem_bar) {
   // We could be accessing the referent field of a reference object. If so, when G1
   // is enabled, we need to log the value in the referent field in an SATB buffer.
   // This routine performs some compile time filters and generates suitable
@@ -2303,8 +2198,8 @@
   //   }
   // }
 
-  float likely  = PROB_LIKELY(0.999);
-  float unlikely  = PROB_UNLIKELY(0.999);
+  float likely   = PROB_LIKELY(  0.999);
+  float unlikely = PROB_UNLIKELY(0.999);
 
   IdealKit ideal(this);
 #define __ ideal.
@@ -2316,9 +2211,7 @@
       sync_kit(ideal);
 
       Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass()));
-      _sp += nargs;  // gen_instanceof might do an uncommon trap
       Node* is_instof = gen_instanceof(base_oop, ref_klass_con);
-      _sp -= nargs;
 
       // Update IdealKit memory and control from graphKit.
       __ sync_kit(this);
@@ -2402,7 +2295,7 @@
   {
     ResourceMark rm;
     // Check the signatures.
-    ciSignature* sig = signature();
+    ciSignature* sig = callee()->signature();
 #ifdef ASSERT
     if (!is_store) {
       // Object getObject(Object base, int/long offset), etc.
@@ -2440,42 +2333,19 @@
 
   C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
 
-  int type_words = type2size[ (type == T_ADDRESS) ? T_LONG : type ];
-
-  // Argument words:  "this" plus (oop/offset) or (lo/hi) args plus maybe 1 or 2 value words
-  int nargs = 1 + (is_native_ptr ? 2 : 3) + (is_store ? type_words : 0);
-  assert(callee()->arg_size() == nargs, "must be");
-
-  debug_only(int saved_sp = _sp);
-  _sp += nargs;
-
-  Node* val;
-  debug_only(val = (Node*)(uintptr_t)-1);
-
-
-  if (is_store) {
-    // Get the value being stored.  (Pop it first; it was pushed last.)
-    switch (type) {
-    case T_DOUBLE:
-    case T_LONG:
-    case T_ADDRESS:
-      val = pop_pair();
-      break;
-    default:
-      val = pop();
-    }
-  }
+  Node* receiver = argument(0);  // type: oop
 
   // Build address expression.  See the code in inline_unsafe_prefetch.
-  Node *adr;
-  Node *heap_base_oop = top();
+  Node* adr;
+  Node* heap_base_oop = top();
   Node* offset = top();
+  Node* val;
 
   if (!is_native_ptr) {
+    // The base is either a Java object or a value produced by Unsafe.staticFieldBase
+    Node* base = argument(1);  // type: oop
     // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
-    offset = pop_pair();
-    // The base is either a Java object or a value produced by Unsafe.staticFieldBase
-    Node* base   = pop();
+    offset = argument(2);  // type: long
     // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
     // to be plain byte offsets, which are also the same as those accepted
     // by oopDesc::field_base.
@@ -2485,18 +2355,14 @@
     offset = ConvL2X(offset);
     adr = make_unsafe_address(base, offset);
     heap_base_oop = base;
+    val = is_store ? argument(4) : NULL;
   } else {
-    Node* ptr = pop_pair();
-    // Adjust Java long to machine word:
-    ptr = ConvL2X(ptr);
+    Node* ptr = argument(1);  // type: long
+    ptr = ConvL2X(ptr);  // adjust Java long to machine word
     adr = make_unsafe_address(NULL, ptr);
+    val = is_store ? argument(3) : NULL;
   }
 
-  // Pop receiver last:  it was pushed first.
-  Node *receiver = pop();
-
-  assert(saved_sp == _sp, "must have correct argument count");
-
   const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
 
   // First guess at the value type.
@@ -2530,13 +2396,7 @@
     }
   }
 
-  // Null check on self without removing any arguments.  The argument
-  // null check technically happens in the wrong place, which can lead to
-  // invalid stack traces when the primitive is inlined into a method
-  // which handles NullPointerExceptions.
-  _sp += nargs;
-  do_null_check(receiver, T_OBJECT);
-  _sp -= nargs;
+  receiver = null_check(receiver);
   if (stopped()) {
     return true;
   }
@@ -2568,34 +2428,36 @@
 
   if (!is_store) {
     Node* p = make_load(control(), adr, value_type, type, adr_type, is_volatile);
-    // load value and push onto stack
+    // load value
     switch (type) {
     case T_BOOLEAN:
     case T_CHAR:
     case T_BYTE:
     case T_SHORT:
     case T_INT:
+    case T_LONG:
     case T_FLOAT:
-      push(p);
+    case T_DOUBLE:
       break;
     case T_OBJECT:
       if (need_read_barrier) {
-        insert_pre_barrier(heap_base_oop, offset, p, nargs, !(is_volatile || need_mem_bar));
+        insert_pre_barrier(heap_base_oop, offset, p, !(is_volatile || need_mem_bar));
       }
-      push(p);
       break;
     case T_ADDRESS:
       // Cast to an int type.
-      p = _gvn.transform( new (C) CastP2XNode(NULL,p) );
+      p = _gvn.transform(new (C) CastP2XNode(NULL, p));
       p = ConvX2L(p);
-      push_pair(p);
+      break;
+    default:
+      fatal(err_msg_res("unexpected type %d: %s", type, type2name(type)));
       break;
-    case T_DOUBLE:
-    case T_LONG:
-      push_pair( p );
-      break;
-    default: ShouldNotReachHere();
     }
+    // The load node has the control of the preceding MemBarCPUOrder.  All
+    // following nodes will have the control of the MemBarCPUOrder inserted at
+    // the end of this method.  So, pushing the load onto the stack at a later
+    // point is fine.
+    set_result(p);
   } else {
     // place effect of store into memory
     switch (type) {
@@ -2659,7 +2521,7 @@
   {
     ResourceMark rm;
     // Check the signatures.
-    ciSignature* sig = signature();
+    ciSignature* sig = callee()->signature();
 #ifdef ASSERT
     // Object getObject(Object base, int/long offset), etc.
     BasicType rtype = sig->return_type()->basic_type();
@@ -2677,19 +2539,21 @@
 
   C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
 
-  // Argument words:  "this" if not static, plus (oop/offset) or (lo/hi) args
-  int nargs = (is_static ? 0 : 1) + (is_native_ptr ? 2 : 3);
-
-  debug_only(int saved_sp = _sp);
-  _sp += nargs;
+  const int idx = is_static ? 0 : 1;
+  if (!is_static) {
+    null_check_receiver();
+    if (stopped()) {
+      return true;
+    }
+  }
 
   // Build address expression.  See the code in inline_unsafe_access.
   Node *adr;
   if (!is_native_ptr) {
+    // The base is either a Java object or a value produced by Unsafe.staticFieldBase
+    Node* base   = argument(idx + 0);  // type: oop
     // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
-    Node* offset = pop_pair();
-    // The base is either a Java object or a value produced by Unsafe.staticFieldBase
-    Node* base   = pop();
+    Node* offset = argument(idx + 1);  // type: long
     // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
     // to be plain byte offsets, which are also the same as those accepted
     // by oopDesc::field_base.
@@ -2699,31 +2563,11 @@
     offset = ConvL2X(offset);
     adr = make_unsafe_address(base, offset);
   } else {
-    Node* ptr = pop_pair();
-    // Adjust Java long to machine word:
-    ptr = ConvL2X(ptr);
+    Node* ptr = argument(idx + 0);  // type: long
+    ptr = ConvL2X(ptr);  // adjust Java long to machine word
     adr = make_unsafe_address(NULL, ptr);
   }
 
-  if (is_static) {
-    assert(saved_sp == _sp, "must have correct argument count");
-  } else {
-    // Pop receiver last:  it was pushed first.
-    Node *receiver = pop();
-    assert(saved_sp == _sp, "must have correct argument count");
-
-    // Null check on self without removing any arguments.  The argument
-    // null check technically happens in the wrong place, which can lead to
-    // invalid stack traces when the primitive is inlined into a method
-    // which handles NullPointerExceptions.
-    _sp += nargs;
-    do_null_check(receiver, T_OBJECT);
-    _sp -= nargs;
-    if (stopped()) {
-      return true;
-    }
-  }
-
   // Generate the read or write prefetch
   Node *prefetch;
   if (is_store) {
@@ -2738,7 +2582,22 @@
 }
 
 //----------------------------inline_unsafe_load_store----------------------------
-
+// This method serves a couple of different customers (depending on LoadStoreKind):
+//
+// LS_cmpxchg:
+//   public final native boolean compareAndSwapObject(Object o, long offset, Object expected, Object x);
+//   public final native boolean compareAndSwapInt(   Object o, long offset, int    expected, int    x);
+//   public final native boolean compareAndSwapLong(  Object o, long offset, long   expected, long   x);
+//
+// LS_xadd:
+//   public int  getAndAddInt( Object o, long offset, int  delta)
+//   public long getAndAddLong(Object o, long offset, long delta)
+//
+// LS_xchg:
+//   int    getAndSet(Object o, long offset, int    newValue)
+//   long   getAndSet(Object o, long offset, long   newValue)
+//   Object getAndSet(Object o, long offset, Object newValue)
+//
 bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind) {
   // This basic scheme here is the same as inline_unsafe_access, but
   // differs in enough details that combining them would make the code
@@ -2753,7 +2612,8 @@
   BasicType rtype;
   {
     ResourceMark rm;
-    ciSignature* sig = signature();
+    // Check the signatures.
+    ciSignature* sig = callee()->signature();
     rtype = sig->return_type()->basic_type();
     if (kind == LS_xadd || kind == LS_xchg) {
       // Check the signatures.
@@ -2778,28 +2638,31 @@
   }
 #endif //PRODUCT
 
-  // number of stack slots per value argument (1 or 2)
-  int type_words = type2size[type];
-
   C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
 
-  // Argument words:  "this" plus oop plus offset (plus oldvalue) plus newvalue/delta;
-  int nargs = 1 + 1 + 2  + ((kind == LS_cmpxchg) ? type_words : 0) + type_words;
-
-  // pop arguments: newval, offset, base, and receiver
-  debug_only(int saved_sp = _sp);
-  _sp += nargs;
-  Node* newval   = (type_words == 1) ? pop() : pop_pair();
-  Node* oldval   = (kind == LS_cmpxchg) ? ((type_words == 1) ? pop() : pop_pair()) : NULL;
-  Node *offset   = pop_pair();
-  Node *base     = pop();
-  Node *receiver = pop();
-  assert(saved_sp == _sp, "must have correct argument count");
-
-  //  Null check receiver.
-  _sp += nargs;
-  do_null_check(receiver, T_OBJECT);
-  _sp -= nargs;
+  // Get arguments:
+  Node* receiver = NULL;
+  Node* base     = NULL;
+  Node* offset   = NULL;
+  Node* oldval   = NULL;
+  Node* newval   = NULL;
+  if (kind == LS_cmpxchg) {
+    const bool two_slot_type = type2size[type] == 2;
+    receiver = argument(0);  // type: oop
+    base     = argument(1);  // type: oop
+    offset   = argument(2);  // type: long
+    oldval   = argument(4);  // type: oop, int, or long
+    newval   = argument(two_slot_type ? 6 : 5);  // type: oop, int, or long
+  } else if (kind == LS_xadd || kind == LS_xchg){
+    receiver = argument(0);  // type: oop
+    base     = argument(1);  // type: oop
+    offset   = argument(2);  // type: long
+    oldval   = NULL;
+    newval   = argument(4);  // type: oop, int, or long
+  }
+
+  // Null check receiver.
+  receiver = null_check(receiver);
   if (stopped()) {
     return true;
   }
@@ -2905,7 +2768,7 @@
     post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
     break;
   default:
-    ShouldNotReachHere();
+    fatal(err_msg_res("unexpected type %d: %s", type, type2name(type)));
     break;
   }
 
@@ -2926,10 +2789,14 @@
 #endif
 
   assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match");
-  push_node(load_store->bottom_type()->basic_type(), load_store);
+  set_result(load_store);
   return true;
 }
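
The slot arithmetic in the argument() calls above follows the JVM calling convention: the receiver sits in slot 0, the base oop in slot 1, the long offset in slots 2-3, so oldval begins at slot 4, and a two-slot oldval (long) pushes newval out to slot 6. A hedged sketch of how the three LoadStoreKinds are reached from ordinary code, via java.util.concurrent.atomic (which delegates to the listed Unsafe methods in the JDK 8 implementation):

    import java.util.concurrent.atomic.AtomicInteger;
    import java.util.concurrent.atomic.AtomicLong;

    public class LoadStoreKindsDemo {
        public static void main(String[] args) {
            AtomicInteger i = new AtomicInteger(0);
            i.compareAndSet(0, 1);   // LS_cmpxchg: Unsafe.compareAndSwapInt
            i.getAndAdd(41);         // LS_xadd:    Unsafe.getAndAddInt
            AtomicLong l = new AtomicLong(7L);
            l.getAndSet(42L);        // LS_xchg:    Unsafe.getAndSetLong
            System.out.println(i.get() + " " + l.get());  // 42 42
        }
    }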
 
+//----------------------------inline_unsafe_ordered_store----------------------
+// public native void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x);
+// public native void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x);
+// public native void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x);
 bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) {
   // This is another variant of inline_unsafe_access, differing in
   // that it always issues store-store ("release") barrier and ensures
@@ -2941,7 +2808,7 @@
   {
     ResourceMark rm;
     // Check the signatures.
-    ciSignature* sig = signature();
+    ciSignature* sig = callee()->signature();
 #ifdef ASSERT
     BasicType rtype = sig->return_type()->basic_type();
     assert(rtype == T_VOID, "must return void");
@@ -2952,27 +2819,16 @@
   }
 #endif //PRODUCT
 
-  // number of stack slots per value argument (1 or 2)
-  int type_words = type2size[type];
-
   C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
 
-  // Argument words:  "this" plus oop plus offset plus value;
-  int nargs = 1 + 1 + 2 + type_words;
-
-  // pop arguments: val, offset, base, and receiver
-  debug_only(int saved_sp = _sp);
-  _sp += nargs;
-  Node* val      = (type_words == 1) ? pop() : pop_pair();
-  Node *offset   = pop_pair();
-  Node *base     = pop();
-  Node *receiver = pop();
-  assert(saved_sp == _sp, "must have correct argument count");
-
-  //  Null check receiver.
-  _sp += nargs;
-  do_null_check(receiver, T_OBJECT);
-  _sp -= nargs;
+  // Get arguments:
+  Node* receiver = argument(0);  // type: oop
+  Node* base     = argument(1);  // type: oop
+  Node* offset   = argument(2);  // type: long
+  Node* val      = argument(4);  // type: oop, int, or long
+
+  // Null check receiver.
+  receiver = null_check(receiver);
   if (stopped()) {
     return true;
   }
@@ -2989,7 +2845,7 @@
   insert_mem_bar(Op_MemBarRelease);
   insert_mem_bar(Op_MemBarCPUOrder);
   // Ensure that the store is atomic for longs:
-  bool require_atomic_access = true;
+  const bool require_atomic_access = true;
   Node* store;
   if (type == T_OBJECT) // reference stores need a store barrier.
     store = store_oop_to_unknown(control(), base, adr, adr_type, val, type);
@@ -3000,20 +2856,37 @@
   return true;
 }
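
From the Java side the ordered stores are most commonly reached through lazySet, which (in the JDK 8 implementation) delegates to Unsafe.putOrderedInt; a small sketch:

    import java.util.concurrent.atomic.AtomicInteger;

    public class OrderedStoreDemo {
        public static void main(String[] args) {
            AtomicInteger flag = new AtomicInteger(0);
            // Release-ordered store with no trailing full barrier: it cannot
            // be reordered with earlier writes, but may become visible to
            // other threads later than a volatile store would.
            flag.lazySet(1);
            System.out.println(flag.get());  // 1
        }
    }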
 
+bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
+  // Regardless of form, don't allow previous ld/st to move down,
+  // then issue acquire, release, or volatile mem_bar.
+  insert_mem_bar(Op_MemBarCPUOrder);
+  switch(id) {
+    case vmIntrinsics::_loadFence:
+      insert_mem_bar(Op_MemBarAcquire);
+      return true;
+    case vmIntrinsics::_storeFence:
+      insert_mem_bar(Op_MemBarRelease);
+      return true;
+    case vmIntrinsics::_fullFence:
+      insert_mem_bar(Op_MemBarVolatile);
+      return true;
+    default:
+      fatal_unexpected_iid(id);
+      return false;
+  }
+}
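
The three fences map one-to-one onto MemBar nodes: loadFence to MemBarAcquire, storeFence to MemBarRelease, fullFence to MemBarVolatile, each pinned behind a MemBarCPUOrder. A hedged usage sketch (JDK 8, where sun.misc.Unsafe must be fished out reflectively):

    import java.lang.reflect.Field;
    import sun.misc.Unsafe;

    public class FenceDemo {
        public static void main(String[] args) throws Exception {
            Field f = Unsafe.class.getDeclaredField("theUnsafe");
            f.setAccessible(true);
            Unsafe u = (Unsafe) f.get(null);
            u.loadFence();   // MemBarAcquire: later loads/stores stay below it
            u.storeFence();  // MemBarRelease: earlier loads/stores stay above it
            u.fullFence();   // MemBarVolatile: full two-way barrier
        }
    }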
+
+//----------------------------inline_unsafe_allocate---------------------------
+// public native Object sun.misc.Unsafe.allocateInstance(Class<?> cls);
 bool LibraryCallKit::inline_unsafe_allocate() {
   if (callee()->is_static())  return false;  // caller must have the capability!
-  int nargs = 1 + 1;
-  assert(signature()->size() == nargs-1, "alloc has 1 argument");
-  null_check_receiver(callee());  // check then ignore argument(0)
-  _sp += nargs;  // set original stack for use by uncommon_trap
-  Node* cls = do_null_check(argument(1), T_OBJECT);
-  _sp -= nargs;
+
+  null_check_receiver();  // null-check, then ignore
+  Node* cls = null_check(argument(1));
   if (stopped())  return true;
 
-  Node* kls = load_klass_from_mirror(cls, false, nargs, NULL, 0);
-  _sp += nargs;  // set original stack for use by uncommon_trap
-  kls = do_null_check(kls, T_OBJECT);
-  _sp -= nargs;
+  Node* kls = load_klass_from_mirror(cls, false, NULL, 0);
+  kls = null_check(kls);
   if (stopped())  return true;  // argument was like int.class
 
   // Note:  The argument might still be an illegal value like
@@ -3024,12 +2897,11 @@
   // can generate code to load it as unsigned byte.
   Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN);
   Node* bits = intcon(InstanceKlass::fully_initialized);
-  Node* test = _gvn.transform( new (C) SubINode(inst, bits) );
+  Node* test = _gvn.transform(new (C) SubINode(inst, bits));
   // The 'test' is non-zero if we need to take a slow path.
 
   Node* obj = new_instance(kls, test);
-  push(obj);
-
+  set_result(obj);
   return true;
 }
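
The fully_initialized test above mirrors the user-visible contract: allocateInstance never runs a constructor, it only hands back zeroed storage for an already-initialized class. A hedged sketch (JDK 8, reflective Unsafe access as in the fence example):

    import java.lang.reflect.Field;
    import sun.misc.Unsafe;

    public class AllocateInstanceDemo {
        static class Point { int x = 7; }
        public static void main(String[] args) throws Exception {
            Field f = Unsafe.class.getDeclaredField("theUnsafe");
            f.setAccessible(true);
            Unsafe u = (Unsafe) f.get(null);
            Point p = (Point) u.allocateInstance(Point.class);
            System.out.println(p.x);  // 0, not 7: no constructor ran
        }
    }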
 
@@ -3040,15 +2912,10 @@
  * return myklass->trace_id & ~0x3
  */
 bool LibraryCallKit::inline_native_classID() {
-  int nargs = 1 + 1;
-  null_check_receiver(callee());  // check then ignore argument(0)
-  _sp += nargs;
-  Node* cls = do_null_check(argument(1), T_OBJECT);
-  _sp -= nargs;
-  Node* kls = load_klass_from_mirror(cls, false, nargs, NULL, 0);
-  _sp += nargs;
-  kls = do_null_check(kls, T_OBJECT);
-  _sp -= nargs;
+  null_check_receiver();  // null-check, then ignore
+  Node* cls = null_check(argument(1), T_OBJECT);
+  Node* kls = load_klass_from_mirror(cls, false, NULL, 0);
+  kls = null_check(kls, T_OBJECT);
   ByteSize offset = TRACE_ID_OFFSET;
   Node* insp = basic_plus_adr(kls, in_bytes(offset));
   Node* tvalue = make_load(NULL, insp, TypeLong::LONG, T_LONG);
@@ -3059,7 +2926,7 @@
 
   const TypePtr *adr_type = _gvn.type(insp)->isa_ptr();
   store_to_memory(control(), insp, orl, T_LONG, adr_type);
-  push_pair(andl);
+  set_result(andl);
   return true;
 }
 
@@ -3074,13 +2941,12 @@
   size_t thread_id_size = OSThread::thread_id_size();
   if (thread_id_size == (size_t) BytesPerLong) {
     threadid = ConvL2I(make_load(control(), p, TypeLong::LONG, T_LONG));
-    push(threadid);
   } else if (thread_id_size == (size_t) BytesPerInt) {
     threadid = make_load(control(), p, TypeInt::INT, T_INT);
-    push(threadid);
   } else {
     ShouldNotReachHere();
   }
+  set_result(threadid);
   return true;
 }
 #endif
@@ -3089,29 +2955,28 @@
 // inline code for System.currentTimeMillis() and System.nanoTime()
 // these have the same type and signature
 bool LibraryCallKit::inline_native_time_funcs(address funcAddr, const char* funcName) {
-  const TypeFunc *tf = OptoRuntime::void_long_Type();
+  const TypeFunc* tf = OptoRuntime::void_long_Type();
   const TypePtr* no_memory_effects = NULL;
   Node* time = make_runtime_call(RC_LEAF, tf, funcAddr, funcName, no_memory_effects);
   Node* value = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms+0));
 #ifdef ASSERT
-  Node* value_top = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms + 1));
+  Node* value_top = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms+1));
   assert(value_top == top(), "second value must be top");
 #endif
-  push_pair(value);
+  set_result(value);
   return true;
 }
 
 //------------------------inline_native_currentThread------------------
 bool LibraryCallKit::inline_native_currentThread() {
   Node* junk = NULL;
-  push(generate_current_thread(junk));
+  set_result(generate_current_thread(junk));
   return true;
 }
 
 //------------------------inline_native_isInterrupted------------------
+// private native boolean java.lang.Thread.isInterrupted(boolean ClearInterrupted);
 bool LibraryCallKit::inline_native_isInterrupted() {
-  const int nargs = 1+1;  // receiver + boolean
-  assert(nargs == arg_size(), "sanity");
   // Add a fast path to t.isInterrupted(clear_int):
   //   (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int))
   //   ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int)
@@ -3123,14 +2988,23 @@
 
   // We only go to the fast case code if we pass two guards.
   // Paths which do not pass are accumulated in the slow_region.
+
+  enum {
+    no_int_result_path   = 1, // t == Thread.current() && !TLS._osthread._interrupted
+    no_clear_result_path = 2, // t == Thread.current() &&  TLS._osthread._interrupted && !clear_int
+    slow_result_path     = 3, // slow path: t.isInterrupted(clear_int)
+    PATH_LIMIT
+  };
+
+  // Ensure that it's not possible to move the load of TLS._osthread._interrupted flag
+  // out of the function.
+  insert_mem_bar(Op_MemBarCPUOrder);
+
+  RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT);
+  PhiNode*    result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
+
   RegionNode* slow_region = new (C) RegionNode(1);
   record_for_igvn(slow_region);
-  RegionNode* result_rgn = new (C) RegionNode(1+3); // fast1, fast2, slow
-  PhiNode*    result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
-  enum { no_int_result_path   = 1,
-         no_clear_result_path = 2,
-         slow_result_path     = 3
-  };
 
   // (a) Receiving thread must be the current thread.
   Node* rec_thr = argument(0);
@@ -3139,14 +3013,13 @@
   Node* cmp_thr = _gvn.transform( new (C) CmpPNode(cur_thr, rec_thr) );
   Node* bol_thr = _gvn.transform( new (C) BoolNode(cmp_thr, BoolTest::ne) );
 
-  bool known_current_thread = (_gvn.type(bol_thr) == TypeInt::ZERO);
-  if (!known_current_thread)
-    generate_slow_guard(bol_thr, slow_region);
+  generate_slow_guard(bol_thr, slow_region);
 
   // (b) Interrupt bit on TLS must be false.
   Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
   Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
   p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
+
   // Set the control input on the field _interrupted read to prevent it floating up.
   Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT);
   Node* cmp_bit = _gvn.transform( new (C) CmpINode(int_bit, intcon(0)) );
@@ -3191,27 +3064,24 @@
     Node* slow_val = set_results_for_java_call(slow_call);
     // this->control() comes from set_results_for_java_call
 
-    // If we know that the result of the slow call will be true, tell the optimizer!
-    if (known_current_thread)  slow_val = intcon(1);
-
     Node* fast_io  = slow_call->in(TypeFunc::I_O);
     Node* fast_mem = slow_call->in(TypeFunc::Memory);
+
     // These two phis are pre-filled with copies of the fast IO and Memory
-    Node* io_phi   = PhiNode::make(result_rgn, fast_io,  Type::ABIO);
-    Node* mem_phi  = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
+    PhiNode* result_mem  = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
+    PhiNode* result_io   = PhiNode::make(result_rgn, fast_io,  Type::ABIO);
 
     result_rgn->init_req(slow_result_path, control());
-    io_phi    ->init_req(slow_result_path, i_o());
-    mem_phi   ->init_req(slow_result_path, reset_memory());
+    result_io ->init_req(slow_result_path, i_o());
+    result_mem->init_req(slow_result_path, reset_memory());
     result_val->init_req(slow_result_path, slow_val);
 
-    set_all_memory( _gvn.transform(mem_phi) );
-    set_i_o(        _gvn.transform(io_phi) );
+    set_all_memory(_gvn.transform(result_mem));
+    set_i_o(       _gvn.transform(result_io));
   }
 
-  push_result(result_rgn, result_val);
   C->set_has_split_ifs(true); // Has chance for split-if optimization
-
+  set_result(result_rgn, result_val);
   return true;
 }
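
Restated as ordinary Java over a stand-in flag (TLS._osthread._interrupted is VM-internal and unreachable from Java; `interrupted` below is a hypothetical stand-in, kept only to show the three result paths):

    public class IsInterruptedSketch {
        static volatile boolean interrupted;  // hypothetical stand-in for TLS._osthread._interrupted

        static boolean fastIsInterrupted(Thread t, boolean clearInt) {
            if (t == Thread.currentThread()) {   // guard (a): must be the current thread
                if (!interrupted) return false;  // no_int_result_path
                if (!clearInt)    return true;   // no_clear_result_path
            }
            return t.isInterrupted();            // slow_result_path (the real slow path also clears)
        }

        public static void main(String[] args) {
            System.out.println(fastIsInterrupted(Thread.currentThread(), false));  // false
        }
    }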
 
@@ -3231,7 +3101,6 @@
 // If the region is NULL, force never_see_null = true.
 Node* LibraryCallKit::load_klass_from_mirror_common(Node* mirror,
                                                     bool never_see_null,
-                                                    int nargs,
                                                     RegionNode* region,
                                                     int null_path,
                                                     int offset) {
@@ -3239,7 +3108,6 @@
   Node* p = basic_plus_adr(mirror, offset);
   const TypeKlassPtr*  kls_type = TypeKlassPtr::OBJECT_OR_NULL;
   Node* kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type) );
-  _sp += nargs; // any deopt will start just before call to enclosing method
   Node* null_ctl = top();
   kls = null_check_oop(kls, &null_ctl, never_see_null);
   if (region != NULL) {
@@ -3248,7 +3116,6 @@
   } else {
     assert(null_ctl == top(), "no loose ends");
   }
-  _sp -= nargs;
   return kls;
 }
 
@@ -3273,7 +3140,6 @@
 
 //-------------------------inline_native_Class_query-------------------
 bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
-  int nargs = 1+0;  // just the Class mirror, in most cases
   const Type* return_type = TypeInt::BOOL;
   Node* prim_return_value = top();  // what happens if it's a primitive class?
   bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
@@ -3281,11 +3147,14 @@
 
   enum { _normal_path = 1, _prim_path = 2, PATH_LIMIT };
 
+  Node* mirror = argument(0);
+  Node* obj    = top();
+
   switch (id) {
   case vmIntrinsics::_isInstance:
-    nargs = 1+1;  // the Class mirror, plus the object getting queried about
     // nothing is an instance of a primitive type
     prim_return_value = intcon(0);
+    obj = argument(1);
     break;
   case vmIntrinsics::_getModifiers:
     prim_return_value = intcon(JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC);
@@ -3316,12 +3185,10 @@
     return_type = TypeInt::INT;  // not bool!  6297094
     break;
   default:
-    ShouldNotReachHere();
+    fatal_unexpected_iid(id);
+    break;
   }
 
-  Node* mirror =                      argument(0);
-  Node* obj    = (nargs <= 1)? top(): argument(1);
-
   const TypeInstPtr* mirror_con = _gvn.type(mirror)->isa_instptr();
   if (mirror_con == NULL)  return false;  // cannot happen?
 
@@ -3348,9 +3215,7 @@
   // For Reflection.getClassAccessFlags(), the null check occurs in
   // the wrong place; see inline_unsafe_access(), above, for a similar
   // situation.
-  _sp += nargs;  // set original stack for use by uncommon_trap
-  mirror = do_null_check(mirror, T_OBJECT);
-  _sp -= nargs;
+  mirror = null_check(mirror);
   // If mirror or obj is dead, only null-path is taken.
   if (stopped())  return true;
 
@@ -3358,11 +3223,10 @@
 
   // Now load the mirror's klass metaobject, and null-check it.
   // Side-effects region with the control path if the klass is null.
-  Node* kls = load_klass_from_mirror(mirror, never_see_null, nargs,
-                                     region, _prim_path);
+  Node* kls = load_klass_from_mirror(mirror, never_see_null, region, _prim_path);
   // If kls is null, we have a primitive mirror.
   phi->init_req(_prim_path, prim_return_value);
-  if (stopped()) { push_result(region, phi); return true; }
+  if (stopped()) { set_result(region, phi); return true; }
 
   Node* p;  // handy temp
   Node* null_ctl;
@@ -3373,9 +3237,7 @@
   switch (id) {
   case vmIntrinsics::_isInstance:
     // nothing is an instance of a primitive type
-    _sp += nargs;          // gen_instanceof might do an uncommon trap
     query_value = gen_instanceof(obj, kls);
-    _sp -= nargs;
     break;
 
   case vmIntrinsics::_getModifiers:
@@ -3450,16 +3312,16 @@
     break;
 
   default:
-    ShouldNotReachHere();
+    fatal_unexpected_iid(id);
+    break;
   }
 
   // Fall-through is the normal case of a query to a real class.
   phi->init_req(1, query_value);
   region->init_req(1, control());
 
-  push_result(region, phi);
   C->set_has_split_ifs(true); // Has chance for split-if optimization
-
+  set_result(region, phi);
   return true;
 }
 
@@ -3467,8 +3329,6 @@
 // This intrinsic takes the JNI calls out of the heart of
 // UnsafeFieldAccessorImpl.set, which improves Field.set, readObject, etc.
 bool LibraryCallKit::inline_native_subtype_check() {
-  int nargs = 1+1;  // the Class mirror, plus the other class getting examined
-
   // Pull both arguments off the stack.
   Node* args[2];                // two java.lang.Class mirrors: superc, subc
   args[0] = argument(0);
@@ -3499,11 +3359,9 @@
   int which_arg;
   for (which_arg = 0; which_arg <= 1; which_arg++) {
     Node* arg = args[which_arg];
-    _sp += nargs;  // set original stack for use by uncommon_trap
-    arg = do_null_check(arg, T_OBJECT);
-    _sp -= nargs;
+    arg = null_check(arg);
     if (stopped())  break;
-    args[which_arg] = _gvn.transform(arg);
+    args[which_arg] = arg;
 
     Node* p = basic_plus_adr(arg, class_klass_offset);
     Node* kls = LoadKlassNode::make(_gvn, immutable_memory(), p, adr_type, kls_type);
@@ -3515,9 +3373,7 @@
   for (which_arg = 0; which_arg <= 1; which_arg++) {
     Node* kls = klasses[which_arg];
     Node* null_ctl = top();
-    _sp += nargs;  // set original stack for use by uncommon_trap
     kls = null_check_oop(kls, &null_ctl, never_see_null);
-    _sp -= nargs;
     int prim_path = (which_arg == 0 ? _prim_0_path : _prim_1_path);
     region->init_req(prim_path, null_ctl);
     if (stopped())  break;
@@ -3567,8 +3423,7 @@
   }
 
   set_control(_gvn.transform(region));
-  push(_gvn.transform(phi));
-
+  set_result(_gvn.transform(phi));
   return true;
 }
 
@@ -3616,14 +3471,12 @@
 
 
 //-----------------------inline_native_newArray--------------------------
+// private static native Object java.lang.reflect.Array.newArray(Class<?> componentType, int length);
 bool LibraryCallKit::inline_native_newArray() {
-  int nargs = 2;
   Node* mirror    = argument(0);
   Node* count_val = argument(1);
 
-  _sp += nargs;  // set original stack for use by uncommon_trap
-  mirror = do_null_check(mirror, T_OBJECT);
-  _sp -= nargs;
+  mirror = null_check(mirror);
   // If mirror or obj is dead, only null-path is taken.
   if (stopped())  return true;
 
@@ -3637,7 +3490,6 @@
 
   bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
   Node* klass_node = load_array_klass_from_mirror(mirror, never_see_null,
-                                                  nargs,
                                                   result_reg, _slow_path);
   Node* normal_ctl   = control();
   Node* no_array_ctl = result_reg->in(_slow_path);
@@ -3664,7 +3516,7 @@
     // Normal case:  The array type has been cached in the java.lang.Class.
     // The following call works fine even if the array type is polymorphic.
     // It could be a dynamic mix of int[], boolean[], Object[], etc.
-    Node* obj = new_array(klass_node, count_val, nargs);
+    Node* obj = new_array(klass_node, count_val, 0);  // no arguments to push
     result_reg->init_req(_normal_path, control());
     result_val->init_req(_normal_path, obj);
     result_io ->init_req(_normal_path, i_o());
@@ -3674,23 +3526,18 @@
   // Return the combined state.
   set_i_o(        _gvn.transform(result_io)  );
   set_all_memory( _gvn.transform(result_mem) );
-  push_result(result_reg, result_val);
+
   C->set_has_split_ifs(true); // Has chance for split-if optimization
-
+  set_result(result_reg, result_val);
   return true;
 }
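
Reached from Java through Array.newInstance, which bottoms out in the private native newArray above; with a constant component mirror the intrinsic turns the call into an inline allocation:

    import java.lang.reflect.Array;

    public class NewArrayDemo {
        public static void main(String[] args) {
            int[] a = (int[]) Array.newInstance(int.class, 16);
            System.out.println(a.length);  // 16
        }
    }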
 
 //----------------------inline_native_getLength--------------------------
+// public static native int java.lang.reflect.Array.getLength(Object array);
 bool LibraryCallKit::inline_native_getLength() {
   if (too_many_traps(Deoptimization::Reason_intrinsic))  return false;
 
-  int nargs = 1;
-  Node* array = argument(0);
-
-  _sp += nargs;  // set original stack for use by uncommon_trap
-  array = do_null_check(array, T_OBJECT);
-  _sp -= nargs;
-
+  Node* array = null_check(argument(0));
   // If array is dead, only null-path is taken.
   if (stopped())  return true;
 
@@ -3700,7 +3547,6 @@
   if (non_array != NULL) {
     PreserveJVMState pjvms(this);
     set_control(non_array);
-    _sp += nargs;  // push the arguments back on the stack
     uncommon_trap(Deoptimization::Reason_intrinsic,
                   Deoptimization::Action_maybe_recompile);
   }
@@ -3710,19 +3556,20 @@
 
  // This works fine even if the array type is polymorphic.
   // It could be a dynamic mix of int[], boolean[], Object[], etc.
-  push( load_array_length(array) );
-
-  C->set_has_split_ifs(true); // Has chance for split-if optimization
-
+  Node* result = load_array_length(array);
+
+  C->set_has_split_ifs(true);  // Has chance for split-if optimization
+  set_result(result);
   return true;
 }
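
And the matching one-liner for the getLength intrinsic:

    System.out.println(java.lang.reflect.Array.getLength(new byte[8]));  // 8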
 
 //------------------------inline_array_copyOf----------------------------
+// public static <T,U> T[] java.util.Arrays.copyOf(     U[] original, int newLength,         Class<? extends T[]> newType);
+// public static <T,U> T[] java.util.Arrays.copyOfRange(U[] original, int from,      int to, Class<? extends T[]> newType);
 bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) {
   if (too_many_traps(Deoptimization::Reason_intrinsic))  return false;
 
-  // Restore the stack and pop off the arguments.
-  int nargs = 3 + (is_copyOfRange? 1: 0);
+  // Get the arguments.
   Node* original          = argument(0);
   Node* start             = is_copyOfRange? argument(1): intcon(0);
   Node* end               = is_copyOfRange? argument(2): argument(1);
@@ -3730,23 +3577,21 @@
 
   Node* newcopy;
 
-  //set the original stack and the reexecute bit for the interpreter to reexecute
-  //the bytecode that invokes Arrays.copyOf if deoptimization happens
+  // Set the original stack and the reexecute bit for the interpreter to reexecute
+  // the bytecode that invokes Arrays.copyOf if deoptimization happens.
   { PreserveReexecuteState preexecs(this);
-    _sp += nargs;
     jvms()->set_should_reexecute(true);
 
-    array_type_mirror = do_null_check(array_type_mirror, T_OBJECT);
-    original          = do_null_check(original, T_OBJECT);
+    array_type_mirror = null_check(array_type_mirror);
+    original          = null_check(original);
 
     // Check if a null path was taken unconditionally.
     if (stopped())  return true;
 
     Node* orig_length = load_array_length(original);
 
-    Node* klass_node = load_klass_from_mirror(array_type_mirror, false, 0,
-                                              NULL, 0);
-    klass_node = do_null_check(klass_node, T_OBJECT);
+    Node* klass_node = load_klass_from_mirror(array_type_mirror, false, NULL, 0);
+    klass_node = null_check(klass_node);
 
     RegionNode* bailout = new (C) RegionNode(1);
     record_for_igvn(bailout);
@@ -3769,7 +3614,7 @@
 
     Node* length = end;
     if (_gvn.type(start) != TypeInt::ZERO) {
-      length = _gvn.transform( new (C) SubINode(end, start) );
+      length = _gvn.transform(new (C) SubINode(end, start));
     }
 
     // Bail out if length is negative.
@@ -3780,19 +3625,18 @@
 
     if (bailout->req() > 1) {
       PreserveJVMState pjvms(this);
-      set_control( _gvn.transform(bailout) );
+      set_control(_gvn.transform(bailout));
       uncommon_trap(Deoptimization::Reason_intrinsic,
                     Deoptimization::Action_maybe_recompile);
     }
 
     if (!stopped()) {
-
       // How many elements will we copy from the original?
       // The answer is MinI(orig_length - start, length).
-      Node* orig_tail = _gvn.transform( new(C) SubINode(orig_length, start) );
+      Node* orig_tail = _gvn.transform(new (C) SubINode(orig_length, start));
       Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
 
-      newcopy = new_array(klass_node, length, 0);
+      newcopy = new_array(klass_node, length, 0);  // no arguments to push
 
       // Generate a direct call to the right arraycopy function(s).
       // We know the copy is disjoint but we might not know if the
@@ -3807,14 +3651,12 @@
                          original, start, newcopy, intcon(0), moved,
                          disjoint_bases, length_never_negative);
     }
-  } //original reexecute and sp are set back here
-
-  if(!stopped()) {
-    push(newcopy);
-  }
+  } // original reexecute is set back here
 
   C->set_has_split_ifs(true); // Has chance for split-if optimization
-
+  if (!stopped()) {
+    set_result(newcopy);
+  }
   return true;
 }
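
A small Java check of the two entry points and of the MinI(orig_length - start, length) copy count computed above (elements past the source tail are padded with null/zero):

    import java.util.Arrays;

    public class CopyOfDemo {
        public static void main(String[] args) {
            Integer[] src = { 1, 2, 3, 4 };
            Number[]  wide = Arrays.copyOf(src, 6, Number[].class);  // copies 4, pads 2 nulls
            Integer[] mid  = Arrays.copyOfRange(src, 1, 3);          // { 2, 3 }
            System.out.println(wide.length + " " + wide[5] + " " + mid[0]);  // 6 null 2
        }
    }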
 
@@ -3830,7 +3672,7 @@
                      vtable_index*vtableEntry::size()) * wordSize +
                      vtableEntry::method_offset_in_bytes();
   Node* entry_addr  = basic_plus_adr(obj_klass, entry_offset);
-  Node* target_call = make_load(NULL, entry_addr, TypeInstPtr::NOTNULL, T_OBJECT);
+  Node* target_call = make_load(NULL, entry_addr, TypePtr::NOTNULL, T_ADDRESS);
 
   // Compare the target method with the expected method (e.g., Object.hashCode).
   const TypePtr* native_call_addr = TypeMetadataPtr::make(method);
@@ -3866,7 +3708,7 @@
                            SharedRuntime::get_resolve_static_call_stub(),
                            method, bci());
   } else if (is_virtual) {
-    null_check_receiver(method);
+    null_check_receiver();
     int vtable_index = Method::invalid_vtable_index;
     if (UseInlineCaches) {
       // Suppress the vtable call
@@ -3880,7 +3722,7 @@
                           SharedRuntime::get_resolve_virtual_call_stub(),
                           method, vtable_index, bci());
   } else {  // neither virtual nor static:  opt_virtual
-    null_check_receiver(method);
+    null_check_receiver();
     slow_call = new(C) CallStaticJavaNode(tf,
                                 SharedRuntime::get_resolve_opt_virtual_call_stub(),
                                 method, bci());
@@ -3909,7 +3751,7 @@
   Node* obj = NULL;
   if (!is_static) {
     // Check for hashing null object
-    obj = null_check_receiver(callee());
+    obj = null_check_receiver();
     if (stopped())  return true;        // unconditionally null
     result_reg->init_req(_null_path, top());
     result_val->init_req(_null_path, top());
@@ -3925,9 +3767,9 @@
 
   // Unconditionally null?  Then return right away.
   if (stopped()) {
-    set_control( result_reg->in(_null_path) );
+    set_control( result_reg->in(_null_path));
     if (!stopped())
-      push(      result_val ->in(_null_path) );
+      set_result(result_val->in(_null_path));
     return true;
   }
 
@@ -4000,8 +3842,7 @@
   if (!stopped()) {
     // No need for PreserveJVMState, because we're using up the present state.
     set_all_memory(init_mem);
-    vmIntrinsics::ID hashCode_id = vmIntrinsics::_hashCode;
-    if (is_static)   hashCode_id = vmIntrinsics::_identityHashCode;
+    vmIntrinsics::ID hashCode_id = is_static ? vmIntrinsics::_identityHashCode : vmIntrinsics::_hashCode;
     CallJavaNode* slow_call = generate_method_call(hashCode_id, is_virtual, is_static);
     Node* slow_result = set_results_for_java_call(slow_call);
     // this->control() comes from set_results_for_java_call
@@ -4014,48 +3855,38 @@
   // Return the combined state.
   set_i_o(        _gvn.transform(result_io)  );
   set_all_memory( _gvn.transform(result_mem) );
-  push_result(result_reg, result_val);
-
+
+  set_result(result_reg, result_val);
   return true;
 }
 
 //---------------------------inline_native_getClass----------------------------
+// public final native Class<?> java.lang.Object.getClass();
+//
 // Build special case code for calls to getClass on an object.
 bool LibraryCallKit::inline_native_getClass() {
-  Node* obj = null_check_receiver(callee());
+  Node* obj = null_check_receiver();
   if (stopped())  return true;
-  push( load_mirror_from_klass(load_object_klass(obj)) );
+  set_result(load_mirror_from_klass(load_object_klass(obj)));
   return true;
 }
 
 //-----------------inline_native_Reflection_getCallerClass---------------------
+// public static native Class<?> sun.reflect.Reflection.getCallerClass(int realFramesToSkip);
+//
 // In the presence of deep enough inlining, getCallerClass() becomes a no-op.
 //
 // NOTE that this code must perform the same logic as
 // vframeStream::security_get_caller_frame in that it must skip
 // Method.invoke() and auxiliary frames.
-
-
-
-
 bool LibraryCallKit::inline_native_Reflection_getCallerClass() {
-  ciMethod*       method = callee();
-
 #ifndef PRODUCT
   if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
     tty->print_cr("Attempting to inline sun.reflect.Reflection.getCallerClass");
   }
 #endif
 
-  debug_only(int saved_sp = _sp);
-
-  // Argument words:  (int depth)
-  int nargs = 1;
-
-  _sp += nargs;
-  Node* caller_depth_node = pop();
-
-  assert(saved_sp == _sp, "must have correct argument count");
+  Node* caller_depth_node = argument(0);
 
   // The depth value must be a constant in order for the runtime call
   // to be eliminated.
@@ -4127,7 +3958,8 @@
       tty->print_cr("  Bailing out because caller depth (%d) exceeded inlining depth (%d)", caller_depth_type->get_con(), _depth);
       tty->print_cr("  JVM state at this point:");
       for (int i = _depth; i >= 1; i--) {
-        tty->print_cr("   %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8());
+        ciMethod* m = jvms()->of_depth(i)->method();
+        tty->print_cr("   %d) %s.%s", i, m->holder()->name()->as_utf8(), m->name()->as_utf8());
       }
     }
 #endif
@@ -4137,14 +3969,17 @@
   // Acquire method holder as java.lang.Class
   ciInstanceKlass* caller_klass  = caller_jvms->method()->holder();
   ciInstance*      caller_mirror = caller_klass->java_mirror();
+
   // Push this as a constant
-  push(makecon(TypeInstPtr::make(caller_mirror)));
+  set_result(makecon(TypeInstPtr::make(caller_mirror)));
+
 #ifndef PRODUCT
   if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
     tty->print_cr("  Succeeded: caller = %s.%s, caller depth = %d, depth = %d", caller_klass->name()->as_utf8(), caller_jvms->method()->name()->as_utf8(), caller_depth_type->get_con(), _depth);
     tty->print_cr("  JVM state at this point:");
     for (int i = _depth; i >= 1; i--) {
-      tty->print_cr("   %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8());
+      ciMethod* m = jvms()->of_depth(i)->method();
+      tty->print_cr("   %d) %s.%s", i, m->holder()->name()->as_utf8(), m->name()->as_utf8());
     }
   }
 #endif
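
A hedged usage note: the int-depth overload intrinsified here is the JDK 7/8-era sun.reflect API, and as the bailout logic above shows, the depth must be a compile-time constant for the runtime call to fold away:

    Class<?> caller = sun.reflect.Reflection.getCallerClass(2);  // constant depth, eligible to fold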
@@ -4180,36 +4015,23 @@
 }
 
 bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) {
-  // restore the arguments
-  _sp += arg_size();
+  Node* arg = argument(0);
+  Node* result;
 
   switch (id) {
-  case vmIntrinsics::_floatToRawIntBits:
-    push(_gvn.transform( new (C) MoveF2INode(pop())));
-    break;
-
-  case vmIntrinsics::_intBitsToFloat:
-    push(_gvn.transform( new (C) MoveI2FNode(pop())));
-    break;
-
-  case vmIntrinsics::_doubleToRawLongBits:
-    push_pair(_gvn.transform( new (C) MoveD2LNode(pop_pair())));
-    break;
-
-  case vmIntrinsics::_longBitsToDouble:
-    push_pair(_gvn.transform( new (C) MoveL2DNode(pop_pair())));
-    break;
+  case vmIntrinsics::_floatToRawIntBits:    result = new (C) MoveF2INode(arg);  break;
+  case vmIntrinsics::_intBitsToFloat:       result = new (C) MoveI2FNode(arg);  break;
+  case vmIntrinsics::_doubleToRawLongBits:  result = new (C) MoveD2LNode(arg);  break;
+  case vmIntrinsics::_longBitsToDouble:     result = new (C) MoveL2DNode(arg);  break;
 
   case vmIntrinsics::_doubleToLongBits: {
-    Node* value = pop_pair();
-
     // two paths (plus control) merge in a wood
     RegionNode *r = new (C) RegionNode(3);
     Node *phi = new (C) PhiNode(r, TypeLong::LONG);
 
-    Node *cmpisnan = _gvn.transform( new (C) CmpDNode(value, value));
+    Node *cmpisnan = _gvn.transform(new (C) CmpDNode(arg, arg));
     // Build the boolean node
-    Node *bolisnan = _gvn.transform( new (C) BoolNode( cmpisnan, BoolTest::ne ) );
+    Node *bolisnan = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::ne));
 
     // Branch either way.
     // NaN case is less traveled, which makes all the difference.
@@ -4227,35 +4049,30 @@
     r->init_req(1, iftrue);
 
     // Else fall through
-    Node *iffalse = _gvn.transform( new (C) IfFalseNode(opt_ifisnan) );
+    Node *iffalse = _gvn.transform(new (C) IfFalseNode(opt_ifisnan));
     set_control(iffalse);
 
-    phi->init_req(2, _gvn.transform( new (C) MoveD2LNode(value)));
+    phi->init_req(2, _gvn.transform(new (C) MoveD2LNode(arg)));
     r->init_req(2, iffalse);
 
     // Post merge
     set_control(_gvn.transform(r));
     record_for_igvn(r);
 
-    Node* result = _gvn.transform(phi);
+    C->set_has_split_ifs(true); // Has chance for split-if optimization
+    result = phi;
     assert(result->bottom_type()->isa_long(), "must be");
-    push_pair(result);
-
-    C->set_has_split_ifs(true); // Has chance for split-if optimization
-
     break;
   }
 
   case vmIntrinsics::_floatToIntBits: {
-    Node* value = pop();
-
     // two paths (plus control) merge in a wood
     RegionNode *r = new (C) RegionNode(3);
     Node *phi = new (C) PhiNode(r, TypeInt::INT);
 
-    Node *cmpisnan = _gvn.transform( new (C) CmpFNode(value, value));
+    Node *cmpisnan = _gvn.transform(new (C) CmpFNode(arg, arg));
     // Build the boolean node
-    Node *bolisnan = _gvn.transform( new (C) BoolNode( cmpisnan, BoolTest::ne ) );
+    Node *bolisnan = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::ne));
 
     // Branch either way.
     // NaN case is less traveled, which makes all the difference.
@@ -4273,29 +4090,27 @@
     r->init_req(1, iftrue);
 
     // Else fall through
-    Node *iffalse = _gvn.transform( new (C) IfFalseNode(opt_ifisnan) );
+    Node *iffalse = _gvn.transform(new (C) IfFalseNode(opt_ifisnan));
     set_control(iffalse);
 
-    phi->init_req(2, _gvn.transform( new (C) MoveF2INode(value)));
+    phi->init_req(2, _gvn.transform(new (C) MoveF2INode(arg)));
     r->init_req(2, iffalse);
 
     // Post merge
     set_control(_gvn.transform(r));
     record_for_igvn(r);
 
-    Node* result = _gvn.transform(phi);
+    C->set_has_split_ifs(true); // Has chance for split-if optimization
+    result = phi;
     assert(result->bottom_type()->isa_int(), "must be");
-    push(result);
-
-    C->set_has_split_ifs(true); // Has chance for split-if optimization
-
     break;
   }
 
   default:
-    ShouldNotReachHere();
+    fatal_unexpected_iid(id);
+    break;
   }
-
+  set_result(_gvn.transform(result));
   return true;
 }
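
The NaN diamond built above is exactly the difference between the raw and the canonicalizing conversions; a hedged check (payload preservation by the raw form is platform-dependent):

    public class NaNBitsDemo {
        public static void main(String[] args) {
            float nan = Float.intBitsToFloat(0x7fc00001);  // NaN with a payload bit set
            System.out.println(Integer.toHexString(Float.floatToIntBits(nan)));     // 7fc00000 (canonical)
            System.out.println(Integer.toHexString(Float.floatToRawIntBits(nan)));  // typically 7fc00001
        }
    }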
 
@@ -4306,23 +4121,19 @@
 #endif //_LP64
 
 //----------------------inline_unsafe_copyMemory-------------------------
+// public native void sun.misc.Unsafe.copyMemory(Object srcBase, long srcOffset, Object destBase, long destOffset, long bytes);
 bool LibraryCallKit::inline_unsafe_copyMemory() {
   if (callee()->is_static())  return false;  // caller must have the capability!
-  int nargs = 1 + 5 + 3;  // 5 args:  (src: ptr,off, dst: ptr,off, size)
-  assert(signature()->size() == nargs-1, "copy has 5 arguments");
-  null_check_receiver(callee());  // check then ignore argument(0)
+  null_check_receiver();  // null-check receiver
   if (stopped())  return true;
 
   C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
 
-  Node* src_ptr = argument(1);
-  Node* src_off = ConvL2X(argument(2));
-  assert(argument(3)->is_top(), "2nd half of long");
-  Node* dst_ptr = argument(4);
-  Node* dst_off = ConvL2X(argument(5));
-  assert(argument(6)->is_top(), "2nd half of long");
-  Node* size    = ConvL2X(argument(7));
-  assert(argument(8)->is_top(), "2nd half of long");
+  Node* src_ptr =         argument(1);   // type: oop
+  Node* src_off = ConvL2X(argument(2));  // type: long
+  Node* dst_ptr =         argument(4);   // type: oop
+  Node* dst_off = ConvL2X(argument(5));  // type: long
+  Node* size    = ConvL2X(argument(7));  // type: long
 
   assert(Unsafe_field_offset_to_byte_offset(11) == 11,
          "fieldOffset must be byte-scaled");
@@ -4442,6 +4253,8 @@
 }
 
 //------------------------inline_native_clone----------------------------
+// protected native Object java.lang.Object.clone();
+//
 // Here are the simple edge cases:
 //  null receiver => normal trap
 //  virtual and clone was overridden => slow path to out-of-line clone
@@ -4458,20 +4271,16 @@
 // can be sharply typed as an object array, a type array, or an instance.
 //
 bool LibraryCallKit::inline_native_clone(bool is_virtual) {
-  int nargs = 1;
   PhiNode* result_val;
 
-  //set the original stack and the reexecute bit for the interpreter to reexecute
-  //the bytecode that invokes Object.clone if deoptimization happens
+  // Set the reexecute bit for the interpreter to reexecute
+  // the bytecode that invokes Object.clone if deoptimization happens.
   { PreserveReexecuteState preexecs(this);
     jvms()->set_should_reexecute(true);
 
-    //null_check_receiver will adjust _sp (push and pop)
-    Node* obj = null_check_receiver(callee());
+    Node* obj = null_check_receiver();
     if (stopped())  return true;
 
-    _sp += nargs;
-
     Node* obj_klass = load_object_klass(obj);
     const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr();
     const TypeOopPtr*   toop   = ((tklass != NULL)
@@ -4508,7 +4317,7 @@
       set_control(array_ctl);
       Node* obj_length = load_array_length(obj);
       Node* obj_size  = NULL;
-      Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size);
+      Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size);  // no arguments to push
 
       if (!use_ReduceInitialCardMarks()) {
         // If it is an oop array, it requires very special treatment,
@@ -4608,10 +4417,9 @@
     set_control(    _gvn.transform(result_reg) );
     set_i_o(        _gvn.transform(result_i_o) );
     set_all_memory( _gvn.transform(result_mem) );
-  } //original reexecute and sp are set back here
-
-  push(_gvn.transform(result_val));
-
+  } // original reexecute is set back here
+
+  set_result(_gvn.transform(result_val));
   return true;
 }
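
The sharply-typed array case is the common fast path: when the receiver is statically an int[], the intrinsic takes the array branch (inline allocation plus a raw copy) and never reaches the out-of-line clone call:

    public class CloneDemo {
        public static void main(String[] args) {
            int[] a = { 1, 2, 3 };
            int[] b = a.clone();      // statically int[]: array fast path
            System.out.println(b[2]); // 3
        }
    }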
 
@@ -4652,25 +4460,25 @@
 
 
 //------------------------------inline_arraycopy-----------------------
+// public static native void java.lang.System.arraycopy(Object src,  int  srcPos,
+//                                                      Object dest, int destPos,
+//                                                      int length);
 bool LibraryCallKit::inline_arraycopy() {
-  // Restore the stack and pop off the arguments.
-  int nargs = 5;  // 2 oops, 3 ints, no size_t or long
-  assert(callee()->signature()->size() == nargs, "copy has 5 arguments");
-
-  Node *src         = argument(0);
-  Node *src_offset  = argument(1);
-  Node *dest        = argument(2);
-  Node *dest_offset = argument(3);
-  Node *length      = argument(4);
+  // Get the arguments.
+  Node* src         = argument(0);  // type: oop
+  Node* src_offset  = argument(1);  // type: int
+  Node* dest        = argument(2);  // type: oop
+  Node* dest_offset = argument(3);  // type: int
+  Node* length      = argument(4);  // type: int
 
   // Compile time checks.  If any of these checks cannot be verified at compile time,
   // we do not make a fast path for this call.  Instead, we let the call remain as it
   // is.  The checks we choose to mandate at compile time are:
   //
   // (1) src and dest are arrays.
-  const Type* src_type = src->Value(&_gvn);
+  const Type* src_type  = src->Value(&_gvn);
   const Type* dest_type = dest->Value(&_gvn);
-  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  const TypeAryPtr* top_src  = src_type->isa_aryptr();
   const TypeAryPtr* top_dest = dest_type->isa_aryptr();
   if (top_src  == NULL || top_src->klass()  == NULL ||
       top_dest == NULL || top_dest->klass() == NULL) {
@@ -4725,15 +4533,13 @@
   record_for_igvn(slow_region);
 
   // (3) operands must not be null
-  // We currently perform our null checks with the do_null_check routine.
+  // We currently perform our null checks with the null_check routine.
   // This means that the null exceptions will be reported in the caller
   // rather than (correctly) reported inside of the native arraycopy call.
   // This should be corrected, given time.  We do our null check with the
   // stack pointer restored.
-  _sp += nargs;
-  src  = do_null_check(src,  T_ARRAY);
-  dest = do_null_check(dest, T_ARRAY);
-  _sp -= nargs;
+  src  = null_check(src,  T_ARRAY);
+  dest = null_check(dest, T_ARRAY);
 
   // (4) src_offset must not be negative.
   generate_negative_guard(src_offset, slow_region);
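
Every guard in this function feeds the single slow_region, so any check that fails at runtime diverts to one shared slow path. In plain C++ terms the generated shape is roughly the following (a sketch with hypothetical names, not the emitted IR):

#include <cstring>

// Hypothetical stand-in for the out-of-line runtime call: in the VM the slow
// path re-checks everything and throws if needed; here it is just a stub.
static void slow_arraycopy(const int*, int, int*, int, int) {
  // would perform the fully checked, possibly-throwing copy
}

// Each failed guard branches to the same slow path, mirroring how the
// null/negative/range guards all feed one slow_region.
void checked_arraycopy(const int* src, int src_off, int src_len,
                       int* dst, int dst_off, int dst_len, int length) {
  if (src == nullptr || dst == nullptr ||          // operands must not be null
      src_off < 0 || dst_off < 0 || length < 0 ||  // offsets/length not negative
      src_off + length > src_len ||                // stay within source array
      dst_off + length > dst_len) {                // stay within destination
    slow_arraycopy(src, src_off, dst, dst_off, length);
    return;
  }
  std::memcpy(dst + dst_off, src + src_off, length * sizeof(int));  // fast path
}
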
@@ -5076,7 +4882,7 @@
   slow_control = top();
   if (slow_region != NULL)
     slow_control = _gvn.transform(slow_region);
-  debug_only(slow_region = (RegionNode*)badAddress);
+  DEBUG_ONLY(slow_region = (RegionNode*)badAddress);
 
   set_control(checked_control);
   if (!stopped()) {
@@ -5570,34 +5376,64 @@
                     src_start, dest_start, copy_length XTOP);
 }
 
+//-------------inline_encodeISOArray-----------------------------------
+// encode char[] to byte[] in ISO_8859_1
+bool LibraryCallKit::inline_encodeISOArray() {
+  assert(callee()->signature()->size() == 5, "encodeISOArray has 5 parameters");
+  // no receiver because it is a static method
+  Node *src         = argument(0);
+  Node *src_offset  = argument(1);
+  Node *dst         = argument(2);
+  Node *dst_offset  = argument(3);
+  Node *length      = argument(4);
+
+  const Type* src_type = src->Value(&_gvn);
+  const Type* dst_type = dst->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  const TypeAryPtr* top_dest = dst_type->isa_aryptr();
+  if (top_src  == NULL || top_src->klass()  == NULL ||
+      top_dest == NULL || top_dest->klass() == NULL) {
+    // failed array check
+    return false;
+  }
+
+  // Figure out the size and type of the elements we will be copying.
+  BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType dst_elem = dst_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (src_elem != T_CHAR || dst_elem != T_BYTE) {
+    return false;
+  }
+  Node* src_start = array_element_address(src, src_offset, src_elem);
+  Node* dst_start = array_element_address(dst, dst_offset, dst_elem);
+  // 'src_start' points to src array + scaled offset
+  // 'dst_start' points to dst array + scaled offset
+
+  const TypeAryPtr* mtype = TypeAryPtr::BYTES;
+  Node* enc = new (C) EncodeISOArrayNode(control(), memory(mtype), src_start, dst_start, length);
+  enc = _gvn.transform(enc);
+  Node* res_mem = _gvn.transform(new (C) SCMemProjNode(enc));
+  set_memory(res_mem, mtype);
+  set_result(enc);
+  return true;
+}
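
For reference, the per-character loop this intrinsic replaces behaves as follows (a sketch of the semantics only; the real work is done by the platform's EncodeISOArray machine node):

#include <cstdint>

// Reference semantics of encodeISOArray: copy chars representable in
// ISO-8859-1 (< 256) into the byte array, stopping at the first char that
// is not, and return how many chars were encoded.
int encode_iso_array(const uint16_t* src, int sp,
                     uint8_t* dst, int dp, int len) {
  int i = 0;
  for (; i < len; i++) {
    uint16_t c = src[sp + i];
    if (c > 0xFF) break;        // not representable in ISO-8859-1
    dst[dp + i] = (uint8_t)c;   // low byte is the ISO-8859-1 code point
  }
  return i;                     // number of chars successfully encoded
}
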
+
 //----------------------------inline_reference_get----------------------------
-
+// public T java.lang.ref.Reference.get();
 bool LibraryCallKit::inline_reference_get() {
-  const int nargs = 1; // self
-
-  guarantee(java_lang_ref_Reference::referent_offset > 0,
-            "should have already been set");
-
-  int referent_offset = java_lang_ref_Reference::referent_offset;
-
-  // Restore the stack and pop off the argument
-  _sp += nargs;
-  Node *reference_obj = pop();
-
-  // Null check on self without removing any arguments.
-  _sp += nargs;
-  reference_obj = do_null_check(reference_obj, T_OBJECT);
-  _sp -= nargs;;
-
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "should have already been set");
+
+  // Get the argument:
+  Node* reference_obj = null_check_receiver();
   if (stopped()) return true;
 
-  Node *adr = basic_plus_adr(reference_obj, reference_obj, referent_offset);
+  Node* adr = basic_plus_adr(reference_obj, reference_obj, referent_offset);
 
   ciInstanceKlass* klass = env()->Object_klass();
   const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass);
 
   Node* no_ctrl = NULL;
-  Node *result = make_load(no_ctrl, adr, object_type, T_OBJECT);
+  Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT);
 
   // Use the pre-barrier to record the value in the referent field
   pre_barrier(false /* do_load */,
@@ -5610,6 +5446,252 @@
   // across safepoint since GC can change its value.
   insert_mem_bar(Op_MemBarCPUOrder);
 
-  push(result);
+  set_result(result);
+  return true;
+}
+
+
+Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,
+                                              bool is_exact=true, bool is_static=false) {
+
+  const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr();
+  assert(tinst != NULL, "obj is null");
+  assert(tinst->klass()->is_loaded(), "obj is not loaded");
+  assert(!is_exact || tinst->klass_is_exact(), "klass not exact");
+
+  ciField* field = tinst->klass()->as_instance_klass()->get_field_by_name(ciSymbol::make(fieldName),
+                                                                          ciSymbol::make(fieldTypeString),
+                                                                          is_static);
+  if (field == NULL) return (Node *) NULL;
+
+  // The following code is copied from Parse::do_get_xxx():
+
+  // Compute address and memory type.
+  int offset  = field->offset_in_bytes();
+  bool is_vol = field->is_volatile();
+  ciType* field_klass = field->type();
+  assert(field_klass->is_loaded(), "should be loaded");
+  const TypePtr* adr_type = C->alias_type(field)->adr_type();
+  Node *adr = basic_plus_adr(fromObj, fromObj, offset);
+  BasicType bt = field->layout_type();
+
+  // Build the resultant type of the load
+  const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+
+  // Build the load.
+  Node* loadedField = make_load(NULL, adr, type, bt, adr_type, is_vol);
+  return loadedField;
+}
+
+
+//------------------------------inline_aescrypt_Block-----------------------
+bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) {
+  address stubAddr = NULL;
+  const char *stubName = NULL;
+  assert(UseAES, "need AES instruction support");
+
+  switch(id) {
+  case vmIntrinsics::_aescrypt_encryptBlock:
+    stubAddr = StubRoutines::aescrypt_encryptBlock();
+    stubName = "aescrypt_encryptBlock";
+    break;
+  case vmIntrinsics::_aescrypt_decryptBlock:
+    stubAddr = StubRoutines::aescrypt_decryptBlock();
+    stubName = "aescrypt_decryptBlock";
+    break;
+  }
+  if (stubAddr == NULL) return false;
+
+  Node* aescrypt_object = argument(0);
+  Node* src             = argument(1);
+  Node* src_offset      = argument(2);
+  Node* dest            = argument(3);
+  Node* dest_offset     = argument(4);
+
+  // (1) src and dest are arrays.
+  const Type* src_type = src->Value(&_gvn);
+  const Type* dest_type = dest->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+  assert (top_src  != NULL && top_src->klass()  != NULL &&  top_dest != NULL && top_dest->klass() != NULL, "args are strange");
+
+  // For this quick-and-dirty code we skip all the checks;
+  // we are just trying to get the call to be generated.
+  Node* src_start  = src;
+  Node* dest_start = dest;
+  if (src_offset != NULL || dest_offset != NULL) {
+    assert(src_offset != NULL && dest_offset != NULL, "");
+    src_start  = array_element_address(src,  src_offset,  T_BYTE);
+    dest_start = array_element_address(dest, dest_offset, T_BYTE);
+  }
+
+  // Now we need the start of its expanded key array.
+  // This requires a newer class file that stores this array as little-endian ints; otherwise we revert to Java.
+  Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
+  if (k_start == NULL) return false;
+
+  // Call the stub.
+  make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
+                    stubAddr, stubName, TypePtr::BOTTOM,
+                    src_start, dest_start, k_start);
+
   return true;
 }
+
+//------------------------------inline_cipherBlockChaining_AESCrypt-----------------------
+bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
+  address stubAddr = NULL;
+  const char *stubName = NULL;
+
+  assert(UseAES, "need AES instruction support");
+
+  switch(id) {
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+    stubAddr = StubRoutines::cipherBlockChaining_encryptAESCrypt();
+    stubName = "cipherBlockChaining_encryptAESCrypt";
+    break;
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    stubAddr = StubRoutines::cipherBlockChaining_decryptAESCrypt();
+    stubName = "cipherBlockChaining_decryptAESCrypt";
+    break;
+  }
+  if (stubAddr == NULL) return false;
+
+  Node* cipherBlockChaining_object = argument(0);
+  Node* src                        = argument(1);
+  Node* src_offset                 = argument(2);
+  Node* len                        = argument(3);
+  Node* dest                       = argument(4);
+  Node* dest_offset                = argument(5);
+
+  // (1) src and dest are arrays.
+  const Type* src_type = src->Value(&_gvn);
+  const Type* dest_type = dest->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+  assert (top_src  != NULL && top_src->klass()  != NULL
+          &&  top_dest != NULL && top_dest->klass() != NULL, "args are strange");
+
+  // checks are the responsibility of the caller
+  Node* src_start  = src;
+  Node* dest_start = dest;
+  if (src_offset != NULL || dest_offset != NULL) {
+    assert(src_offset != NULL && dest_offset != NULL, "");
+    src_start  = array_element_address(src,  src_offset,  T_BYTE);
+    dest_start = array_element_address(dest, dest_offset, T_BYTE);
+  }
+
+  // If we reach this code, we "know" the embeddedCipher is an AESCrypt object
+  // (because of the predicated logic executed earlier), so we cast it safely here.
+  // This requires a newer class file that stores the key array as little-endian ints;
+  // otherwise we revert to the Java implementation.
+
+  Node* embeddedCipherObj = load_field_from_object(cipherBlockChaining_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+  if (embeddedCipherObj == NULL) return false;
+
+  // cast it to what we know it will be at runtime
+  const TypeInstPtr* tinst = _gvn.type(cipherBlockChaining_object)->isa_instptr();
+  assert(tinst != NULL, "CBC obj is null");
+  assert(tinst->klass()->is_loaded(), "CBC obj is not loaded");
+  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+  if (!klass_AESCrypt->is_loaded()) return false;
+
+  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
+  const TypeOopPtr* xtype = aklass->as_instance_type();
+  Node* aescrypt_object = new(C) CheckCastPPNode(control(), embeddedCipherObj, xtype);
+  aescrypt_object = _gvn.transform(aescrypt_object);
+
+  // we need to get the start of the aescrypt_object's expanded key array
+  Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
+  if (k_start == NULL) return false;
+
+  // similarly, get the start address of the r vector
+  Node* objRvec = load_field_from_object(cipherBlockChaining_object, "r", "[B", /*is_exact*/ false);
+  if (objRvec == NULL) return false;
+  Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE);
+
+  // Call the stub, passing src_start, dest_start, k_start, r_start and len
+  make_runtime_call(RC_LEAF|RC_NO_FP,
+                    OptoRuntime::cipherBlockChaining_aescrypt_Type(),
+                    stubAddr, stubName, TypePtr::BOTTOM,
+                    src_start, dest_start, k_start, r_start, len);
+
+  // return is void so no result needs to be pushed
+
+  return true;
+}
+
+//------------------------------get_key_start_from_aescrypt_object-----------------------
+Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
+  Node* objAESCryptKey = load_field_from_object(aescrypt_object, "K", "[I", /*is_exact*/ false);
+  assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt");
+  if (objAESCryptKey == NULL) return (Node *) NULL;
+
+  // Now that we have the array, we need the start address of the K array
+  Node* k_start = array_element_address(objAESCryptKey, intcon(0), T_INT);
+  return k_start;
+}
+
+//----------------------------inline_cipherBlockChaining_AESCrypt_predicate----------------------------
+// Return the node representing the slow path of the predicate check.
+// The pseudo-code we want to emulate with this predicate is:
+// for encryption:
+//    if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath
+// for decryption:
+//    if ((embeddedCipherObj instanceof AESCrypt) && (cipher != plain)) do_intrinsic, else do_javapath
+//    Note: cipher == plain is more conservative than the original Java code, but that is OK.
+//
+Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) {
+  // First, check receiver for NULL since it is virtual method.
+  Node* objCBC = argument(0);
+  objCBC = null_check(objCBC);
+
+  if (stopped()) return NULL; // Always NULL
+
+  // Load embeddedCipher field of CipherBlockChaining object.
+  Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+
+  // get AESCrypt klass for instanceOf check
+  // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point
+  // will have same classloader as CipherBlockChaining object
+  const TypeInstPtr* tinst = _gvn.type(objCBC)->isa_instptr();
+  assert(tinst != NULL, "CBCobj is null");
+  assert(tinst->klass()->is_loaded(), "CBCobj is not loaded");
+
+  // we want to do an instanceof comparison against the AESCrypt class
+  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+  if (!klass_AESCrypt->is_loaded()) {
+    // if AESCrypt is not even loaded, we never take the intrinsic fast path
+    Node* ctrl = control();
+    set_control(top()); // no regular fast path
+    return ctrl;
+  }
+  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+
+  Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt)));
+  Node* cmp_instof  = _gvn.transform(new (C) CmpINode(instof, intcon(1)));
+  Node* bool_instof  = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne));
+
+  Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
+
+  // for encryption, we are done
+  if (!decrypting)
+    return instof_false;  // even if it is NULL
+
+  // For decryption, we need a further check to avoid taking the
+  // intrinsic path when cipher and plain are the same;
+  // see the original Java code for why.
+  RegionNode* region = new(C) RegionNode(3);
+  region->init_req(1, instof_false);
+  Node* src = argument(1);
+  Node* dest = argument(4);
+  Node* cmp_src_dest = _gvn.transform(new (C) CmpPNode(src, dest));
+  Node* bool_src_dest = _gvn.transform(new (C) BoolNode(cmp_src_dest, BoolTest::eq));
+  Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN);
+  region->init_req(2, src_dest_conjoint);
+
+  record_for_igvn(region);
+  return _gvn.transform(region);
+}
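
Restated in ordinary C++, the predicate's decision looks like the following (a loose analogy using RTTI in place of the emitted instanceof guard; hypothetical types, not the provider classes):

#include <cassert>

struct SymmetricCipher { virtual ~SymmetricCipher() {} };
struct AESCrypt : SymmetricCipher {};

// Take the intrinsic only when the embedded cipher really is an AESCrypt,
// and (for decryption) only when cipher and plain buffers are distinct.
bool take_intrinsic_path(SymmetricCipher* embedded, bool decrypting,
                         const void* src, const void* dest) {
  if (dynamic_cast<AESCrypt*>(embedded) == nullptr)
    return false;               // not AESCrypt: always the Java path
  if (decrypting && src == dest)
    return false;               // conservative: in-place decrypt goes slow
  return true;
}

int main() {
  AESCrypt aes;
  SymmetricCipher other;
  char buf[16];
  assert( take_intrinsic_path(&aes,   false, buf, buf));  // encrypt: intrinsic
  assert(!take_intrinsic_path(&aes,   true,  buf, buf));  // in-place decrypt: slow
  assert(!take_intrinsic_path(&other, false, buf, buf));  // not AESCrypt: slow
  return 0;
}
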
--- a/src/share/vm/opto/locknode.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/locknode.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -165,7 +165,7 @@
   kill_dead_locals();
 
   // Null check; get casted pointer.
-  Node *obj = do_null_check(peek(), T_OBJECT);
+  Node* obj = null_check(peek());
   // Check for locking null object
   if (stopped()) return;
 
--- a/src/share/vm/opto/loopTransform.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/loopTransform.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -269,10 +269,10 @@
 bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
   Node *test = ((IdealLoopTree*)this)->tail();
   int  body_size = ((IdealLoopTree*)this)->_body.size();
-  int  uniq      = phase->C->unique();
+  int  live_node_count = phase->C->live_nodes();
   // Peeling does loop cloning which can result in O(N^2) node construction
   if( body_size > 255 /* Prevent overflow for large body_size */
-      || (body_size * body_size + uniq > MaxNodeLimit) ) {
+      || (body_size * body_size + live_node_count > MaxNodeLimit) ) {
     return false;           // too large to safely clone
   }
   while( test != _head ) {      // Scan till run off top of loop
@@ -601,7 +601,7 @@
     return false;
   if (new_body_size > unroll_limit ||
       // Unrolling can result in a large amount of node construction
-      new_body_size >= MaxNodeLimit - phase->C->unique()) {
+      new_body_size >= MaxNodeLimit - (uint) phase->C->live_nodes()) {
     return false;
   }
 
@@ -613,6 +613,7 @@
       case Op_StrComp:
       case Op_StrEquals:
       case Op_StrIndexOf:
+      case Op_EncodeISOArray:
       case Op_AryEq: {
         return false;
       }
@@ -717,6 +718,7 @@
       case Op_StrComp:
       case Op_StrEquals:
       case Op_StrIndexOf:
+      case Op_EncodeISOArray:
       case Op_AryEq: {
         // Do not unroll a loop with String intrinsics code.
         // String intrinsics are large and have loops.
@@ -2268,7 +2270,7 @@
 
   // Skip next optimizations if running low on nodes. Note that
   // policy_unswitching and policy_maximally_unroll have this check.
-  uint nodes_left = MaxNodeLimit - phase->C->unique();
+  uint nodes_left = MaxNodeLimit - (uint) phase->C->live_nodes();
   if ((2 * _body.size()) > nodes_left) {
     return true;
   }
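
The replacements of C->unique() with C->live_nodes() in these hunks share one motivation: unique() is a monotonically growing high-water mark that still counts nodes killed long ago, so budgeting against it makes these policies give up too early. A toy model of the two counters (hypothetical struct, not Compile itself):

#include <cassert>

// 'unique' only grows (it is the next node idx to hand out), while the live
// count drops again as nodes are disconnected (cf. record_dead_node below).
struct NodeBudgetModel {
  unsigned unique;
  unsigned dead;
  unsigned live_nodes() const { return unique - dead; }
};

int main() {
  const unsigned MaxNodeLimit = 80000;
  NodeBudgetModel c = { 70000, 30000 };  // many created, many since killed
  // Budgeting against unique() sees only 10000 nodes of headroom, while
  // live_nodes() correctly reports 40000 for the same compilation.
  assert(MaxNodeLimit - c.unique       == 10000);
  assert(MaxNodeLimit - c.live_nodes() == 40000);
  return 0;
}
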
--- a/src/share/vm/opto/loopUnswitch.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/loopUnswitch.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -59,7 +59,7 @@
   if (!_head->is_Loop()) {
     return false;
   }
-  uint nodes_left = MaxNodeLimit - phase->C->unique();
+  uint nodes_left = MaxNodeLimit - phase->C->live_nodes();
   if (2 * _body.size() > nodes_left) {
     return false; // Too speculative if running low on nodes.
   }
--- a/src/share/vm/opto/loopopts.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/loopopts.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -729,7 +729,7 @@
   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
     weight += region->fast_out(i)->outcnt();
   }
-  int nodes_left = MaxNodeLimit - C->unique();
+  int nodes_left = MaxNodeLimit - C->live_nodes();
   if (weight * 8 > nodes_left) {
 #ifndef PRODUCT
     if (PrintOpto)
--- a/src/share/vm/opto/machnode.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/machnode.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -506,7 +506,7 @@
 #ifndef PRODUCT
 void MachNullCheckNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
   int reg = ra_->get_reg_first(in(1)->in(_vidx));
-  tty->print("%s %s", Name(), Matcher::regName[reg]);
+  st->print("%s %s", Name(), Matcher::regName[reg]);
 }
 #endif
 
--- a/src/share/vm/opto/macro.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/macro.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -361,14 +361,21 @@
       }
       // Otherwise skip it (the call updated 'mem' value).
     } else if (mem->Opcode() == Op_SCMemProj) {
-      assert(mem->in(0)->is_LoadStore(), "sanity");
-      const TypePtr* atype = mem->in(0)->in(MemNode::Address)->bottom_type()->is_ptr();
+      mem = mem->in(0);
+      Node* adr = NULL;
+      if (mem->is_LoadStore()) {
+        adr = mem->in(MemNode::Address);
+      } else {
+        assert(mem->Opcode() == Op_EncodeISOArray, "sanity");
+        adr = mem->in(3); // Destination array
+      }
+      const TypePtr* atype = adr->bottom_type()->is_ptr();
       int adr_idx = Compile::current()->get_alias_index(atype);
       if (adr_idx == alias_idx) {
         assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
         return NULL;
       }
-      mem = mem->in(0)->in(MemNode::Memory);
+      mem = mem->in(MemNode::Memory);
     } else {
       return mem;
     }
@@ -445,7 +452,7 @@
         }
         values.at_put(j, val);
       } else if (val->Opcode() == Op_SCMemProj) {
-        assert(val->in(0)->is_LoadStore(), "sanity");
+        assert(val->in(0)->is_LoadStore() || val->in(0)->Opcode() == Op_EncodeISOArray, "sanity");
         assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
         return NULL;
       } else {
@@ -2262,7 +2269,7 @@
   Node *slow_ctrl = _fallthroughproj->clone();
   transform_later(slow_ctrl);
   _igvn.hash_delete(_fallthroughproj);
-  _fallthroughproj->disconnect_inputs(NULL);
+  _fallthroughproj->disconnect_inputs(NULL, C);
   region->init_req(1, slow_ctrl);
   // region inputs are now complete
   transform_later(region);
@@ -2327,7 +2334,7 @@
   Node *slow_ctrl = _fallthroughproj->clone();
   transform_later(slow_ctrl);
   _igvn.hash_delete(_fallthroughproj);
-  _fallthroughproj->disconnect_inputs(NULL);
+  _fallthroughproj->disconnect_inputs(NULL, C);
   region->init_req(1, slow_ctrl);
   // region inputs are now complete
   transform_later(region);
--- a/src/share/vm/opto/matcher.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/matcher.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -342,6 +342,7 @@
   // Reset node counter so MachNodes start with _idx at 0
   int nodes = C->unique(); // save value
   C->set_unique(0);
+  C->reset_dead_node_list();
 
   // Recursively match trees from old space into new space.
   // Correct leaves of new-space Nodes; they point to old-space.
@@ -918,6 +919,7 @@
     case Op_AryEq:
     case Op_MemBarVolatile:
     case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
+    case Op_EncodeISOArray:
       nidx = Compile::AliasIdxTop;
       nat = NULL;
       break;
@@ -1981,6 +1983,7 @@
       case Op_StrEquals:
       case Op_StrIndexOf:
       case Op_AryEq:
+      case Op_EncodeISOArray:
         set_shared(n); // Force result into register (it will be anyways)
         break;
       case Op_ConP: {  // Convert pointers above the centerline to NUL
@@ -2182,6 +2185,13 @@
         n->del_req(4);
         break;
       }
+      case Op_EncodeISOArray: {
+        // Restructure into a binary tree for Matching.
+        Node* pair = new (C) BinaryNode(n->in(3), n->in(4));
+        n->set_req(3, pair);
+        n->del_req(4);
+        break;
+      }
       default:
         break;
       }
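
The matcher consumes at-most-binary expression trees, so wide ideal nodes have their trailing inputs folded into Binary pairs before matching; the new case above packs EncodeISOArray's (dst, len) inputs into one Binary input. A toy version of that restructuring (hypothetical node type, not the HotSpot classes):

#include <cstdio>
#include <vector>

struct ToyNode {
  const char* name;
  std::vector<ToyNode*> in;
  explicit ToyNode(const char* n) : name(n) {}
};

// Pack (in[3], in[4]) into one Binary node and drop in[4], mirroring
// n->set_req(3, pair); n->del_req(4); in the Op_EncodeISOArray case.
void pack_binary(ToyNode* n) {
  ToyNode* pair = new ToyNode("Binary");
  pair->in.push_back(n->in[3]);
  pair->in.push_back(n->in[4]);
  n->in[3] = pair;
  n->in.pop_back();
}

int main() {
  ToyNode ctl("ctl"), mem("mem"), src("src"), dst("dst"), len("len");
  ToyNode enc("EncodeISOArray");
  enc.in.push_back(&ctl); enc.in.push_back(&mem); enc.in.push_back(&src);
  enc.in.push_back(&dst); enc.in.push_back(&len);
  pack_binary(&enc);  // leaks the one Binary node; fine for a demo
  std::printf("%s now has %zu inputs; in[3] is %s\n",
              enc.name, enc.in.size(), enc.in[3]->name);
  return 0;
}
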
--- a/src/share/vm/opto/memnode.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/memnode.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -2725,10 +2725,8 @@
     zend  = phase->transform( new(C) URShiftXNode(zend,  shift) );
   }
 
+  // Bulk clear double-words
   Node* zsize = phase->transform( new(C) SubXNode(zend, zbase) );
-  Node* zinit = phase->zerocon((unit == BytesPerLong) ? T_LONG : T_INT);
-
-  // Bulk clear double-words
   Node* adr = phase->transform( new(C) AddPNode(dest, dest, start_offset) );
   mem = new (C) ClearArrayNode(ctl, mem, zsize, adr);
   return phase->transform(mem);
@@ -2799,6 +2797,26 @@
 }
 
 //=============================================================================
+//------------------------------match_edge-------------------------------------
+// Do not match memory edge
+uint EncodeISOArrayNode::match_edge(uint idx) const {
+  return idx == 2 || idx == 3; // EncodeISOArray src (Binary dst len)
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.  Strip out
+// control copies
+Node *EncodeISOArrayNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+//------------------------------Value------------------------------------------
+const Type *EncodeISOArrayNode::Value(PhaseTransform *phase) const {
+  if (in(0) && phase->type(in(0)) == Type::TOP) return Type::TOP;
+  return bottom_type();
+}
+
+//=============================================================================
 MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
   : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)),
     _adr_type(C->get_adr_type(alias_idx))
--- a/src/share/vm/opto/memnode.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/memnode.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -888,6 +888,22 @@
   virtual const Type* bottom_type() const { return TypeInt::BOOL; }
 };
 
+
+//------------------------------EncodeISOArray--------------------------------
+// encode char[] to byte[] in ISO_8859_1
+class EncodeISOArrayNode: public Node {
+public:
+  EncodeISOArrayNode(Node *control, Node* arymem, Node* s1, Node* s2, Node* c): Node(control, arymem, s1, s2, c) {}
+  virtual int Opcode() const;
+  virtual bool depends_only_on_test() const { return false; }
+  virtual const Type* bottom_type() const { return TypeInt::INT; }
+  virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; }
+  virtual uint match_edge(uint idx) const;
+  virtual uint ideal_reg() const { return Op_RegI; }
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual const Type *Value(PhaseTransform *phase) const;
+};
+
 //------------------------------MemBar-----------------------------------------
 // There are different flavors of Memory Barriers to match the Java Memory
 // Model.  Monitor-enter and volatile-load act as Acquires: no following ref
--- a/src/share/vm/opto/mulnode.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/mulnode.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -479,24 +479,27 @@
     return new (phase->C) AndINode(load,phase->intcon(mask&0xFFFF));
 
   // Masking bits off of a Short?  Loading a Character does some masking
-  if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) {
-    Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
-                                              load->in(MemNode::Memory),
-                                              load->in(MemNode::Address),
-                                              load->adr_type());
-    ldus = phase->transform(ldus);
-    return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
-  }
+  if (can_reshape &&
+      load->outcnt() == 1 && load->unique_out() == this) {
+    if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) {
+      Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
+                                             load->in(MemNode::Memory),
+                                             load->in(MemNode::Address),
+                                             load->adr_type());
+      ldus = phase->transform(ldus);
+      return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
+    }
 
-  // Masking sign bits off of a Byte?  Do an unsigned byte load plus
-  // an and.
-  if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
-    Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
-                                              load->in(MemNode::Memory),
-                                              load->in(MemNode::Address),
-                                              load->adr_type());
-    ldub = phase->transform(ldub);
-    return new (phase->C) AndINode(ldub, phase->intcon(mask));
+    // Masking sign bits off of a Byte?  Do an unsigned byte load plus
+    // an and.
+    if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
+      Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
+                                             load->in(MemNode::Memory),
+                                             load->in(MemNode::Address),
+                                             load->adr_type());
+      ldub = phase->transform(ldub);
+      return new (phase->C) AndINode(ldub, phase->intcon(mask));
+    }
   }
 
   // Masking off sign bits?  Don't make them!
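
The rewrite above is sound because sign-extending a narrow load and then masking with the type's width mask is exactly a zero-extending load; the new can_reshape / unique-out guard further ensures the original load is replaced only when this And is its sole user. A compilable demonstration of the equivalence for shorts:

#include <cassert>
#include <cstdint>

int main() {
  int16_t mem = -2;  // in-memory short value, bit pattern 0xFFFE

  // LoadS then AndI 0xFFFF: sign-extend to int, then mask the high bits...
  int masked_signed = ((int)mem) & 0xFFFF;

  // ...is identical to LoadUS: a zero-extending (unsigned short) load.
  int unsigned_load = (int)(uint16_t)mem;

  assert(masked_signed == unsigned_load && masked_signed == 0xFFFE);
  return 0;
}
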
@@ -923,7 +926,9 @@
       set_req(2, phase->intcon(0));
       return this;
     }
-    else if( ld->Opcode() == Op_LoadUS )
+    else if( can_reshape &&
+             ld->Opcode() == Op_LoadUS &&
+             ld->outcnt() == 1 && ld->unique_out() == shl)
       // Replace zero-extension-load with sign-extension-load
       return new (phase->C) LoadSNode( ld->in(MemNode::Control),
                                 ld->in(MemNode::Memory),
--- a/src/share/vm/opto/node.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/node.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -57,7 +57,7 @@
   int new_debug_idx = old_debug_idx+1;
   if (new_debug_idx > 0) {
     // Arrange that the lowest five decimal digits of _debug_idx
-    // will repeat thos of _idx.  In case this is somehow pathological,
+    // will repeat those of _idx. In case this is somehow pathological,
     // we continue to assign negative numbers (!) consecutively.
     const int mod = 100000;
     int bump = (int)(_idx - new_debug_idx) % mod;
@@ -67,7 +67,7 @@
   }
   Compile::set_debug_idx(new_debug_idx);
   set_debug_idx( new_debug_idx );
-  assert(Compile::current()->unique() < (uint)MaxNodeLimit, "Node limit exceeded");
+  assert(Compile::current()->unique() < (UINT_MAX - 1), "Node limit exceeded UINT_MAX");
   if (BreakAtNode != 0 && (_debug_idx == BreakAtNode || (int)_idx == BreakAtNode)) {
     tty->print_cr("BreakAtNode: _idx=%d _debug_idx=%d", _idx, _debug_idx);
     BREAKPOINT;
@@ -802,7 +802,7 @@
 //-------------------------disconnect_inputs-----------------------------------
 // NULL out all inputs to eliminate incoming Def-Use edges.
 // Return the number of edges between 'n' and 'this'
-int Node::disconnect_inputs(Node *n) {
+int Node::disconnect_inputs(Node *n, Compile* C) {
   int edges_to_n = 0;
 
   uint cnt = req();
@@ -824,6 +824,9 @@
 
   // Node::destruct requires all out edges be deleted first
   // debug_only(destruct();)   // no reuse benefit expected
+  if (edges_to_n == 0) {
+    C->record_dead_node(_idx);
+  }
   return edges_to_n;
 }
 
@@ -1473,35 +1476,35 @@
 }
 
 #ifdef ASSERT
-static void dump_orig(Node* orig) {
+static void dump_orig(Node* orig, outputStream *st) {
   Compile* C = Compile::current();
-  if (NotANode(orig))  orig = NULL;
-  if (orig != NULL && !C->node_arena()->contains(orig))  orig = NULL;
-  if (orig == NULL)  return;
-  tty->print(" !orig=");
+  if (NotANode(orig)) orig = NULL;
+  if (orig != NULL && !C->node_arena()->contains(orig)) orig = NULL;
+  if (orig == NULL) return;
+  st->print(" !orig=");
   Node* fast = orig->debug_orig(); // tortoise & hare algorithm to detect loops
-  if (NotANode(fast))  fast = NULL;
+  if (NotANode(fast)) fast = NULL;
   while (orig != NULL) {
     bool discon = is_disconnected(orig);  // if discon, print [123] else 123
-    if (discon)  tty->print("[");
+    if (discon) st->print("[");
     if (!Compile::current()->node_arena()->contains(orig))
-      tty->print("o");
-    tty->print("%d", orig->_idx);
-    if (discon)  tty->print("]");
+      st->print("o");
+    st->print("%d", orig->_idx);
+    if (discon) st->print("]");
     orig = orig->debug_orig();
-    if (NotANode(orig))  orig = NULL;
-    if (orig != NULL && !C->node_arena()->contains(orig))  orig = NULL;
-    if (orig != NULL)  tty->print(",");
+    if (NotANode(orig)) orig = NULL;
+    if (orig != NULL && !C->node_arena()->contains(orig)) orig = NULL;
+    if (orig != NULL) st->print(",");
     if (fast != NULL) {
       // Step fast twice for each single step of orig:
       fast = fast->debug_orig();
-      if (NotANode(fast))  fast = NULL;
+      if (NotANode(fast)) fast = NULL;
       if (fast != NULL && fast != orig) {
         fast = fast->debug_orig();
-        if (NotANode(fast))  fast = NULL;
+        if (NotANode(fast)) fast = NULL;
       }
       if (fast == orig) {
-        tty->print("...");
+        st->print("...");
         break;
       }
     }
@@ -1528,35 +1531,34 @@
 
 //------------------------------dump------------------------------------------
 // Dump a Node
-void Node::dump() const {
+void Node::dump(const char* suffix, outputStream *st) const {
   Compile* C = Compile::current();
   bool is_new = C->node_arena()->contains(this);
   _in_dump_cnt++;
-  tty->print("%c%d\t%s\t=== ",
-             is_new ? ' ' : 'o', _idx, Name());
+  st->print("%c%d\t%s\t=== ", is_new ? ' ' : 'o', _idx, Name());
 
   // Dump the required and precedence inputs
-  dump_req();
-  dump_prec();
+  dump_req(st);
+  dump_prec(st);
   // Dump the outputs
-  dump_out();
+  dump_out(st);
 
   if (is_disconnected(this)) {
 #ifdef ASSERT
-    tty->print("  [%d]",debug_idx());
-    dump_orig(debug_orig());
+    st->print("  [%d]",debug_idx());
+    dump_orig(debug_orig(), st);
 #endif
-    tty->cr();
+    st->cr();
     _in_dump_cnt--;
     return;                     // don't process dead nodes
   }
 
   // Dump node-specific info
-  dump_spec(tty);
+  dump_spec(st);
 #ifdef ASSERT
   // Dump the non-reset _debug_idx
-  if( Verbose && WizardMode ) {
-    tty->print("  [%d]",debug_idx());
+  if (Verbose && WizardMode) {
+    st->print("  [%d]",debug_idx());
   }
 #endif
 
@@ -1566,88 +1568,88 @@
     const TypeInstPtr  *toop = t->isa_instptr();
     const TypeKlassPtr *tkls = t->isa_klassptr();
     ciKlass*           klass = toop ? toop->klass() : (tkls ? tkls->klass() : NULL );
-    if( klass && klass->is_loaded() && klass->is_interface() ) {
-      tty->print("  Interface:");
-    } else if( toop ) {
-      tty->print("  Oop:");
-    } else if( tkls ) {
-      tty->print("  Klass:");
+    if (klass && klass->is_loaded() && klass->is_interface()) {
+      st->print("  Interface:");
+    } else if (toop) {
+      st->print("  Oop:");
+    } else if (tkls) {
+      st->print("  Klass:");
     }
-    t->dump();
-  } else if( t == Type::MEMORY ) {
-    tty->print("  Memory:");
-    MemNode::dump_adr_type(this, adr_type(), tty);
-  } else if( Verbose || WizardMode ) {
-    tty->print("  Type:");
-    if( t ) {
-      t->dump();
+    t->dump_on(st);
+  } else if (t == Type::MEMORY) {
+    st->print("  Memory:");
+    MemNode::dump_adr_type(this, adr_type(), st);
+  } else if (Verbose || WizardMode) {
+    st->print("  Type:");
+    if (t) {
+      t->dump_on(st);
     } else {
-      tty->print("no type");
+      st->print("no type");
     }
   } else if (t->isa_vect() && this->is_MachSpillCopy()) {
     // Dump MachSpillcopy vector type.
-    t->dump();
+    t->dump_on(st);
   }
   if (is_new) {
-    debug_only(dump_orig(debug_orig()));
+    debug_only(dump_orig(debug_orig(), st));
     Node_Notes* nn = C->node_notes_at(_idx);
     if (nn != NULL && !nn->is_clear()) {
       if (nn->jvms() != NULL) {
-        tty->print(" !jvms:");
-        nn->jvms()->dump_spec(tty);
+        st->print(" !jvms:");
+        nn->jvms()->dump_spec(st);
       }
     }
   }
-  tty->cr();
+  if (suffix) st->print(suffix);
   _in_dump_cnt--;
 }
 
 //------------------------------dump_req--------------------------------------
-void Node::dump_req() const {
+void Node::dump_req(outputStream *st) const {
   // Dump the required input edges
   for (uint i = 0; i < req(); i++) {    // For all required inputs
     Node* d = in(i);
     if (d == NULL) {
-      tty->print("_ ");
+      st->print("_ ");
     } else if (NotANode(d)) {
-      tty->print("NotANode ");  // uninitialized, sentinel, garbage, etc.
+      st->print("NotANode ");  // uninitialized, sentinel, garbage, etc.
     } else {
-      tty->print("%c%d ", Compile::current()->node_arena()->contains(d) ? ' ' : 'o', d->_idx);
+      st->print("%c%d ", Compile::current()->node_arena()->contains(d) ? ' ' : 'o', d->_idx);
     }
   }
 }
 
 
 //------------------------------dump_prec-------------------------------------
-void Node::dump_prec() const {
+void Node::dump_prec(outputStream *st) const {
   // Dump the precedence edges
   int any_prec = 0;
   for (uint i = req(); i < len(); i++) {       // For all precedence inputs
     Node* p = in(i);
     if (p != NULL) {
-      if( !any_prec++ ) tty->print(" |");
-      if (NotANode(p)) { tty->print("NotANode "); continue; }
-      tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+      if (!any_prec++) st->print(" |");
+      if (NotANode(p)) { st->print("NotANode "); continue; }
+      st->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
     }
   }
 }
 
 //------------------------------dump_out--------------------------------------
-void Node::dump_out() const {
+void Node::dump_out(outputStream *st) const {
   // Delimit the output edges
-  tty->print(" [[");
+  st->print(" [[");
   // Dump the output edges
   for (uint i = 0; i < _outcnt; i++) {    // For all outputs
     Node* u = _out[i];
     if (u == NULL) {
-      tty->print("_ ");
+      st->print("_ ");
     } else if (NotANode(u)) {
-      tty->print("NotANode ");
+      st->print("NotANode ");
     } else {
-      tty->print("%c%d ", Compile::current()->node_arena()->contains(u) ? ' ' : 'o', u->_idx);
+      st->print("%c%d ", Compile::current()->node_arena()->contains(u) ? ' ' : 'o', u->_idx);
     }
   }
-  tty->print("]] ");
+  st->print("]] ");
 }
 
 //------------------------------dump_nodes-------------------------------------
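
All of the dump routines in this hunk gain an "outputStream *st = tty" parameter: the same printing code writes to the terminal by default but can be redirected by the caller. The same pattern in standalone C++, with std::ostream standing in for outputStream (a sketch):

#include <iostream>
#include <sstream>

// Default the stream to the global console, as dump(..., outputStream *st = tty)
// defaults to tty; callers may pass any other stream to capture the output.
void dump_info(int idx, std::ostream& st = std::cout) {
  st << "node " << idx << '\n';
}

int main() {
  dump_info(42);              // goes to the console, like tty
  std::ostringstream buf;
  dump_info(42, buf);         // captured by the caller instead
  std::cout << "captured: " << buf.str();
  return 0;
}
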
@@ -1836,15 +1838,16 @@
   return idx;                   // True for other than index 0 (control)
 }
 
+static RegMask _not_used_at_all;
 // Register classes are defined for specific machines
 const RegMask &Node::out_RegMask() const {
   ShouldNotCallThis();
-  return *(new RegMask());
+  return _not_used_at_all;
 }
 
 const RegMask &Node::in_RegMask(uint) const {
   ShouldNotCallThis();
-  return *(new RegMask());
+  return _not_used_at_all;
 }
 
 //=============================================================================
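
The old "return *(new RegMask());" leaked one heap object per call and the reference could never be freed; the file-scope static above is allocated once for the program's lifetime. A minimal sketch of the two variants (hypothetical type name):

struct RegMaskLike { unsigned bits[4]; };

// Anti-pattern being removed: allocates on every call and can never free.
const RegMaskLike& leaky_stub() {
  return *(new RegMaskLike());
}

// Replacement pattern: one static dummy object, no allocation per call.
static RegMaskLike _dummy_mask;
const RegMaskLike& safe_stub() {
  return _dummy_mask;
}

int main() {
  const RegMaskLike& r = safe_stub();
  (void)r;  // the stubs exist only to satisfy a must-return-reference API
  return 0;
}
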
--- a/src/share/vm/opto/node.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/node.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -410,7 +410,7 @@
   int replace_edge(Node* old, Node* neww);
   // NULL out all inputs to eliminate incoming Def-Use edges.
   // Return the number of edges between 'n' and 'this'
-  int  disconnect_inputs(Node *n);
+  int  disconnect_inputs(Node *n, Compile *c);
 
   // Quickly, return true if and only if I am Compile::current()->top().
   bool is_top() const {
@@ -458,9 +458,9 @@
   void replace_by(Node* new_node);
   // Globally replace this node by a given new node, updating all uses
   // and cutting input edges of old node.
-  void subsume_by(Node* new_node) {
+  void subsume_by(Node* new_node, Compile* c) {
     replace_by(new_node);
-    disconnect_inputs(NULL);
+    disconnect_inputs(NULL, c);
   }
   void set_req_X( uint i, Node *n, PhaseIterGVN *igvn );
   // Find the one non-null required input.  RegionNode only
@@ -994,12 +994,13 @@
 #ifndef PRODUCT
   Node* find(int idx) const;         // Search the graph for the given idx.
   Node* find_ctrl(int idx) const;    // Search control ancestors for the given idx.
-  void dump() const;                 // Print this node,
+  void dump() const { dump("\n"); }  // Print this node.
+  void dump(const char* suffix, outputStream *st = tty) const; // Print this node.
   void dump(int depth) const;        // Print this node, recursively to depth d
   void dump_ctrl(int depth) const;   // Print control nodes, to depth d
-  virtual void dump_req() const;     // Print required-edge info
-  virtual void dump_prec() const;    // Print precedence-edge info
-  virtual void dump_out() const;     // Print the output edge info
+  virtual void dump_req(outputStream *st = tty) const;     // Print required-edge info
+  virtual void dump_prec(outputStream *st = tty) const;    // Print precedence-edge info
+  virtual void dump_out(outputStream *st = tty) const;     // Print the output edge info
   virtual void dump_spec(outputStream *st) const {}; // Print per-node info
   void verify_edges(Unique_Node_List &visited); // Verify bi-directional edges
   void verify() const;               // Check Def-Use info for my subgraph
--- a/src/share/vm/opto/optoreg.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/optoreg.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -77,7 +77,7 @@
   // (We would like to have an operator+ for RegName, but it is not
   // a class, so this would be illegal in C++.)
 
-  static void dump( int );
+  static void dump(int, outputStream *st = tty);
 
   // Get the stack slot number of an OptoReg::Name
   static unsigned int reg2stack( OptoReg::Name r) {
--- a/src/share/vm/opto/output.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/output.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -513,7 +513,7 @@
           }
           adjust_block_start += diff;
           b->_nodes.map(idx, replacement);
-          mach->subsume_by(replacement);
+          mach->subsume_by(replacement, C);
           mach = replacement;
           progress = true;
 
@@ -1425,7 +1425,7 @@
               jmp_rule[i]   = mach->rule();
 #endif
               b->_nodes.map(j, replacement);
-              mach->subsume_by(replacement);
+              mach->subsume_by(replacement, C);
               n    = replacement;
               mach = replacement;
             }
--- a/src/share/vm/opto/parse.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/parse.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -70,7 +70,7 @@
   InlineTree *build_inline_tree_for_callee(ciMethod* callee_method,
                                            JVMState* caller_jvms,
                                            int caller_bci);
-  const char* try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result);
+  const char* try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result, bool& should_delay);
   const char* should_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const;
   const char* should_not_inline(ciMethod* callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const;
   void        print_inlining(ciMethod *callee_method, int caller_bci, const char *failure_msg) const;
@@ -107,7 +107,7 @@
   // and may be accessed by find_subtree_from_root.
   // The call_method is the dest_method for a special or static invocation.
   // The call_method is an optimized virtual method candidate otherwise.
-  WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci);
+  WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci, bool& should_delay);
 
   // Information about inlined method
   JVMState*   caller_jvms()       const { return _caller_jvms; }
@@ -469,10 +469,6 @@
   // Helper function to uncommon-trap or bailout for non-compilable call-sites
   bool can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass *klass);
 
-  // Helper function to identify inlining potential at call-site
-  ciMethod* optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
-                              ciMethod *dest_method, const TypeOopPtr* receiver_type);
-
   // Helper function to setup for type-profile based inlining
   bool prepare_type_profile_inline(ciInstanceKlass* prof_klass, ciMethod* prof_method);
 
--- a/src/share/vm/opto/parse1.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/parse1.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -601,8 +601,8 @@
   set_map(entry_map);
   do_exits();
 
-  if (log)  log->done("parse nodes='%d' memory='%d'",
-                      C->unique(), C->node_arena()->used());
+  if (log)  log->done("parse nodes='%d' live='%d' memory='%d'",
+                      C->unique(), C->live_nodes(), C->node_arena()->used());
 }
 
 //---------------------------do_all_blocks-------------------------------------
@@ -1008,7 +1008,7 @@
   // If this is an inlined method, we may have to do a receiver null check.
   if (_caller->has_method() && is_normal_parse() && !method()->is_static()) {
     GraphKit kit(_caller);
-    kit.null_check_receiver(method());
+    kit.null_check_receiver_before_call(method());
     _caller = kit.transfer_exceptions_into_jvms();
     if (kit.stopped()) {
       _exits.add_exception_states_from(_caller);
@@ -1398,13 +1398,14 @@
 #ifdef ASSERT
     int pre_bc_sp = sp();
     int inputs, depth;
-    bool have_se = !stopped() && compute_stack_effects(inputs, depth, /*for_parse*/ true);
+    bool have_se = !stopped() && compute_stack_effects(inputs, depth);
     assert(!have_se || pre_bc_sp >= inputs, err_msg_res("have enough stack to execute this BC: pre_bc_sp=%d, inputs=%d", pre_bc_sp, inputs));
 #endif //ASSERT
 
     do_one_bytecode();
 
-    assert(!have_se || stopped() || failing() || (sp() - pre_bc_sp) == depth, "correct depth prediction");
+    assert(!have_se || stopped() || failing() || (sp() - pre_bc_sp) == depth,
+           err_msg_res("incorrect depth prediction: sp=%d, pre_bc_sp=%d, depth=%d", sp(), pre_bc_sp, depth));
 
     do_exceptions();
 
--- a/src/share/vm/opto/parse2.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/parse2.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -48,7 +48,7 @@
   const Type* elem = Type::TOP;
   Node* adr = array_addressing(elem_type, 0, &elem);
   if (stopped())  return;     // guaranteed null or range check
-  _sp -= 2;                   // Pop array and index
+  dec_sp(2);                  // Pop array and index
   const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
   Node* ld = make_load(control(), adr, elem, elem_type, adr_type);
   push(ld);
@@ -60,7 +60,7 @@
   Node* adr = array_addressing(elem_type, 1);
   if (stopped())  return;     // guaranteed null or range check
   Node* val = pop();
-  _sp -= 2;                   // Pop array and index
+  dec_sp(2);                  // Pop array and index
   const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
   store_to_memory(control(), adr, val, elem_type, adr_type);
 }
@@ -73,7 +73,7 @@
   Node *ary   = peek(1+vals);   // in case of exception
 
   // Null check the array base, with correct stack contents
-  ary = do_null_check(ary, T_ARRAY);
+  ary = null_check(ary, T_ARRAY);
   // Compile-time detect of null-exception?
   if (stopped())  return top();
 
@@ -681,7 +681,7 @@
 
 void Parse::do_irem() {
   // Must keep both values on the expression-stack during null-check
-  do_null_check(peek(), T_INT);
+  zero_check_int(peek());
   // Compile-time detect of null-exception?
   if (stopped())  return;
 
@@ -958,7 +958,7 @@
   DEBUG_ONLY(sync_jvms());   // argument(n) requires a synced jvms
   assert(argument(0) != NULL, "must exist");
   assert(bc_depth == 1 || argument(1) != NULL, "two must exist");
-  _sp += bc_depth;
+  inc_sp(bc_depth);
   return bc_depth;
 }
 
@@ -1581,8 +1581,8 @@
     set_pair_local( iter().get_index(), dstore_rounding(pop_pair()) );
     break;
 
-  case Bytecodes::_pop:  _sp -= 1;   break;
-  case Bytecodes::_pop2: _sp -= 2;   break;
+  case Bytecodes::_pop:  dec_sp(1);   break;
+  case Bytecodes::_pop2: dec_sp(2);   break;
   case Bytecodes::_swap:
     a = pop();
     b = pop();
@@ -1650,7 +1650,7 @@
 
   case Bytecodes::_arraylength: {
     // Must do null-check with value on expression stack
-    Node *ary = do_null_check(peek(), T_ARRAY);
+    Node *ary = null_check(peek(), T_ARRAY);
     // Compile-time detect of null-exception?
     if (stopped())  return;
     a = pop();
@@ -1667,15 +1667,15 @@
   case Bytecodes::_laload: {
     a = array_addressing(T_LONG, 0);
     if (stopped())  return;     // guaranteed null or range check
-    _sp -= 2;                   // Pop array and index
-    push_pair( make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS));
+    dec_sp(2);                  // Pop array and index
+    push_pair(make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS));
     break;
   }
   case Bytecodes::_daload: {
     a = array_addressing(T_DOUBLE, 0);
     if (stopped())  return;     // guaranteed null or range check
-    _sp -= 2;                   // Pop array and index
-    push_pair( make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES));
+    dec_sp(2);                  // Pop array and index
+    push_pair(make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES));
     break;
   }
   case Bytecodes::_bastore: array_store(T_BYTE);  break;
@@ -1699,7 +1699,7 @@
     a = array_addressing(T_LONG, 2);
     if (stopped())  return;     // guaranteed null or range check
     c = pop_pair();
-    _sp -= 2;                   // Pop array and index
+    dec_sp(2);                  // Pop array and index
     store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS);
     break;
   }
@@ -1707,7 +1707,7 @@
     a = array_addressing(T_DOUBLE, 2);
     if (stopped())  return;     // guaranteed null or range check
     c = pop_pair();
-    _sp -= 2;                   // Pop array and index
+    dec_sp(2);                  // Pop array and index
     c = dstore_rounding(c);
     store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES);
     break;
@@ -1733,7 +1733,7 @@
     break;
   case Bytecodes::_idiv:
     // Must keep both values on the expression-stack during null-check
-    do_null_check(peek(), T_INT);
+    zero_check_int(peek());
     // Compile-time detect of null-exception?
     if (stopped())  return;
     b = pop();
@@ -2041,7 +2041,7 @@
   case Bytecodes::_lrem:
     // Must keep both values on the expression-stack during null-check
     assert(peek(0) == top(), "long word order");
-    do_null_check(peek(1), T_LONG);
+    zero_check_long(peek(1));
     // Compile-time detect of null-exception?
     if (stopped())  return;
     b = pop_pair();
@@ -2053,7 +2053,7 @@
   case Bytecodes::_ldiv:
     // Must keep both values on the expression-stack during null-check
     assert(peek(0) == top(), "long word order");
-    do_null_check(peek(1), T_LONG);
+    zero_check_long(peek(1));
     // Compile-time detect of null-exception?
     if (stopped())  return;
     b = pop_pair();
@@ -2175,7 +2175,7 @@
 
   case Bytecodes::_athrow:
     // null exception oop throws NULL pointer exception
-    do_null_check(peek(), T_OBJECT);
+    null_check(peek());
     if (stopped())  return;
     // Hook the thrown exception directly to subsequent handlers.
     if (BailoutToInterpreterForThrows) {
--- a/src/share/vm/opto/parse3.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/parse3.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -116,7 +116,7 @@
   Node* obj;
   if (is_field) {
     int obj_depth = is_get ? 0 : field->type()->size();
-    obj = do_null_check(peek(obj_depth), T_OBJECT);
+    obj = null_check(peek(obj_depth));
     // Compile-time detect of null-exception?
     if (stopped())  return;
 
@@ -126,11 +126,11 @@
 #endif
 
     if (is_get) {
-      --_sp;  // pop receiver before getting
+      (void) pop();  // pop receiver before getting
       do_get_xxx(obj, field, is_field);
     } else {
       do_put_xxx(obj, field, is_field);
-      --_sp;  // pop receiver after putting
+      (void) pop();  // pop receiver after putting
     }
   } else {
     const TypeInstPtr* tip = TypeInstPtr::make(field_holder->java_mirror());
@@ -230,7 +230,7 @@
     }
     // If there is going to be a trap, put it at the next bytecode:
     set_bci(iter().next_bci());
-    do_null_assert(peek(), T_OBJECT);
+    null_assert(peek());
     set_bci(iter().cur_bci()); // put it back
   }
 
@@ -463,7 +463,7 @@
     // Note: the reexecute bit will be set in GraphKit::add_safepoint_edges()
     // when AllocateArray node for newarray is created.
     { PreserveReexecuteState preexecs(this);
-      _sp += ndimensions;
+      inc_sp(ndimensions);
       // Pass 0 as nargs since uncommon trap code does not need to restore stack.
       obj = expand_multianewarray(array_klass, &length[0], ndimensions, 0);
     } //original reexecute and sp are set back here
@@ -492,7 +492,7 @@
     // Create a java array for dimension sizes
     Node* dims = NULL;
     { PreserveReexecuteState preexecs(this);
-      _sp += ndimensions;
+      inc_sp(ndimensions);
       Node* dims_array_klass = makecon(TypeKlassPtr::make(ciArrayKlass::make(ciType::make(T_INT))));
       dims = new_array(dims_array_klass, intcon(ndimensions), 0);
 
@@ -509,6 +509,7 @@
                           makecon(TypeKlassPtr::make(array_klass)),
                           dims);
   }
+  make_slow_call_ex(c, env()->Throwable_klass(), false);
 
   Node* res = _gvn.transform(new (C) ProjNode(c, TypeFunc::Parms));
 
--- a/src/share/vm/opto/parseHelper.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/parseHelper.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -84,7 +84,7 @@
                        C->log()->identify(tp->klass()));
       }
     }
-    do_null_assert(obj, T_OBJECT);
+    null_assert(obj);
     assert( stopped() || _gvn.type(peek())->higher_equal(TypePtr::NULL_PTR), "what's left behind is null" );
     if (!stopped()) {
       profile_null_checkcast();
@@ -116,7 +116,7 @@
       C->log()->elem("assert_null reason='instanceof' klass='%d'",
                      C->log()->identify(klass));
     }
-    do_null_assert(peek(), T_OBJECT);
+    null_assert(peek());
     assert( stopped() || _gvn.type(peek())->higher_equal(TypePtr::NULL_PTR), "what's left behind is null" );
     if (!stopped()) {
       // The object is now known to be null.
@@ -139,10 +139,10 @@
 // pull array from stack and check that the store is valid
 void Parse::array_store_check() {
 
-  // Shorthand access to array store elements
-  Node *obj = stack(_sp-1);
-  Node *idx = stack(_sp-2);
-  Node *ary = stack(_sp-3);
+  // Shorthand access to array store elements without popping them.
+  Node *obj = peek(0);
+  Node *idx = peek(1);
+  Node *ary = peek(2);
 
   if (_gvn.type(obj) == TypePtr::NULL_PTR) {
     // There's never a type check on null values.
--- a/src/share/vm/opto/phaseX.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/phaseX.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -75,6 +75,13 @@
   // nh->_sentinel must be in the current node space
 }
 
+void NodeHash::replace_with(NodeHash *nh) {
+  debug_only(_table = (Node**)badAddress);   // interact correctly w/ operator=
+  // just copy in all the fields
+  *this = *nh;
+  // nh->_sentinel must be in the current node space
+}
+
 //------------------------------hash_find--------------------------------------
 // Find in hash table
 Node *NodeHash::hash_find( const Node *n ) {
@@ -383,6 +390,8 @@
 
   // Identify nodes that are reachable from below, useful.
   C->identify_useful_nodes(_useful);
+  // Update dead node list
+  C->update_dead_node_list(_useful);
 
   // Remove all useless nodes from PhaseValues' recorded types
   // Must be done before disconnecting nodes to preserve hash-table-invariant
@@ -1190,7 +1199,7 @@
             }
           }
         }
-
+        C->record_dead_node(dead->_idx);
         if (dead->is_macro()) {
           C->remove_macro_node(dead);
         }
@@ -1199,6 +1208,11 @@
           continue;
         }
       }
+      // A constant node that has no out-edges and only one in-edge from
+      // root is usually dead. However, the reshaping walk sometimes makes
+      // it reachable again by adding use edges. So we will NOT count Con
+      // nodes as dead, to be conservative about the dead node count at
+      // any given time.
     }
 
     // Aggressively kill globally dead uses
--- a/src/share/vm/opto/phaseX.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/phaseX.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -92,6 +92,7 @@
   }
 
   void   remove_useless_nodes(VectorSet &useful); // replace with sentinel
+  void replace_with(NodeHash* nh);
 
   Node  *sentinel() { return _sentinel; }
 
@@ -386,6 +387,11 @@
   Node  *transform( Node *n );
   Node  *transform_no_reclaim( Node *n );
 
+  void replace_with(PhaseGVN* gvn) {
+    _table.replace_with(&gvn->_table);
+    _types = gvn->_types;
+  }
+
   // Check for a simple dead loop when a data node references itself.
   DEBUG_ONLY(void dead_loop_check(Node *n);)
 };
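NodeHash::replace_with() above poisons _table in debug builds and then relies on the compiler-generated operator= to copy every field. A minimal standalone sketch of that idiom (Table, ASSERT and the poison value are stand-ins, not HotSpot's debug_only/badAddress):

    // Sketch: poison a pointer field so a stale table is never mistaken
    // for a live one, then member-wise copy all fields via operator=.
    #include <cassert>
    #include <cstdint>

    struct Table {
      int*     _table   = nullptr;
      unsigned _max     = 0;
      unsigned _inserts = 0;

      void replace_with(Table* other) {
    #ifdef ASSERT
        _table = reinterpret_cast<int*>(uintptr_t(0xBAADBABEu)); // interact correctly w/ operator=
    #endif
        *this = *other;  // default operator= copies every field, including _table
      }
    };

    int main() {
      Table a, b;
      int storage[4] = {0};
      b._table = storage; b._max = 4; b._inserts = 2;
      a.replace_with(&b);
      assert(a._table == storage && a._max == 4 && a._inserts == 2);
      return 0;
    }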
--- a/src/share/vm/opto/postaloc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/postaloc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -146,7 +146,7 @@
       }
     }
     // Disconnect control and remove precedence edges if any exist
-    old->disconnect_inputs(NULL);
+    old->disconnect_inputs(NULL, C);
   }
   return blk_adjust;
 }
@@ -513,7 +513,7 @@
         b->_nodes.remove(j--); phi_dex--;
         _cfg._bbs.map(phi->_idx,NULL);
         phi->replace_by(u);
-        phi->disconnect_inputs(NULL);
+        phi->disconnect_inputs(NULL, C);
         continue;
       }
       // Note that if value[pidx] exists, then we merged no new values here
--- a/src/share/vm/opto/reg_split.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/reg_split.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -747,7 +747,7 @@
             if( i >= cnt ) {    // Found one unique input
               assert(Find_id(n) == Find_id(u), "should be the same lrg");
               n->replace_by(u); // Then replace with unique input
-              n->disconnect_inputs(NULL);
+              n->disconnect_inputs(NULL, C);
               b->_nodes.remove(insidx);
               insidx--;
               b->_ihrp_index--;
--- a/src/share/vm/opto/regalloc.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/regalloc.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -40,6 +40,7 @@
                Phase(Register_Allocation), _cfg(cfg), _matcher(matcher),
                _node_oops(Thread::current()->resource_area()),
                _node_regs(0),
+               _node_regs_max_index(0),
                _framesize(0xdeadbeef)
 {
     int i;
--- a/src/share/vm/opto/regmask.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/regmask.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -108,13 +108,13 @@
 //------------------------------dump-------------------------------------------
 
 #ifndef PRODUCT
-void OptoReg::dump( int r ) {
-  switch( r ) {
-  case Special: tty->print("r---");   break;
-  case Bad:     tty->print("rBAD");   break;
+void OptoReg::dump(int r, outputStream *st) {
+  switch (r) {
+  case Special: st->print("r---"); break;
+  case Bad:     st->print("rBAD"); break;
   default:
-    if( r < _last_Mach_Reg ) tty->print(Matcher::regName[r]);
-    else tty->print("rS%d",r);
+    if (r < _last_Mach_Reg) st->print(Matcher::regName[r]);
+    else st->print("rS%d",r);
     break;
   }
 }
@@ -404,53 +404,53 @@
 
 #ifndef PRODUCT
 //------------------------------print------------------------------------------
-void RegMask::dump( ) const {
-  tty->print("[");
+void RegMask::dump(outputStream *st) const {
+  st->print("[");
   RegMask rm = *this;           // Structure copy into local temp
 
   OptoReg::Name start = rm.find_first_elem(); // Get a register
-  if( OptoReg::is_valid(start) ) { // Check for empty mask
+  if (OptoReg::is_valid(start)) { // Check for empty mask
     rm.Remove(start);           // Yank from mask
-    OptoReg::dump(start);       // Print register
+    OptoReg::dump(start, st);   // Print register
     OptoReg::Name last = start;
 
     // Now I have printed an initial register.
     // Print adjacent registers as "rX-rZ" instead of "rX,rY,rZ".
     // Begin looping over the remaining registers.
-    while( 1 ) {                //
+    while (1) {                 //
       OptoReg::Name reg = rm.find_first_elem(); // Get a register
-      if( !OptoReg::is_valid(reg) )
+      if (!OptoReg::is_valid(reg))
         break;                  // Empty mask, end loop
       rm.Remove(reg);           // Yank from mask
 
-      if( last+1 == reg ) {     // See if they are adjacent
+      if (last+1 == reg) {      // See if they are adjacent
         // Adjacent registers just collect into long runs, no printing.
         last = reg;
       } else {                  // Ending some kind of run
-        if( start == last ) {   // 1-register run; no special printing
-        } else if( start+1 == last ) {
-          tty->print(",");      // 2-register run; print as "rX,rY"
-          OptoReg::dump(last);
+        if (start == last) {    // 1-register run; no special printing
+        } else if (start+1 == last) {
+          st->print(",");       // 2-register run; print as "rX,rY"
+          OptoReg::dump(last, st);
         } else {                // Multi-register run; print as "rX-rZ"
-          tty->print("-");
-          OptoReg::dump(last);
+          st->print("-");
+          OptoReg::dump(last, st);
         }
-        tty->print(",");        // Seperate start of new run
+        st->print(",");         // Separate start of new run
         start = last = reg;     // Start a new register run
-        OptoReg::dump(start); // Print register
+        OptoReg::dump(start, st); // Print register
       } // End of if ending a register run or not
     } // End of while regmask not empty
 
-    if( start == last ) {       // 1-register run; no special printing
-    } else if( start+1 == last ) {
-      tty->print(",");          // 2-register run; print as "rX,rY"
-      OptoReg::dump(last);
+    if (start == last) {        // 1-register run; no special printing
+    } else if (start+1 == last) {
+      st->print(",");           // 2-register run; print as "rX,rY"
+      OptoReg::dump(last, st);
     } else {                    // Multi-register run; print as "rX-rZ"
-      tty->print("-");
-      OptoReg::dump(last);
+      st->print("-");
+      OptoReg::dump(last, st);
     }
-    if( rm.is_AllStack() ) tty->print("...");
+    if (rm.is_AllStack()) st->print("...");
   }
-  tty->print("]");
+  st->print("]");
 }
 #endif
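The conversion above only changes where RegMask::dump() writes; the run-collapsing output format ("rX-rZ" for adjacent registers) is unchanged. A standalone sketch of that formatting logic, using a sorted vector in place of the register mask:

    // Sketch of the run-collapsing print logic in RegMask::dump().
    #include <cstdio>
    #include <vector>

    static void dump_runs(const std::vector<int>& regs) {
      std::printf("[");
      size_t i = 0;
      while (i < regs.size()) {
        size_t j = i;
        while (j + 1 < regs.size() && regs[j + 1] == regs[j] + 1) j++; // extend run
        if (j == i)          std::printf("r%d", regs[i]);              // 1-register run
        else if (j == i + 1) std::printf("r%d,r%d", regs[i], regs[j]); // 2-register run
        else                 std::printf("r%d-r%d", regs[i], regs[j]); // multi-register run
        i = j + 1;
        if (i < regs.size()) std::printf(",");                         // separate runs
      }
      std::printf("]\n");
    }

    int main() {
      dump_runs({1, 2, 3, 7, 9, 10});   // prints [r1-r3,r7,r9,r10]
      return 0;
    }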
--- a/src/share/vm/opto/regmask.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/regmask.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -310,7 +310,7 @@
 
 #ifndef PRODUCT
   void print() const { dump(); }
-  void dump() const;            // Print a mask
+  void dump(outputStream *st = tty) const; // Print a mask
 #endif
 
   static const RegMask Empty;   // Common empty mask
--- a/src/share/vm/opto/runtime.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/runtime.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -236,7 +236,7 @@
   assert(check_compiled_frame(thread), "incorrect caller");
 
   // These checks are cheap to make and support reflective allocation.
-  int lh = Klass::cast(klass)->layout_helper();
+  int lh = klass->layout_helper();
   if (Klass::layout_helper_needs_slow_path(lh)
       || !InstanceKlass::cast(klass)->is_initialized()) {
     KlassHandle kh(THREAD, klass);
@@ -283,7 +283,7 @@
   // Scavenge and allocate an instance.
   oop result;
 
-  if (Klass::cast(array_type)->oop_is_typeArray()) {
+  if (array_type->oop_is_typeArray()) {
     // The oopFactory likes to work with the element type.
     // (We could bypass the oopFactory, since it doesn't add much value.)
     BasicType elem_type = TypeArrayKlass::cast(array_type)->element_type();
@@ -321,7 +321,7 @@
   // Scavenge and allocate an instance.
   oop result;
 
-  assert(Klass::cast(array_type)->oop_is_typeArray(), "should be called only for type array");
+  assert(array_type->oop_is_typeArray(), "should be called only for type array");
   // The oopFactory likes to work with the element type.
   BasicType elem_type = TypeArrayKlass::cast(array_type)->element_type();
   result = oopFactory::new_typeArray_nozero(elem_type, len, THREAD);
@@ -811,6 +811,48 @@
   return TypeFunc::make(domain, range);
 }
 
+// for aescrypt encrypt/decrypt operations, just three pointers returning void (length is constant)
+const TypeFunc* OptoRuntime::aescrypt_block_Type() {
+  // create input type (domain)
+  int num_args      = 3;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL;    // src
+  fields[argp++] = TypePtr::NOTNULL;    // dest
+  fields[argp++] = TypePtr::NOTNULL;    // k array
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // no result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = NULL; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
+// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning void
+const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
+  // create input type (domain)
+  int num_args      = 5;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL;    // src
+  fields[argp++] = TypePtr::NOTNULL;    // dest
+  fields[argp++] = TypePtr::NOTNULL;    // k array
+  fields[argp++] = TypePtr::NOTNULL;    // r array
+  fields[argp++] = TypeInt::INT;        // src len
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // no result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = NULL; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
 //------------- Interpreter state access for on stack replacement
 const TypeFunc* OptoRuntime::osr_end_Type() {
   // create input type (domain)
@@ -947,7 +989,7 @@
       // since we're notifying the VM on every catch.
       // Force deoptimization and the rest of the lookup
       // will be fine.
-      deoptimize_caller_frame(thread, true);
+      deoptimize_caller_frame(thread);
     }
 
     // Check the stack guard pages.  If enabled, look for handler in this frame;
@@ -1101,17 +1143,22 @@
 
 
 void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) {
-  // Deoptimize frame
-  if (doit) {
-    // Called from within the owner thread, so no need for safepoint
-    RegisterMap reg_map(thread);
-    frame stub_frame = thread->last_frame();
-    assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
-    frame caller_frame = stub_frame.sender(&reg_map);
+  // Deoptimize the caller before continuing, as the compiled
+  // exception handler table may not be valid.
+  if (!StressCompiledExceptionHandlers && doit) {
+    deoptimize_caller_frame(thread);
+  }
+}
 
-    // Deoptimize the caller frame.
-    Deoptimization::deoptimize_frame(thread, caller_frame.id());
-  }
+void OptoRuntime::deoptimize_caller_frame(JavaThread *thread) {
+  // Called from within the owner thread, so no need for safepoint
+  RegisterMap reg_map(thread);
+  frame stub_frame = thread->last_frame();
+  assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
+  frame caller_frame = stub_frame.sender(&reg_map);
+
+  // Deoptimize the caller frame.
+  Deoptimization::deoptimize_frame(thread, caller_frame.id());
 }
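The two TypeFunc builders added to this file describe the AES stub calling conventions to the compiler: three NOTNULL pointers returning void for the single-block case, and four pointers plus an int length for the CBC case. Assuming stub prototypes of roughly the following shape (a hedged sketch; the concrete parameter types are defined with the stubs themselves, not in this hunk):

    // Hypothetical C prototypes matching the domains built above;
    // jbyte/jint are stand-ins for the stubs' element types.
    typedef signed char jbyte;
    typedef int         jint;

    // aescrypt_block_Type(): src, dest, k array -> void (block length is constant)
    void aescrypt_block(jbyte* src, jbyte* dest, jint* k);

    // cipherBlockChaining_aescrypt_Type(): src, dest, k array, r array, src len -> void
    void cipherBlockChaining_aescrypt(jbyte* src, jbyte* dest,
                                      jint* k, jint* r, jint src_len);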
 
 
--- a/src/share/vm/opto/runtime.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/runtime.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -174,6 +174,7 @@
   static address handle_exception_C       (JavaThread* thread);
   static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm);
   static address rethrow_C                (oopDesc* exception, JavaThread *thread, address return_pc );
+  static void deoptimize_caller_frame     (JavaThread *thread);
   static void deoptimize_caller_frame     (JavaThread *thread, bool doit);
   static bool is_deoptimized_caller_frame (JavaThread *thread);
 
@@ -280,6 +281,9 @@
 
   static const TypeFunc* array_fill_Type();
 
+  static const TypeFunc* aescrypt_block_Type();
+  static const TypeFunc* cipherBlockChaining_aescrypt_Type();
+
   // leaf on stack replacement interpreter accessor types
   static const TypeFunc* osr_end_Type();
 
--- a/src/share/vm/opto/stringopts.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/stringopts.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -241,13 +241,13 @@
 
       _stringopts->gvn()->transform(call);
       C->gvn_replace_by(uct, call);
-      uct->disconnect_inputs(NULL);
+      uct->disconnect_inputs(NULL, C);
     }
   }
 
   void cleanup() {
     // disconnect the hook node
-    _arguments->disconnect_inputs(NULL);
+    _arguments->disconnect_inputs(NULL, _stringopts->C);
   }
 };
 
@@ -265,7 +265,8 @@
     } else if (n->is_IfTrue()) {
       Compile* C = _stringopts->C;
       C->gvn_replace_by(n, n->in(0)->in(0));
-      C->gvn_replace_by(n->in(0), C->top());
+      // get rid of the other projection
+      C->gvn_replace_by(n->in(0)->as_If()->proj_out(false), C->top());
     }
   }
 }
@@ -358,7 +359,7 @@
     C->gvn_replace_by(mem_proj, mem);
   }
   C->gvn_replace_by(init, C->top());
-  init->disconnect_inputs(NULL);
+  init->disconnect_inputs(NULL, C);
 }
 
 Node_List PhaseStringOpts::collect_toString_calls() {
@@ -439,7 +440,7 @@
       }
       // Find the constructor call
       Node* result = alloc->result_cast();
-      if (result == NULL || !result->is_CheckCastPP()) {
+      if (result == NULL || !result->is_CheckCastPP() || alloc->in(TypeFunc::Memory)->is_top()) {
         // strange looking allocation
 #ifndef PRODUCT
         if (PrintOptimizeStringConcat) {
@@ -744,7 +745,9 @@
       ctrl_path.push(cn);
       ctrl_path.push(cn->proj_out(0));
       ctrl_path.push(cn->proj_out(0)->unique_out());
-      ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
+      if (cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0) != NULL) {
+        ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
+      }
     } else {
       ShouldNotReachHere();
     }
@@ -762,6 +765,12 @@
     } else if (ptr->is_IfTrue()) {
       IfNode* iff = ptr->in(0)->as_If();
       BoolNode* b = iff->in(1)->isa_Bool();
+
+      if (b == NULL) {
+        fail = true;
+        break;
+      }
+
       Node* cmp = b->in(1);
       Node* v1 = cmp->in(1);
       Node* v2 = cmp->in(2);
@@ -826,6 +835,9 @@
           ptr->in(1)->in(0) != NULL && ptr->in(1)->in(0)->is_If()) {
         // Simple diamond.
         // XXX should check for possibly merging stores.  simple data merges are ok.
+        // The IGVN will make this simple diamond go away when it
+        // transforms the Region. Make sure it sees it.
+        Compile::current()->record_for_igvn(ptr);
         ptr = ptr->in(1)->in(0)->in(0);
         continue;
       }
@@ -1408,75 +1420,80 @@
                       Deoptimization::Action_make_not_entrant);
   }
 
-  // length now contains the number of characters needed for the
-  // char[] so create a new AllocateArray for the char[]
-  Node* char_array = NULL;
-  {
-    PreserveReexecuteState preexecs(&kit);
-    // The original jvms is for an allocation of either a String or
-    // StringBuffer so no stack adjustment is necessary for proper
-    // reexecution.  If we deoptimize in the slow path the bytecode
-    // will be reexecuted and the char[] allocation will be thrown away.
-    kit.jvms()->set_should_reexecute(true);
-    char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
-                               length, 1);
-  }
+  Node* result;
+  if (!kit.stopped()) {
+
+    // length now contains the number of characters needed for the
+    // char[] so create a new AllocateArray for the char[]
+    Node* char_array = NULL;
+    {
+      PreserveReexecuteState preexecs(&kit);
+      // The original jvms is for an allocation of either a String or
+      // StringBuffer so no stack adjustment is necessary for proper
+      // reexecution.  If we deoptimize in the slow path the bytecode
+      // will be reexecuted and the char[] allocation will be thrown away.
+      kit.jvms()->set_should_reexecute(true);
+      char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
+                                 length, 1);
+    }
+
+    // Mark the allocation so that zeroing is skipped since the code
+    // below will overwrite the entire array
+    AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
+    char_alloc->maybe_set_complete(_gvn);
 
-  // Mark the allocation so that zeroing is skipped since the code
-  // below will overwrite the entire array
-  AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
-  char_alloc->maybe_set_complete(_gvn);
-
-  // Now copy the string representations into the final char[]
-  Node* start = __ intcon(0);
-  for (int argi = 0; argi < sc->num_arguments(); argi++) {
-    Node* arg = sc->argument(argi);
-    switch (sc->mode(argi)) {
-      case StringConcat::IntMode: {
-        Node* end = __ AddI(start, string_sizes->in(argi));
-        // getChars words backwards so pass the ending point as well as the start
-        int_getChars(kit, arg, char_array, start, end);
-        start = end;
-        break;
+    // Now copy the string representations into the final char[]
+    Node* start = __ intcon(0);
+    for (int argi = 0; argi < sc->num_arguments(); argi++) {
+      Node* arg = sc->argument(argi);
+      switch (sc->mode(argi)) {
+        case StringConcat::IntMode: {
+          Node* end = __ AddI(start, string_sizes->in(argi));
+          // getChars works backwards so pass the ending point as well as the start
+          int_getChars(kit, arg, char_array, start, end);
+          start = end;
+          break;
+        }
+        case StringConcat::StringNullCheckMode:
+        case StringConcat::StringMode: {
+          start = copy_string(kit, arg, char_array, start);
+          break;
+        }
+        case StringConcat::CharMode: {
+          __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
+                             arg, T_CHAR, char_adr_idx);
+          start = __ AddI(start, __ intcon(1));
+          break;
+        }
+        default:
+          ShouldNotReachHere();
       }
-      case StringConcat::StringNullCheckMode:
-      case StringConcat::StringMode: {
-        start = copy_string(kit, arg, char_array, start);
-        break;
-      }
-      case StringConcat::CharMode: {
-        __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
-                           arg, T_CHAR, char_adr_idx);
-        start = __ AddI(start, __ intcon(1));
-        break;
-      }
-      default:
-        ShouldNotReachHere();
     }
-  }
 
-  // If we're not reusing an existing String allocation then allocate one here.
-  Node* result = sc->string_alloc();
-  if (result == NULL) {
-    PreserveReexecuteState preexecs(&kit);
-    // The original jvms is for an allocation of either a String or
-    // StringBuffer so no stack adjustment is necessary for proper
-    // reexecution.
-    kit.jvms()->set_should_reexecute(true);
-    result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
+    // If we're not reusing an existing String allocation then allocate one here.
+    result = sc->string_alloc();
+    if (result == NULL) {
+      PreserveReexecuteState preexecs(&kit);
+      // The original jvms is for an allocation of either a String or
+      // StringBuffer so no stack adjustment is necessary for proper
+      // reexecution.
+      kit.jvms()->set_should_reexecute(true);
+      result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
+    }
+
+    // Initialize the string
+    if (java_lang_String::has_offset_field()) {
+      kit.store_String_offset(kit.control(), result, __ intcon(0));
+      kit.store_String_length(kit.control(), result, length);
+    }
+    kit.store_String_value(kit.control(), result, char_array);
+  } else {
+    result = C->top();
   }
-
-  // Intialize the string
-  if (java_lang_String::has_offset_field()) {
-    kit.store_String_offset(kit.control(), result, __ intcon(0));
-    kit.store_String_length(kit.control(), result, length);
-  }
-  kit.store_String_value(kit.control(), result, char_array);
-
   // hook up the outgoing control and result
   kit.replace_call(sc->end(), result);
 
   // Unhook any hook nodes
-  string_sizes->disconnect_inputs(NULL);
+  string_sizes->disconnect_inputs(NULL, C);
   sc->cleanup();
 }
--- a/src/share/vm/opto/superword.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/superword.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1776,16 +1776,15 @@
     set_velt_type(n, container_type(n));
   }
 
-  // Propagate narrowed type backwards through operations
+  // Propagate integer narrowed type backwards through operations
   // that don't depend on higher order bits
   for (int i = _block.length() - 1; i >= 0; i--) {
     Node* n = _block.at(i);
     // Only integer types need be examined
-    const Type* vt = velt_type(n);
-    if (vt->basic_type() == T_INT) {
+    const Type* vtn = velt_type(n);
+    if (vtn->basic_type() == T_INT) {
       uint start, end;
       VectorNode::vector_operands(n, &start, &end);
-      const Type* vt = velt_type(n);
 
       for (uint j = start; j < end; j++) {
         Node* in  = n->in(j);
@@ -1801,6 +1800,24 @@
             }
           }
           if (same_type) {
+            // For right shifts of small integer types (bool, byte, char, short)
+            // we need precise information about signedness. Only Load nodes have
+            // this information, because Store nodes are the same for signed and
+            // unsigned values. And any arithmetic operation after a load may
+            // sign-extend a value to Int, so such right shifts can't be used:
+            // vector elements do not have the upper bits of an Int.
+            const Type* vt = vtn;
+            if (VectorNode::is_shift(in)) {
+              Node* load = in->in(1);
+              if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) {
+                vt = velt_type(load);
+              } else if (in->Opcode() != Op_LShiftI) {
+                // Widen type to Int to avoid creation of right shift vector
+                // (align + data_size(s1) check in stmts_can_pack() will fail).
+                // Note, left shifts work regardless of type.
+                vt = TypeInt::INT;
+              }
+            }
             set_velt_type(in, vt);
           }
         }
@@ -1841,7 +1858,20 @@
 // Smallest type containing range of values
 const Type* SuperWord::container_type(Node* n) {
   if (n->is_Mem()) {
-    return Type::get_const_basic_type(n->as_Mem()->memory_type());
+    BasicType bt = n->as_Mem()->memory_type();
+    if (n->is_Store() && (bt == T_CHAR)) {
+      // Use T_SHORT type instead of T_CHAR for stored values because any
+      // preceding arithmetic operation extends values to signed Int.
+      bt = T_SHORT;
+    }
+    if (n->Opcode() == Op_LoadUB) {
+      // Adjust the type for unsigned byte loads; it is important for right shifts.
+      // T_BOOLEAN is used because there is no basic type representing
+      // TypeInt::UBYTE. Using T_BOOLEAN for vectors is fine because only the
+      // size (one byte) and signedness are important.
+      bt = T_BOOLEAN;
+    }
+    return Type::get_const_basic_type(bt);
   }
   const Type* t = _igvn.type(n);
   if (t->basic_type() == T_INT) {
--- a/src/share/vm/opto/type.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/type.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,7 +61,7 @@
   { Bad,             T_ILLEGAL,    "tuple:",        false, Node::NotAMachineReg, relocInfo::none          },  // Tuple
   { Bad,             T_ARRAY,      "array:",        false, Node::NotAMachineReg, relocInfo::none          },  // Array
 
-#if defined(IA32) || defined(AMD64)
+#ifndef SPARC
   { Bad,             T_ILLEGAL,    "vectors:",      false, Op_VecS,              relocInfo::none          },  // VectorS
   { Bad,             T_ILLEGAL,    "vectord:",      false, Op_VecD,              relocInfo::none          },  // VectorD
   { Bad,             T_ILLEGAL,    "vectorx:",      false, Op_VecX,              relocInfo::none          },  // VectorX
@@ -1542,10 +1542,10 @@
 static const char* longnamenear(jlong x, const char* xname, char* buf, jlong n) {
   if (n > x) {
     if (n >= x + 10000)  return NULL;
-    sprintf(buf, "%s+" INT64_FORMAT, xname, n - x);
+    sprintf(buf, "%s+" JLONG_FORMAT, xname, n - x);
   } else if (n < x) {
     if (n <= x - 10000)  return NULL;
-    sprintf(buf, "%s-" INT64_FORMAT, xname, x - n);
+    sprintf(buf, "%s-" JLONG_FORMAT, xname, x - n);
   } else {
     return xname;
   }
@@ -1557,11 +1557,11 @@
   if (n == min_jlong)
     return "min";
   else if (n < min_jlong + 10000)
-    sprintf(buf, "min+" INT64_FORMAT, n - min_jlong);
+    sprintf(buf, "min+" JLONG_FORMAT, n - min_jlong);
   else if (n == max_jlong)
     return "max";
   else if (n > max_jlong - 10000)
-    sprintf(buf, "max-" INT64_FORMAT, max_jlong - n);
+    sprintf(buf, "max-" JLONG_FORMAT, max_jlong - n);
   else if ((str = longnamenear(max_juint, "maxuint", buf, n)) != NULL)
     return str;
   else if ((str = longnamenear(max_jint, "maxint", buf, n)) != NULL)
@@ -1569,7 +1569,7 @@
   else if ((str = longnamenear(min_jint, "minint", buf, n)) != NULL)
     return str;
   else
-    sprintf(buf, INT64_FORMAT, n);
+    sprintf(buf, JLONG_FORMAT, n);
   return buf;
 }
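The INT64_FORMAT to JLONG_FORMAT changes above tie the printf format to the jlong typedef rather than to int64_t, so call sites never hard-code %ld versus %lld. A minimal sketch of the macro pattern (illustrative definitions only, not the real globalDefinitions ones):

    #include <cstdio>

    #ifdef _LP64
    typedef long      jlong_t;            // stand-in for jlong
    #define JLONG_FORMAT_SKETCH "%ld"
    #else
    typedef long long jlong_t;
    #define JLONG_FORMAT_SKETCH "%lld"
    #endif

    int main() {
      jlong_t n = 123456789;
      std::printf("min+" JLONG_FORMAT_SKETCH "\n", n);  // one format, any platform
      return 0;
    }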
 
--- a/src/share/vm/opto/type.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/type.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -242,8 +242,10 @@
   const TypeInt    *isa_int() const;             // Returns NULL if not an Int
   const TypeLong   *is_long() const;
   const TypeLong   *isa_long() const;            // Returns NULL if not a Long
+  const TypeD      *isa_double() const;          // Returns NULL if not a Double{Top,Con,Bot}
   const TypeD      *is_double_constant() const;  // Asserts it is a DoubleCon
   const TypeD      *isa_double_constant() const; // Returns NULL if not a DoubleCon
+  const TypeF      *isa_float() const;           // Returns NULL if not a Float{Top,Con,Bot}
   const TypeF      *is_float_constant() const;   // Asserts it is a FloatCon
   const TypeF      *isa_float_constant() const;  // Returns NULL if not a FloatCon
   const TypeTuple  *is_tuple() const;            // Collection of fields, NOT a pointer
@@ -1320,24 +1322,6 @@
   return ((TypeD*)this)->_d;
 }
 
-inline const TypeF *Type::is_float_constant() const {
-  assert( _base == FloatCon, "Not a Float" );
-  return (TypeF*)this;
-}
-
-inline const TypeF *Type::isa_float_constant() const {
-  return ( _base == FloatCon ? (TypeF*)this : NULL);
-}
-
-inline const TypeD *Type::is_double_constant() const {
-  assert( _base == DoubleCon, "Not a Double" );
-  return (TypeD*)this;
-}
-
-inline const TypeD *Type::isa_double_constant() const {
-  return ( _base == DoubleCon ? (TypeD*)this : NULL);
-}
-
 inline const TypeInt *Type::is_int() const {
   assert( _base == Int, "Not an Int" );
   return (TypeInt*)this;
@@ -1356,6 +1340,36 @@
   return ( _base == Long ? (TypeLong*)this : NULL);
 }
 
+inline const TypeF *Type::isa_float() const {
+  return ((_base == FloatTop ||
+           _base == FloatCon ||
+           _base == FloatBot) ? (TypeF*)this : NULL);
+}
+
+inline const TypeF *Type::is_float_constant() const {
+  assert( _base == FloatCon, "Not a Float" );
+  return (TypeF*)this;
+}
+
+inline const TypeF *Type::isa_float_constant() const {
+  return ( _base == FloatCon ? (TypeF*)this : NULL);
+}
+
+inline const TypeD *Type::isa_double() const {
+  return ((_base == DoubleTop ||
+           _base == DoubleCon ||
+           _base == DoubleBot) ? (TypeD*)this : NULL);
+}
+
+inline const TypeD *Type::is_double_constant() const {
+  assert( _base == DoubleCon, "Not a Double" );
+  return (TypeD*)this;
+}
+
+inline const TypeD *Type::isa_double_constant() const {
+  return ( _base == DoubleCon ? (TypeD*)this : NULL);
+}
+
 inline const TypeTuple *Type::is_tuple() const {
   assert( _base == Tuple, "Not a Tuple" );
   return (TypeTuple*)this;
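The new isa_float()/isa_double() queries accept any point of the float or double sub-lattice (Top, Con, Bot), while the existing isa_float_constant()/isa_double_constant() match only the Con point. A standalone sketch of that distinction (MiniType and the enum are simplified stand-ins):

    #include <cassert>

    enum Base { FloatTop, FloatCon, FloatBot,
                DoubleTop, DoubleCon, DoubleBot, Int };

    struct MiniType {
      Base _base;
      // isa_double(): any point of the double sub-lattice
      const MiniType* isa_double() const {
        return (_base == DoubleTop || _base == DoubleCon || _base == DoubleBot)
               ? this : nullptr;
      }
      // isa_double_constant(): only the constant point
      const MiniType* isa_double_constant() const {
        return (_base == DoubleCon) ? this : nullptr;
      }
    };

    int main() {
      MiniType top{DoubleTop}, con{DoubleCon}, i{Int};
      assert(top.isa_double() && !top.isa_double_constant());
      assert(con.isa_double() &&  con.isa_double_constant());
      assert(!i.isa_double());
      return 0;
    }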
--- a/src/share/vm/opto/vectornode.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/opto/vectornode.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,8 +29,7 @@
 //------------------------------VectorNode--------------------------------------
 
 // Return the vector operator for the specified scalar operation
-// and vector length.  Also used to check if the code generator
-// supports the vector operation.
+// and vector length.
 int VectorNode::opcode(int sopc, BasicType bt) {
   switch (sopc) {
   case Op_AddI:
@@ -75,7 +74,7 @@
     case T_BYTE:   return 0;   // Unimplemented
     case T_CHAR:
     case T_SHORT:  return Op_MulVS;
-    case T_INT:    return Matcher::match_rule_supported(Op_MulVI) ? Op_MulVI : 0; // SSE4_1
+    case T_INT:    return Op_MulVI;
     }
     ShouldNotReachHere();
   case Op_MulF:
@@ -104,9 +103,9 @@
     return Op_LShiftVL;
   case Op_RShiftI:
     switch (bt) {
-    case T_BOOLEAN:
+    case T_BOOLEAN: return Op_URShiftVB; // boolean is an unsigned value
+    case T_CHAR:    return Op_URShiftVS; // char is an unsigned value
     case T_BYTE:   return Op_RShiftVB;
-    case T_CHAR:
     case T_SHORT:  return Op_RShiftVS;
     case T_INT:    return Op_RShiftVI;
     }
@@ -116,10 +115,14 @@
     return Op_RShiftVL;
   case Op_URShiftI:
     switch (bt) {
-    case T_BOOLEAN:
-    case T_BYTE:   return Op_URShiftVB;
-    case T_CHAR:
-    case T_SHORT:  return Op_URShiftVS;
+    case T_BOOLEAN: return Op_URShiftVB;
+    case T_CHAR:    return Op_URShiftVS;
+    case T_BYTE:
+    case T_SHORT:  return 0; // A vector logical right shift of signed short
+                             // values produces an incorrect Java result for
+                             // negative data, because Java converts a short
+                             // value to an int with sign extension before
+                             // the shift.
     case T_INT:    return Op_URShiftVI;
     }
     ShouldNotReachHere();
@@ -157,12 +160,14 @@
   return 0; // Unimplemented
 }
 
+// Also used to check if the code generator
+// supports the vector operation.
 bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
   if (is_java_primitive(bt) &&
       (vlen > 1) && is_power_of_2(vlen) &&
       Matcher::vector_size_supported(bt, vlen)) {
     int vopc = VectorNode::opcode(opc, bt);
-    return vopc > 0 && Matcher::has_match_rule(vopc);
+    return vopc > 0 && Matcher::match_rule_supported(vopc);
   }
   return false;
 }
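The URShiftI table above refuses byte and short lanes because Java widens subword operands to int with sign extension before shifting, while a narrow vector lane sees only the low bits. A standalone scalar demonstration of the mismatch:

    // Shows why a 16-bit-lane logical right shift cannot implement
    // Java's (short >>> k): Java first sign-extends the short to 32 bits.
    #include <cassert>
    #include <cstdint>

    int main() {
      int16_t s = -4;                                     // sign bit set

      // Java semantics: widen with sign extension, then shift unsigned.
      int32_t widened = s;                                // 0xFFFFFFFC
      int32_t java    = int32_t(uint32_t(widened) >> 2);  // 0x3FFFFFFF

      // Hypothetical 16-bit vector lane: shifts the low 16 bits only.
      uint16_t lane   = uint16_t(s) >> 2;                 // 0x3FFF

      assert(int32_t(int16_t(lane)) != java);             // lane cannot match
      return 0;
    }

For T_BOOLEAN and T_CHAR the widening is zero extension, so the lane result always agrees with the Java result, which is why those two map to the unsigned vector shifts above.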
--- a/src/share/vm/prims/forte.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/forte.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -216,10 +216,7 @@
     // not yet valid.
 
     *method_p = method;
-
-    // See if gc may have invalidated method since we validated frame
-
-    if (!Universe::heap()->is_valid_method(method)) return false;
+    if (!method->is_valid_method()) return false;
 
     intptr_t bcx = fr->interpreter_frame_bcx();
 
@@ -394,19 +391,11 @@
   bool fully_decipherable = find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
 
   // The frame might not be walkable but still recovered a method
-  // (e.g. an nmethod with no scope info for the pc
+  // (e.g. an nmethod with no scope info for the pc)
 
   if (method == NULL) return;
 
-  CollectedHeap* ch = Universe::heap();
-
-  // The method is not stored GC safe so see if GC became active
-  // after we entered AsyncGetCallTrace() and before we try to
-  // use the Method*.
-  // Yes, there is still a window after this check and before
-  // we use Method* below, but we can't lock out GC so that
-  // has to be an acceptable risk.
-  if (!ch->is_valid_method(method)) {
+  if (!method->is_valid_method()) {
     trace->num_frames = ticks_GC_active; // -2
     return;
   }
@@ -440,13 +429,7 @@
     bci = st.bci();
     method = st.method();
 
-    // The method is not stored GC safe so see if GC became active
-    // after we entered AsyncGetCallTrace() and before we try to
-    // use the Method*.
-    // Yes, there is still a window after this check and before
-    // we use Method* below, but we can't lock out GC so that
-    // has to be an acceptable risk.
-    if (!ch->is_valid_method(method)) {
+    if (!method->is_valid_method()) {
       // we throw away everything we've gathered in this sample since
       // none of it is safe
       trace->num_frames = ticks_GC_active; // -2
--- a/src/share/vm/prims/jni.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jni.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,6 +24,7 @@
  */
 
 #include "precompiled.hpp"
+#include "ci/ciReplay.hpp"
 #include "classfile/altHashing.hpp"
 #include "classfile/classLoader.hpp"
 #include "classfile/javaClasses.hpp"
@@ -68,6 +69,7 @@
 #include "runtime/reflection.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/signature.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vm_operations.hpp"
 #include "services/runtimeService.hpp"
 #include "trace/tracing.hpp"
@@ -78,19 +80,15 @@
 #include "utilities/histogram.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "os_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "os_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 
 static jint CurrentVersion = JNI_VERSION_1_6;
@@ -233,13 +231,13 @@
 intptr_t jfieldIDWorkaround::encode_klass_hash(Klass* k, intptr_t offset) {
   if (offset <= small_offset_mask) {
     Klass* field_klass = k;
-    Klass* super_klass = Klass::cast(field_klass)->super();
+    Klass* super_klass = field_klass->super();
     // With compressed oops the most super class with nonstatic fields would
     // be the owner of fields embedded in the header.
     while (InstanceKlass::cast(super_klass)->has_nonstatic_fields() &&
            InstanceKlass::cast(super_klass)->contains_field_offset(offset)) {
       field_klass = super_klass;   // super contains the field also
-      super_klass = Klass::cast(field_klass)->super();
+      super_klass = field_klass->super();
     }
     debug_only(No_Safepoint_Verifier nosafepoint;)
     uintptr_t klass_hash = field_klass->identity_hash();
@@ -249,7 +247,7 @@
     #ifndef PRODUCT
     {
       ResourceMark rm;
-      warning("VerifyJNIFields: long offset %d in %s", offset, Klass::cast(k)->external_name());
+      warning("VerifyJNIFields: long offset %d in %s", offset, k->external_name());
     }
     #endif
 #endif
@@ -265,7 +263,7 @@
     // Could use a non-blocking query for identity_hash here...
     if ((k->identity_hash() & klass_mask) == klass_hash)
       return true;
-    k = Klass::cast(k)->super();
+    k = k->super();
   } while (k != NULL);
   return false;
 }
@@ -283,7 +281,7 @@
       #ifndef PRODUCT
       if (Verbose) {
   ResourceMark rm;
-  warning("VerifyJNIFields: unverified offset %d for %s", offset, Klass::cast(k)->external_name());
+  warning("VerifyJNIFields: unverified offset %d for %s", offset, k->external_name());
       }
       #endif
 #endif
@@ -415,7 +413,7 @@
   }
 
   cls = (jclass)JNIHandles::make_local(
-    env, Klass::cast(k)->java_mirror());
+    env, k->java_mirror());
   return cls;
 JNI_END
 
@@ -536,7 +534,7 @@
 
   KlassHandle k1(THREAD, k);
   // Make sure class is initialized before handing id's out to methods
-  Klass::cast(k1())->initialize(CHECK_NULL);
+  k1()->initialize(CHECK_NULL);
   Method* m = InstanceKlass::cast(k1())->method_with_idnum(slot);
   ret = m==NULL? NULL : m->jmethod_id();  // return NULL if reflected method deleted
   return ret;
@@ -569,7 +567,7 @@
 
   KlassHandle k1(THREAD, k);
   // Make sure class is initialized before handing id's out to fields
-  Klass::cast(k1())->initialize(CHECK_NULL);
+  k1()->initialize(CHECK_NULL);
 
   // First check if this is a static field
   if (modifiers & JVM_ACC_STATIC) {
@@ -648,17 +646,17 @@
   // interfaces return NULL
   // proper classes return Klass::super()
   Klass* k = java_lang_Class::as_Klass(mirror);
-  if (Klass::cast(k)->is_interface()) return NULL;
+  if (k->is_interface()) return NULL;
 
   // return mirror for superclass
-  Klass* super = Klass::cast(k)->java_super();
+  Klass* super = k->java_super();
   // super2 is the value computed by the compiler's getSuperClass intrinsic:
-  debug_only(Klass* super2 = ( Klass::cast(k)->oop_is_array()
+  debug_only(Klass* super2 = ( k->oop_is_array()
                                  ? SystemDictionary::Object_klass()
-                                 : Klass::cast(k)->super() ) );
+                                 : k->super() ) );
   assert(super == super2,
          "java_super computation depends on interface, array, other super");
-  obj = (super == NULL) ? NULL : (jclass) JNIHandles::make_local(Klass::cast(super)->java_mirror());
+  obj = (super == NULL) ? NULL : (jclass) JNIHandles::make_local(super->java_mirror());
   return obj;
 JNI_END
 
@@ -686,7 +684,7 @@
   Klass* sub_klass   = java_lang_Class::as_Klass(sub_mirror);
   Klass* super_klass = java_lang_Class::as_Klass(super_mirror);
   assert(sub_klass != NULL && super_klass != NULL, "invalid arguments to jni_IsAssignableFrom");
-  jboolean ret = Klass::cast(sub_klass)->is_subtype_of(super_klass) ?
+  jboolean ret = sub_klass->is_subtype_of(super_klass) ?
                    JNI_TRUE : JNI_FALSE;
 #ifndef USDT2
   DTRACE_PROBE1(hotspot_jni, IsAssignableFrom__return, ret);
@@ -820,7 +818,7 @@
         ResourceMark rm(THREAD);
         jio_fprintf(defaultStream::error_stream(),
         ". Uncaught exception of type %s.",
-        Klass::cast(ex->klass())->external_name());
+        ex->klass()->external_name());
       }
     }
   }
@@ -1358,7 +1356,7 @@
     Method* m = Method::resolve_jmethod_id(method_id);
     number_of_parameters = m->size_of_parameters();
     Klass* holder = m->method_holder();
-    if (!(Klass::cast(holder))->is_interface()) {
+    if (!(holder)->is_interface()) {
       // non-interface call -- for that little speed boost, don't handlize
       debug_only(No_Safepoint_Verifier nosafepoint;)
       if (call_type == JNI_VIRTUAL) {
@@ -1423,7 +1421,7 @@
 
 static instanceOop alloc_object(jclass clazz, TRAPS) {
   KlassHandle k(THREAD, java_lang_Class::as_Klass(JNIHandles::resolve_non_null(clazz)));
-  Klass::cast(k())->check_valid_for_instantiation(false, CHECK_NULL);
+  k()->check_valid_for_instantiation(false, CHECK_NULL);
   InstanceKlass::cast(k())->initialize(CHECK_NULL);
   instanceOop ih = InstanceKlass::cast(k())->allocate_instance(THREAD);
   return ih;
@@ -1545,7 +1543,7 @@
 #endif /* USDT2 */
   Klass* k = JNIHandles::resolve_non_null(obj)->klass();
   jclass ret =
-    (jclass) JNIHandles::make_local(env, Klass::cast(k)->java_mirror());
+    (jclass) JNIHandles::make_local(env, k->java_mirror());
 #ifndef USDT2
   DTRACE_PROBE1(hotspot_jni, GetObjectClass__return, ret);
 #else /* USDT2 */
@@ -1610,7 +1608,7 @@
 
   // Make sure class is linked and initialized before handing id's out to
   // Method*s.
-  Klass::cast(klass())->initialize(CHECK_NULL);
+  klass()->initialize(CHECK_NULL);
 
   Method* m;
   if (name == vmSymbols::object_initializer_name() ||
@@ -2426,7 +2424,7 @@
   JNI_ArgumentPusherVaArg ap(methodID, args); \
   /* Make sure class is initialized before trying to invoke its method */ \
   KlassHandle k(THREAD, java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls))); \
-  Klass::cast(k())->initialize(CHECK_0); \
+  k()->initialize(CHECK_0); \
   jni_invoke_static(env, &jvalue, NULL, JNI_STATIC, methodID, &ap, CHECK_0); \
   va_end(args); \
   ret = jvalue.get_##ResultType(); \
@@ -2611,10 +2609,10 @@
   KlassHandle k(THREAD,
                 java_lang_Class::as_Klass(JNIHandles::resolve_non_null(clazz)));
   // Make sure class is initialized before handing id's out to fields
-  Klass::cast(k())->initialize(CHECK_NULL);
+  k()->initialize(CHECK_NULL);
 
   fieldDescriptor fd;
-  if (!Klass::cast(k())->oop_is_instance() ||
+  if (!k()->oop_is_instance() ||
       !InstanceKlass::cast(k())->find_field(fieldname, signame, false, &fd)) {
     THROW_MSG_0(vmSymbols::java_lang_NoSuchFieldError(), (char*) name);
   }
@@ -2976,16 +2974,16 @@
   KlassHandle k(THREAD,
                 java_lang_Class::as_Klass(JNIHandles::resolve_non_null(clazz)));
   // Make sure class is initialized before handing id's out to static fields
-  Klass::cast(k())->initialize(CHECK_NULL);
+  k()->initialize(CHECK_NULL);
 
   fieldDescriptor fd;
-  if (!Klass::cast(k())->oop_is_instance() ||
+  if (!k()->oop_is_instance() ||
       !InstanceKlass::cast(k())->find_field(fieldname, signame, true, &fd)) {
     THROW_MSG_0(vmSymbols::java_lang_NoSuchFieldError(), (char*) name);
   }
 
   // A jfieldID for a static field is a JNIid specifying the field holder and the offset within the Klass*
-  JNIid* id = InstanceKlass::cast(fd.field_holder())->jni_id_for(fd.offset());
+  JNIid* id = fd.field_holder()->jni_id_for(fd.offset());
   debug_only(id->set_is_static_field_id();)
 
   debug_only(id->verify(fd.field_holder()));
@@ -3439,7 +3437,7 @@
   jobjectArray ret = NULL;
   DT_RETURN_MARK(NewObjectArray, jobjectArray, (const jobjectArray&)ret);
   KlassHandle ek(THREAD, java_lang_Class::as_Klass(JNIHandles::resolve_non_null(elementClass)));
-  Klass* ako = Klass::cast(ek())->array_klass(CHECK_NULL);
+  Klass* ako = ek()->array_klass(CHECK_NULL);
   KlassHandle ak = KlassHandle(THREAD, ako);
   ObjArrayKlass::cast(ak())->initialize(CHECK_NULL);
   objArrayOop result = ObjArrayKlass::cast(ak())->allocate(length, CHECK_NULL);
@@ -3970,7 +3968,7 @@
     if (trial_name == NULL) {
       continue; // no such symbol, so this prefix wasn't used, try the next prefix
     }
-    method = Klass::cast(k())->lookup_method(trial_name, signature);
+    method = k()->lookup_method(trial_name, signature);
     if (method == NULL) {
       continue; // signature doesn't match, try the next prefix
     }
@@ -3987,12 +3985,12 @@
 }
 
 static bool register_native(KlassHandle k, Symbol* name, Symbol* signature, address entry, TRAPS) {
-  Method* method = Klass::cast(k())->lookup_method(name, signature);
+  Method* method = k()->lookup_method(name, signature);
   if (method == NULL) {
     ResourceMark rm;
     stringStream st;
     st.print("Method %s name or signature does not match",
-             Method::name_and_sig_as_C_string(Klass::cast(k()), name, signature));
+             Method::name_and_sig_as_C_string(k(), name, signature));
     THROW_MSG_(vmSymbols::java_lang_NoSuchMethodError(), st.as_string(), false);
   }
   if (!method->is_native()) {
@@ -4002,7 +4000,7 @@
       ResourceMark rm;
       stringStream st;
       st.print("Method %s is not declared as native",
-               Method::name_and_sig_as_C_string(Klass::cast(k()), name, signature));
+               Method::name_and_sig_as_C_string(k(), name, signature));
       THROW_MSG_(vmSymbols::java_lang_NoSuchMethodError(), st.as_string(), false);
     }
   }
@@ -4016,7 +4014,7 @@
   if (PrintJNIResolving) {
     ResourceMark rm(THREAD);
     tty->print_cr("[Registering JNI native method %s.%s]",
-      Klass::cast(method->method_holder())->external_name(),
+      method->method_holder()->external_name(),
       method->name()->as_C_string());
   }
   return true;
@@ -4058,7 +4056,7 @@
     if (name == NULL || signature == NULL) {
       ResourceMark rm;
       stringStream st;
-      st.print("Method %s.%s%s not found", Klass::cast(h_k())->external_name(), meth_name, meth_sig);
+      st.print("Method %s.%s%s not found", h_k()->external_name(), meth_name, meth_sig);
       // Must return negative value on failure
       THROW_MSG_(vmSymbols::java_lang_NoSuchMethodError(), st.as_string(), -1);
     }
@@ -4084,7 +4082,7 @@
 #endif /* USDT2 */
   Klass* k   = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(clazz));
   //%note jni_2
-  if (Klass::cast(k)->oop_is_instance()) {
+  if (k->oop_is_instance()) {
     for (int index = 0; index < InstanceKlass::cast(k)->methods()->length(); index++) {
       Method* m = InstanceKlass::cast(k)->methods()->at(index);
       if (m->is_native()) {
@@ -5043,6 +5041,9 @@
 
 #include "gc_interface/collectedHeap.hpp"
 #include "utilities/quickSort.hpp"
+#if INCLUDE_VM_STRUCTS
+#include "runtime/vmStructs.hpp"
+#endif
 
 #define run_unit_test(unit_test_function_call)              \
   tty->print_cr("Running test: " #unit_test_function_call); \
@@ -5055,6 +5056,9 @@
     run_unit_test(CollectedHeap::test_is_in());
     run_unit_test(QuickSort::test_quick_sort());
     run_unit_test(AltHashing::test_alt_hash());
+#if INCLUDE_VM_STRUCTS
+    run_unit_test(VMStructs::test());
+#endif
     tty->print_cr("All internal VM tests passed");
   }
 }
@@ -5151,6 +5155,7 @@
 
     // Check if we should compile all classes on bootclasspath
     NOT_PRODUCT(if (CompileTheWorld) ClassLoader::compile_the_world();)
+    NOT_PRODUCT(if (ReplayCompiles) ciReplay::replay(thread);)
     // Since this is not a JVM_ENTRY we have to set the thread state manually before leaving.
     ThreadStateTransition::transition_and_fence(thread, _thread_in_vm, _thread_in_native);
   } else {
--- a/src/share/vm/prims/jniCheck.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jniCheck.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -383,7 +383,7 @@
   ASSERT_OOPS_ALLOWED;
   assert(klass != NULL, "klass argument must have a value");
 
-  if (!Klass::cast(klass)->oop_is_instance() ||
+  if (!klass->oop_is_instance() ||
       !InstanceKlass::cast(klass)->is_subclass_of(SystemDictionary::Throwable_klass())) {
     ReportJNIFatalError(thr, fatal_class_not_a_throwable_class);
   }
--- a/src/share/vm/prims/jvm.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvm.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -125,7 +125,7 @@
   int line_number = -1;
   const char * source_file = NULL;
   const char * trace = "explicit";
-  Klass* caller = NULL;
+  InstanceKlass* caller = NULL;
   JavaThread* jthread = JavaThread::current();
   if (jthread->has_last_Java_frame()) {
     vframeStream vfst(jthread);
@@ -153,17 +153,17 @@
     // that caller, otherwise keep quiet since this should be picked up elsewhere.
     bool found_it = false;
     if (!vfst.at_end() &&
-        InstanceKlass::cast(vfst.method()->method_holder())->name() == vmSymbols::java_lang_Class() &&
+        vfst.method()->method_holder()->name() == vmSymbols::java_lang_Class() &&
         vfst.method()->name() == vmSymbols::forName0_name()) {
       vfst.next();
       if (!vfst.at_end() &&
-          InstanceKlass::cast(vfst.method()->method_holder())->name() == vmSymbols::java_lang_Class() &&
+          vfst.method()->method_holder()->name() == vmSymbols::java_lang_Class() &&
           vfst.method()->name() == vmSymbols::forName_name()) {
         vfst.next();
         found_it = true;
       }
     } else if (last_caller != NULL &&
-               InstanceKlass::cast(last_caller->method_holder())->name() ==
+               last_caller->method_holder()->name() ==
                vmSymbols::java_lang_ClassLoader() &&
                (last_caller->name() == vmSymbols::loadClassInternal_name() ||
                 last_caller->name() == vmSymbols::loadClass_name())) {
@@ -182,7 +182,7 @@
         // show method name if it's a native method
         trace = vfst.method()->name_and_sig_as_C_string();
       }
-      Symbol* s = InstanceKlass::cast(caller)->source_file_name();
+      Symbol* s = caller->source_file_name();
       if (s != NULL) {
         source_file = s->as_C_string();
       }
@@ -190,8 +190,8 @@
   }
   if (caller != NULL) {
     if (to_class != caller) {
-      const char * from = Klass::cast(caller)->external_name();
-      const char * to = Klass::cast(to_class)->external_name();
+      const char * from = caller->external_name();
+      const char * to = to_class->external_name();
       // print in a single call to reduce interleaving between threads
       if (source_file != NULL) {
         tty->print("RESOLVE %s %s %s:%d (%s)\n", from, to, source_file, line_number, trace);
@@ -305,7 +305,7 @@
   assert(s->is_oop(), "JVM_ArrayCopy: src not an oop");
   assert(d->is_oop(), "JVM_ArrayCopy: dst not an oop");
   // Do copy
-  Klass::cast(s->klass())->copy_array(s, src_pos, d, dst_pos, length, thread);
+  s->klass()->copy_array(s, src_pos, d, dst_pos, length, thread);
 JVM_END
 
 
@@ -484,15 +484,6 @@
 JVM_END
 
 
-JVM_ENTRY(void, JVM_PrintStackTrace(JNIEnv *env, jobject receiver, jobject printable))
-  JVMWrapper("JVM_PrintStackTrace");
-  // Note: This is no longer used in Merlin, but we still support it for compatibility.
-  oop exception = JNIHandles::resolve_non_null(receiver);
-  oop stream    = JNIHandles::resolve_non_null(printable);
-  java_lang_Throwable::print_stack_trace(exception, stream);
-JVM_END
-
-
 JVM_ENTRY(jint, JVM_GetStackTraceDepth(JNIEnv *env, jobject throwable))
   JVMWrapper("JVM_GetStackTraceDepth");
   oop exception = JNIHandles::resolve(throwable);
@@ -675,7 +666,7 @@
 JVM_ENTRY(jclass, JVM_GetCallerClass(JNIEnv* env, int depth))
   JVMWrapper("JVM_GetCallerClass");
   Klass* k = thread->security_get_caller_class(depth);
-  return (k == NULL) ? NULL : (jclass) JNIHandles::make_local(env, Klass::cast(k)->java_mirror());
+  return (k == NULL) ? NULL : (jclass) JNIHandles::make_local(env, k->java_mirror());
 JVM_END
 
 
@@ -739,7 +730,7 @@
   if (TraceClassResolution) {
     trace_class_resolution(k);
   }
-  return (jclass) JNIHandles::make_local(env, Klass::cast(k)->java_mirror());
+  return (jclass) JNIHandles::make_local(env, k->java_mirror());
 JVM_END
 
 JVM_ENTRY(jclass, JVM_FindClassFromClassLoader(JNIEnv* env, const char* name,
@@ -785,8 +776,8 @@
   oop class_loader = NULL;
   oop protection_domain = NULL;
   if (from_class != NULL) {
-    class_loader = Klass::cast(from_class)->class_loader();
-    protection_domain = Klass::cast(from_class)->protection_domain();
+    class_loader = from_class->class_loader();
+    protection_domain = from_class->protection_domain();
   }
   Handle h_loader(THREAD, class_loader);
   Handle h_prot  (THREAD, protection_domain);
@@ -798,11 +789,11 @@
     ResourceMark rm;
     oop from_mirror = JNIHandles::resolve_non_null(from);
     Klass* from_class = java_lang_Class::as_Klass(from_mirror);
-    const char * from_name = Klass::cast(from_class)->external_name();
+    const char * from_name = from_class->external_name();
 
     oop mirror = JNIHandles::resolve_non_null(result);
     Klass* to_class = java_lang_Class::as_Klass(mirror);
-    const char * to = Klass::cast(to_class)->external_name();
+    const char * to = to_class->external_name();
     tty->print("RESOLVE %s %s (verification)\n", from_name, to);
   }
 
@@ -875,7 +866,7 @@
     trace_class_resolution(k);
   }
 
-  return (jclass) JNIHandles::make_local(env, Klass::cast(k)->java_mirror());
+  return (jclass) JNIHandles::make_local(env, k->java_mirror());
 }
 
 
@@ -936,7 +927,7 @@
                                                               CHECK_NULL);
 
   return (k == NULL) ? NULL :
-            (jclass) JNIHandles::make_local(env, Klass::cast(k)->java_mirror());
+            (jclass) JNIHandles::make_local(env, k->java_mirror());
 JVM_END
 
 
@@ -954,7 +945,7 @@
     // Consider caching interned string in Klass
     Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve(cls));
     assert(k->is_klass(), "just checking");
-    name = Klass::cast(k)->external_name();
+    name = k->external_name();
   }
   oop result = StringTable::intern((char*) name, CHECK_NULL);
   return (jstring) JNIHandles::make_local(env, result);
@@ -991,12 +982,12 @@
     // Regular instance klass, fill in all local interfaces
     for (int index = 0; index < size; index++) {
       Klass* k = InstanceKlass::cast(klass())->local_interfaces()->at(index);
-      result->obj_at_put(index, Klass::cast(k)->java_mirror());
+      result->obj_at_put(index, k->java_mirror());
     }
   } else {
     // All arrays implement java.lang.Cloneable and java.io.Serializable
-    result->obj_at_put(0, Klass::cast(SystemDictionary::Cloneable_klass())->java_mirror());
-    result->obj_at_put(1, Klass::cast(SystemDictionary::Serializable_klass())->java_mirror());
+    result->obj_at_put(0, SystemDictionary::Cloneable_klass()->java_mirror());
+    result->obj_at_put(1, SystemDictionary::Serializable_klass()->java_mirror());
   }
   return (jobjectArray) JNIHandles::make_local(env, result());
 JVM_END
@@ -1008,7 +999,7 @@
     return NULL;
   }
   Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
-  oop loader = Klass::cast(k)->class_loader();
+  oop loader = k->class_loader();
   return JNIHandles::make_local(env, loader);
 JVM_END
 
@@ -1020,8 +1011,8 @@
     return JNI_FALSE;
   }
   Klass* k = java_lang_Class::as_Klass(mirror);
-  jboolean result = Klass::cast(k)->is_interface();
-  assert(!result || Klass::cast(k)->oop_is_instance(),
+  jboolean result = k->is_interface();
+  assert(!result || k->oop_is_instance(),
          "all interfaces are instance types");
   // The compiler intrinsic for isInterface tests the
   // Klass::_access_flags bits in the same way.
@@ -1039,7 +1030,7 @@
 
   Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
   objArrayOop signers = NULL;
-  if (Klass::cast(k)->oop_is_instance()) {
+  if (k->oop_is_instance()) {
     signers = InstanceKlass::cast(k)->signers();
   }
 
@@ -1066,7 +1057,7 @@
     // Signers are only set once, ClassLoader.java, and thus shouldn't
     // be called with an array.  Only the bootstrap loader creates arrays.
     Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
-    if (Klass::cast(k)->oop_is_instance()) {
+    if (k->oop_is_instance()) {
       InstanceKlass::cast(k)->set_signers(objArrayOop(JNIHandles::resolve(signers)));
     }
   }
@@ -1085,7 +1076,7 @@
   }
 
   Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve(cls));
-  return (jobject) JNIHandles::make_local(env, Klass::cast(k)->protection_domain());
+  return (jobject) JNIHandles::make_local(env, k->protection_domain());
 JVM_END
 
 
@@ -1101,7 +1092,7 @@
     Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve(cls));
 
     // cls won't be an array, as this called only from ClassLoader.defineClass
-    if (Klass::cast(k)->oop_is_instance()) {
+    if (k->oop_is_instance()) {
       oop pd = JNIHandles::resolve(protection_domain);
       assert(pd == NULL || pd->is_oop(), "just checking");
       InstanceKlass::cast(k)->set_protection_domain(pd);
@@ -1124,7 +1115,7 @@
   Handle object (THREAD, JNIHandles::resolve(action));
 
   // get run() method
-  Method* m_oop = Klass::cast(object->klass())->uncached_lookup_method(
+  Method* m_oop = object->klass()->uncached_lookup_method(
                                            vmSymbols::run_method_name(),
                                            vmSymbols::void_object_signature());
   methodHandle m (THREAD, m_oop);
@@ -1228,7 +1219,7 @@
       privileged_context = Handle(thread, thread->privileged_stack_top()->privileged_context());
       protection_domain  = thread->privileged_stack_top()->protection_domain();
     } else {
-      protection_domain = InstanceKlass::cast(method->method_holder())->protection_domain();
+      protection_domain = method->method_holder()->protection_domain();
     }
 
     if ((previous_protection_domain != protection_domain) && (protection_domain != NULL)) {
@@ -1267,7 +1258,7 @@
 JVM_QUICK_ENTRY(jboolean, JVM_IsArrayClass(JNIEnv *env, jclass cls))
   JVMWrapper("JVM_IsArrayClass");
   Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
-  return (k != NULL) && Klass::cast(k)->oop_is_array() ? true : false;
+  return (k != NULL) && k->oop_is_array() ? true : false;
 JVM_END
 
 
@@ -1293,7 +1284,7 @@
     return JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC;
   }
 
-  Klass* k = Klass::cast(java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls)));
+  Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
   debug_only(int computed_modifiers = k->compute_modifier_flags(CHECK_0));
   assert(k->modifier_flags() == computed_modifiers, "modifiers cache is OK");
   return k->modifier_flags();
@@ -1308,7 +1299,7 @@
   // of an InstanceKlass
 
   if (java_lang_Class::is_primitive(JNIHandles::resolve_non_null(ofClass)) ||
-      ! Klass::cast(java_lang_Class::as_Klass(JNIHandles::resolve_non_null(ofClass)))->oop_is_instance()) {
+      ! java_lang_Class::as_Klass(JNIHandles::resolve_non_null(ofClass))->oop_is_instance()) {
     oop result = oopFactory::new_objArray(SystemDictionary::Class_klass(), 0, CHECK_NULL);
     return (jobjectArray)JNIHandles::make_local(env, result);
   }
@@ -1372,7 +1363,7 @@
 {
   // ofClass is a reference to a java_lang_Class object.
   if (java_lang_Class::is_primitive(JNIHandles::resolve_non_null(ofClass)) ||
-      ! Klass::cast(java_lang_Class::as_Klass(JNIHandles::resolve_non_null(ofClass)))->oop_is_instance()) {
+      ! java_lang_Class::as_Klass(JNIHandles::resolve_non_null(ofClass))->oop_is_instance()) {
     return NULL;
   }
 
@@ -1382,7 +1373,7 @@
                           )->compute_enclosing_class(&inner_is_member, CHECK_NULL);
   if (outer_klass == NULL)  return NULL;  // already a top-level class
   if (!inner_is_member)  return NULL;     // an anonymous class (inside a method)
-  return (jclass) JNIHandles::make_local(env, Klass::cast(outer_klass)->java_mirror());
+  return (jclass) JNIHandles::make_local(env, outer_klass->java_mirror());
 }
 JVM_END
 
@@ -1452,7 +1443,7 @@
   // Return null for arrays and primitives
   if (!java_lang_Class::is_primitive(JNIHandles::resolve(cls))) {
     Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve(cls));
-    if (Klass::cast(k)->oop_is_instance()) {
+    if (k->oop_is_instance()) {
       Symbol* sym = InstanceKlass::cast(k)->generic_signature();
       if (sym == NULL) return NULL;
       Handle str = java_lang_String::create_from_symbol(sym, CHECK_NULL);
@@ -1470,7 +1461,7 @@
   // Return null for arrays and primitives
   if (!java_lang_Class::is_primitive(JNIHandles::resolve(cls))) {
     Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve(cls));
-    if (Klass::cast(k)->oop_is_instance()) {
+    if (k->oop_is_instance()) {
       typeArrayOop a = Annotations::make_java_array(InstanceKlass::cast(k)->class_annotations(), CHECK_NULL);
       return (jbyteArray) JNIHandles::make_local(env, a);
     }
@@ -1515,7 +1506,7 @@
 JVM_END
 
 
-static Method* jvm_get_method_common(jobject method, TRAPS) {
+static Method* jvm_get_method_common(jobject method) {
   // some of this code was adapted from jni_FromReflectedMethod
 
   oop reflected = JNIHandles::resolve_non_null(method);
@@ -1533,8 +1524,7 @@
   }
   Klass* k = java_lang_Class::as_Klass(mirror);
 
-  KlassHandle kh(THREAD, k);
-  Method* m = InstanceKlass::cast(kh())->method_with_idnum(slot);
+  Method* m = InstanceKlass::cast(k)->method_with_idnum(slot);
   if (m == NULL) {
     assert(false, "cannot find method");
     return NULL;  // robustness
@@ -1548,7 +1538,7 @@
   JVMWrapper("JVM_GetMethodAnnotations");
 
   // method is a handle to a java.lang.reflect.Method object
-  Method* m = jvm_get_method_common(method, CHECK_NULL);
+  Method* m = jvm_get_method_common(method);
   return (jbyteArray) JNIHandles::make_local(env,
     Annotations::make_java_array(m->annotations(), THREAD));
 JVM_END
@@ -1558,7 +1548,7 @@
   JVMWrapper("JVM_GetMethodDefaultAnnotationValue");
 
   // method is a handle to a java.lang.reflect.Method object
-  Method* m = jvm_get_method_common(method, CHECK_NULL);
+  Method* m = jvm_get_method_common(method);
   return (jbyteArray) JNIHandles::make_local(env,
     Annotations::make_java_array(m->annotation_default(), THREAD));
 JVM_END
@@ -1568,11 +1558,79 @@
   JVMWrapper("JVM_GetMethodParameterAnnotations");
 
   // method is a handle to a java.lang.reflect.Method object
-  Method* m = jvm_get_method_common(method, CHECK_NULL);
+  Method* m = jvm_get_method_common(method);
   return (jbyteArray) JNIHandles::make_local(env,
     Annotations::make_java_array(m->parameter_annotations(), THREAD));
 JVM_END
 
+/* Type use annotations support (JDK 1.8) */
+
+JVM_ENTRY(jbyteArray, JVM_GetClassTypeAnnotations(JNIEnv *env, jclass cls))
+  assert(cls != NULL, "illegal class");
+  JVMWrapper("JVM_GetClassTypeAnnotations");
+  ResourceMark rm(THREAD);
+  // Return null for arrays and primitives
+  if (!java_lang_Class::is_primitive(JNIHandles::resolve(cls))) {
+    Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve(cls));
+    if (k->oop_is_instance()) {
+      Annotations* type_annotations = InstanceKlass::cast(k)->type_annotations();
+      if (type_annotations != NULL) {
+        typeArrayOop a = Annotations::make_java_array(type_annotations->class_annotations(), CHECK_NULL);
+        return (jbyteArray) JNIHandles::make_local(env, a);
+      }
+    }
+  }
+  return NULL;
+JVM_END
+
+static void bounds_check(constantPoolHandle cp, jint index, TRAPS) {
+  if (!cp->is_within_bounds(index)) {
+    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "Constant pool index out of bounds");
+  }
+}
+
+JVM_ENTRY(jobjectArray, JVM_GetMethodParameters(JNIEnv *env, jobject method))
+{
+  JVMWrapper("JVM_GetMethodParameters");
+  // method is a handle to a java.lang.reflect.Method object
+  Method* method_ptr = jvm_get_method_common(method);
+  methodHandle mh (THREAD, method_ptr);
+  Handle reflected_method (THREAD, JNIHandles::resolve_non_null(method));
+  const int num_params = mh->method_parameters_length();
+
+  if (0 != num_params) {
+    // make sure all the symbols are properly formatted
+    for (int i = 0; i < num_params; i++) {
+      MethodParametersElement* params = mh->method_parameters_start();
+      int index = params[i].name_cp_index;
+      bounds_check(mh->constants(), index, CHECK_NULL);
+
+      if (0 != index && !mh->constants()->tag_at(index).is_utf8()) {
+        THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(),
+                    "Wrong type at constant pool index");
+      }
+
+    }
+
+    objArrayOop result_oop = oopFactory::new_objArray(SystemDictionary::reflect_Parameter_klass(), num_params, CHECK_NULL);
+    objArrayHandle result (THREAD, result_oop);
+
+    for (int i = 0; i < num_params; i++) {
+      MethodParametersElement* params = mh->method_parameters_start();
+      // For a 0 index, give a NULL symbol
+      Symbol* const sym = 0 != params[i].name_cp_index ?
+        mh->constants()->symbol_at(params[i].name_cp_index) : NULL;
+      int flags = build_int_from_shorts(params[i].flags_lo, params[i].flags_hi);
+      oop param = Reflection::new_parameter(reflected_method, i, sym,
+                                            flags, CHECK_NULL);
+      result->obj_at_put(i, param);
+    }
+    return (jobjectArray)JNIHandles::make_local(env, result());
+  } else {
+    return (jobjectArray)NULL;
+  }
+}
+JVM_END
 
 // New (JDK 1.4) reflection implementation /////////////////////////////////////
 
@@ -1583,7 +1641,7 @@
 
   // Exclude primitive types and array types
   if (java_lang_Class::is_primitive(JNIHandles::resolve_non_null(ofClass)) ||
-      Klass::cast(java_lang_Class::as_Klass(JNIHandles::resolve_non_null(ofClass)))->oop_is_array()) {
+      java_lang_Class::as_Klass(JNIHandles::resolve_non_null(ofClass))->oop_is_array()) {
     // Return empty array
     oop res = oopFactory::new_objArray(SystemDictionary::reflect_Field_klass(), 0, CHECK_NULL);
     return (jobjectArray) JNIHandles::make_local(env, res);
@@ -1646,7 +1704,7 @@
 
   // Exclude primitive types and array types
   if (java_lang_Class::is_primitive(JNIHandles::resolve_non_null(ofClass))
-      || Klass::cast(java_lang_Class::as_Klass(JNIHandles::resolve_non_null(ofClass)))->oop_is_array()) {
+      || java_lang_Class::as_Klass(JNIHandles::resolve_non_null(ofClass))->oop_is_array()) {
     // Return empty array
     oop res = oopFactory::new_objArray(SystemDictionary::reflect_Method_klass(), 0, CHECK_NULL);
     return (jobjectArray) JNIHandles::make_local(env, res);
@@ -1698,7 +1756,7 @@
 
   // Exclude primitive types and array types
   if (java_lang_Class::is_primitive(JNIHandles::resolve_non_null(ofClass))
-      || Klass::cast(java_lang_Class::as_Klass(JNIHandles::resolve_non_null(ofClass)))->oop_is_array()) {
+      || java_lang_Class::as_Klass(JNIHandles::resolve_non_null(ofClass))->oop_is_array()) {
     // Return empty array
     oop res = oopFactory::new_objArray(SystemDictionary::reflect_Constructor_klass(), 0 , CHECK_NULL);
     return (jobjectArray) JNIHandles::make_local(env, res);
@@ -1751,7 +1809,7 @@
     return JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC;
   }
 
-  Klass* k = Klass::cast(java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls)));
+  Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
   return k->access_flags().as_int() & JVM_ACC_WRITTEN_FLAGS;
 }
 JVM_END
@@ -1767,7 +1825,7 @@
   // Return null for primitives and arrays
   if (!java_lang_Class::is_primitive(JNIHandles::resolve_non_null(cls))) {
     Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
-    if (Klass::cast(k)->oop_is_instance()) {
+    if (k->oop_is_instance()) {
       instanceKlassHandle k_h(THREAD, k);
       Handle jcp = sun_reflect_ConstantPool::create(CHECK_NULL);
       sun_reflect_ConstantPool::set_cp(jcp(), k_h->constants());
@@ -1788,13 +1846,6 @@
 JVM_END
 
 
-static void bounds_check(constantPoolHandle cp, jint index, TRAPS) {
-  if (!cp->is_within_bounds(index)) {
-    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "Constant pool index out of bounds");
-  }
-}
-
-
 JVM_ENTRY(jclass, JVM_ConstantPoolGetClassAt(JNIEnv *env, jobject obj, jobject unused, jint index))
 {
   JVMWrapper("JVM_ConstantPoolGetClassAt");
@@ -1809,7 +1860,6 @@
 }
 JVM_END
 
-
 JVM_ENTRY(jclass, JVM_ConstantPoolGetClassAtIfLoaded(JNIEnv *env, jobject obj, jobject unused, jint index))
 {
   JVMWrapper("JVM_ConstantPoolGetClassAtIfLoaded");
@@ -2043,12 +2093,12 @@
   if (java_lang_Class::is_primitive(r)) return false;
 
   Klass* k = java_lang_Class::as_Klass(r);
-  assert(Klass::cast(k)->oop_is_instance(), "must be an instance klass");
-  if (! Klass::cast(k)->oop_is_instance()) return false;
+  assert(k->oop_is_instance(), "must be an instance klass");
+  if (! k->oop_is_instance()) return false;
 
   ResourceMark rm(THREAD);
-  const char* name = Klass::cast(k)->name()->as_C_string();
-  bool system_class = Klass::cast(k)->class_loader() == NULL;
+  const char* name = k->name()->as_C_string();
+  bool system_class = k->class_loader() == NULL;
   return JavaAssertions::enabled(name, system_class);
 
 JVM_END
@@ -2079,7 +2129,7 @@
   JVMWrapper("JVM_GetClassNameUTF");
   Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
   k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
-  return Klass::cast(k)->name()->as_utf8();
+  return k->name()->as_utf8();
 JVM_END
 
 
@@ -2089,7 +2139,7 @@
   k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
   // types will have length zero if this is not an InstanceKlass
   // (length is determined by call to JVM_GetClassCPEntriesCount)
-  if (Klass::cast(k)->oop_is_instance()) {
+  if (k->oop_is_instance()) {
     ConstantPool* cp = InstanceKlass::cast(k)->constants();
     for (int index = cp->length() - 1; index >= 0; index--) {
       constantTag tag = cp->tag_at(index);
@@ -2103,7 +2153,7 @@
   JVMWrapper("JVM_GetClassCPEntriesCount");
   Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
   k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
-  if (!Klass::cast(k)->oop_is_instance())
+  if (!k->oop_is_instance())
     return 0;
   return InstanceKlass::cast(k)->constants()->length();
 JVM_END
@@ -2113,7 +2163,7 @@
   JVMWrapper("JVM_GetClassFieldsCount");
   Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
   k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
-  if (!Klass::cast(k)->oop_is_instance())
+  if (!k->oop_is_instance())
     return 0;
   return InstanceKlass::cast(k)->java_fields_count();
 JVM_END
@@ -2123,7 +2173,7 @@
   JVMWrapper("JVM_GetClassMethodsCount");
   Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve_non_null(cls));
   k = JvmtiThreadState::class_to_verify_considering_redefinition(k, thread);
-  if (!Klass::cast(k)->oop_is_instance())
+  if (!k->oop_is_instance())
     return 0;
   return InstanceKlass::cast(k)->methods()->length();
 JVM_END
@@ -3048,10 +3098,10 @@
 
     Method* m = vfst.method();
     if (!m->is_native()) {
-      Klass* holder = m->method_holder();
-      oop      loader = InstanceKlass::cast(holder)->class_loader();
+      InstanceKlass* holder = m->method_holder();
+      oop loader = holder->class_loader();
       if (loader != NULL && !java_lang_ClassLoader::is_trusted_loader(loader)) {
-        return (jclass) JNIHandles::make_local(env, Klass::cast(holder)->java_mirror());
+        return (jclass) JNIHandles::make_local(env, holder->java_mirror());
       }
     }
   }
@@ -3071,9 +3121,9 @@
 
     Method* m = vfst.method();
     if (!m->is_native()) {
-      Klass* holder = m->method_holder();
+      InstanceKlass* holder = m->method_holder();
       assert(holder->is_klass(), "just checking");
-      oop loader = InstanceKlass::cast(holder)->class_loader();
+      oop loader = holder->class_loader();
       if (loader != NULL && !java_lang_ClassLoader::is_trusted_loader(loader)) {
         return JNIHandles::make_local(env, loader);
       }
@@ -3123,7 +3173,7 @@
   // Fill in mirrors corresponding to method holders
   int index = 0;
   while (first != NULL) {
-    result->obj_at_put(index++, Klass::cast(first->klass())->java_mirror());
+    result->obj_at_put(index++, first->klass()->java_mirror());
     first = first->next;
   }
   assert(index == depth, "just checking");
@@ -3148,9 +3198,9 @@
 
   for(vframeStream vfst(thread); !vfst.at_end(); vfst.next()) {
     if (!vfst.method()->is_native()) {
-      Klass* holder = vfst.method()->method_holder();
+      InstanceKlass* holder = vfst.method()->method_holder();
       assert(holder->is_klass(), "just checking");
-      if (InstanceKlass::cast(holder)->name() == class_name_sym) {
+      if (holder->name() == class_name_sym) {
         return depth;
       }
       depth++;
@@ -3171,9 +3221,9 @@
 
     Method* m = vfst.method();
     if (!m->is_native()) {
-      Klass* holder = m->method_holder();
+      InstanceKlass* holder = m->method_holder();
       assert(holder->is_klass(), "just checking");
-      oop loader = InstanceKlass::cast(holder)->class_loader();
+      oop loader = holder->class_loader();
       if (loader != NULL && !java_lang_ClassLoader::is_trusted_loader(loader)) {
         return depth;
       }
@@ -3217,7 +3267,7 @@
 
   if (access.is_protected()) {
     // See if current_class is a subclass of field_class
-    if (Klass::cast(current_class)->is_subclass_of(field_class)) {
+    if (current_class->is_subclass_of(field_class)) {
       return true;
     }
   }
@@ -3241,8 +3291,8 @@
   }
 
   // Arrays not allowed here, must use JVM_AllocateNewArray
-  if (Klass::cast(java_lang_Class::as_Klass(curr_mirror))->oop_is_array() ||
-      Klass::cast(java_lang_Class::as_Klass(init_mirror))->oop_is_array()) {
+  if (java_lang_Class::as_Klass(curr_mirror)->oop_is_array() ||
+      java_lang_Class::as_Klass(init_mirror)->oop_is_array()) {
     ResourceMark rm(THREAD);
     THROW_0(vmSymbols::java_lang_InvalidClassException());
   }
@@ -3264,7 +3314,7 @@
   if (m.is_null()) {
     ResourceMark rm(THREAD);
     THROW_MSG_0(vmSymbols::java_lang_NoSuchMethodError(),
-                Method::name_and_sig_as_C_string(Klass::cast(init_klass()),
+                Method::name_and_sig_as_C_string(init_klass(),
                                           vmSymbols::object_initializer_name(),
                                           vmSymbols::void_method_signature()));
   }
@@ -3322,8 +3372,7 @@
   for (vframeStream vfst(thread); !vfst.at_end(); vfst.next()) {
     // UseNewReflection
     vfst.skip_reflection_related_frames(); // Only needed for 1.4 reflection
-    Klass* holder = vfst.method()->method_holder();
-    oop loader = InstanceKlass::cast(holder)->class_loader();
+    oop loader = vfst.method()->method_holder()->class_loader();
     if (loader != NULL) {
       return JNIHandles::make_local(env, loader);
     }
@@ -3365,9 +3414,9 @@
          !vfst.at_end() && loader == NULL;
          vfst.next()) {
       if (!vfst.method()->is_native()) {
-        Klass* holder = vfst.method()->method_holder();
-        loader             = InstanceKlass::cast(holder)->class_loader();
-        protection_domain  = InstanceKlass::cast(holder)->protection_domain();
+        InstanceKlass* holder = vfst.method()->method_holder();
+        loader             = holder->class_loader();
+        protection_domain  = holder->protection_domain();
       }
     }
   } else {
@@ -4246,7 +4295,7 @@
     return NULL;
   }
   Klass* k = java_lang_Class::as_Klass(mirror());
-  if (!Klass::cast(k)->oop_is_instance()) {
+  if (!k->oop_is_instance()) {
     return NULL;
   }
   instanceKlassHandle ik_h(THREAD, k);
@@ -4257,7 +4306,7 @@
   objArrayOop dest_o = oopFactory::new_objArray(SystemDictionary::Object_klass(), 3, CHECK_NULL);
   objArrayHandle dest(THREAD, dest_o);
   Klass* enc_k = ik_h->constants()->klass_at(encl_method_class_idx, CHECK_NULL);
-  dest->obj_at_put(0, Klass::cast(enc_k)->java_mirror());
+  dest->obj_at_put(0, enc_k->java_mirror());
   int encl_method_method_idx = ik_h->enclosing_method_method_index();
   if (encl_method_method_idx != 0) {
     Symbol* sym = ik_h->constants()->symbol_at(
--- a/src/share/vm/prims/jvm.h	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvm.h	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -86,6 +86,8 @@
 
 #define JVM_INTERFACE_VERSION 4
 
+JNIEXPORT jobjectArray JNICALL
+JVM_GetMethodParameters(JNIEnv *env, jobject method);
 
 JNIEXPORT jint JNICALL
 JVM_GetInterfaceVersion(void);
@@ -210,9 +212,6 @@
 JNIEXPORT void JNICALL
 JVM_FillInStackTrace(JNIEnv *env, jobject throwable);
 
-JNIEXPORT void JNICALL
-JVM_PrintStackTrace(JNIEnv *env, jobject throwable, jobject printable);
-
 JNIEXPORT jint JNICALL
 JVM_GetStackTraceDepth(JNIEnv *env, jobject throwable);
 
@@ -519,6 +518,10 @@
 JNIEXPORT jbyteArray JNICALL
 JVM_GetMethodParameterAnnotations(JNIEnv *env, jobject method);
 
+/* Type use annotations support (JDK 1.8) */
+
+JNIEXPORT jbyteArray JNICALL
+JVM_GetClassTypeAnnotations(JNIEnv *env, jclass cls);
 
 /*
  * New (JDK 1.4) reflection implementation
--- a/src/share/vm/prims/jvmti.xml	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmti.xml	Sun Feb 03 22:43:57 2013 +0100
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="ISO-8859-1"?>
 <?xml-stylesheet type="text/xsl" href="jvmti.xsl"?>
 <!--
- Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 
  This code is free software; you can redistribute it and/or modify it
@@ -358,7 +358,7 @@
 <specification label="JVM(TM) Tool Interface"
         majorversion="1"
         minorversion="2"
-        microversion="1">
+        microversion="2">
   <title subtitle="Version">
     <tm>JVM</tm> Tool Interface
   </title>
@@ -405,7 +405,7 @@
     interfaces are more appropriate than <jvmti/> for many tools. 
     For more information on the Java Platform Debugger Architecture, 
     see the 
-    <externallink id="http://java.sun.com/products/jpda/">Java 
+    <externallink id="http://docs.oracle.com/javase/7/docs/technotes/guides/jpda/architecture.html">Java 
       Platform Debugger Architecture website</externallink>. 
   </intro>
 
@@ -693,7 +693,7 @@
     An agent creates a <jvmti/> environment 
     by passing a <jvmti/> version 
     as the interface ID to the JNI Invocation API function 
-    <externallink id="http://java.sun.com/javase/6/docs/technotes/guides/jni/spec/invocation.html#GetEnv"><code>GetEnv</code></externallink>.
+    <externallink id="http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/invocation.html#GetEnv"><code>GetEnv</code></externallink>.
     See <internallink id="jvmtiEnvAccess">Accessing <jvmti/> Functions</internallink>
     for more details on the creation and use of 
     <jvmti/> environments.
@@ -797,7 +797,7 @@
     Modified UTF-8 differs 
     from standard UTF-8 in the representation of supplementary characters 
     and of the null character. See the
-    <externallink id="http://java.sun.com/javase/6/docs/technotes/guides/jni/spec/types.html#wp16542">
+    <externallink id="http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/types.html#wp16542">
       Modified UTF-8 Strings</externallink>
     section of the JNI specification for details.
   </intro>
@@ -827,7 +827,7 @@
     by calling <jvmti/> functions. 
     Access to <jvmti/> functions is by use of an interface pointer
     in the same manner as 
-    <externallink id="http://java.sun.com/javase/6/docs/technotes/guides/jni/spec/design.html">Java 
+    <externallink id="http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/design.html">Java 
       Native Interface (JNI) functions</externallink> are accessed.
     The <jvmti/> interface pointer is called the 
     <i>environment pointer</i>.
@@ -919,7 +919,7 @@
     local references--these local references are created 
     during the <jvmti/> call.
     Local references are a resource that must be managed (see the 
-    <externallink id="http://java.sun.com/javase/6/docs/guide/jni/spec/functions.html#wp18654">JNI Documentation</externallink>).  
+    <externallink id="http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/functions.html#wp18654">JNI Documentation</externallink>).  
     When threads return from native code all local references
     are freed.  Note that some threads, including typical
     agent threads, will never return from native code.
@@ -954,7 +954,7 @@
       <jvmti/> function.
       See the
       <externallink 
-        id="http://java.sun.com/javase/6/docs/technotes/guides/jni/spec/design.html#wp770"
+        id="http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/design.html#wp770"
              >Java Exceptions</externallink>
       section of the JNI specification for information on handling exceptions.
     </intro>
@@ -2024,7 +2024,7 @@
 	<p/>
 	Upon execution of <code>proc</code>, the new thread will be attached to the
 	VM--see the JNI documentation on 
-	<externallink id="http://java.sun.com/javase/6/docs/technotes/guides/jni/spec/invocation.html#wp1060"
+	<externallink id="http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/invocation.html#wp1060"
 		      >Attaching to the VM</externallink>.
       </description>
       <origin>jvmdiClone</origin>
@@ -2370,11 +2370,11 @@
 jvmtiError err;
 
 err = (*jvmti)-&gt;GetStackTrace(jvmti, aThread, 0, 5, 
-                               &amp;frames, &amp;count);
+                               frames, &amp;count);
 if (err == JVMTI_ERROR_NONE &amp;&amp; count &gt;= 1) {
    char *methodName;
    err = (*jvmti)-&gt;GetMethodName(jvmti, frames[0].method, 
-                       &amp;methodName, NULL);
+                       &amp;methodName, NULL, NULL);
    if (err == JVMTI_ERROR_NONE) {
       printf("Executing method: %s", methodName);
    }
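The corrected spec example above passes the frame buffer directly (it is already an array) and supplies all three output pointers of GetMethodName. A self-contained version in C, with the declarations the excerpt omits filled in as assumptions (a five-frame buffer and a count variable, as in the full spec example):

    #include <jvmti.h>
    #include <stdio.h>

    /* Assumed context for the excerpt: frames and count declared locally. */
    static void print_top_method(jvmtiEnv *jvmti, jthread aThread) {
      jvmtiFrameInfo frames[5];
      jint count;
      jvmtiError err;

      err = (*jvmti)->GetStackTrace(jvmti, aThread, 0, 5, frames, &count);
      if (err == JVMTI_ERROR_NONE && count >= 1) {
        char *methodName;
        err = (*jvmti)->GetMethodName(jvmti, frames[0].method,
                                      &methodName, NULL, NULL);
        if (err == JVMTI_ERROR_NONE) {
          printf("Executing method: %s\n", methodName);
          /* methodName should eventually be released with Deallocate */
        }
      }
    }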
@@ -4010,7 +4010,7 @@
 	  </inptr>
 	  <description>
 	    Details about the reference. 
-            Set when the <paramlink id="reference_kind"/> is
+            Set when the <datalink id="jvmtiHeapReferenceCallback.reference_kind">reference_kind</datalink> is
             <datalink id="JVMTI_HEAP_REFERENCE_FIELD"/>,
 	    <datalink id="JVMTI_HEAP_REFERENCE_STATIC_FIELD"/>,
 	    <datalink id="JVMTI_HEAP_REFERENCE_ARRAY_ELEMENT"/>,
@@ -4378,7 +4378,7 @@
         do not control which objects are visited but they do control which
         objects and primitive values are reported by the callbacks.
         For example, if the only callback that was set is
-        <paramlink id="array_primitive_value_callback"/> and <code>klass</code>
+        <fieldlink id="array_primitive_value_callback" struct="jvmtiHeapCallbacks"/> and <code>klass</code>
         is set to the array of bytes class, then only arrays of byte will be
         reported.  
         The table below summarizes this:
@@ -4414,7 +4414,7 @@
           </tr>
           <tr>
             <th align="left">
-              <fieldlink id="object_reference_callback" struct="jvmtiHeapCallbacks"/>
+              <fieldlink id="array_primitive_value_callback" struct="jvmtiHeapCallbacks"/>
               in <paramlink id="callbacks"/> set
             </th>
             <td>
@@ -4570,7 +4570,7 @@
         do not control which objects are visited but they do control which
         objects and primitive values are reported by the callbacks.
         For example, if the only callback that was set is
-        <paramlink id="array_primitive_value_callback"/> and <code>klass</code>
+        <fieldlink id="array_primitive_value_callback" struct="jvmtiHeapCallbacks"/> and <code>klass</code>
         is set to the array of bytes class, then only arrays of byte will be
         reported. The table below summarizes this (contrast this with 
         <functionlink id="FollowReferences"/>):
@@ -4606,7 +4606,7 @@
           </tr>
           <tr>
             <th align="left">
-              <fieldlink id="object_callback" struct="jvmtiHeapCallbacks"/>
+              <fieldlink id="array_primitive_value_callback" struct="jvmtiHeapCallbacks"/>
               in <paramlink id="callbacks"/> set
             </th>
             <td>
@@ -6478,7 +6478,7 @@
       <synopsis>Get Class Signature</synopsis>
       <description>
         For the class indicated by <code>klass</code>, return the 
-        <externallink id="http://java.sun.com/javase/6/docs/guide/jni/spec/types.html#wp16432">JNI 
+        <externallink id="http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/types.html#wp16432">JNI 
             type signature</externallink> 
         and the generic signature of the class.
         For example, <code>java.util.List</code> is <code>"Ljava/util/List;"</code>
@@ -8763,7 +8763,7 @@
       Provides the ability to intercept and resend 
       Java Native Interface (JNI) function calls
       by manipulating the JNI function table.
-      See <externallink id="http://java.sun.com/javase/6/docs/guide/jni/spec/functions.html">JNI
+      See <externallink id="http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/functions.html">JNI
 	Functions</externallink> in the <i>Java Native Interface Specification</i>.
       <p/>
       The following example illustrates intercepting the 
@@ -10446,7 +10446,7 @@
 	  for a class. The segment is typically a directory or JAR file.
 	  <p/>	  
 	  In the live phase the <paramlink id="segment"/> may be used to specify any platform-dependent
-	  path to a <externallink id="http://java.sun.com/javase/6/docs/guide/jar/jar.html">
+	  path to a <externallink id="http://docs.oracle.com/javase/7/docs/technotes/guides/jar/jar.html">
 	  JAR file</externallink>. The agent should take care that the JAR file does not
           contain any classes or resources other than those to be defined by the bootstrap
           class loader for the purposes of instrumentation.
@@ -10494,7 +10494,7 @@
 	  for a class. The segment is typically a directory or JAR file.
 	  <p/>	  
 	  In the live phase the <paramlink id="segment"/> is a platform-dependent path to a <externallink 
-	  id="http://java.sun.com/javase/6/docs/guide/jar/jar.html">JAR file</externallink> to be
+	  id="http://docs.oracle.com/javase/7/docs/technotes/guides/jar/jar.html">JAR file</externallink> to be
 	  searched after the system class loader unsuccessfully searches for a class. The agent should
           take care that the JAR file does not contain any classes or resources other than those to be
           defined by the system class loader for the purposes of instrumentation.
@@ -13128,6 +13128,12 @@
 	Unsigned 8 bits.
       </description>
     </basetype>
+    <basetype id="jchar">
+      <description>
+	Holds a Java programming language <code>char</code>.
+	Unsigned 16 bits.
+      </description>
+    </basetype>
     <basetype id="jint">
       <description>
 	Holds a Java programming language <code>int</code>. 
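The new jchar basetype is specified as unsigned 16 bits. A one-line check of that property, assuming the jni.h typedef (jchar as a 16-bit unsigned integer type):

    #include <jni.h>
    #include <stdio.h>

    int main(void) {
      jchar c = (jchar)-1;          /* wraps to 0xFFFF: jchar is unsigned 16-bit */
      printf("%u\n", (unsigned)c);  /* prints 65535 */
      return 0;
    }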
@@ -13285,7 +13291,7 @@
       <description>
 	Typedef for the JNI function table <code>JNINativeInterface</code>
 	defined in the 
-	<externallink id="http://java.sun.com/javase/6/docs/guide/jni/spec/functions.html#wp23720">JNI Specification</externallink>.
+	<externallink id="http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/functions.html#wp23720">JNI Specification</externallink>.
 	The JNI reference implementation defines this with an underscore.
       </description>
     </basetype>
@@ -14252,6 +14258,9 @@
   <change date="6 August 2006" version="1.1.102">
       Add ResourceExhaustedEvent.
   </change>
+  <change date="11 October 2012" version="1.2.2">
+      Fixed the "HTTP" and "Missing Anchor" errors reported by the LinkCheck tool.
+  </change>
 </changehistory>
 
 </specification>
--- a/src/share/vm/prims/jvmtiClassFileReconstituter.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiClassFileReconstituter.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -228,7 +228,7 @@
 
   write_attribute_name_index("Code");
   write_u4(size);
-  write_u2(method->max_stack());
+  write_u2(method->verifier_max_stack());
   write_u2(method->max_locals());
   write_u4(code_size);
   copy_bytecodes(method, (unsigned char*)writeable_address(code_size));
@@ -753,7 +753,7 @@
 
   unsigned char* p = bytecodes;
   Bytecodes::Code code;
-  bool is_rewritten = InstanceKlass::cast(mh->method_holder())->is_rewritten();
+  bool is_rewritten = mh->method_holder()->is_rewritten();
 
   while ((code = bs.next()) >= 0) {
     assert(Bytecodes::is_java_code(code), "sanity check");
--- a/src/share/vm/prims/jvmtiEnv.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiEnv.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -53,24 +53,12 @@
 #include "runtime/osThread.hpp"
 #include "runtime/reflectionUtils.hpp"
 #include "runtime/signature.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vframe.hpp"
 #include "runtime/vmThread.hpp"
 #include "services/threadService.hpp"
 #include "utilities/exceptions.hpp"
 #include "utilities/preserveException.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
-
 
 
 #define FIXLATER 0 // REMOVE this when completed.
@@ -2116,7 +2104,7 @@
       result[0] = tchar;
       result[1] = '\0';
     } else {
-      const char* class_sig = Klass::cast(k)->signature_name();
+      const char* class_sig = k->signature_name();
       result = (char *) jvmtiMalloc(strlen(class_sig)+1);
       strcpy(result, class_sig);
     }
@@ -2124,7 +2112,7 @@
   }
   if (generic_ptr != NULL) {
     *generic_ptr = NULL;
-    if (!isPrimitive && Klass::cast(k)->oop_is_instance()) {
+    if (!isPrimitive && k->oop_is_instance()) {
       Symbol* soo = InstanceKlass::cast(k)->generic_signature();
       if (soo != NULL) {
         const char *gen_sig = soo->as_C_string();
@@ -2155,7 +2143,7 @@
   } else {
     Klass* k = java_lang_Class::as_Klass(k_mirror);
     NULL_CHECK(k, JVMTI_ERROR_INVALID_CLASS);
-    result = Klass::cast(k)->jvmti_class_status();
+    result = k->jvmti_class_status();
   }
   *status_ptr = result;
 
@@ -2173,7 +2161,7 @@
   Klass* k_klass = java_lang_Class::as_Klass(k_mirror);
   NULL_CHECK(k_klass, JVMTI_ERROR_INVALID_CLASS);
 
-  if (!Klass::cast(k_klass)->oop_is_instance()) {
+  if (!k_klass->oop_is_instance()) {
     return JVMTI_ERROR_ABSENT_INFORMATION;
   }
 
@@ -2200,7 +2188,7 @@
   if (!java_lang_Class::is_primitive(k_mirror)) {
     Klass* k = java_lang_Class::as_Klass(k_mirror);
     NULL_CHECK(k, JVMTI_ERROR_INVALID_CLASS);
-    result = Klass::cast(k)->compute_modifier_flags(current_thread);
+    result = k->compute_modifier_flags(current_thread);
     JavaThread* THREAD = current_thread; // pass to macros
     if (HAS_PENDING_EXCEPTION) {
       CLEAR_PENDING_EXCEPTION;
@@ -2208,7 +2196,7 @@
     };
 
     // Restore the ACC_SUPER bit deleted by compute_modifier_flags().
-    if(Klass::cast(k)->is_super()) {
+    if(k->is_super()) {
       result |= JVM_ACC_SUPER;
     }
   } else {
@@ -2237,11 +2225,11 @@
   NULL_CHECK(k, JVMTI_ERROR_INVALID_CLASS);
 
   // Return CLASS_NOT_PREPARED error as per JVMTI spec.
-  if (!(Klass::cast(k)->jvmti_class_status() & (JVMTI_CLASS_STATUS_PREPARED|JVMTI_CLASS_STATUS_ARRAY) )) {
+  if (!(k->jvmti_class_status() & (JVMTI_CLASS_STATUS_PREPARED|JVMTI_CLASS_STATUS_ARRAY) )) {
     return JVMTI_ERROR_CLASS_NOT_PREPARED;
   }
 
-  if (!Klass::cast(k)->oop_is_instance()) {
+  if (!k->oop_is_instance()) {
     *method_count_ptr = 0;
     *methods_ptr = (jmethodID*) jvmtiMalloc(0 * sizeof(jmethodID));
     return JVMTI_ERROR_NONE;
@@ -2293,11 +2281,11 @@
   NULL_CHECK(k, JVMTI_ERROR_INVALID_CLASS);
 
   // Return CLASS_NOT_PREPARED error as per JVMTI spec.
-  if (!(Klass::cast(k)->jvmti_class_status() & (JVMTI_CLASS_STATUS_PREPARED|JVMTI_CLASS_STATUS_ARRAY) )) {
+  if (!(k->jvmti_class_status() & (JVMTI_CLASS_STATUS_PREPARED|JVMTI_CLASS_STATUS_ARRAY) )) {
     return JVMTI_ERROR_CLASS_NOT_PREPARED;
   }
 
-  if (!Klass::cast(k)->oop_is_instance()) {
+  if (!k->oop_is_instance()) {
     *field_count_ptr = 0;
     *fields_ptr = (jfieldID*) jvmtiMalloc(0 * sizeof(jfieldID));
     return JVMTI_ERROR_NONE;
@@ -2348,10 +2336,10 @@
     NULL_CHECK(k, JVMTI_ERROR_INVALID_CLASS);
 
     // Return CLASS_NOT_PREPARED error as per JVMTI spec.
-    if (!(Klass::cast(k)->jvmti_class_status() & (JVMTI_CLASS_STATUS_PREPARED|JVMTI_CLASS_STATUS_ARRAY) ))
+    if (!(k->jvmti_class_status() & (JVMTI_CLASS_STATUS_PREPARED|JVMTI_CLASS_STATUS_ARRAY) ))
       return JVMTI_ERROR_CLASS_NOT_PREPARED;
 
-    if (!Klass::cast(k)->oop_is_instance()) {
+    if (!k->oop_is_instance()) {
       *interface_count_ptr = 0;
       *interfaces_ptr = (jclass*) jvmtiMalloc(0 * sizeof(jclass));
       return JVMTI_ERROR_NONE;
@@ -2363,8 +2351,8 @@
     for (int i_index = 0; i_index < result_length; i_index += 1) {
       Klass* klass_at = interface_list->at(i_index);
       assert(klass_at->is_klass(), "interfaces must be Klass*s");
-      assert(Klass::cast(klass_at)->is_interface(), "interfaces must be interfaces");
-      oop mirror_at = Klass::cast(klass_at)->java_mirror();
+      assert(klass_at->is_interface(), "interfaces must be interfaces");
+      oop mirror_at = klass_at->java_mirror();
       Handle handle_at = Handle(current_thread, mirror_at);
       result_list[i_index] = (jclass) jni_reference(handle_at);
     }
@@ -2468,7 +2456,7 @@
     bool result = false;
     if (!java_lang_Class::is_primitive(k_mirror)) {
       Klass* k = java_lang_Class::as_Klass(k_mirror);
-      if (k != NULL && Klass::cast(k)->is_interface()) {
+      if (k != NULL && k->is_interface()) {
         result = true;
       }
     }
@@ -2487,7 +2475,7 @@
     bool result = false;
     if (!java_lang_Class::is_primitive(k_mirror)) {
       Klass* k = java_lang_Class::as_Klass(k_mirror);
-      if (k != NULL && Klass::cast(k)->oop_is_array()) {
+      if (k != NULL && k->oop_is_array()) {
         result = true;
       }
     }
@@ -2512,7 +2500,7 @@
     Klass* k = java_lang_Class::as_Klass(k_mirror);
     NULL_CHECK(k, JVMTI_ERROR_INVALID_CLASS);
 
-    oop result_oop = Klass::cast(k)->class_loader();
+    oop result_oop = k->class_loader();
     if (result_oop == NULL) {
       *classloader_ptr = (jclass) jni_reference(Handle());
       return JVMTI_ERROR_NONE;
@@ -2535,7 +2523,7 @@
     }
     Klass* k = java_lang_Class::as_Klass(k_mirror);
     NULL_CHECK(k, JVMTI_ERROR_INVALID_CLASS);
-    if (!Klass::cast(k)->oop_is_instance()) {
+    if (!k->oop_is_instance()) {
       return JVMTI_ERROR_ABSENT_INFORMATION;
     }
     char* sde = InstanceKlass::cast(k)->source_debug_extension();
@@ -2822,7 +2810,7 @@
   JavaThread* current_thread  = JavaThread::current();
 
   // does the klass have any local variable information?
-  InstanceKlass* ik = InstanceKlass::cast(method_oop->method_holder());
+  InstanceKlass* ik = method_oop->method_holder();
   if (!ik->access_flags().has_localvariable_table()) {
     return (JVMTI_ERROR_ABSENT_INFORMATION);
   }
--- a/src/share/vm/prims/jvmtiEnvBase.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiEnvBase.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -590,7 +590,7 @@
 jclass
 JvmtiEnvBase::get_jni_class_non_null(Klass* k) {
   assert(k != NULL, "k != NULL");
-  return (jclass)jni_reference(Klass::cast(k)->java_mirror());
+  return (jclass)jni_reference(k->java_mirror());
 }
 
 #ifndef JVMTI_KERNEL
@@ -1365,7 +1365,7 @@
     // Method return type signature.
     char* ty_sign = 1 + strchr(signature->as_C_string(), ')');
 
-    if (!VM_GetOrSetLocal::is_assignable(ty_sign, Klass::cast(ob_kh()), current_thread)) {
+    if (!VM_GetOrSetLocal::is_assignable(ty_sign, ob_kh(), current_thread)) {
       return JVMTI_ERROR_TYPE_MISMATCH;
     }
     *ret_ob_h = ob_h;
--- a/src/share/vm/prims/jvmtiEnvBase.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiEnvBase.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -69,7 +69,7 @@
   enum {
     JDK15_JVMTI_VERSION = JVMTI_VERSION_1_0 +  33,  /* version: 1.0.33  */
     JDK16_JVMTI_VERSION = JVMTI_VERSION_1_1 + 102,  /* version: 1.1.102 */
-    JDK17_JVMTI_VERSION = JVMTI_VERSION_1_2 +   1   /* version: 1.2.1   */
+    JDK17_JVMTI_VERSION = JVMTI_VERSION_1_2 +   2   /* version: 1.2.2   */
   };
 
   static jvmtiPhase  get_phase()                    { return _phase; }
--- a/src/share/vm/prims/jvmtiExport.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiExport.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -196,7 +196,7 @@
   jobject to_jobject(oop obj) { return JNIHandles::make_local(_thread,obj); }
 #endif
 
-  jclass to_jclass(Klass* klass) { return (klass == NULL ? NULL : (jclass)to_jobject(Klass::cast(klass)->java_mirror())); }
+  jclass to_jclass(Klass* klass) { return (klass == NULL ? NULL : (jclass)to_jobject(klass->java_mirror())); }
 
   jmethodID to_jmethodID(methodHandle method) { return method->jmethod_id(); }
 
@@ -920,7 +920,7 @@
     if (ets->is_enabled(JVMTI_EVENT_CLASS_LOAD)) {
       EVT_TRACE(JVMTI_EVENT_CLASS_LOAD, ("JVMTI [%s] Evt Class Load sent %s",
                                          JvmtiTrace::safe_get_thread_name(thread),
-                                         kh()==NULL? "NULL" : Klass::cast(kh())->external_name() ));
+                                         kh()==NULL? "NULL" : kh()->external_name() ));
 
       JvmtiEnv *env = ets->get_env();
       JvmtiClassEventMark jem(thread, kh());
@@ -949,7 +949,7 @@
     if (ets->is_enabled(JVMTI_EVENT_CLASS_PREPARE)) {
       EVT_TRACE(JVMTI_EVENT_CLASS_PREPARE, ("JVMTI [%s] Evt Class Prepare sent %s",
                                             JvmtiTrace::safe_get_thread_name(thread),
-                                            kh()==NULL? "NULL" : Klass::cast(kh())->external_name() ));
+                                            kh()==NULL? "NULL" : kh()->external_name() ));
 
       JvmtiEnv *env = ets->get_env();
       JvmtiClassEventMark jem(thread, kh());
@@ -979,12 +979,12 @@
     for (JvmtiEnv* env = it.first(); env != NULL; env = it.next(env)) {
       if (env->is_enabled((jvmtiEvent)EXT_EVENT_CLASS_UNLOAD)) {
         EVT_TRACE(EXT_EVENT_CLASS_UNLOAD, ("JVMTI [?] Evt Class Unload sent %s",
-                  kh()==NULL? "NULL" : Klass::cast(kh())->external_name() ));
+                  kh()==NULL? "NULL" : kh()->external_name() ));
 
         // do everything manually, since this is a proxy - needs special care
         JNIEnv* jni_env = real_thread->jni_environment();
         jthread jt = (jthread)JNIHandles::make_local(real_thread, real_thread->threadObj());
-        jclass jk = (jclass)JNIHandles::make_local(real_thread, Klass::cast(kh())->java_mirror());
+        jclass jk = (jclass)JNIHandles::make_local(real_thread, kh()->java_mirror());
 
         // Before we call the JVMTI agent, we have to set the state in the
         // thread for which we are proxying.
@@ -1305,15 +1305,21 @@
         vframeStream st(thread);
         assert(!st.at_end(), "cannot be at end");
         Method* current_method = NULL;
+        // A GC may occur during the Method::fast_exception_handler_bci_for()
+        // call below if it needs to load the constraint class. Use a
+        // methodHandle to keep 'current_method' from being deallocated
+        // if a GC happens.
+        methodHandle current_mh = methodHandle(thread, current_method);
         int current_bci = -1;
         do {
           current_method = st.method();
+          current_mh = methodHandle(thread, current_method);
           current_bci = st.bci();
           do {
             should_repeat = false;
             KlassHandle eh_klass(thread, exception_handle()->klass());
-            current_bci = current_method->fast_exception_handler_bci_for(
-              eh_klass, current_bci, THREAD);
+            current_bci = Method::fast_exception_handler_bci_for(
+              current_mh, eh_klass, current_bci, THREAD);
             if (HAS_PENDING_EXCEPTION) {
               exception_handle = Handle(thread, PENDING_EXCEPTION);
               CLEAR_PENDING_EXCEPTION;
@@ -1328,8 +1334,7 @@
           catch_jmethodID = 0;
           current_bci = 0;
         } else {
-          catch_jmethodID = jem.to_jmethodID(
-                                     methodHandle(thread, current_method));
+          catch_jmethodID = jem.to_jmethodID(current_mh);
         }
 
         JvmtiJavaThreadEventTransition jet(thread);
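The change above re-derives current_mh on every loop iteration instead of trusting a cached raw Method* across a call that can trigger class loading and a GC. The underlying hazard is the general one of caching a raw pointer across an operation that may move or reclaim storage; an analogy in plain C (not HotSpot code) is to keep a stable handle and re-derive the pointer after the risky operation:

    #include <stdio.h>
    #include <stdlib.h>

    /* Analogy only: a pointer cached across realloc() may dangle, much as
     * a raw Method* cached across a GC point may be deallocated. A stable
     * handle (an index here; a methodHandle in HotSpot) stays valid. */
    int main(void) {
      int *buf = malloc(4 * sizeof *buf);
      if (buf == NULL) return 1;
      buf[2] = 42;

      size_t handle = 2;                            /* survives reallocation */
      int *tmp = realloc(buf, 1024 * sizeof *buf);  /* block may move */
      if (tmp == NULL) { free(buf); return 1; }
      buf = tmp;

      printf("%d\n", buf[handle]);                  /* prints 42 */
      free(buf);
      return 0;
    }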
@@ -2121,7 +2126,7 @@
     if (env->is_enabled(JVMTI_EVENT_VM_OBJECT_ALLOC)) {
       EVT_TRACE(JVMTI_EVENT_VM_OBJECT_ALLOC, ("JVMTI [%s] Evt vmobject alloc sent %s",
                                          JvmtiTrace::safe_get_thread_name(thread),
-                                         object==NULL? "NULL" : Klass::cast(java_lang_Class::as_Klass(object))->external_name()));
+                                         object==NULL? "NULL" : java_lang_Class::as_Klass(object)->external_name()));
 
       JvmtiVMObjectAllocEventMark jem(thread, h());
       JvmtiJavaThreadEventTransition jet(thread);
@@ -2177,7 +2182,7 @@
 jint JvmtiExport::load_agent_library(AttachOperation* op, outputStream* st) {
   char ebuf[1024];
   char buffer[JVM_MAXPATHLEN];
-  void* library;
+  void* library = NULL;
   jint result = JNI_ERR;
 
   // get agent name and options
@@ -2196,13 +2201,16 @@
     library = os::dll_load(agent, ebuf, sizeof ebuf);
   } else {
     // Try to load the agent from the standard dll directory
-    os::dll_build_name(buffer, sizeof(buffer), Arguments::get_dll_dir(), agent);
-    library = os::dll_load(buffer, ebuf, sizeof ebuf);
+    if (os::dll_build_name(buffer, sizeof(buffer), Arguments::get_dll_dir(),
+                           agent)) {
+      library = os::dll_load(buffer, ebuf, sizeof ebuf);
+    }
     if (library == NULL) {
       // not found - try local path
       char ns[1] = {0};
-      os::dll_build_name(buffer, sizeof(buffer), ns, agent);
-      library = os::dll_load(buffer, ebuf, sizeof ebuf);
+      if (os::dll_build_name(buffer, sizeof(buffer), ns, agent)) {
+        library = os::dll_load(buffer, ebuf, sizeof ebuf);
+      }
     }
   }
 
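The load_agent_library change initializes library to NULL and only calls os::dll_load when os::dll_build_name reports that the path actually fit in the buffer. A standalone POSIX sketch of the same defensive pattern, with snprintf standing in for the name-building step and dlopen for the load (the directory and library name below are placeholders):

    #include <dlfcn.h>
    #include <stdio.h>

    /* Build the library path, check for truncation, and only then load;
     * otherwise leave the handle NULL, mirroring the fix above. */
    static void *load_from_dir(const char *dir, const char *name) {
      char path[256];
      int n = snprintf(path, sizeof path, "%s/lib%s.so", dir, name);
      if (n < 0 || (size_t)n >= sizeof path) {
        return NULL;               /* name did not fit: skip dlopen */
      }
      return dlopen(path, RTLD_LAZY);
    }

    int main(void) {
      void *library = load_from_dir("/usr/lib", "m");
      printf("library %s\n", library != NULL ? "loaded" : "not found");
      return 0;
    }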
--- a/src/share/vm/prims/jvmtiExport.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiExport.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -64,6 +64,8 @@
 //
 class JvmtiExport : public AllStatic {
   friend class VMStructs;
+  friend class CompileReplay;
+
  private:
 
 #if INCLUDE_JVMTI
--- a/src/share/vm/prims/jvmtiGetLoadedClasses.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiGetLoadedClasses.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -169,7 +169,7 @@
   static void increment(Klass* k) {
     JvmtiGetLoadedClassesClosure* that = JvmtiGetLoadedClassesClosure::get_this();
     if (that->get_initiatingLoader() == NULL) {
-      for (Klass* l = k; l != NULL; l = Klass::cast(l)->array_klass_or_null()) {
+      for (Klass* l = k; l != NULL; l = l->array_klass_or_null()) {
         that->set_count(that->get_count() + 1);
       }
     } else if (k != NULL) {
@@ -182,7 +182,7 @@
     JvmtiGetLoadedClassesClosure* that = JvmtiGetLoadedClassesClosure::get_this();
     oop class_loader = loader_data->class_loader();
     if (class_loader == JNIHandles::resolve(that->get_initiatingLoader())) {
-      for (Klass* l = k; l != NULL; l = Klass::cast(l)->array_klass_or_null()) {
+      for (Klass* l = k; l != NULL; l = l->array_klass_or_null()) {
         that->set_count(that->get_count() + 1);
       }
     }
@@ -200,14 +200,14 @@
     JvmtiGetLoadedClassesClosure* that = JvmtiGetLoadedClassesClosure::get_this();
     if (that->available()) {
       if (that->get_initiatingLoader() == NULL) {
-        for (Klass* l = k; l != NULL; l = Klass::cast(l)->array_klass_or_null()) {
-          oop mirror = Klass::cast(l)->java_mirror();
+        for (Klass* l = k; l != NULL; l = l->array_klass_or_null()) {
+          oop mirror = l->java_mirror();
           that->set_element(that->get_index(), mirror);
           that->set_index(that->get_index() + 1);
         }
       } else if (k != NULL) {
         // if the initiating loader is not null, just include the instance with 1 dimension
-        oop mirror = Klass::cast(k)->java_mirror();
+        oop mirror = k->java_mirror();
         that->set_element(that->get_index(), mirror);
         that->set_index(that->get_index() + 1);
       }
@@ -219,8 +219,8 @@
     if (that->available()) {
       oop class_loader = loader_data->class_loader();
       if (class_loader == JNIHandles::resolve(that->get_initiatingLoader())) {
-        for (Klass* l = k; l != NULL; l = Klass::cast(l)->array_klass_or_null()) {
-          oop mirror = Klass::cast(l)->java_mirror();
+        for (Klass* l = k; l != NULL; l = l->array_klass_or_null()) {
+          oop mirror = l->java_mirror();
           that->set_element(that->get_index(), mirror);
           that->set_index(that->get_index() + 1);
         }
@@ -234,7 +234,7 @@
   static void increment_for_basic_type_arrays(Klass* k) {
     JvmtiGetLoadedClassesClosure* that = JvmtiGetLoadedClassesClosure::get_this();
     assert(that != NULL, "no JvmtiGetLoadedClassesClosure");
-    for (Klass* l = k; l != NULL; l = Klass::cast(l)->array_klass_or_null()) {
+    for (Klass* l = k; l != NULL; l = l->array_klass_or_null()) {
       that->set_count(that->get_count() + 1);
     }
   }
@@ -244,8 +244,8 @@
     JvmtiGetLoadedClassesClosure* that = JvmtiGetLoadedClassesClosure::get_this();
     assert(that != NULL, "no JvmtiGetLoadedClassesClosure");
     assert(that->available(), "no list");
-    for (Klass* l = k; l != NULL; l = Klass::cast(l)->array_klass_or_null()) {
-      oop mirror = Klass::cast(l)->java_mirror();
+    for (Klass* l = k; l != NULL; l = l->array_klass_or_null()) {
+      oop mirror = l->java_mirror();
       that->set_element(that->get_index(), mirror);
       that->set_index(that->get_index() + 1);
     }
--- a/src/share/vm/prims/jvmtiImpl.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiImpl.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -41,22 +41,11 @@
 #include "runtime/os.hpp"
 #include "runtime/serviceThread.hpp"
 #include "runtime/signature.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vframe.hpp"
 #include "runtime/vframe_hp.hpp"
 #include "runtime/vm_operations.hpp"
 #include "utilities/exceptions.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 //
 // class JvmtiAgentThread
@@ -641,14 +630,14 @@
   int super_depth = klass->super_depth();
   int idx;
   for (idx = 0; idx < super_depth; idx++) {
-    if (Klass::cast(klass->primary_super_of_depth(idx))->name() == ty_sym) {
+    if (klass->primary_super_of_depth(idx)->name() == ty_sym) {
       return true;
     }
   }
   // Compare secondary supers
   Array<Klass*>* sec_supers = klass->secondary_supers();
   for (idx = 0; idx < sec_supers->length(); idx++) {
-    if (Klass::cast((Klass*) sec_supers->at(idx))->name() == ty_sym) {
+    if (((Klass*) sec_supers->at(idx))->name() == ty_sym) {
       return true;
     }
   }
@@ -726,7 +715,7 @@
     KlassHandle ob_kh = KlassHandle(cur_thread, obj->klass());
     NULL_CHECK(ob_kh, (_result = JVMTI_ERROR_INVALID_OBJECT, false));
 
-    if (!is_assignable(signature, Klass::cast(ob_kh()), cur_thread)) {
+    if (!is_assignable(signature, ob_kh(), cur_thread)) {
       _result = JVMTI_ERROR_TYPE_MISMATCH;
       return false;
     }
--- a/src/share/vm/prims/jvmtiRedefineClasses.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiRedefineClasses.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -222,7 +222,7 @@
   }
   Klass* the_class_oop = java_lang_Class::as_Klass(klass_mirror);
   // classes for arrays cannot be redefined
-  if (the_class_oop == NULL || !Klass::cast(the_class_oop)->oop_is_instance()) {
+  if (the_class_oop == NULL || !the_class_oop->oop_is_instance()) {
     return false;
   }
   return true;
@@ -573,8 +573,8 @@
   // Check for NULL superclass first since this might be java.lang.Object
   if (the_class->super() != scratch_class->super() &&
       (the_class->super() == NULL || scratch_class->super() == NULL ||
-       Klass::cast(the_class->super())->name() !=
-       Klass::cast(scratch_class->super())->name())) {
+       the_class->super()->name() !=
+       scratch_class->super()->name())) {
     return JVMTI_ERROR_UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED;
   }
 
@@ -592,8 +592,8 @@
     return JVMTI_ERROR_UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED;
   }
   for (i = 0; i < n_intfs; i++) {
-    if (Klass::cast(k_interfaces->at(i))->name() !=
-        Klass::cast(k_new_interfaces->at(i))->name()) {
+    if (k_interfaces->at(i)->name() !=
+        k_new_interfaces->at(i)->name()) {
       return JVMTI_ERROR_UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED;
     }
   }
@@ -1043,7 +1043,7 @@
 
     Rewriter::rewrite(scratch_class, THREAD);
     if (!HAS_PENDING_EXCEPTION) {
-      Rewriter::relocate_and_link(scratch_class, THREAD);
+      scratch_class->link_methods(THREAD);
     }
     if (HAS_PENDING_EXCEPTION) {
       Symbol* ex_name = PENDING_EXCEPTION->klass()->name();
@@ -1334,20 +1334,8 @@
     return JVMTI_ERROR_INTERNAL;
   }
 
-  int orig_length = old_cp->orig_length();
-  if (orig_length == 0) {
-    // This old_cp is an actual original constant pool. We save
-    // the original length in the merged constant pool so that
-    // merge_constant_pools() can be more efficient. If a constant
-    // pool has a non-zero orig_length() value, then that constant
-    // pool was created by a merge operation in RedefineClasses.
-    merge_cp->set_orig_length(old_cp->length());
-  } else {
-    // This old_cp is a merged constant pool from a previous
-    // RedefineClasses() calls so just copy the orig_length()
-    // value.
-    merge_cp->set_orig_length(old_cp->orig_length());
-  }
+  // Update the version number of the constant pool
+  merge_cp->increment_and_save_version(old_cp->version());
 
   ResourceMark rm(THREAD);
   _index_map_count = 0;
@@ -2417,18 +2405,19 @@
        int scratch_cp_length, TRAPS) {
   assert(scratch_cp->length() >= scratch_cp_length, "sanity check");
 
-    // scratch_cp is a merged constant pool and has enough space for a
-    // worst case merge situation. We want to associate the minimum
-    // sized constant pool with the klass to save space.
-    constantPoolHandle smaller_cp(THREAD,
-    ConstantPool::allocate(loader_data, scratch_cp_length,
-                                   THREAD));
-    // preserve orig_length() value in the smaller copy
-    int orig_length = scratch_cp->orig_length();
-    assert(orig_length != 0, "sanity check");
-    smaller_cp->set_orig_length(orig_length);
-    scratch_cp->copy_cp_to(1, scratch_cp_length - 1, smaller_cp, 1, THREAD);
-    scratch_cp = smaller_cp;
+  // scratch_cp is a merged constant pool and has enough space for a
+  // worst case merge situation. We want to associate the minimum
+  // sized constant pool with the klass to save space.
+  constantPoolHandle smaller_cp(THREAD,
+          ConstantPool::allocate(loader_data, scratch_cp_length, THREAD));
+
+  // preserve version() value in the smaller copy
+  int version = scratch_cp->version();
+  assert(version != 0, "sanity check");
+  smaller_cp->set_version(version);
+
+  scratch_cp->copy_cp_to(1, scratch_cp_length - 1, smaller_cp, 1, THREAD);
+  scratch_cp = smaller_cp;
 
   // attach new constant pool to klass
   scratch_cp->set_pool_holder(scratch_class());
@@ -2684,7 +2673,7 @@
     // interface, then we have to call adjust_method_entries() for
     // every InstanceKlass that has an itable since there isn't a
     // subclass relationship between an interface and an InstanceKlass.
-    if (ik->itable_length() > 0 && (Klass::cast(_the_class_oop)->is_interface()
+    if (ik->itable_length() > 0 && (_the_class_oop->is_interface()
         || ik->is_subclass_of(_the_class_oop))) {
       // ik->itable() creates a wrapper object; rm cleans it up
       ResourceMark rm(THREAD);
@@ -2929,7 +2918,7 @@
                                      Symbol* signature) {
     TempNewSymbol name_symbol = SymbolTable::probe(name_str, (int)name_len);
     if (name_symbol != NULL) {
-      Method* method = Klass::cast(the_class())->lookup_method(name_symbol, signature);
+      Method* method = the_class()->lookup_method(name_symbol, signature);
       if (method != NULL) {
         // Even if prefixed, intermediate methods must exist.
         if (method->is_native()) {
@@ -3338,7 +3327,20 @@
     the_class->set_access_flags(flags);
   }
 
-  // Replace annotation fields value
+  // Since there is currently no rewriting of type annotations indexes
+  // into the CP, we null out type annotations on scratch_class before
+  // we swap annotations with the_class rather than facing the
+  // possibility of shipping annotations with broken indexes to
+  // Java-land.
+  Annotations* new_annotations = scratch_class->annotations();
+  if (new_annotations != NULL) {
+    Annotations* new_type_annotations = new_annotations->type_annotations();
+    if (new_type_annotations != NULL) {
+      MetadataFactory::free_metadata(scratch_class->class_loader_data(), new_type_annotations);
+      new_annotations->set_type_annotations(NULL);
+    }
+  }
+  // Swap annotation fields values
   Annotations* old_annotations = the_class->annotations();
   the_class->set_annotations(scratch_class->annotations());
   scratch_class->set_annotations(old_annotations);
--- a/src/share/vm/prims/jvmtiTagMap.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiTagMap.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -2774,7 +2774,7 @@
 // a type array references its class
 inline bool VM_HeapWalkOperation::iterate_over_type_array(oop o) {
   Klass* k = o->klass();
-  oop mirror = Klass::cast(k)->java_mirror();
+  oop mirror = k->java_mirror();
   if (!CallbackInvoker::report_class_reference(o, mirror)) {
     return false;
   }
@@ -2823,7 +2823,7 @@
     // super (only if something more interesting than java.lang.Object)
     Klass* java_super = ik->java_super();
     if (java_super != NULL && java_super != SystemDictionary::Object_klass()) {
-      oop super = Klass::cast(java_super)->java_mirror();
+      oop super = java_super->java_mirror();
       if (!CallbackInvoker::report_superclass_reference(mirror, super)) {
         return false;
       }
@@ -2865,7 +2865,7 @@
             // If the entry is non-null it is resolved.
             if (entry == NULL) continue;
           } else {
-            entry = Klass::cast(pool->resolved_klass_at(i))->java_mirror();
+            entry = pool->resolved_klass_at(i)->java_mirror();
           }
           if (!CallbackInvoker::report_constant_pool_reference(mirror, entry, (jint)i)) {
             return false;
@@ -2879,7 +2879,7 @@
     //  but are specified by IterateOverReachableObjects and must be reported).
     Array<Klass*>* interfaces = ik->local_interfaces();
     for (i = 0; i < interfaces->length(); i++) {
-      oop interf = Klass::cast((Klass*)interfaces->at(i))->java_mirror();
+      oop interf = ((Klass*)interfaces->at(i))->java_mirror();
       if (interf == NULL) {
         continue;
       }
@@ -2928,7 +2928,7 @@
 // references from the class).
 inline bool VM_HeapWalkOperation::iterate_over_object(oop o) {
   // reference to the class
-  if (!CallbackInvoker::report_class_reference(o, Klass::cast(o->klass())->java_mirror())) {
+  if (!CallbackInvoker::report_class_reference(o, o->klass()->java_mirror())) {
     return false;
   }
 
--- a/src/share/vm/prims/jvmtiTrace.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/jvmtiTrace.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -292,7 +292,7 @@
   if (k_oop == NULL) {
     return "INVALID";
   }
-  return Klass::cast(k_oop)->external_name();
+  return k_oop->external_name();
 }
 
 #endif /*JVMTI_TRACE */
--- a/src/share/vm/prims/methodHandles.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/methodHandles.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -138,7 +138,7 @@
     oop clazz  = java_lang_reflect_Method::clazz(target_oop);
     int slot   = java_lang_reflect_Method::slot(target_oop);
     Klass* k = java_lang_Class::as_Klass(clazz);
-    if (k != NULL && Klass::cast(k)->oop_is_instance()) {
+    if (k != NULL && k->oop_is_instance()) {
       Method* m = InstanceKlass::cast(k)->method_with_idnum(slot);
       return init_method_MemberName(mname_oop, m, true, k);
     }
@@ -146,7 +146,7 @@
     oop clazz  = java_lang_reflect_Constructor::clazz(target_oop);
     int slot   = java_lang_reflect_Constructor::slot(target_oop);
     Klass* k = java_lang_Class::as_Klass(clazz);
-    if (k != NULL && Klass::cast(k)->oop_is_instance()) {
+    if (k != NULL && k->oop_is_instance()) {
       Method* m = InstanceKlass::cast(k)->method_with_idnum(slot);
       return init_method_MemberName(mname_oop, m, false, k);
     }
@@ -187,14 +187,14 @@
   } else if (mods.is_static()) {
     flags |= IS_METHOD | (JVM_REF_invokeStatic << REFERENCE_KIND_SHIFT);
   } else if (receiver_limit != mklass &&
-             !Klass::cast(receiver_limit)->is_subtype_of(mklass)) {
+             !receiver_limit->is_subtype_of(mklass)) {
     return NULL;  // bad receiver limit
-  } else if (Klass::cast(receiver_limit)->is_interface() &&
-             Klass::cast(mklass)->is_interface()) {
+  } else if (receiver_limit->is_interface() &&
+             mklass->is_interface()) {
     flags |= IS_METHOD | (JVM_REF_invokeInterface << REFERENCE_KIND_SHIFT);
     receiver_limit = mklass;  // ignore passed-in limit; interfaces are interconvertible
     vmindex = klassItable::compute_itable_index(m);
-  } else if (mklass != receiver_limit && Klass::cast(mklass)->is_interface()) {
+  } else if (mklass != receiver_limit && mklass->is_interface()) {
     flags |= IS_METHOD | (JVM_REF_invokeVirtual << REFERENCE_KIND_SHIFT);
     // it is a miranda method, so m->vtable_index is not what we want
     ResourceMark rm;
@@ -210,7 +210,7 @@
   java_lang_invoke_MemberName::set_flags(mname_oop,    flags);
   java_lang_invoke_MemberName::set_vmtarget(mname_oop, m);
   java_lang_invoke_MemberName::set_vmindex(mname_oop,  vmindex);   // vtable/itable index
-  java_lang_invoke_MemberName::set_clazz(mname_oop,    Klass::cast(receiver_limit)->java_mirror());
+  java_lang_invoke_MemberName::set_clazz(mname_oop,    receiver_limit->java_mirror());
   // Note:  name and type can be lazily computed by resolve_MemberName,
   // if Java code needs them as resolved String and MethodType objects.
   // The clazz must be eagerly stored, because it provides a GC
@@ -233,7 +233,7 @@
   methodHandle m = info.resolved_method();
   KlassHandle defc = info.resolved_klass();
   int vmindex = -1;
-  if (defc->is_interface() && Klass::cast(m->method_holder())->is_interface()) {
+  if (defc->is_interface() && m->method_holder()->is_interface()) {
     // LinkResolver does not report itable indexes!  (fix this?)
     vmindex = klassItable::compute_itable_index(m());
   } else if (m->can_be_statically_bound()) {
@@ -258,7 +258,7 @@
   java_lang_invoke_MemberName::set_flags(mname_oop,    flags);
   java_lang_invoke_MemberName::set_vmtarget(mname_oop, vmtarget);
   java_lang_invoke_MemberName::set_vmindex(mname_oop,  vmindex);
-  java_lang_invoke_MemberName::set_clazz(mname_oop,    Klass::cast(field_holder)->java_mirror());
+  java_lang_invoke_MemberName::set_clazz(mname_oop,    field_holder->java_mirror());
   if (name != NULL)
     java_lang_invoke_MemberName::set_name(mname_oop,   name);
   if (type != NULL)
@@ -299,7 +299,7 @@
   // The following test will fail spuriously during bootstrap of MethodHandle itself:
   //    if (klass != SystemDictionary::MethodHandle_klass())
   // Test the name instead:
-  if (Klass::cast(klass)->name() != vmSymbols::java_lang_invoke_MethodHandle())
+  if (klass->name() != vmSymbols::java_lang_invoke_MethodHandle())
     return false;
   Symbol* poly_sig = vmSymbols::object_array_object_signature();
   Method* m = InstanceKlass::cast(klass)->find_method(name, poly_sig);
@@ -363,7 +363,7 @@
 
 vmIntrinsics::ID MethodHandles::signature_polymorphic_name_id(Klass* klass, Symbol* name) {
   if (klass != NULL &&
-      Klass::cast(klass)->name() == vmSymbols::java_lang_invoke_MethodHandle()) {
+      klass->name() == vmSymbols::java_lang_invoke_MethodHandle()) {
     vmIntrinsics::ID iid = signature_polymorphic_name_id(name);
     if (iid != vmIntrinsics::_none)
       return iid;
@@ -539,7 +539,7 @@
 
 
 static oop object_java_mirror() {
-  return Klass::cast(SystemDictionary::Object_klass())->java_mirror();
+  return SystemDictionary::Object_klass()->java_mirror();
 }
 
 static oop field_name_or_null(Symbol* s) {
@@ -560,9 +560,9 @@
     if (s == vmSymbols::object_signature()) {
       return object_java_mirror();
     } else if (s == vmSymbols::class_signature()) {
-      return Klass::cast(SystemDictionary::Class_klass())->java_mirror();
+      return SystemDictionary::Class_klass()->java_mirror();
     } else if (s == vmSymbols::string_signature()) {
-      return Klass::cast(SystemDictionary::String_klass())->java_mirror();
+      return SystemDictionary::String_klass()->java_mirror();
     }
   }
   return NULL;
@@ -603,8 +603,8 @@
   {
     Klass* defc_klass = java_lang_Class::as_Klass(defc_oop());
     if (defc_klass == NULL)  return empty;  // a primitive; no resolution possible
-    if (!Klass::cast(defc_klass)->oop_is_instance()) {
-      if (!Klass::cast(defc_klass)->oop_is_array())  return empty;
+    if (!defc_klass->oop_is_instance()) {
+      if (!defc_klass->oop_is_array())  return empty;
       defc_klass = SystemDictionary::Object_klass();
     }
     defc = instanceKlassHandle(THREAD, defc_klass);
@@ -749,8 +749,8 @@
       DEBUG_ONLY(vmtarget = NULL);  // safety
       if (m.is_null())  break;
       if (!have_defc) {
-        Klass* defc = m->method_holder();
-        java_lang_invoke_MemberName::set_clazz(mname(), Klass::cast(defc)->java_mirror());
+        InstanceKlass* defc = m->method_holder();
+        java_lang_invoke_MemberName::set_clazz(mname(), defc->java_mirror());
       }
       if (!have_name) {
         //not java_lang_String::create_from_symbol; let's intern member names
@@ -767,7 +767,7 @@
     {
       // This is taken from LinkResolver::resolve_field, sans access checks.
       assert(vmtarget->is_klass(), "field vmtarget is Klass*");
-      if (!Klass::cast((Klass*) vmtarget)->oop_is_instance())  break;
+      if (!((Klass*) vmtarget)->oop_is_instance())  break;
       instanceKlassHandle defc(THREAD, (Klass*) vmtarget);
       DEBUG_ONLY(vmtarget = NULL);  // safety
       bool is_static = ((flags & JVM_ACC_STATIC) != 0);
@@ -805,7 +805,7 @@
 
   // %%% take caller into account!
 
-  if (k == NULL || !Klass::cast(k)->oop_is_instance())  return -1;
+  if (k == NULL || !k->oop_is_instance())  return -1;
 
   int rfill = 0, rlimit = results->length(), rskip = skip;
   // overflow measurement:
@@ -1032,7 +1032,7 @@
       if (!Reflection::verify_class_access(caller,
                                            reference_klass,
                                            true)) {
-        THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), Klass::cast(reference_klass)->external_name());
+        THROW_MSG_NULL(vmSymbols::java_lang_InternalError(), reference_klass->external_name());
       }
     }
   }
@@ -1108,7 +1108,7 @@
   if (vmtarget == NULL) {
     x = NULL;
   } else if (vmtarget->is_klass()) {
-    x = Klass::cast((Klass*) vmtarget)->java_mirror();
+    x = ((Klass*) vmtarget)->java_mirror();
   } else if (vmtarget->is_method()) {
     Handle mname2 = MethodHandles::new_MemberName(CHECK_NULL);
     x = MethodHandles::init_method_MemberName(mname2(), (Method*)vmtarget, false, NULL);
@@ -1168,8 +1168,8 @@
     // Walk all nmethods depending on this call site.
     MutexLocker mu(Compile_lock, thread);
     Universe::flush_dependents_on(call_site, target);
+    java_lang_invoke_CallSite::set_target(call_site(), target());
   }
-  java_lang_invoke_CallSite::set_target(call_site(), target());
 }
 JVM_END
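
Why the set_target store moved inside the locked region: holding Compile_lock across both the dependency flush and the store closes a window in which a compiler thread could inline the old target and register a dependency after the flush but before the store, leaving an nmethod that would never be deoptimized. A sketch of the fixed shape with that reasoning as comments (identifiers are from the hunk above; the same applies to the volatile variant in the next hunk):

    {
      // Compile_lock serializes this code against compiler threads that
      // register new dependencies on the CallSite target.
      MutexLocker mu(Compile_lock, thread);
      Universe::flush_dependents_on(call_site, target);
      // Store while still holding the lock: if the store happened after
      // the unlock, a compile could record a dependency on the old target
      // between the flush and the store, and that nmethod would never be
      // deoptimized when the new target is published.
      java_lang_invoke_CallSite::set_target(call_site(), target());
    }
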
 
@@ -1180,8 +1180,8 @@
     // Walk all nmethods depending on this call site.
     MutexLocker mu(Compile_lock, thread);
     Universe::flush_dependents_on(call_site, target);
+    java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
   }
-  java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
 }
 JVM_END
 
@@ -1237,7 +1237,7 @@
   if (SystemDictionary::MethodHandle_klass() == NULL) {
     enable_MH = false;
   } else {
-    oop mirror = Klass::cast(SystemDictionary::MethodHandle_klass())->java_mirror();
+    oop mirror = SystemDictionary::MethodHandle_klass()->java_mirror();
     MH_class = (jclass) JNIHandles::make_local(env, mirror);
   }
 
--- a/src/share/vm/prims/nativeLookup.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/nativeLookup.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -165,8 +165,7 @@
   // Note: It is critical for bootstrapping that Java_java_lang_ClassLoader_00024NativeLibrary_find
   // gets found the first time around - otherwise an infinite loop can occur. This is
   // another VM/library dependency
-  Handle loader(THREAD,
-                InstanceKlass::cast(method->method_holder())->class_loader());
+  Handle loader(THREAD, method->method_holder()->class_loader());
   if (loader.is_null()) {
     entry = lookup_special_native(jni_name);
     if (entry == NULL) {
@@ -350,7 +349,7 @@
     TempNewSymbol wrapper_symbol = SymbolTable::probe(wrapper_name, wrapper_name_len);
     if (wrapper_symbol != NULL) {
       KlassHandle kh(method->method_holder());
-      Method* wrapper_method = Klass::cast(kh())->lookup_method(wrapper_symbol,
+      Method* wrapper_method = kh()->lookup_method(wrapper_symbol,
                                                                   method->signature());
       if (wrapper_method != NULL && !wrapper_method->is_native()) {
         // we found a wrapper method, use its native entry
@@ -393,7 +392,7 @@
     if (PrintJNIResolving) {
       ResourceMark rm(THREAD);
       tty->print_cr("[Dynamic-linking native method %s.%s ... JNI]",
-        Klass::cast(method->method_holder())->external_name(),
+        method->method_holder()->external_name(),
         method->name()->as_C_string());
     }
   }
--- a/src/share/vm/prims/unsafe.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/prims/unsafe.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -124,6 +124,8 @@
       assert((void*)p->obj_field_addr<oop>((jint)byte_offset) == ptr_plus_disp,
              "raw [ptr+disp] must be consistent with oop::field_base");
     }
+    jlong p_size = HeapWordSize * (jlong)(p->size());
+    assert(byte_offset < p_size, err_msg("Unsafe access: offset " INT64_FORMAT " > object's size " INT64_FORMAT, byte_offset, p_size));
   }
 #endif
   if (sizeof(char*) == sizeof(jint))    // (this constant folds!)
@@ -466,6 +468,21 @@
 #endif
 UNSAFE_END
 
+UNSAFE_ENTRY(void, Unsafe_LoadFence(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_LoadFence");
+  OrderAccess::acquire();
+UNSAFE_END
+
+UNSAFE_ENTRY(void, Unsafe_StoreFence(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_StoreFence");
+  OrderAccess::release();
+UNSAFE_END
+
+UNSAFE_ENTRY(void, Unsafe_FullFence(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_FullFence");
+  OrderAccess::fence();
+UNSAFE_END
+
 ////// Data in the C heap.
 
 // Note:  These do not throw NullPointerException for bad pointers.
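
Each of the three new natives delegates to one OrderAccess barrier. For the memory-model intuition, here is a self-contained C++11 analogue (our sketch, not HotSpot code; reading loadFence as an acquire fence, storeFence as a release fence, and fullFence as a sequentially consistent fence):

    #include <atomic>
    #include <cassert>
    #include <thread>

    int data = 0;
    std::atomic<bool> ready(false);

    void producer() {
      data = 42;
      std::atomic_thread_fence(std::memory_order_release);  // ~ storeFence
      ready.store(true, std::memory_order_relaxed);
    }

    void consumer() {
      while (!ready.load(std::memory_order_relaxed)) { /* spin */ }
      std::atomic_thread_fence(std::memory_order_acquire);  // ~ loadFence
      assert(data == 42);  // the fence pair makes the store visible
    }

    int main() {
      std::thread t1(producer), t2(consumer);
      t1.join(); t2.join();
      return 0;
    }
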
@@ -769,7 +786,7 @@
   oop mirror = JNIHandles::resolve_non_null(clazz);
 
   Klass* klass = java_lang_Class::as_Klass(mirror);
-  if (klass != NULL && Klass::cast(klass)->should_be_initialized()) {
+  if (klass != NULL && klass->should_be_initialized()) {
     InstanceKlass* k = InstanceKlass::cast(klass);
     k->initialize(CHECK);
   }
@@ -783,7 +800,7 @@
   }
   oop mirror = JNIHandles::resolve_non_null(clazz);
   Klass* klass = java_lang_Class::as_Klass(mirror);
-  if (klass != NULL && Klass::cast(klass)->should_be_initialized()) {
+  if (klass != NULL && klass->should_be_initialized()) {
     return true;
   }
   return false;
@@ -994,7 +1011,7 @@
 // not just a literal string.  For such ldc instructions, the verifier uses the
 // type Object instead of String, if the loaded constant is not in fact a String.
 
-static oop
+static instanceKlassHandle
 Unsafe_DefineAnonymousClass_impl(JNIEnv *env,
                                  jclass host_class, jbyteArray data, jobjectArray cp_patches_jh,
                                  HeapWord* *temp_alloc,
@@ -1071,32 +1088,39 @@
     anon_klass = instanceKlassHandle(THREAD, anonk);
   }
 
-  // let caller initialize it as needed...
-
-  return anon_klass->java_mirror();
+  return anon_klass;
 }
 
 UNSAFE_ENTRY(jclass, Unsafe_DefineAnonymousClass(JNIEnv *env, jobject unsafe, jclass host_class, jbyteArray data, jobjectArray cp_patches_jh))
 {
+  instanceKlassHandle anon_klass;
+  jobject res_jh = NULL;
+
   UnsafeWrapper("Unsafe_DefineAnonymousClass");
   ResourceMark rm(THREAD);
 
   HeapWord* temp_alloc = NULL;
 
-  jobject res_jh = NULL;
-
-  { oop res_oop = Unsafe_DefineAnonymousClass_impl(env,
-                                                   host_class, data, cp_patches_jh,
+  anon_klass = Unsafe_DefineAnonymousClass_impl(env, host_class, data,
+                                                cp_patches_jh,
                                                    &temp_alloc, THREAD);
-    if (res_oop != NULL)
-      res_jh = JNIHandles::make_local(env, res_oop);
-  }
+  if (anon_klass() != NULL)
+    res_jh = JNIHandles::make_local(env, anon_klass->java_mirror());
 
   // try/finally clause:
   if (temp_alloc != NULL) {
     FREE_C_HEAP_ARRAY(HeapWord, temp_alloc, mtInternal);
   }
 
+  // The anonymous class loader data has been artificially kept alive up
+  // to this point. The mirror and any instances of this class have to
+  // keep it alive afterwards.
+  if (anon_klass() != NULL) {
+    anon_klass->class_loader_data()->set_keep_alive(false);
+  }
+
+  // let caller initialize it as needed...
+
   return (jclass) res_jh;
 }
 UNSAFE_END
@@ -1541,6 +1565,9 @@
     {CC"putOrderedObject",   CC"("OBJ"J"OBJ")V",         FN_PTR(Unsafe_SetOrderedObject)},
     {CC"putOrderedInt",      CC"("OBJ"JI)V",             FN_PTR(Unsafe_SetOrderedInt)},
     {CC"putOrderedLong",     CC"("OBJ"JJ)V",             FN_PTR(Unsafe_SetOrderedLong)},
+    {CC"loadFence",          CC"()V",                    FN_PTR(Unsafe_LoadFence)},
+    {CC"storeFence",         CC"()V",                    FN_PTR(Unsafe_StoreFence)},
+    {CC"fullFence",          CC"()V",                    FN_PTR(Unsafe_FullFence)},
     {CC"park",               CC"(ZJ)V",                  FN_PTR(Unsafe_Park)},
     {CC"unpark",             CC"("OBJ")V",               FN_PTR(Unsafe_Unpark)}
 
--- a/src/share/vm/runtime/aprofiler.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/aprofiler.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -129,7 +129,7 @@
   assert(!is_active(), "AllocationProfiler cannot be active while printing profile");
 
   tty->cr();
-  tty->print_cr("Allocation profile (sizes in bytes, cutoff = %ld bytes):", cutoff * BytesPerWord);
+  tty->print_cr("Allocation profile (sizes in bytes, cutoff = " SIZE_FORMAT " bytes):", cutoff * BytesPerWord);
   tty->cr();
 
   // Print regular instance klasses and basic type array klasses
--- a/src/share/vm/runtime/arguments.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/arguments.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "classfile/javaAssertions.hpp"
+#include "classfile/symbolTable.hpp"
 #include "compiler/compilerOracle.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/cardTableRS.hpp"
@@ -257,6 +258,7 @@
   { "MaxPermHeapExpansion", JDK_Version::jdk(8),  JDK_Version::jdk(9) },
   { "CMSRevisitStackSize",           JDK_Version::jdk(8), JDK_Version::jdk(9) },
   { "PrintRevisitStats",             JDK_Version::jdk(8), JDK_Version::jdk(9) },
+  { "UseVectoredExceptions",         JDK_Version::jdk(8), JDK_Version::jdk(9) },
 #ifdef PRODUCT
   { "DesiredMethodLimit",
                            JDK_Version::jdk_update(7, 2), JDK_Version::jdk(8) },
@@ -530,7 +532,7 @@
 // Parses a memory size specification string.
 static bool atomull(const char *s, julong* result) {
   julong n = 0;
-  int args_read = sscanf(s, os::julong_format_specifier(), &n);
+  int args_read = sscanf(s, JULONG_FORMAT, &n);
   if (args_read != 1) {
     return false;
   }
@@ -791,6 +793,10 @@
     st->print("jvm_args: "); print_jvm_args_on(st);
   }
   st->print_cr("java_command: %s", java_command() ? java_command() : "<unknown>");
+  if (_java_class_path != NULL) {
+    char* path = _java_class_path->value();
+    st->print_cr("java_class_path (initial): %s", strlen(path) == 0 ? "<not set>" : path );
+  }
   st->print_cr("Launcher Type: %s", _sun_java_launcher);
 }
 
@@ -1077,10 +1083,6 @@
   }
 }
 
-// If the user has chosen ParallelGCThreads > 0, we set UseParNewGC
-// if it's not explictly set or unset. If the user has chosen
-// UseParNewGC and not explicitly set ParallelGCThreads we
-// set it, unless this is a single cpu machine.
 void Arguments::set_parnew_gc_flags() {
   assert(!UseSerialGC && !UseParallelOldGC && !UseParallelGC && !UseG1GC,
          "control point invariant");
@@ -1089,42 +1091,41 @@
   // Turn off AdaptiveSizePolicy for parnew until it is complete.
   disable_adaptive_size_policy("UseParNewGC");
 
-  if (ParallelGCThreads == 0) {
-    FLAG_SET_DEFAULT(ParallelGCThreads,
-                     Abstract_VM_Version::parallel_worker_threads());
-    if (ParallelGCThreads == 1) {
-      FLAG_SET_DEFAULT(UseParNewGC, false);
-      FLAG_SET_DEFAULT(ParallelGCThreads, 0);
-    }
+  if (FLAG_IS_DEFAULT(ParallelGCThreads)) {
+    FLAG_SET_DEFAULT(ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads());
+    assert(ParallelGCThreads > 0, "We should always have at least one thread by default");
+  } else if (ParallelGCThreads == 0) {
+    jio_fprintf(defaultStream::error_stream(),
+        "The ParNew GC can not be combined with -XX:ParallelGCThreads=0\n");
+    vm_exit(1);
+  }
+
+  // By default YoungPLABSize and OldPLABSize are set to 4096 and 1024
+  // respectively; these are the defaults for the Parallel Scavenger. For a
+  // ParNew+Tenured configuration we set both to 1024.
+  // See CR 6362902.
+  if (FLAG_IS_DEFAULT(YoungPLABSize)) {
+    FLAG_SET_DEFAULT(YoungPLABSize, (intx)1024);
   }
-  if (UseParNewGC) {
-    // By default YoungPLABSize and OldPLABSize are set to 4096 and 1024 respectively,
-    // these settings are default for Parallel Scavenger. For ParNew+Tenured configuration
-    // we set them to 1024 and 1024.
-    // See CR 6362902.
-    if (FLAG_IS_DEFAULT(YoungPLABSize)) {
-      FLAG_SET_DEFAULT(YoungPLABSize, (intx)1024);
+  if (FLAG_IS_DEFAULT(OldPLABSize)) {
+    FLAG_SET_DEFAULT(OldPLABSize, (intx)1024);
+  }
+
+  // AlwaysTenure flag should make ParNew promote all at first collection.
+  // See CR 6362902.
+  if (AlwaysTenure) {
+    FLAG_SET_CMDLINE(uintx, MaxTenuringThreshold, 0);
+  }
+  // When using compressed oops, we use local overflow stacks,
+  // rather than using a global overflow list chained through
+  // the klass word of the object's pre-image.
+  if (UseCompressedOops && !ParGCUseLocalOverflow) {
+    if (!FLAG_IS_DEFAULT(ParGCUseLocalOverflow)) {
+      warning("Forcing +ParGCUseLocalOverflow: needed if using compressed references");
     }
-    if (FLAG_IS_DEFAULT(OldPLABSize)) {
-      FLAG_SET_DEFAULT(OldPLABSize, (intx)1024);
-    }
-
-    // AlwaysTenure flag should make ParNew promote all at first collection.
-    // See CR 6362902.
-    if (AlwaysTenure) {
-      FLAG_SET_CMDLINE(uintx, MaxTenuringThreshold, 0);
-    }
-    // When using compressed oops, we use local overflow stacks,
-    // rather than using a global overflow list chained through
-    // the klass word of the object's pre-image.
-    if (UseCompressedOops && !ParGCUseLocalOverflow) {
-      if (!FLAG_IS_DEFAULT(ParGCUseLocalOverflow)) {
-        warning("Forcing +ParGCUseLocalOverflow: needed if using compressed references");
-      }
-      FLAG_SET_DEFAULT(ParGCUseLocalOverflow, true);
-    }
-    assert(ParGCUseLocalOverflow || !UseCompressedOops, "Error");
+    FLAG_SET_DEFAULT(ParGCUseLocalOverflow, true);
   }
+  assert(ParGCUseLocalOverflow || !UseCompressedOops, "Error");
 }
 
 // Adjust some sizes to suit CMS and/or ParNew needs; these work well on
@@ -1325,14 +1326,14 @@
   // then a saved space from compressed oops.
   if ((int)ObjectAlignmentInBytes > 256) {
     jio_fprintf(defaultStream::error_stream(),
-                "error: ObjectAlignmentInBytes=%d must not be greater then 256\n",
+                "error: ObjectAlignmentInBytes=%d must not be greater than 256\n",
                 (int)ObjectAlignmentInBytes);
     return false;
   }
   // In case page size is very small.
   if ((int)ObjectAlignmentInBytes >= os::vm_page_size()) {
     jio_fprintf(defaultStream::error_stream(),
-                "error: ObjectAlignmentInBytes=%d must be less then page size %d\n",
+                "error: ObjectAlignmentInBytes=%d must be less than page size %d\n",
                 (int)ObjectAlignmentInBytes, os::vm_page_size());
     return false;
   }
@@ -1453,39 +1454,35 @@
 
   // If no heap maximum was requested explicitly, use some reasonable fraction
   // of the physical memory, up to a maximum of 1GB.
-  if (UseParallelGC) {
-    FLAG_SET_DEFAULT(ParallelGCThreads,
-                     Abstract_VM_Version::parallel_worker_threads());
-
-    // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the
-    // SurvivorRatio has been set, reset their default values to SurvivorRatio +
-    // 2.  By doing this we make SurvivorRatio also work for Parallel Scavenger.
-    // See CR 6362902 for details.
-    if (!FLAG_IS_DEFAULT(SurvivorRatio)) {
-      if (FLAG_IS_DEFAULT(InitialSurvivorRatio)) {
-         FLAG_SET_DEFAULT(InitialSurvivorRatio, SurvivorRatio + 2);
-      }
-      if (FLAG_IS_DEFAULT(MinSurvivorRatio)) {
-        FLAG_SET_DEFAULT(MinSurvivorRatio, SurvivorRatio + 2);
-      }
+  FLAG_SET_DEFAULT(ParallelGCThreads,
+                   Abstract_VM_Version::parallel_worker_threads());
+  if (ParallelGCThreads == 0) {
+    jio_fprintf(defaultStream::error_stream(),
+        "The Parallel GC can not be combined with -XX:ParallelGCThreads=0\n");
+    vm_exit(1);
+  }
+
+
+  // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the
+  // SurvivorRatio has been set, reset their default values to SurvivorRatio +
+  // 2.  By doing this we make SurvivorRatio also work for Parallel Scavenger.
+  // See CR 6362902 for details.
+  if (!FLAG_IS_DEFAULT(SurvivorRatio)) {
+    if (FLAG_IS_DEFAULT(InitialSurvivorRatio)) {
+       FLAG_SET_DEFAULT(InitialSurvivorRatio, SurvivorRatio + 2);
     }
-
-    if (UseParallelOldGC) {
-      // Par compact uses lower default values since they are treated as
-      // minimums.  These are different defaults because of the different
-      // interpretation and are not ergonomically set.
-      if (FLAG_IS_DEFAULT(MarkSweepDeadRatio)) {
-        FLAG_SET_DEFAULT(MarkSweepDeadRatio, 1);
-      }
+    if (FLAG_IS_DEFAULT(MinSurvivorRatio)) {
+      FLAG_SET_DEFAULT(MinSurvivorRatio, SurvivorRatio + 2);
     }
   }
-  if (UseNUMA) {
-    if (FLAG_IS_DEFAULT(MinHeapDeltaBytes)) {
-      FLAG_SET_DEFAULT(MinHeapDeltaBytes, 64*M);
+
+  if (UseParallelOldGC) {
+    // Par compact uses lower default values since they are treated as
+    // minimums.  These are different defaults because of the different
+    // interpretation and are not ergonomically set.
+    if (FLAG_IS_DEFAULT(MarkSweepDeadRatio)) {
+      FLAG_SET_DEFAULT(MarkSweepDeadRatio, 1);
     }
-    // For those collectors or operating systems (eg, Windows) that do
-    // not support full UseNUMA, we will map to UseNUMAInterleaving for now
-    UseNUMAInterleaving = true;
   }
 }
 
@@ -1501,13 +1498,12 @@
                      Abstract_VM_Version::parallel_worker_threads());
   }
 
-  if (FLAG_IS_DEFAULT(MarkStackSize)) {
-    FLAG_SET_DEFAULT(MarkStackSize, 128 * TASKQUEUE_SIZE);
-  }
-  if (PrintGCDetails && Verbose) {
-    tty->print_cr("MarkStackSize: %uk  MarkStackSizeMax: %uk",
-      MarkStackSize / K, MarkStackSizeMax / K);
-    tty->print_cr("ConcGCThreads: %u", ConcGCThreads);
+  // MarkStackSize will be set (if it hasn't been set by the user)
+  // when concurrent marking is initialized.
+  // Its value will be based upon the number of parallel marking threads.
+  // But we do set the maximum mark stack size here.
+  if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
+    FLAG_SET_DEFAULT(MarkStackSizeMax, 128 * TASKQUEUE_SIZE);
   }
 
   if (FLAG_IS_DEFAULT(GCTimeRatio) || GCTimeRatio == 0) {
@@ -1519,6 +1515,12 @@
     // is allocation). We might consider increase it further.
     FLAG_SET_DEFAULT(GCTimeRatio, 9);
   }
+
+  if (PrintGCDetails && Verbose) {
+    tty->print_cr("MarkStackSize: %uk  MarkStackSizeMax: %uk",
+      MarkStackSize / K, MarkStackSizeMax / K);
+    tty->print_cr("ConcGCThreads: %u", ConcGCThreads);
+  }
 }
 
 void Arguments::set_heap_size() {
@@ -1780,6 +1782,24 @@
   return status;
 }
 
+void Arguments::check_deprecated_gcs() {
+  if (UseConcMarkSweepGC && !UseParNewGC) {
+    warning("Using the DefNew young collector with the CMS collector is deprecated "
+        "and will likely be removed in a future release");
+  }
+
+  if (UseParNewGC && !UseConcMarkSweepGC) {
+    // !UseConcMarkSweepGC means that we are using the serial old GC. Unfortunately we don't
+    // set up UseSerialGC properly, so that can't be used in the check here.
+    warning("Using the ParNew young collector with the Serial old collector is deprecated "
+        "and will likely be removed in a future release");
+  }
+
+  if (CMSIncrementalMode) {
+    warning("Using incremental CMS is deprecated and will likely be removed in a future release");
+  }
+}
+
 // Check stack pages settings
 bool Arguments::check_stack_pages()
 {
@@ -1839,6 +1859,11 @@
   status = status && verify_percentage(MinHeapFreeRatio, "MinHeapFreeRatio");
   status = status && verify_percentage(MaxHeapFreeRatio, "MaxHeapFreeRatio");
 
+  // Divide by bucket size to prevent a large size from causing rollover when
+  // calculating the amount of memory needed for the String table.
+  status = status && verify_interval(StringTableSize, defaultStringTableSize,
+    (max_uintx / StringTable::bucket_size()), "StringTable size");
+
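
The upper bound is max_uintx divided by the bucket size, so the subsequent size-times-bucket-size allocation computation cannot wrap around. The guard in isolation (generic C++, our illustration; B stands in for StringTable::bucket_size()):

    #include <cassert>
    #include <cstdint>

    // size * B cannot exceed max (and thus cannot wrap) iff size <= max / B,
    // because integer division floors: size <= max / B implies
    // size * B <= (max / B) * B <= max.
    bool fits(uint64_t size, uint64_t B, uint64_t max) {
      return size <= max / B;
    }

    int main() {
      assert(fits(60013, 16, UINT64_MAX));           // a sane table size passes
      assert(!fits(UINT64_MAX / 8, 16, UINT64_MAX)); // an overflowing one fails
      return 0;
    }
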
   if (MinHeapFreeRatio > MaxHeapFreeRatio) {
     jio_fprintf(defaultStream::error_stream(),
                 "MinHeapFreeRatio (" UINTX_FORMAT ") must be less than or "
@@ -1977,6 +2002,9 @@
   status = status && verify_min_value(ClassMetaspaceSize, 1*M,
                                       "ClassMetaspaceSize");
 
+  status = status && verify_interval(MarkStackSizeMax,
+                                  1, (max_jint - 1), "MarkStackSizeMax");
+
 #ifdef SPARC
   if (UseConcMarkSweepGC || UseG1GC) {
     // Issue a stern warning if the user has explicitly set
@@ -2564,7 +2592,9 @@
          FLAG_SET_CMDLINE(uintx, MaxNewSize, NewSize);
       }
 
+#ifndef _ALLBSD_SOURCE  // UseLargePages is not yet supported on BSD.
       FLAG_SET_DEFAULT(UseLargePages, true);
+#endif
 
       // Increase some data structure sizes for efficiency
       FLAG_SET_CMDLINE(uintx, BaseFootPrintEstimate, MaxHeapSize);
@@ -2771,6 +2801,11 @@
         return JNI_EINVAL;
       }
       FLAG_SET_CMDLINE(uintx, MaxDirectMemorySize, max_direct_memory_size);
+    } else if (match_option(option, "-XX:+UseVMInterruptibleIO", &tail)) {
+      // NOTE! In JDK 9, the UseVMInterruptibleIO flag will be removed
+      //       completely, and specifying it will cause VM initialization failures!
+      warning("-XX:+UseVMInterruptibleIO is obsolete and will be removed in a future release.");
+      FLAG_SET_CMDLINE(bool, UseVMInterruptibleIO, true);
     } else if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
       // Skip -XX:Flags= since that case has already been handled
       if (strncmp(tail, "Flags=", strlen("Flags=")) != 0) {
@@ -2786,10 +2821,6 @@
 
  // Change the default value for flags which have different default values
   // when working with older JDKs.
-  if (JDK_Version::current().compare_major(6) <= 0 &&
-      FLAG_IS_DEFAULT(UseVMInterruptibleIO)) {
-    FLAG_SET_DEFAULT(UseVMInterruptibleIO, true);
-  }
 #ifdef LINUX
  if (JDK_Version::current().compare_major(6) <= 0 &&
       FLAG_IS_DEFAULT(UseLinuxPosixThreadCPUClocks)) {
@@ -2983,11 +3014,6 @@
     FLAG_SET_DEFAULT(UseLargePages, false);
   }
 
-  // Add 2M to any size for SharedReadOnlySize to get around the JPRT setting
-  if (DumpSharedSpaces && !FLAG_IS_DEFAULT(SharedReadOnlySize)) {
-    SharedReadOnlySize = 14*M;
-  }
-
   if (DumpSharedSpaces) {
     if (RequireSharedSpaces) {
       warning("cannot dump shared archive while using shared archive");
@@ -3024,7 +3050,6 @@
   strcpy(shared_archive_path, jvm_path);
   strcat(shared_archive_path, os::file_separator());
   strcat(shared_archive_path, "classes");
-  DEBUG_ONLY(strcat(shared_archive_path, "_g");)
   strcat(shared_archive_path, ".jsa");
   SharedArchivePath = shared_archive_path;
 
@@ -3128,6 +3153,10 @@
   UNSUPPORTED_OPTION(UseG1GC, "G1 GC");
 #endif
 
+#ifdef _ALLBSD_SOURCE  // UseLargePages is not yet supported on BSD.
+  UNSUPPORTED_OPTION(UseLargePages, "-XX:+UseLargePages");
+#endif
+
 #if !INCLUDE_ALTERNATE_GCS
   if (UseParallelGC) {
     warning("Parallel GC is not supported in this VM.  Using Serial GC.");
@@ -3229,6 +3258,7 @@
   } else if (UseG1GC) {
     set_g1_gc_flags();
   }
+  check_deprecated_gcs();
 #endif // INCLUDE_ALTERNATE_GCS
 
 #ifdef SERIALGC
@@ -3273,6 +3303,18 @@
   if (!EliminateLocks) {
     EliminateNestedLocks = false;
   }
+  if (!Inline) {
+    IncrementalInline = false;
+  }
+#ifndef PRODUCT
+  if (!IncrementalInline) {
+    AlwaysIncrementalInline = false;
+  }
+#endif
+  if (IncrementalInline && FLAG_IS_DEFAULT(MaxNodeLimit)) {
+    // incremental inlining: bump MaxNodeLimit
+    FLAG_SET_DEFAULT(MaxNodeLimit, (intx)75000);
+  }
 #endif
 
   if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
@@ -3314,6 +3356,22 @@
   return JNI_OK;
 }
 
+jint Arguments::adjust_after_os() {
+#if INCLUDE_ALTERNATE_GCS
+  if (UseParallelGC || UseParallelOldGC) {
+    if (UseNUMA) {
+      if (FLAG_IS_DEFAULT(MinHeapDeltaBytes)) {
+        FLAG_SET_DEFAULT(MinHeapDeltaBytes, 64*M);
+      }
+      // For those collectors or operating systems (e.g., Windows) that do
+      // not support full UseNUMA, we will map to UseNUMAInterleaving for now
+      UseNUMAInterleaving = true;
+    }
+  }
+#endif
+  return JNI_OK;
+}
+
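
The caller of the new hook is not part of this changeset; our assumption is that VM startup invokes it once the OS layer is initialized, roughly like this:

    // Hypothetical wiring (assumption -- the caller is not in this diff):
    jint os_init_2_result = os::init_2();
    if (os_init_2_result != JNI_OK) return os_init_2_result;

    // Flags that depend on OS facilities (e.g. NUMA support) can now be
    // finalized.
    jint adjust_result = Arguments::adjust_after_os();
    if (adjust_result != JNI_OK) return adjust_result;
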
 int Arguments::PropertyList_count(SystemProperty* pl) {
   int count = 0;
   while(pl != NULL) {
--- a/src/share/vm/runtime/arguments.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/arguments.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -409,8 +409,11 @@
  public:
   // Parses the arguments
   static jint parse(const JavaVMInitArgs* args);
+  // Adjusts the arguments after the OS has been initialized
+  static jint adjust_after_os();
   // Check for consistency in the selection of the garbage collector.
   static bool check_gc_consistency();
+  static void check_deprecated_gcs();
   // Check consistency or otherwise of VM argument settings
   static bool check_vm_args_consistency();
   // Check stack pages settings
--- a/src/share/vm/runtime/atomic.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/atomic.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -36,36 +36,8 @@
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
 #endif
-#ifdef TARGET_OS_ARCH_linux_x86
-# include "atomic_linux_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_sparc
-# include "atomic_linux_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_zero
-# include "atomic_linux_zero.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_x86
-# include "atomic_solaris_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_sparc
-# include "atomic_solaris_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_windows_x86
-# include "atomic_windows_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_arm
-# include "atomic_linux_arm.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_ppc
-# include "atomic_linux_ppc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_x86
-# include "atomic_bsd_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_zero
-# include "atomic_bsd_zero.inline.hpp"
-#endif
+
+#include "runtime/atomic.inline.hpp"
 
 jbyte Atomic::cmpxchg(jbyte exchange_value, volatile jbyte* dest, jbyte compare_value) {
   assert(sizeof(jbyte) == 1, "assumption.");
--- a/src/share/vm/runtime/atomic.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/atomic.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,60 +30,59 @@
 class Atomic : AllStatic {
  public:
   // Atomically store to a location
-  static void store    (jbyte    store_value, jbyte*    dest);
-  static void store    (jshort   store_value, jshort*   dest);
-  static void store    (jint     store_value, jint*     dest);
-  static void store    (jlong    store_value, jlong*    dest);
-  static void store_ptr(intptr_t store_value, intptr_t* dest);
-  static void store_ptr(void*    store_value, void*     dest);
+  inline static void store    (jbyte    store_value, jbyte*    dest);
+  inline static void store    (jshort   store_value, jshort*   dest);
+  inline static void store    (jint     store_value, jint*     dest);
+  inline static void store    (jlong    store_value, jlong*    dest);
+  inline static void store_ptr(intptr_t store_value, intptr_t* dest);
+  inline static void store_ptr(void*    store_value, void*     dest);
 
-  static void store    (jbyte    store_value, volatile jbyte*    dest);
-  static void store    (jshort   store_value, volatile jshort*   dest);
-  static void store    (jint     store_value, volatile jint*     dest);
-  static void store    (jlong    store_value, volatile jlong*    dest);
-  static void store_ptr(intptr_t store_value, volatile intptr_t* dest);
-  static void store_ptr(void*    store_value, volatile void*     dest);
+  inline static void store    (jbyte    store_value, volatile jbyte*    dest);
+  inline static void store    (jshort   store_value, volatile jshort*   dest);
+  inline static void store    (jint     store_value, volatile jint*     dest);
+  inline static void store    (jlong    store_value, volatile jlong*    dest);
+  inline static void store_ptr(intptr_t store_value, volatile intptr_t* dest);
+  inline static void store_ptr(void*    store_value, volatile void*     dest);
 
-  static jlong load(volatile jlong* src);
+  inline static jlong load(volatile jlong* src);
 
   // Atomically add to a location, return updated value
-  static jint     add    (jint     add_value, volatile jint*     dest);
-  static intptr_t add_ptr(intptr_t add_value, volatile intptr_t* dest);
-  static void*    add_ptr(intptr_t add_value, volatile void*     dest);
+  inline static jint     add    (jint     add_value, volatile jint*     dest);
+  inline static intptr_t add_ptr(intptr_t add_value, volatile intptr_t* dest);
+  inline static void*    add_ptr(intptr_t add_value, volatile void*     dest);
 
-  static jlong    add    (jlong    add_value, volatile jlong*    dest);
+         static jlong    add    (jlong    add_value, volatile jlong*    dest);
 
   // Atomically increment location
-  static void inc    (volatile jint*     dest);
-  static void inc_ptr(volatile intptr_t* dest);
-  static void inc_ptr(volatile void*     dest);
+  inline static void inc    (volatile jint*     dest);
+  inline static void inc_ptr(volatile intptr_t* dest);
+  inline static void inc_ptr(volatile void*     dest);
 
   // Atomically decrement a location
-  static void dec    (volatile jint*     dest);
-  static void dec_ptr(volatile intptr_t* dest);
-  static void dec_ptr(volatile void*     dest);
+  inline static void dec    (volatile jint*     dest);
+  inline static void dec_ptr(volatile intptr_t* dest);
+  inline static void dec_ptr(volatile void*     dest);
 
   // Performs atomic exchange of *dest with exchange_value.  Returns old prior value of *dest.
-  static jint         xchg(jint     exchange_value, volatile jint*     dest);
-  static unsigned int xchg(unsigned int exchange_value,
-                           volatile unsigned int* dest);
+  inline static jint         xchg(jint         exchange_value, volatile jint*         dest);
+         static unsigned int xchg(unsigned int exchange_value, volatile unsigned int* dest);
 
-  static intptr_t xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest);
-  static void*    xchg_ptr(void*    exchange_value, volatile void*   dest);
+  inline static intptr_t xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest);
+  inline static void*    xchg_ptr(void*    exchange_value, volatile void*   dest);
 
   // Performs atomic compare of *dest and compare_value, and exchanges *dest with exchange_value
   // if the comparison succeeded.  Returns prior value of *dest.  Guarantees a two-way memory
   // barrier across the cmpxchg.  I.e., it's really a 'fence_cmpxchg_acquire'.
-  static jbyte    cmpxchg    (jbyte    exchange_value, volatile jbyte*    dest, jbyte    compare_value);
-  static jint     cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value);
-  static jlong    cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value);
+         static jbyte    cmpxchg    (jbyte    exchange_value, volatile jbyte*    dest, jbyte    compare_value);
+  inline static jint     cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value);
+  inline static jlong    cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value);
 
-  static unsigned int cmpxchg(unsigned int exchange_value,
-                              volatile unsigned int* dest,
-                              unsigned int compare_value);
+         static unsigned int cmpxchg(unsigned int exchange_value,
+                                     volatile unsigned int* dest,
+                                     unsigned int compare_value);
 
-  static intptr_t cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value);
-  static void*    cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value);
+  inline static intptr_t cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value);
+  inline static void*    cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value);
 };
 
 #endif // SHARE_VM_RUNTIME_ATOMIC_HPP
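
The declarations gain an inline specifier here while the definitions move behind the new atomic.inline.hpp below, so headers can keep including atomic.hpp cheaply and only the .cpp files that need the definitions pull in the platform dispatch. Reduced to a generic example with hypothetical Widget names (our sketch, not HotSpot code):

    // widget.hpp -- interface only; safe to include from other headers.
    class Widget {
     public:
      inline static int add(int add_value, volatile int* dest);
    };

    // widget.inline.hpp -- definition dispatch, included from .cpp files.
    #include "widget.hpp"
    #ifdef TARGET_OS_ARCH_linux_x86
    # include "widget_linux_x86.inline.hpp"   // defines Widget::add
    #endif
    #ifdef TARGET_OS_ARCH_windows_x86
    # include "widget_windows_x86.inline.hpp"
    #endif
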
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/atomic.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP
+#define SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP
+
+#include "runtime/atomic.hpp"
+
+// Linux
+#ifdef TARGET_OS_ARCH_linux_x86
+# include "atomic_linux_x86.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_sparc
+# include "atomic_linux_sparc.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_zero
+# include "atomic_linux_zero.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_arm
+# include "atomic_linux_arm.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_ppc
+# include "atomic_linux_ppc.inline.hpp"
+#endif
+
+// Solaris
+#ifdef TARGET_OS_ARCH_solaris_x86
+# include "atomic_solaris_x86.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_solaris_sparc
+# include "atomic_solaris_sparc.inline.hpp"
+#endif
+
+// Windows
+#ifdef TARGET_OS_ARCH_windows_x86
+# include "atomic_windows_x86.inline.hpp"
+#endif
+
+// BSD
+#ifdef TARGET_OS_ARCH_bsd_x86
+# include "atomic_bsd_x86.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_bsd_zero
+# include "atomic_bsd_zero.inline.hpp"
+#endif
+
+#endif // SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP
--- a/src/share/vm/runtime/biasedLocking.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/biasedLocking.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -39,7 +39,7 @@
 static GrowableArray<markOop>* _preserved_mark_stack = NULL;
 
 static void enable_biased_locking(Klass* k) {
-  Klass::cast(k)->set_prototype_header(markOopDesc::biased_locking_prototype());
+  k->set_prototype_header(markOopDesc::biased_locking_prototype());
 }
 
 class VM_EnableBiasedLocking: public VM_Operation {
@@ -149,7 +149,7 @@
     if (TraceBiasedLocking) {
       ResourceMark rm;
       tty->print_cr("  (Skipping revocation of object of type %s because it's no longer biased)",
-                    Klass::cast(obj->klass())->external_name());
+                    obj->klass()->external_name());
     }
     return BiasedLocking::NOT_BIASED;
   }
@@ -161,7 +161,7 @@
   if (TraceBiasedLocking && (Verbose || !is_bulk)) {
     ResourceMark rm;
     tty->print_cr("Revoking bias of object " INTPTR_FORMAT " , mark " INTPTR_FORMAT " , type %s , prototype header " INTPTR_FORMAT " , allow rebias %d , requesting thread " INTPTR_FORMAT,
-                  (intptr_t) obj, (intptr_t) mark, Klass::cast(obj->klass())->external_name(), (intptr_t) Klass::cast(obj->klass())->prototype_header(), (allow_rebias ? 1 : 0), (intptr_t) requesting_thread);
+                  (intptr_t) obj, (intptr_t) mark, obj->klass()->external_name(), (intptr_t) obj->klass()->prototype_header(), (allow_rebias ? 1 : 0), (intptr_t) requesting_thread);
   }
 
   JavaThread* biased_thread = mark->biased_locker();
@@ -326,7 +326,7 @@
     tty->print_cr("* Beginning bulk revocation (kind == %s) because of object "
                   INTPTR_FORMAT " , mark " INTPTR_FORMAT " , type %s",
                   (bulk_rebias ? "rebias" : "revoke"),
-                  (intptr_t) o, (intptr_t) o->mark(), Klass::cast(o->klass())->external_name());
+                  (intptr_t) o, (intptr_t) o->mark(), o->klass()->external_name());
   }
 
   jlong cur_time = os::javaTimeMillis();
@@ -334,7 +334,7 @@
 
 
   Klass* k_o = o->klass();
-  Klass* klass = Klass::cast(k_o);
+  Klass* klass = k_o;
 
   if (bulk_rebias) {
     // Use the epoch in the klass of the object to implicitly revoke
@@ -546,7 +546,7 @@
       return BIAS_REVOKED;
     }
   } else if (mark->has_bias_pattern()) {
-    Klass* k = Klass::cast(obj->klass());
+    Klass* k = obj->klass();
     markOop prototype_header = k->prototype_header();
     if (!prototype_header->has_bias_pattern()) {
       // This object has a stale bias from before the bulk revocation
@@ -590,7 +590,7 @@
   if (heuristics == HR_NOT_BIASED) {
     return NOT_BIASED;
   } else if (heuristics == HR_SINGLE_REVOKE) {
-    Klass *k = Klass::cast(obj->klass());
+    Klass *k = obj->klass();
     markOop prototype_header = k->prototype_header();
     if (mark->biased_locker() == THREAD &&
         prototype_header->bias_epoch() == mark->bias_epoch()) {
--- a/src/share/vm/runtime/compilationPolicy.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/compilationPolicy.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -97,6 +97,9 @@
 // This is intended to force compiles for methods (usually for
 // debugging) that would otherwise be interpreted for some reason.
 bool CompilationPolicy::must_be_compiled(methodHandle m, int comp_level) {
+  // Don't allow Xcomp to cause compiles in replay mode
+  if (ReplayCompiles) return false;
+
   if (m->has_compiled_code()) return false;       // already compiled
   if (!can_be_compiled(m, comp_level)) return false;
 
@@ -322,6 +325,16 @@
       return NULL;
     }
   }
+  if (CompileTheWorld || ReplayCompiles) {
+    // Don't trigger other compiles in testing mode
+    if (bci == InvocationEntryBci) {
+      reset_counter_for_invocation_event(method);
+    } else {
+      reset_counter_for_back_branch_event(method);
+    }
+    return NULL;
+  }
+
   if (bci == InvocationEntryBci) {
     // when code cache is full, compilation gets switched off, UseCompiler
     // is set to false
@@ -627,7 +640,7 @@
   // negative filter: should send NOT be inlined?  returns NULL (--> inline) or rejection msg
   if (m->is_abstract()) return (_msg = "abstract method");
   // note: we allow ik->is_abstract()
-  if (!InstanceKlass::cast(m->method_holder())->is_initialized()) return (_msg = "method holder not initialized");
+  if (!m->method_holder()->is_initialized()) return (_msg = "method holder not initialized");
   if (m->is_native()) return (_msg = "native method");
   nmethod* m_code = m->code();
   if (m_code != NULL && m_code->code_size() > InlineSmallCode)
--- a/src/share/vm/runtime/deoptimization.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/deoptimization.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -721,7 +721,7 @@
         guarantee(false, "wrong number of expression stack elements during deopt");
       }
       VerifyOopClosure verify;
-      iframe->oops_interpreted_do(&verify, &rm, false);
+      iframe->oops_interpreted_do(&verify, NULL, &rm, false);
       callee_size_of_parameters = mh->size_of_parameters();
       callee_max_locals = mh->max_locals();
       is_top_frame = false;
@@ -1191,12 +1191,12 @@
 
   if (!constant_pool->tag_at(index).is_symbol()) return;
 
-  Handle class_loader (THREAD, InstanceKlass::cast(constant_pool->pool_holder())->class_loader());
+  Handle class_loader (THREAD, constant_pool->pool_holder()->class_loader());
   Symbol*  symbol  = constant_pool->symbol_at(index);
 
   // class name?
   if (symbol->byte_at(0) != '(') {
-    Handle protection_domain (THREAD, Klass::cast(constant_pool->pool_holder())->protection_domain());
+    Handle protection_domain (THREAD, constant_pool->pool_holder()->protection_domain());
     SystemDictionary::resolve_or_null(symbol, class_loader, protection_domain, CHECK);
     return;
   }
@@ -1206,7 +1206,7 @@
   for (SignatureStream ss(symbol); !ss.is_done(); ss.next()) {
     if (ss.is_object()) {
       Symbol* class_name = ss.as_symbol(CHECK);
-      Handle protection_domain (THREAD, Klass::cast(constant_pool->pool_holder())->protection_domain());
+      Handle protection_domain (THREAD, constant_pool->pool_holder()->protection_domain());
       SystemDictionary::resolve_or_null(class_name, class_loader, protection_domain, CHECK);
     }
   }
@@ -1242,8 +1242,8 @@
   nmethodLocker nl(fr.pc());
 
   // Log a message
-  Events::log_deopt_message(thread, "Uncommon trap %d fr.pc " INTPTR_FORMAT,
-                            trap_request, fr.pc());
+  Events::log(thread, "Uncommon trap: trap_request=" PTR32_FORMAT " fr.pc=" INTPTR_FORMAT,
+              trap_request, fr.pc());
 
   {
     ResourceMark rm;
@@ -1274,6 +1274,11 @@
     MethodData* trap_mdo =
       get_method_data(thread, trap_method, create_if_missing);
 
+    // Log a message
+    Events::log_deopt_message(thread, "Uncommon trap: reason=%s action=%s pc=" INTPTR_FORMAT " method=%s @ %d",
+                              trap_reason_name(reason), trap_action_name(action), fr.pc(),
+                              trap_method->name_and_sig_as_C_string(), trap_bci);
+
     // Print a bunch of diagnostics, if requested.
     if (TraceDeoptimization || LogCompilation) {
       ResourceMark rm;
--- a/src/share/vm/runtime/fieldDescriptor.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/fieldDescriptor.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -36,7 +36,7 @@
 
 
 oop fieldDescriptor::loader() const {
-  return InstanceKlass::cast(_cp->pool_holder())->class_loader();
+  return _cp->pool_holder()->class_loader();
 }
 
 Symbol* fieldDescriptor::generic_signature() const {
@@ -45,7 +45,7 @@
   }
 
   int idx = 0;
-  InstanceKlass* ik = InstanceKlass::cast(field_holder());
+  InstanceKlass* ik = field_holder();
   for (AllFieldStream fs(ik); !fs.done(); fs.next()) {
     if (idx == _index) {
       return fs.generic_signature();
@@ -58,13 +58,24 @@
 }
 
 AnnotationArray* fieldDescriptor::annotations() const {
-  InstanceKlass* ik = InstanceKlass::cast(field_holder());
+  InstanceKlass* ik = field_holder();
   Array<AnnotationArray*>* md = ik->fields_annotations();
   if (md == NULL)
     return NULL;
   return md->at(index());
 }
 
+AnnotationArray* fieldDescriptor::type_annotations() const {
+  InstanceKlass* ik = field_holder();
+  Annotations* type_annos = ik->type_annotations();
+  if (type_annos == NULL)
+    return NULL;
+  Array<AnnotationArray*>* md = type_annos->fields_annotations();
+  if (md == NULL)
+    return NULL;
+  return md->at(index());
+}
+
 constantTag fieldDescriptor::initial_value_tag() const {
   return constants()->tag_at(initial_value_index());
 }
--- a/src/share/vm/runtime/fieldDescriptor.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/fieldDescriptor.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -43,12 +43,12 @@
 
   // update the access_flags for the field in the klass
   void update_klass_field_access_flag() {
-    InstanceKlass* ik = InstanceKlass::cast(field_holder());
+    InstanceKlass* ik = field_holder();
     ik->field(index())->set_access_flags(_access_flags.as_short());
   }
 
   FieldInfo* field() const {
-    InstanceKlass* ik = InstanceKlass::cast(field_holder());
+    InstanceKlass* ik = field_holder();
     return ik->field(_index);
   }
 
@@ -59,46 +59,47 @@
   Symbol* signature() const {
     return field()->signature(_cp);
   }
-  Klass* field_holder() const        { return _cp->pool_holder(); }
-  ConstantPool* constants() const    { return _cp(); }
-  AccessFlags access_flags() const     { return _access_flags; }
-  oop loader() const;
+  InstanceKlass* field_holder()   const    { return _cp->pool_holder(); }
+  ConstantPool* constants()       const    { return _cp(); }
+  AccessFlags access_flags()      const    { return _access_flags; }
+  oop loader()                    const;
   // Offset (in words) of field from start of instanceOop / Klass*
-  int offset() const                   { return field()->offset(); }
-  Symbol* generic_signature() const;
-  int index() const                    { return _index; }
-  AnnotationArray* annotations() const;
+  int offset()                    const    { return field()->offset(); }
+  Symbol* generic_signature()     const;
+  int index()                     const    { return _index; }
+  AnnotationArray* annotations()  const;
+  AnnotationArray* type_annotations()  const;
 
   // Initial field value
-  bool has_initial_value() const          { return field()->initval_index() != 0; }
-  int initial_value_index() const         { return field()->initval_index(); }
+  bool has_initial_value()        const    { return field()->initval_index() != 0; }
+  int initial_value_index()       const    { return field()->initval_index(); }
   constantTag initial_value_tag() const;  // The tag will return true on one of is_int(), is_long(), is_single(), is_double()
-  jint        int_initial_value() const;
-  jlong       long_initial_value() const;
-  jfloat      float_initial_value() const;
-  jdouble     double_initial_value() const;
-  oop         string_initial_value(TRAPS) const;
+  jint int_initial_value()        const;
+  jlong long_initial_value()      const;
+  jfloat float_initial_value()    const;
+  jdouble double_initial_value()  const;
+  oop string_initial_value(TRAPS) const;
 
   // Field signature type
-  BasicType field_type() const            { return FieldType::basic_type(signature()); }
+  BasicType field_type()          const    { return FieldType::basic_type(signature()); }
 
   // Access flags
-  bool is_public() const                  { return access_flags().is_public(); }
-  bool is_private() const                 { return access_flags().is_private(); }
-  bool is_protected() const               { return access_flags().is_protected(); }
-  bool is_package_private() const         { return !is_public() && !is_private() && !is_protected(); }
+  bool is_public()                const    { return access_flags().is_public(); }
+  bool is_private()               const    { return access_flags().is_private(); }
+  bool is_protected()             const    { return access_flags().is_protected(); }
+  bool is_package_private()       const    { return !is_public() && !is_private() && !is_protected(); }
 
-  bool is_static() const                  { return access_flags().is_static(); }
-  bool is_final() const                   { return access_flags().is_final(); }
-  bool is_volatile() const                { return access_flags().is_volatile(); }
-  bool is_transient() const               { return access_flags().is_transient(); }
+  bool is_static()                const    { return access_flags().is_static(); }
+  bool is_final()                 const    { return access_flags().is_final(); }
+  bool is_volatile()              const    { return access_flags().is_volatile(); }
+  bool is_transient()             const    { return access_flags().is_transient(); }
 
-  bool is_synthetic() const               { return access_flags().is_synthetic(); }
+  bool is_synthetic()             const    { return access_flags().is_synthetic(); }
 
-  bool is_field_access_watched() const    { return access_flags().is_field_access_watched(); }
+  bool is_field_access_watched()  const    { return access_flags().is_field_access_watched(); }
   bool is_field_modification_watched() const
-                                          { return access_flags().is_field_modification_watched(); }
-  bool has_generic_signature() const      { return access_flags().field_has_generic_signature(); }
+                                           { return access_flags().is_field_modification_watched(); }
+  bool has_generic_signature()    const    { return access_flags().field_has_generic_signature(); }
 
   void set_is_field_access_watched(const bool value) {
     _access_flags.set_is_field_access_watched(value);
--- a/src/share/vm/runtime/fprofiler.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/fprofiler.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,18 +26,6 @@
 #define SHARE_VM_RUNTIME_FPROFILER_HPP
 
 #include "runtime/timer.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 // a simple flat profiler for Java
 
--- a/src/share/vm/runtime/frame.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/frame.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "compiler/disassembler.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/oopMapCache.hpp"
@@ -879,7 +880,8 @@
 }
 
 
-void frame::oops_interpreted_do(OopClosure* f, const RegisterMap* map, bool query_oop_map_cache) {
+void frame::oops_interpreted_do(OopClosure* f, CLDToOopClosure* cld_f,
+    const RegisterMap* map, bool query_oop_map_cache) {
   assert(is_interpreted_frame(), "Not an interpreted frame");
   assert(map != NULL, "map must be set");
   Thread *thread = Thread::current();
@@ -906,6 +908,16 @@
   }
 
   // process fixed part
+  if (cld_f != NULL) {
+    // The method pointer in the frame might be the only path to the method's
+    // klass, and the klass needs to be kept alive while executing. The GCs
+    // don't trace through method pointers, so typically in similar situations
+    // the mirror or the class loader of the klass are installed as a GC root.
+    // To minimize the overhead of doing that here, we ask the GC to pass down a
+    // closure that knows how to keep klasses alive given a ClassLoaderData.
+    cld_f->do_cld(m->method_holder()->class_loader_data());
+  }
+
 #if !defined(PPC) || defined(ZERO)
   if (m->is_native()) {
 #ifdef CC_INTERP
@@ -1108,7 +1120,7 @@
 }
 
 
-void frame::oops_do_internal(OopClosure* f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache) {
+void frame::oops_do_internal(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache) {
 #ifndef PRODUCT
   // simulate GC crash here to dump java thread in error report
   if (CrashGCForDumpingJavaThread) {
@@ -1117,7 +1129,7 @@
   }
 #endif
   if (is_interpreted_frame()) {
-    oops_interpreted_do(f, map, use_interpreter_oop_map_cache);
+    oops_interpreted_do(f, cld_f, map, use_interpreter_oop_map_cache);
   } else if (is_entry_frame()) {
     oops_entry_do(f, map);
   } else if (CodeCache::contains(pc())) {
@@ -1278,7 +1290,7 @@
     }
   }
   COMPILER2_PRESENT(assert(DerivedPointerTable::is_empty(), "must be empty before verify");)
-  oops_do_internal(&VerifyOopClosure::verify_oop, NULL, (RegisterMap*)map, false);
+  oops_do_internal(&VerifyOopClosure::verify_oop, NULL, NULL, (RegisterMap*)map, false);
 }
 
 
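The new cld_f parameter threaded through frame.cpp above lets the GC pass down a closure that keeps a method's klass alive via its ClassLoaderData instead of tracing through method pointers. A minimal sketch of that callback pattern in C++, using hypothetical stand-in types (ClassLoaderData here is a toy struct, not the HotSpot class):

    #include <cstdio>

    // Toy stand-in for HotSpot's ClassLoaderData.
    struct ClassLoaderData {
        const char* name;
    };

    // The GC supplies a closure that knows how to keep a loader's
    // metadata alive; frame-walking code only has to call do_cld().
    struct CLDClosure {
        virtual void do_cld(ClassLoaderData* cld) = 0;
        virtual ~CLDClosure() {}
    };

    struct KeepAliveClosure : CLDClosure {
        void do_cld(ClassLoaderData* cld) {
            // A real GC would mark the loader's mirror/oops here.
            std::printf("keeping %s alive\n", cld->name);
        }
    };

    int main() {
        ClassLoaderData cld = { "boot" };
        KeepAliveClosure keep_alive;
        CLDClosure* cld_f = &keep_alive;   // may be NULL when not needed
        if (cld_f != NULL) cld_f->do_cld(&cld);
        return 0;
    }
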
--- a/src/share/vm/runtime/frame.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/frame.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,7 +25,6 @@
 #ifndef SHARE_VM_RUNTIME_FRAME_HPP
 #define SHARE_VM_RUNTIME_FRAME_HPP
 
-#include "asm/assembler.hpp"
 #include "oops/method.hpp"
 #include "runtime/basicLock.hpp"
 #include "runtime/monitorChunk.hpp"
@@ -413,19 +412,19 @@
 
   // Oops-do's
   void oops_compiled_arguments_do(Symbol* signature, bool has_receiver, const RegisterMap* reg_map, OopClosure* f);
-  void oops_interpreted_do(OopClosure* f, const RegisterMap* map, bool query_oop_map_cache = true);
+  void oops_interpreted_do(OopClosure* f, CLDToOopClosure* cld_f, const RegisterMap* map, bool query_oop_map_cache = true);
 
  private:
   void oops_interpreted_arguments_do(Symbol* signature, bool has_receiver, OopClosure* f);
 
   // Iteration of oops
-  void oops_do_internal(OopClosure* f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache);
+  void oops_do_internal(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache);
   void oops_entry_do(OopClosure* f, const RegisterMap* map);
   void oops_code_blob_do(OopClosure* f, CodeBlobClosure* cf, const RegisterMap* map);
   int adjust_offset(Method* method, int index); // helper for above fn
  public:
   // Memory management
-  void oops_do(OopClosure* f, CodeBlobClosure* cf, RegisterMap* map) { oops_do_internal(f, cf, map, true); }
+  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map) { oops_do_internal(f, cld_f, cf, map, true); }
   void nmethods_do(CodeBlobClosure* cf);
 
   // RedefineClasses support for finding live interpreted methods on the stack
--- a/src/share/vm/runtime/globals.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/globals.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -533,6 +533,9 @@
   product(intx, UseSSE, 99,                                                 \
           "Highest supported SSE instructions set on x86/x64")              \
                                                                             \
+  product(bool, UseAES, false,                                               \
+          "Control whether AES instructions can be used on x86/x64")        \
+                                                                            \
   product(uintx, LargePageSizeInBytes, 0,                                   \
           "Large page size (0 to let VM choose the page size")              \
                                                                             \
@@ -635,6 +638,9 @@
   product(bool, UseSSE42Intrinsics, false,                                  \
           "SSE4.2 versions of intrinsics")                                  \
                                                                             \
+  product(bool, UseAESIntrinsics, false,                                    \
+          "use intrinsics for AES versions of crypto")                      \
+                                                                            \
   develop(bool, TraceCallFixup, false,                                      \
           "traces all call fixups")                                         \
                                                                             \
@@ -851,9 +857,6 @@
   develop(bool, BreakAtWarning, false,                                      \
           "Execute breakpoint upon encountering VM warning")                \
                                                                             \
-  product_pd(bool, UseVectoredExceptions,                                   \
-          "Temp Flag - Use Vectored Exceptions rather than SEH (Windows Only)") \
-                                                                            \
   develop(bool, TraceVMOperation, false,                                    \
           "Trace vm operations")                                            \
                                                                             \
@@ -919,15 +922,21 @@
   develop(bool, PrintExceptionHandlers, false,                              \
           "Print exception handler tables for all nmethods when generated") \
                                                                             \
+  develop(bool, StressCompiledExceptionHandlers, false,                     \
+         "Exercise compiled exception handlers")                            \
+                                                                            \
   develop(bool, InterceptOSException, false,                                \
           "Starts debugger when an implicit OS (e.g., NULL) "               \
           "exception happens")                                              \
                                                                             \
-  notproduct(bool, PrintCodeCache, false,                                   \
-          "Print the compiled_code cache when exiting")                     \
+  product(bool, PrintCodeCache, false,                                      \
+          "Print the code cache memory usage when exiting")                 \
                                                                             \
   develop(bool, PrintCodeCache2, false,                                     \
-          "Print detailed info on the compiled_code cache when exiting")    \
+          "Print detailed usage info on the code cache when exiting")       \
+                                                                            \
+  product(bool, PrintCodeCacheOnCompilation, false,                         \
+          "Print the code cache memory usage each time a method is compiled") \
                                                                             \
   diagnostic(bool, PrintStubCode, false,                                    \
           "Print generated stub code")                                      \
@@ -963,18 +972,6 @@
   notproduct(uintx, WarnOnStalledSpinLock, 0,                               \
           "Prints warnings for stalled SpinLocks")                          \
                                                                             \
-  develop(bool, InitializeJavaLangSystem, true,                             \
-          "Initialize java.lang.System - turn off for individual "          \
-          "method debugging")                                               \
-                                                                            \
-  develop(bool, InitializeJavaLangString, true,                             \
-          "Initialize java.lang.String - turn off for individual "          \
-          "method debugging")                                               \
-                                                                            \
-  develop(bool, InitializeJavaLangExceptionsErrors, true,                   \
-          "Initialize various error and exception classes - turn off for "  \
-          "individual method debugging")                                    \
-                                                                            \
   product(bool, RegisterFinalizersAtInit, true,                             \
           "Register finalizable objects at end of Object.<init> or "        \
           "after allocation")                                               \
@@ -1069,7 +1066,7 @@
                                                                             \
   product(intx, ClearFPUAtPark, 0, "(Unsafe,Unstable)" )                    \
                                                                             \
-  product(intx, hashCode, 0,                                                \
+  product(intx, hashCode, 5,                                                \
          "(Unstable) select hashCode generation algorithm" )                \
                                                                             \
   product(intx, WorkAroundNPTLTimedWaitHang, 1,                             \
@@ -1095,13 +1092,6 @@
   product(bool, ReduceSignalUsage, false,                                   \
           "Reduce the use of OS signals in Java and/or the VM")             \
                                                                             \
-  notproduct(bool, ValidateMarkSweep, false,                                \
-          "Do extra validation during MarkSweep collection")                \
-                                                                            \
-  notproduct(bool, RecordMarkSweepCompaction, false,                        \
-          "Enable GC-to-GC recording and querying of compaction during "    \
-          "MarkSweep")                                                      \
-                                                                            \
   develop_pd(bool, ShareVtableStubs,                                        \
           "Share vtable stubs (smaller code but worse branch prediction")   \
                                                                             \
@@ -1167,6 +1157,18 @@
   notproduct(bool, PrintCompactFieldsSavings, false,                        \
           "Print how many words were saved with CompactFields")             \
                                                                             \
+  notproduct(bool, PrintFieldLayout, false,                                 \
+          "Print field layout for each class")                              \
+                                                                            \
+  product(intx, ContendedPaddingWidth, 128,                                 \
+          "How many bytes to pad the fields/classes marked @Contended with")\
+                                                                            \
+  product(bool, EnableContended, true,                                      \
+          "Enable @Contended annotation support")                           \
+                                                                            \
+  product(bool, RestrictContended, true,                                    \
+          "Restrict @Contended to trusted classes")                         \
+                                                                            \
   product(bool, UseBiasedLocking, true,                                     \
           "Enable biased locking in JVM")                                   \
                                                                             \
@@ -1600,7 +1602,7 @@
   develop(bool, CMSTraceThreadState, false,                                 \
           "Trace the CMS thread state (enable the trace_state() method)")   \
                                                                             \
-  product(bool, CMSClassUnloadingEnabled, false,                            \
+  product(bool, CMSClassUnloadingEnabled, true,                             \
           "Whether class unloading enabled when using CMS GC")              \
                                                                             \
   product(uintx, CMSClassUnloadingMaxInterval, 0,                           \
@@ -1824,7 +1826,7 @@
                                                                             \
   product(intx, CMSIsTooFullPercentage, 98,                                 \
           "An absolute ceiling above which CMS will always consider the "   \
-          "perm gen ripe for collection")                                   \
+          "unloading of classes when class unloading is enabled")           \
                                                                             \
   develop(bool, CMSTestInFreeList, false,                                   \
           "Check if the coalesced range is already in the "                 \
@@ -1893,13 +1895,13 @@
           "Metadata deallocation alot interval")                            \
                                                                             \
   develop(bool, TraceMetadataChunkAllocation, false,                        \
-          "Trace humongous metadata allocations")                           \
+          "Trace chunk metadata allocations")                               \
                                                                             \
   product(bool, TraceMetadataHumongousAllocation, false,                    \
           "Trace humongous metadata allocations")                           \
                                                                             \
   develop(bool, TraceMetavirtualspaceAllocation, false,                     \
-          "Trace humongous metadata allocations")                           \
+          "Trace virtual space metadata allocations")                       \
                                                                             \
   notproduct(bool, ExecuteInternalVMTests, false,                           \
           "Enable execution of internal VM tests.")                         \
@@ -2211,7 +2213,8 @@
   develop(bool, TraceClassLoaderData, false,                                \
           "Trace class loader loader_data lifetime")                        \
                                                                             \
-  product(uintx, InitialBootClassLoaderMetaspaceSize, 3*M,                  \
+  product(uintx, InitialBootClassLoaderMetaspaceSize,                       \
+          NOT_LP64(2200*K) LP64_ONLY(4*M),                                  \
           "Initial size of the boot class loader data metaspace")           \
                                                                             \
   product(bool, TraceGen0Time, false,                                       \
@@ -3186,6 +3189,26 @@
   product(ccstrlist, CompileCommand, "",                                    \
           "Prepend to .hotspot_compiler; e.g. log,java/lang/String.<init>") \
                                                                             \
+  develop(bool, ReplayCompiles, false,                                      \
+          "Enable replay of compilations from ReplayDataFile")              \
+                                                                            \
+  develop(ccstr, ReplayDataFile, "replay.txt",                              \
+          "file containing compilation replay information")                 \
+                                                                            \
+  develop(intx, ReplaySuppressInitializers, 2,                              \
+          "Controls handling of class initialization during replay"         \
+          "0 - don't do anything special"                                   \
+          "1 - treat all class initializers as empty"                       \
+          "2 - treat class initializers for application classes as empty"   \
+          "3 - allow all class initializers to run during bootstrap but"    \
+          "    pretend they are empty after starting replay")               \
+                                                                            \
+  develop(bool, ReplayIgnoreInitErrors, false,                              \
+          "Ignore exceptions thrown during initialization for replay")      \
+                                                                            \
+  develop(bool, DumpReplayDataOnError, true,                                \
+          "record replay data for crashing compiler threads")               \
+                                                                            \
   product(bool, CICompilerCountPerCPU, false,                               \
           "1 compiler thread for log(N CPUs)")                              \
                                                                             \
@@ -3511,10 +3534,10 @@
   /* Shared spaces */                                                       \
                                                                             \
   product(bool, UseSharedSpaces, true,                                      \
-          "Use shared spaces in the permanent generation")                  \
+          "Use shared spaces for metadata")                                 \
                                                                             \
   product(bool, RequireSharedSpaces, false,                                 \
-          "Require shared spaces in the permanent generation")              \
+          "Require shared spaces for metadata")                             \
                                                                             \
   product(bool, DumpSharedSpaces, false,                                    \
            "Special mode: JVM reads a class list, loads classes, builds "   \
@@ -3525,16 +3548,16 @@
           "Print usage of shared spaces")                                   \
                                                                             \
   product(uintx, SharedReadWriteSize,  NOT_LP64(12*M) LP64_ONLY(16*M),      \
-          "Size of read-write space in permanent generation (in bytes)")    \
+          "Size of read-write space for metadata (in bytes)")               \
                                                                             \
   product(uintx, SharedReadOnlySize,  NOT_LP64(12*M) LP64_ONLY(16*M),       \
-          "Size of read-only space in permanent generation (in bytes)")     \
+          "Size of read-only space for metadata (in bytes)")                \
                                                                             \
   product(uintx, SharedMiscDataSize,    NOT_LP64(2*M) LP64_ONLY(4*M),       \
-          "Size of the shared data area adjacent to the heap (in bytes)")   \
+          "Size of the shared miscellaneous data area (in bytes)")          \
                                                                             \
   product(uintx, SharedMiscCodeSize,    120*K,                              \
-          "Size of the shared code area adjacent to the heap (in bytes)")   \
+          "Size of the shared miscellaneous code area (in bytes)")          \
                                                                             \
   product(uintx, SharedDummyBlockSize, 0,                                   \
           "Size of dummy block used to shift heap addresses (in bytes)")    \
@@ -3590,9 +3613,18 @@
   diagnostic(bool, PrintDTraceDOF, false,                                   \
              "Print the DTrace DOF passed to the system for JSDT probes")   \
                                                                             \
-  product(uintx, StringTableSize, 1009,                                     \
+  product(uintx, StringTableSize, defaultStringTableSize,                   \
           "Number of buckets in the interned String table")                 \
                                                                             \
+  develop(bool, TraceDefaultMethods, false,                                 \
+          "Trace the default method processing steps")                      \
+                                                                            \
+  develop(bool, ParseAllGenericSignatures, false,                           \
+          "Parse all generic signatures while classloading")                \
+                                                                            \
+  develop(bool, VerifyGenericSignatures, false,                             \
+          "Abort VM on erroneous or inconsistent generic signatures")       \
+                                                                            \
   product(bool, UseVMInterruptibleIO, false,                                \
           "(Unstable, Solaris-specific) Thread interrupt before or with "   \
           "EINTR for I/O operations results in OS_INTRPT. The default value"\
--- a/src/share/vm/runtime/handles.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/handles.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -27,21 +27,18 @@
 #include "oops/constantPool.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/thread.inline.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "os_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "os_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 
 #ifdef ASSERT
@@ -158,13 +155,18 @@
 
   // Delete later chunks
   if( _chunk->next() ) {
+    // Reset the arena size before deleting chunks. Otherwise, the total
+    // arena size could exceed the total chunk size.
+    assert(area->size_in_bytes() > size_in_bytes(), "Sanity check");
+    area->set_size_in_bytes(size_in_bytes());
     _chunk->next_chop();
+  } else {
+    assert(area->size_in_bytes() == size_in_bytes(), "Sanity check");
   }
   // Roll back arena to saved top markers
   area->_chunk = _chunk;
   area->_hwm = _hwm;
   area->_max = _max;
-  area->set_size_in_bytes(_size_in_bytes);
 #ifdef ASSERT
   // clear out first chunk (to detect allocation bugs)
   if (ZapVMHandleArea) {
--- a/src/share/vm/runtime/handles.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/handles.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -297,6 +297,7 @@
   void set_previous_handle_mark(HandleMark* mark) { _previous_handle_mark = mark; }
   HandleMark* previous_handle_mark() const        { return _previous_handle_mark; }
 
+  size_t size_in_bytes() const { return _size_in_bytes; }
  public:
   HandleMark();                            // see handles_inline.hpp
   HandleMark(Thread* thread)                      { initialize(thread); }
--- a/src/share/vm/runtime/handles.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/handles.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,18 +26,7 @@
 #define SHARE_VM_RUNTIME_HANDLES_INLINE_HPP
 
 #include "runtime/handles.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 // these inline functions are in a separate file to break an include cycle
 // between Thread and Handle
@@ -91,6 +80,8 @@
       _thread = Thread::current();                                     \
     }                                                                  \
     _thread->metadata_handles()->push((Metadata*)_value);              \
+  } else {                                                             \
+    _thread = NULL;                                                    \
   }                                                                    \
 }                                                                      \
 inline name##Handle& name##Handle::operator=(const name##Handle &s) {  \
@@ -105,6 +96,8 @@
       _thread = Thread::current();                                     \
     }                                                                  \
     _thread->metadata_handles()->push((Metadata*)_value);              \
+  } else {                                                             \
+    _thread = NULL;                                                    \
   }                                                                    \
   return *this;                                                        \
 }                                                                      \
@@ -136,13 +129,18 @@
   HandleArea* area = _area;   // help compilers with poor alias analysis
   // Delete later chunks
   if( _chunk->next() ) {
+    // Reset the arena size before deleting chunks. Otherwise, the total
+    // arena size could exceed the total chunk size.
+    assert(area->size_in_bytes() > size_in_bytes(), "Sanity check");
+    area->set_size_in_bytes(size_in_bytes());
     _chunk->next_chop();
+  } else {
+    assert(area->size_in_bytes() == size_in_bytes(), "Sanity check");
   }
   // Roll back arena to saved top markers
   area->_chunk = _chunk;
   area->_hwm = _hwm;
   area->_max = _max;
-  area->set_size_in_bytes(_size_in_bytes);
   debug_only(area->_handle_mark_nesting--);
 }
 
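Both HandleMark hunks above shrink the arena's recorded size before chopping later chunks, so the byte count never exceeds the chunks actually retained. A small sketch of that invariant with a toy arena (names are illustrative, not the HotSpot Arena API):

    #include <cassert>
    #include <cstddef>

    // Toy arena: size_in_bytes must always match the retained chunks.
    struct ToyArena {
        size_t size_in_bytes;
    };

    struct ToyMark {
        size_t saved_size;   // size captured when the mark was taken
    };

    void pop_and_restore(ToyArena* area, const ToyMark& m, bool later_chunks) {
        if (later_chunks) {
            // Shrink the bookkeeping first; otherwise the recorded size
            // would briefly exceed the total size of the remaining chunks.
            assert(area->size_in_bytes > m.saved_size);
            area->size_in_bytes = m.saved_size;
            // ... later chunks can now be freed ...
        } else {
            assert(area->size_in_bytes == m.saved_size);
        }
    }

    int main() {
        ToyArena a = { 4096 };
        ToyMark m = { 1024 };
        pop_and_restore(&a, m, true /* later_chunks */);
        assert(a.size_in_bytes == 1024);
        return 0;
    }
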
--- a/src/share/vm/runtime/interfaceSupport.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/interfaceSupport.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,22 +31,11 @@
 #include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/safepoint.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/preserveException.hpp"
 #include "utilities/top.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 // Wrapper for all entry points to the virtual machine.
 // The HandleMarkCleaner is a faster version of HandleMark.
--- a/src/share/vm/runtime/java.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/java.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -54,6 +54,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/statSampler.hpp"
 #include "runtime/task.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
 #include "runtime/vm_operations.hpp"
 #include "services/memReporter.hpp"
@@ -79,18 +80,6 @@
 #ifdef TARGET_ARCH_ppc
 # include "vm_version_ppc.hpp"
 #endif
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 #ifndef SERIALGC
 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.hpp"
@@ -379,6 +368,12 @@
   if (CITime) {
     CompileBroker::print_times();
   }
+
+  if (PrintCodeCache) {
+    MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+    CodeCache::print();
+  }
+
 #ifdef COMPILER2
   if (PrintPreciseBiasedLockingStatistics) {
     OptoRuntime::print_named_counters();
@@ -553,6 +548,10 @@
     BeforeExit_lock->notify_all();
   }
 
+  // Shut down NMT before exit. Otherwise,
+  // it will run into trouble when the system destroys static variables.
+  MemTracker::shutdown(MemTracker::NMT_normal);
+
   #undef BEFORE_EXIT_NOT_RUN
   #undef BEFORE_EXIT_RUNNING
   #undef BEFORE_EXIT_DONE
@@ -688,6 +687,7 @@
 
 JDK_Version JDK_Version::_current;
 const char* JDK_Version::_runtime_name;
+const char* JDK_Version::_runtime_version;
 
 void JDK_Version::initialize() {
   jdk_version_info info;
--- a/src/share/vm/runtime/java.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/java.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -75,6 +75,7 @@
 
   static JDK_Version _current;
   static const char* _runtime_name;
+  static const char* _runtime_version;
 
   // In this class, we promote the minor version of release to be the
   // major version for releases >= 5 in anticipation of the JDK doing the
@@ -189,6 +190,13 @@
     _runtime_name = name;
   }
 
+  static const char* runtime_version() {
+    return _runtime_version;
+  }
+  static void set_runtime_version(const char* version) {
+    _runtime_version = version;
+  }
+
   // Convenience methods for queries on the current major/minor version
   static bool is_jdk12x_version() {
     return current().compare_major(2) == 0;
--- a/src/share/vm/runtime/javaCalls.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/javaCalls.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -39,18 +39,7 @@
 #include "runtime/mutexLocker.hpp"
 #include "runtime/signature.hpp"
 #include "runtime/stubRoutines.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 // -----------------------------------------------------
 // Implementation of JavaCallWrapper
@@ -189,7 +178,7 @@
   assert(method->name() == vmSymbols::object_initializer_name(),    "Should only be called for default constructor");
   assert(method->signature() == vmSymbols::void_method_signature(), "Should only be called for default constructor");
 
-  InstanceKlass* ik = InstanceKlass::cast(method->method_holder());
+  InstanceKlass* ik = method->method_holder();
   if (ik->is_initialized() && ik->has_vanilla_constructor()) {
     // safe to skip constructor call
   } else {
@@ -344,11 +333,11 @@
 
 
 #ifdef ASSERT
-  { Klass* holder = method->method_holder();
+  { InstanceKlass* holder = method->method_holder();
     // A klass might not be initialized since JavaCalls might be used during the execution of
     // the <clinit>. For example, a Thread.start might start executing on an object that is
     // not fully initialized! (bad Java programming style)
-    assert(InstanceKlass::cast(holder)->is_linked(), "rewritting must have taken place");
+    assert(holder->is_linked(), "rewriting must have taken place");
   }
 #endif
 
--- a/src/share/vm/runtime/javaCalls.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/javaCalls.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,6 +29,7 @@
 #include "oops/method.hpp"
 #include "runtime/handles.hpp"
 #include "runtime/javaFrameAnchor.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
 #ifdef TARGET_ARCH_x86
 # include "jniTypes_x86.hpp"
@@ -45,18 +46,6 @@
 #ifdef TARGET_ARCH_ppc
 # include "jniTypes_ppc.hpp"
 #endif
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 // A JavaCallWrapper is constructed before each JavaCall and destructed after the call.
 // Its purpose is to allocate/deallocate a new handle block and to save/restore the last
--- a/src/share/vm/runtime/jniHandles.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/jniHandles.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -28,18 +28,7 @@
 #include "prims/jvmtiExport.hpp"
 #include "runtime/jniHandles.hpp"
 #include "runtime/mutexLocker.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 
 JNIHandleBlock* JNIHandles::_global_handles       = NULL;
--- a/src/share/vm/runtime/memprofiler.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/memprofiler.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -35,19 +35,8 @@
 #include "runtime/mutexLocker.hpp"
 #include "runtime/os.hpp"
 #include "runtime/task.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 #ifndef PRODUCT
 
--- a/src/share/vm/runtime/mutex.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/mutex.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,22 +26,19 @@
 #include "precompiled.hpp"
 #include "runtime/mutex.hpp"
 #include "runtime/osThread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/events.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "mutex_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "mutex_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "mutex_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "mutex_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 
 // o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o
--- a/src/share/vm/runtime/mutexLocker.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/mutexLocker.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,20 +25,9 @@
 #include "precompiled.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/safepoint.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
 #include "runtime/vmThread.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 // Mutexes used in the VM (see comment in mutexLocker.hpp):
 //
@@ -140,6 +129,7 @@
 Monitor* JfrMsg_lock                  = NULL;
 Mutex*   JfrBuffer_lock               = NULL;
 Mutex*   JfrStream_lock               = NULL;
+Monitor* PeriodicTask_lock            = NULL;
 
 #define MAX_NUM_MUTEX 128
 static Monitor * _mutex_array[MAX_NUM_MUTEX];
@@ -285,6 +275,7 @@
   def(JfrMsg_lock                  , Monitor, nonleaf+2,   true);
   def(JfrBuffer_lock               , Mutex,   nonleaf+3,   true);
   def(JfrStream_lock               , Mutex,   nonleaf+4,   true);
+  def(PeriodicTask_lock            , Monitor, nonleaf+5,   true);
 }
 
 GCMutexLocker::GCMutexLocker(Monitor * mutex) {
--- a/src/share/vm/runtime/mutexLocker.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/mutexLocker.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -142,6 +142,7 @@
 extern Monitor* JfrMsg_lock;                     // protects JFR messaging
 extern Mutex*   JfrBuffer_lock;                  // protects JFR buffer operations
 extern Mutex*   JfrStream_lock;                  // protects JFR stream access
+extern Monitor* PeriodicTask_lock;               // protects the periodic task structure
 
 // A MutexLocker provides mutual exclusion with respect to a given mutex
 // for the scope which contains the locker.  The lock is an OS lock, not
--- a/src/share/vm/runtime/objectMonitor.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/objectMonitor.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,25 +34,21 @@
 #include "runtime/objectMonitor.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/stubRoutines.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "services/threadService.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/preserveException.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "os_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "os_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 
 #if defined(__GNUC__) && !defined(IA64)
@@ -657,8 +653,7 @@
     assert (_succ != Self, "invariant") ;
     if (_Responsible == Self) {
         _Responsible = NULL ;
-        // Dekker pivot-point.
-        // Consider OrderAccess::storeload() here
+        OrderAccess::fence(); // Dekker pivot-point
 
         // We may leave threads on cxq|EntryList without a designated
         // "Responsible" thread.  This is benign.  When this thread subsequently
@@ -678,10 +673,6 @@
         //
         // The MEMBAR, above, prevents the LD of cxq|EntryList in the subsequent
         // exit operation from floating above the ST Responsible=null.
-        //
-        // In *practice* however, EnterI() is always followed by some atomic
-        // operation such as the decrement of _count in ::enter().  Those atomics
-        // obviate the need for the explicit MEMBAR, above.
     }
 
     // We've acquired ownership with CAS().
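The exit path above promotes a "consider storeload here" note into an unconditional fence at the Dekker pivot point: the store of _Responsible = NULL must become visible before the subsequent load of cxq|EntryList. A hedged illustration of why a store followed by a load of a different location needs a full fence, written with standard C++11 atomics rather than HotSpot's OrderAccess:

    #include <atomic>
    #include <cstddef>

    std::atomic<void*> responsible(NULL);
    std::atomic<int>   entry_list(0);

    void exit_path(void* self) {
        if (responsible.load(std::memory_order_relaxed) == self) {
            responsible.store(NULL, std::memory_order_relaxed);
            // Without a full fence the CPU may hoist the load of
            // entry_list above the store to responsible, recreating
            // the classic Dekker store/load race.
            std::atomic_thread_fence(std::memory_order_seq_cst);
            int waiters = entry_list.load(std::memory_order_relaxed);
            (void)waiters;   // ... wake a successor if waiters remain ...
        }
    }

    int main() { exit_path(NULL); return 0; }
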
--- a/src/share/vm/runtime/objectMonitor.inline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/objectMonitor.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -101,10 +101,12 @@
   return _count;
 }
 
+// Do NOT set _count = 0. There is a race such that _count could
+// be set while inflating, prior to setting _owner.
+// Just use Atomic::inc/dec and assert that _count is 0 when the monitor
+// is put on the free list.
 inline void ObjectMonitor::set_owner(void* owner) {
   _owner = owner;
   _recursions = 0;
-  _count = 0;
 }
 
 
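The set_owner() change above stops zeroing _count because inflation can race with it; per the new comment, increments and decrements must balance, and the count is merely asserted to be zero when the monitor goes back on the free list. A tiny sketch of that discipline with std::atomic (the names are illustrative):

    #include <atomic>
    #include <cassert>

    struct ToyMonitor {
        std::atomic<int> count;

        ToyMonitor() : count(0) {}

        void enter_contended() { count.fetch_add(1); }   // Atomic::inc
        void leave_contended() { count.fetch_sub(1); }   // Atomic::dec

        void put_on_free_list() {
            // Never blindly reset to zero; a racing increment during
            // inflation would be silently lost.
            assert(count.load() == 0);
        }
    };

    int main() {
        ToyMonitor m;
        m.enter_contended();
        m.leave_contended();
        m.put_on_free_list();
        return 0;
    }
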
--- a/src/share/vm/runtime/os.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/os.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -44,6 +44,7 @@
 #include "runtime/mutexLocker.hpp"
 #include "runtime/os.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "services/attachListener.hpp"
 #include "services/memTracker.hpp"
 #include "services/threadService.hpp"
@@ -51,19 +52,15 @@
 #include "utilities/events.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "os_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "os_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 
 # include <signal.h>
@@ -397,12 +394,16 @@
     // Try to load verify dll first. In 1.3 java dll depends on it and is not
     // always able to find it when the loading executable is outside the JDK.
     // In order to keep working with 1.2 we ignore any loading errors.
-    dll_build_name(buffer, sizeof(buffer), Arguments::get_dll_dir(), "verify");
-    dll_load(buffer, ebuf, sizeof(ebuf));
+    if (dll_build_name(buffer, sizeof(buffer), Arguments::get_dll_dir(),
+                       "verify")) {
+      dll_load(buffer, ebuf, sizeof(ebuf));
+    }
 
     // Load java dll
-    dll_build_name(buffer, sizeof(buffer), Arguments::get_dll_dir(), "java");
-    _native_java_library = dll_load(buffer, ebuf, sizeof(ebuf));
+    if (dll_build_name(buffer, sizeof(buffer), Arguments::get_dll_dir(),
+                       "java")) {
+      _native_java_library = dll_load(buffer, ebuf, sizeof(ebuf));
+    }
     if (_native_java_library == NULL) {
       vm_exit_during_initialization("Unable to load native library", ebuf);
     }
@@ -410,8 +411,10 @@
 #if defined(__OpenBSD__)
     // Work-around OpenBSD's lack of $ORIGIN support by pre-loading libnet.so
     // ignore errors
-    dll_build_name(buffer, sizeof(buffer), Arguments::get_dll_dir(), "net");
-    dll_load(buffer, ebuf, sizeof(ebuf));
+    if (dll_build_name(buffer, sizeof(buffer), Arguments::get_dll_dir(),
+                       "net")) {
+      dll_load(buffer, ebuf, sizeof(ebuf));
+    }
 #endif
   }
   static jboolean onLoaded = JNI_FALSE;
@@ -576,7 +579,9 @@
     // if NULL is returned the calling functions assume out of memory.
     size = 1;
   }
-
+  if (size > size + space_before + space_after) { // Check for rollover.
+    return NULL;
+  }
   NOT_PRODUCT(if (MallocVerifyInterval > 0) check_heap());
   u_char* ptr = (u_char*)::malloc(size + space_before + space_after);
 
@@ -600,9 +605,7 @@
   if (PrintMalloc && tty != NULL) tty->print_cr("os::malloc " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, memblock);
 
   // we do not track MallocCushion memory
-  if (MemTracker::is_on()) {
     MemTracker::record_malloc((address)memblock, size, memflags, caller == 0 ? CALLER_PC : caller);
-  }
 
   return memblock;
 }
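The new guard in os::malloc above rejects requests where adding the guard zones wraps a size_t: with unsigned arithmetic, an overflowing sum comes out smaller than the original size. The same idiom in isolation:

    #include <cassert>
    #include <cstddef>
    #include <limits>

    // True when size + before + after would wrap around a size_t.
    bool rolls_over(size_t size, size_t before, size_t after) {
        return size > size + before + after;   // unsigned wrap makes the sum small
    }

    int main() {
        assert(!rolls_over(100, 8, 8));
        assert(rolls_over(std::numeric_limits<size_t>::max() - 4, 8, 8));
        return 0;
    }
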
@@ -613,7 +616,7 @@
   NOT_PRODUCT(inc_stat_counter(&num_mallocs, 1));
   NOT_PRODUCT(inc_stat_counter(&alloc_bytes, size));
   void* ptr = ::realloc(memblock, size);
-  if (ptr != NULL && MemTracker::is_on()) {
+  if (ptr != NULL) {
     MemTracker::record_realloc((address)memblock, (address)ptr, size, memflags,
      caller == 0 ? CALLER_PC : caller);
   }
@@ -1158,7 +1161,7 @@
   if (inpath == NULL) {
     return NULL;
   }
-  strncpy(inpath, path, strlen(path));
+  strcpy(inpath, path);
   int count = 1;
   char* p = strchr(inpath, psepchar);
   // Get a count of elements to allocate memory
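The strncpy call replaced above used strlen(path) as its bound, which copies the characters but never the trailing NUL, leaving the destination unterminated unless the buffer happened to be pre-zeroed. A minimal demonstration of the difference (buffer names are illustrative):

    #include <cassert>
    #include <cstdlib>
    #include <cstring>

    int main() {
        const char* path = "/usr/lib:/usr/local/lib";
        size_t n = strlen(path);

        // strncpy with strlen(path) copies n bytes and no terminator...
        char* a = (char*)malloc(n + 1);
        a[n] = '#';                      // garbage where the NUL should go
        strncpy(a, path, n);
        assert(a[n] == '#');             // still '#': not a valid C string

        // ...while strcpy also copies the terminating NUL.
        char* b = (char*)malloc(n + 1);
        strcpy(b, path);
        assert(b[n] == '\0');

        free(a);
        free(b);
        return 0;
    }
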
@@ -1401,7 +1404,7 @@
 
 char* os::reserve_memory(size_t bytes, char* addr, size_t alignment_hint) {
   char* result = pd_reserve_memory(bytes, addr, alignment_hint);
-  if (result != NULL && MemTracker::is_on()) {
+  if (result != NULL) {
     MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC);
   }
 
@@ -1409,7 +1412,7 @@
 }
 char* os::attempt_reserve_memory_at(size_t bytes, char* addr) {
   char* result = pd_attempt_reserve_memory_at(bytes, addr);
-  if (result != NULL && MemTracker::is_on()) {
+  if (result != NULL) {
     MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC);
   }
   return result;
@@ -1422,7 +1425,7 @@
 
 bool os::commit_memory(char* addr, size_t bytes, bool executable) {
   bool res = pd_commit_memory(addr, bytes, executable);
-  if (res && MemTracker::is_on()) {
+  if (res) {
     MemTracker::record_virtual_memory_commit((address)addr, bytes, CALLER_PC);
   }
   return res;
@@ -1431,7 +1434,7 @@
 bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
                               bool executable) {
   bool res = os::pd_commit_memory(addr, size, alignment_hint, executable);
-  if (res && MemTracker::is_on()) {
+  if (res) {
     MemTracker::record_virtual_memory_commit((address)addr, size, CALLER_PC);
   }
   return res;
@@ -1458,8 +1461,9 @@
                            char *addr, size_t bytes, bool read_only,
                            bool allow_exec) {
   char* result = pd_map_memory(fd, file_name, file_offset, addr, bytes, read_only, allow_exec);
-  if (result != NULL && MemTracker::is_on()) {
+  if (result != NULL) {
     MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC);
+    MemTracker::record_virtual_memory_commit((address)result, bytes, CALLER_PC);
   }
   return result;
 }
@@ -1474,6 +1478,7 @@
 bool os::unmap_memory(char *addr, size_t bytes) {
   bool result = pd_unmap_memory(addr, bytes);
   if (result) {
+    MemTracker::record_virtual_memory_uncommit((address)addr, bytes);
     MemTracker::record_virtual_memory_release((address)addr, bytes);
   }
   return result;
--- a/src/share/vm/runtime/os.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/os.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -255,6 +255,7 @@
   static int    vm_allocation_granularity();
   static char*  reserve_memory(size_t bytes, char* addr = 0,
                                size_t alignment_hint = 0);
+  static char*  reserve_memory_aligned(size_t size, size_t alignment);
   static char*  attempt_reserve_memory_at(size_t bytes, char* addr);
   static void   split_reserved_memory(char *base, size_t size,
                                       size_t split, bool realloc);
@@ -479,7 +480,8 @@
   static const char*    get_current_directory(char *buf, int buflen);
 
   // Builds a platform-specific full library path given a ld path and lib name
-  static void           dll_build_name(char* buffer, size_t size,
+  // Returns true if buffer contains full path to existing file, false otherwise
+  static bool           dll_build_name(char* buffer, size_t size,
                                        const char* pathname, const char* fname);
 
   // Symbol lookup, find nearest function name; basically it implements
@@ -639,10 +641,6 @@
 
   static struct hostent* get_host_by_name(char* name);
 
-  // Printing 64 bit integers
-  static const char* jlong_format_specifier();
-  static const char* julong_format_specifier();
-
   // Support for signals (see JVM_RaiseSignal, JVM_RegisterSignal)
   static void  signal_init();
   static void  signal_init_pd();
--- a/src/share/vm/runtime/os_ext.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/os_ext.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/src/share/vm/runtime/perfData.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/perfData.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -192,7 +192,7 @@
 }
 
 int PerfLong::format(char* buffer, int length) {
-  return jio_snprintf(buffer, length,"%lld", *(jlong*)_valuep);
+  return jio_snprintf(buffer, length, JLONG_FORMAT, *(jlong*)_valuep);
 }
 
 PerfLongVariant::PerfLongVariant(CounterNS ns, const char* namep, Units u,
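The perfData change above swaps a hard-coded "%lld" for the JLONG_FORMAT macro, since the printf length modifier for a 64-bit integer differs by platform ("%lld" vs "%ld" vs Windows' "%I64d"). The standard C++ analogue of that macro is PRId64 from <cinttypes>:

    #include <cinttypes>
    #include <cstdio>

    int main() {
        int64_t v = 123456789012345LL;
        char buf[32];
        // PRId64 expands to the right length modifier for this platform,
        // playing the same role JLONG_FORMAT plays inside HotSpot.
        snprintf(buf, sizeof(buf), "%" PRId64, v);
        puts(buf);
        return 0;
    }
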
--- a/src/share/vm/runtime/reflection.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/reflection.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -56,22 +56,22 @@
     vframeStream vfst(jthread);
     // skip over any frames belonging to java.lang.Class
     while (!vfst.at_end() &&
-           InstanceKlass::cast(vfst.method()->method_holder())->name() == vmSymbols::java_lang_Class()) {
+           vfst.method()->method_holder()->name() == vmSymbols::java_lang_Class()) {
       vfst.next();
     }
     if (!vfst.at_end()) {
       // this frame is a likely suspect
       caller = vfst.method()->method_holder();
       line_number = vfst.method()->line_number_from_bci(vfst.bci());
-      Symbol* s = InstanceKlass::cast(vfst.method()->method_holder())->source_file_name();
+      Symbol* s = vfst.method()->method_holder()->source_file_name();
       if (s != NULL) {
         source_file = s->as_C_string();
       }
     }
   }
   if (caller != NULL) {
-    const char * from = Klass::cast(caller)->external_name();
-    const char * to = Klass::cast(to_class)->external_name();
+    const char * from = caller->external_name();
+    const char * to = to_class->external_name();
     // print in a single call to reduce interleaving between threads
     if (source_file != NULL) {
       tty->print("RESOLVE %s %s %s:%d (reflection)\n", from, to, source_file, line_number);
@@ -330,7 +330,7 @@
     return TypeArrayKlass::cast(tak)->allocate(length, THREAD);
   } else {
     Klass* k = java_lang_Class::as_Klass(element_mirror);
-    if (Klass::cast(k)->oop_is_array() && ArrayKlass::cast(k)->dimension() >= MAX_DIM) {
+    if (k->oop_is_array() && ArrayKlass::cast(k)->dimension() >= MAX_DIM) {
       THROW_0(vmSymbols::java_lang_IllegalArgumentException());
     }
     return oopFactory::new_objArray(k, length, THREAD);
@@ -366,7 +366,7 @@
     klass = basic_type_mirror_to_arrayklass(element_mirror, CHECK_NULL);
   } else {
     klass = java_lang_Class::as_Klass(element_mirror);
-    if (Klass::cast(klass)->oop_is_array()) {
+    if (klass->oop_is_array()) {
       int k_dim = ArrayKlass::cast(klass)->dimension();
       if (k_dim + len > MAX_DIM) {
         THROW_0(vmSymbols::java_lang_IllegalArgumentException());
@@ -374,7 +374,7 @@
       dim += k_dim;
     }
   }
-  klass = Klass::cast(klass)->array_klass(dim, CHECK_NULL);
+  klass = klass->array_klass(dim, CHECK_NULL);
   oop obj = ArrayKlass::cast(klass)->multi_allocate(len, dimensions, THREAD);
   assert(obj->is_array(), "just checking");
   return arrayOop(obj);
@@ -387,7 +387,7 @@
   }
 
   Klass* klass = java_lang_Class::as_Klass(mirror);
-  if (!Klass::cast(klass)->oop_is_array()) {
+  if (!klass->oop_is_array()) {
     return NULL;
   }
 
@@ -395,15 +395,15 @@
 #ifdef ASSERT
   oop result2 = NULL;
   if (ArrayKlass::cast(klass)->dimension() == 1) {
-    if (Klass::cast(klass)->oop_is_typeArray()) {
+    if (klass->oop_is_typeArray()) {
       result2 = basic_type_arrayklass_to_mirror(klass, CHECK_NULL);
     } else {
-      result2 = Klass::cast(ObjArrayKlass::cast(klass)->element_klass())->java_mirror();
+      result2 = ObjArrayKlass::cast(klass)->element_klass()->java_mirror();
     }
   } else {
     Klass* lower_dim = ArrayKlass::cast(klass)->lower_dimension();
-    assert(Klass::cast(lower_dim)->oop_is_array(), "just checking");
-    result2 = Klass::cast(lower_dim)->java_mirror();
+    assert(lower_dim->oop_is_array(), "just checking");
+    result2 = lower_dim->java_mirror();
   }
   assert(result == result2, "results must be consistent");
 #endif //ASSERT
@@ -442,7 +442,7 @@
   if (acc.is_protected()) {
     if (target_class != client_class) {
       if (!is_same_class_package(client_class, field_class)) {
-        if (!Klass::cast(target_class)->is_subclass_of(client_class)) {
+        if (!target_class->is_subclass_of(client_class)) {
           THROW_(vmSymbols::java_lang_IllegalAccessException(), false);
         }
       }
@@ -468,7 +468,13 @@
   // sun/reflect/MagicAccessorImpl subclasses to succeed trivially.
   if (   JDK_Version::is_gte_jdk14x_version()
       && UseNewReflection
-      && Klass::cast(current_class)->is_subclass_of(SystemDictionary::reflect_MagicAccessorImpl_klass())) {
+      && current_class->is_subclass_of(SystemDictionary::reflect_MagicAccessorImpl_klass())) {
+    return true;
+  }
+
+  // Also allow all accesses from
+  // java/lang/invoke/MagicLambdaImpl subclasses to succeed trivially.
+  if (current_class->is_subclass_of(SystemDictionary::lambda_MagicLambdaImpl_klass())) {
     return true;
   }
 
@@ -540,12 +546,12 @@
   if (access.is_protected()) {
     if (!protected_restriction) {
       // See if current_class is a subclass of field_class
-      if (Klass::cast(current_class)->is_subclass_of(field_class)) {
+      if (current_class->is_subclass_of(field_class)) {
         if (access.is_static() || // static fields are ok, see 6622385
             current_class == resolved_class ||
             field_class == resolved_class ||
-            Klass::cast(current_class)->is_subclass_of(resolved_class) ||
-            Klass::cast(resolved_class)->is_subclass_of(current_class)) {
+            current_class->is_subclass_of(resolved_class) ||
+            resolved_class->is_subclass_of(current_class)) {
           return true;
         }
       }
@@ -560,7 +566,13 @@
   // sun/reflect/MagicAccessorImpl subclasses to succeed trivially.
   if (   JDK_Version::is_gte_jdk14x_version()
       && UseNewReflection
-      && Klass::cast(current_class)->is_subclass_of(SystemDictionary::reflect_MagicAccessorImpl_klass())) {
+      && current_class->is_subclass_of(SystemDictionary::reflect_MagicAccessorImpl_klass())) {
+    return true;
+  }
+
+  // Also allow all accesses from
+  // java/lang/invoke/MagicLambdaImpl subclasses to succeed trivially.
+  if (current_class->is_subclass_of(SystemDictionary::lambda_MagicLambdaImpl_klass())) {
     return true;
   }
 
@@ -630,8 +642,8 @@
     case T_OBJECT:
     case T_ARRAY:
       Symbol* name        = ss->as_symbol(CHECK_NULL);
-      oop loader            = InstanceKlass::cast(method->method_holder())->class_loader();
-      oop protection_domain = InstanceKlass::cast(method->method_holder())->protection_domain();
+      oop loader            = method->method_holder()->class_loader();
+      oop protection_domain = method->method_holder()->protection_domain();
       Klass* k = SystemDictionary::resolve_or_fail(
                                        name,
                                        Handle(THREAD, loader),
@@ -681,7 +693,7 @@
   }
 
   oop loader = InstanceKlass::cast(k())->class_loader();
-  oop protection_domain = Klass::cast(k())->protection_domain();
+  oop protection_domain = k()->protection_domain();
   Klass* result = SystemDictionary::resolve_or_fail(signature,
                                     Handle(THREAD, loader),
                                     Handle(THREAD, protection_domain),
@@ -691,7 +703,7 @@
     trace_class_resolution(result);
   }
 
-  oop nt = Klass::cast(result)->java_mirror();
+  oop nt = result->java_mirror();
   return Handle(THREAD, nt);
 }
 
@@ -702,7 +714,7 @@
   assert(!method()->is_initializer() ||
          (for_constant_pool_access && method()->is_static()) ||
          (method()->name() == vmSymbols::class_initializer_name()
-    && Klass::cast(method()->method_holder())->is_interface() && JDK_Version::is_jdk12x_version()), "should call new_constructor instead");
+    && method()->method_holder()->is_interface() && JDK_Version::is_jdk12x_version()), "should call new_constructor instead");
   instanceKlassHandle holder (THREAD, method->method_holder());
   int slot = method->method_idnum();
 
@@ -759,6 +771,10 @@
     typeArrayOop an_oop = Annotations::make_java_array(method->annotation_default(), CHECK_NULL);
     java_lang_reflect_Method::set_annotation_default(mh(), an_oop);
   }
+  if (java_lang_reflect_Method::has_type_annotations_field()) {
+    typeArrayOop an_oop = Annotations::make_java_array(method->type_annotations(), CHECK_NULL);
+    java_lang_reflect_Method::set_type_annotations(mh(), an_oop);
+  }
   return mh();
 }
 
@@ -820,7 +836,7 @@
   Handle type = new_type(signature, holder, CHECK_NULL);
   Handle rh  = java_lang_reflect_Field::create(CHECK_NULL);
 
-  java_lang_reflect_Field::set_clazz(rh(), Klass::cast(fd->field_holder())->java_mirror());
+  java_lang_reflect_Field::set_clazz(rh(), fd->field_holder()->java_mirror());
   java_lang_reflect_Field::set_slot(rh(), fd->index());
   java_lang_reflect_Field::set_name(rh(), name());
   java_lang_reflect_Field::set_type(rh(), type());
@@ -837,6 +853,29 @@
     typeArrayOop an_oop = Annotations::make_java_array(fd->annotations(), CHECK_NULL);
     java_lang_reflect_Field::set_annotations(rh(), an_oop);
   }
+  if (java_lang_reflect_Field::has_type_annotations_field()) {
+    typeArrayOop an_oop = Annotations::make_java_array(fd->type_annotations(), CHECK_NULL);
+    java_lang_reflect_Field::set_type_annotations(rh(), an_oop);
+  }
+  return rh();
+}
+
+oop Reflection::new_parameter(Handle method, int index, Symbol* sym,
+                              int flags, TRAPS) {
+  Handle name;
+
+  // A null symbol here translates to the empty string
+  if (NULL != sym) {
+    name = java_lang_String::create_from_symbol(sym, CHECK_NULL);
+  } else {
+    name = java_lang_String::create_from_str("", CHECK_NULL);
+  }
+
+  Handle rh = java_lang_reflect_Parameter::create(CHECK_NULL);
+  java_lang_reflect_Parameter::set_name(rh(), name());
+  java_lang_reflect_Parameter::set_modifiers(rh(), flags);
+  java_lang_reflect_Parameter::set_executable(rh(), method());
+  java_lang_reflect_Parameter::set_index(rh(), index);
   return rh();
 }
 
@@ -888,7 +927,7 @@
       method = reflected_method;
     } else {
       // resolve based on the receiver
-      if (InstanceKlass::cast(reflected_method->method_holder())->is_interface()) {
+      if (reflected_method->method_holder()->is_interface()) {
         // resolve interface call
         if (ReflectionWrapResolutionErrors) {
           // new default: 6531596
@@ -925,7 +964,7 @@
               ResourceMark rm(THREAD);
               Handle h_origexception = Exceptions::new_exception(THREAD,
                      vmSymbols::java_lang_AbstractMethodError(),
-                     Method::name_and_sig_as_C_string(Klass::cast(target_klass()),
+                     Method::name_and_sig_as_C_string(target_klass(),
                      method->name(),
                      method->signature()));
               JavaCallArguments args(h_origexception);
@@ -935,7 +974,7 @@
             } else {
               ResourceMark rm(THREAD);
               THROW_MSG_0(vmSymbols::java_lang_AbstractMethodError(),
-                        Method::name_and_sig_as_C_string(Klass::cast(target_klass()),
+                        Method::name_and_sig_as_C_string(target_klass(),
                                                                 method->name(),
                                                                 method->signature()));
             }
@@ -950,7 +989,7 @@
   if (method.is_null()) {
     ResourceMark rm(THREAD);
     THROW_MSG_0(vmSymbols::java_lang_NoSuchMethodError(),
-                Method::name_and_sig_as_C_string(Klass::cast(klass()),
+                Method::name_and_sig_as_C_string(klass(),
                                                         reflected_method->name(),
                                                         reflected_method->signature()));
   }
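
Note: the reflection.cpp hunks above all make the same mechanical change: Klass::cast(k)->member() becomes k->member(). With Klass no longer an oop, the cast helper is effectively an identity function, so call sites can use the pointer directly. A minimal standalone sketch of the before/after shape (this Klass is a toy stand-in, not the real HotSpot class):

// Standalone illustration of dropping a now-redundant static cast helper.
#include <cassert>
#include <cstdio>

struct Klass {
  bool oop_is_array() const { return true; }
  // Once Klass stopped being an oop, the old helper just returned its
  // argument, so every call site can use the pointer directly.
  static Klass* cast(Klass* k) { assert(k != NULL); return k; }
};

int main() {
  Klass k;
  Klass* p = &k;
  bool before = Klass::cast(p)->oop_is_array();  // old style
  bool after  = p->oop_is_array();               // new style
  std::printf("%d %d\n", (int)before, (int)after);
  return 0;
}
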
--- a/src/share/vm/runtime/reflection.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/reflection.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -118,6 +118,10 @@
   static oop new_constructor(methodHandle method, TRAPS);
   // Create a java.lang.reflect.Field object based on a field descriptor
   static oop new_field(fieldDescriptor* fd, bool intern_name, TRAPS);
+  // Create a java.lang.reflect.Parameter object based on a
+  // MethodParameterElement
+  static oop new_parameter(Handle method, int index, Symbol* sym,
+                           int flags, TRAPS);
 
 private:
   // method resolution for invoke
--- a/src/share/vm/runtime/safepoint.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/safepoint.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -48,6 +48,7 @@
 #include "runtime/stubRoutines.hpp"
 #include "runtime/sweeper.hpp"
 #include "runtime/synchronizer.hpp"
+#include "runtime/thread.inline.hpp"
 #include "services/memTracker.hpp"
 #include "services/runtimeService.hpp"
 #include "utilities/events.hpp"
@@ -71,18 +72,6 @@
 # include "nativeInst_ppc.hpp"
 # include "vmreg_ppc.inline.hpp"
 #endif
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 #ifndef SERIALGC
 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp"
 #include "gc_implementation/shared/concurrentGCThread.hpp"
--- a/src/share/vm/runtime/sharedRuntime.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,6 +31,7 @@
 #include "compiler/abstractCompiler.hpp"
 #include "compiler/compileBroker.hpp"
 #include "compiler/compilerOracle.hpp"
+#include "compiler/disassembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
 #include "memory/gcLocker.inline.hpp"
@@ -642,7 +643,8 @@
       bool skip_scope_increment = false;
       // exception handler lookup
       KlassHandle ek (THREAD, exception->klass());
-      handler_bci = sd->method()->fast_exception_handler_bci_for(ek, bci, THREAD);
+      methodHandle mh(THREAD, sd->method());
+      handler_bci = Method::fast_exception_handler_bci_for(mh, ek, bci, THREAD);
       if (HAS_PENDING_EXCEPTION) {
         recursive_exception = true;
         // We threw an exception while trying to find the exception handler.
@@ -1775,7 +1777,7 @@
   // The copy_array mechanism is awkward and could be removed, but
   // the compilers don't call this function except as a last resort,
   // so it probably doesn't matter.
-  Klass::cast(src->klass())->copy_array((arrayOopDesc*)src,  src_pos,
+  src->klass()->copy_array((arrayOopDesc*)src,  src_pos,
                                         (arrayOopDesc*)dest, dest_pos,
                                         length, thread);
 }
@@ -1788,8 +1790,8 @@
   vframeStream vfst(thread, true);
   assert(!vfst.at_end(), "Java frame must exist");
   Bytecode_checkcast cc(vfst.method(), vfst.method()->bcp_from(vfst.bci()));
-  Klass* targetKlass = Klass::cast(vfst.method()->constants()->klass_at(
-    cc.index(), thread));
+  Klass* targetKlass = vfst.method()->constants()->klass_at(
+    cc.index(), thread);
   return generate_class_cast_message(objName, targetKlass->external_name());
 }
 
--- a/src/share/vm/runtime/signature.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/signature.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -354,7 +354,7 @@
     return Universe::java_mirror(type());
   Klass* klass = as_klass(class_loader, protection_domain, failure_mode, CHECK_NULL);
   if (klass == NULL)  return NULL;
-  return Klass::cast(klass)->java_mirror();
+  return klass->java_mirror();
 }
 
 Symbol* SignatureStream::as_symbol_or_null() {
--- a/src/share/vm/runtime/stubCodeGenerator.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/stubCodeGenerator.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,25 +23,13 @@
  */
 
 #include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/codeCache.hpp"
 #include "compiler/disassembler.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/forte.hpp"
 #include "runtime/stubCodeGenerator.hpp"
-#ifdef TARGET_ARCH_x86
-# include "assembler_x86.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_sparc
-# include "assembler_sparc.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_zero
-# include "assembler_zero.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_arm
-# include "assembler_arm.inline.hpp"
-#endif
-#ifdef TARGET_ARCH_ppc
-# include "assembler_ppc.inline.hpp"
-#endif
 
 
 // Implementation of StubCodeDesc
--- a/src/share/vm/runtime/stubRoutines.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/stubRoutines.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -120,6 +120,10 @@
 address StubRoutines::_arrayof_jshort_fill;
 address StubRoutines::_arrayof_jint_fill;
 
+address StubRoutines::_aescrypt_encryptBlock               = NULL;
+address StubRoutines::_aescrypt_decryptBlock               = NULL;
+address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
+address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
 
 double (* StubRoutines::_intrinsic_log   )(double) = NULL;
 double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
--- a/src/share/vm/runtime/stubRoutines.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/stubRoutines.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -199,6 +199,11 @@
   // zero heap space aligned to jlong (8 bytes)
   static address _zero_aligned_words;
 
+  static address _aescrypt_encryptBlock;
+  static address _aescrypt_decryptBlock;
+  static address _cipherBlockChaining_encryptAESCrypt;
+  static address _cipherBlockChaining_decryptAESCrypt;
+
   // These are versions of the java.lang.Math methods which perform
   // the same operations as the intrinsic version.  They are used for
   // constant folding in the compiler to ensure equivalence.  If the
@@ -330,6 +335,11 @@
   static address arrayof_jshort_fill() { return _arrayof_jshort_fill; }
   static address arrayof_jint_fill()   { return _arrayof_jint_fill; }
 
+  static address aescrypt_encryptBlock()                { return _aescrypt_encryptBlock; }
+  static address aescrypt_decryptBlock()                { return _aescrypt_decryptBlock; }
+  static address cipherBlockChaining_encryptAESCrypt()  { return _cipherBlockChaining_encryptAESCrypt; }
+  static address cipherBlockChaining_decryptAESCrypt()  { return _cipherBlockChaining_decryptAESCrypt; }
+
   static address select_fill_function(BasicType t, bool aligned, const char* &name);
 
   static address zero_aligned_words()   { return _zero_aligned_words; }
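
Note: the four new StubRoutines slots above start out NULL and are only filled in if the platform generates AES stubs, so the pointer doubles as a capability flag. A hedged standalone sketch of that pattern (the fallback path and names are illustrative, not the HotSpot code paths):

// Sketch: a stub entry point that is NULL until a platform generates it,
// with callers falling back to a portable path.
#include <cstdio>

typedef void (*aes_stub_t)(const unsigned char* in, unsigned char* out);

static aes_stub_t aescrypt_encryptBlock = NULL;  // no stub generated yet

static void software_encrypt(const unsigned char* in, unsigned char* out) {
  (void)in; (void)out;
  std::puts("falling back to a pure-software AES path");
}

static void encrypt_block(const unsigned char* in, unsigned char* out) {
  if (aescrypt_encryptBlock != NULL) {
    aescrypt_encryptBlock(in, out);  // hardware-accelerated stub
  } else {
    software_encrypt(in, out);       // platform produced no stub
  }
}

int main() {
  unsigned char in[16] = {0};
  unsigned char out[16];
  encrypt_block(in, out);
  return 0;
}
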
--- a/src/share/vm/runtime/synchronizer.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/synchronizer.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,24 +36,21 @@
 #include "runtime/osThread.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/synchronizer.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
 #include "utilities/preserveException.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "os_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "os_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 
 #if defined(__GNUC__) && !defined(IA64)
@@ -336,7 +333,9 @@
 void ObjectSynchronizer::jni_exit(oop obj, Thread* THREAD) {
   TEVENT (jni_exit) ;
   if (UseBiasedLocking) {
-    BiasedLocking::revoke_and_rebias(obj, false, THREAD);
+    Handle h_obj(THREAD, obj);
+    BiasedLocking::revoke_and_rebias(h_obj, false, THREAD);
+    obj = h_obj();
   }
   assert(!obj->mark()->has_bias_pattern(), "biases should be revoked by now");
 
@@ -1323,7 +1322,7 @@
               ResourceMark rm;
               tty->print_cr("Inflating object " INTPTR_FORMAT " , mark " INTPTR_FORMAT " , type %s",
                 (intptr_t) object, (intptr_t) object->mark(),
-                Klass::cast(object->klass())->external_name());
+                object->klass()->external_name());
             }
           }
           return m ;
@@ -1373,7 +1372,7 @@
           ResourceMark rm;
           tty->print_cr("Inflating object " INTPTR_FORMAT " , mark " INTPTR_FORMAT " , type %s",
             (intptr_t) object, (intptr_t) object->mark(),
-            Klass::cast(object->klass())->external_name());
+            object->klass()->external_name());
         }
       }
       return m ;
@@ -1440,7 +1439,7 @@
        if (obj->is_instance()) {
          ResourceMark rm;
            tty->print_cr("Deflating object " INTPTR_FORMAT " , mark " INTPTR_FORMAT " , type %s",
-                (intptr_t) obj, (intptr_t) obj->mark(), Klass::cast(obj->klass())->external_name());
+                (intptr_t) obj, (intptr_t) obj->mark(), obj->klass()->external_name());
        }
      }
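
Note: the jni_exit change above is the classic handle discipline: wrap the raw oop in a Handle before a call that may safepoint and move the object (revoke_and_rebias), then reread the oop from the handle afterwards. A simplified model of why the reread matters (this toy Handle is not the VM's handle machinery):

// Toy model of "wrap, call something that may move the object, reread".
#include <cstdio>

struct Object { int mark; };

// A handle adds one indirection that the collector is allowed to update.
struct Handle {
  Object* slot;
  explicit Handle(Object* o) : slot(o) {}
  Object* operator()() const { return slot; }
};

static Object g_new_location;

static void may_relocate(Handle& h) {
  // Stand-in for a safepoint that moves the object and fixes up handles.
  g_new_location = *h.slot;
  h.slot = &g_new_location;
}

int main() {
  Object obj = { 42 };
  Object* raw = &obj;
  Handle h(raw);
  may_relocate(h);
  raw = h();  // without this reread, raw would point at the old location
  std::printf("%d\n", raw->mark);
  return 0;
}
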
 
--- a/src/share/vm/runtime/task.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/task.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,22 +26,19 @@
 #include "memory/allocation.hpp"
 #include "runtime/init.hpp"
 #include "runtime/task.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "os_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "os_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 
 int PeriodicTask::_num_tasks = 0;
@@ -61,7 +58,7 @@
 }
 #endif
 
-void PeriodicTask::real_time_tick(size_t delay_time) {
+void PeriodicTask::real_time_tick(int delay_time) {
 #ifndef PRODUCT
   if (ProfilerCheckIntervals) {
     _ticks++;
@@ -73,19 +70,39 @@
     _intervalHistogram[ms]++;
   }
 #endif
-  int orig_num_tasks = _num_tasks;
-  for(int index = 0; index < _num_tasks; index++) {
-    _tasks[index]->execute_if_pending(delay_time);
-    if (_num_tasks < orig_num_tasks) { // task dis-enrolled itself
-      index--;  // re-do current slot as it has changed
-      orig_num_tasks = _num_tasks;
+
+  {
+    MutexLockerEx ml(PeriodicTask_lock, Mutex::_no_safepoint_check_flag);
+    int orig_num_tasks = _num_tasks;
+
+    for(int index = 0; index < _num_tasks; index++) {
+      _tasks[index]->execute_if_pending(delay_time);
+      if (_num_tasks < orig_num_tasks) { // task dis-enrolled itself
+        index--;  // re-do current slot as it has changed
+        orig_num_tasks = _num_tasks;
+      }
     }
   }
 }
 
+int PeriodicTask::time_to_wait() {
+  MutexLockerEx ml(PeriodicTask_lock->owned_by_self() ?
+                     NULL : PeriodicTask_lock, Mutex::_no_safepoint_check_flag);
+
+  if (_num_tasks == 0) {
+    return 0; // sleep until shutdown or a task is enrolled
+  }
+
+  int delay = _tasks[0]->time_to_next_interval();
+  for (int index = 1; index < _num_tasks; index++) {
+    delay = MIN2(delay, _tasks[index]->time_to_next_interval());
+  }
+  return delay;
+}
+
 
 PeriodicTask::PeriodicTask(size_t interval_time) :
-  _counter(0), _interval(interval_time) {
+  _counter(0), _interval((int) interval_time) {
   // Sanity check the interval time
   assert(_interval >= PeriodicTask::min_interval &&
          _interval <= PeriodicTask::max_interval &&
@@ -94,33 +111,40 @@
 }
 
 PeriodicTask::~PeriodicTask() {
-  if (is_enrolled())
-    disenroll();
-}
-
-bool PeriodicTask::is_enrolled() const {
-  for(int index = 0; index < _num_tasks; index++)
-    if (_tasks[index] == this) return true;
-  return false;
+  disenroll();
 }
 
 void PeriodicTask::enroll() {
-  assert(WatcherThread::watcher_thread() == NULL, "dynamic enrollment of tasks not yet supported");
+  MutexLockerEx ml(PeriodicTask_lock->owned_by_self() ?
+                     NULL : PeriodicTask_lock, Mutex::_no_safepoint_check_flag);
 
-  if (_num_tasks == PeriodicTask::max_tasks)
+  if (_num_tasks == PeriodicTask::max_tasks) {
     fatal("Overflow in PeriodicTask table");
+  }
   _tasks[_num_tasks++] = this;
+
+  WatcherThread* thread = WatcherThread::watcher_thread();
+  if (thread) {
+    thread->unpark();
+  } else {
+    WatcherThread::start();
+  }
 }
 
 void PeriodicTask::disenroll() {
-  assert(WatcherThread::watcher_thread() == NULL ||
-         Thread::current() == WatcherThread::watcher_thread(),
-         "dynamic disenrollment currently only handled from WatcherThread from within task() method");
+  MutexLockerEx ml(PeriodicTask_lock->owned_by_self() ?
+                     NULL : PeriodicTask_lock, Mutex::_no_safepoint_check_flag);
 
   int index;
-  for(index = 0; index < _num_tasks && _tasks[index] != this; index++);
-  if (index == _num_tasks) return;
+  for(index = 0; index < _num_tasks && _tasks[index] != this; index++)
+    ;
+
+  if (index == _num_tasks) {
+    return;
+  }
+
   _num_tasks--;
+
   for (; index < _num_tasks; index++) {
     _tasks[index] = _tasks[index+1];
   }
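
Note: enroll(), disenroll() and time_to_wait() above all use MutexLockerEx(PeriodicTask_lock->owned_by_self() ? NULL : PeriodicTask_lock, ...), i.e. they take the lock only when the caller does not already hold it, which lets a task disenroll itself from inside real_time_tick(). A minimal sketch of that conditional-locker idiom with standard C++ primitives (assumed semantics only, not HotSpot's Mutex class):

// Lock only if the current thread does not already hold the mutex.
#include <mutex>
#include <thread>

class CheckedMutex {
  std::mutex _m;
  std::thread::id _owner;
 public:
  void lock()   { _m.lock(); _owner = std::this_thread::get_id(); }
  void unlock() { _owner = std::thread::id(); _m.unlock(); }
  bool owned_by_self() const { return _owner == std::this_thread::get_id(); }
};

class ConditionalLocker {  // MutexLockerEx stand-in: NULL means "do nothing"
  CheckedMutex* _m;
 public:
  explicit ConditionalLocker(CheckedMutex* m) : _m(m) { if (_m) _m->lock(); }
  ~ConditionalLocker() { if (_m) _m->unlock(); }
};

static CheckedMutex PeriodicTask_lock;

void disenroll() {
  ConditionalLocker ml(PeriodicTask_lock.owned_by_self() ? NULL
                                                         : &PeriodicTask_lock);
  // ... mutate the task table under the lock ...
}

int main() { disenroll(); return 0; }
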
--- a/src/share/vm/runtime/task.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/task.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -49,12 +49,12 @@
   static int num_tasks()   { return _num_tasks; }
 
  private:
-  size_t _counter;
-  const size_t _interval;
+  int _counter;
+  const int _interval;
 
   static int _num_tasks;
   static PeriodicTask* _tasks[PeriodicTask::max_tasks];
-  static void real_time_tick(size_t delay_time);
+  static void real_time_tick(int delay_time);
 
 #ifndef PRODUCT
   static elapsedTimer _timer;                      // measures time between ticks
@@ -69,51 +69,36 @@
   PeriodicTask(size_t interval_time); // interval is in milliseconds of elapsed time
   ~PeriodicTask();
 
-  // Tells whether is enrolled
-  bool is_enrolled() const;
-
   // Make the task active
-  // NOTE: this may only be called before the WatcherThread has been started
+  // For dynamic enrollment at time T, the task will execute somewhere
+  // between T and T + interval_time.
   void enroll();
 
   // Make the task inactive
-  // NOTE: this may only be called either while the WatcherThread is
-  // inactive or by a task from within its task() method. One-shot or
-  // several-shot tasks may be implemented this way.
   void disenroll();
 
-  void execute_if_pending(size_t delay_time) {
-    _counter += delay_time;
-    if (_counter >= _interval) {
+  void execute_if_pending(int delay_time) {
+    // make sure we don't overflow
+    jlong tmp = (jlong) _counter + (jlong) delay_time;
+
+    if (tmp >= (jlong) _interval) {
       _counter = 0;
       task();
+    } else {
+      _counter += delay_time;
     }
   }
 
   // Returns how long (time in milliseconds) before the next time we should
   // execute this task.
-  size_t time_to_next_interval() const {
+  int time_to_next_interval() const {
     assert(_interval > _counter,  "task counter greater than interval?");
     return _interval - _counter;
   }
 
   // Calculate when the next periodic task will fire.
   // Called by the WatcherThread's run method.
-  // This assumes that periodic tasks aren't entering the system
-  // dynamically, except for during startup.
-  static size_t time_to_wait() {
-    if (_num_tasks == 0) {
-      // Don't wait any more; shut down the thread since we don't
-      // currently support dynamic enrollment.
-      return 0;
-    }
-
-    size_t delay = _tasks[0]->time_to_next_interval();
-    for (int index = 1; index < _num_tasks; index++) {
-      delay = MIN2(delay, _tasks[index]->time_to_next_interval());
-    }
-    return delay;
-  }
+  static int time_to_wait();
 
   // The task to perform at each period
   virtual void task() = 0;
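
Note: the rewritten execute_if_pending() above widens the sum to jlong before comparing against _interval, so a large accumulated _counter plus a large delay cannot wrap 32-bit arithmetic and skip a due task; the else branch keeps _counter within int range. A standalone version of the same guard, with jlong modeled as int64_t:

// Overflow-safe "accumulate and fire" check from the diff.
#include <cstdint>
#include <cstdio>

struct Task {
  int _counter;
  const int _interval;
  explicit Task(int interval) : _counter(0), _interval(interval) {}

  void execute_if_pending(int delay_time) {
    // Widen before adding: INT_MAX + delay would wrap in 32-bit arithmetic.
    int64_t tmp = (int64_t)_counter + (int64_t)delay_time;
    if (tmp >= (int64_t)_interval) {
      _counter = 0;
      task();
    } else {
      _counter += delay_time;  // still fits in int, since tmp < _interval
    }
  }
  void task() { std::puts("tick"); }
};

int main() {
  Task t(1000);
  t.execute_if_pending(600);
  t.execute_if_pending(600);  // fires here: 600 + 600 >= 1000
  return 0;
}
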
--- a/src/share/vm/runtime/thread.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/thread.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -65,6 +65,7 @@
 #include "runtime/statSampler.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/task.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
 #include "runtime/threadLocalStorage.hpp"
 #include "runtime/vframe.hpp"
@@ -83,19 +84,15 @@
 #include "utilities/preserveException.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "os_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "os_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 #ifndef SERIALGC
 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp"
@@ -323,12 +320,10 @@
   os::initialize_thread(this);
 
 #if INCLUDE_NMT
-   // record thread's native stack, stack grows downward
-  if (MemTracker::is_on()) {
-    address stack_low_addr = stack_base() - stack_size();
-    MemTracker::record_thread_stack(stack_low_addr, stack_size(), this,
+  // record thread's native stack, stack grows downward
+  address stack_low_addr = stack_base() - stack_size();
+  MemTracker::record_thread_stack(stack_low_addr, stack_size(), this,
       CURRENT_PC);
-  }
 #endif // INCLUDE_NMT
 }
 
@@ -345,6 +340,9 @@
   if (_stack_base != NULL) {
     address low_stack_addr = stack_base() - stack_size();
     MemTracker::release_thread_stack(low_stack_addr, stack_size(), this);
+#ifdef ASSERT
+    set_stack_base(NULL);
+#endif
   }
 #endif // INCLUDE_NMT
 
@@ -825,7 +823,7 @@
   return false;
 }
 
-void Thread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
+void Thread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
   active_handles()->oops_do(f);
   // Do oop for ThreadShadow
   f->do_oop((oop*)&_pending_exception);
@@ -1041,6 +1039,7 @@
 }
 
 char java_runtime_name[128] = "";
+char java_runtime_version[128] = "";
 
 // extract the JRE name from sun.misc.Version.java_runtime_name
 static const char* get_java_runtime_name(TRAPS) {
@@ -1063,6 +1062,27 @@
   }
 }
 
+// extract the JRE version from sun.misc.Version.java_runtime_version
+static const char* get_java_runtime_version(TRAPS) {
+  Klass* k = SystemDictionary::find(vmSymbols::sun_misc_Version(),
+                                      Handle(), Handle(), CHECK_AND_CLEAR_NULL);
+  fieldDescriptor fd;
+  bool found = k != NULL &&
+               InstanceKlass::cast(k)->find_local_field(vmSymbols::java_runtime_version_name(),
+                                                        vmSymbols::string_signature(), &fd);
+  if (found) {
+    oop name_oop = k->java_mirror()->obj_field(fd.offset());
+    if (name_oop == NULL)
+      return NULL;
+    const char* name = java_lang_String::as_utf8_string(name_oop,
+                                                        java_runtime_version,
+                                                        sizeof(java_runtime_version));
+    return name;
+  } else {
+    return NULL;
+  }
+}
+
 // General purpose hook into Java code, run once when the VM is initialized.
 // The Java library method itself may be changed independently from the VM.
 static void call_postVMInitHook(TRAPS) {
@@ -1194,6 +1214,7 @@
 // timer interrupts exists on the platform.
 
 WatcherThread* WatcherThread::_watcher_thread   = NULL;
+bool WatcherThread::_startable = false;
 volatile bool  WatcherThread::_should_terminate = false;
 
 WatcherThread::WatcherThread() : Thread() {
@@ -1214,6 +1235,55 @@
   }
 }
 
+int WatcherThread::sleep() const {
+  MutexLockerEx ml(PeriodicTask_lock, Mutex::_no_safepoint_check_flag);
+
+  // remaining will be zero if there are no tasks,
+  // causing the WatcherThread to sleep until a task is
+  // enrolled
+  int remaining = PeriodicTask::time_to_wait();
+  int time_slept = 0;
+
+  // we expect this to time out - we only ever get unparked when
+  // we should terminate or when a new task has been enrolled
+  OSThreadWaitState osts(this->osthread(), false /* not Object.wait() */);
+
+  jlong time_before_loop = os::javaTimeNanos();
+
+  for (;;) {
+    bool timedout = PeriodicTask_lock->wait(Mutex::_no_safepoint_check_flag, remaining);
+    jlong now = os::javaTimeNanos();
+
+    if (remaining == 0) {
+        // if we didn't have any tasks we could have waited for a long time
+        // consider the time_slept zero and reset time_before_loop
+        time_slept = 0;
+        time_before_loop = now;
+    } else {
+        // need to recalculate since we might have new tasks in _tasks
+        time_slept = (int) ((now - time_before_loop) / 1000000);
+    }
+
+    // Change to task list or spurious wakeup of some kind
+    if (timedout || _should_terminate) {
+        break;
+    }
+
+    remaining = PeriodicTask::time_to_wait();
+    if (remaining == 0) {
+        // Last task was just disenrolled so loop around and wait until
+        // another task gets enrolled
+        continue;
+    }
+
+    remaining -= time_slept;
+    if (remaining <= 0)
+      break;
+  }
+
+  return time_slept;
+}
+
 void WatcherThread::run() {
   assert(this == watcher_thread(), "just checking");
 
@@ -1226,26 +1296,7 @@
 
     // Calculate how long it'll be until the next PeriodicTask work
     // should be done, and sleep that amount of time.
-    size_t time_to_wait = PeriodicTask::time_to_wait();
-
-    // we expect this to timeout - we only ever get unparked when
-    // we should terminate
-    {
-      OSThreadWaitState osts(this->osthread(), false /* not Object.wait() */);
-
-      jlong prev_time = os::javaTimeNanos();
-      for (;;) {
-        int res= _SleepEvent->park(time_to_wait);
-        if (res == OS_TIMEOUT || _should_terminate)
-          break;
-        // spurious wakeup of some kind
-        jlong now = os::javaTimeNanos();
-        time_to_wait -= (now - prev_time) / 1000000;
-        if (time_to_wait <= 0)
-          break;
-        prev_time = now;
-      }
-    }
+    int time_waited = sleep();
 
     if (is_error_reported()) {
       // A fatal error has happened, the error handler(VMError::report_and_die)
@@ -1275,13 +1326,7 @@
       }
     }
 
-    PeriodicTask::real_time_tick(time_to_wait);
-
-    // If we have no more tasks left due to dynamic disenrollment,
-    // shut down the thread since we don't currently support dynamic enrollment
-    if (PeriodicTask::num_tasks() == 0) {
-      _should_terminate = true;
-    }
+    PeriodicTask::real_time_tick(time_waited);
   }
 
   // Signal that it is terminated
@@ -1296,22 +1341,33 @@
 }
 
 void WatcherThread::start() {
-  if (watcher_thread() == NULL) {
+  assert(PeriodicTask_lock->owned_by_self(), "PeriodicTask_lock required");
+
+  if (watcher_thread() == NULL && _startable) {
     _should_terminate = false;
     // Create the single instance of WatcherThread
     new WatcherThread();
   }
 }
 
+void WatcherThread::make_startable() {
+  assert(PeriodicTask_lock->owned_by_self(), "PeriodicTask_lock required");
+  _startable = true;
+}
+
 void WatcherThread::stop() {
+  {
+    MutexLockerEx ml(PeriodicTask_lock, Mutex::_no_safepoint_check_flag);
+    _should_terminate = true;
+    OrderAccess::fence();  // ensure WatcherThread sees update in main loop
+
+    WatcherThread* watcher = watcher_thread();
+    if (watcher != NULL)
+      watcher->unpark();
+  }
+
   // it is ok to take late safepoints here, if needed
   MutexLocker mu(Terminator_lock);
-  _should_terminate = true;
-  OrderAccess::fence();  // ensure WatcherThread sees update in main loop
-
-  Thread* watcher = watcher_thread();
-  if (watcher != NULL)
-    watcher->_SleepEvent->unpark();
 
   while(watcher_thread() != NULL) {
     // This wait should make safepoint checks, wait without a timeout,
@@ -1329,6 +1385,11 @@
   }
 }
 
+void WatcherThread::unpark() {
+  MutexLockerEx ml(PeriodicTask_lock->owned_by_self() ? NULL : PeriodicTask_lock, Mutex::_no_safepoint_check_flag);
+  PeriodicTask_lock->notify();
+}
+
 void WatcherThread::print_on(outputStream* st) const {
   st->print("\"%s\" ", name());
   Thread::print_on(st);
@@ -1521,10 +1582,12 @@
       tty->print_cr("terminate thread %p", this);
   }
 
-  // Info NMT that this JavaThread is exiting, its memory
-  // recorder should be collected
+  // By now, this thread should already be invisible to safepoint,
+  // and its per-thread recorder also collected.
   assert(!is_safepoint_visible(), "wrong state");
-  MemTracker::thread_exiting(this);
+#if INCLUDE_NMT
+  assert(get_recorder() == NULL, "Already collected");
+#endif // INCLUDE_NMT
 
   // JSR166 -- return the parker to the free list
   Parker::Release(_parker);
@@ -1653,7 +1716,6 @@
 // cleanup_failed_attach_current_thread as well.
 void JavaThread::exit(bool destroy_vm, ExitType exit_type) {
   assert(this == JavaThread::current(),  "thread consistency check");
-  if (!InitializeJavaLangSystem) return;
 
   HandleMark hm(this);
   Handle uncaught_exception(this, this->pending_exception());
@@ -1719,7 +1781,7 @@
           jio_fprintf(defaultStream::error_stream(),
                 "\nException: %s thrown from the UncaughtExceptionHandler"
                 " in thread \"%s\"\n",
-                Klass::cast(pending_exception()->klass())->external_name(),
+                pending_exception()->klass()->external_name(),
                 get_thread_name());
           CLEAR_PENDING_EXCEPTION;
         }
@@ -2127,7 +2189,7 @@
           // BiasedLocking needs an updated RegisterMap for the revoke monitors pass
           RegisterMap reg_map(this, UseBiasedLocking);
           frame compiled_frame = f.sender(&reg_map);
-          if (compiled_frame.can_be_deoptimized()) {
+          if (!StressCompiledExceptionHandlers && compiled_frame.can_be_deoptimized()) {
             Deoptimization::deoptimize(this, compiled_frame, &reg_map);
           }
         }
@@ -2425,6 +2487,7 @@
 }
 
 void JavaThread::remove_stack_guard_pages() {
+  assert(Thread::current() == this, "from different thread");
   if (_stack_guard_state == stack_guard_unused) return;
   address low_addr = stack_base() - stack_size();
   size_t len = (StackYellowPages + StackRedPages) * os::vm_page_size();
@@ -2638,7 +2701,7 @@
   }
 };
 
-void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
+void JavaThread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
   // Verify that the deferred card marks have been flushed.
   assert(deferred_card_mark().is_empty(), "Should be empty during GC");
 
@@ -2646,7 +2709,7 @@
   // since there may be more than one thread using each ThreadProfiler.
 
   // Traverse the GCHandles
-  Thread::oops_do(f, cf);
+  Thread::oops_do(f, cld_f, cf);
 
   assert( (!has_last_Java_frame() && java_call_counter() == 0) ||
           (has_last_Java_frame() && java_call_counter() > 0), "wrong java_sp info!");
@@ -2674,7 +2737,7 @@
 
     // Traverse the execution stack
     for(StackFrameStream fst(this); !fst.is_done(); fst.next()) {
-      fst.current()->oops_do(f, cf, fst.register_map());
+      fst.current()->oops_do(f, cld_f, cf, fst.register_map());
     }
   }
 
@@ -2808,7 +2871,7 @@
 
 void JavaThread::verify() {
   // Verify oops in the thread.
-  oops_do(&VerifyOopClosure::verify_oop, NULL);
+  oops_do(&VerifyOopClosure::verify_oop, NULL, NULL);
 
   // Verify the stack frames.
   frames_do(frame_verify);
@@ -3058,7 +3121,7 @@
 static void oops_print(frame* f, const RegisterMap *map) {
   PrintAndVerifyOopClosure print;
   f->print_value();
-  f->oops_do(&print, NULL, (RegisterMap*)map);
+  f->oops_do(&print, NULL, NULL, (RegisterMap*)map);
 }
 
 // Print our all the locations that contain oops and whether they are
@@ -3160,8 +3223,8 @@
 #endif
 }
 
-void CompilerThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
-  JavaThread::oops_do(f, cf);
+void CompilerThread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+  JavaThread::oops_do(f, cld_f, cf);
   if (_scanned_nmethod != NULL && cf != NULL) {
     // Safepoints can occur when the sweeper is scanning an nmethod so
     // process it here to make sure it isn't unloaded in the middle of
@@ -3267,6 +3330,9 @@
   jint os_init_2_result = os::init_2();
   if (os_init_2_result != JNI_OK) return os_init_2_result;
 
+  jint adjust_after_os_result = Arguments::adjust_after_os();
+  if (adjust_after_os_result != JNI_OK) return adjust_after_os_result;
+
   // initialize TLS
   ThreadLocalStorage::init();
 
@@ -3402,11 +3468,7 @@
       create_vm_init_libraries();
     }
 
-    if (InitializeJavaLangString) {
-      initialize_class(vmSymbols::java_lang_String(), CHECK_0);
-    } else {
-      warning("java.lang.String not initialized");
-    }
+    initialize_class(vmSymbols::java_lang_String(), CHECK_0);
 
     if (AggressiveOpts) {
       {
@@ -3447,51 +3509,39 @@
     }
 
     // Initialize java_lang.System (needed before creating the thread)
-    if (InitializeJavaLangSystem) {
-      initialize_class(vmSymbols::java_lang_System(), CHECK_0);
-      initialize_class(vmSymbols::java_lang_ThreadGroup(), CHECK_0);
-      Handle thread_group = create_initial_thread_group(CHECK_0);
-      Universe::set_main_thread_group(thread_group());
-      initialize_class(vmSymbols::java_lang_Thread(), CHECK_0);
-      oop thread_object = create_initial_thread(thread_group, main_thread, CHECK_0);
-      main_thread->set_threadObj(thread_object);
-      // Set thread status to running since main thread has
-      // been started and running.
-      java_lang_Thread::set_thread_status(thread_object,
-                                          java_lang_Thread::RUNNABLE);
-
-      // The VM preresolve methods to these classes. Make sure that get initialized
-      initialize_class(vmSymbols::java_lang_reflect_Method(), CHECK_0);
-      initialize_class(vmSymbols::java_lang_ref_Finalizer(),  CHECK_0);
-      // The VM creates & returns objects of this class. Make sure it's initialized.
-      initialize_class(vmSymbols::java_lang_Class(), CHECK_0);
-      call_initializeSystemClass(CHECK_0);
-
-      // get the Java runtime name after java.lang.System is initialized
-      JDK_Version::set_runtime_name(get_java_runtime_name(THREAD));
-    } else {
-      warning("java.lang.System not initialized");
-    }
+    initialize_class(vmSymbols::java_lang_System(), CHECK_0);
+    initialize_class(vmSymbols::java_lang_ThreadGroup(), CHECK_0);
+    Handle thread_group = create_initial_thread_group(CHECK_0);
+    Universe::set_main_thread_group(thread_group());
+    initialize_class(vmSymbols::java_lang_Thread(), CHECK_0);
+    oop thread_object = create_initial_thread(thread_group, main_thread, CHECK_0);
+    main_thread->set_threadObj(thread_object);
+    // Set thread status to running since main thread has
+    // been started and running.
+    java_lang_Thread::set_thread_status(thread_object,
+                                        java_lang_Thread::RUNNABLE);
+
+    // The VM creates & returns objects of this class. Make sure it's initialized.
+    initialize_class(vmSymbols::java_lang_Class(), CHECK_0);
+
+    // The VM preresolves methods to these classes. Make sure that they get initialized
+    initialize_class(vmSymbols::java_lang_reflect_Method(), CHECK_0);
+    initialize_class(vmSymbols::java_lang_ref_Finalizer(),  CHECK_0);
+    call_initializeSystemClass(CHECK_0);
+
+    // get the Java runtime name after java.lang.System is initialized
+    JDK_Version::set_runtime_name(get_java_runtime_name(THREAD));
+    JDK_Version::set_runtime_version(get_java_runtime_version(THREAD));
 
     // an instance of OutOfMemory exception has been allocated earlier
-    if (InitializeJavaLangExceptionsErrors) {
-      initialize_class(vmSymbols::java_lang_OutOfMemoryError(), CHECK_0);
-      initialize_class(vmSymbols::java_lang_NullPointerException(), CHECK_0);
-      initialize_class(vmSymbols::java_lang_ClassCastException(), CHECK_0);
-      initialize_class(vmSymbols::java_lang_ArrayStoreException(), CHECK_0);
-      initialize_class(vmSymbols::java_lang_ArithmeticException(), CHECK_0);
-      initialize_class(vmSymbols::java_lang_StackOverflowError(), CHECK_0);
-      initialize_class(vmSymbols::java_lang_IllegalMonitorStateException(), CHECK_0);
-      initialize_class(vmSymbols::java_lang_IllegalArgumentException(), CHECK_0);
-    } else {
-      warning("java.lang.OutOfMemoryError has not been initialized");
-      warning("java.lang.NullPointerException has not been initialized");
-      warning("java.lang.ClassCastException has not been initialized");
-      warning("java.lang.ArrayStoreException has not been initialized");
-      warning("java.lang.ArithmeticException has not been initialized");
-      warning("java.lang.StackOverflowError has not been initialized");
-      warning("java.lang.IllegalArgumentException has not been initialized");
-    }
+    initialize_class(vmSymbols::java_lang_OutOfMemoryError(), CHECK_0);
+    initialize_class(vmSymbols::java_lang_NullPointerException(), CHECK_0);
+    initialize_class(vmSymbols::java_lang_ClassCastException(), CHECK_0);
+    initialize_class(vmSymbols::java_lang_ArrayStoreException(), CHECK_0);
+    initialize_class(vmSymbols::java_lang_ArithmeticException(), CHECK_0);
+    initialize_class(vmSymbols::java_lang_StackOverflowError(), CHECK_0);
+    initialize_class(vmSymbols::java_lang_IllegalMonitorStateException(), CHECK_0);
+    initialize_class(vmSymbols::java_lang_IllegalArgumentException(), CHECK_0);
   }
 
   // See        : bugid 4211085.
@@ -3599,7 +3649,7 @@
   }
 
   // initialize compiler(s)
-#if defined(COMPILER1) || defined(COMPILER2)
+#if defined(COMPILER1) || defined(COMPILER2) || defined(SHARK)
   CompileBroker::compilation_init();
 #endif
 
@@ -3631,12 +3681,18 @@
     }
   }
 
-  // Start up the WatcherThread if there are any periodic tasks
-  // NOTE:  All PeriodicTasks should be registered by now. If they
-  //   aren't, late joiners might appear to start slowly (we might
-  //   take a while to process their first tick).
-  if (PeriodicTask::num_tasks() > 0) {
-    WatcherThread::start();
+  {
+      MutexLockerEx ml(PeriodicTask_lock, Mutex::_no_safepoint_check_flag);
+      // Make sure the watcher thread can be started by WatcherThread::start()
+      // or by dynamic enrollment.
+      WatcherThread::make_startable();
+      // Start up the WatcherThread if there are any periodic tasks
+      // NOTE:  All PeriodicTasks should be registered by now. If they
+      //   aren't, late joiners might appear to start slowly (we might
+      //   take a while to process their first tick).
+      if (PeriodicTask::num_tasks() > 0) {
+          WatcherThread::start();
+      }
   }
 
   // Give os specific code one last chance to start
@@ -3679,8 +3735,10 @@
       }
     } else {
       // Try to load the agent from the standard dll directory
-      os::dll_build_name(buffer, sizeof(buffer), Arguments::get_dll_dir(), name);
-      library = os::dll_load(buffer, ebuf, sizeof ebuf);
+      if (os::dll_build_name(buffer, sizeof(buffer), Arguments::get_dll_dir(),
+                             name)) {
+        library = os::dll_load(buffer, ebuf, sizeof ebuf);
+      }
 #ifdef KERNEL
       // Download instrument dll
       if (library == NULL && strcmp(name, "instrument") == 0) {
@@ -3705,8 +3763,9 @@
 #endif // KERNEL
       if (library == NULL) { // Try the local directory
         char ns[1] = {0};
-        os::dll_build_name(buffer, sizeof(buffer), ns, name);
-        library = os::dll_load(buffer, ebuf, sizeof ebuf);
+        if (os::dll_build_name(buffer, sizeof(buffer), ns, name)) {
+          library = os::dll_load(buffer, ebuf, sizeof ebuf);
+        }
         if (library == NULL) {
           const char *sub_msg = " on the library path, with error: ";
           size_t len = strlen(msg) + strlen(name) + strlen(sub_msg) + strlen(ebuf) + 1;
@@ -3933,10 +3992,6 @@
                          Mutex::_as_suspend_equivalent_flag);
   }
 
-  // Shutdown NMT before exit. Otherwise,
-  // it will run into trouble when system destroys static variables.
-  MemTracker::shutdown(MemTracker::NMT_normal);
-
   // Hang forever on exit if we are reporting an error.
   if (ShowMessageBoxOnError && is_error_reported()) {
     os::infinite_sleep();
@@ -4093,7 +4148,10 @@
 
     // Now, this thread is not visible to safepoint
     p->set_safepoint_visible(false);
-
+    // once the thread becomes safepoint invisible, we cannot use its per-thread
+    // recorder. And Threads::do_threads() no longer walks this thread, so we have
+    // to release its per-thread recorder here.
+    MemTracker::thread_exiting(p);
   } // unlock Threads_lock
 
   // Since Events::log uses a lock, we grab it outside the Threads_lock
@@ -4118,14 +4176,14 @@
 // uses the Threads_lock to guarantee this property. It also makes sure that
 // all threads get blocked when exiting or starting).
 
-void Threads::oops_do(OopClosure* f, CodeBlobClosure* cf) {
+void Threads::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
   ALL_JAVA_THREADS(p) {
-    p->oops_do(f, cf);
+    p->oops_do(f, cld_f, cf);
   }
-  VMThread::vm_thread()->oops_do(f, cf);
+  VMThread::vm_thread()->oops_do(f, cld_f, cf);
 }
 
-void Threads::possibly_parallel_oops_do(OopClosure* f, CodeBlobClosure* cf) {
+void Threads::possibly_parallel_oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
   // Introduce a mechanism allowing parallel threads to claim threads as
   // root groups.  Overhead should be small enough to use all the time,
   // even in sequential code.
@@ -4142,12 +4200,12 @@
   int cp = SharedHeap::heap()->strong_roots_parity();
   ALL_JAVA_THREADS(p) {
     if (p->claim_oops_do(is_par, cp)) {
-      p->oops_do(f, cf);
+      p->oops_do(f, cld_f, cf);
     }
   }
   VMThread* vmt = VMThread::vm_thread();
   if (vmt->claim_oops_do(is_par, cp)) {
-    vmt->oops_do(f, cf);
+    vmt->oops_do(f, cld_f, cf);
   }
 }
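
Note: WatcherThread::sleep() above waits on PeriodicTask_lock with a timeout and, after every wakeup, recomputes the time actually slept from a monotonic clock instead of trusting the wait call, so spurious wakeups and newly enrolled tasks shorten the next wait correctly. A condensed standalone sketch of that loop shape (std::condition_variable standing in for the VM monitor; the task-list bookkeeping is elided):

// Timed wait plus a monotonic-clock recalculation of the time slept.
#include <chrono>
#include <condition_variable>
#include <mutex>

static std::mutex m;
static std::condition_variable cv;
static bool should_terminate = false;
static int time_to_wait_ms() { return 50; }  // stand-in for PeriodicTask::time_to_wait()

int watcher_sleep() {
  std::unique_lock<std::mutex> ml(m);
  int remaining = time_to_wait_ms();
  int time_slept = 0;
  auto before = std::chrono::steady_clock::now();
  for (;;) {
    bool timedout = cv.wait_for(ml, std::chrono::milliseconds(remaining)) ==
                    std::cv_status::timeout;
    auto now = std::chrono::steady_clock::now();
    // Measure real elapsed time; the wait may have returned early.
    time_slept = (int)std::chrono::duration_cast<std::chrono::milliseconds>(
                     now - before).count();
    if (timedout || should_terminate) break;
    remaining = time_to_wait_ms() - time_slept;  // spurious wakeup: keep waiting
    if (remaining <= 0) break;
  }
  return time_slept;
}

int main() { return watcher_sleep() >= 0 ? 0 : 1; }
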
 
--- a/src/share/vm/runtime/thread.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/thread.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -480,8 +480,10 @@
 
   // GC support
   // Apply "f->do_oop" to all root oops in "this".
+  // Apply "cld_f->do_cld" to CLDs that are otherwise not kept alive.
+  //   Used by JavaThread::oops_do.
   // Apply "cf->do_code_blob" (if !NULL) to all code blobs active in frames
-  virtual void oops_do(OopClosure* f, CodeBlobClosure* cf);
+  virtual void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
 
   // Handles the parallel case for the method below.
 private:
@@ -722,6 +724,7 @@
  private:
   static WatcherThread* _watcher_thread;
 
+  static bool _startable;
   volatile static bool _should_terminate; // updated without holding lock
  public:
   enum SomeConstants {
@@ -738,6 +741,7 @@
   char* name() const { return (char*)"VM Periodic Task Thread"; }
   void print_on(outputStream* st) const;
   void print() const { print_on(tty); }
+  void unpark();
 
   // Returns the single instance of WatcherThread
   static WatcherThread* watcher_thread()         { return _watcher_thread; }
@@ -745,6 +749,12 @@
   // Create and start the single instance of WatcherThread, or stop it on shutdown
   static void start();
   static void stop();
+  // Only allow start once the VM is sufficiently initialized
+  // Otherwise the first task to enroll will trigger the start
+  static void make_startable();
+
+ private:
+  int sleep() const;
 };
 
 
@@ -1397,7 +1407,7 @@
   void frames_do(void f(frame*, const RegisterMap*));
 
   // Memory operations
-  void oops_do(OopClosure* f, CodeBlobClosure* cf);
+  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
 
   // Sweeper operations
   void nmethods_do(CodeBlobClosure* cf);
@@ -1817,7 +1827,7 @@
   // GC support
   // Apply "f->do_oop" to all root oops in "this".
   // Apply "cf->do_code_blob" (if !NULL) to all code blobs active in frames
-  void oops_do(OopClosure* f, CodeBlobClosure* cf);
+  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
 
 #ifndef PRODUCT
 private:
@@ -1884,9 +1894,9 @@
 
   // Apply "f->do_oop" to all root oops in all threads.
   // This version may only be called by sequential code.
-  static void oops_do(OopClosure* f, CodeBlobClosure* cf);
+  static void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
   // This version may be called by sequential or parallel code.
-  static void possibly_parallel_oops_do(OopClosure* f, CodeBlobClosure* cf);
+  static void possibly_parallel_oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
   // This creates a list of GCTasks, one per thread.
   static void create_thread_roots_tasks(GCTaskQueue* q);
   // This creates a list of GCTasks, one per thread, for marking objects.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/thread.inline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_THREAD_INLINE_HPP
+#define SHARE_VM_RUNTIME_THREAD_INLINE_HPP
+
+#define SHARE_VM_RUNTIME_THREAD_INLINE_HPP_SCOPE
+
+#include "runtime/thread.hpp"
+#ifdef TARGET_OS_FAMILY_linux
+# include "thread_linux.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_solaris
+# include "thread_solaris.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_windows
+# include "thread_windows.inline.hpp"
+#endif
+#ifdef TARGET_OS_FAMILY_bsd
+# include "thread_bsd.inline.hpp"
+#endif
+
+#undef SHARE_VM_RUNTIME_THREAD_INLINE_HPP_SCOPE
+
+#endif // SHARE_VM_RUNTIME_THREAD_INLINE_HPP
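
Note: the new thread.inline.hpp centralizes the per-OS #ifdef ladder that the other hunks delete from each .cpp file: clients include one umbrella header, and exactly one platform branch is compiled in. A toy single-file sketch of the same dispatch pattern (platform macros and names are illustrative):

// Umbrella dispatch: the #ifdef ladder lives in one place, not in every client.
#include <cstdio>

#if defined(__linux__)
  static const char* thread_impl() { return "thread_linux.inline.hpp"; }
#elif defined(_WIN32)
  static const char* thread_impl() { return "thread_windows.inline.hpp"; }
#elif defined(__APPLE__) || defined(__FreeBSD__)
  static const char* thread_impl() { return "thread_bsd.inline.hpp"; }
#else
  static const char* thread_impl() { return "thread_solaris.inline.hpp"; }
#endif

int main() { std::puts(thread_impl()); return 0; }
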
--- a/src/share/vm/runtime/threadLocalStorage.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/threadLocalStorage.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,22 +23,19 @@
  */
 
 #include "precompiled.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "os_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "os_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 
 // static member initialization
--- a/src/share/vm/runtime/timer.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/timer.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -210,8 +210,9 @@
     } else {
       _logfile->print("[Error in TraceCPUTime]");
     }
-     if (_print_cr) {
+    if (_print_cr) {
       _logfile->print_cr("");
     }
+    _logfile->flush();
   }
 }
--- a/src/share/vm/runtime/vframe.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/vframe.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -149,7 +149,7 @@
       Klass* target_klass = java_lang_Class::as_Klass(obj());
       st->print_cr("(a java.lang.Class for %s)", InstanceKlass::cast(target_klass)->external_name());
     } else {
-      Klass* k = Klass::cast(obj->klass());
+      Klass* k = obj->klass();
       st->print_cr("(a %s)", k->external_name());
     }
   }
@@ -161,7 +161,7 @@
   // If this is the first frame, and java.lang.Object.wait(...) then print out the receiver.
   if (frame_count == 0) {
     if (method()->name() == vmSymbols::wait_name() &&
-        InstanceKlass::cast(method()->method_holder())->name() == vmSymbols::java_lang_Object()) {
+        method()->method_holder()->name() == vmSymbols::java_lang_Object()) {
       StackValueCollection* locs = locals();
       if (!locs->is_empty()) {
         StackValue* sv = locs->at(0);
@@ -172,7 +172,7 @@
       }
     } else if (thread()->current_park_blocker() != NULL) {
       oop obj = thread()->current_park_blocker();
-      Klass* k = Klass::cast(obj->klass());
+      Klass* k = obj->klass();
       st->print_cr("\t- %s <" INTPTR_FORMAT "> (a %s)", "parking to wait for ", (address)obj, k->external_name());
     }
   }
@@ -407,7 +407,7 @@
     if (Universe::reflect_invoke_cache()->is_same_method(method())) {
       // This is Method.invoke() -- skip it
     } else if (use_new_reflection &&
-              Klass::cast(method()->method_holder())
+              method()->method_holder()
                  ->is_subclass_of(SystemDictionary::reflect_MethodAccessorImpl_klass())) {
      // This is an auxiliary frame -- skip it
     } else if (method()->is_method_handle_intrinsic() ||
@@ -471,8 +471,8 @@
 void vframeStreamCommon::skip_reflection_related_frames() {
   while (!at_end() &&
          (JDK_Version::is_gte_jdk14x_version() && UseNewReflection &&
-          (Klass::cast(method()->method_holder())->is_subclass_of(SystemDictionary::reflect_MethodAccessorImpl_klass()) ||
-           Klass::cast(method()->method_holder())->is_subclass_of(SystemDictionary::reflect_ConstructorAccessorImpl_klass())))) {
+          (method()->method_holder()->is_subclass_of(SystemDictionary::reflect_MethodAccessorImpl_klass()) ||
+           method()->method_holder()->is_subclass_of(SystemDictionary::reflect_ConstructorAccessorImpl_klass())))) {
     next();
   }
 }
@@ -547,13 +547,13 @@
 
 void javaVFrame::print_value() const {
   Method*    m = method();
-  Klass*     k = m->method_holder();
+  InstanceKlass*     k = m->method_holder();
   tty->print_cr("frame( sp=" INTPTR_FORMAT ", unextended_sp=" INTPTR_FORMAT ", fp=" INTPTR_FORMAT ", pc=" INTPTR_FORMAT ")",
                 _fr.sp(),  _fr.unextended_sp(), _fr.fp(), _fr.pc());
-  tty->print("%s.%s", Klass::cast(k)->internal_name(), m->name()->as_C_string());
+  tty->print("%s.%s", k->internal_name(), m->name()->as_C_string());
 
   if (!m->is_native()) {
-    Symbol*  source_name = InstanceKlass::cast(k)->source_file_name();
+    Symbol*  source_name = k->source_file_name();
     int        line_number = m->line_number_from_bci(bci());
     if (source_name != NULL && (line_number != -1)) {
       tty->print("(%s:%d)", source_name->as_C_string(), line_number);
--- a/src/share/vm/runtime/virtualspace.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/virtualspace.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -329,20 +329,9 @@
     if ((((size_t)base + noaccess_prefix) & (alignment - 1)) != 0) {
       // Base not aligned, retry
       if (!os::release_memory(base, size)) fatal("os::release_memory failed");
-      // Reserve size large enough to do manual alignment and
-      // increase size to a multiple of the desired alignment
+      // Make sure that size is aligned
       size = align_size_up(size, alignment);
-      size_t extra_size = size + alignment;
-      do {
-        char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
-        if (extra_base == NULL) return;
-        // Do manual alignement
-        base = (char*) align_size_up((uintptr_t) extra_base, alignment);
-        assert(base >= extra_base, "just checking");
-        // Re-reserve the region at the aligned base address.
-        os::release_memory(extra_base, extra_size);
-        base = os::reserve_memory(size, base);
-      } while (base == NULL);
+      base = os::reserve_memory_aligned(size, alignment);
 
       if (requested_address != 0 &&
           failed_to_reserve_as_requested(base, requested_address, size, false)) {
@@ -879,8 +868,8 @@
   tty->print   ("Virtual space:");
   if (special()) tty->print(" (pinned in memory)");
   tty->cr();
-  tty->print_cr(" - committed: %ld", committed_size());
-  tty->print_cr(" - reserved:  %ld", reserved_size());
+  tty->print_cr(" - committed: " SIZE_FORMAT, committed_size());
+  tty->print_cr(" - reserved:  " SIZE_FORMAT, reserved_size());
   tty->print_cr(" - [low, high]:     [" INTPTR_FORMAT ", " INTPTR_FORMAT "]",  low(), high());
   tty->print_cr(" - [low_b, high_b]: [" INTPTR_FORMAT ", " INTPTR_FORMAT "]",  low_boundary(), high_boundary());
 }
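
Note: the virtualspace.cpp hunk above replaces a hand-rolled over-reserve-and-trim loop with align_size_up() plus a single os::reserve_memory_aligned() call. A worked sketch of the round-up arithmetic involved (power-of-two alignments assumed, as in the VM; this is an illustration, not the HotSpot implementation):

// align_size_up and a comment on the manual-alignment dance it replaces.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Round size up to a multiple of alignment (alignment must be a power of two).
static uintptr_t align_size_up(uintptr_t size, uintptr_t alignment) {
  assert((alignment & (alignment - 1)) == 0 && "power of two expected");
  return (size + alignment - 1) & ~(alignment - 1);
}

int main() {
  // 13 KB rounded to a 4 KB boundary is 16 KB.
  std::printf("%lu\n", (unsigned long)align_size_up(13 * 1024, 4 * 1024));
  // The old loop reserved size + alignment bytes so that some address in the
  // range was guaranteed to be aligned, then released and re-reserved at that
  // address; os::reserve_memory_aligned now hides that dance behind one call.
  return 0;
}
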
--- a/src/share/vm/runtime/vmStructs.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/vmStructs.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -59,6 +59,7 @@
 #include "memory/generation.hpp"
 #include "memory/generationSpec.hpp"
 #include "memory/heap.hpp"
+#include "memory/metablock.hpp"
 #include "memory/space.hpp"
 #include "memory/tenuredGeneration.hpp"
 #include "memory/universe.hpp"
@@ -94,6 +95,7 @@
 #include "runtime/serviceThread.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/virtualspace.hpp"
 #include "runtime/vmStructs.hpp"
 #include "utilities/array.hpp"
@@ -114,18 +116,6 @@
 #ifdef TARGET_ARCH_ppc
 # include "vmStructs_ppc.hpp"
 #endif
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 #ifdef TARGET_OS_ARCH_linux_x86
 # include "vmStructs_linux_x86.hpp"
 #endif
@@ -249,6 +239,7 @@
 typedef Hashtable<Klass*, mtClass>            KlassHashtable;
 typedef HashtableEntry<Klass*, mtClass>       KlassHashtableEntry;
 typedef TwoOopHashtable<Symbol*, mtClass>     SymbolTwoOopHashtable;
+typedef BinaryTreeDictionary<Metablock, FreeList> MetablockTreeDictionary;
 
 //--------------------------------------------------------------------------------
 // VM_STRUCTS
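
The MetablockTreeDictionary typedef added above is not cosmetic: the preprocessor splits macro arguments on top-level commas, so a two-parameter template instantiation cannot be passed to the one-argument VM_STRUCTS/VM_TYPES macros directly. A small self-contained sketch of the problem and the typedef workaround (DECLARE_TOPLEVEL is a stand-in for the real macros):

    #include <cstddef>

    struct Metablock { char b; };
    template <class T> struct FreeList { T* head; };
    template <class T, template <class> class F> struct TreeDict { F<T> root; };

    struct Entry { const char* name; size_t size; };
    #define DECLARE_TOPLEVEL(t) { #t, sizeof(t) },

    // DECLARE_TOPLEVEL(TreeDict<Metablock, FreeList>)  // error: the comma
    //                                                  // makes two macro args
    typedef TreeDict<Metablock, FreeList> MetablockTreeDictionary;

    static Entry table[] = {
      DECLARE_TOPLEVEL(MetablockTreeDictionary)         // one token, one arg
    };
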
@@ -266,8 +257,7 @@
                    c1_nonstatic_field, \
                    c2_nonstatic_field, \
                    unchecked_c1_static_field, \
-                   unchecked_c2_static_field, \
-                   last_entry) \
+                   unchecked_c2_static_field) \
                                                                                                                                      \
   /******************************************************************/                                                               \
   /* OopDesc and Klass hierarchies (NOTE: MethodData* incomplete) */                                                               \
@@ -287,7 +277,7 @@
   nonstatic_field(CompiledICHolder,     _holder_klass,                                 Klass*)                                \
   nonstatic_field(ConstantPool,         _tags,                                         Array<u1>*)                            \
   nonstatic_field(ConstantPool,         _cache,                                        ConstantPoolCache*)             \
-  nonstatic_field(ConstantPool,         _pool_holder,                                  Klass*)                                \
+  nonstatic_field(ConstantPool,         _pool_holder,                                  InstanceKlass*)                        \
   nonstatic_field(ConstantPool,         _operands,                                     Array<u2>*)                            \
   nonstatic_field(ConstantPool,         _length,                                       int)                                   \
   nonstatic_field(ConstantPool,         _resolved_references,                          jobject)                               \
@@ -364,9 +354,6 @@
   nonstatic_field(Method,               _access_flags,                                 AccessFlags)                           \
   nonstatic_field(Method,               _vtable_index,                                 int)                                   \
   nonstatic_field(Method,               _method_size,                                  u2)                                    \
-  nonstatic_field(Method,               _max_stack,                                    u2)                                    \
-  nonstatic_field(Method,               _max_locals,                                   u2)                                    \
-  nonstatic_field(Method,               _size_of_parameters,                           u2)                                    \
   nonstatic_field(Method,               _interpreter_throwout_count,                   u2)                                    \
   nonstatic_field(Method,               _number_of_breakpoints,                        u2)                                    \
   nonstatic_field(Method,               _invocation_counter,                           InvocationCounter)                     \
@@ -387,7 +374,9 @@
   nonstatic_field(ConstMethod,          _name_index,                                   u2)                                    \
   nonstatic_field(ConstMethod,          _signature_index,                              u2)                                    \
   nonstatic_field(ConstMethod,          _method_idnum,                                 u2)                                    \
-  nonstatic_field(ConstMethod,          _generic_signature_index,                      u2)                                    \
+  nonstatic_field(ConstMethod,          _max_stack,                                    u2)                                    \
+  nonstatic_field(ConstMethod,          _max_locals,                                   u2)                                    \
+  nonstatic_field(ConstMethod,          _size_of_parameters,                           u2)                                    \
   nonstatic_field(ObjArrayKlass,               _element_klass,                                Klass*)                                \
   nonstatic_field(ObjArrayKlass,               _bottom_klass,                                 Klass*)                                \
   volatile_nonstatic_field(Symbol,             _refcount,                                     int)                                   \
@@ -728,7 +717,6 @@
   nonstatic_field(ClassLoaderData,             _next,                                         ClassLoaderData*)                      \
                                                                                                                                      \
   static_field(ClassLoaderDataGraph,           _head,                                         ClassLoaderData*)                      \
-  nonstatic_field(ClassLoaderDataGraph,        _unloading,                                    ClassLoaderData*)                      \
                                                                                                                                      \
   /*******************/                                                                                                              \
   /* GrowableArrays  */                                                                                                              \
@@ -991,6 +979,7 @@
                                                                                                                                      \
  nonstatic_field(ciMethod,     _interpreter_invocation_count, int)                                                                   \
  nonstatic_field(ciMethod,     _interpreter_throwout_count, int)                                                                     \
+ nonstatic_field(ciMethod,     _instructions_size, int)                                                                              \
                                                                                                                                      \
  nonstatic_field(ciMethodData, _data_size, int)                                                                                      \
  nonstatic_field(ciMethodData, _state, u_char)                                                                                       \
@@ -1209,7 +1198,6 @@
   /*********************************/                                                                                                \
                                                                                                                                      \
   static_field(java_lang_Class,                _klass_offset,                                 int)                                   \
-  static_field(java_lang_Class,                _resolved_constructor_offset,                  int)                                   \
   static_field(java_lang_Class,                _array_klass_offset,                           int)                                   \
   static_field(java_lang_Class,                _oop_size_offset,                              int)                                   \
   static_field(java_lang_Class,                _static_oop_field_count_offset,                int)                                   \
@@ -1237,11 +1225,16 @@
   nonstatic_field(AccessFlags,                 _flags,                                       jint)                                   \
   nonstatic_field(elapsedTimer,                _counter,                                     jlong)                                  \
   nonstatic_field(elapsedTimer,                _active,                                      bool)                                   \
-  nonstatic_field(InvocationCounter,           _counter,                                     unsigned int)
+  nonstatic_field(InvocationCounter,           _counter,                                     unsigned int)                           \
+  volatile_nonstatic_field(FreeChunk,          _size,                                        size_t)                                 \
+  nonstatic_field(FreeChunk,                   _next,                                        FreeChunk*)                             \
+  nonstatic_field(FreeChunk,                   _prev,                                        FreeChunk*)                             \
+  nonstatic_field(FreeList<FreeChunk>,         _size,                                        size_t)                                 \
+  nonstatic_field(FreeList<Metablock>,         _size,                                        size_t)                                 \
+  nonstatic_field(FreeList<FreeChunk>,         _count,                                       ssize_t)                                \
+  nonstatic_field(FreeList<Metablock>,         _count,                                       ssize_t)                                \
+  nonstatic_field(MetablockTreeDictionary,     _total_size,                                  size_t)
 
-  /* NOTE that we do not use the last_entry() macro here; it is used  */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_STRUCTS_OS_CPU macro (and must  */
-  /* be present there)                                                */
 
 //--------------------------------------------------------------------------------
 // VM_TYPES
@@ -1281,8 +1274,7 @@
                  declare_unsigned_integer_type,                           \
                  declare_c1_toplevel_type,                                \
                  declare_c2_type,                                         \
-                 declare_c2_toplevel_type,                                \
-                 last_entry)                                              \
+                 declare_c2_toplevel_type)                                \
                                                                           \
   /*************************************************************/         \
   /* Java primitive types -- required by the SA implementation */         \
@@ -2080,12 +2072,24 @@
   declare_toplevel_type(Universe)                                         \
   declare_toplevel_type(vframeArray)                                      \
   declare_toplevel_type(vframeArrayElement)                               \
-  declare_toplevel_type(Annotations*)
-
+  declare_toplevel_type(Annotations*)                                     \
+                                                                          \
+  /************************/                                              \
+  /* Miscellaneous types  */                                              \
+  /************************/                                              \
+                                                                          \
+  /* freelist */                                                          \
+  declare_toplevel_type(FreeChunk*)                                       \
+  declare_toplevel_type(Metablock*)                                       \
+  declare_toplevel_type(FreeBlockDictionary<FreeChunk>*)                  \
+  declare_toplevel_type(FreeList<FreeChunk>*)                             \
+  declare_toplevel_type(FreeList<FreeChunk>)                              \
+  declare_toplevel_type(FreeBlockDictionary<Metablock>*)                  \
+  declare_toplevel_type(FreeList<Metablock>*)                             \
+  declare_toplevel_type(FreeList<Metablock>)                              \
+  declare_toplevel_type(MetablockTreeDictionary*)                         \
+           declare_type(MetablockTreeDictionary, FreeBlockDictionary<Metablock>)
 
-  /* NOTE that we do not use the last_entry() macro here; it is used  */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_TYPES_OS_CPU macro (and must be */
-  /* present there)                                                   */
 
 //--------------------------------------------------------------------------------
 // VM_INT_CONSTANTS
@@ -2099,8 +2103,7 @@
                          declare_preprocessor_constant,                   \
                          declare_c1_constant,                             \
                          declare_c2_constant,                             \
-                         declare_c2_preprocessor_constant,                \
-                         last_entry)                                      \
+                         declare_c2_preprocessor_constant)                \
                                                                           \
   /******************/                                                    \
   /* Useful globals */                                                    \
@@ -2264,6 +2267,7 @@
   declare_constant(ConstMethod::_has_checked_exceptions)           \
   declare_constant(ConstMethod::_has_localvariable_table)          \
   declare_constant(ConstMethod::_has_exception_table)              \
+  declare_constant(ConstMethod::_has_generic_signature)            \
                                                                           \
   /*************************************/                                 \
   /* InstanceKlass enum                */                                 \
@@ -2278,10 +2282,18 @@
   declare_constant(FieldInfo::name_index_offset)                          \
   declare_constant(FieldInfo::signature_index_offset)                     \
   declare_constant(FieldInfo::initval_index_offset)                       \
-  declare_constant(FieldInfo::low_offset)                                 \
-  declare_constant(FieldInfo::high_offset)                                \
+  declare_constant(FieldInfo::low_packed_offset)                          \
+  declare_constant(FieldInfo::high_packed_offset)                         \
   declare_constant(FieldInfo::field_slots)                                \
                                                                           \
+  /*************************************/                                 \
+  /* FieldInfo tag constants           */                                 \
+  /*************************************/                                 \
+                                                                          \
+  declare_preprocessor_constant("FIELDINFO_TAG_SIZE", FIELDINFO_TAG_SIZE) \
+  declare_preprocessor_constant("FIELDINFO_TAG_MASK", FIELDINFO_TAG_MASK) \
+  declare_preprocessor_constant("FIELDINFO_TAG_OFFSET", FIELDINFO_TAG_OFFSET) \
+                                                                          \
   /************************************************/                      \
   /* InstanceKlass InnerClassAttributeOffset enum */                      \
   /************************************************/                      \
@@ -2447,7 +2459,7 @@
   /* frame              */                                                \
   /**********************/                                                \
                                                                           \
-  X86_ONLY(declare_constant(frame::entry_frame_call_wrapper_offset))      \
+  NOT_ZERO(X86_ONLY(declare_constant(frame::entry_frame_call_wrapper_offset)))      \
   declare_constant(frame::pc_return_offset)                               \
                                                                           \
   /*************/                                                         \
@@ -2467,9 +2479,6 @@
   declare_c2_preprocessor_constant("SAVED_ON_ENTRY_REG_COUNT", SAVED_ON_ENTRY_REG_COUNT) \
   declare_c2_preprocessor_constant("C_SAVED_ON_ENTRY_REG_COUNT", C_SAVED_ON_ENTRY_REG_COUNT)
 
-  /* NOTE that we do not use the last_entry() macro here; it is used  */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_INT_CONSTANTS_OS_CPU macro (and */
-  /* must be present there)                                           */
 
 //--------------------------------------------------------------------------------
 // VM_LONG_CONSTANTS
@@ -2479,7 +2488,7 @@
 // enums, etc., while "declare_preprocessor_constant" must be used for
 // all #defined constants.
 
-#define VM_LONG_CONSTANTS(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
+#define VM_LONG_CONSTANTS(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
                                                                           \
   /*********************/                                                 \
   /* MarkOop constants */                                                 \
@@ -2525,11 +2534,7 @@
   /* Constants in markOop used by CMS. */                                 \
   declare_constant(markOopDesc::cms_shift)                                \
   declare_constant(markOopDesc::cms_mask)                                 \
-  declare_constant(markOopDesc::size_shift)                               \
-
-  /* NOTE that we do not use the last_entry() macro here; it is used   */
-  /* in vmStructs_<os>_<cpu>.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and */
-  /* must be present there)                                            */
+  declare_constant(markOopDesc::size_shift)
 
 
 //--------------------------------------------------------------------------------
@@ -2569,7 +2574,8 @@
 
 // This macro checks the type of a VMStructEntry by comparing pointer types
 #define CHECK_NONSTATIC_VM_STRUCT_ENTRY(typeName, fieldName, type)                 \
- {typeName *dummyObj = NULL; type* dummy = &dummyObj->fieldName; }
+ {typeName *dummyObj = NULL; type* dummy = &dummyObj->fieldName;                   \
+  assert(offset_of(typeName, fieldName) < sizeof(typeName), "Illegal nonstatic struct entry, field offset too large"); }
 
 // This macro checks the type of a volatile VMStructEntry by comparing pointer types
 #define CHECK_VOLATILE_NONSTATIC_VM_STRUCT_ENTRY(typeName, fieldName, type)        \
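
The assert added to CHECK_NONSTATIC_VM_STRUCT_ENTRY above relies on a simple invariant: a legal nonstatic member's offset is always smaller than the size of its enclosing type, so a larger value flags a bad table entry (for instance, a static field registered through the nonstatic macro). A hedged standalone sketch using the standard offsetof in place of HotSpot's offset_of:

    #include <cstddef>
    #include <cassert>

    struct Example { int a; char b; };

    void check_entry_sketch() {
      // Any valid nonstatic field offset lies inside the object.
      assert(offsetof(Example, b) < sizeof(Example) &&
             "Illegal nonstatic struct entry, field offset too large");
    }
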
@@ -2592,9 +2598,6 @@
 // This is a no-op macro for unchecked fields
 #define CHECK_NO_OP(a, b, c)
 
-// This is a no-op macro for the sentinel value
-#define CHECK_SENTINEL()
-
 //
 // Build-specific macros:
 //
@@ -2773,48 +2776,47 @@
 // as long as class VMStructs is a friend
 VMStructEntry VMStructs::localHotSpotVMStructs[] = {
 
-  VM_STRUCTS(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
-             GENERATE_STATIC_VM_STRUCT_ENTRY, \
-             GENERATE_UNCHECKED_NONSTATIC_VM_STRUCT_ENTRY, \
-             GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
-             GENERATE_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY, \
-             GENERATE_C1_NONSTATIC_VM_STRUCT_ENTRY, \
-             GENERATE_C2_NONSTATIC_VM_STRUCT_ENTRY, \
-             GENERATE_C1_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
-             GENERATE_C2_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
-             GENERATE_VM_STRUCT_LAST_ENTRY)
+  VM_STRUCTS(GENERATE_NONSTATIC_VM_STRUCT_ENTRY,
+             GENERATE_STATIC_VM_STRUCT_ENTRY,
+             GENERATE_UNCHECKED_NONSTATIC_VM_STRUCT_ENTRY,
+             GENERATE_NONSTATIC_VM_STRUCT_ENTRY,
+             GENERATE_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY,
+             GENERATE_C1_NONSTATIC_VM_STRUCT_ENTRY,
+             GENERATE_C2_NONSTATIC_VM_STRUCT_ENTRY,
+             GENERATE_C1_UNCHECKED_STATIC_VM_STRUCT_ENTRY,
+             GENERATE_C2_UNCHECKED_STATIC_VM_STRUCT_ENTRY)
 
 #ifndef SERIALGC
-  VM_STRUCTS_PARALLELGC(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
+  VM_STRUCTS_PARALLELGC(GENERATE_NONSTATIC_VM_STRUCT_ENTRY,
                         GENERATE_STATIC_VM_STRUCT_ENTRY)
 
-  VM_STRUCTS_CMS(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
-                 GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
+  VM_STRUCTS_CMS(GENERATE_NONSTATIC_VM_STRUCT_ENTRY,
+                 GENERATE_NONSTATIC_VM_STRUCT_ENTRY,
                  GENERATE_STATIC_VM_STRUCT_ENTRY)
 
-  VM_STRUCTS_G1(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
+  VM_STRUCTS_G1(GENERATE_NONSTATIC_VM_STRUCT_ENTRY,
                 GENERATE_STATIC_VM_STRUCT_ENTRY)
 #endif // SERIALGC
 
-  VM_STRUCTS_CPU(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
-                 GENERATE_STATIC_VM_STRUCT_ENTRY, \
-                 GENERATE_UNCHECKED_NONSTATIC_VM_STRUCT_ENTRY, \
-                 GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
-                 GENERATE_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY, \
-                 GENERATE_C2_NONSTATIC_VM_STRUCT_ENTRY, \
-                 GENERATE_C1_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
-                 GENERATE_C2_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
-                 GENERATE_VM_STRUCT_LAST_ENTRY)
+  VM_STRUCTS_CPU(GENERATE_NONSTATIC_VM_STRUCT_ENTRY,
+                 GENERATE_STATIC_VM_STRUCT_ENTRY,
+                 GENERATE_UNCHECKED_NONSTATIC_VM_STRUCT_ENTRY,
+                 GENERATE_NONSTATIC_VM_STRUCT_ENTRY,
+                 GENERATE_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY,
+                 GENERATE_C2_NONSTATIC_VM_STRUCT_ENTRY,
+                 GENERATE_C1_UNCHECKED_STATIC_VM_STRUCT_ENTRY,
+                 GENERATE_C2_UNCHECKED_STATIC_VM_STRUCT_ENTRY)
 
-  VM_STRUCTS_OS_CPU(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
-                    GENERATE_STATIC_VM_STRUCT_ENTRY, \
-                    GENERATE_UNCHECKED_NONSTATIC_VM_STRUCT_ENTRY, \
-                    GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
-                    GENERATE_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY, \
-                    GENERATE_C2_NONSTATIC_VM_STRUCT_ENTRY, \
-                    GENERATE_C1_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
-                    GENERATE_C2_UNCHECKED_STATIC_VM_STRUCT_ENTRY, \
-                    GENERATE_VM_STRUCT_LAST_ENTRY)
+  VM_STRUCTS_OS_CPU(GENERATE_NONSTATIC_VM_STRUCT_ENTRY,
+                    GENERATE_STATIC_VM_STRUCT_ENTRY,
+                    GENERATE_UNCHECKED_NONSTATIC_VM_STRUCT_ENTRY,
+                    GENERATE_NONSTATIC_VM_STRUCT_ENTRY,
+                    GENERATE_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY,
+                    GENERATE_C2_NONSTATIC_VM_STRUCT_ENTRY,
+                    GENERATE_C1_UNCHECKED_STATIC_VM_STRUCT_ENTRY,
+                    GENERATE_C2_UNCHECKED_STATIC_VM_STRUCT_ENTRY)
+
+  GENERATE_VM_STRUCT_LAST_ENTRY()
 };
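
The refactoring above stops threading a last_entry macro through every platform's VM_STRUCTS variant and instead appends the terminator exactly once at the end of the initializer. A minimal sketch of the sentinel-terminated array pattern this produces (names are illustrative):

    #include <cstdio>
    #include <cstddef>

    struct TypeEntrySketch { const char* typeName; size_t size; };
    #define GEN_ENTRY(t) { #t, sizeof(t) },
    #define GEN_LAST()   { NULL, 0 }

    static TypeEntrySketch types[] = {
      GEN_ENTRY(int)
      GEN_ENTRY(double)
      GEN_LAST()        // single terminator, appended exactly once
    };

    int main() {
      for (int i = 0; types[i].typeName != NULL; i++)
        std::printf("%s: %d\n", types[i].typeName, (int)types[i].size);
      return 0;
    }
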
 
 VMTypeEntry VMStructs::localHotSpotVMTypes[] = {
@@ -2826,8 +2828,7 @@
            GENERATE_UNSIGNED_INTEGER_VM_TYPE_ENTRY,
            GENERATE_C1_TOPLEVEL_VM_TYPE_ENTRY,
            GENERATE_C2_VM_TYPE_ENTRY,
-           GENERATE_C2_TOPLEVEL_VM_TYPE_ENTRY,
-           GENERATE_VM_TYPE_LAST_ENTRY)
+           GENERATE_C2_TOPLEVEL_VM_TYPE_ENTRY)
 
 #ifndef SERIALGC
   VM_TYPES_PARALLELGC(GENERATE_VM_TYPE_ENTRY,
@@ -2849,8 +2850,7 @@
                GENERATE_UNSIGNED_INTEGER_VM_TYPE_ENTRY,
                GENERATE_C1_TOPLEVEL_VM_TYPE_ENTRY,
                GENERATE_C2_VM_TYPE_ENTRY,
-               GENERATE_C2_TOPLEVEL_VM_TYPE_ENTRY,
-               GENERATE_VM_TYPE_LAST_ENTRY)
+               GENERATE_C2_TOPLEVEL_VM_TYPE_ENTRY)
 
   VM_TYPES_OS_CPU(GENERATE_VM_TYPE_ENTRY,
                   GENERATE_TOPLEVEL_VM_TYPE_ENTRY,
@@ -2859,8 +2859,9 @@
                   GENERATE_UNSIGNED_INTEGER_VM_TYPE_ENTRY,
                   GENERATE_C1_TOPLEVEL_VM_TYPE_ENTRY,
                   GENERATE_C2_VM_TYPE_ENTRY,
-                  GENERATE_C2_TOPLEVEL_VM_TYPE_ENTRY,
-                  GENERATE_VM_TYPE_LAST_ENTRY)
+                  GENERATE_C2_TOPLEVEL_VM_TYPE_ENTRY)
+
+  GENERATE_VM_TYPE_LAST_ENTRY()
 };
 
 VMIntConstantEntry VMStructs::localHotSpotVMIntConstants[] = {
@@ -2869,8 +2870,7 @@
                    GENERATE_PREPROCESSOR_VM_INT_CONSTANT_ENTRY,
                    GENERATE_C1_VM_INT_CONSTANT_ENTRY,
                    GENERATE_C2_VM_INT_CONSTANT_ENTRY,
-                   GENERATE_C2_PREPROCESSOR_VM_INT_CONSTANT_ENTRY,
-                   GENERATE_VM_INT_CONSTANT_LAST_ENTRY)
+                   GENERATE_C2_PREPROCESSOR_VM_INT_CONSTANT_ENTRY)
 
 #ifndef SERIALGC
   VM_INT_CONSTANTS_CMS(GENERATE_VM_INT_CONSTANT_ENTRY)
@@ -2882,15 +2882,15 @@
                        GENERATE_PREPROCESSOR_VM_INT_CONSTANT_ENTRY,
                        GENERATE_C1_VM_INT_CONSTANT_ENTRY,
                        GENERATE_C2_VM_INT_CONSTANT_ENTRY,
-                       GENERATE_C2_PREPROCESSOR_VM_INT_CONSTANT_ENTRY,
-                       GENERATE_VM_INT_CONSTANT_LAST_ENTRY)
+                       GENERATE_C2_PREPROCESSOR_VM_INT_CONSTANT_ENTRY)
 
   VM_INT_CONSTANTS_OS_CPU(GENERATE_VM_INT_CONSTANT_ENTRY,
                           GENERATE_PREPROCESSOR_VM_INT_CONSTANT_ENTRY,
                           GENERATE_C1_VM_INT_CONSTANT_ENTRY,
                           GENERATE_C2_VM_INT_CONSTANT_ENTRY,
-                          GENERATE_C2_PREPROCESSOR_VM_INT_CONSTANT_ENTRY,
-                          GENERATE_VM_INT_CONSTANT_LAST_ENTRY)
+                          GENERATE_C2_PREPROCESSOR_VM_INT_CONSTANT_ENTRY)
+
+  GENERATE_VM_INT_CONSTANT_LAST_ENTRY()
 };
 
 VMLongConstantEntry VMStructs::localHotSpotVMLongConstants[] = {
@@ -2899,22 +2899,21 @@
                     GENERATE_PREPROCESSOR_VM_LONG_CONSTANT_ENTRY,
                     GENERATE_C1_VM_LONG_CONSTANT_ENTRY,
                     GENERATE_C2_VM_LONG_CONSTANT_ENTRY,
-                    GENERATE_C2_PREPROCESSOR_VM_LONG_CONSTANT_ENTRY,
-                    GENERATE_VM_LONG_CONSTANT_LAST_ENTRY)
+                    GENERATE_C2_PREPROCESSOR_VM_LONG_CONSTANT_ENTRY)
 
   VM_LONG_CONSTANTS_CPU(GENERATE_VM_LONG_CONSTANT_ENTRY,
                         GENERATE_PREPROCESSOR_VM_LONG_CONSTANT_ENTRY,
                         GENERATE_C1_VM_LONG_CONSTANT_ENTRY,
                         GENERATE_C2_VM_LONG_CONSTANT_ENTRY,
-                        GENERATE_C2_PREPROCESSOR_VM_LONG_CONSTANT_ENTRY,
-                        GENERATE_VM_LONG_CONSTANT_LAST_ENTRY)
+                        GENERATE_C2_PREPROCESSOR_VM_LONG_CONSTANT_ENTRY)
 
   VM_LONG_CONSTANTS_OS_CPU(GENERATE_VM_LONG_CONSTANT_ENTRY,
                            GENERATE_PREPROCESSOR_VM_LONG_CONSTANT_ENTRY,
                            GENERATE_C1_VM_LONG_CONSTANT_ENTRY,
                            GENERATE_C2_VM_LONG_CONSTANT_ENTRY,
-                           GENERATE_C2_PREPROCESSOR_VM_LONG_CONSTANT_ENTRY,
-                           GENERATE_VM_LONG_CONSTANT_LAST_ENTRY)
+                           GENERATE_C2_PREPROCESSOR_VM_LONG_CONSTANT_ENTRY)
+
+  GENERATE_VM_LONG_CONSTANT_LAST_ENTRY()
 };
 
 // This is used both to check the types of referenced fields and, in
@@ -2929,8 +2928,7 @@
              CHECK_C1_NONSTATIC_VM_STRUCT_ENTRY,
              CHECK_C2_NONSTATIC_VM_STRUCT_ENTRY,
              CHECK_NO_OP,
-             CHECK_NO_OP,
-             CHECK_SENTINEL);
+             CHECK_NO_OP);
 
 #ifndef SERIALGC
   VM_STRUCTS_PARALLELGC(CHECK_NONSTATIC_VM_STRUCT_ENTRY,
@@ -2951,8 +2949,7 @@
                  CHECK_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY,
                  CHECK_C2_NONSTATIC_VM_STRUCT_ENTRY,
                  CHECK_NO_OP,
-                 CHECK_NO_OP,
-                 CHECK_SENTINEL);
+                 CHECK_NO_OP);
 
   VM_STRUCTS_OS_CPU(CHECK_NONSTATIC_VM_STRUCT_ENTRY,
                     CHECK_STATIC_VM_STRUCT_ENTRY,
@@ -2961,8 +2958,7 @@
                     CHECK_NONPRODUCT_NONSTATIC_VM_STRUCT_ENTRY,
                     CHECK_C2_NONSTATIC_VM_STRUCT_ENTRY,
                     CHECK_NO_OP,
-                    CHECK_NO_OP,
-                    CHECK_SENTINEL);
+                    CHECK_NO_OP);
 
   VM_TYPES(CHECK_VM_TYPE_ENTRY,
            CHECK_SINGLE_ARG_VM_TYPE_NO_OP,
@@ -2971,8 +2967,7 @@
            CHECK_SINGLE_ARG_VM_TYPE_NO_OP,
            CHECK_C1_TOPLEVEL_VM_TYPE_ENTRY,
            CHECK_C2_VM_TYPE_ENTRY,
-           CHECK_C2_TOPLEVEL_VM_TYPE_ENTRY,
-           CHECK_SENTINEL);
+           CHECK_C2_TOPLEVEL_VM_TYPE_ENTRY);
 
 #ifndef SERIALGC
   VM_TYPES_PARALLELGC(CHECK_VM_TYPE_ENTRY,
@@ -2994,8 +2989,7 @@
                CHECK_SINGLE_ARG_VM_TYPE_NO_OP,
                CHECK_C1_TOPLEVEL_VM_TYPE_ENTRY,
                CHECK_C2_VM_TYPE_ENTRY,
-               CHECK_C2_TOPLEVEL_VM_TYPE_ENTRY,
-               CHECK_SENTINEL);
+               CHECK_C2_TOPLEVEL_VM_TYPE_ENTRY);
 
   VM_TYPES_OS_CPU(CHECK_VM_TYPE_ENTRY,
                   CHECK_SINGLE_ARG_VM_TYPE_NO_OP,
@@ -3004,8 +2998,7 @@
                   CHECK_SINGLE_ARG_VM_TYPE_NO_OP,
                   CHECK_C1_TOPLEVEL_VM_TYPE_ENTRY,
                   CHECK_C2_VM_TYPE_ENTRY,
-                  CHECK_C2_TOPLEVEL_VM_TYPE_ENTRY,
-                  CHECK_SENTINEL);
+                  CHECK_C2_TOPLEVEL_VM_TYPE_ENTRY);
 
   //
   // Split VM_STRUCTS() invocation into two parts to allow MS VC++ 6.0
@@ -3024,53 +3017,49 @@
   // Solstice NFS setup. If everyone switches to local workspaces on
   // Win32, we can put this back in.
 #ifndef _WINDOWS
-  debug_only(VM_STRUCTS(ENSURE_FIELD_TYPE_PRESENT, \
-                        CHECK_NO_OP, \
-                        CHECK_NO_OP, \
-                        CHECK_NO_OP, \
-                        CHECK_NO_OP, \
-                        CHECK_NO_OP, \
-                        CHECK_NO_OP, \
-                        CHECK_NO_OP, \
-                        CHECK_NO_OP, \
-                        CHECK_SENTINEL));
-  debug_only(VM_STRUCTS(CHECK_NO_OP, \
-                        ENSURE_FIELD_TYPE_PRESENT, \
-                        CHECK_NO_OP, \
-                        ENSURE_FIELD_TYPE_PRESENT, \
-                        ENSURE_NONPRODUCT_FIELD_TYPE_PRESENT, \
-                        ENSURE_C1_FIELD_TYPE_PRESENT, \
-                        ENSURE_C2_FIELD_TYPE_PRESENT, \
-                        CHECK_NO_OP, \
-                        CHECK_NO_OP, \
-                        CHECK_SENTINEL));
+  debug_only(VM_STRUCTS(ENSURE_FIELD_TYPE_PRESENT,
+                        CHECK_NO_OP,
+                        CHECK_NO_OP,
+                        CHECK_NO_OP,
+                        CHECK_NO_OP,
+                        CHECK_NO_OP,
+                        CHECK_NO_OP,
+                        CHECK_NO_OP,
+                        CHECK_NO_OP));
+  debug_only(VM_STRUCTS(CHECK_NO_OP,
+                        ENSURE_FIELD_TYPE_PRESENT,
+                        CHECK_NO_OP,
+                        ENSURE_FIELD_TYPE_PRESENT,
+                        ENSURE_NONPRODUCT_FIELD_TYPE_PRESENT,
+                        ENSURE_C1_FIELD_TYPE_PRESENT,
+                        ENSURE_C2_FIELD_TYPE_PRESENT,
+                        CHECK_NO_OP,
+                        CHECK_NO_OP));
 #ifndef SERIALGC
-  debug_only(VM_STRUCTS_PARALLELGC(ENSURE_FIELD_TYPE_PRESENT, \
+  debug_only(VM_STRUCTS_PARALLELGC(ENSURE_FIELD_TYPE_PRESENT,
                                    ENSURE_FIELD_TYPE_PRESENT));
-  debug_only(VM_STRUCTS_CMS(ENSURE_FIELD_TYPE_PRESENT, \
-                            ENSURE_FIELD_TYPE_PRESENT, \
+  debug_only(VM_STRUCTS_CMS(ENSURE_FIELD_TYPE_PRESENT,
+                            ENSURE_FIELD_TYPE_PRESENT,
                             ENSURE_FIELD_TYPE_PRESENT));
-  debug_only(VM_STRUCTS_G1(ENSURE_FIELD_TYPE_PRESENT, \
+  debug_only(VM_STRUCTS_G1(ENSURE_FIELD_TYPE_PRESENT,
                            ENSURE_FIELD_TYPE_PRESENT));
 #endif // SERIALGC
-  debug_only(VM_STRUCTS_CPU(ENSURE_FIELD_TYPE_PRESENT, \
-                            ENSURE_FIELD_TYPE_PRESENT, \
-                            CHECK_NO_OP, \
-                            ENSURE_FIELD_TYPE_PRESENT, \
-                            ENSURE_NONPRODUCT_FIELD_TYPE_PRESENT, \
-                            ENSURE_C2_FIELD_TYPE_PRESENT, \
-                            CHECK_NO_OP, \
-                            CHECK_NO_OP, \
-                            CHECK_SENTINEL));
-  debug_only(VM_STRUCTS_OS_CPU(ENSURE_FIELD_TYPE_PRESENT, \
-                               ENSURE_FIELD_TYPE_PRESENT, \
-                               CHECK_NO_OP, \
-                               ENSURE_FIELD_TYPE_PRESENT, \
-                               ENSURE_NONPRODUCT_FIELD_TYPE_PRESENT, \
-                               ENSURE_C2_FIELD_TYPE_PRESENT, \
-                               CHECK_NO_OP, \
-                               CHECK_NO_OP, \
-                               CHECK_SENTINEL));
+  debug_only(VM_STRUCTS_CPU(ENSURE_FIELD_TYPE_PRESENT,
+                            ENSURE_FIELD_TYPE_PRESENT,
+                            CHECK_NO_OP,
+                            ENSURE_FIELD_TYPE_PRESENT,
+                            ENSURE_NONPRODUCT_FIELD_TYPE_PRESENT,
+                            ENSURE_C2_FIELD_TYPE_PRESENT,
+                            CHECK_NO_OP,
+                            CHECK_NO_OP));
+  debug_only(VM_STRUCTS_OS_CPU(ENSURE_FIELD_TYPE_PRESENT,
+                               ENSURE_FIELD_TYPE_PRESENT,
+                               CHECK_NO_OP,
+                               ENSURE_FIELD_TYPE_PRESENT,
+                               ENSURE_NONPRODUCT_FIELD_TYPE_PRESENT,
+                               ENSURE_C2_FIELD_TYPE_PRESENT,
+                               CHECK_NO_OP,
+                               CHECK_NO_OP));
 #endif
 }
 
@@ -3130,10 +3119,10 @@
     s[len-1] = '\0';
     // tty->print_cr("checking \"%s\" for \"%s\"", s, typeName);
     if (recursiveFindType(origtypes, s, true) == 1) {
-      delete s;
+      delete [] s;
       return 1;
     }
-    delete s;
+    delete [] s;
   }
   const char* start = NULL;
   if (strstr(typeName, "GrowableArray<") == typeName) {
@@ -3149,10 +3138,10 @@
     s[len-1] = '\0';
     // tty->print_cr("checking \"%s\" for \"%s\"", s, typeName);
     if (recursiveFindType(origtypes, s, true) == 1) {
-      delete s;
+      delete [] s;
       return 1;
     }
-    delete s;
+    delete [] s;
   }
   if (strstr(typeName, "const ") == typeName) {
     const char * s = typeName + strlen("const ");
@@ -3166,8 +3155,10 @@
     s[len - 6] = '\0';
     // tty->print_cr("checking \"%s\" for \"%s\"", s, typeName);
     if (recursiveFindType(origtypes, s, true) == 1) {
+      free(s);
       return 1;
     }
+    free(s);
   }
   if (!isRecurse) {
     tty->print_cr("type \"%s\" not found", typeName);
@@ -3187,3 +3178,41 @@
 void vmStructs_init() {
   debug_only(VMStructs::init());
 }
+
+#ifndef PRODUCT
+void VMStructs::test() {
+  // Make sure last entry in the each array is indeed the correct end marker.
+  // The reason why these are static is to make sure they are zero initialized.
+  // Putting them on the stack will leave some garbage in the padding of some fields.
+  static VMStructEntry struct_last_entry = GENERATE_VM_STRUCT_LAST_ENTRY();
+  assert(memcmp(&localHotSpotVMStructs[(sizeof(localHotSpotVMStructs) / sizeof(VMStructEntry)) - 1],
+                &struct_last_entry,
+                sizeof(VMStructEntry)) == 0, "Incorrect last entry in localHotSpotVMStructs");
+
+  static VMTypeEntry type_last_entry = GENERATE_VM_TYPE_LAST_ENTRY();
+  assert(memcmp(&localHotSpotVMTypes[sizeof(localHotSpotVMTypes) / sizeof(VMTypeEntry) - 1],
+                &type_last_entry,
+                sizeof(VMTypeEntry)) == 0, "Incorrect last entry in localHotSpotVMTypes");
+
+  static VMIntConstantEntry int_last_entry = GENERATE_VM_INT_CONSTANT_LAST_ENTRY();
+  assert(memcmp(&localHotSpotVMIntConstants[sizeof(localHotSpotVMIntConstants) / sizeof(VMIntConstantEntry) - 1],
+                &int_last_entry,
+                sizeof(VMIntConstantEntry)) == 0, "Incorrect last entry in localHotSpotVMIntConstants");
+
+  static VMLongConstantEntry long_last_entry = GENERATE_VM_LONG_CONSTANT_LAST_ENTRY();
+  assert(memcmp(&localHotSpotVMLongConstants[sizeof(localHotSpotVMLongConstants) / sizeof(VMLongConstantEntry) - 1],
+                &long_last_entry,
+                sizeof(VMLongConstantEntry)) == 0, "Incorrect last entry in localHotSpotVMLongConstants");
+
+
+  // Check for duplicate entries in type array
+  for (int i = 0; localHotSpotVMTypes[i].typeName != NULL; i++) {
+    for (int j = i + 1; localHotSpotVMTypes[j].typeName != NULL; j++) {
+      if (strcmp(localHotSpotVMTypes[i].typeName, localHotSpotVMTypes[j].typeName) == 0) {
+        tty->print_cr("Duplicate entries for '%s'", localHotSpotVMTypes[i].typeName);
+        assert(false, "Duplicate types in localHotSpotVMTypes array");
+      }
+    }
+  }
+}
+#endif
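
The comment in VMStructs::test() above explains why the expected sentinels are static: memcmp compares raw bytes, including struct padding, and only objects with static storage duration are guaranteed to start out fully zeroed. A hedged sketch of the pitfall:

    #include <cstring>

    struct Padded { char c; /* padding bytes here */ int i; };

    bool is_all_zero_sketch(const Padded& a) {
      // A stack-local Padded would carry garbage in its padding bytes; the
      // static lives in zeroed storage, so its padding is deterministic.
      static Padded zero_pattern;   // zero-initialized, padding included
      return std::memcmp(&a, &zero_pattern, sizeof(Padded)) == 0;
    }
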
--- a/src/share/vm/runtime/vmStructs.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/vmStructs.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -123,6 +123,11 @@
   // the data structure (debug build only)
   static void init();
 
+#ifndef PRODUCT
+  // Execute unit tests
+  static void test();
+#endif
+
 private:
   // Look up a type in localHotSpotVMTypes using strcmp() (debug build only).
   // Returns 1 if found, 0 if not.
--- a/src/share/vm/runtime/vmThread.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/vmThread.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,24 +31,13 @@
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/os.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
 #include "runtime/vm_operations.hpp"
 #include "services/runtimeService.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
 #include "utilities/xmlstream.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 #ifndef USDT2
 HS_DTRACE_PROBE_DECL3(hotspot, vmops__request, char *, uintptr_t, int);
@@ -581,7 +570,11 @@
   if (!t->is_VM_thread()) {
     SkipGCALot sgcalot(t);    // avoid re-entrant attempts to gc-a-lot
     // JavaThread or WatcherThread
-    t->check_for_valid_safepoint_state(true);
+    bool concurrent = op->evaluate_concurrently();
+    // only blocking VM operations need to verify the caller's safepoint state:
+    if (!concurrent) {
+      t->check_for_valid_safepoint_state(true);
+    }
 
     // New request from Java thread, evaluate prologue
     if (!op->doit_prologue()) {
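
The branch added above narrows the safepoint-state verification to blocking requests only: a concurrent VM operation is merely enqueued for the VM thread and the caller returns immediately, so the caller's ability to block at a safepoint is irrelevant. A hedged sketch of the control flow with placeholder types (not the real VM_Operation API):

    struct OpSketch {
      bool concurrent;
      bool evaluate_concurrently() const { return concurrent; }
    };

    bool valid_safepoint_state_sketch() { return true; }  // stand-in check

    void execute_sketch(OpSketch* op) {
      if (!op->evaluate_concurrently()) {
        // Blocking request: the caller will wait for the VM thread, so its
        // current state must permit blocking at a safepoint.
        valid_safepoint_state_sketch();
      }
      // ... enqueue op; a concurrent request returns without waiting ...
    }
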
@@ -593,7 +586,6 @@
 
     // It does not make sense to execute the epilogue, if the VM operation object is getting
     // deallocated by the VM thread.
-    bool concurrent     = op->evaluate_concurrently();
     bool execute_epilog = !op->is_cheap_allocated();
     assert(!concurrent || op->is_cheap_allocated(), "concurrent => cheap_allocated");
 
@@ -668,8 +660,8 @@
 }
 
 
-void VMThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
-  Thread::oops_do(f, cf);
+void VMThread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+  Thread::oops_do(f, cld_f, cf);
   _vm_queue->oops_do(f);
 }
 
@@ -701,5 +693,5 @@
 #endif
 
 void VMThread::verify() {
-  oops_do(&VerifyOopClosure::verify_oop, NULL);
+  oops_do(&VerifyOopClosure::verify_oop, NULL, NULL);
 }
--- a/src/share/vm/runtime/vmThread.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/vmThread.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,19 +26,8 @@
 #define SHARE_VM_RUNTIME_VMTHREAD_HPP
 
 #include "runtime/perfData.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vm_operations.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 //
 // Prioritized queue of VM operations.
@@ -137,7 +126,7 @@
   static VMThread* vm_thread()                    { return _vm_thread; }
 
   // GC support
-  void oops_do(OopClosure* f, CodeBlobClosure* cf);
+  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
 
   // Debugging
   void print_on(outputStream* st) const;
--- a/src/share/vm/runtime/vm_operations.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/vm_operations.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -34,20 +34,9 @@
 #include "runtime/deoptimization.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/sweeper.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vm_operations.hpp"
 #include "services/threadService.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 #define VM_OP_NAME_INITIALIZE(name) #name,
 
--- a/src/share/vm/runtime/vm_version.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/runtime/vm_version.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -243,19 +243,21 @@
 
   #ifndef FLOAT_ARCH
     #if defined(__SOFTFP__)
-      #define FLOAT_ARCH "-sflt"
+      #define FLOAT_ARCH_STR "-sflt"
     #elif defined(E500V2)
-      #define FLOAT_ARCH "-e500v2"
+      #define FLOAT_ARCH_STR "-e500v2"
     #elif defined(ARM)
-      #define FLOAT_ARCH "-vfp"
+      #define FLOAT_ARCH_STR "-vfp"
     #elif defined(PPC)
-      #define FLOAT_ARCH "-hflt"
+      #define FLOAT_ARCH_STR "-hflt"
     #else
-      #define FLOAT_ARCH ""
+      #define FLOAT_ARCH_STR ""
     #endif
+  #else
+    #define FLOAT_ARCH_STR XSTR(FLOAT_ARCH)
   #endif
 
-  return VMNAME " (" VM_RELEASE ") for " OS "-" CPU FLOAT_ARCH
+  return VMNAME " (" VM_RELEASE ") for " OS "-" CPU FLOAT_ARCH_STR
          " JRE (" JRE_RELEASE_VERSION "), built on " __DATE__ " " __TIME__
          " by " XSTR(HOTSPOT_BUILD_USER) " with " HOTSPOT_BUILD_COMPILER;
 }
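
The FLOAT_ARCH_STR rename above separates two cases: the internal fallbacks are already string literals, but an externally supplied FLOAT_ARCH (e.g. -DFLOAT_ARCH=vfp) is a bare token and needs two-level stringification before it can be concatenated into the version string. The standard idiom, as a standalone illustration:

    #define STR(x)  #x
    #define XSTR(x) STR(x)   // expands x first, then stringifies

    #define FLOAT_ARCH vfp
    const char* s1 = STR(FLOAT_ARCH);   // "FLOAT_ARCH" (no expansion)
    const char* s2 = XSTR(FLOAT_ARCH);  // "vfp" (expanded, then stringified)
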
--- a/src/share/vm/services/attachListener.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/attachListener.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -404,6 +404,8 @@
 static void attach_listener_thread_entry(JavaThread* thread, TRAPS) {
   os::set_priority(thread, NearMaxPriority);
 
+  thread->record_stack_base_and_size();
+
   if (AttachListener::pd_init() != 0) {
     return;
   }
--- a/src/share/vm/services/classLoadingService.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/classLoadingService.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -135,7 +135,7 @@
     // The spec is unclear at this point to count array klasses or not
     // and also indirect creation of array of super class and secondaries
     //
-    // for (Klass* l = k; l != NULL; l = Klass::cast(l)->array_klass_or_null()) {
+    // for (Klass* l = k; l != NULL; l = l->array_klass_or_null()) {
     //  KlassHandle h(_current_thread, l);
     //  _loaded_classes->append(h);
     // }
--- a/src/share/vm/services/diagnosticArgument.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/diagnosticArgument.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2013 Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -86,7 +86,7 @@
 
 template <> void DCmdArgument<jlong>::parse_value(const char* str,
                                                   size_t len, TRAPS) {
-    if (str == NULL || sscanf(str, INT64_FORMAT, &_value) != 1) {
+    if (str == NULL || sscanf(str, JLONG_FORMAT, &_value) != 1) {
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
       "Integer parsing error in diagnostic command arguments\n");
   }
@@ -171,7 +171,7 @@
               "Integer parsing error nanotime value: syntax error");
   }
 
-  int argc = sscanf(str, INT64_FORMAT , &_value._time);
+  int argc = sscanf(str, JLONG_FORMAT, &_value._time);
   if (argc != 1) {
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
               "Integer parsing error nanotime value: syntax error");
--- a/src/share/vm/services/diagnosticCommand_ext.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/diagnosticCommand_ext.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. DO
- * NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
--- a/src/share/vm/services/heapDumper.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/heapDumper.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -879,7 +879,7 @@
   writer->write_u4(STACK_TRACE_ID);
 
   // class ID
-  writer->write_classID(Klass::cast(k));
+  writer->write_classID(k);
 
   // number of bytes that follow
   writer->write_u4(instance_size(k) );
@@ -891,7 +891,7 @@
 // creates HPROF_GC_CLASS_DUMP record for the given class and each of
 // its array classes
 void DumperSupport::dump_class_and_array_classes(DumpWriter* writer, Klass* k) {
-  Klass* klass = Klass::cast(k);
+  Klass* klass = k;
   assert(klass->oop_is_instance(), "not an InstanceKlass");
   InstanceKlass* ik = (InstanceKlass*)klass;
 
@@ -906,7 +906,7 @@
   if (java_super == NULL) {
     writer->write_objectID(oop(NULL));
   } else {
-    writer->write_classID(Klass::cast(java_super));
+    writer->write_classID(java_super);
   }
 
   writer->write_objectID(ik->class_loader());
@@ -932,7 +932,7 @@
   // array classes
   k = klass->array_klass_or_null();
   while (k != NULL) {
-    Klass* klass = Klass::cast(k);
+    Klass* klass = k;
     assert(klass->oop_is_objArray(), "not an ObjArrayKlass");
 
     writer->write_u1(HPROF_GC_CLASS_DUMP);
@@ -942,7 +942,7 @@
     // super class of array classes is java.lang.Object
     java_super = klass->java_super();
     assert(java_super != NULL, "checking");
-    writer->write_classID(Klass::cast(java_super));
+    writer->write_classID(java_super);
 
     writer->write_objectID(ik->class_loader());
     writer->write_objectID(ik->signers());
@@ -965,7 +965,7 @@
 void DumperSupport::dump_basic_type_array_class(DumpWriter* writer, Klass* k) {
  // array classes
  while (k != NULL) {
-    Klass* klass = Klass::cast(k);
+    Klass* klass = k;
 
     writer->write_u1(HPROF_GC_CLASS_DUMP);
     writer->write_classID(klass);
@@ -974,7 +974,7 @@
     // super class of array classes is java.lang.Object
     Klass* java_super = klass->java_super();
     assert(java_super != NULL, "checking");
-    writer->write_classID(Klass::cast(java_super));
+    writer->write_classID(java_super);
 
     writer->write_objectID(oop(NULL));    // loader
     writer->write_objectID(oop(NULL));    // signers
@@ -1001,7 +1001,7 @@
   writer->write_u4((u4)array->length());
 
   // array class ID
-  writer->write_classID(Klass::cast(array->klass()));
+  writer->write_classID(array->klass());
 
   // [id]* elements
   for (int index=0; index<array->length(); index++) {
@@ -1117,8 +1117,8 @@
   writer->write_symbolID(m->name());                // method's name
   writer->write_symbolID(m->signature());           // method's signature
 
-  assert(Klass::cast(m->method_holder())->oop_is_instance(), "not InstanceKlass");
-  writer->write_symbolID(InstanceKlass::cast(m->method_holder())->source_file_name());  // source file name
+  assert(m->method_holder()->oop_is_instance(), "not InstanceKlass");
+  writer->write_symbolID(m->method_holder()->source_file_name());  // source file name
   writer->write_u4(class_serial_num);               // class serial number
   writer->write_u4((u4) line_number);               // line number
 }
@@ -1525,7 +1525,7 @@
     writer()->write_u4(++class_serial_num);
 
     // class ID
-    Klass* klass = Klass::cast(k);
+    Klass* klass = k;
     writer()->write_classID(klass);
 
     // add the Klass* and class serial number pair
@@ -1796,7 +1796,7 @@
       // write fake frame that makes it look like the thread, which caused OOME,
       // is in the OutOfMemoryError zero-parameter constructor
       if (thread == _oome_thread && _oome_constructor != NULL) {
-        int oome_serial_num = _klass_map->find(Klass::cast(_oome_constructor->method_holder()));
+        int oome_serial_num = _klass_map->find(_oome_constructor->method_holder());
         // the class serial number starts from 1
         assert(oome_serial_num > 0, "OutOfMemoryError class not found");
         DumperSupport::dump_stack_frame(writer(), ++frame_serial_num, oome_serial_num,
@@ -1806,7 +1806,7 @@
       for (int j=0; j < depth; j++) {
         StackFrameInfo* frame = stack_trace->stack_frame_at(j);
         Method* m = frame->method();
-        int class_serial_num = _klass_map->find(Klass::cast(m->method_holder()));
+        int class_serial_num = _klass_map->find(m->method_holder());
         // the class serial number starts from 1
         assert(class_serial_num > 0, "class not found");
         DumperSupport::dump_stack_frame(writer(), ++frame_serial_num, class_serial_num, m, frame->bci());
@@ -1866,7 +1866,7 @@
     if (error() == NULL) {
       char msg[256];
       sprintf(msg, "Heap dump file created [%s bytes in %3.3f secs]",
-        os::jlong_format_specifier(), timer()->seconds());
+        JLONG_FORMAT, timer()->seconds());
       tty->print_cr(msg, writer.bytes_written());
     } else {
       tty->print_cr("Dump file is incomplete: %s", writer.error());
--- a/src/share/vm/services/lowMemoryDetector.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/lowMemoryDetector.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -353,7 +353,7 @@
 
 #ifndef PRODUCT
 void SensorInfo::print() {
-  tty->print_cr("%s count = %ld pending_triggers = %ld pending_clears = %ld",
+  tty->print_cr("%s count = " SIZE_FORMAT " pending_triggers = %ld pending_clears = %ld",
                 (_sensor_on ? "on" : "off"),
                 _sensor_count, _pending_trigger_count, _pending_clear_count);
 }
--- a/src/share/vm/services/management.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/management.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1417,7 +1417,7 @@
 
   for (int i = 0; i < num_classes; i++) {
     KlassHandle kh = lce.get_klass(i);
-    oop mirror = Klass::cast(kh())->java_mirror();
+    oop mirror = kh()->java_mirror();
     classes_ah->obj_at_put(i, mirror);
   }
 
--- a/src/share/vm/services/memBaseline.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memBaseline.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -22,7 +22,6 @@
  *
  */
 #include "precompiled.hpp"
-#include "classfile/systemDictionary.hpp"
 #include "memory/allocation.hpp"
 #include "services/memBaseline.hpp"
 #include "services/memTracker.hpp"
@@ -40,6 +39,7 @@
   {mtSymbol,     "Symbol"},
   {mtNMT,        "Memory Tracking"},
   {mtChunk,      "Pooled Free Chunks"},
+  {mtClassShared, "Shared spaces for classes"},
   {mtNone,       "Unknown"}  // It can happen when type tagging records are lagging
                              // behind
 };
@@ -55,6 +55,7 @@
 
   _malloc_cs = NULL;
   _vm_cs = NULL;
+  _vm_map = NULL;
 
   _number_of_classes = 0;
   _number_of_threads = 0;
@@ -72,6 +73,11 @@
     _vm_cs = NULL;
   }
 
+  if (_vm_map != NULL) {
+    delete _vm_map;
+    _vm_map = NULL;
+  }
+
   reset();
 }
 
@@ -85,6 +91,7 @@
 
   if (_malloc_cs != NULL) _malloc_cs->clear();
   if (_vm_cs != NULL) _vm_cs->clear();
+  if (_vm_map != NULL) _vm_map->clear();
 
   for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) {
     _malloc_data[index].clear();
@@ -94,39 +101,41 @@
 }
 
 MemBaseline::~MemBaseline() {
-  if (_malloc_cs != NULL) {
-    delete _malloc_cs;
-  }
-
-  if (_vm_cs != NULL) {
-    delete _vm_cs;
-  }
+  clear();
 }
 
 // baseline malloc'd memory records, generate overall summary and summaries by
 // memory types
 bool MemBaseline::baseline_malloc_summary(const MemPointerArray* malloc_records) {
-  MemPointerArrayIteratorImpl mItr((MemPointerArray*)malloc_records);
-  MemPointerRecord* mptr = (MemPointerRecord*)mItr.current();
+  MemPointerArrayIteratorImpl malloc_itr((MemPointerArray*)malloc_records);
+  MemPointerRecord* malloc_ptr = (MemPointerRecord*)malloc_itr.current();
   size_t used_arena_size = 0;
   int index;
-  while (mptr != NULL) {
-    index = flag2index(FLAGS_TO_MEMORY_TYPE(mptr->flags()));
-    size_t size = mptr->size();
-    _total_malloced += size;
-    _malloc_data[index].inc(size);
-    if (MemPointerRecord::is_arena_record(mptr->flags())) {
-      // see if arena size record present
-      MemPointerRecord* next_p = (MemPointerRecordEx*)mItr.peek_next();
-      if (MemPointerRecord::is_arena_size_record(next_p->flags())) {
-        assert(next_p->is_size_record_of_arena(mptr), "arena records do not match");
-        size = next_p->size();
-        _arena_data[index].inc(size);
-        used_arena_size += size;
-        mItr.next();
+  while (malloc_ptr != NULL) {
+    index = flag2index(FLAGS_TO_MEMORY_TYPE(malloc_ptr->flags()));
+    size_t size = malloc_ptr->size();
+    if (malloc_ptr->is_arena_memory_record()) {
+      // We do have anonymous arenas; they are either used as value objects,
+      // which are embedded inside other objects, or used as stack objects.
+      _arena_data[index].inc(size);
+      used_arena_size += size;
+    } else {
+      _total_malloced += size;
+      _malloc_data[index].inc(size);
+      if (malloc_ptr->is_arena_record()) {
+        // see if arena memory record present
+        MemPointerRecord* next_malloc_ptr = (MemPointerRecordEx*)malloc_itr.peek_next();
+        if (next_malloc_ptr->is_arena_memory_record()) {
+          assert(next_malloc_ptr->is_memory_record_of_arena(malloc_ptr),
+             "Arena records do not match");
+          size = next_malloc_ptr->size();
+          _arena_data[index].inc(size);
+          used_arena_size += size;
+          malloc_itr.next();
+        }
       }
     }
-    mptr = (MemPointerRecordEx*)mItr.next();
+    malloc_ptr = (MemPointerRecordEx*)malloc_itr.next();
   }
 
   // substract used arena size to get size of arena chunk in free list
@@ -142,20 +151,23 @@
 // baseline mmap'd memory records, generate overall summary and summaries by
 // memory types
 bool MemBaseline::baseline_vm_summary(const MemPointerArray* vm_records) {
-  MemPointerArrayIteratorImpl vItr((MemPointerArray*)vm_records);
-  VMMemRegion* vptr = (VMMemRegion*)vItr.current();
+  MemPointerArrayIteratorImpl vm_itr((MemPointerArray*)vm_records);
+  VMMemRegion* vm_ptr = (VMMemRegion*)vm_itr.current();
   int index;
-  while (vptr != NULL) {
-    index = flag2index(FLAGS_TO_MEMORY_TYPE(vptr->flags()));
-
+  while (vm_ptr != NULL) {
+    if (vm_ptr->is_reserved_region()) {
+      index = flag2index(FLAGS_TO_MEMORY_TYPE(vm_ptr->flags()));
     // we use the number of thread stack to count threads
-    if (IS_MEMORY_TYPE(vptr->flags(), mtThreadStack)) {
+      if (IS_MEMORY_TYPE(vm_ptr->flags(), mtThreadStack)) {
       _number_of_threads ++;
     }
-    _total_vm_reserved += vptr->reserved_size();
-    _total_vm_committed += vptr->committed_size();
-    _vm_data[index].inc(vptr->reserved_size(), vptr->committed_size());
-    vptr = (VMMemRegion*)vItr.next();
+      _total_vm_reserved += vm_ptr->size();
+      _vm_data[index].inc(vm_ptr->size(), 0);
+    } else {
+      _total_vm_committed += vm_ptr->size();
+      _vm_data[index].inc(0, vm_ptr->size());
+    }
+    vm_ptr = (VMMemRegion*)vm_itr.next();
   }
   return true;
 }
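
After this change, each NMT virtual-memory record describes either a reserved region or a committed region, so the two totals are accumulated from different records rather than from two fields of one record. A hedged sketch of the new accounting shape (placeholder types):

    #include <cstddef>

    struct VMRegionSketch {
      bool   reserved;   // true: reserved record; false: committed record
      size_t size;
    };

    void tally(const VMRegionSketch* recs, int n,
               size_t* total_reserved, size_t* total_committed) {
      for (int i = 0; i < n; i++) {
        if (recs[i].reserved) *total_reserved  += recs[i].size;
        else                  *total_committed += recs[i].size;
      }
    }
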
@@ -165,41 +177,57 @@
 bool MemBaseline::baseline_malloc_details(const MemPointerArray* malloc_records) {
   assert(MemTracker::track_callsite(), "detail tracking is off");
 
-  MemPointerArrayIteratorImpl mItr((MemPointerArray*)malloc_records);
-  MemPointerRecordEx* mptr = (MemPointerRecordEx*)mItr.current();
-  MallocCallsitePointer mp;
+  MemPointerArrayIteratorImpl malloc_itr(const_cast<MemPointerArray*>(malloc_records));
+  MemPointerRecordEx* malloc_ptr = (MemPointerRecordEx*)malloc_itr.current();
+  MallocCallsitePointer malloc_callsite;
 
+  // initialize malloc callsite array
   if (_malloc_cs == NULL) {
     _malloc_cs = new (std::nothrow) MemPointerArrayImpl<MallocCallsitePointer>(64);
     // out of native memory
-    if (_malloc_cs == NULL) {
+    if (_malloc_cs == NULL || _malloc_cs->out_of_memory()) {
       return false;
     }
   } else {
     _malloc_cs->clear();
   }
 
+  MemPointerArray* malloc_data = const_cast<MemPointerArray*>(malloc_records);
+
+  // sort into callsite pc order. Details are aggregated by callsites
+  malloc_data->sort((FN_SORT)malloc_sort_by_pc);
+  bool ret = true;
+
   // baseline memory that is totaled over 1 KB
-  while (mptr != NULL) {
-    if (!MemPointerRecord::is_arena_size_record(mptr->flags())) {
+  while (malloc_ptr != NULL) {
+    if (!MemPointerRecord::is_arena_memory_record(malloc_ptr->flags())) {
       // skip thread stacks
-      if (!IS_MEMORY_TYPE(mptr->flags(), mtThreadStack)) {
-        if (mp.addr() != mptr->pc()) {
-          if ((mp.amount()/K) > 0) {
-            if (!_malloc_cs->append(&mp)) {
-              return false;
+      if (!IS_MEMORY_TYPE(malloc_ptr->flags(), mtThreadStack)) {
+        if (malloc_callsite.addr() != malloc_ptr->pc()) {
+          if ((malloc_callsite.amount()/K) > 0) {
+            if (!_malloc_cs->append(&malloc_callsite)) {
+              ret = false;
+              break;
             }
           }
-          mp = MallocCallsitePointer(mptr->pc());
+          malloc_callsite = MallocCallsitePointer(malloc_ptr->pc());
         }
-        mp.inc(mptr->size());
+        malloc_callsite.inc(malloc_ptr->size());
       }
     }
-    mptr = (MemPointerRecordEx*)mItr.next();
+    malloc_ptr = (MemPointerRecordEx*)malloc_itr.next();
   }
 
-  if (mp.addr() != 0 && (mp.amount()/K) > 0) {
-    if (!_malloc_cs->append(&mp)) {
+  // restore to address order. Snapshot malloc data is maintained in memory
+  // address order.
+  malloc_data->sort((FN_SORT)malloc_sort_by_addr);
+
+  if (!ret) {
+    return false;
+  }
+  // deal with last record
+  if (malloc_callsite.addr() != 0 && (malloc_callsite.amount()/K) > 0) {
+    if (!_malloc_cs->append(&malloc_callsite)) {
       return false;
     }
   }
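The detail pass follows a sort-aggregate-restore pattern: sort records into callsite pc order, accumulate runs with equal pc, emit a bucket whenever the pc changes (applying the 1 KB threshold), and flush the final bucket after the loop. A compact stand-alone rendition, with hypothetical MRec/CSite types:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct MRec  { uintptr_t pc; size_t size; };    // malloc record, pc = callsite
    struct CSite { uintptr_t pc; size_t amount; };  // per-callsite bucket

    int main() {
        std::vector<MRec> recs = {
            { 0xA0, 512 }, { 0xB0, 2048 }, { 0xA0, 600 }, { 0xB0, 100 },
        };
        // sort into callsite pc order, as baseline_malloc_details does
        std::sort(recs.begin(), recs.end(),
                  [](const MRec& a, const MRec& b) { return a.pc < b.pc; });

        std::vector<CSite> sites;
        CSite cur = { 0, 0 };
        for (const MRec& r : recs) {
            if (cur.pc != r.pc) {
                if (cur.amount / 1024 > 0) sites.push_back(cur);  // >= 1 KB only
                cur = { r.pc, 0 };
            }
            cur.amount += r.size;
        }
        if (cur.pc != 0 && cur.amount / 1024 > 0) sites.push_back(cur);  // last record
        for (const CSite& s : sites)
            printf("pc=0x%lx amount=%zu\n", (unsigned long)s.pc, s.amount);
        return 0;
    }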
@@ -210,34 +238,106 @@
 bool MemBaseline::baseline_vm_details(const MemPointerArray* vm_records) {
   assert(MemTracker::track_callsite(), "detail tracking is off");
 
-  VMCallsitePointer vp;
-  MemPointerArrayIteratorImpl vItr((MemPointerArray*)vm_records);
-  VMMemRegionEx* vptr = (VMMemRegionEx*)vItr.current();
+  VMCallsitePointer  vm_callsite;
+  VMCallsitePointer* cur_callsite = NULL;
+  MemPointerArrayIteratorImpl vm_itr((MemPointerArray*)vm_records);
+  VMMemRegionEx* vm_ptr = (VMMemRegionEx*)vm_itr.current();
 
+  // initialize virtual memory map array
+  if (_vm_map == NULL) {
+    _vm_map = new (std::nothrow) MemPointerArrayImpl<VMMemRegionEx>(vm_records->length());
+    if (_vm_map == NULL || _vm_map->out_of_memory()) {
+      return false;
+    }
+  } else {
+    _vm_map->clear();
+  }
+
+  // initialize virtual memory callsite array
   if (_vm_cs == NULL) {
     _vm_cs = new (std::nothrow) MemPointerArrayImpl<VMCallsitePointer>(64);
-    if (_vm_cs == NULL) {
+    if (_vm_cs == NULL || _vm_cs->out_of_memory()) {
       return false;
     }
   } else {
     _vm_cs->clear();
   }
 
-  while (vptr != NULL) {
-    if (vp.addr() != vptr->pc()) {
-      if (!_vm_cs->append(&vp)) {
+  // consolidate virtual memory data
+  VMMemRegionEx*     reserved_rec = NULL;
+  VMMemRegionEx*     committed_rec = NULL;
+
+  // vm_ptr is coming in increasing base address order
+  while (vm_ptr != NULL) {
+    if (vm_ptr->is_reserved_region()) {
+      // consolidate reserved memory regions for virtual memory map.
+      // The criteria for consolidation is:
+      // 1. two adjacent reserved memory regions
+      // 2. belong to the same memory type
+      // 3. reserved from the same callsite
+      if (reserved_rec == NULL ||
+        reserved_rec->base() + reserved_rec->size() != vm_ptr->addr() ||
+        FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) != FLAGS_TO_MEMORY_TYPE(vm_ptr->flags()) ||
+        reserved_rec->pc() != vm_ptr->pc()) {
+        if (!_vm_map->append(vm_ptr)) {
         return false;
       }
-      vp = VMCallsitePointer(vptr->pc());
+        // we just inserted a reserved region; we need the pointer to its
+        // element in the virtual memory map array.
+        reserved_rec = (VMMemRegionEx*)_vm_map->at(_vm_map->length() - 1);
+      } else {
+        reserved_rec->expand_region(vm_ptr->addr(), vm_ptr->size());
     }
-    vp.inc(vptr->size(), vptr->committed_size());
-    vptr = (VMMemRegionEx*)vItr.next();
-  }
-  if (vp.addr() != 0) {
-    if (!_vm_cs->append(&vp)) {
+
+      if (cur_callsite != NULL && !_vm_cs->append(cur_callsite)) {
       return false;
     }
+      vm_callsite = VMCallsitePointer(vm_ptr->pc());
+      cur_callsite = &vm_callsite;
+      vm_callsite.inc(vm_ptr->size(), 0);
+    } else {
+      // consolidate committed memory regions for virtual memory map
+      // The criteria are:
+      // 1. two adjacent committed memory regions
+      // 2. committed from the same callsite
+      if (committed_rec == NULL ||
+        committed_rec->base() + committed_rec->size() != vm_ptr->addr() ||
+        committed_rec->pc() != vm_ptr->pc()) {
+        if (!_vm_map->append(vm_ptr)) {
+          return false;
   }
+        committed_rec = (VMMemRegionEx*)_vm_map->at(_vm_map->length() - 1);
+      } else {
+        committed_rec->expand_region(vm_ptr->addr(), vm_ptr->size());
+      }
+      vm_callsite.inc(0, vm_ptr->size());
+    }
+    vm_ptr = (VMMemRegionEx*)vm_itr.next();
+  }
+  // deal with last record
+  if (cur_callsite != NULL && !_vm_cs->append(cur_callsite)) {
+    return false;
+  }
+
+  // sort it into callsite pc order. Details are aggregated by callsites
+  _vm_cs->sort((FN_SORT)bl_vm_sort_by_pc);
+
+  // walk the array to consolidate record by pc
+  MemPointerArrayIteratorImpl itr(_vm_cs);
+  VMCallsitePointer* callsite_rec = (VMCallsitePointer*)itr.current();
+  VMCallsitePointer* next_rec = (VMCallsitePointer*)itr.next();
+  while (next_rec != NULL) {
+    assert(callsite_rec != NULL, "Sanity check");
+    if (next_rec->addr() == callsite_rec->addr()) {
+      callsite_rec->inc(next_rec->reserved_amount(), next_rec->committed_amount());
+      itr.remove();
+      next_rec = (VMCallsitePointer*)itr.current();
+    } else {
+      callsite_rec = next_rec;
+      next_rec = (VMCallsitePointer*)itr.next();
+    }
+  }
+
   return true;
 }
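The reserved-region consolidation above merges a record into its predecessor only when all three keys match: the regions are adjacent, carry the same memory type, and were reserved from the same callsite. A sketch of that rule over a plain vector (hypothetical Rgn struct, not the HotSpot types):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Rgn { uintptr_t base; size_t size; int type; uintptr_t pc; };

    static bool can_merge(const Rgn& prev, const Rgn& next) {
        return prev.base + prev.size == next.base &&  // adjacent
               prev.type == next.type &&              // same memory type
               prev.pc == next.pc;                    // reserved from same callsite
    }

    int main() {
        std::vector<Rgn> in = {
            { 0x1000, 0x1000, 1, 0xAA },
            { 0x2000, 0x1000, 1, 0xAA },  // merges with the first
            { 0x3000, 0x1000, 2, 0xAA },  // different type: new entry
        };
        std::vector<Rgn> map;
        for (const Rgn& r : in) {
            if (!map.empty() && can_merge(map.back(), r))
                map.back().size += r.size;  // expand_region equivalent
            else
                map.push_back(r);
        }
        for (const Rgn& r : map)
            printf("[0x%lx, 0x%lx) type=%d\n", (unsigned long)r.base,
                   (unsigned long)(r.base + r.size), r.type);
        return 0;
    }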
 
@@ -248,15 +348,11 @@
   reset();
   _baselined = baseline_malloc_summary(snapshot._alloc_ptrs) &&
                baseline_vm_summary(snapshot._vm_ptrs);
-  _number_of_classes = SystemDictionary::number_of_classes();
+  _number_of_classes = snapshot.number_of_classes();
 
   if (!summary_only && MemTracker::track_callsite() && _baselined) {
-    ((MemPointerArray*)snapshot._alloc_ptrs)->sort((FN_SORT)malloc_sort_by_pc);
-    ((MemPointerArray*)snapshot._vm_ptrs)->sort((FN_SORT)vm_sort_by_pc);
     _baselined =  baseline_malloc_details(snapshot._alloc_ptrs) &&
       baseline_vm_details(snapshot._vm_ptrs);
-    ((MemPointerArray*)snapshot._alloc_ptrs)->sort((FN_SORT)malloc_sort_by_addr);
-    ((MemPointerArray*)snapshot._vm_ptrs)->sort((FN_SORT)vm_sort_by_addr);
   }
   return _baselined;
 }
@@ -278,7 +374,7 @@
       return MemType2NameMap[index]._name;
     }
   }
-  assert(false, "no type");
+  assert(false, err_msg("bad type %x", type));
   return NULL;
 }
 
@@ -341,13 +437,6 @@
   return UNSIGNED_COMPARE(mp1->addr(), mp2->addr());
 }
 
-// sort snapshot mmap'd records in callsite pc order
-int MemBaseline::vm_sort_by_pc(const void* p1, const void* p2) {
-  assert(MemTracker::track_callsite(),"Just check");
-  const VMMemRegionEx* mp1 = (const VMMemRegionEx*)p1;
-  const VMMemRegionEx* mp2 = (const VMMemRegionEx*)p2;
-  return UNSIGNED_COMPARE(mp1->pc(), mp2->pc());
-}
 
 // sort baselined mmap'd records in size (reserved size) order
 int MemBaseline::bl_vm_sort_by_size(const void* p1, const void* p2) {
@@ -376,12 +465,3 @@
   return delta;
 }
 
-// sort snapshot mmap'd records in memory block address order
-int MemBaseline::vm_sort_by_addr(const void* p1, const void* p2) {
-  assert(MemTracker::is_on(), "Just check");
-  const VMMemRegion* mp1 = (const VMMemRegion*)p1;
-  const VMMemRegion* mp2 = (const VMMemRegion*)p2;
-  int delta = UNSIGNED_COMPARE(mp1->addr(), mp2->addr());
-  assert(delta != 0, "dup pointer");
-  return delta;
-}
--- a/src/share/vm/services/memBaseline.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memBaseline.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -320,6 +320,8 @@
   // only available when detail tracking is on.
   MemPointerArray*  _malloc_cs;
   MemPointerArray*  _vm_cs;
+  // virtual memory map
+  MemPointerArray*  _vm_map;
 
  private:
   static MemType2Name  MemType2NameMap[NUMBER_OF_MEMORY_TYPE];
@@ -332,7 +334,7 @@
   // create a memory baseline
   MemBaseline();
 
-  virtual ~MemBaseline();
+  ~MemBaseline();
 
   inline bool baselined() const {
     return _baselined;
@@ -432,9 +434,6 @@
   static int malloc_sort_by_pc(const void* p1, const void* p2);
   static int malloc_sort_by_addr(const void* p1, const void* p2);
 
-  static int vm_sort_by_pc(const void* p1, const void* p2);
-  static int vm_sort_by_addr(const void* p1, const void* p2);
-
  private:
   // sorting functions for baselined records
   static int bl_malloc_sort_by_size(const void* p1, const void* p2);
--- a/src/share/vm/services/memPtr.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memPtr.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -40,35 +40,3 @@
   return seq;
 }
 
-
-
-bool VMMemRegion::contains(const VMMemRegion* mr) const {
-  assert(base() != 0, "Sanity check");
-  assert(size() != 0 || committed_size() != 0,
-    "Sanity check");
-  address base_addr = base();
-  address end_addr = base_addr +
-    (is_reserve_record()? reserved_size(): committed_size());
-  if (mr->is_reserve_record()) {
-    if (mr->base() == base_addr && mr->size() == size()) {
-      // the same range
-      return true;
-    }
-    return false;
-  } else if (mr->is_commit_record() || mr->is_uncommit_record()) {
-    assert(mr->base() != 0 && mr->committed_size() > 0,
-      "bad record");
-    return (mr->base() >= base_addr &&
-      (mr->base() + mr->committed_size()) <= end_addr);
-  } else if (mr->is_type_tagging_record()) {
-    assert(mr->base() != NULL, "Sanity check");
-    return (mr->base() >= base_addr && mr->base() < end_addr);
-  } else if (mr->is_release_record()) {
-    assert(mr->base() != 0 && mr->size() > 0,
-      "bad record");
-    return (mr->base() == base_addr && mr->size() == size());
-  } else {
-    ShouldNotReachHere();
-    return false;
-  }
-}
--- a/src/share/vm/services/memPtr.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memPtr.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -165,7 +165,7 @@
     return (flags & (otArena | tag_size)) == otArena;
   }
 
-  inline static bool is_arena_size_record(MEMFLAGS flags) {
+  inline static bool is_arena_memory_record(MEMFLAGS flags) {
     return (flags & (otArena | tag_size)) == (otArena | tag_size);
   }
 
@@ -256,8 +256,8 @@
   }
 
   // if this record records the size information of an arena
-  inline bool is_arena_size_record() const {
-    return is_arena_size_record(_flags);
+  inline bool is_arena_memory_record() const {
+    return is_arena_memory_record(_flags);
   }
 
   // if this pointer represents the address of an arena object
@@ -266,8 +266,8 @@
   }
 
   // if this record represents the size information of a specific arena
-  inline bool is_size_record_of_arena(const MemPointerRecord* arena_rc) {
-    assert(is_arena_size_record(), "not size record");
+  inline bool is_memory_record_of_arena(const MemPointerRecord* arena_rc) {
+    assert(is_arena_memory_record(), "not size record");
     assert(arena_rc->is_arena_record(), "not arena record");
     return (arena_rc->addr() + sizeof(void*)) == addr();
   }
@@ -291,6 +291,37 @@
   inline bool is_type_tagging_record() const {
     return is_virtual_memory_type_record(_flags);
   }
+
+  // if the two memory pointer records actually represent the same
+  // memory block
+  inline bool is_same_region(const MemPointerRecord* other) const {
+    return (addr() == other->addr() && size() == other->size());
+  }
+
+  // if this memory region fully contains another one
+  inline bool contains_region(const MemPointerRecord* other) const {
+    return contains_region(other->addr(), other->size());
+  }
+
+  // if this memory region fully contains specified memory range
+  inline bool contains_region(address add, size_t sz) const {
+    return (addr() <= add && addr() + size() >= add + sz);
+  }
+
+  inline bool contains_address(address add) const {
+    return (addr() <= add && addr() + size() > add);
+  }
+
+  // if this memory region overlaps another region
+  inline bool overlaps_region(const MemPointerRecord* other) const {
+    assert(other != NULL, "Just check");
+    assert(size() > 0 && other->size() > 0, "empty range");
+    return contains_address(other->addr()) ||
+           contains_address(other->addr() + other->size() - 1) || // exclude end address
+           other->contains_address(addr()) ||
+           other->contains_address(addr() + size() - 1); // exclude end address
+  }
+
 };
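The new predicates treat a record as the half-open range [addr, addr + size): contains_region uses an inclusive end so a region contains itself, while contains_address excludes the end, which is why overlaps_region probes size - 1 from each side. A small self-check, assuming a stripped-down R struct:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct R {
        uintptr_t addr; size_t size;
        bool contains_region(uintptr_t a, size_t sz) const {
            return addr <= a && addr + size >= a + sz;   // inclusive end
        }
        bool contains_address(uintptr_t a) const {
            return addr <= a && addr + size > a;         // exclusive end
        }
        bool overlaps(const R& o) const {
            return contains_address(o.addr) ||
                   contains_address(o.addr + o.size - 1) ||
                   o.contains_address(addr) ||
                   o.contains_address(addr + size - 1);
        }
    };

    int main() {
        R a = { 0x1000, 0x100 }, b = { 0x10F0, 0x100 }, c = { 0x1100, 0x10 };
        assert(a.overlaps(b));                      // b starts inside a
        assert(!a.overlaps(c));                     // c starts exactly at a's end
        assert(a.contains_region(0x1000, 0x100));   // a region contains itself
        return 0;
    }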
 
 // MemPointerRecordEx also records callsite pc, from where
@@ -321,66 +352,32 @@
   }
 };
 
-// a virtual memory region
+// a virtual memory region. The region can represent a reserved
+// virtual memory region or a committed memory region
 class VMMemRegion : public MemPointerRecord {
- private:
-  // committed size
-  size_t       _committed_size;
-
 public:
-  VMMemRegion(): _committed_size(0) { }
+  VMMemRegion() { }
 
   void init(const MemPointerRecord* mp) {
-    assert(mp->is_vm_pointer(), "not virtual memory pointer");
+    assert(mp->is_vm_pointer(), "Sanity check");
     _addr = mp->addr();
-    if (mp->is_commit_record() || mp->is_uncommit_record()) {
-      _committed_size = mp->size();
-      set_size(_committed_size);
-    } else {
       set_size(mp->size());
-      _committed_size = 0;
-    }
     set_flags(mp->flags());
   }
 
   VMMemRegion& operator=(const VMMemRegion& other) {
     MemPointerRecord::operator=(other);
-    _committed_size = other.committed_size();
     return *this;
   }
 
-  inline bool is_reserve_record() const {
-    return is_virtual_memory_reserve_record(flags());
-  }
-
-  inline bool is_release_record() const {
-    return is_virtual_memory_release_record(flags());
-  }
-
-  // resize reserved VM range
-  inline void set_reserved_size(size_t new_size) {
-    assert(new_size >= committed_size(), "resize");
-    set_size(new_size);
+  inline bool is_reserved_region() const {
+    return is_allocation_record();
   }
 
-  inline void commit(size_t size) {
-    _committed_size += size;
+  inline bool is_committed_region() const {
+    return is_commit_record();
   }
 
-  inline void uncommit(size_t size) {
-    if (_committed_size >= size) {
-      _committed_size -= size;
-    } else {
-      _committed_size = 0;
-    }
-  }
-
-  /*
-   * if this virtual memory range covers whole range of
-   * the other VMMemRegion
-   */
-  bool contains(const VMMemRegion* mr) const;
-
   /* base address of this virtual memory range */
   inline address base() const {
     return addr();
@@ -391,13 +388,28 @@
     set_flags(flags() | (f & mt_masks));
   }
 
-  // release part of memory range
-  inline void partial_release(address add, size_t sz) {
-    assert(add >= addr() && add < addr() + size(), "not valid address");
-    // for now, it can partially release from the both ends,
-    // but not in the middle
+  // expand this region to also cover the specified range.
+  // The range has to be on either end of the memory region.
+  void expand_region(address addr, size_t sz) {
+    if (addr < base()) {
+      assert(addr + sz == base(), "Sanity check");
+      _addr = addr;
+      set_size(size() + sz);
+    } else {
+      assert(base() + size() == addr, "Sanity check");
+      set_size(size() + sz);
+    }
+  }
+
+  // exclude the specified address range from this region.
+  // The excluded memory range has to be on either end of this memory
+  // region.
+  inline void exclude_region(address add, size_t sz) {
+    assert(is_reserved_region() || is_committed_region(), "Sanity check");
+    assert(addr() != NULL && size() != 0, "Sanity check");
+    assert(add >= addr() && add < addr() + size(), "Sanity check");
     assert(add == addr() || (add + sz) == (addr() + size()),
-      "release in the middle");
+      "exclude in the middle");
     if (add == addr()) {
       set_addr(add + sz);
       set_size(size() - sz);
@@ -405,16 +417,6 @@
       set_size(size() - sz);
     }
   }
-
-  // the committed size of the virtual memory block
-  inline size_t committed_size() const {
-    return _committed_size;
-  }
-
-  // the reserved size of the virtual memory block
-  inline size_t reserved_size() const {
-    return size();
-  }
 };
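Both expand_region and exclude_region operate only on the ends of a region, never the middle, which keeps every region a single contiguous range. A sketch of those invariants with a hypothetical Rgn struct:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct Rgn {
        uintptr_t addr; size_t size;
        void expand(uintptr_t a, size_t sz) {
            if (a < addr) { assert(a + sz == addr); addr = a; }  // grow downward
            else          { assert(addr + size == a); }          // grow upward
            size += sz;
        }
        void exclude(uintptr_t a, size_t sz) {
            assert(a == addr || a + sz == addr + size);  // only from an end
            if (a == addr) addr = a + sz;
            size -= sz;
        }
    };

    int main() {
        Rgn r = { 0x2000, 0x1000 };
        r.expand(0x1000, 0x1000);   // grow downward: [0x1000, 0x3000)
        r.exclude(0x2800, 0x800);   // trim the tail:  [0x1000, 0x2800)
        assert(r.addr == 0x1000 && r.size == 0x1800);
        return 0;
    }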
 
 class VMMemRegionEx : public VMMemRegion {
--- a/src/share/vm/services/memRecorder.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memRecorder.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,14 +31,19 @@
 #include "services/memTracker.hpp"
 
 MemPointer* SequencedRecordIterator::next_record() {
-  MemPointer* itr_cur = _itr.current();
-  if (itr_cur == NULL) return NULL;
-  MemPointer* itr_next = _itr.next();
+  MemPointerRecord* itr_cur = (MemPointerRecord*)_itr.current();
+  if (itr_cur == NULL)  {
+    return itr_cur;
+  }
+
+  MemPointerRecord* itr_next = (MemPointerRecord*)_itr.next();
 
-  while (itr_next != NULL &&
-    same_kind((MemPointerRecord*)itr_cur, (MemPointerRecord*)itr_next)) {
+  // don't collapse virtual memory records
+  while (itr_next != NULL && !itr_cur->is_vm_pointer() &&
+    !itr_next->is_vm_pointer() &&
+    same_kind(itr_cur, itr_next)) {
     itr_cur = itr_next;
-    itr_next = _itr.next();
+    itr_next = (MemPointerRecord*)_itr.next();
   }
 
   return itr_cur;
@@ -79,10 +84,13 @@
     }
     delete _pointer_records;
   }
-  if (_next != NULL) {
-    delete _next;
+  // delete all linked recorders
+  while (_next != NULL) {
+    MemRecorder* tmp = _next;
+    _next = _next->next();
+    tmp->set_next(NULL);
+    delete tmp;
   }
-
   Atomic::dec(&_instance_count);
 }
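The destructor change replaces recursive deletion of the _next chain (each node's destructor deleting the next) with an iterative loop that detaches each node before deleting it, so a long recorder chain cannot overflow the stack. The same shape in miniature, with a hypothetical Node type:

    #include <cstdio>

    struct Node { Node* next; };

    static void delete_chain(Node*& head) {
        while (head != nullptr) {
            Node* tmp = head;
            head = head->next;
            tmp->next = nullptr;  // detach, mirroring set_next(NULL)
            delete tmp;
        }
    }

    int main() {
        Node* head = nullptr;
        for (int i = 0; i < 100000; i++) head = new Node{ head };
        delete_chain(head);  // no deep recursion, regardless of chain length
        printf("done\n");
        return 0;
    }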
 
--- a/src/share/vm/services/memRecorder.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memRecorder.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -188,6 +188,7 @@
   // Test if the two records are the same kind: the same memory block and allocation
   // type.
   inline bool same_kind(const MemPointerRecord* p1, const MemPointerRecord* p2) const {
+    assert(!p1->is_vm_pointer() && !p2->is_vm_pointer(), "malloc pointer only");
     return (p1->addr() == p2->addr() &&
       (p1->flags() &MemPointerRecord::tag_masks) ==
       (p2->flags() & MemPointerRecord::tag_masks));
@@ -202,6 +203,7 @@
   friend class MemSnapshot;
   friend class MemTracker;
   friend class MemTrackWorker;
+  friend class GenerationData;
 
  protected:
   // the array that holds memory records
--- a/src/share/vm/services/memReporter.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memReporter.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,7 @@
 
   report_summaries(baseline);
   if (!summary_only && MemTracker::track_callsite()) {
+    report_virtual_memory_map(baseline);
     report_callsites(baseline);
   }
   _outputer.done();
@@ -74,6 +75,25 @@
   _outputer.done_category_summary();
 }
 
+void BaselineReporter::report_virtual_memory_map(const MemBaseline& baseline) {
+  _outputer.start_virtual_memory_map();
+  MemBaseline* pBL = const_cast<MemBaseline*>(&baseline);
+  MemPointerArrayIteratorImpl itr = MemPointerArrayIteratorImpl(pBL->_vm_map);
+  VMMemRegionEx* rgn = (VMMemRegionEx*)itr.current();
+  while (rgn != NULL) {
+    if (rgn->is_reserved_region()) {
+      _outputer.reserved_memory_region(FLAGS_TO_MEMORY_TYPE(rgn->flags()),
+        rgn->base(), rgn->base() + rgn->size(), amount_in_current_scale(rgn->size()), rgn->pc());
+    } else {
+      _outputer.committed_memory_region(rgn->base(), rgn->base() + rgn->size(),
+        amount_in_current_scale(rgn->size()), rgn->pc());
+    }
+    rgn = (VMMemRegionEx*)itr.next();
+  }
+
+  _outputer.done_virtual_memory_map();
+}
+
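The virtual memory map prints reserved regions flush left and committed sub-regions indented beneath their owner, mirroring the array order maintained by the baseline code. A toy printer over a hypothetical Rgn list:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Rgn { bool reserved; uintptr_t base; size_t size; };

    int main() {
        std::vector<Rgn> map = {
            { true,  0x10000, 0x8000 },   // reserved region
            { false, 0x10000, 0x2000 },   // committed sub-regions follow it
            { false, 0x14000, 0x1000 },
        };
        for (const Rgn& r : map) {
            printf("%s[0x%lx - 0x%lx] %s %zu bytes\n",
                   r.reserved ? "" : "\t",
                   (unsigned long)r.base, (unsigned long)(r.base + r.size),
                   r.reserved ? "reserved" : "committed", r.size);
        }
        return 0;
    }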
 void BaselineReporter::report_callsites(const MemBaseline& baseline) {
   _outputer.start_callsite();
   MemBaseline* pBL = const_cast<MemBaseline*>(&baseline);
@@ -324,6 +344,40 @@
   _output->print_cr(" ");
 }
 
+
+void BaselineTTYOutputer::start_virtual_memory_map() {
+  _output->print_cr("Virtual memory map:");
+}
+
+void BaselineTTYOutputer::reserved_memory_region(MEMFLAGS type, address base, address end,
+                                                 size_t size, address pc) {
+  const char* unit = memory_unit(_scale);
+  char buf[128];
+  int  offset;
+  _output->print_cr(" ");
+  _output->print_cr("[" PTR_FORMAT " - " PTR_FORMAT "] reserved %d%s for %s", base, end, size, unit,
+            MemBaseline::type2name(type));
+  if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) {
+      _output->print_cr("\t\tfrom [%s+0x%x]", buf, offset);
+  }
+}
+
+void BaselineTTYOutputer::committed_memory_region(address base, address end, size_t size, address pc) {
+  const char* unit = memory_unit(_scale);
+  char buf[128];
+  int  offset;
+  _output->print("\t[" PTR_FORMAT " - " PTR_FORMAT "] committed %d%s", base, end, size, unit);
+  if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) {
+      _output->print_cr(" from [%s+0x%x]", buf, offset);
+  }
+}
+
+void BaselineTTYOutputer::done_virtual_memory_map() {
+  _output->print_cr(" ");
+}
+
+
+
 void BaselineTTYOutputer::start_callsite() {
   _output->print_cr("Details:");
   _output->print_cr(" ");
@@ -337,7 +391,7 @@
   size_t malloc_count) {
   if (malloc_amt > 0) {
     const char* unit = memory_unit(_scale);
-    char buf[64];
+    char buf[128];
     int  offset;
     if (pc == 0) {
       _output->print("[BOOTSTRAP]%18s", " ");
@@ -357,7 +411,7 @@
   size_t committed_amt) {
   if (reserved_amt > 0) {
     const char* unit = memory_unit(_scale);
-    char buf[64];
+    char buf[128];
     int  offset;
     if (pc == 0) {
       _output->print("[BOOTSTRAP]%18s", " ");
@@ -502,7 +556,7 @@
     int malloc_diff, int malloc_count_diff) {
   if (malloc_diff != 0) {
     const char* unit = memory_unit(_scale);
-    char buf[64];
+    char buf[128];
     int  offset;
     if (pc == 0) {
       _output->print_cr("[BOOTSTRAP]%18s", " ");
--- a/src/share/vm/services/memReporter.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memReporter.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -93,6 +93,11 @@
 
   virtual void done_category_summary() = 0;
 
+  virtual void start_virtual_memory_map() = 0;
+  virtual void reserved_memory_region(MEMFLAGS type, address base, address end, size_t size, address pc) = 0;
+  virtual void committed_memory_region(address base, address end, size_t size, address pc) = 0;
+  virtual void done_virtual_memory_map() = 0;
+
   /*
    *  Report callsite information
    */
@@ -136,6 +141,7 @@
 
  private:
   void report_summaries(const MemBaseline& baseline);
+  void report_virtual_memory_map(const MemBaseline& baseline);
   void report_callsites(const MemBaseline& baseline);
 
   void diff_summaries(const MemBaseline& cur, const MemBaseline& prev);
@@ -251,6 +257,13 @@
 
   void done_category_summary();
 
+  // virtual memory map
+  void start_virtual_memory_map();
+  void reserved_memory_region(MEMFLAGS type, address base, address end, size_t size, address pc);
+  void committed_memory_region(address base, address end, size_t size, address pc);
+  void done_virtual_memory_map();
+
+
   /*
    *  Report callsite information
    */
--- a/src/share/vm/services/memSnapshot.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memSnapshot.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,6 +31,309 @@
 #include "services/memSnapshot.hpp"
 #include "services/memTracker.hpp"
 
+#ifdef ASSERT
+
+void decode_pointer_record(MemPointerRecord* rec) {
+  tty->print("Pointer: [" PTR_FORMAT " - " PTR_FORMAT  "] size = %d bytes", rec->addr(),
+    rec->addr() + rec->size(), (int)rec->size());
+  tty->print(" type = %s", MemBaseline::type2name(FLAGS_TO_MEMORY_TYPE(rec->flags())));
+  if (rec->is_vm_pointer()) {
+    if (rec->is_allocation_record()) {
+      tty->print_cr(" (reserve)");
+    } else if (rec->is_commit_record()) {
+      tty->print_cr(" (commit)");
+    } else if (rec->is_uncommit_record()) {
+      tty->print_cr(" (uncommit)");
+    } else if (rec->is_deallocation_record()) {
+      tty->print_cr(" (release)");
+    } else {
+      tty->print_cr(" (tag)");
+    }
+  } else {
+    if (rec->is_arena_memory_record()) {
+      tty->print_cr(" (arena size)");
+    } else if (rec->is_allocation_record()) {
+      tty->print_cr(" (malloc)");
+    } else {
+      tty->print_cr(" (free)");
+    }
+  }
+  if (MemTracker::track_callsite()) {
+    char buf[1024];
+    address pc = ((MemPointerRecordEx*)rec)->pc();
+    if (pc != NULL && os::dll_address_to_function_name(pc, buf, sizeof(buf), NULL)) {
+      tty->print_cr("\tfrom %s", buf);
+    } else {
+      tty->print_cr("\tcould not decode pc = " PTR_FORMAT "", pc);
+    }
+  }
+}
+
+void decode_vm_region_record(VMMemRegion* rec) {
+  tty->print("VM Region [" PTR_FORMAT " - " PTR_FORMAT "]", rec->addr(),
+    rec->addr() + rec->size());
+  tty->print(" type = %s", MemBaseline::type2name(FLAGS_TO_MEMORY_TYPE(rec->flags())));
+  if (rec->is_allocation_record()) {
+    tty->print_cr(" (reserved)");
+  } else if (rec->is_commit_record()) {
+    tty->print_cr(" (committed)");
+  } else {
+    ShouldNotReachHere();
+  }
+  if (MemTracker::track_callsite()) {
+    char buf[1024];
+    address pc = ((VMMemRegionEx*)rec)->pc();
+    if (pc != NULL && os::dll_address_to_function_name(pc, buf, sizeof(buf), NULL)) {
+      tty->print_cr("\tfrom %s", buf);
+    } else {
+      tty->print_cr("\tcould not decode pc = " PTR_FORMAT "", pc);
+    }
+
+  }
+}
+
+#endif
+
+
+bool VMMemPointerIterator::insert_record(MemPointerRecord* rec) {
+  VMMemRegionEx new_rec;
+  assert(rec->is_allocation_record() || rec->is_commit_record(),
+    "Sanity check");
+  if (MemTracker::track_callsite()) {
+    new_rec.init((MemPointerRecordEx*)rec);
+  } else {
+    new_rec.init(rec);
+  }
+  return insert(&new_rec);
+}
+
+bool VMMemPointerIterator::insert_record_after(MemPointerRecord* rec) {
+  VMMemRegionEx new_rec;
+  assert(rec->is_allocation_record() || rec->is_commit_record(),
+    "Sanity check");
+  if (MemTracker::track_callsite()) {
+    new_rec.init((MemPointerRecordEx*)rec);
+  } else {
+    new_rec.init(rec);
+  }
+  return insert_after(&new_rec);
+}
+
+// we don't consolidate reserved regions, since they may be categorized
+// into different types.
+bool VMMemPointerIterator::add_reserved_region(MemPointerRecord* rec) {
+  assert(rec->is_allocation_record(), "Sanity check");
+  VMMemRegion* reserved_region = (VMMemRegion*)current();
+
+  // we don't have anything yet
+  if (reserved_region == NULL) {
+    return insert_record(rec);
+  }
+
+  assert(reserved_region->is_reserved_region(), "Sanity check");
+  // duplicated records
+  if (reserved_region->is_same_region(rec)) {
+    return true;
+  }
+  // Overlapping stack regions indicate that a JNI thread failed to
+  // detach from the VM before exiting. This leaks the JavaThread object.
+  if (CheckJNICalls) {
+      guarantee(FLAGS_TO_MEMORY_TYPE(reserved_region->flags()) != mtThreadStack ||
+         !reserved_region->overlaps_region(rec),
+         "Attached JNI thread exited without being detached");
+  }
+  // otherwise, we should not have overlapping reserved regions
+  assert(FLAGS_TO_MEMORY_TYPE(reserved_region->flags()) == mtThreadStack ||
+    reserved_region->base() > rec->addr(), "Just check: locate()");
+  assert(FLAGS_TO_MEMORY_TYPE(reserved_region->flags()) == mtThreadStack ||
+    !reserved_region->overlaps_region(rec), "overlapping reserved regions");
+
+  return insert_record(rec);
+}
+
+// we do consolidate committed regions
+bool VMMemPointerIterator::add_committed_region(MemPointerRecord* rec) {
+  assert(rec->is_commit_record(), "Sanity check");
+  VMMemRegion* reserved_rgn = (VMMemRegion*)current();
+  assert(reserved_rgn->is_reserved_region() && reserved_rgn->contains_region(rec),
+    "Sanity check");
+
+  // thread's native stack is always marked as "committed", ignore
+  // the "commit" operation for creating stack guard pages
+  if (FLAGS_TO_MEMORY_TYPE(reserved_rgn->flags()) == mtThreadStack &&
+      FLAGS_TO_MEMORY_TYPE(rec->flags()) != mtThreadStack) {
+    return true;
+  }
+
+  // if the reserved region has any committed regions
+  VMMemRegion* committed_rgn  = (VMMemRegion*)next();
+  while (committed_rgn != NULL && committed_rgn->is_committed_region()) {
+    // duplicated commit records
+    if(committed_rgn->contains_region(rec)) {
+      return true;
+    } else if (committed_rgn->overlaps_region(rec)) {
+      // overlaps front part
+      if (rec->addr() < committed_rgn->addr()) {
+        committed_rgn->expand_region(rec->addr(),
+          committed_rgn->addr() - rec->addr());
+      } else {
+        // overlaps tail part
+        address committed_rgn_end = committed_rgn->addr() +
+              committed_rgn->size();
+        assert(committed_rgn_end < rec->addr() + rec->size(),
+             "overlap tail part");
+        committed_rgn->expand_region(committed_rgn_end,
+          (rec->addr() + rec->size()) - committed_rgn_end);
+      }
+    } else if (committed_rgn->base() + committed_rgn->size() == rec->addr()) {
+      // the regions adjoin each other
+      committed_rgn->expand_region(rec->addr(), rec->size());
+      VMMemRegion* next_reg = (VMMemRegion*)next();
+      // see if we can consolidate next committed region
+      if (next_reg != NULL && next_reg->is_committed_region() &&
+        next_reg->base() == committed_rgn->base() + committed_rgn->size()) {
+          committed_rgn->expand_region(next_reg->base(), next_reg->size());
+          // delete merged region
+          remove();
+      }
+      return true;
+    } else if (committed_rgn->base() > rec->addr()) {
+      // found the location, insert this committed region
+      return insert_record(rec);
+    }
+    committed_rgn = (VMMemRegion*)next();
+  }
+  return insert_record(rec);
+}
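add_committed_region walks the committed records that follow the current reserved region and either drops duplicates, expands an overlapping or adjacent neighbor (merging with the next record when the expansion bridges a gap), or inserts at the first higher base address. A vector-based sketch of those cases (hypothetical Rgn struct and add_committed helper):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Rgn { uintptr_t base; size_t size; };

    static void add_committed(std::vector<Rgn>& v, Rgn rec) {
        for (size_t i = 0; i < v.size(); i++) {
            Rgn& c = v[i];
            uintptr_t c_end = c.base + c.size, r_end = rec.base + rec.size;
            if (c.base <= rec.base && c_end >= r_end) return;  // duplicate
            if (rec.base <= c_end && c.base <= r_end) {        // overlap/adjacent
                uintptr_t nb = c.base < rec.base ? c.base : rec.base;
                uintptr_t ne = c_end  > r_end   ? c_end  : r_end;
                c.base = nb; c.size = ne - nb;
                // swallow the next region if the expansion now touches it
                if (i + 1 < v.size() && v[i + 1].base <= c.base + c.size) {
                    uintptr_t nn = v[i + 1].base + v[i + 1].size;
                    if (nn > c.base + c.size) c.size = nn - c.base;
                    v.erase(v.begin() + i + 1);
                }
                return;
            }
            if (c.base > rec.base) { v.insert(v.begin() + i, rec); return; }
        }
        v.push_back(rec);
    }

    int main() {
        std::vector<Rgn> v;
        add_committed(v, { 0x1000, 0x1000 });
        add_committed(v, { 0x3000, 0x1000 });
        add_committed(v, { 0x2000, 0x1000 });  // bridges the two into one
        for (const Rgn& r : v)
            printf("[0x%lx - 0x%lx]\n", (unsigned long)r.base,
                   (unsigned long)(r.base + r.size));
        return 0;
    }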
+
+bool VMMemPointerIterator::remove_uncommitted_region(MemPointerRecord* rec) {
+  assert(rec->is_uncommit_record(), "sanity check");
+  VMMemRegion* cur;
+  cur = (VMMemRegion*)current();
+  assert(cur->is_reserved_region() && cur->contains_region(rec),
+    "Sanity check");
+  // thread's native stack is always marked as "committed", ignore
+  // the "uncommit" operation for removing stack guard pages
+  if (FLAGS_TO_MEMORY_TYPE(cur->flags()) == mtThreadStack &&
+      FLAGS_TO_MEMORY_TYPE(rec->flags()) != mtThreadStack) {
+    return true;
+  }
+
+  cur = (VMMemRegion*)next();
+  while (cur != NULL && cur->is_committed_region()) {
+    // stepped past the uncommitted range: the region must already be
+    // uncommitted, due to a duplicated record
+    if (cur->addr() >= rec->addr() + rec->size()) {
+      break;
+    } else if (cur->contains_region(rec)) {
+      // uncommit whole region
+      if (cur->is_same_region(rec)) {
+        remove();
+        break;
+      } else if (rec->addr() == cur->addr() ||
+        rec->addr() + rec->size() == cur->addr() + cur->size()) {
+        // uncommitted from either end of current memory region.
+        cur->exclude_region(rec->addr(), rec->size());
+        break;
+      } else { // split the committed region and release the middle
+        address high_addr = cur->addr() + cur->size();
+        size_t sz = high_addr - rec->addr();
+        cur->exclude_region(rec->addr(), sz);
+        sz = high_addr - (rec->addr() + rec->size());
+        if (MemTracker::track_callsite()) {
+          MemPointerRecordEx tmp(rec->addr() + rec->size(), cur->flags(), sz,
+             ((VMMemRegionEx*)cur)->pc());
+          return insert_record_after(&tmp);
+        } else {
+          MemPointerRecord tmp(rec->addr() + rec->size(), cur->flags(), sz);
+          return insert_record_after(&tmp);
+        }
+      }
+    }
+    cur = (VMMemRegion*)next();
+  }
+
+  // we may not find committed record due to duplicated records
+  return true;
+}
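Uncommitting has three shapes: the whole committed region disappears, one end is trimmed via exclude_region, or a middle hole splits one region into two (the original keeps the front, a new record takes the tail). A stand-alone sketch, assuming a simplified Rgn struct:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Rgn { uintptr_t base; size_t size; };

    static void uncommit(std::vector<Rgn>& v, size_t i, uintptr_t a, size_t sz) {
        Rgn& c = v[i];
        uintptr_t c_end = c.base + c.size;
        assert(c.base <= a && a + sz <= c_end);       // range is contained
        if (a == c.base && sz == c.size) {            // whole region gone
            v.erase(v.begin() + i);
        } else if (a == c.base) {                     // trim the front
            c.base = a + sz; c.size -= sz;
        } else if (a + sz == c_end) {                 // trim the tail
            c.size -= sz;
        } else {                                      // split the middle
            size_t tail = c_end - (a + sz);
            c.size = a - c.base;                      // keep the front part
            v.insert(v.begin() + i + 1, Rgn{ a + sz, tail });
        }
    }

    int main() {
        std::vector<Rgn> v = { { 0x1000, 0x3000 } };
        uncommit(v, 0, 0x2000, 0x1000);               // punch a hole
        assert(v.size() == 2);
        assert(v[0].base == 0x1000 && v[0].size == 0x1000);
        assert(v[1].base == 0x3000 && v[1].size == 0x1000);
        return 0;
    }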
+
+bool VMMemPointerIterator::remove_released_region(MemPointerRecord* rec) {
+  assert(rec->is_deallocation_record(), "Sanity check");
+  VMMemRegion* cur = (VMMemRegion*)current();
+  assert(cur->is_reserved_region() && cur->contains_region(rec),
+    "Sanity check");
+  if (rec->is_same_region(cur)) {
+    // release whole reserved region
+#ifdef ASSERT
+    VMMemRegion* next_region = (VMMemRegion*)peek_next();
+    // should not have any committed memory in this reserved region
+    assert(next_region == NULL || !next_region->is_committed_region(), "Sanity check");
+#endif
+    remove();
+  } else if (rec->addr() == cur->addr() ||
+    rec->addr() + rec->size() == cur->addr() + cur->size()) {
+    // released region is at either end of this region
+    cur->exclude_region(rec->addr(), rec->size());
+    assert(check_reserved_region(), "Integrity check");
+  } else { // split the reserved region and release the middle
+    address high_addr = cur->addr() + cur->size();
+    size_t sz = high_addr - rec->addr();
+    cur->exclude_region(rec->addr(), sz);
+    sz = high_addr - rec->addr() - rec->size();
+    if (MemTracker::track_callsite()) {
+      MemPointerRecordEx tmp(rec->addr() + rec->size(), cur->flags(), sz,
+        ((VMMemRegionEx*)cur)->pc());
+      bool ret = insert_reserved_region(&tmp);
+      assert(!ret || check_reserved_region(), "Integrity check");
+      return ret;
+    } else {
+      MemPointerRecord tmp(rec->addr() + rec->size(), cur->flags(), sz);
+      bool ret = insert_reserved_region(&tmp);
+      assert(!ret || check_reserved_region(), "Integrity check");
+      return ret;
+    }
+  }
+  return true;
+}
+
+bool VMMemPointerIterator::insert_reserved_region(MemPointerRecord* rec) {
+  // skip all 'commit' records associated with previous reserved region
+  VMMemRegion* p = (VMMemRegion*)next();
+  while (p != NULL && p->is_committed_region() &&
+         p->base() + p->size() < rec->addr()) {
+    p = (VMMemRegion*)next();
+  }
+  return insert_record(rec);
+}
+
+bool VMMemPointerIterator::split_reserved_region(VMMemRegion* rgn, address new_rgn_addr, size_t new_rgn_size) {
+  assert(rgn->contains_region(new_rgn_addr, new_rgn_size), "Not fully contained");
+  address pc = (MemTracker::track_callsite() ? ((VMMemRegionEx*)rgn)->pc() : NULL);
+  if (rgn->base() == new_rgn_addr) { // new region is at the beginning of the region
+    size_t sz = rgn->size() - new_rgn_size;
+    // the original region becomes 'new' region
+    rgn->exclude_region(new_rgn_addr + new_rgn_size, sz);
+    // the remainder becomes the next region
+    MemPointerRecordEx next_rgn(new_rgn_addr + new_rgn_size, rgn->flags(), sz, pc);
+    return insert_reserved_region(&next_rgn);
+  } else if (rgn->base() + rgn->size() == new_rgn_addr + new_rgn_size) {
+    rgn->exclude_region(new_rgn_addr, new_rgn_size);
+    MemPointerRecordEx next_rgn(new_rgn_addr, rgn->flags(), new_rgn_size, pc);
+    return insert_reserved_region(&next_rgn);
+  } else {
+    // the original region will be split into three
+    address rgn_high_addr = rgn->base() + rgn->size();
+    // first region
+    rgn->exclude_region(new_rgn_addr, (rgn_high_addr - new_rgn_addr));
+    // the second region is the new region
+    MemPointerRecordEx new_rgn(new_rgn_addr, rgn->flags(), new_rgn_size, pc);
+    if (!insert_reserved_region(&new_rgn)) return false;
+    // the remaining region
+    MemPointerRecordEx rem_rgn(new_rgn_addr + new_rgn_size, rgn->flags(),
+      rgn_high_addr - (new_rgn_addr + new_rgn_size), pc);
+    return insert_reserved_region(&rem_rgn);
+  }
+}
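split_reserved_region carves [new_rgn_addr, new_rgn_addr + new_rgn_size) out of one reserved region, yielding up to three regions that together still cover exactly the original bytes. The arithmetic in isolation, with a hypothetical Rgn struct:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Rgn { uintptr_t base; size_t size; };

    static std::vector<Rgn> split(Rgn r, uintptr_t a, size_t sz) {
        assert(r.base <= a && a + sz <= r.base + r.size);
        std::vector<Rgn> out;
        if (a > r.base)
            out.push_back({ r.base, a - r.base });        // leading remainder
        out.push_back({ a, sz });                         // the new region
        uintptr_t end = r.base + r.size;
        if (a + sz < end)
            out.push_back({ a + sz, end - (a + sz) });    // trailing remainder
        return out;
    }

    int main() {
        std::vector<Rgn> parts = split({ 0x1000, 0x4000 }, 0x2000, 0x1000);
        assert(parts.size() == 3);
        size_t total = 0;
        for (const Rgn& p : parts) total += p.size;
        assert(total == 0x4000);  // no bytes gained or lost
        return 0;
    }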
+
 static int sort_in_seq_order(const void* p1, const void* p2) {
   assert(p1 != NULL && p2 != NULL, "Sanity check");
   const MemPointerRecord* mp1 = (MemPointerRecord*)p1;
@@ -61,11 +364,11 @@
 }
 
 
-MemPointerArrayIteratorImpl StagingArea::virtual_memory_record_walker() {
+VMRecordIterator StagingArea::virtual_memory_record_walker() {
   MemPointerArray* arr = vm_data();
   // sort into seq number order
   arr->sort((FN_SORT)sort_in_seq_order);
-  return MemPointerArrayIteratorImpl(arr);
+  return VMRecordIterator(arr);
 }
 
 
@@ -81,6 +384,7 @@
   _staging_area.init();
   _lock = new (std::nothrow) Mutex(Monitor::max_nonleaf - 1, "memSnapshotLock");
   NOT_PRODUCT(_untracked_count = 0;)
+  _number_of_classes = 0;
 }
 
 MemSnapshot::~MemSnapshot() {
@@ -104,21 +408,31 @@
   }
 }
 
-void MemSnapshot::copy_pointer(MemPointerRecord* dest, const MemPointerRecord* src) {
+
+void MemSnapshot::copy_seq_pointer(MemPointerRecord* dest, const MemPointerRecord* src) {
   assert(dest != NULL && src != NULL, "Just check");
   assert(dest->addr() == src->addr(), "Just check");
+  assert(dest->seq() > 0 && src->seq() > 0, "not sequenced");
 
-  MEMFLAGS flags = dest->flags();
+  if (MemTracker::track_callsite()) {
+    *(SeqMemPointerRecordEx*)dest = *(SeqMemPointerRecordEx*)src;
+  } else {
+    *(SeqMemPointerRecord*)dest = *(SeqMemPointerRecord*)src;
+  }
+}
+
+void MemSnapshot::assign_pointer(MemPointerRecord* dest, const MemPointerRecord* src) {
+  assert(src != NULL && dest != NULL, "Just check");
+  assert(dest->seq() == 0 && src->seq() > 0, "cast away sequence");
 
   if (MemTracker::track_callsite()) {
     *(MemPointerRecordEx*)dest = *(MemPointerRecordEx*)src;
   } else {
-    *dest = *src;
+    *(MemPointerRecord*)dest = *(MemPointerRecord*)src;
   }
 }
 
-
-// merge a per-thread memory recorder to the staging area
+// merge a recorder to the staging area
 bool MemSnapshot::merge(MemRecorder* rec) {
   assert(rec != NULL && !rec->out_of_memory(), "Just check");
 
@@ -126,71 +440,47 @@
 
   MutexLockerEx lock(_lock, true);
   MemPointerIterator malloc_staging_itr(_staging_area.malloc_data());
-  MemPointerRecord *p1, *p2;
-  p1 = (MemPointerRecord*) itr.current();
-  while (p1 != NULL) {
-    if (p1->is_vm_pointer()) {
+  MemPointerRecord* incoming_rec = (MemPointerRecord*) itr.current();
+  MemPointerRecord* matched_rec;
+
+  while (incoming_rec != NULL) {
+    if (incoming_rec->is_vm_pointer()) {
       // we don't do anything with virtual memory records during merge
-      if (!_staging_area.vm_data()->append(p1)) {
+      if (!_staging_area.vm_data()->append(incoming_rec)) {
         return false;
       }
     } else {
-      p2 = (MemPointerRecord*)malloc_staging_itr.locate(p1->addr());
-      // we have not seen this memory block, so just add to staging area
-      if (p2 == NULL) {
-        if (!malloc_staging_itr.insert(p1)) {
+      // locate matched record and/or also position the iterator to proper
+      // location for this incoming record.
+      matched_rec = (MemPointerRecord*)malloc_staging_itr.locate(incoming_rec->addr());
+      // we have not seen this memory block in this generation,
+      // so just add to staging area
+      if (matched_rec == NULL) {
+        if (!malloc_staging_itr.insert(incoming_rec)) {
           return false;
         }
-      } else if (p1->addr() == p2->addr()) {
-        MemPointerRecord* staging_next = (MemPointerRecord*)malloc_staging_itr.peek_next();
-        // a memory block can have many tagging records, find right one to replace or
-        // right position to insert
-        while (staging_next != NULL && staging_next->addr() == p1->addr()) {
-          if ((staging_next->flags() & MemPointerRecord::tag_masks) <=
-            (p1->flags() & MemPointerRecord::tag_masks)) {
-            p2 = (MemPointerRecord*)malloc_staging_itr.next();
-            staging_next = (MemPointerRecord*)malloc_staging_itr.peek_next();
-          } else {
-            break;
-          }
+      } else if (incoming_rec->addr() == matched_rec->addr()) {
+        // whoever has higher sequence number wins
+        if (incoming_rec->seq() > matched_rec->seq()) {
+          copy_seq_pointer(matched_rec, incoming_rec);
         }
-        int df = (p1->flags() & MemPointerRecord::tag_masks) -
-          (p2->flags() & MemPointerRecord::tag_masks);
-        if (df == 0) {
-          assert(p1->seq() > 0, "not sequenced");
-          assert(p2->seq() > 0, "not sequenced");
-          if (p1->seq() > p2->seq()) {
-            copy_pointer(p2, p1);
-          }
-        } else if (df < 0) {
-          if (!malloc_staging_itr.insert(p1)) {
-            return false;
-          }
-        } else {
-          if (!malloc_staging_itr.insert_after(p1)) {
-            return false;
-          }
-        }
-      } else if (p1->addr() < p2->addr()) {
-        if (!malloc_staging_itr.insert(p1)) {
+      } else if (incoming_rec->addr() < matched_rec->addr()) {
+        if (!malloc_staging_itr.insert(incoming_rec)) {
           return false;
         }
       } else {
-        if (!malloc_staging_itr.insert_after(p1)) {
-          return false;
-        }
+        ShouldNotReachHere();
       }
     }
-    p1 = (MemPointerRecord*)itr.next();
+    incoming_rec = (MemPointerRecord*)itr.next();
   }
   NOT_PRODUCT(void check_staging_data();)
   return true;
 }
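The simplified merge keys malloc records by address and lets the highest sequence number win, replacing the old tag-mask comparison logic. The rule in miniature, using a std::map as a stand-in for the sorted staging array:

    #include <cstdint>
    #include <cstdio>
    #include <map>

    struct Rec { uintptr_t addr; unsigned seq; size_t size; };

    int main() {
        std::map<uintptr_t, Rec> staging;  // keyed by address, kept sorted
        Rec incoming[] = {
            { 0x1000, 1, 64 },
            { 0x2000, 2, 32 },
            { 0x1000, 5, 128 },  // newer record for 0x1000 wins
            { 0x1000, 3, 256 },  // stale: lower seq, ignored
        };
        for (const Rec& r : incoming) {
            auto it = staging.find(r.addr);
            if (it == staging.end() || r.seq > it->second.seq)
                staging[r.addr] = r;  // insert, or overwrite the older record
        }
        for (const auto& kv : staging)
            printf("0x%lx seq=%u size=%zu\n",
                   (unsigned long)kv.first, kv.second.seq, kv.second.size);
        return 0;
    }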
 
 
-
 // promote data to next generation
-bool MemSnapshot::promote() {
+bool MemSnapshot::promote(int number_of_classes) {
   assert(_alloc_ptrs != NULL && _vm_ptrs != NULL, "Just check");
   assert(_staging_area.malloc_data() != NULL && _staging_area.vm_data() != NULL,
          "Just check");
@@ -199,7 +489,7 @@
   MallocRecordIterator  malloc_itr = _staging_area.malloc_record_walker();
   bool promoted = false;
   if (promote_malloc_records(&malloc_itr)) {
-    MemPointerArrayIteratorImpl vm_itr = _staging_area.virtual_memory_record_walker();
+    VMRecordIterator vm_itr = _staging_area.virtual_memory_record_walker();
     if (promote_virtual_memory_records(&vm_itr)) {
       promoted = true;
     }
@@ -207,6 +497,7 @@
 
   NOT_PRODUCT(check_malloc_pointers();)
   _staging_area.clear();
+  _number_of_classes = number_of_classes;
   return promoted;
 }
 
@@ -218,21 +509,26 @@
     matched_rec = (MemPointerRecord*)malloc_snapshot_itr.locate(new_rec->addr());
     // found matched memory block
     if (matched_rec != NULL && new_rec->addr() == matched_rec->addr()) {
-      // snapshot already contains 'lived' records
-      assert(matched_rec->is_allocation_record() || matched_rec->is_arena_size_record(),
+      // snapshot already contains 'live' records
+      assert(matched_rec->is_allocation_record() || matched_rec->is_arena_memory_record(),
              "Sanity check");
       // update block states
-      if (new_rec->is_allocation_record() || new_rec->is_arena_size_record()) {
-        copy_pointer(matched_rec, new_rec);
+      if (new_rec->is_allocation_record()) {
+        assign_pointer(matched_rec, new_rec);
+      } else if (new_rec->is_arena_memory_record()) {
+        if (new_rec->size() == 0) {
+          // remove size record once size drops to 0
+          malloc_snapshot_itr.remove();
+        } else {
+          assign_pointer(matched_rec, new_rec);
+        }
       } else {
         // a deallocation record
         assert(new_rec->is_deallocation_record(), "Sanity check");
         // an arena record can be followed by a size record, we need to remove both
         if (matched_rec->is_arena_record()) {
           MemPointerRecord* next = (MemPointerRecord*)malloc_snapshot_itr.peek_next();
-          if (next->is_arena_size_record()) {
-            // it has to match the arena record
-            assert(next->is_size_record_of_arena(matched_rec), "Sanity check");
+          if (next->is_arena_memory_record() && next->is_memory_record_of_arena(matched_rec)) {
             malloc_snapshot_itr.remove();
           }
         }
@@ -240,17 +536,13 @@
         malloc_snapshot_itr.remove();
       }
     } else {
-      // it is a new record, insert into snapshot
-      if (new_rec->is_arena_size_record()) {
-        MemPointerRecord* prev = (MemPointerRecord*)malloc_snapshot_itr.peek_prev();
-        if (prev == NULL || !prev->is_arena_record() || !new_rec->is_size_record_of_arena(prev)) {
-          // no matched arena record, ignore the size record
-          new_rec = NULL;
-        }
+      // don't insert size 0 record
+      if (new_rec->is_arena_memory_record() && new_rec->size() == 0) {
+        new_rec = NULL;
       }
-      // only 'live' record can go into snapshot
+
       if (new_rec != NULL) {
-        if  (new_rec->is_allocation_record() || new_rec->is_arena_size_record()) {
+        if  (new_rec->is_allocation_record() || new_rec->is_arena_memory_record()) {
           if (matched_rec != NULL && new_rec->addr() > matched_rec->addr()) {
             if (!malloc_snapshot_itr.insert_after(new_rec)) {
               return false;
@@ -277,87 +569,60 @@
 bool MemSnapshot::promote_virtual_memory_records(MemPointerArrayIterator* itr) {
   VMMemPointerIterator vm_snapshot_itr(_vm_ptrs);
   MemPointerRecord* new_rec = (MemPointerRecord*)itr->current();
-  VMMemRegionEx new_vm_rec;
-  VMMemRegion*  matched_rec;
+  VMMemRegion*  reserved_rec;
   while (new_rec != NULL) {
     assert(new_rec->is_vm_pointer(), "Sanity check");
-    if (MemTracker::track_callsite()) {
-      new_vm_rec.init((MemPointerRecordEx*)new_rec);
-    } else {
-      new_vm_rec.init(new_rec);
-    }
-    matched_rec = (VMMemRegion*)vm_snapshot_itr.locate(new_rec->addr());
-    if (matched_rec != NULL &&
-        (matched_rec->contains(&new_vm_rec) || matched_rec->base() == new_vm_rec.base())) {
+
+    // locate a reserved region that contains the specified address, or
+    // the nearest reserved region has base address just above the specified
+    // address
+    reserved_rec = (VMMemRegion*)vm_snapshot_itr.locate(new_rec->addr());
+    if (reserved_rec != NULL && reserved_rec->contains_region(new_rec)) {
       // snapshot can only have 'live' records
-      assert(matched_rec->is_reserve_record(), "Sanity check");
-      if (new_vm_rec.is_reserve_record() && matched_rec->base() == new_vm_rec.base()) {
-        // resize reserved virtual memory range
-        // resize has to cover committed area
-        assert(new_vm_rec.size() >= matched_rec->committed_size(), "Sanity check");
-        matched_rec->set_reserved_size(new_vm_rec.size());
-      } else if (new_vm_rec.is_commit_record()) {
-        // commit memory inside reserved memory range
-        assert(new_vm_rec.committed_size() <= matched_rec->reserved_size(), "Sanity check");
-        // thread stacks are marked committed, so we ignore 'commit' record for creating
-        // stack guard pages
-        if (FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) != mtThreadStack) {
-          matched_rec->commit(new_vm_rec.committed_size());
-        }
-      } else if (new_vm_rec.is_uncommit_record()) {
-        if (FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) == mtThreadStack) {
-          // ignore 'uncommit' record from removing stack guard pages, uncommit
-          // thread stack as whole
-          if (matched_rec->committed_size() == new_vm_rec.committed_size()) {
-            matched_rec->uncommit(new_vm_rec.committed_size());
-          }
-        } else {
-          // uncommit memory inside reserved memory range
-          assert(new_vm_rec.committed_size() <= matched_rec->committed_size(),
-                "Sanity check");
-          matched_rec->uncommit(new_vm_rec.committed_size());
-        }
-      } else if (new_vm_rec.is_type_tagging_record()) {
-        // tag this virtual memory range to a memory type
-        // can not re-tag a memory range to different type
-        assert(FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) == mtNone ||
-               FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) == FLAGS_TO_MEMORY_TYPE(new_vm_rec.flags()),
-               "Sanity check");
-        matched_rec->tag(new_vm_rec.flags());
-      } else if (new_vm_rec.is_release_record()) {
-        // release part or whole memory range
-        if (new_vm_rec.base() == matched_rec->base() &&
-            new_vm_rec.size() == matched_rec->size()) {
-          // release whole virtual memory range
-          assert(matched_rec->committed_size() == 0, "Sanity check");
-          vm_snapshot_itr.remove();
-        } else {
-          // partial release
-          matched_rec->partial_release(new_vm_rec.base(), new_vm_rec.size());
-        }
-      } else {
-        // multiple reserve/commit on the same virtual memory range
-        assert((new_vm_rec.is_reserve_record() || new_vm_rec.is_commit_record()) &&
-          (new_vm_rec.base() == matched_rec->base() && new_vm_rec.size() == matched_rec->size()),
-          "Sanity check");
-        matched_rec->tag(new_vm_rec.flags());
-      }
-    } else {
-      // no matched record
-      if (new_vm_rec.is_reserve_record()) {
-        if (matched_rec == NULL || matched_rec->base() > new_vm_rec.base()) {
-          if (!vm_snapshot_itr.insert(&new_vm_rec)) {
-            return false;
-          }
-        } else {
-          if (!vm_snapshot_itr.insert_after(&new_vm_rec)) {
+      assert(reserved_rec->is_reserved_region(), "Sanity check");
+      if (new_rec->is_allocation_record()) {
+        if (!reserved_rec->is_same_region(new_rec)) {
+          // only deal with splitting a bigger reserved region into smaller regions.
+          // So far, CDS is the only use case.
+          if (!vm_snapshot_itr.split_reserved_region(reserved_rec, new_rec->addr(), new_rec->size())) {
             return false;
           }
         }
-      } else {
-        // throw out obsolete records, which are the commit/uncommit/release/tag records
-        // on memory regions that are already released.
-      }
+      } else if (new_rec->is_uncommit_record()) {
+        if (!vm_snapshot_itr.remove_uncommitted_region(new_rec)) {
+          return false;
+        }
+      } else if (new_rec->is_commit_record()) {
+        // insert or expand existing committed region to cover this
+        // newly committed region
+        if (!vm_snapshot_itr.add_committed_region(new_rec)) {
+          return false;
+        }
+      } else if (new_rec->is_deallocation_record()) {
+        // release part or all memory region
+        if (!vm_snapshot_itr.remove_released_region(new_rec)) {
+          return false;
+        }
+      } else if (new_rec->is_type_tagging_record()) {
+        // tag this reserved virtual memory range with a memory type. A memory
+        // range cannot be re-tagged to a different type.
+        assert(FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) == mtNone ||
+               FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) == FLAGS_TO_MEMORY_TYPE(new_rec->flags()),
+               "Sanity check");
+        reserved_rec->tag(new_rec->flags());
+      } else {
+        ShouldNotReachHere();
+      }
+    } else {
+      /*
+       * An assertion failure here indicates mis-matched virtual memory records:
+       * the likely scenario is that some virtual memory operations do not go
+       * through the os::xxxx_memory() API and have to be tracked manually
+       * (perfMemory is an example).
+       */
+      assert(new_rec->is_allocation_record(), "Sanity check");
+      if (!vm_snapshot_itr.add_reserved_region(new_rec)) {
+        return false;
+      }
   }
     new_rec = (MemPointerRecord*)itr->next();
   }
@@ -433,5 +698,33 @@
     cur = (MemPointerRecord*)vm_itr.next();
   }
 }
+
+void MemSnapshot::dump_all_vm_pointers() {
+  MemPointerArrayIteratorImpl itr(_vm_ptrs);
+  VMMemRegion* ptr = (VMMemRegion*)itr.current();
+  tty->print_cr("dump virtual memory pointers:");
+  while (ptr != NULL) {
+    if (ptr->is_committed_region()) {
+      tty->print("\t");
+    }
+    tty->print("[" PTR_FORMAT " - " PTR_FORMAT "] [%x]", ptr->addr(),
+      (ptr->addr() + ptr->size()), ptr->flags());
+
+    if (MemTracker::track_callsite()) {
+      VMMemRegionEx* ex = (VMMemRegionEx*)ptr;
+      if (ex->pc() != NULL) {
+        char buf[1024];
+        if (os::dll_address_to_function_name(ex->pc(), buf, sizeof(buf), NULL)) {
+          tty->print_cr("\t%s", buf);
+        } else {
+          tty->print_cr("");
+        }
+      }
+    }
+
+    ptr = (VMMemRegion*)itr.next();
+  }
+  tty->flush();
+}
 #endif // ASSERT
 
--- a/src/share/vm/services/memSnapshot.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memSnapshot.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -31,7 +31,6 @@
 #include "services/memBaseline.hpp"
 #include "services/memPtrArray.hpp"
 
-
 // Snapshot pointer array iterator
 
 // The pointer array contains malloc-ed pointers
@@ -111,34 +110,59 @@
       MemPointerIterator(arr) {
   }
 
-  // locate an existing record that contains specified address, or
-  // the record, where the record with specified address, should
-  // be inserted.
-  // virtual memory record array is sorted in address order, so
-  // binary search is performed
+  // locate an existing reserved memory region that contains specified address,
+  // or the reserved region just above this address, where the incoming
+  // reserved region should be inserted.
   virtual MemPointer* locate(address addr) {
-    int index_low = 0;
-    int index_high = _array->length();
-    int index_mid = (index_high + index_low) / 2;
-    int r = 1;
-    while (index_low < index_high && (r = compare(index_mid, addr)) != 0) {
-      if (r > 0) {
-        index_high = index_mid;
-      } else {
-        index_low = index_mid;
+    reset();
+    VMMemRegion* reg = (VMMemRegion*)current();
+    while (reg != NULL) {
+      if (reg->is_reserved_region()) {
+        if (reg->contains_address(addr) || addr < reg->base()) {
+          return reg;
       }
-      index_mid = (index_high + index_low) / 2;
     }
-    if (r == 0) {
-      // update current location
-      _pos = index_mid;
-      return _array->at(index_mid);
-    } else {
+      reg = (VMMemRegion*)next();
+    }
       return NULL;
     }
+
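The rewritten locate() gives up binary search for a linear scan over reserved regions only: it returns the reserved region containing addr, or the first reserved region whose base lies above addr (the insertion point for a new reservation), or NULL. A sketch of that contract (hypothetical Rgn struct):

    #include <cstdint>
    #include <vector>

    struct Rgn { uintptr_t base; size_t size; };

    static const Rgn* locate(const std::vector<Rgn>& v, uintptr_t addr) {
        for (const Rgn& r : v) {
            // containing region, or the first region just above the address
            if ((r.base <= addr && addr < r.base + r.size) || addr < r.base)
                return &r;
        }
        return nullptr;  // address is beyond every region
    }

    int main() {
        std::vector<Rgn> v = { { 0x1000, 0x1000 }, { 0x4000, 0x1000 } };
        const Rgn* hit = locate(v, 0x1800);   // inside the first region
        const Rgn* gap = locate(v, 0x3000);   // in the gap: next region above
        return (hit == &v[0] && gap == &v[1]) ? 0 : 1;
    }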
+  // following methods update virtual memory in the context
+  // of 'current' position, which is properly positioned by
+  // callers via locate method.
+  bool add_reserved_region(MemPointerRecord* rec);
+  bool add_committed_region(MemPointerRecord* rec);
+  bool remove_uncommitted_region(MemPointerRecord* rec);
+  bool remove_released_region(MemPointerRecord* rec);
+
+  // split a reserved region to create a new memory region with specified base and size
+  bool split_reserved_region(VMMemRegion* rgn, address new_rgn_addr, size_t new_rgn_size);
+ private:
+  bool insert_record(MemPointerRecord* rec);
+  bool insert_record_after(MemPointerRecord* rec);
+
+  bool insert_reserved_region(MemPointerRecord* rec);
+
+  // reset current position
+  inline void reset() { _pos = 0; }
+#ifdef ASSERT
+  // check integrity of records on current reserved memory region.
+  bool check_reserved_region() {
+    VMMemRegion* reserved_region = (VMMemRegion*)current();
+    assert(reserved_region != NULL && reserved_region->is_reserved_region(),
+          "Sanity check");
+    // all committed regions that follow current reserved region, should all
+    // belong to the reserved region.
+    VMMemRegion* next_region = (VMMemRegion*)next();
+    for (; next_region != NULL && next_region->is_committed_region();
+         next_region = (VMMemRegion*)next() ) {
+      if(!reserved_region->contains_region(next_region)) {
+        return false;
+      }
+    }
+    return true;
   }
 
-#ifdef ASSERT
   virtual bool is_dup_pointer(const MemPointer* ptr1,
     const MemPointer* ptr2) const {
     VMMemRegion* p1 = (VMMemRegion*)ptr1;
@@ -154,57 +178,61 @@
            (p1->flags() & MemPointerRecord::tag_masks) == MemPointerRecord::tag_release;
   }
 #endif
-  // compare if an address falls into a memory region,
-  // return 0, if the address falls into a memory region at specified index
-  // return 1, if memory region pointed by specified index is higher than the address
-  // return -1, if memory region pointed by specified index is lower than the address
-  int compare(int index, address addr) const {
-    VMMemRegion* r = (VMMemRegion*)_array->at(index);
-    assert(r->is_reserve_record(), "Sanity check");
-    if (r->addr() > addr) {
-      return 1;
-    } else if (r->addr() + r->reserved_size() <= addr) {
-      return -1;
-    } else {
-      return 0;
-    }
-  }
 };
 
 class MallocRecordIterator : public MemPointerArrayIterator {
  private:
   MemPointerArrayIteratorImpl  _itr;
 
+
+
  public:
   MallocRecordIterator(MemPointerArray* arr) : _itr(arr) {
   }
 
-  MemPointer* current() const {
-    MemPointerRecord* cur = (MemPointerRecord*)_itr.current();
-    assert(cur == NULL || !cur->is_vm_pointer(), "seek error");
-    MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next();
-    if (next == NULL || next->addr() != cur->addr()) {
-      return cur;
-    } else {
-      assert(!cur->is_vm_pointer(), "Sanity check");
-      assert(cur->is_allocation_record() && next->is_deallocation_record(),
-             "sorting order");
-      assert(cur->seq() != next->seq(), "Sanity check");
-      return cur->seq() >  next->seq() ? cur : next;
+  virtual MemPointer* current() const {
+#ifdef ASSERT
+    MemPointer* cur_rec = _itr.current();
+    if (cur_rec != NULL) {
+      MemPointer* prev_rec = _itr.peek_prev();
+      MemPointer* next_rec = _itr.peek_next();
+      assert(prev_rec == NULL || prev_rec->addr() < cur_rec->addr(), "Sorting order");
+      assert(next_rec == NULL || next_rec->addr() > cur_rec->addr(), "Sorting order");
     }
+#endif
+    return _itr.current();
   }
-
-  MemPointer* next() {
-    MemPointerRecord* cur = (MemPointerRecord*)_itr.current();
-    assert(cur == NULL || !cur->is_vm_pointer(), "Sanity check");
-    MemPointerRecord* next = (MemPointerRecord*)_itr.next();
-    if (next == NULL) {
-      return NULL;
+  virtual MemPointer* next() {
+    MemPointerRecord* next_rec = (MemPointerRecord*)_itr.next();
+    // an arena memory record is a special case: its sequence number has to
+    // be compared against that of its associated arena record.
+    if (next_rec != NULL && next_rec->is_arena_memory_record()) {
+      MemPointerRecord* prev_rec = (MemPointerRecord*)_itr.peek_prev();
+      // if there is an associated arena record, it has to be the previous
+      // record because of the sorting order (by address) - NMT generates a
+      // pseudo address for an arena's size record by offsetting the arena's
+      // address, which guarantees the arena record sorts before its size record.
+      if (prev_rec != NULL && prev_rec->is_arena_record() &&
+        next_rec->is_memory_record_of_arena(prev_rec)) {
+        if (prev_rec->seq() > next_rec->seq()) {
+          // Skip this arena memory record.
+          // Two scenarios:
+          //   - if the arena record is an allocation record, this early
+          //     size record must be left over by a previous arena,
+          //     and that arena's last size record should have size = 0.
+          //   - if the arena record is a deallocation record, this
+          //     size record should be its cleanup record, which should
+          //     also have size = 0. In other words, an arena always resets
+          //     its size before it goes away (see Arena's destructor).
+          assert(next_rec->size() == 0, "size not reset");
+          return _itr.next();
+        } else {
+          assert(prev_rec->is_allocation_record(),
+            "Arena size record ahead of allocation record");
+        }
+      }
     }
-    if (cur->addr() == next->addr()) {
-      next = (MemPointerRecord*)_itr.next();
-    }
-    return current();
+    return next_rec;
   }
 
   MemPointer* peek_next() const      { ShouldNotReachHere(); return NULL; }
@@ -214,6 +242,72 @@
   bool insert_after(MemPointer* ptr) { ShouldNotReachHere(); return false; }
 };
 
+// Collapse duplicated records. Eliminating duplicated records here is much
+// cheaper than during the promotion phase. However, it has a limitation - it
+// can only eliminate duplicated records within a generation, so there is
+// still a chance of seeing duplicated records during promotion.
+// We want to keep the record with the higher sequence number, because it has
+// the more accurate callsite pc.
+class VMRecordIterator : public MemPointerArrayIterator {
+ private:
+  MemPointerArrayIteratorImpl  _itr;
+
+ public:
+  VMRecordIterator(MemPointerArray* arr) : _itr(arr) {
+    MemPointerRecord* cur = (MemPointerRecord*)_itr.current();
+    MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next();
+    while (next != NULL) {
+      assert(cur != NULL, "Sanity check");
+      assert(((SeqMemPointerRecord*)next)->seq() > ((SeqMemPointerRecord*)cur)->seq(),
+        "pre-sort order");
+
+      if (is_duplicated_record(cur, next)) {
+        _itr.next();
+        next = (MemPointerRecord*)_itr.peek_next();
+      } else {
+        break;
+      }
+    }
+  }
+
+  virtual MemPointer* current() const {
+    return _itr.current();
+  }
+
+  // get next record, but skip the duplicated records
+  virtual MemPointer* next() {
+    MemPointerRecord* cur = (MemPointerRecord*)_itr.next();
+    MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next();
+    while (next != NULL) {
+      assert(cur != NULL, "Sanity check");
+      assert(((SeqMemPointerRecord*)next)->seq() > ((SeqMemPointerRecord*)cur)->seq(),
+        "pre-sort order");
+
+      if (is_duplicated_record(cur, next)) {
+        _itr.next();
+        cur = next;
+        next = (MemPointerRecord*)_itr.peek_next();
+      } else {
+        break;
+      }
+    }
+    return cur;
+  }
+
+  MemPointer* peek_next() const      { ShouldNotReachHere(); return NULL; }
+  MemPointer* peek_prev() const      { ShouldNotReachHere(); return NULL; }
+  void remove()                      { ShouldNotReachHere(); }
+  bool insert(MemPointer* ptr)       { ShouldNotReachHere(); return false; }
+  bool insert_after(MemPointer* ptr) { ShouldNotReachHere(); return false; }
+
+ private:
+  bool is_duplicated_record(MemPointerRecord* p1, MemPointerRecord* p2) const {
+    bool ret = (p1->addr() == p2->addr() && p1->size() == p2->size() && p1->flags() == p2->flags());
+    assert(!(ret && FLAGS_TO_MEMORY_TYPE(p1->flags()) == mtThreadStack), "dup on stack record");
+    return ret;
+  }
+};
+
 class StagingArea : public _ValueObj {
  private:
   MemPointerArray*   _malloc_data;
@@ -233,7 +327,8 @@
     return MallocRecordIterator(malloc_data());
   }
 
-  MemPointerArrayIteratorImpl virtual_memory_record_walker();
+  VMRecordIterator virtual_memory_record_walker();
+
   bool init();
   void clear() {
     assert(_malloc_data != NULL && _vm_data != NULL, "Just check");
@@ -260,6 +355,9 @@
   // the lock to protect this snapshot
   Monitor*              _lock;
 
+  // the number of instance classes
+  int                   _number_of_classes;
+
   NOT_PRODUCT(size_t    _untracked_count;)
   friend class MemBaseline;
 
@@ -280,8 +378,9 @@
   // merge a per-thread memory recorder into staging area
   bool merge(MemRecorder* rec);
   // promote staged data to snapshot
-  bool promote();
+  bool promote(int number_of_classes);
 
+  int  number_of_classes() const { return _number_of_classes; }
 
   void wait(long timeout) {
     assert(_lock != NULL, "Just check");
@@ -293,14 +392,17 @@
   NOT_PRODUCT(void check_staging_data();)
   NOT_PRODUCT(void check_malloc_pointers();)
   NOT_PRODUCT(bool has_allocation_record(address addr);)
+  // dump all virtual memory pointers in snapshot
+  DEBUG_ONLY( void dump_all_vm_pointers();)
 
  private:
-   // copy pointer data from src to dest
-   void copy_pointer(MemPointerRecord* dest, const MemPointerRecord* src);
+   // copy a sequenced pointer from src to dest
+   void copy_seq_pointer(MemPointerRecord* dest, const MemPointerRecord* src);
+   // assign a sequenced pointer to a non-sequenced pointer
+   void assign_pointer(MemPointerRecord* dest, const MemPointerRecord* src);
 
    bool promote_malloc_records(MemPointerArrayIterator* itr);
    bool promote_virtual_memory_records(MemPointerArrayIterator* itr);
 };
 
-
 #endif // SHARE_VM_SERVICES_MEM_SNAPSHOT_HPP
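
The collapsing policy above keeps, out of each run of identical records, the
one with the highest sequence number. A minimal standalone sketch of that
policy in isolation -- Rec, is_duplicate and collapse are illustrative
stand-ins, not the VM's MemPointerRecord API:

#include <cstddef>

// Simplified stand-in for MemPointerRecord: address, size, flags, sequence no.
struct Rec { void* addr; size_t size; int flags; int seq; };

// Same test as VMRecordIterator::is_duplicated_record().
static bool is_duplicate(const Rec& a, const Rec& b) {
  return a.addr == b.addr && a.size == b.size && a.flags == b.flags;
}

// Walk records pre-sorted by (addr, seq); within each run of duplicates keep
// only the last record, i.e. the highest sequence number (latest callsite pc).
static int collapse(const Rec* in, int n, Rec* out) {
  int m = 0;
  for (int i = 0; i < n; i++) {
    if (m > 0 && is_duplicate(out[m - 1], in[i])) {
      out[m - 1] = in[i];            // later record wins
    } else {
      out[m++] = in[i];
    }
  }
  return m;
}
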
--- a/src/share/vm/services/memTrackWorker.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memTrackWorker.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,6 +29,16 @@
 #include "utilities/decoder.hpp"
 #include "utilities/vmError.hpp"
 
+
+void GenerationData::reset() {
+  _number_of_classes = 0;
+  while (_recorder_list != NULL) {
+    MemRecorder* tmp = _recorder_list;
+    _recorder_list = _recorder_list->next();
+    MemTracker::release_thread_recorder(tmp);
+  }
+}
+
 MemTrackWorker::MemTrackWorker() {
   // thread creation uses the cgc thread type for now. We should revisit
   // this option, or create a new thread type.
@@ -39,7 +49,7 @@
   if (!has_error()) {
     _head = _tail = 0;
     for(int index = 0; index < MAX_GENERATIONS; index ++) {
-      _gen[index] = NULL;
+      ::new ((void*)&_gen[index]) GenerationData();
     }
   }
   NOT_PRODUCT(_sync_point_count = 0;)
@@ -49,10 +59,7 @@
 
 MemTrackWorker::~MemTrackWorker() {
   for (int index = 0; index < MAX_GENERATIONS; index ++) {
-    MemRecorder* rc = _gen[index];
-    if (rc != NULL) {
-      delete rc;
-    }
+    _gen[index].reset();
   }
 }
 
@@ -90,12 +97,7 @@
     {
       // take a recorder from earliest generation in buffer
       ThreadCritical tc;
-      rec = _gen[_head];
-      if (rec != NULL) {
-        _gen[_head] = rec->next();
-      }
-      assert(count_recorder(_gen[_head]) <= MemRecorder::_instance_count,
-        "infinite loop after dequeue");
+      rec = _gen[_head].next_recorder();
     }
     if (rec != NULL) {
       // merge the recorder into staging area
@@ -109,16 +111,20 @@
       // no more recorders to merge; promote the staging area
       // to the snapshot
       if (_head != _tail) {
+        long number_of_classes;
         {
           ThreadCritical tc;
-          if (_gen[_head] != NULL || _head == _tail) {
+          if (_gen[_head].has_more_recorder() || _head == _tail) {
             continue;
           }
+          number_of_classes = _gen[_head].number_of_classes();
+          _gen[_head].reset();
+
           // done with this generation, increment _head pointer
           _head = (_head + 1) % MAX_GENERATIONS;
         }
         // promote this generation data to snapshot
-        if (!snapshot->promote()) {
+        if (!snapshot->promote(number_of_classes)) {
           // failed to promote, means out of memory
           MemTracker::shutdown(MemTracker::NMT_out_of_memory);
         }
@@ -126,8 +132,8 @@
         snapshot->wait(1000);
         ThreadCritical tc;
         // check if more data arrived
-        if (_gen[_head] == NULL) {
-          _gen[_head] = MemTracker::get_pending_recorders();
+        if (!_gen[_head].has_more_recorder()) {
+          _gen[_head].add_recorders(MemTracker::get_pending_recorders());
         }
       }
     }
@@ -147,7 +153,7 @@
 //   1. add all recorders in pending queue to current generation
 //   2. increase generation
 
-void MemTrackWorker::at_sync_point(MemRecorder* rec) {
+void MemTrackWorker::at_sync_point(MemRecorder* rec, int number_of_classes) {
   NOT_PRODUCT(_sync_point_count ++;)
   assert(count_recorder(rec) <= MemRecorder::_instance_count,
     "pending queue has infinite loop");
@@ -155,23 +161,15 @@
   bool out_of_generation_buffer = false;
   // check shutdown state inside ThreadCritical
   if (MemTracker::shutdown_in_progress()) return;
+
+  _gen[_tail].set_number_of_classes(number_of_classes);
   // append the recorders to the end of the generation
-  if( rec != NULL) {
-    MemRecorder* cur_head = _gen[_tail];
-    if (cur_head == NULL) {
-      _gen[_tail] = rec;
-    } else {
-      while (cur_head->next() != NULL) {
-        cur_head = cur_head->next();
-      }
-      cur_head->set_next(rec);
-    }
-  }
-  assert(count_recorder(rec) <= MemRecorder::_instance_count,
+  _gen[_tail].add_recorders(rec);
+  assert(count_recorder(_gen[_tail].peek()) <= MemRecorder::_instance_count,
     "after add to current generation has infinite loop");
   // we have collected all recorders for this generation. If there is data,
   // we need to increment _tail to start a new generation.
-  if (_gen[_tail] != NULL || _head == _tail) {
+  if (_gen[_tail].has_more_recorder()  || _head == _tail) {
     _tail = (_tail + 1) % MAX_GENERATIONS;
     out_of_generation_buffer = (_tail == _head);
   }
@@ -194,7 +192,7 @@
 int MemTrackWorker::count_pending_recorders() const {
   int count = 0;
   for (int index = 0; index < MAX_GENERATIONS; index ++) {
-    MemRecorder* head = _gen[index];
+    MemRecorder* head = _gen[index].peek();
     if (head != NULL) {
       count += count_recorder(head);
     }
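
The dequeue/promote loop above works against a fixed ring of generations:
the sync point appends at _tail, the worker drains and promotes at _head.
That head/tail arithmetic, isolated into a self-contained sketch (GenRing
and its method names are illustrative, not VM code):

#define MAX_GENERATIONS 512

struct GenRing {
  int _head, _tail;
  GenRing() : _head(0), _tail(0) {}

  // Mirrors at_sync_point(): close the current generation when it holds data
  // (or when the ring is empty). Returns false when the ring fills up, i.e.
  // _tail wraps around onto _head ("out of generation buffer").
  bool advance_tail(bool has_data) {
    if (has_data || _head == _tail) {
      _tail = (_tail + 1) % MAX_GENERATIONS;
      return _tail != _head;
    }
    return true;
  }

  // Mirrors the worker loop: once a generation is fully merged and promoted,
  // move _head forward to the next generation.
  void advance_head() {
    _head = (_head + 1) % MAX_GENERATIONS;
  }
};
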
--- a/src/share/vm/services/memTrackWorker.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memTrackWorker.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -32,17 +32,58 @@
 // At most MAX_GENERATIONS generations of data can be tracked.
 #define MAX_GENERATIONS  512
 
+class GenerationData : public _ValueObj {
+ private:
+  int           _number_of_classes;
+  MemRecorder*  _recorder_list;
+
+ public:
+  GenerationData(): _number_of_classes(0), _recorder_list(NULL) { }
+
+  inline int  number_of_classes() const { return _number_of_classes; }
+  inline void set_number_of_classes(long num) { _number_of_classes = num; }
+
+  inline MemRecorder* next_recorder() {
+    if (_recorder_list == NULL) {
+      return NULL;
+    } else {
+      MemRecorder* tmp = _recorder_list;
+      _recorder_list = _recorder_list->next();
+      return tmp;
+    }
+  }
+
+  inline bool has_more_recorder() const {
+    return (_recorder_list != NULL);
+  }
+
+  // add recorders to this generation
+  void add_recorders(MemRecorder* head) {
+    if (head != NULL) {
+      if (_recorder_list == NULL) {
+        _recorder_list = head;
+      } else {
+        MemRecorder* tmp = _recorder_list;
+        for (; tmp->next() != NULL; tmp = tmp->next());
+        tmp->set_next(head);
+      }
+    }
+  }
+
+  void reset();
+
+  NOT_PRODUCT(MemRecorder* peek() const { return _recorder_list; })
+};
 
 class MemTrackWorker : public NamedThread {
  private:
-  // circular buffer. This buffer contains recorders to be merged into global
+  // circular buffer. This buffer contains generation data to be merged into global
   // snapshot.
-  // Each slot holds a linked list of memory recorders, that contains one
-  // generation of memory data.
-  MemRecorder*  _gen[MAX_GENERATIONS];
-  int           _head, _tail; // head and tail pointers to above circular buffer
+  // Each slot holds a generation
+  GenerationData  _gen[MAX_GENERATIONS];
+  int             _head, _tail; // head and tail pointers to above circular buffer
 
-  bool          _has_error;
+  bool            _has_error;
 
  public:
   MemTrackWorker();
@@ -56,7 +97,7 @@
   inline bool has_error() const { return _has_error; }
 
   // task at synchronization point
-  void at_sync_point(MemRecorder* pending_recorders);
+  void at_sync_point(MemRecorder* pending_recorders, int number_of_classes);
 
   // for debugging purpose, they are not thread safe.
   NOT_PRODUCT(static int count_recorder(const MemRecorder* head);)
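
The list handling inside GenerationData is a plain intrusive singly linked
queue: append a whole chain at the tail, pop single recorders from the head.
A trimmed, self-contained restatement of just those mechanics (Recorder and
Generation are simplified stand-ins, not the VM types):

#include <cstddef>

struct Recorder {
  Recorder* _next;
  Recorder() : _next(NULL) {}
  Recorder* next() const     { return _next; }
  void set_next(Recorder* n) { _next = n; }
};

struct Generation {
  Recorder* _recorder_list;
  Generation() : _recorder_list(NULL) {}

  // add_recorders(): append a chain at the tail (O(n) walk to the end).
  void add_recorders(Recorder* head) {
    if (head == NULL) return;
    if (_recorder_list == NULL) {
      _recorder_list = head;
    } else {
      Recorder* tmp = _recorder_list;
      while (tmp->next() != NULL) tmp = tmp->next();
      tmp->set_next(head);
    }
  }

  // next_recorder(): pop the head, or return NULL when drained.
  Recorder* next_recorder() {
    Recorder* tmp = _recorder_list;
    if (tmp != NULL) _recorder_list = tmp->next();
    return tmp;
  }
};
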
--- a/src/share/vm/services/memTracker.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memTracker.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -23,6 +23,7 @@
  */
 #include "precompiled.hpp"
 
+#include "oops/instanceKlass.hpp"
 #include "runtime/atomic.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/mutexLocker.hpp"
@@ -69,15 +70,12 @@
 
 void MemTracker::init_tracking_options(const char* option_line) {
   _tracking_level = NMT_off;
-  if (strncmp(option_line, "=summary", 8) == 0) {
+  if (strcmp(option_line, "=summary") == 0) {
     _tracking_level = NMT_summary;
-  } else if (strncmp(option_line, "=detail", 7) == 0) {
+  } else if (strcmp(option_line, "=detail") == 0) {
     _tracking_level = NMT_detail;
-  } else {
-    char msg[255];
-    //+1 to remove the '=' character
-    jio_snprintf(msg, 255, "Unknown option given to XX:NativeMemoryTracking: %s", option_line+1);
-    vm_exit_during_initialization(msg, NULL);
+  } else if (strcmp(option_line, "=off") != 0) {
+    vm_exit_during_initialization("Syntax error, expecting -XX:NativeMemoryTracking=[off|summary|detail]", NULL);
   }
 }
 
@@ -364,7 +362,7 @@
 
     if (thread != NULL) {
       if (thread->is_Java_thread() && ((JavaThread*)thread)->is_safepoint_visible()) {
-        JavaThread*      java_thread = static_cast<JavaThread*>(thread);
+        JavaThread*      java_thread = (JavaThread*)thread;
         JavaThreadState  state = java_thread->thread_state();
         if (SafepointSynchronize::safepoint_safe(java_thread, state)) {
           // JavaThreads that are safepoint safe, can run through safepoint,
@@ -472,6 +470,8 @@
       // it should guarantee that NMT is fully sync-ed.
       ThreadCritical tc;
 
+      SequenceGenerator::reset();
+
       // walk all JavaThreads to collect recorders
       SyncThreadRecorderClosure stc;
       Threads::threads_do(&stc);
@@ -484,11 +484,12 @@
         pending_recorders = _global_recorder;
         _global_recorder = NULL;
       }
-      SequenceGenerator::reset();
       // check _worker_thread with lock to avoid racing condition
       if (_worker_thread != NULL) {
-        _worker_thread->at_sync_point(pending_recorders);
+        _worker_thread->at_sync_point(pending_recorders, InstanceKlass::number_of_instance_classes());
       }
+
+      assert(SequenceGenerator::peek() == 1, "Should not have memory activities during sync-point");
     }
   }
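
The switch from strncmp to strcmp above makes the option match exact rather
than prefix-based. A standalone sketch of the resulting behavior (parse_nmt
is a hypothetical stand-in for init_tracking_options):

#include <cstring>

enum NMTLevel { NMT_off, NMT_summary, NMT_detail, NMT_error };

// Exact-match parsing: the whole option string must match, so a value like
// "=summaryX" (which the old strncmp-based code silently accepted as
// "=summary") is now rejected with a syntax error.
static NMTLevel parse_nmt(const char* option_line) {
  if (strcmp(option_line, "=summary") == 0) return NMT_summary;
  if (strcmp(option_line, "=detail")  == 0) return NMT_detail;
  if (strcmp(option_line, "=off")     == 0) return NMT_off;
  return NMT_error;  // caller exits during initialization
}
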
 
--- a/src/share/vm/services/memTracker.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/memTracker.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -109,12 +109,10 @@
 #include "services/memSnapshot.hpp"
 #include "services/memTrackWorker.hpp"
 
-#ifdef SOLARIS
-#include "thread_solaris.inline.hpp"
-#endif
+extern bool NMT_track_callsite;
 
-#ifdef _DEBUG
-  #define DEBUG_CALLER_PC  os::get_caller_pc(3)
+#ifdef ASSERT
+  #define DEBUG_CALLER_PC  (NMT_track_callsite ? os::get_caller_pc(2) : 0)
 #else
   #define DEBUG_CALLER_PC  0
 #endif
@@ -144,6 +142,7 @@
  * MemTracker is the 'gate' class to native memory tracking runtime.
  */
 class MemTracker : AllStatic {
+  friend class GenerationData;
   friend class MemTrackWorker;
   friend class MemSnapshot;
   friend class SyncThreadRecorderClosure;
@@ -261,7 +260,7 @@
   // record a 'malloc' call
   static inline void record_malloc(address addr, size_t size, MEMFLAGS flags,
                             address pc = 0, Thread* thread = NULL) {
-    if (NMT_CAN_TRACK(flags)) {
+    if (is_on() && NMT_CAN_TRACK(flags)) {
       assert(size > 0, "Sanity check");
       create_memory_record(addr, (flags|MemPointerRecord::malloc_tag()), size, pc, thread);
     }
@@ -275,21 +274,21 @@
   // record a 'realloc' call
   static inline void record_realloc(address old_addr, address new_addr, size_t size,
        MEMFLAGS flags, address pc = 0, Thread* thread = NULL) {
-    if (is_on()) {
+    if (is_on() && NMT_CAN_TRACK(flags)) {
       assert(size > 0, "Sanity check");
       record_free(old_addr, flags, thread);
       record_malloc(new_addr, size, flags, pc, thread);
     }
   }
 
-  // record arena size
+  // record arena memory size
   static inline void record_arena_size(address addr, size_t size) {
-    // we add a positive offset to arena address, so we can have arena size record
+    // we add a positive offset to the arena's address, so the arena memory record
     // is sorted after the arena record
     if (is_on() && !UseMallocOnly) {
       assert(addr != NULL, "Sanity check");
       create_memory_record((addr + sizeof(void*)), MemPointerRecord::arena_size_tag(), size,
-        0, NULL);
+        DEBUG_CALLER_PC, NULL);
     }
   }
 
@@ -317,6 +316,7 @@
   static inline void release_thread_stack(address addr, size_t size, Thread* thr) {
     if (is_on()) {
       assert(size > 0 && thr != NULL, "Sanity check");
+      assert(!thr->is_Java_thread(), "too early");
       create_memory_record(addr, MemPointerRecord::virtual_memory_uncommit_tag() | mtThreadStack,
                           size, DEBUG_CALLER_PC, thr);
       create_memory_record(addr, MemPointerRecord::virtual_memory_release_tag() | mtThreadStack,
@@ -326,11 +326,11 @@
 
   // record a virtual memory 'commit' call
   static inline void record_virtual_memory_commit(address addr, size_t size,
-                            address pc = 0, Thread* thread = NULL) {
+                            address pc, Thread* thread = NULL) {
     if (is_on()) {
       assert(size > 0, "Sanity check");
       create_memory_record(addr, MemPointerRecord::virtual_memory_commit_tag(),
-                           size, DEBUG_CALLER_PC, thread);
+                           size, pc, thread);
     }
   }
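
record_arena_size() above keys the size record by the arena's address plus
one word, so that sorting by address places the size record right after the
arena's own record. A one-liner sketch of that keying (arena_size_key is
illustrative only, not a VM function):

#include <cstdint>

// Pseudo address for an arena's size record: the arena address offset by one
// word. Never dereferenced; it only fixes the sort order next to the arena.
static inline uintptr_t arena_size_key(uintptr_t arena_addr) {
  return arena_addr + sizeof(void*);
}
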
 
--- a/src/share/vm/services/nmtDCmd.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/nmtDCmd.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -84,28 +84,31 @@
   }
 
   int nopt = 0;
-  if(_summary.is_set()) { ++nopt; }
-  if(_detail.is_set()) { ++nopt; }
-  if(_baseline.is_set()) { ++nopt; }
-  if(_summary_diff.is_set()) { ++nopt; }
-  if(_detail_diff.is_set()) { ++nopt; }
-  if(_shutdown.is_set()) { ++nopt; }
+  if(_summary.is_set() && _summary.value()) { ++nopt; }
+  if(_detail.is_set() && _detail.value()) { ++nopt; }
+  if(_baseline.is_set() && _baseline.value()) { ++nopt; }
+  if(_summary_diff.is_set() && _summary_diff.value()) { ++nopt; }
+  if(_detail_diff.is_set() && _detail_diff.value()) { ++nopt; }
+  if(_shutdown.is_set() && _shutdown.value()) { ++nopt; }
 #ifndef PRODUCT
-  if(_debug.is_set()) { ++nopt; }
+  if(_debug.is_set() && _debug.value()) { ++nopt; }
 #endif
 
   if(nopt > 1) {
       output()->print_cr("At most one of the following option can be specified: " \
         "summary, detail, baseline, summary.diff, detail.diff, shutdown"
 #ifndef PRODUCT
-        " ,debug"
+        ", debug"
 #endif
       );
       return;
-  }
-
-  if(nopt == 0) {
+  } else if (nopt == 0) {
+    if (_summary.is_set()) {
+      output()->print_cr("No command to execute");
+      return;
+    } else {
       _summary.set_value(true);
+    }
   }
 
 #ifndef PRODUCT
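
These options arrive via the VM.native_memory diagnostic command, e.g.
"jcmd <pid> VM.native_memory summary". With the value checks above, a boolean
option explicitly set to false no longer counts as selected, so
"VM.native_memory summary=false" now reports "No command to execute", while a
bare "VM.native_memory" still defaults to a summary.
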
--- a/src/share/vm/services/serviceUtil.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/services/serviceUtil.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -57,14 +57,13 @@
       if (k->is_klass()) {
         // if it's a class for an object, an object array, or
         // primitive (type) array then it's visible.
-        Klass* klass = k;
-        if (Klass::cast(klass)->oop_is_instance()) {
+        if (k->oop_is_instance()) {
           return true;
         }
-        if (Klass::cast(klass)->oop_is_objArray()) {
+        if (k->oop_is_objArray()) {
           return true;
         }
-        if (Klass::cast(klass)->oop_is_typeArray()) {
+        if (k->oop_is_typeArray()) {
           return true;
         }
       }
--- a/src/share/vm/shark/llvmHeaders.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/llvmHeaders.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -35,6 +35,7 @@
   #undef DEBUG
 #endif
 
+#include <llvm/Analysis/Verifier.h>
 #include <llvm/Argument.h>
 #include <llvm/Constants.h>
 #include <llvm/DerivedTypes.h>
@@ -42,29 +43,21 @@
 #include <llvm/Instructions.h>
 #include <llvm/LLVMContext.h>
 #include <llvm/Module.h>
-#if SHARK_LLVM_VERSION < 27
-#include <llvm/ModuleProvider.h>
-#endif
+#if SHARK_LLVM_VERSION <= 31
 #include <llvm/Support/IRBuilder.h>
-#if SHARK_LLVM_VERSION >= 29
+#else
+#include <llvm/IRBuilder.h>
+#endif
 #include <llvm/Support/Threading.h>
-#else
-#include <llvm/System/Threading.h>
-#endif
-#include <llvm/Target/TargetSelect.h>
+#include <llvm/Support/TargetSelect.h>
 #include <llvm/Type.h>
 #include <llvm/ExecutionEngine/JITMemoryManager.h>
 #include <llvm/Support/CommandLine.h>
-#if SHARK_LLVM_VERSION >= 27
+#include <llvm/ExecutionEngine/MCJIT.h>
 #include <llvm/ExecutionEngine/JIT.h>
 #include <llvm/ADT/StringMap.h>
 #include <llvm/Support/Debug.h>
-#if SHARK_LLVM_VERSION >= 29
 #include <llvm/Support/Host.h>
-#else
-#include <llvm/System/Host.h>
-#endif
-#endif
 
 #include <map>
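
The rearranged includes track LLVM's header reshuffling: the pre-2.7 and
pre-2.9 fallbacks (ModuleProvider.h, System/Threading.h, System/Host.h) are
gone, Analysis/Verifier.h and ExecutionEngine/MCJIT.h are pulled in
unconditionally, and the one remaining version split is IRBuilder.h, which
(as far as I can tell) moved out of llvm/Support/ after LLVM 3.1. The
minimum supported LLVM version rises accordingly.
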
 
--- a/src/share/vm/shark/llvmValue.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/llvmValue.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -56,6 +56,10 @@
   {
     return llvm::ConstantPointerNull::get(SharkType::oop_type());
   }
+  static llvm::ConstantPointerNull* nullKlass()
+  {
+    return llvm::ConstantPointerNull::get(SharkType::klass_type());
+  }
 
  public:
   static llvm::ConstantInt* bit_constant(int value)
--- a/src/share/vm/shark/sharkBlock.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkBlock.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -170,10 +170,12 @@
 
     case Bytecodes::_ldc:
     case Bytecodes::_ldc_w:
-    case Bytecodes::_ldc2_w:
-      push(SharkConstant::for_ldc(iter())->value(builder()));
+    case Bytecodes::_ldc2_w: {
+      SharkConstant* constant = SharkConstant::for_ldc(iter());
+      assert(constant->is_loaded(), "trap should handle unloaded classes");
+      push(constant->value(builder()));
       break;
-
+    }
     case Bytecodes::_iload_0:
     case Bytecodes::_lload_0:
     case Bytecodes::_fload_0:
@@ -1000,9 +1002,9 @@
   builder()->SetInsertPoint(done);
   PHINode *result;
   if (is_long)
-    result = builder()->CreatePHI(SharkType::jlong_type(), "result");
+    result = builder()->CreatePHI(SharkType::jlong_type(), 0, "result");
   else
-    result = builder()->CreatePHI(SharkType::jint_type(), "result");
+    result = builder()->CreatePHI(SharkType::jint_type(), 0, "result");
   result->addIncoming(special_result, special_case);
   result->addIncoming(general_result, general_case);
 
@@ -1030,28 +1032,40 @@
     check_null(value);
     object = value->generic_value();
   }
-  if (is_get && field->is_constant()) {
+  if (is_get && field->is_constant() && field->is_static()) {
     SharkConstant *constant = SharkConstant::for_field(iter());
     if (constant->is_loaded())
       value = constant->value(builder());
   }
   if (!is_get || value == NULL) {
-    if (!is_field)
-      object = builder()->CreateInlineOop(field->holder());
-
+    if (!is_field) {
+      object = builder()->CreateInlineOop(field->holder()->java_mirror());
+    }
     BasicType   basic_type = field->type()->basic_type();
-    const Type *stack_type = SharkType::to_stackType(basic_type);
-    const Type *field_type = SharkType::to_arrayType(basic_type);
-
+    Type *stack_type = SharkType::to_stackType(basic_type);
+    Type *field_type = SharkType::to_arrayType(basic_type);
+    Type *type = field_type;
+    if (field->is_volatile()) {
+      if (field_type == SharkType::jfloat_type()) {
+        type = SharkType::jint_type();
+      } else if (field_type == SharkType::jdouble_type()) {
+        type = SharkType::jlong_type();
+      }
+    }
     Value *addr = builder()->CreateAddressOfStructEntry(
       object, in_ByteSize(field->offset_in_bytes()),
-      PointerType::getUnqual(field_type),
+      PointerType::getUnqual(type),
       "addr");
 
     // Do the access
     if (is_get) {
-      Value *field_value = builder()->CreateLoad(addr);
-
+      Value* field_value;
+      if (field->is_volatile()) {
+        field_value = builder()->CreateAtomicLoad(addr);
+        field_value = builder()->CreateBitCast(field_value, field_type);
+      } else {
+        field_value = builder()->CreateLoad(addr);
+      }
       if (field_type != stack_type) {
         field_value = builder()->CreateIntCast(
           field_value, stack_type, basic_type != T_CHAR);
@@ -1067,13 +1081,16 @@
           field_value, field_type, basic_type != T_CHAR);
       }
 
-      builder()->CreateStore(field_value, addr);
+      if (field->is_volatile()) {
+        field_value = builder()->CreateBitCast(field_value, type);
+        builder()->CreateAtomicStore(field_value, addr);
+      } else {
+        builder()->CreateStore(field_value, addr);
+      }
 
-      if (!field->type()->is_primitive_type())
+      if (!field->type()->is_primitive_type()) {
         builder()->CreateUpdateBarrierSet(oopDesc::bs(), addr);
-
-      if (field->is_volatile())
-        builder()->CreateMemoryBarrier(SharkBuilder::BARRIER_STORELOAD);
+      }
     }
   }
 
@@ -1105,7 +1122,7 @@
   builder()->CreateBr(done);
 
   builder()->SetInsertPoint(done);
-  PHINode *result = builder()->CreatePHI(SharkType::jint_type(), "result");
+  PHINode *result = builder()->CreatePHI(SharkType::jint_type(), 0, "result");
   result->addIncoming(LLVMValue::jint_constant(-1), lt);
   result->addIncoming(LLVMValue::jint_constant(0),  eq);
   result->addIncoming(LLVMValue::jint_constant(1),  gt);
@@ -1152,7 +1169,7 @@
   builder()->CreateBr(done);
 
   builder()->SetInsertPoint(done);
-  PHINode *result = builder()->CreatePHI(SharkType::jint_type(), "result");
+  PHINode *result = builder()->CreatePHI(SharkType::jint_type(), 0, "result");
   result->addIncoming(LLVMValue::jint_constant(-1), lt);
   result->addIncoming(LLVMValue::jint_constant(0),  eq);
   result->addIncoming(LLVMValue::jint_constant(1),  gt);
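
The volatile-field path above widens jfloat/jdouble to the same-sized integer
type so that the atomic load/store operates on an integer, then bitcasts back.
The equivalent trick in plain C++, as an illustrative sketch
(load_volatile_double is not a VM function):

#include <cstdint>
#include <cstring>

// Load a volatile double by reading its bits as one 64-bit integer and
// reinterpreting them -- the same widen-then-bitcast shape as the
// CreateAtomicLoad()/CreateBitCast() pair above.
static double load_volatile_double(const volatile int64_t* slot) {
  int64_t bits = *slot;                  // single integer-width load
  double  value;
  memcpy(&value, &bits, sizeof(value));  // bitcast back to double
  return value;
}
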
--- a/src/share/vm/shark/sharkBuilder.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkBuilder.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -47,14 +47,14 @@
 // Helpers for accessing structures
 Value* SharkBuilder::CreateAddressOfStructEntry(Value*      base,
                                                 ByteSize    offset,
-                                                const Type* type,
+                                                Type* type,
                                                 const char* name) {
   return CreateBitCast(CreateStructGEP(base, in_bytes(offset)), type, name);
 }
 
 LoadInst* SharkBuilder::CreateValueOfStructEntry(Value*      base,
                                                  ByteSize    offset,
-                                                 const Type* type,
+                                                 Type* type,
                                                  const char* name) {
   return CreateLoad(
     CreateAddressOfStructEntry(
@@ -71,7 +71,7 @@
 }
 
 Value* SharkBuilder::CreateArrayAddress(Value*      arrayoop,
-                                        const Type* element_type,
+                                        Type* element_type,
                                         int         element_bytes,
                                         ByteSize    base_offset,
                                         Value*      index,
@@ -114,7 +114,7 @@
 
 // Helpers for creating intrinsics and external functions.
 
-const Type* SharkBuilder::make_type(char type, bool void_ok) {
+Type* SharkBuilder::make_type(char type, bool void_ok) {
   switch (type) {
     // Primitive types
   case 'c':
@@ -146,6 +146,8 @@
     return PointerType::getUnqual(SharkType::monitor_type());
   case 'O':
     return SharkType::oop_type();
+  case 'K':
+    return SharkType::klass_type();
 
     // Miscellaneous
   case 'v':
@@ -159,14 +161,14 @@
   }
 }
 
-const FunctionType* SharkBuilder::make_ftype(const char* params,
+FunctionType* SharkBuilder::make_ftype(const char* params,
                                              const char* ret) {
-  std::vector<const Type*> param_types;
+  std::vector<Type*> param_types;
   for (const char* c = params; *c; c++)
     param_types.push_back(make_type(*c, false));
 
   assert(strlen(ret) == 1, "should be");
-  const Type *return_type = make_type(*ret, true);
+  Type *return_type = make_type(*ret, true);
 
   return FunctionType::get(return_type, param_types, false);
 }
@@ -274,7 +276,7 @@
 }
 
 Value* SharkBuilder::is_subtype_of() {
-  return make_function((address) SharkRuntime::is_subtype_of, "OO", "c");
+  return make_function((address) SharkRuntime::is_subtype_of, "KK", "c");
 }
 
 Value* SharkBuilder::current_time_millis() {
@@ -352,79 +354,14 @@
     "T", "v");
 }
 
-// Low-level non-VM calls
-
-// The ARM-specific code here is to work around unimplemented
-// atomic exchange and memory barrier intrinsics in LLVM.
-//
-// Delegating to external functions for these would normally
-// incur a speed penalty, but Linux on ARM is a special case
-// in that atomic operations on that platform are handled by
-// external functions anyway.  It would be *preferable* for
-// the calls to be hidden away in LLVM, but it's not hurting
-// performance so having the calls here is acceptable.
-//
-// If you are building Shark on a platform without atomic
-// exchange and/or memory barrier intrinsics then it is only
-// acceptable to mimic this approach if your platform cannot
-// perform these operations without delegating to a function.
-
-#ifdef ARM
-static jint zero_cmpxchg_int(volatile jint *ptr, jint oldval, jint newval) {
-  return Atomic::cmpxchg(newval, ptr, oldval);
-}
-#endif // ARM
-
-Value* SharkBuilder::cmpxchg_int() {
-  return make_function(
-#ifdef ARM
-    (address) zero_cmpxchg_int,
-#else
-    "llvm.atomic.cmp.swap.i32.p0i32",
-#endif // ARM
-    "Iii", "i");
-}
-
-#ifdef ARM
-static intptr_t zero_cmpxchg_ptr(volatile intptr_t* ptr,
-                                 intptr_t           oldval,
-                                 intptr_t           newval) {
-  return Atomic::cmpxchg_ptr(newval, ptr, oldval);
-}
-#endif // ARM
-
-Value* SharkBuilder::cmpxchg_ptr() {
-  return make_function(
-#ifdef ARM
-    (address) zero_cmpxchg_ptr,
-#else
-    "llvm.atomic.cmp.swap.i" LP64_ONLY("64") NOT_LP64("32") ".p0i" LP64_ONLY("64") NOT_LP64("32"),
-#endif // ARM
-    "Xxx", "x");
-}
-
 Value* SharkBuilder::frame_address() {
   return make_function("llvm.frameaddress", "i", "C");
 }
 
-Value* SharkBuilder::memory_barrier() {
-  return make_function(
-#ifdef ARM
-    (address) 0xffff0fa0, // __kernel_dmb
-#else
-    "llvm.memory.barrier",
-#endif // ARM
-    "11111", "v");
-}
-
 Value* SharkBuilder::memset() {
-#if SHARK_LLVM_VERSION >= 28
   // LLVM 2.8 added a fifth isVolatile field for memset
   // introduced with LLVM r100304
-  return make_function("llvm.memset.i32", "Cciii", "v");
-#else
-  return make_function("llvm.memset.i32", "Ccii", "v");
-#endif
+  return make_function("llvm.memset.p0i8.i32", "Cciii", "v");
 }
 
 Value* SharkBuilder::unimplemented() {
@@ -441,43 +378,16 @@
 
 // Public interface to low-level non-VM calls
 
-CallInst* SharkBuilder::CreateCmpxchgInt(Value* exchange_value,
-                                         Value* dst,
-                                         Value* compare_value) {
-  return CreateCall3(cmpxchg_int(), dst, compare_value, exchange_value);
-}
-
-CallInst* SharkBuilder::CreateCmpxchgPtr(Value* exchange_value,
-                                         Value* dst,
-                                         Value* compare_value) {
-  return CreateCall3(cmpxchg_ptr(), dst, compare_value, exchange_value);
-}
-
 CallInst* SharkBuilder::CreateGetFrameAddress() {
   return CreateCall(frame_address(), LLVMValue::jint_constant(0));
 }
 
-CallInst *SharkBuilder::CreateMemoryBarrier(int flags) {
-  Value *args[] = {
-    LLVMValue::bit_constant((flags & BARRIER_LOADLOAD) ? 1 : 0),
-    LLVMValue::bit_constant((flags & BARRIER_LOADSTORE) ? 1 : 0),
-    LLVMValue::bit_constant((flags & BARRIER_STORELOAD) ? 1 : 0),
-    LLVMValue::bit_constant((flags & BARRIER_STORESTORE) ? 1 : 0),
-    LLVMValue::bit_constant(1)};
-
-  return CreateCall(memory_barrier(), args, args + 5);
-}
-
 CallInst* SharkBuilder::CreateMemset(Value* dst,
                                      Value* value,
                                      Value* len,
                                      Value* align) {
-#if SHARK_LLVM_VERSION >= 28
   return CreateCall5(memset(), dst, value, len, align,
                      LLVMValue::jint_constant(0));
-#else
-  return CreateCall4(memset(), dst, value, len, align);
-#endif
 }
 
 CallInst* SharkBuilder::CreateUnimplemented(const char* file, int line) {
@@ -510,11 +420,7 @@
   if (isa<PointerType>(value->getType()))
     value = CreatePtrToInt(value, SharkType::intptr_type());
   else if (value->getType()->
-#if SHARK_LLVM_VERSION >= 27
            isIntegerTy()
-#else
-           isInteger()
-#endif
            )
     value = CreateIntCast(value, SharkType::intptr_type(), false);
   else
@@ -563,9 +469,19 @@
     name);
 }
 
+Value* SharkBuilder::CreateInlineMetadata(Metadata* metadata, llvm::PointerType* type, const char* name) {
+  assert(metadata != NULL, "inlined metadata must not be NULL");
+  assert(metadata->is_metadata(), "sanity check");
+  return CreateLoad(
+    CreateIntToPtr(
+      code_buffer_address(code_buffer()->inline_Metadata(metadata)),
+      PointerType::getUnqual(type)),
+    name);
+}
+
 Value* SharkBuilder::CreateInlineData(void*       data,
                                       size_t      size,
-                                      const Type* type,
+                                      Type* type,
                                       const char* name) {
   return CreateIntToPtr(
     code_buffer_address(code_buffer()->inline_data(data, size)),
@@ -600,3 +516,11 @@
   return BasicBlock::Create(
     SharkContext::current(), name, GetInsertBlock()->getParent(), ip);
 }
+
+LoadInst* SharkBuilder::CreateAtomicLoad(Value* ptr, unsigned align, AtomicOrdering ordering, SynchronizationScope synchScope, bool isVolatile, const char* name) {
+  return Insert(new LoadInst(ptr, name, isVolatile, align, ordering, synchScope), name);
+}
+
+StoreInst* SharkBuilder::CreateAtomicStore(Value* val, Value* ptr, unsigned align, AtomicOrdering ordering, SynchronizationScope synchScope, bool isVolatile, const char* name) {
+  return Insert(new StoreInst(val, ptr, isVolatile, align, ordering, synchScope), name);
+}
--- a/src/share/vm/shark/sharkBuilder.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkBuilder.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -53,22 +53,37 @@
     return _code_buffer;
   }
 
+ public:
+  llvm::LoadInst* CreateAtomicLoad(llvm::Value* ptr,
+                                   unsigned align = HeapWordSize,
+                                   llvm::AtomicOrdering ordering = llvm::SequentiallyConsistent,
+                                   llvm::SynchronizationScope synchScope = llvm::CrossThread,
+                                   bool isVolatile = true,
+                                   const char *name = "");
+  llvm::StoreInst* CreateAtomicStore(llvm::Value *val,
+                                     llvm::Value *ptr,
+                                     unsigned align = HeapWordSize,
+                                     llvm::AtomicOrdering ordering = llvm::SequentiallyConsistent,
+                                     llvm::SynchronizationScope SynchScope = llvm::CrossThread,
+                                     bool isVolatile = true,
+                                     const char *name = "");
+
   // Helpers for accessing structures.
  public:
   llvm::Value* CreateAddressOfStructEntry(llvm::Value* base,
                                           ByteSize offset,
-                                          const llvm::Type* type,
+                                          llvm::Type* type,
                                           const char *name = "");
   llvm::LoadInst* CreateValueOfStructEntry(llvm::Value* base,
                                            ByteSize offset,
-                                           const llvm::Type* type,
+                                           llvm::Type* type,
                                            const char *name = "");
 
   // Helpers for accessing arrays.
  public:
   llvm::LoadInst* CreateArrayLength(llvm::Value* arrayoop);
   llvm::Value* CreateArrayAddress(llvm::Value*      arrayoop,
-                                  const llvm::Type* element_type,
+                                  llvm::Type* element_type,
                                   int               element_bytes,
                                   ByteSize          base_offset,
                                   llvm::Value*      index,
@@ -85,8 +100,8 @@
 
   // Helpers for creating intrinsics and external functions.
  private:
-  static const llvm::Type* make_type(char type, bool void_ok);
-  static const llvm::FunctionType* make_ftype(const char* params,
+  static llvm::Type* make_type(char type, bool void_ok);
+  static llvm::FunctionType* make_ftype(const char* params,
                                               const char* ret);
   llvm::Value* make_function(const char* name,
                              const char* params,
@@ -165,7 +180,6 @@
   llvm::Value* cmpxchg_int();
   llvm::Value* cmpxchg_ptr();
   llvm::Value* frame_address();
-  llvm::Value* memory_barrier();
   llvm::Value* memset();
   llvm::Value* unimplemented();
   llvm::Value* should_not_reach_here();
@@ -173,14 +187,7 @@
 
   // Public interface to low-level non-VM calls.
  public:
-  llvm::CallInst* CreateCmpxchgInt(llvm::Value* exchange_value,
-                                   llvm::Value* dst,
-                                   llvm::Value* compare_value);
-  llvm::CallInst* CreateCmpxchgPtr(llvm::Value* exchange_value,
-                                   llvm::Value* dst,
-                                   llvm::Value* compare_value);
   llvm::CallInst* CreateGetFrameAddress();
-  llvm::CallInst* CreateMemoryBarrier(int flags);
   llvm::CallInst* CreateMemset(llvm::Value* dst,
                                llvm::Value* value,
                                llvm::Value* len,
@@ -189,15 +196,6 @@
   llvm::CallInst* CreateShouldNotReachHere(const char* file, int line);
   NOT_PRODUCT(llvm::CallInst* CreateDump(llvm::Value* value));
 
-  // Flags for CreateMemoryBarrier.
- public:
-  enum BarrierFlags {
-    BARRIER_LOADLOAD   = 1,
-    BARRIER_LOADSTORE  = 2,
-    BARRIER_STORELOAD  = 4,
-    BARRIER_STORESTORE = 8
-  };
-
   // HotSpot memory barriers
  public:
   void CreateUpdateBarrierSet(BarrierSet* bs, llvm::Value* field);
@@ -209,9 +207,14 @@
   llvm::Value* CreateInlineOop(ciObject* object, const char* name = "") {
     return CreateInlineOop(object->constant_encoding(), name);
   }
+
+  llvm::Value* CreateInlineMetadata(Metadata* metadata, llvm::PointerType* type, const char* name = "");
+  llvm::Value* CreateInlineMetadata(ciMetadata* metadata, llvm::PointerType* type, const char* name = "") {
+    return CreateInlineMetadata(metadata->constant_encoding(), type, name);
+  }
   llvm::Value* CreateInlineData(void*             data,
                                 size_t            size,
-                                const llvm::Type* type,
+                                llvm::Type* type,
                                 const char*       name = "");
 
   // Helpers for creating basic blocks.
@@ -222,5 +225,4 @@
   llvm::BasicBlock* CreateBlock(llvm::BasicBlock* ip,
                                 const char*       name="") const;
 };
-
-#endif // SHARE_VM_SHARK_SHARKBUILDER_HPP
+#endif // SHARE_VM_SHARK_SHARKBUILDER_HPP
--- a/src/share/vm/shark/sharkCacheDecache.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkCacheDecache.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -107,11 +107,10 @@
 void SharkDecacher::process_method_slot(Value** value, int offset) {
   // Decache the method pointer
   write_value_to_frame(
-    SharkType::Method*_type(),
+    SharkType::Method_type(),
     *value,
     offset);
 
-  oopmap()->set_oop(slot2reg(offset));
 }
 
 void SharkDecacher::process_pc_slot(int offset) {
@@ -205,7 +204,7 @@
 
 void SharkCacher::process_method_slot(Value** value, int offset) {
   // Cache the method pointer
-  *value = read_value_from_frame(SharkType::Method*_type(), offset);
+  *value = read_value_from_frame(SharkType::Method_type(), offset);
 }
 
 void SharkFunctionEntryCacher::process_method_slot(Value** value, int offset) {
@@ -230,7 +229,7 @@
 }
 
 Value* SharkOSREntryCacher::CreateAddressOfOSRBufEntry(int         offset,
-                                                       const Type* type) {
+                                                       Type* type) {
   Value *result = builder()->CreateStructGEP(osr_buf(), offset);
   if (type != SharkType::intptr_type())
     result = builder()->CreateBitCast(result, PointerType::getUnqual(type));
@@ -254,12 +253,12 @@
   }
 }
 
-void SharkDecacher::write_value_to_frame(const Type* type,
+void SharkDecacher::write_value_to_frame(Type* type,
                                          Value*      value,
                                          int         offset) {
   builder()->CreateStore(value, stack()->slot_addr(offset, type));
 }
 
-Value* SharkCacher::read_value_from_frame(const Type* type, int offset) {
+Value* SharkCacher::read_value_from_frame(Type* type, int offset) {
   return builder()->CreateLoad(stack()->slot_addr(offset, type));
 }
--- a/src/share/vm/shark/sharkCacheDecache.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkCacheDecache.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -192,7 +192,7 @@
 
   // Writer helper
  protected:
-  void write_value_to_frame(const llvm::Type* type,
+  void write_value_to_frame(llvm::Type* type,
                             llvm::Value*      value,
                             int               offset);
 };
@@ -321,7 +321,7 @@
 
   // Reader helper
  protected:
-  llvm::Value* read_value_from_frame(const llvm::Type* type, int offset);
+  llvm::Value* read_value_from_frame(llvm::Type* type, int offset);
 };
 
 class SharkJavaCallCacher : public SharkCacher {
@@ -422,7 +422,7 @@
 
   // Helper
  private:
-  llvm::Value* CreateAddressOfOSRBufEntry(int offset, const llvm::Type* type);
+  llvm::Value* CreateAddressOfOSRBufEntry(int offset, llvm::Type* type);
 };
 
 #endif // SHARE_VM_SHARK_SHARKCACHEDECACHE_HPP
--- a/src/share/vm/shark/sharkCodeBuffer.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkCodeBuffer.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -81,6 +81,13 @@
     return offset;
   }
 
+  int inline_Metadata(Metadata* metadata) const {
+    masm()->align(BytesPerWord);
+    int offset = masm()->offset();
+    masm()->store_Metadata(metadata);
+    return offset;
+  }
+
   // Inline a block of non-oop data into the buffer and return its offset.
  public:
   int inline_data(void *src, size_t size) const {
--- a/src/share/vm/shark/sharkCompiler.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkCompiler.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -48,7 +48,6 @@
 
 using namespace llvm;
 
-#if SHARK_LLVM_VERSION >= 27
 namespace {
   cl::opt<std::string>
   MCPU("mcpu");
@@ -57,7 +56,6 @@
   MAttrs("mattr",
          cl::CommaSeparated);
 }
-#endif
 
 SharkCompiler::SharkCompiler()
   : AbstractCompiler() {
@@ -72,6 +70,9 @@
   // Initialize the native target
   InitializeNativeTarget();
 
+  // MCJIT requires a native AsmPrinter
+  InitializeNativeTargetAsmPrinter();
+
   // Create the two contexts which we'll use
   _normal_context = new SharkContext("normal");
   _native_context = new SharkContext("native");
@@ -79,7 +80,6 @@
   // Create the memory manager
   _memory_manager = new SharkMemoryManager();
 
-#if SHARK_LLVM_VERSION >= 27
   // Finetune LLVM for the current host CPU.
   StringMap<bool> Features;
   bool gotCpuFeatures = llvm::sys::getHostCPUFeatures(Features);
@@ -113,6 +113,16 @@
   builder.setJITMemoryManager(memory_manager());
   builder.setEngineKind(EngineKind::JIT);
   builder.setErrorStr(&ErrorMsg);
+  if (! fnmatch(SharkOptimizationLevel, "None", 0)) {
+    tty->print_cr("Shark optimization level set to: None");
+    builder.setOptLevel(llvm::CodeGenOpt::None);
+  } else if (! fnmatch(SharkOptimizationLevel, "Less", 0)) {
+    tty->print_cr("Shark optimization level set to: Less");
+    builder.setOptLevel(llvm::CodeGenOpt::Less);
+  } else if (! fnmatch(SharkOptimizationLevel, "Aggressive", 0)) {
+    tty->print_cr("Shark optimization level set to: Aggressive");
+    builder.setOptLevel(llvm::CodeGenOpt::Aggressive);
+  } // else Default is selected by, well, default :-)
   _execution_engine = builder.create();
 
   if (!execution_engine()) {
@@ -125,13 +135,6 @@
 
   execution_engine()->addModule(
     _native_context->module());
-#else
-  _execution_engine = ExecutionEngine::createJIT(
-    _normal_context->module_provider(),
-    NULL, memory_manager(), CodeGenOpt::Default);
-  execution_engine()->addModuleProvider(
-    _native_context->module_provider());
-#endif
 
   // All done
   mark_initialized();
@@ -182,6 +185,9 @@
 
   // Build the LLVM IR for the method
   Function *function = SharkFunction::build(env, &builder, flow, name);
+  if (env->failing()) {
+    return;
+  }
 
   // Generate native code.  It's unpleasant that we have to drop into
   // the VM to do this -- it blocks safepoints -- but I can't see any
@@ -261,6 +267,12 @@
       function->dump();
   }
 
+  if (SharkVerifyFunction != NULL) {
+    if (!fnmatch(SharkVerifyFunction, name, 0)) {
+      verifyFunction(*function);
+    }
+  }
+
   // Compile to native code
   address code = NULL;
   context()->add_function(function);
@@ -268,33 +280,28 @@
     MutexLocker locker(execution_engine_lock());
     free_queued_methods();
 
+#ifndef NDEBUG
+#if SHARK_LLVM_VERSION <= 31
+#define setCurrentDebugType SetCurrentDebugType
+#endif
     if (SharkPrintAsmOf != NULL) {
-#if SHARK_LLVM_VERSION >= 27
-#ifndef NDEBUG
       if (!fnmatch(SharkPrintAsmOf, name, 0)) {
-        llvm::SetCurrentDebugType(X86_ONLY("x86-emitter") NOT_X86("jit"));
+        llvm::setCurrentDebugType(X86_ONLY("x86-emitter") NOT_X86("jit"));
         llvm::DebugFlag = true;
       }
       else {
-        llvm::SetCurrentDebugType("");
+        llvm::setCurrentDebugType("");
         llvm::DebugFlag = false;
       }
+    }
+#ifdef setCurrentDebugType
+#undef setCurrentDebugType
+#endif
 #endif // !NDEBUG
-#else
-      // NB you need to patch LLVM with http://tinyurl.com/yf3baln for this
-      std::vector<const char*> args;
-      args.push_back(""); // program name
-      if (!fnmatch(SharkPrintAsmOf, name, 0))
-        args.push_back("-debug-only=x86-emitter");
-      else
-        args.push_back("-debug-only=none");
-      args.push_back(0);  // terminator
-      cl::ParseCommandLineOptions(args.size() - 1, (char **) &args[0]);
-#endif // SHARK_LLVM_VERSION
-    }
     memory_manager()->set_entry_for_function(function, entry);
     code = (address) execution_engine()->getPointerToFunction(function);
   }
+  assert(code != NULL, "code must be != NULL");
   entry->set_entry_point(code);
   entry->set_function(function);
   entry->set_context(context());
@@ -319,8 +326,8 @@
   // finish with the exception of the VM thread, so we can consider
   // ourself the owner of the execution engine lock even though we
   // can't actually acquire it at this time.
-  assert(Thread::current()->is_VM_thread(), "must be called by VM thread");
-  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+  assert(Thread::current()->is_Compiler_thread(), "must be called by compiler thread");
+  assert_locked_or_safepoint(CodeCache_lock);
 
   SharkEntry *entry = (SharkEntry *) code;
   entry->context()->push_to_free_queue(entry->function());
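
Since fnmatch() returns 0 on a match, each "! fnmatch(...)" above reads as
"the flag value matches this name": the SharkOptimizationLevel string is
treated as an fnmatch() pattern tried against "None", "Less" and "Aggressive"
in turn, with anything else falling through to LLVM's default optimization
level. SharkVerifyFunction works the same way, selecting which generated
functions are run through verifyFunction() before compilation.
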
--- a/src/share/vm/shark/sharkCompiler.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkCompiler.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -46,6 +46,9 @@
   // Missing feature tests
   bool supports_native() { return true; }
   bool supports_osr()    { return true; }
+  bool can_compile_method(methodHandle method)  {
+    return ! (method->is_method_handle_intrinsic() || method->is_compiled_lambda_form());
+  }
 
   // Customization
   bool needs_adapters()  { return false; }
--- a/src/share/vm/shark/sharkConstant.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkConstant.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -37,10 +37,13 @@
   ciType *type = NULL;
   if (constant.basic_type() == T_OBJECT) {
     ciEnv *env = ciEnv::current();
-    if (constant.as_object()->is_klass())
-      type = env->Class_klass();
-    else
-      type = env->String_klass();
+
+    assert(constant.as_object()->klass() == env->String_klass()
+           || constant.as_object()->klass() == env->Class_klass()
+           || constant.as_object()->klass()->is_subtype_of(env->MethodType_klass())
+           || constant.as_object()->klass()->is_subtype_of(env->MethodHandle_klass()), "should be");
+
+    type = constant.as_object()->klass();
   }
   return new SharkConstant(constant, type);
 }
@@ -108,17 +111,16 @@
   // objects (which differ between ldc* and get*, thanks!)
   ciObject *object = constant.as_object();
   assert(type != NULL, "shouldn't be");
-  if (object->is_klass()) {
-    // The constant returned for a klass is the ciKlass
-    // for the entry, but we want the java_mirror.
-    ciKlass *klass = object->as_klass();
-    if (!klass->is_loaded()) {
+
+  if ((! object->is_null_object()) && object->klass() == ciEnv::current()->Class_klass()) {
+    ciKlass *klass = object->klass();
+    if (! klass->is_loaded()) {
       _is_loaded = false;
       return;
     }
-    object = klass->java_mirror();
   }
-  if (object->is_null_object() || !object->can_be_constant()) {
+
+  if (object->is_null_object() || ! object->can_be_constant() || ! object->is_loaded()) {
     _is_loaded = false;
     return;
   }
--- a/src/share/vm/shark/sharkContext.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkContext.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -29,6 +29,7 @@
 #include "shark/llvmHeaders.hpp"
 #include "shark/sharkContext.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "memory/allocation.hpp"
 
 using namespace llvm;
 
@@ -52,6 +53,9 @@
   _itableOffsetEntry_type = PointerType::getUnqual(
     ArrayType::get(jbyte_type(), itableOffsetEntry::size() * wordSize));
 
+  _Metadata_type = PointerType::getUnqual(
+    ArrayType::get(jbyte_type(), sizeof(Metadata)));
+
   _klass_type = PointerType::getUnqual(
     ArrayType::get(jbyte_type(), sizeof(Klass)));
 
@@ -61,7 +65,7 @@
   _jniHandleBlock_type = PointerType::getUnqual(
     ArrayType::get(jbyte_type(), sizeof(JNIHandleBlock)));
 
-  _Method*_type = PointerType::getUnqual(
+  _Method_type = PointerType::getUnqual(
     ArrayType::get(jbyte_type(), sizeof(Method)));
 
   _monitor_type = ArrayType::get(
@@ -76,14 +80,14 @@
   _zeroStack_type = PointerType::getUnqual(
     ArrayType::get(jbyte_type(), sizeof(ZeroStack)));
 
-  std::vector<const Type*> params;
-  params.push_back(Method*_type());
+  std::vector<Type*> params;
+  params.push_back(Method_type());
   params.push_back(intptr_type());
   params.push_back(thread_type());
   _entry_point_type = FunctionType::get(jint_type(), params, false);
 
   params.clear();
-  params.push_back(Method*_type());
+  params.push_back(Method_type());
   params.push_back(PointerType::getUnqual(jbyte_type()));
   params.push_back(intptr_type());
   params.push_back(thread_type());
@@ -150,7 +154,7 @@
   }
 }
 
-class SharkFreeQueueItem : public CHeapObj {
+class SharkFreeQueueItem : public CHeapObj<mtNone> {
  public:
   SharkFreeQueueItem(llvm::Function* function, SharkFreeQueueItem *next)
     : _function(function), _next(next) {}
--- a/src/share/vm/shark/sharkContext.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkContext.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -42,11 +42,7 @@
  private:
   llvm::Module* _module;
 
-#if SHARK_LLVM_VERSION >= 27
  public:
-#else
- private:
-#endif
   llvm::Module* module() const {
     return _module;
   }
@@ -59,127 +55,126 @@
 
   // Module accessors
  public:
-#if SHARK_LLVM_VERSION < 27
-  llvm::ModuleProvider* module_provider() const {
-    return new llvm::ExistingModuleProvider(module());
-  }
-#endif
   void add_function(llvm::Function* function) const {
     module()->getFunctionList().push_back(function);
   }
   llvm::Constant* get_external(const char*               name,
-                               const llvm::FunctionType* sig) {
+                               llvm::FunctionType* sig) {
     return module()->getOrInsertFunction(name, sig);
   }
 
   // Basic types
  private:
-  const llvm::Type*        _void_type;
-  const llvm::IntegerType* _bit_type;
-  const llvm::IntegerType* _jbyte_type;
-  const llvm::IntegerType* _jshort_type;
-  const llvm::IntegerType* _jint_type;
-  const llvm::IntegerType* _jlong_type;
-  const llvm::Type*        _jfloat_type;
-  const llvm::Type*        _jdouble_type;
+  llvm::Type*        _void_type;
+  llvm::IntegerType* _bit_type;
+  llvm::IntegerType* _jbyte_type;
+  llvm::IntegerType* _jshort_type;
+  llvm::IntegerType* _jint_type;
+  llvm::IntegerType* _jlong_type;
+  llvm::Type*        _jfloat_type;
+  llvm::Type*        _jdouble_type;
 
  public:
-  const llvm::Type* void_type() const {
+  llvm::Type* void_type() const {
     return _void_type;
   }
-  const llvm::IntegerType* bit_type() const {
+  llvm::IntegerType* bit_type() const {
     return _bit_type;
   }
-  const llvm::IntegerType* jbyte_type() const {
+  llvm::IntegerType* jbyte_type() const {
     return _jbyte_type;
   }
-  const llvm::IntegerType* jshort_type() const {
+  llvm::IntegerType* jshort_type() const {
     return _jshort_type;
   }
-  const llvm::IntegerType* jint_type() const {
+  llvm::IntegerType* jint_type() const {
     return _jint_type;
   }
-  const llvm::IntegerType* jlong_type() const {
+  llvm::IntegerType* jlong_type() const {
     return _jlong_type;
   }
-  const llvm::Type* jfloat_type() const {
+  llvm::Type* jfloat_type() const {
     return _jfloat_type;
   }
-  const llvm::Type* jdouble_type() const {
+  llvm::Type* jdouble_type() const {
     return _jdouble_type;
   }
-  const llvm::IntegerType* intptr_type() const {
+  llvm::IntegerType* intptr_type() const {
     return LP64_ONLY(jlong_type()) NOT_LP64(jint_type());
   }
 
   // Compound types
  private:
-  const llvm::PointerType*  _itableOffsetEntry_type;
-  const llvm::PointerType*  _jniEnv_type;
-  const llvm::PointerType*  _jniHandleBlock_type;
-  const llvm::PointerType*  _klass_type;
-  const llvm::PointerType*  _Method*_type;
-  const llvm::ArrayType*    _monitor_type;
-  const llvm::PointerType*  _oop_type;
-  const llvm::PointerType*  _thread_type;
-  const llvm::PointerType*  _zeroStack_type;
-  const llvm::FunctionType* _entry_point_type;
-  const llvm::FunctionType* _osr_entry_point_type;
+  llvm::PointerType*  _itableOffsetEntry_type;
+  llvm::PointerType*  _jniEnv_type;
+  llvm::PointerType*  _jniHandleBlock_type;
+  llvm::PointerType*  _Metadata_type;
+  llvm::PointerType*  _klass_type;
+  llvm::PointerType*  _Method_type;
+  llvm::ArrayType*    _monitor_type;
+  llvm::PointerType*  _oop_type;
+  llvm::PointerType*  _thread_type;
+  llvm::PointerType*  _zeroStack_type;
+  llvm::FunctionType* _entry_point_type;
+  llvm::FunctionType* _osr_entry_point_type;
 
  public:
-  const llvm::PointerType* itableOffsetEntry_type() const {
+  llvm::PointerType* itableOffsetEntry_type() const {
     return _itableOffsetEntry_type;
   }
-  const llvm::PointerType* jniEnv_type() const {
+  llvm::PointerType* jniEnv_type() const {
     return _jniEnv_type;
   }
-  const llvm::PointerType* jniHandleBlock_type() const {
+  llvm::PointerType* jniHandleBlock_type() const {
     return _jniHandleBlock_type;
   }
-  const llvm::PointerType* klass_type() const {
+  llvm::PointerType* Metadata_type() const {
+    return _Metadata_type;
+  }
+  llvm::PointerType* klass_type() const {
     return _klass_type;
   }
-  const llvm::PointerType* Method*_type() const {
-    return _Method*_type;
+  llvm::PointerType* Method_type() const {
+    return _Method_type;
   }
-  const llvm::ArrayType* monitor_type() const {
+  llvm::ArrayType* monitor_type() const {
     return _monitor_type;
   }
-  const llvm::PointerType* oop_type() const {
+  llvm::PointerType* oop_type() const {
     return _oop_type;
   }
-  const llvm::PointerType* thread_type() const {
+  llvm::PointerType* thread_type() const {
     return _thread_type;
   }
-  const llvm::PointerType* zeroStack_type() const {
+  llvm::PointerType* zeroStack_type() const {
     return _zeroStack_type;
   }
-  const llvm::FunctionType* entry_point_type() const {
+  llvm::FunctionType* entry_point_type() const {
     return _entry_point_type;
   }
-  const llvm::FunctionType* osr_entry_point_type() const {
+  llvm::FunctionType* osr_entry_point_type() const {
     return _osr_entry_point_type;
   }
 
   // Mappings
  private:
-  const llvm::Type* _to_stackType[T_CONFLICT];
-  const llvm::Type* _to_arrayType[T_CONFLICT];
+  llvm::Type* _to_stackType[T_CONFLICT];
+  llvm::Type* _to_arrayType[T_CONFLICT];
 
  private:
-  const llvm::Type* map_type(const llvm::Type* const* table,
+  llvm::Type* map_type(llvm::Type* const* table,
                              BasicType                type) const {
     assert(type >= 0 && type < T_CONFLICT, "unhandled type");
-    const llvm::Type* result = table[type];
+    llvm::Type* result = table[type];
     assert(result != NULL, "unhandled type");
     return result;
   }
 
  public:
-  const llvm::Type* to_stackType(BasicType type) const {
+  llvm::Type* to_stackType(BasicType type) const {
     return map_type(_to_stackType, type);
   }
-  const llvm::Type* to_arrayType(BasicType type) const {
+  llvm::Type* to_arrayType(BasicType type) const {
     return map_type(_to_arrayType, type);
   }
 
--- a/src/share/vm/shark/sharkFunction.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkFunction.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -77,6 +77,10 @@
   // Walk the tree from the start block to determine which
   // blocks are entered and which blocks require phis
   SharkTopLevelBlock *start_block = block(flow()->start_block_num());
+  if (is_osr() && start_block->stack_depth_at_entry() != 0) {
+    env()->record_method_not_compilable("can't compile OSR block with incoming stack-depth > 0");
+    return;
+  }
   assert(start_block->start() == flow()->start_bci(), "blocks out of order");
   start_block->enter();
 
--- a/src/share/vm/shark/sharkFunction.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkFunction.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -91,7 +91,7 @@
   bool is_osr() const {
     return flow()->is_osr_flow();
   }
-  const llvm::FunctionType* entry_point_type() const {
+  llvm::FunctionType* entry_point_type() const {
     if (is_osr())
       return SharkType::osr_entry_point_type();
     else
--- a/src/share/vm/shark/sharkInliner.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkInliner.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -725,7 +725,7 @@
   // Push the result if necessary
   if (is_get) {
     bool result_pushed = false;
-    if (field->is_constant()) {
+    if (field->is_constant() && field->is_static()) {
       SharkConstant *sc = SharkConstant::for_field(iter());
       if (sc->is_loaded()) {
         push(sc->is_nonzero());
--- a/src/share/vm/shark/sharkIntrinsics.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkIntrinsics.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -171,7 +171,7 @@
   builder()->CreateBr(done);
 
   builder()->SetInsertPoint(done);
-  PHINode *phi = builder()->CreatePHI(a->getType(), "result");
+  PHINode *phi = builder()->CreatePHI(a->getType(), 0, "result");
   phi->addIncoming(a, return_a);
   phi->addIncoming(b, return_b);
 
@@ -210,7 +210,7 @@
   Value *klass = builder()->CreateValueOfStructEntry(
     state()->pop()->jobject_value(),
     in_ByteSize(oopDesc::klass_offset_in_bytes()),
-    SharkType::oop_type(),
+    SharkType::klass_type(),
     "klass");
 
   state()->push(
@@ -265,8 +265,7 @@
     "addr");
 
   // Perform the operation
-  Value *result = builder()->CreateCmpxchgInt(x, addr, e);
-
+  Value *result = builder()->CreateAtomicCmpXchg(addr, e, x, llvm::SequentiallyConsistent);
   // Push the result
   state()->push(
     SharkValue::create_jint(
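
Note: a hedged sketch of the LLVM 3.x atomic API assumed by the
CreateAtomicCmpXchg call above; in this era the builder takes a single
ordering argument and returns the value loaded from memory:

  // 'b', 'addr', 'expected', and 'desired' are illustrative names
  llvm::Value* emit_cas(llvm::IRBuilder<>& b, llvm::Value* addr,
                        llvm::Value* expected, llvm::Value* desired) {
    return b.CreateAtomicCmpXchg(addr, expected, desired,
                                 llvm::SequentiallyConsistent);
  }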
--- a/src/share/vm/shark/sharkInvariants.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkInvariants.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -68,7 +68,7 @@
   //
   // Accessing this directly is kind of ugly, so it's private.  Add
   // new accessors below if you need something from it.
- private:
+ protected:
   ciEnv* env() const {
     assert(_env != NULL, "env not available");
     return _env;
@@ -99,12 +99,14 @@
   DebugInformationRecorder* debug_info() const {
     return env()->debug_info();
   }
+  SharkCodeBuffer* code_buffer() const {
+    return builder()->code_buffer();
+  }
+
+ public:
   Dependencies* dependencies() const {
     return env()->dependencies();
   }
-  SharkCodeBuffer* code_buffer() const {
-    return builder()->code_buffer();
-  }
 
   // Commonly used classes
  protected:
--- a/src/share/vm/shark/sharkMemoryManager.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkMemoryManager.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -79,7 +79,6 @@
   mm()->setMemoryExecutable();
 }
 
-#if SHARK_LLVM_VERSION >= 27
 void SharkMemoryManager::deallocateExceptionTable(void *ptr) {
   mm()->deallocateExceptionTable(ptr);
 }
@@ -87,26 +86,23 @@
 void SharkMemoryManager::deallocateFunctionBody(void *ptr) {
   mm()->deallocateFunctionBody(ptr);
 }
-#else
-void SharkMemoryManager::deallocateMemForFunction(const Function* F) {
-  return mm()->deallocateMemForFunction(F);
-}
-#endif
 
 uint8_t* SharkMemoryManager::allocateGlobal(uintptr_t Size,
                                             unsigned int Alignment) {
   return mm()->allocateGlobal(Size, Alignment);
 }
 
-#if SHARK_LLVM_VERSION < 27
-void* SharkMemoryManager::getDlsymTable() const {
-  return mm()->getDlsymTable();
+void* SharkMemoryManager::getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure) {
+  return mm()->getPointerToNamedFunction(Name, AbortOnFailure);
 }
 
-void SharkMemoryManager::SetDlsymTable(void *ptr) {
-  mm()->SetDlsymTable(ptr);
+uint8_t* SharkMemoryManager::allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) {
+  return mm()->allocateCodeSection(Size, Alignment, SectionID);
 }
-#endif
+
+uint8_t* SharkMemoryManager::allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) {
+  return mm()->allocateDataSection(Size, Alignment, SectionID);
+}
 
 void SharkMemoryManager::setPoisonMemory(bool poison) {
   mm()->setPoisonMemory(poison);
--- a/src/share/vm/shark/sharkMemoryManager.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkMemoryManager.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -75,20 +75,15 @@
                          unsigned char* TableStart,
                          unsigned char* TableEnd,
                          unsigned char* FrameRegister);
-#if SHARK_LLVM_VERSION < 27
-  void* getDlsymTable() const;
-  void SetDlsymTable(void *ptr);
-#endif
+  void *getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure = true);
+  uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID);
+  uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID);
   void setPoisonMemory(bool);
   uint8_t* allocateGlobal(uintptr_t, unsigned int);
   void setMemoryWritable();
   void setMemoryExecutable();
-#if SHARK_LLVM_VERSION >= 27
   void deallocateExceptionTable(void *ptr);
   void deallocateFunctionBody(void *ptr);
-#else
-  void deallocateMemForFunction(const llvm::Function* F);
-#endif
   unsigned char *allocateSpace(intptr_t Size,
                                unsigned int Alignment);
 };
--- a/src/share/vm/shark/sharkNativeWrapper.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkNativeWrapper.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -59,7 +59,6 @@
   OopMap *oopmap = new OopMap(
     SharkStack::oopmap_slot_munge(stack()->oopmap_frame_size()),
     SharkStack::oopmap_slot_munge(arg_size()));
-  oopmap->set_oop(SharkStack::slot2reg(stack()->method_slot_offset()));
 
   // Set up the oop_tmp slot if required:
   //  - For static methods we use it to handlize the class argument
@@ -83,9 +82,9 @@
   }
 
   // Start building the argument list
-  std::vector<const Type*> param_types;
+  std::vector<Type*> param_types;
   std::vector<Value*> param_values;
-  const PointerType *box_type = PointerType::getUnqual(SharkType::oop_type());
+  PointerType *box_type = PointerType::getUnqual(SharkType::oop_type());
 
   // First argument is the JNIEnv
   param_types.push_back(SharkType::jniEnv_type());
@@ -149,7 +148,7 @@
       builder()->CreateBr(merge);
 
       builder()->SetInsertPoint(merge);
-      phi = builder()->CreatePHI(box_type, "boxed_object");
+      phi = builder()->CreatePHI(box_type, 0, "boxed_object");
       phi->addIncoming(ConstantPointerNull::get(box_type), null);
       phi->addIncoming(box, not_null);
       box = phi;
@@ -170,7 +169,7 @@
       // fall through
 
     default:
-      const Type *param_type = SharkType::to_stackType(arg_type(i));
+      Type *param_type = SharkType::to_stackType(arg_type(i));
 
       param_types.push_back(param_type);
       param_values.push_back(
@@ -201,7 +200,7 @@
 
   // Make the call
   BasicType result_type = target()->result_type();
-  const Type* return_type;
+  Type* return_type;
   if (result_type == T_VOID)
     return_type = SharkType::void_type();
   else if (is_returning_oop())
@@ -213,7 +212,7 @@
      PointerType::getUnqual(
        FunctionType::get(return_type, param_types, false)));
   Value *result = builder()->CreateCall(
-    native_function, param_values.begin(), param_values.end());
+    native_function, llvm::makeArrayRef(param_values));
 
   // Start the transition back to _thread_in_Java
   CreateSetThreadState(_thread_in_native_trans);
@@ -221,7 +220,7 @@
   // Make sure new state is visible in the GC thread
   if (os::is_MP()) {
     if (UseMembar)
-      builder()->CreateMemoryBarrier(SharkBuilder::BARRIER_STORELOAD);
+      builder()->CreateFence(llvm::SequentiallyConsistent, llvm::CrossThread);
     else
       CreateWriteMemorySerializePage();
   }
@@ -305,7 +304,7 @@
     builder()->CreateBr(merge);
 
     builder()->SetInsertPoint(merge);
-    PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), "result");
+    PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), 0, "result");
     phi->addIncoming(LLVMValue::null(), null);
     phi->addIncoming(unboxed_result, not_null);
     result = phi;
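
Note: a minimal sketch of the ArrayRef-based call emission assumed by
the makeArrayRef change above; LLVM 3.x replaced the iterator-pair
overload of CreateCall:

  std::vector<llvm::Value*> args;   // illustrative argument list
  args.push_back(jni_env);          // 'jni_env' and 'fn' are assumptions
  llvm::Value* res = builder.CreateCall(fn, llvm::makeArrayRef(args));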
--- a/src/share/vm/shark/sharkStack.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkStack.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -75,7 +75,7 @@
   _method_slot_offset = offset++;
   if (setup_sp_and_method) {
     builder()->CreateStore(
-      method, slot_addr(method_slot_offset(), SharkType::Method*_type()));
+      method, slot_addr(method_slot_offset(), SharkType::Method_type()));
   }
 
   // Unextended SP
@@ -163,7 +163,7 @@
 }
 
 Value* SharkStack::slot_addr(int         offset,
-                             const Type* type,
+                             Type* type,
                              const char* name) const {
   bool needs_cast = type && type != SharkType::intptr_type();
 
--- a/src/share/vm/shark/sharkStack.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkStack.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -204,7 +204,7 @@
   // Addresses of things in the frame
  public:
   llvm::Value* slot_addr(int               offset,
-                         const llvm::Type* type = NULL,
+                         llvm::Type* type = NULL,
                          const char*       name = "") const;
 
   llvm::Value* monitor_addr(int index) const {
--- a/src/share/vm/shark/sharkState.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkState.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -131,7 +131,7 @@
   Value *this_method = this->method();
   Value *other_method = other->method();
   if (this_method != other_method) {
-    PHINode *phi = builder()->CreatePHI(SharkType::Method*_type(), "method");
+    PHINode *phi = builder()->CreatePHI(SharkType::Method_type(), 0, "method");
     phi->addIncoming(this_method, this_block);
     phi->addIncoming(other_method, other_block);
     set_method(phi);
@@ -142,7 +142,7 @@
   Value *other_oop_tmp = other->oop_tmp();
   if (this_oop_tmp != other_oop_tmp) {
     assert(this_oop_tmp && other_oop_tmp, "can't merge NULL with non-NULL");
-    PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), "oop_tmp");
+    PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), 0, "oop_tmp");
     phi->addIncoming(this_oop_tmp, this_block);
     phi->addIncoming(other_oop_tmp, other_block);
     set_oop_tmp(phi);
@@ -243,7 +243,7 @@
                                        Value*              method,
                                        Value*              osr_buf)
   : SharkState(block) {
-  assert(!block->stack_depth_at_entry(), "entry block shouldn't have stack");
+  assert(block->stack_depth_at_entry() == 0, "entry block shouldn't have stack");
   set_num_monitors(block->ciblock()->monitor_count());
 
   // Local variables
@@ -287,7 +287,7 @@
   char name[18];
 
   // Method
-  set_method(builder()->CreatePHI(SharkType::Method*_type(), "method"));
+  set_method(builder()->CreatePHI(SharkType::Method_type(), 0, "method"));
 
   // Local variables
   for (int i = 0; i < max_locals(); i++) {
@@ -307,7 +307,7 @@
     case T_ARRAY:
       snprintf(name, sizeof(name), "local_%d_", i);
       value = SharkValue::create_phi(
-        type, builder()->CreatePHI(SharkType::to_stackType(type), name));
+        type, builder()->CreatePHI(SharkType::to_stackType(type), 0, name));
       break;
 
     case T_ADDRESS:
@@ -345,7 +345,7 @@
     case T_ARRAY:
       snprintf(name, sizeof(name), "stack_%d_", i);
       value = SharkValue::create_phi(
-        type, builder()->CreatePHI(SharkType::to_stackType(type), name));
+        type, builder()->CreatePHI(SharkType::to_stackType(type), 0, name));
       break;
 
     case T_ADDRESS:
--- a/src/share/vm/shark/sharkTopLevelBlock.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkTopLevelBlock.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -65,6 +65,7 @@
     switch (bc()) {
     case Bytecodes::_ldc:
     case Bytecodes::_ldc_w:
+    case Bytecodes::_ldc2_w:
       if (!SharkConstant::for_ldc(iter())->is_loaded()) {
         set_trap(
           Deoptimization::make_trap_request(
@@ -109,9 +110,22 @@
     case Bytecodes::_invokespecial:
     case Bytecodes::_invokevirtual:
     case Bytecodes::_invokeinterface:
-      method = iter()->get_method(will_link);
+      ciSignature* sig;
+      method = iter()->get_method(will_link, &sig);
       assert(will_link, "typeflow responsibility");
-
+      // We can't compile calls to method handle intrinsics, because we use
+      // the interpreter entry points and they expect the top frame to be an
+      // interpreter frame. We need to implement the intrinsics for Shark.
+      if (method->is_method_handle_intrinsic() || method->is_compiled_lambda_form()) {
+        if (SharkPerformanceWarnings) {
+          warning("JSR292 optimization not yet implemented in Shark");
+        }
+        set_trap(
+          Deoptimization::make_trap_request(
+            Deoptimization::Reason_unhandled,
+            Deoptimization::Action_make_not_compilable), bci());
+        return;
+      }
       if (!method->holder()->is_linked()) {
         set_trap(
           Deoptimization::make_trap_request(
@@ -156,6 +170,16 @@
         return;
       }
       break;
+    case Bytecodes::_invokedynamic:
+    case Bytecodes::_invokehandle:
+      if (SharkPerformanceWarnings) {
+        warning("JSR292 optimization not yet implemented in Shark");
+      }
+      set_trap(
+        Deoptimization::make_trap_request(
+          Deoptimization::Reason_unhandled,
+          Deoptimization::Action_make_not_compilable), bci());
+      return;
     }
   }
 
@@ -562,12 +586,12 @@
   Value *exception_klass = builder()->CreateValueOfStructEntry(
     xstack(0)->jobject_value(),
     in_ByteSize(oopDesc::klass_offset_in_bytes()),
-    SharkType::oop_type(),
+    SharkType::klass_type(),
     "exception_klass");
 
   for (int i = 0; i < num_options; i++) {
     Value *check_klass =
-      builder()->CreateInlineOop(exc_handler(i)->catch_klass());
+      builder()->CreateInlineMetadata(exc_handler(i)->catch_klass(), SharkType::klass_type());
 
     BasicBlock *not_exact   = function()->CreateBlock("not_exact");
     BasicBlock *not_subtype = function()->CreateBlock("not_subtype");
@@ -823,7 +847,7 @@
     builder()->CreateArrayAddress(
       array->jarray_value(), basic_type, index->jint_value()));
 
-  const Type *stack_type = SharkType::to_stackType(basic_type);
+  Type *stack_type = SharkType::to_stackType(basic_type);
   if (value->getType() != stack_type)
     value = builder()->CreateIntCast(value, stack_type, basic_type != T_CHAR);
 
@@ -910,7 +934,7 @@
     ShouldNotReachHere();
   }
 
-  const Type *array_type = SharkType::to_arrayType(basic_type);
+  Type *array_type = SharkType::to_arrayType(basic_type);
   if (value->getType() != array_type)
     value = builder()->CreateIntCast(value, array_type, basic_type != T_CHAR);
 
@@ -1028,7 +1052,6 @@
       dest_method->holder() == java_lang_Object_klass())
     return dest_method;
 
-#ifdef SHARK_CAN_DEOPTIMIZE_ANYWHERE
   // This code can replace a virtual call with a direct call if this
   // class is the only one in the entire set of loaded classes that
   // implements this method.  This makes the compiled code dependent
@@ -1062,6 +1085,8 @@
   if (monomorphic_target != NULL) {
     assert(!monomorphic_target->is_abstract(), "shouldn't be");
 
+    function()->dependencies()->assert_unique_concrete_method(actual_receiver, monomorphic_target);
+
     // Opto has a bunch of type checking here that I don't
     // understand.  It's to inhibit casting in one direction,
     // possibly because objects in Opto can have inexact
@@ -1095,16 +1120,15 @@
   // with non-monomorphic targets if the receiver has an exact
   // type.  We don't mark types this way, so we can't do this.
 
-#endif // SHARK_CAN_DEOPTIMIZE_ANYWHERE
 
   return NULL;
 }
 
 Value *SharkTopLevelBlock::get_direct_callee(ciMethod* method) {
   return builder()->CreateBitCast(
-    builder()->CreateInlineOop(method),
-    SharkType::Method*_type(),
-    "callee");
+    builder()->CreateInlineMetadata(method, SharkType::Method_type()),
+    SharkType::Method_type(),
+    "callee");
 }
 
 Value *SharkTopLevelBlock::get_virtual_callee(SharkValue* receiver,
@@ -1118,7 +1142,7 @@
   return builder()->CreateLoad(
     builder()->CreateArrayAddress(
       klass,
-      SharkType::Method*_type(),
+      SharkType::Method_type(),
       vtableEntry::size() * wordSize,
       in_ByteSize(InstanceKlass::vtable_start_offset() * wordSize),
       LLVMValue::intptr_constant(vtable_index)),
@@ -1136,7 +1160,7 @@
   // Locate the receiver's itable
   Value *object_klass = builder()->CreateValueOfStructEntry(
     receiver->jobject_value(), in_ByteSize(oopDesc::klass_offset_in_bytes()),
-    SharkType::oop_type(),
+    SharkType::klass_type(),
     "object_klass");
 
   Value *vtable_start = builder()->CreateAdd(
@@ -1169,12 +1193,12 @@
   }
 
   // Locate this interface's entry in the table
-  Value *iklass = builder()->CreateInlineOop(method->holder());
+  Value *iklass = builder()->CreateInlineMetadata(method->holder(), SharkType::klass_type());
   BasicBlock *loop_entry = builder()->GetInsertBlock();
   builder()->CreateBr(loop);
   builder()->SetInsertPoint(loop);
   PHINode *itable_entry_addr = builder()->CreatePHI(
-    SharkType::intptr_type(), "itable_entry_addr");
+    SharkType::intptr_type(), 0, "itable_entry_addr");
   itable_entry_addr->addIncoming(itable_start, loop_entry);
 
   Value *itable_entry = builder()->CreateIntToPtr(
@@ -1183,11 +1207,11 @@
   Value *itable_iklass = builder()->CreateValueOfStructEntry(
     itable_entry,
     in_ByteSize(itableOffsetEntry::interface_offset_in_bytes()),
-    SharkType::oop_type(),
+    SharkType::klass_type(),
     "itable_iklass");
 
   builder()->CreateCondBr(
-    builder()->CreateICmpEQ(itable_iklass, LLVMValue::null()),
+    builder()->CreateICmpEQ(itable_iklass, LLVMValue::nullKlass()),
     got_null, not_null);
 
   // A null entry means that the class doesn't implement the
@@ -1231,7 +1255,7 @@
             method->itable_index() * itableMethodEntry::size() * wordSize)),
         LLVMValue::intptr_constant(
           itableMethodEntry::method_offset_in_bytes())),
-      PointerType::getUnqual(SharkType::Method*_type())),
+      PointerType::getUnqual(SharkType::Method_type())),
     "callee");
 }
 
@@ -1243,7 +1267,9 @@
 
   // Find the method being called
   bool will_link;
-  ciMethod *dest_method = iter()->get_method(will_link);
+  ciSignature* sig;
+  ciMethod *dest_method = iter()->get_method(will_link, &sig);
+
   assert(will_link, "typeflow responsibility");
   assert(dest_method->is_static() == is_static, "must match bc");
 
@@ -1259,10 +1285,17 @@
   assert(holder_klass->is_interface() ||
          holder_klass->super() == NULL ||
          !is_interface, "must match bc");
+
+  bool is_forced_virtual = is_interface && holder_klass == java_lang_Object_klass();
+
   ciKlass *holder = iter()->get_declared_method_holder();
   ciInstanceKlass *klass =
     ciEnv::get_instance_klass_for_declared_method_holder(holder);
 
+  if (is_forced_virtual) {
+    klass = java_lang_Object_klass();
+  }
+
   // Find the receiver in the stack.  We do this before
   // trying to inline because the inliner can only use
   // zero-checked values, not being able to perform the
@@ -1287,14 +1320,15 @@
 
   // Try to inline the call
   if (!call_is_virtual) {
-    if (SharkInliner::attempt_inline(call_method, current_state()))
+    if (SharkInliner::attempt_inline(call_method, current_state())) {
       return;
+    }
   }
 
   // Find the method we are calling
   Value *callee;
   if (call_is_virtual) {
-    if (is_virtual) {
+    if (is_virtual || is_forced_virtual) {
       assert(klass->is_linked(), "scan_for_traps responsibility");
       int vtable_index = call_method->resolve_vtable_index(
         target()->holder(), klass);
@@ -1490,12 +1524,12 @@
 
   // Get the class we're checking against
   builder()->SetInsertPoint(not_null);
-  Value *check_klass = builder()->CreateInlineOop(klass);
+  Value *check_klass = builder()->CreateInlineMetadata(klass, SharkType::klass_type());
 
   // Get the class of the object being tested
   Value *object_klass = builder()->CreateValueOfStructEntry(
     object, in_ByteSize(oopDesc::klass_offset_in_bytes()),
-    SharkType::oop_type(),
+    SharkType::klass_type(),
     "object_klass");
 
   // Perform the check
@@ -1520,7 +1554,7 @@
   // First merge
   builder()->SetInsertPoint(merge1);
   PHINode *nonnull_result = builder()->CreatePHI(
-    SharkType::jint_type(), "nonnull_result");
+    SharkType::jint_type(), 0, "nonnull_result");
   nonnull_result->addIncoming(
     LLVMValue::jint_constant(IC_IS_INSTANCE), is_instance);
   nonnull_result->addIncoming(
@@ -1531,7 +1565,7 @@
   // Second merge
   builder()->SetInsertPoint(merge2);
   PHINode *result = builder()->CreatePHI(
-    SharkType::jint_type(), "result");
+    SharkType::jint_type(), 0, "result");
   result->addIncoming(LLVMValue::jint_constant(IC_IS_NULL), null_block);
   result->addIncoming(nonnull_result, nonnull_block);
 
@@ -1698,7 +1732,7 @@
     heap_object = builder()->CreateIntToPtr(
       old_top, SharkType::oop_type(), "heap_object");
 
-    Value *check = builder()->CreateCmpxchgPtr(new_top, top_addr, old_top);
+    Value *check = builder()->CreateAtomicCmpXchg(top_addr, old_top, new_top, llvm::SequentiallyConsistent);
     builder()->CreateCondBr(
       builder()->CreateICmpEQ(old_top, check),
       initialize, retry);
@@ -1707,7 +1741,7 @@
     builder()->SetInsertPoint(initialize);
     if (tlab_object) {
       PHINode *phi = builder()->CreatePHI(
-        SharkType::oop_type(), "fast_object");
+        SharkType::oop_type(), 0, "fast_object");
       phi->addIncoming(tlab_object, got_tlab);
       phi->addIncoming(heap_object, got_heap);
       fast_object = phi;
@@ -1730,7 +1764,7 @@
 
     Value *klass_addr = builder()->CreateAddressOfStructEntry(
       fast_object, in_ByteSize(oopDesc::klass_offset_in_bytes()),
-      PointerType::getUnqual(SharkType::oop_type()),
+      PointerType::getUnqual(SharkType::klass_type()),
       "klass_addr");
 
     // Set the mark
@@ -1744,7 +1778,7 @@
     builder()->CreateStore(LLVMValue::intptr_constant(mark), mark_addr);
 
     // Set the class
-    Value *rtklass = builder()->CreateInlineOop(klass);
+    Value *rtklass = builder()->CreateInlineMetadata(klass, SharkType::klass_type());
     builder()->CreateStore(rtklass, klass_addr);
     got_fast = builder()->GetInsertBlock();
 
@@ -1767,7 +1801,7 @@
     builder()->SetInsertPoint(push_object);
   }
   if (fast_object) {
-    PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), "object");
+    PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), 0, "object");
     phi->addIncoming(fast_object, got_fast);
     phi->addIncoming(slow_object, got_slow);
     object = phi;
@@ -1849,8 +1883,9 @@
 
 void SharkTopLevelBlock::acquire_method_lock() {
   Value *lockee;
-  if (target()->is_static())
+  if (target()->is_static()) {
     lockee = builder()->CreateInlineOop(target()->holder()->java_mirror());
+  }
   else
     lockee = local(0)->jobject_value();
 
@@ -1898,7 +1933,7 @@
 
   Value *lock = builder()->CreatePtrToInt(
     monitor_header_addr, SharkType::intptr_type());
-  Value *check = builder()->CreateCmpxchgPtr(lock, mark_addr, disp);
+  Value *check = builder()->CreateAtomicCmpXchg(mark_addr, disp, lock, llvm::Acquire);
   builder()->CreateCondBr(
     builder()->CreateICmpEQ(disp, check),
     acquired_fast, try_recursive);
@@ -1983,7 +2018,7 @@
     PointerType::getUnqual(SharkType::intptr_type()),
     "mark_addr");
 
-  Value *check = builder()->CreateCmpxchgPtr(disp, mark_addr, lock);
+  Value *check = builder()->CreateAtomicCmpXchg(mark_addr, lock, disp, llvm::Release);
   builder()->CreateCondBr(
     builder()->CreateICmpEQ(lock, check),
     released_fast, slow_path);
--- a/src/share/vm/shark/sharkTopLevelBlock.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkTopLevelBlock.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -290,7 +290,7 @@
                           int           exception_action) {
     decache_for_VM_call();
     stack()->CreateSetLastJavaFrame();
-    llvm::CallInst *res = builder()->CreateCall(callee, args_start, args_end);
+    llvm::CallInst *res = builder()->CreateCall(callee, llvm::makeArrayRef(args_start, args_end));
     stack()->CreateResetLastJavaFrame();
     cache_after_VM_call();
     if (exception_action & EAM_CHECK) {
--- a/src/share/vm/shark/sharkType.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkType.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -40,82 +40,85 @@
 
   // Basic types
  public:
-  static const llvm::Type* void_type() {
+  static llvm::Type* void_type() {
     return context().void_type();
   }
-  static const llvm::IntegerType* bit_type() {
+  static llvm::IntegerType* bit_type() {
     return context().bit_type();
   }
-  static const llvm::IntegerType* jbyte_type() {
+  static llvm::IntegerType* jbyte_type() {
     return context().jbyte_type();
   }
-  static const llvm::IntegerType* jshort_type() {
+  static llvm::IntegerType* jshort_type() {
     return context().jshort_type();
   }
-  static const llvm::IntegerType* jint_type() {
+  static llvm::IntegerType* jint_type() {
     return context().jint_type();
   }
-  static const llvm::IntegerType* jlong_type() {
+  static llvm::IntegerType* jlong_type() {
     return context().jlong_type();
   }
-  static const llvm::Type* jfloat_type() {
+  static llvm::Type* jfloat_type() {
     return context().jfloat_type();
   }
-  static const llvm::Type* jdouble_type() {
+  static llvm::Type* jdouble_type() {
     return context().jdouble_type();
   }
-  static const llvm::IntegerType* intptr_type() {
+  static llvm::IntegerType* intptr_type() {
     return context().intptr_type();
   }
 
   // Compound types
  public:
-  static const llvm::PointerType* itableOffsetEntry_type() {
+  static llvm::PointerType* itableOffsetEntry_type() {
     return context().itableOffsetEntry_type();
   }
-  static const llvm::PointerType* jniEnv_type() {
+  static llvm::PointerType* jniEnv_type() {
     return context().jniEnv_type();
   }
-  static const llvm::PointerType* jniHandleBlock_type() {
+  static llvm::PointerType* jniHandleBlock_type() {
     return context().jniHandleBlock_type();
   }
-  static const llvm::PointerType* klass_type() {
+  static llvm::PointerType* Metadata_type() {
+    return context().Metadata_type();
+  }
+  static llvm::PointerType* klass_type() {
     return context().klass_type();
   }
-  static const llvm::PointerType* Method*_type() {
-    return context().Method*_type();
+  static llvm::PointerType* Method_type() {
+    return context().Method_type();
   }
-  static const llvm::ArrayType* monitor_type() {
+  static llvm::ArrayType* monitor_type() {
     return context().monitor_type();
   }
-  static const llvm::PointerType* oop_type() {
+  static llvm::PointerType* oop_type() {
     return context().oop_type();
   }
-  static const llvm::PointerType* thread_type() {
+  static llvm::PointerType* thread_type() {
     return context().thread_type();
   }
-  static const llvm::PointerType* zeroStack_type() {
+  static llvm::PointerType* zeroStack_type() {
     return context().zeroStack_type();
   }
-  static const llvm::FunctionType* entry_point_type() {
+  static llvm::FunctionType* entry_point_type() {
     return context().entry_point_type();
   }
-  static const llvm::FunctionType* osr_entry_point_type() {
+  static llvm::FunctionType* osr_entry_point_type() {
     return context().osr_entry_point_type();
   }
 
   // Mappings
  public:
-  static const llvm::Type* to_stackType(BasicType type) {
+  static llvm::Type* to_stackType(BasicType type) {
     return context().to_stackType(type);
   }
-  static const llvm::Type* to_stackType(ciType* type) {
+  static llvm::Type* to_stackType(ciType* type) {
     return to_stackType(type->basic_type());
   }
-  static const llvm::Type* to_arrayType(BasicType type) {
+  static llvm::Type* to_arrayType(BasicType type) {
     return context().to_arrayType(type);
   }
-  static const llvm::Type* to_arrayType(ciType* type) {
+  static llvm::Type* to_arrayType(ciType* type) {
     return to_arrayType(type->basic_type());
   }
 };
--- a/src/share/vm/shark/sharkValue.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/sharkValue.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -233,7 +233,7 @@
   assert(type() == other->type(), "should be");
   assert(zero_checked() == other->zero_checked(), "should be");
 
-  PHINode *phi = builder->CreatePHI(SharkType::to_stackType(type()), name);
+  PHINode *phi = builder->CreatePHI(SharkType::to_stackType(type()), 0, name);
   phi->addIncoming(this->generic_value(), this_block);
   phi->addIncoming(other->generic_value(), other_block);
   return SharkValue::create_generic(type(), phi, zero_checked());
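
Note: a hedged sketch of the LLVM 3.x CreatePHI signature used in these
hunks; the new middle argument is a reservation hint for the number of
incoming edges:

  // 'b' and 't' are illustrative; 2 reserves space for two predecessors
  llvm::PHINode* phi = b.CreatePHI(t, 2, "merge");
  phi->addIncoming(v_then, then_block);   // value/block names assumed
  phi->addIncoming(v_else, else_block);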
--- a/src/share/vm/shark/shark_globals.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/shark/shark_globals.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -40,6 +40,12 @@
   product(intx, SharkMaxInlineSize, 32,                                       \
           "Maximum bytecode size of methods to inline when using Shark")      \
                                                                               \
+  product(bool, EliminateNestedLocks, true,                                   \
+          "Eliminate nested locks of the same object when possible")          \
+                                                                              \
+  product(ccstr, SharkOptimizationLevel, "Default",                           \
+          "The optimization level passed to LLVM, possible values: None, Less, Default and Agressive") \
+                                                                              \
   /* compiler debugging */                                                    \
   develop(ccstr, SharkPrintTypeflowOf, NULL,                                  \
           "Print the typeflow of the specified method")                       \
@@ -58,6 +64,10 @@
                                                                               \
   diagnostic(bool, SharkPerformanceWarnings, false,                           \
           "Warn about things that could be made faster")                      \
+                                                                              \
+  develop(ccstr, SharkVerifyFunction, NULL,                                   \
+          "Runs LLVM verify over LLVM IR")                                    \
+
 
 SHARK_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
 
--- a/src/share/vm/utilities/array.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/array.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,19 +24,8 @@
 
 #include "precompiled.hpp"
 #include "memory/resourceArea.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/array.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 
 #ifdef ASSERT
--- a/src/share/vm/utilities/array.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/array.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -353,9 +353,9 @@
   // sort the array.
   bool contains(const T& x) const      { return index_of(x) >= 0; }
 
-  T    at(int i) const                 { return _data[i]; }
-  void at_put(const int i, const T& x) { _data[i] = x; }
-  T*   adr_at(const int i)             { return &_data[i]; }
+  T    at(int i) const                 { assert(i >= 0 && i < _length, err_msg_res("oob: 0 <= %d < %d", i, _length)); return _data[i]; }
+  void at_put(const int i, const T& x) { assert(i >= 0 && i < _length, err_msg_res("oob: 0 <= %d < %d", i, _length)); _data[i] = x; }
+  T*   adr_at(const int i)             { assert(i >= 0 && i < _length, err_msg_res("oob: 0 <= %d < %d", i, _length)); return &_data[i]; }
   int  find(const T& x)                { return index_of(x); }
 
   T at_acquire(const int which)              { return OrderAccess::load_acquire(adr_at(which)); }
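
Note: a small sketch (hypothetical values) of what the new range
asserts buy in debug builds; out-of-bounds access now fails fast with
both the index and the length in the message:

  // with _length == 4, a debug build now aborts with "oob: 0 <= 7 < 4"
  // instead of silently reading past the end:
  //   array->at(7);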
--- a/src/share/vm/utilities/debug.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/debug.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -44,6 +44,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vframe.hpp"
 #include "services/heapDumper.hpp"
 #include "utilities/defaultStream.hpp"
@@ -52,19 +53,15 @@
 #include "utilities/vmError.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
-# include "thread_linux.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "os_solaris.inline.hpp"
-# include "thread_solaris.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "os_windows.inline.hpp"
-# include "thread_windows.inline.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "os_bsd.inline.hpp"
-# include "thread_bsd.inline.hpp"
 #endif
 
 #ifndef ASSERT
@@ -615,21 +612,6 @@
   Events::print();
 }
 
-// Given a heap address that was valid before the most recent GC, if
-// the oop that used to contain it is still live, prints the new
-// location of the oop and the address. Useful for tracking down
-// certain kinds of naked oop and oop map bugs.
-extern "C" void pnl(intptr_t old_heap_addr) {
-  // Print New Location of old heap address
-  Command c("pnl");
-#ifndef VALIDATE_MARK_SWEEP
-  tty->print_cr("Requires build with VALIDATE_MARK_SWEEP defined (debug build) and RecordMarkSweepCompaction enabled");
-#else
-  MarkSweep::print_new_location_of_heap_address((HeapWord*) old_heap_addr);
-#endif
-}
-
-
 extern "C" Method* findm(intptr_t pc) {
   Command c("findm");
   nmethod* nm = CodeCache::find_nmethod((address)pc);
--- a/src/share/vm/utilities/events.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/events.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,22 +26,11 @@
 #include "memory/allocation.inline.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/osThread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
 #include "runtime/threadLocalStorage.hpp"
 #include "runtime/timer.hpp"
 #include "utilities/events.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 
 EventLog* Events::_logs = NULL;
--- a/src/share/vm/utilities/events.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/events.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -135,11 +135,11 @@
 };
 
 // A simple wrapper class for fixed size text messages.
-class StringLogMessage : public FormatBuffer<132> {
+class StringLogMessage : public FormatBuffer<256> {
  public:
   // Wrap this buffer in a stringStream.
   stringStream stream() {
-    return stringStream(_buf, sizeof(_buf));
+    return stringStream(_buf, size());
   }
 };
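
Note: a minimal usage sketch of the enlarged buffer; stream() hands
back a stringStream bounded by the 256-byte FormatBuffer (the print
call and its arguments are illustrative):

  StringLogMessage msg;
  msg.stream().print("deopt @ bci %d", 42);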
 
--- a/src/share/vm/utilities/exceptions.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/exceptions.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -30,21 +30,10 @@
 #include "runtime/init.hpp"
 #include "runtime/java.hpp"
 #include "runtime/javaCalls.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/threadCritical.hpp"
 #include "utilities/events.hpp"
 #include "utilities/exceptions.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 
 // Implementation of ThreadShadow
--- a/src/share/vm/utilities/exceptions.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/exceptions.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -267,6 +267,7 @@
 #define THROW_WRAPPED_0(name, oop_to_wrap)  THROW_WRAPPED_(name, oop_to_wrap, 0)
 #define THROW_ARG_0(name, signature, arg)   THROW_ARG_(name, signature, arg, 0)
 #define THROW_MSG_CAUSE_0(name, message, cause) THROW_MSG_CAUSE_(name, message, cause, 0)
+#define THROW_MSG_CAUSE_NULL(name, message, cause) THROW_MSG_CAUSE_(name, message, cause, NULL)
 
 #define THROW_NULL(name)                    THROW_(name, NULL)
 #define THROW_MSG_NULL(name, message)       THROW_MSG_(name, message, NULL)
--- a/src/share/vm/utilities/globalDefinitions.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -328,6 +328,12 @@
 
 
 //----------------------------------------------------------------------------------------------------
+// Minimum StringTableSize value
+
+const int defaultStringTableSize=1009;
+
+
+//----------------------------------------------------------------------------------------------------
 // HotSwap - for JVMTI   aka Class File Replacement and PopFrame
 //
 // Determines whether on-the-fly class replacement and frame popping are enabled.
@@ -1244,6 +1250,14 @@
 
 #define PTR64_FORMAT           "0x%016" PRIx64
 
+// Format jlong, if necessary
+#ifndef JLONG_FORMAT
+#define JLONG_FORMAT           INT64_FORMAT
+#endif
+#ifndef JULONG_FORMAT
+#define JULONG_FORMAT          UINT64_FORMAT
+#endif
+
 // Format pointers which change size between 32- and 64-bit.
 #ifdef  _LP64
 #define INTPTR_FORMAT "0x%016" PRIxPTR
@@ -1274,4 +1288,12 @@
 
 #define ARRAY_SIZE(array) (sizeof(array)/sizeof((array)[0]))
 
+// Dereference vptr
+// All C++ compilers that we know of have the vtbl pointer in the first
+// word.  If there are exceptions, this function needs to be made compiler
+// specific.
+static inline void* dereference_vptr(void* addr) {
+  return *(void**)addr;
+}
+
 #endif // SHARE_VM_UTILITIES_GLOBALDEFINITIONS_HPP
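
Note: a hedged sketch of one use the new helper supports, under the
stated assumption that the vtable pointer occupies the first word:

  // true when a and b share a vtable, i.e. very likely the same
  // dynamic type; meaningful only for objects of polymorphic classes
  static bool same_vtable(void* a, void* b) {
    return dereference_vptr(a) == dereference_vptr(b);
  }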
--- a/src/share/vm/utilities/globalDefinitions_gcc.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/globalDefinitions_gcc.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -306,4 +306,8 @@
 #endif
 #define offsetof(klass,field) offset_of(klass,field)
 
+#if defined(_LP64) && defined(__APPLE__)
+#define JLONG_FORMAT           "%ld"
+#endif // _LP64 && __APPLE__
+
 #endif // SHARE_VM_UTILITIES_GLOBALDEFINITIONS_GCC_HPP
--- a/src/share/vm/utilities/growableArray.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/growableArray.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -24,19 +24,9 @@
 
 #include "precompiled.hpp"
 #include "memory/resourceArea.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/growableArray.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+
 #ifdef ASSERT
 void GenericGrowableArray::set_nesting() {
   if (on_stack()) {
--- a/src/share/vm/utilities/growableArray.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/growableArray.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -217,7 +217,12 @@
     return missed;
   }
 
-  E at(int i) const {
+  E& at(int i) {
+    assert(0 <= i && i < _len, "illegal index");
+    return _data[i];
+  }
+
+  E const& at(int i) const {
     assert(0 <= i && i < _len, "illegal index");
     return _data[i];
   }
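
Note: a short sketch of what the new non-const at() overload enables,
assuming a ResourceMark is active for the allocation:

  GrowableArray<int>* a = new GrowableArray<int>(4);
  a->append(10);
  a->at(0) += 5;   // mutates in place through E&; the const overload
                   // still serves read-only callers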
--- a/src/share/vm/utilities/macros.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/macros.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -282,6 +282,22 @@
 #define NOT_WIN64(code) code
 #endif
 
+#if defined(ZERO)
+#define ZERO_ONLY(code) code
+#define NOT_ZERO(code)
+#else
+#define ZERO_ONLY(code)
+#define NOT_ZERO(code) code
+#endif
+
+#if defined(SHARK)
+#define SHARK_ONLY(code) code
+#define NOT_SHARK(code)
+#else
+#define SHARK_ONLY(code)
+#define NOT_SHARK(code) code
+#endif
+
 #if defined(IA32) || defined(AMD64)
 #define X86
 #define X86_ONLY(code) code
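
Note: hypothetical usage of the new build-variant macros, mirroring
the existing *_ONLY/NOT_* convention:

  void report_compiler() {
    SHARK_ONLY(tty->print_cr("Shark (LLVM) compiler built in");)
    NOT_SHARK(tty->print_cr("Shark not built");)
  }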
--- a/src/share/vm/utilities/ostream.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/ostream.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -243,13 +243,11 @@
 }
 
 void outputStream::print_jlong(jlong value) {
-  // N.B. Same as INT64_FORMAT
-  print(os::jlong_format_specifier(), value);
+  print(JLONG_FORMAT, value);
 }
 
 void outputStream::print_julong(julong value) {
-  // N.B. Same as UINT64_FORMAT
-  print(os::julong_format_specifier(), value);
+  print(JULONG_FORMAT, value);
 }
 
 /**
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/utilities/pair.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_UTILITIES_PAIR_HPP
+#define SHARE_VM_UTILITIES_PAIR_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/top.hpp"
+
+template<typename T, typename V,  typename ALLOC_BASE = ResourceObj>
+class Pair : public ALLOC_BASE {
+ public:
+  T first;
+  V second;
+
+  Pair() {}
+  Pair(T t, V v) : first(t), second(v) {}
+};
+
+
+#endif // SHARE_VM_UTILITIES_PAIR_HPP
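
Note: a minimal usage sketch; with the default ALLOC_BASE of
ResourceObj the instance lives in the resource area, so a ResourceMark
must be active (values are illustrative):

  Pair<int, const char*> entry(200, "OK");
  tty->print_cr("%d -> %s", entry.first, entry.second);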
--- a/src/share/vm/utilities/preserveException.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/preserveException.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -26,18 +26,7 @@
 #define SHARE_VM_UTILITIES_PRESERVEEXCEPTION_HPP
 
 #include "runtime/handles.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
+#include "runtime/thread.inline.hpp"
 
 // This file provides more support for exception handling; see also exceptions.hpp
 class PreserveExceptionMark {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/utilities/resourceHash.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_UTILITIES_RESOURCEHASH_HPP
+#define SHARE_VM_UTILITIES_RESOURCEHASH_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/top.hpp"
+
+template<typename K> struct ResourceHashtableFns {
+    typedef unsigned (*hash_fn)(K const&);
+    typedef bool (*equals_fn)(K const&, K const&);
+};
+
+template<typename K> unsigned primitive_hash(const K& k) {
+  unsigned hash = (unsigned)((uintptr_t)k);
+  return hash ^ (hash >> 3); // just in case we're dealing with aligned ptrs
+}
+
+template<typename K> bool primitive_equals(const K& k0, const K& k1) {
+  return k0 == k1;
+}
+
+template<
+    typename K, typename V,
+    typename ResourceHashtableFns<K>::hash_fn   HASH   = primitive_hash<K>,
+    typename ResourceHashtableFns<K>::equals_fn EQUALS = primitive_equals<K>,
+    unsigned SIZE = 256
+    >
+class ResourceHashtable : public ResourceObj {
+ private:
+
+  class Node : public ResourceObj {
+   public:
+    unsigned _hash;
+    K _key;
+    V _value;
+    Node* _next;
+
+    Node(unsigned hash, K const& key, V const& value) :
+        _hash(hash), _key(key), _value(value), _next(NULL) {}
+  };
+
+  Node* _table[SIZE];
+
+  // Returns a pointer to where the node where the value would reside if
+  // it's in the table.
+  Node** lookup_node(unsigned hash, K const& key) {
+    unsigned index = hash % SIZE;
+    Node** ptr = &_table[index];
+    while (*ptr != NULL) {
+      Node* node = *ptr;
+      if (node->_hash == hash && EQUALS(key, node->_key)) {
+        break;
+      }
+      ptr = &(node->_next);
+    }
+    return ptr;
+  }
+
+  Node const** lookup_node(unsigned hash, K const& key) const {
+    return const_cast<Node const**>(
+        const_cast<ResourceHashtable*>(this)->lookup_node(hash, key));
+  }
+
+ public:
+  ResourceHashtable() { memset(_table, 0, SIZE * sizeof(Node*)); }
+
+  bool contains(K const& key) const {
+    return get(key) != NULL;
+  }
+
+  V* get(K const& key) const {
+    unsigned hv = HASH(key);
+    Node const** ptr = lookup_node(hv, key);
+    if (*ptr != NULL) {
+      return const_cast<V*>(&(*ptr)->_value);
+    } else {
+      return NULL;
+    }
+  }
+
+  // Inserts or replaces a value in the table
+  void put(K const& key, V const& value) {
+    unsigned hv = HASH(key);
+    Node** ptr = lookup_node(hv, key);
+    if (*ptr != NULL) {
+      (*ptr)->_value = value;
+    } else {
+      *ptr = new Node(hv, key, value);
+    }
+  }
+
+  // ITER contains bool do_entry(K const&, V const&), which will be
+  // called for each entry in the table.  If do_entry() returns false,
+  // the iteration is cancelled.
+  template<class ITER>
+  void iterate(ITER* iter) const {
+    Node* const* bucket = _table;
+    while (bucket < &_table[SIZE]) {
+      Node* node = *bucket;
+      while (node != NULL) {
+        bool cont = iter->do_entry(node->_key, node->_value);
+        if (!cont) { return; }
+        node = node->_next;
+      }
+      ++bucket;
+    }
+  }
+};
+
+
+#endif // SHARE_VM_UTILITIES_RESOURCEHASH_HPP
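
Note: a minimal usage sketch under a ResourceMark; the defaults hash
with primitive_hash and compare with primitive_equals (values are
illustrative):

  ResourceHashtable<int, const char*> table;
  table.put(1, "one");
  table.put(1, "uno");               // put() replaces an existing value
  const char** v = table.get(1);     // -> points at "uno"
  bool present = table.contains(2);  // false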
--- a/src/share/vm/utilities/taskqueue.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/taskqueue.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,21 +25,10 @@
 #include "precompiled.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/os.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/stack.inline.hpp"
 #include "utilities/taskqueue.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 #ifdef TRACESPINNING
 uint ParallelTaskTerminator::_total_yields = 0;
@@ -250,8 +239,8 @@
 
 #ifdef TRACESPINNING
 void ParallelTaskTerminator::print_termination_counts() {
-  gclog_or_tty->print_cr("ParallelTaskTerminator Total yields: %lld  "
-    "Total spins: %lld  Total peeks: %lld",
+  gclog_or_tty->print_cr("ParallelTaskTerminator Total yields: " UINT32_FORMAT
+    " Total spins: " UINT32_FORMAT " Total peeks: " UINT32_FORMAT,
     total_yields(),
     total_spins(),
     total_peeks());
--- a/src/share/vm/utilities/taskqueue.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/taskqueue.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -496,9 +496,7 @@
     }
   }
 
-  bool steal_1_random(uint queue_num, int* seed, E& t);
   bool steal_best_of_2(uint queue_num, int* seed, E& t);
-  bool steal_best_of_all(uint queue_num, int* seed, E& t);
 
   void register_queue(uint i, T* q);
 
@@ -538,46 +536,6 @@
 }
 
 template<class T, MEMFLAGS F> bool
-GenericTaskQueueSet<T, F>::steal_best_of_all(uint queue_num, int* seed, E& t) {
-  if (_n > 2) {
-    int best_k;
-    uint best_sz = 0;
-    for (uint k = 0; k < _n; k++) {
-      if (k == queue_num) continue;
-      uint sz = _queues[k]->size();
-      if (sz > best_sz) {
-        best_sz = sz;
-        best_k = k;
-      }
-    }
-    return best_sz > 0 && _queues[best_k]->pop_global(t);
-  } else if (_n == 2) {
-    // Just try the other one.
-    int k = (queue_num + 1) % 2;
-    return _queues[k]->pop_global(t);
-  } else {
-    assert(_n == 1, "can't be zero.");
-    return false;
-  }
-}
-
-template<class T, MEMFLAGS F> bool
-GenericTaskQueueSet<T, F>::steal_1_random(uint queue_num, int* seed, E& t) {
-  if (_n > 2) {
-    uint k = queue_num;
-    while (k == queue_num) k = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
-    return _queues[2]->pop_global(t);
-  } else if (_n == 2) {
-    // Just try the other one.
-    int k = (queue_num + 1) % 2;
-    return _queues[k]->pop_global(t);
-  } else {
-    assert(_n == 1, "can't be zero.");
-    return false;
-  }
-}
-
-template<class T, MEMFLAGS F> bool
 GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
     uint k1 = queue_num;
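
The retained steal_best_of_2() follows the power-of-two-choices policy:
sample two random victims and steal from whichever queue holds more work.
A standalone sketch of the victim selection, with queue_size() as a
hypothetical stand-in for _queues[k]->size() (not HotSpot code):

    #include <cstdlib>

    // Pick a steal victim for worker 'self' among 'n' queues (n > 2):
    // draw two random candidates other than self, prefer the fuller one.
    unsigned best_of_2_victim(unsigned self, unsigned n,
                              unsigned (*queue_size)(unsigned)) {
      unsigned k1 = self, k2 = self;
      while (k1 == self) k1 = (unsigned)rand() % n;
      while (k2 == self) k2 = (unsigned)rand() % n;
      return (queue_size(k1) > queue_size(k2)) ? k1 : k2;
    }
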
--- a/src/share/vm/utilities/utf8.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/utf8.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -147,7 +147,7 @@
 
 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
   unsigned char ch;
-  const char *ptr = (const char *)utf8_str;
+  const char *ptr = utf8_str;
   int index = 0;
 
   /* ASCII case loop optimization */
@@ -162,6 +162,119 @@
   }
 }
 
+// returns the quoted ascii length of a utf8 string of length utf8_length
+int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
+  const char *ptr = utf8_str;
+  const char* end = ptr + utf8_length;
+  int result = 0;
+  while (ptr < end) {
+    jchar c;
+    ptr = UTF8::next(ptr, &c);
+    if (c >= 32 && c < 127) {
+      result++;
+    } else {
+      result += 6;
+    }
+  }
+  return result;
+}
+
+// converts a utf8 string to quoted ascii
+void UTF8::as_quoted_ascii(const char* utf8_str, char* buf, int buflen) {
+  const char *ptr = utf8_str;
+  char* p = buf;
+  char* end = buf + buflen;
+  while (*ptr != '\0') {
+    jchar c;
+    ptr = UTF8::next(ptr, &c);
+    if (c >= 32 && c < 127) {
+      if (p + 1 >= end) break;      // string is truncated
+      *p++ = (char)c;
+    } else {
+      if (p + 6 >= end) break;      // string is truncated
+      sprintf(p, "\\u%04x", c);
+      p += 6;
+    }
+  }
+  *p = '\0';
+}
+
+
+const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {
+  const char *ptr = quoted_ascii_str;
+  // scan the printable prefix; stop at the first character that needs
+  // decoding (an escape sequence or a non-printable character)
+  while (*ptr != '\0') {
+    char c = *ptr;
+    if (c < 32 || c >= 127 || c == '\\') break;
+    ptr++;
+  }
+  if (*ptr == '\0') {
+    // nothing to do so return original string
+    return quoted_ascii_str;
+  }
+  // everything up to this point was ok.
+  int length = ptr - quoted_ascii_str;
+  char* buffer = NULL;
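+  // round 0 only measures the decoded length; round 1 fills the buffer
+  // allocated in between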
+  for (int round = 0; round < 2; round++) {
+    while (*ptr != '\0') {
+      if (*ptr != '\\') {
+        if (buffer != NULL) {
+          buffer[length] = *ptr;
+        }
+        length++;
+        ptr++;
+      } else {
+        switch (ptr[1]) {
+          case 'u': {
+            ptr += 2;
+            jchar value=0;
+            for (int i=0; i<4; i++) {
+              char c = *ptr++;
+              switch (c) {
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                  value = (value << 4) + c - '0';
+                  break;
+                case 'a': case 'b': case 'c':
+                case 'd': case 'e': case 'f':
+                  value = (value << 4) + 10 + c - 'a';
+                  break;
+                case 'A': case 'B': case 'C':
+                case 'D': case 'E': case 'F':
+                  value = (value << 4) + 10 + c - 'A';
+                  break;
+                default:
+                  ShouldNotReachHere();
+              }
+            }
+            if (buffer == NULL) {
+              char utf8_buffer[4];
+              char* next = (char*)utf8_write((u_char*)utf8_buffer, value);
+              length += next - utf8_buffer;
+            } else {
+              char* next = (char*)utf8_write((u_char*)&buffer[length], value);
+              length += next - &buffer[length];
+            }
+            break;
+          }
+          case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break;
+          case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break;
+          case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break;
+          case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break;
+          default:
+            ShouldNotReachHere();
+        }
+      }
+    }
+    if (round == 0) {
+      buffer = NEW_RESOURCE_ARRAY(char, length + 1);
+      ptr = quoted_ascii_str;
+      length = 0;
+    } else {
+      buffer[length] = '\0';
+    }
+  }
+  return buffer;
+}
+
+
 // Returns NULL if 'c' is not found. This only works as long
 // as 'c' is an ASCII character
 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) {
@@ -242,3 +355,35 @@
   }
   *utf8_buffer = '\0';
 }
+
+// returns the quoted ascii length of a unicode string
+int UNICODE::quoted_ascii_length(jchar* base, int length) {
+  int result = 0;
+  for (int i = 0; i < length; i++) {
+    jchar c = base[i];
+    if (c >= 32 && c < 127) {
+      result++;
+    } else {
+      result += 6;
+    }
+  }
+  return result;
+}
+
+// converts a unicode string to quoted ascii
+void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
+  char* p = buf;
+  char* end = buf + buflen;
+  for (int index = 0; index < length; index++) {
+    jchar c = base[index];
+    if (c >= 32 && c < 127) {
+      if (p + 1 >= end) break;      // string is truncated
+      *p++ = (char)c;
+    } else {
+      if (p + 6 >= end) break;      // string is truncated
+      sprintf(p, "\\u%04x", c);
+      p += 6;
+    }
+  }
+  *p = '\0';
+}
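
A usage sketch for the quoted-ascii helpers above. The values in the
comments assume the sample string shown, and a ResourceMark must be active
because from_quoted_ascii() allocates its result with NEW_RESOURCE_ARRAY:

    const char* s = "caf\xc3\xa9";                         // "café" in UTF-8
    int n = UTF8::quoted_ascii_length(s, (int)strlen(s));  // 3 + 6 = 9
    char* quoted = NEW_RESOURCE_ARRAY(char, n + 1);        // +1 for the '\0'
    UTF8::as_quoted_ascii(s, quoted, n + 1);               // "caf\u00e9"
    const char* back = UTF8::from_quoted_ascii(quoted);    // UTF-8 again
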
--- a/src/share/vm/utilities/utf8.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/utf8.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,22 +32,32 @@
 
 class UTF8 : AllStatic {
  public:
-  // returns the unicode length of a 0-terminated uft8 string
-  static int unicode_length(const char* uft8_str);
+  // returns the unicode length of a 0-terminated utf8 string
+  static int unicode_length(const char* utf8_str);
 
-  // returns the unicode length of a non-0-terminated uft8 string
-  static int unicode_length(const char* uft8_str, int len);
+  // returns the unicode length of a non-0-terminated utf8 string
+  static int unicode_length(const char* utf8_str, int len);
 
-  // converts a uft8 string to a unicode string
+  // converts a utf8 string to a unicode string
   static void convert_to_unicode(const char* utf8_str, jchar* unicode_buffer, int unicode_length);
 
+  // returns the quoted ascii length of a utf8 string
+  static int quoted_ascii_length(const char* utf8_str, int utf8_length);
+
+  // converts a utf8 string to quoted ascii
+  static void as_quoted_ascii(const char* utf8_str, char* buf, int buflen);
+
+  // converts a quoted ascii string to utf8 string.  returns the original
+  // string unchanged if nothing needs to be done.
+  static const char* from_quoted_ascii(const char* quoted_ascii_string);
+
   // decodes the current utf8 character, stores the result in value,
-  // and returns the end of the current uft8 chararacter.
+  // and returns the end of the current utf8 character.
   static char* next(const char* str, jchar* value);
 
   // decodes the current utf8 character, gets the supplementary character instead of
   // the surrogate pair when seeing a supplementary character in string,
-  // stores the result in value, and returns the end of the current uft8 chararacter.
+  // stores the result in value, and returns the end of the current utf8 character.
   static char* next_character(const char* str, jint* value);
 
   // Utility methods
@@ -79,6 +89,12 @@
   // in resource area unless a buffer is provided.
   static char* as_utf8(jchar* base, int length);
   static char* as_utf8(jchar* base, int length, char* buf, int buflen);
+
+  // returns the quoted ascii length of a unicode string
+  static int quoted_ascii_length(jchar* base, int length);
+
+  // converts a unicode string to quoted ascii
+  static void as_quoted_ascii(const jchar* base, int length, char* buf, int buflen);
 };
 
 #endif // SHARE_VM_UTILITIES_UTF8_HPP
--- a/src/share/vm/utilities/vmError.cpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/vmError.cpp	Sun Feb 03 22:43:57 2013 +0100
@@ -453,7 +453,9 @@
      JDK_Version::current().to_string(buf, sizeof(buf));
      const char* runtime_name = JDK_Version::runtime_name() != NULL ?
                                   JDK_Version::runtime_name() : "";
-     st->print_cr("# JRE version: %s (%s)", runtime_name, buf);
+     const char* runtime_version = JDK_Version::runtime_version() != NULL ?
+                                  JDK_Version::runtime_version() : "";
+     st->print_cr("# JRE version: %s (%s) (build %s)", runtime_name, buf, runtime_version);
      st->print_cr("# Java VM: %s (%s %s %s %s)",
                    Abstract_VM_Version::vm_name(),
                    Abstract_VM_Version::vm_release(),
@@ -700,7 +702,7 @@
 
      if (_verbose && Universe::is_fully_initialized()) {
        // print code cache information before vm abort
-       CodeCache::print_bounds(st);
+       CodeCache::print_summary(st);
        st->cr();
      }
 
@@ -1007,6 +1009,15 @@
     OnError = NULL;
   }
 
+  static bool skip_replay = false;
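+  // Dump compiler replay data at most once, even if error reporting
+  // re-enters this path.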
+  if (DumpReplayDataOnError && _thread && _thread->is_Compiler_thread() && !skip_replay) {
+    skip_replay = true;
+    ciEnv* env = ciEnv::current();
+    if (env != NULL) {
+      env->dump_replay_data();
+    }
+  }
+
   static bool skip_bug_url = !should_report_bug(first_error->_id);
   if (!skip_bug_url) {
     skip_bug_url = true;
--- a/src/share/vm/utilities/workgroup.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/workgroup.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -25,19 +25,8 @@
 #ifndef SHARE_VM_UTILITIES_WORKGROUP_HPP
 #define SHARE_VM_UTILITIES_WORKGROUP_HPP
 
+#include "runtime/thread.inline.hpp"
 #include "utilities/taskqueue.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "thread_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "thread_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "thread_bsd.inline.hpp"
-#endif
 
 // Task class hierarchy:
 //   AbstractGangTask
@@ -101,7 +90,7 @@
     NOT_PRODUCT(_name = name);
     _counter = 0;
   }
-  virtual ~AbstractGangTask() { }
+  ~AbstractGangTask() { }
 
 public:
 };
--- a/src/share/vm/utilities/yieldingWorkgroup.hpp	Mon Nov 05 17:03:33 2012 -0500
+++ b/src/share/vm/utilities/yieldingWorkgroup.hpp	Sun Feb 03 22:43:57 2013 +0100
@@ -106,7 +106,7 @@
     _status(INACTIVE),
     _gang(NULL) { }
 
-  virtual ~YieldingFlexibleGangTask() { }
+  ~YieldingFlexibleGangTask() { }
 
   friend class YieldingFlexibleWorkGang;
   friend class YieldingFlexibleGangWorker;
--- a/test/Makefile	Mon Nov 05 17:03:33 2012 -0500
+++ b/test/Makefile	Sun Feb 03 22:43:57 2013 +0100
@@ -189,9 +189,9 @@
 	$(JTREG) -a -v:fail,error               \
           $(JTREG_KEY_OPTION)                   \
           $(EXTRA_JTREG_OPTIONS)                \
-          -r:$(ABS_TEST_OUTPUT_DIR)/JTreport    \
-          -w:$(ABS_TEST_OUTPUT_DIR)/JTwork      \
-          -jdk:$(PRODUCT_HOME)                  \
+          -r:$(shell $(GETMIXEDPATH) "$(ABS_TEST_OUTPUT_DIR)")/JTreport    \
+          -w:$(shell $(GETMIXEDPATH) "$(ABS_TEST_OUTPUT_DIR)")/JTwork      \
+          -jdk:$(shell $(GETMIXEDPATH) "$(PRODUCT_HOME)")                  \
           $(JAVA_OPTIONS:%=-vmoption:%)         \
           $(JTREG_TESTDIRS)                     \
 	  || $(BUNDLE_UP_FAILED)
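
GETMIXEDPATH exists so that jtreg, a Java tool, receives mixed-style paths
(C:/...) when make runs under Cygwin on Windows. A sketch of a typical
definition, assuming PLATFORM is set elsewhere in this Makefile (the
repository's exact spelling may differ):

    ifeq ($(PLATFORM), windows)
      GETMIXEDPATH = cygpath -m
    else
      GETMIXEDPATH = echo
    endif
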
--- a/test/compiler/6340864/TestByteVect.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/test/compiler/6340864/TestByteVect.java	Sun Feb 03 22:43:57 2013 +0100
@@ -33,7 +33,7 @@
 public class TestByteVect {
   private static final int ARRLEN = 997;
   private static final int ITERS  = 11000;
-  private static final int ADD_INIT = 0;
+  private static final int ADD_INIT = 63;
   private static final int BIT_MASK = 0xB7;
   private static final int VALUE = 3;
   private static final int SHIFT = 8;
@@ -76,6 +76,7 @@
       test_subc(a0, a1);
       test_subv(a0, a1, (byte)VALUE);
       test_suba(a0, a1, a2);
+
       test_mulc(a0, a1);
       test_mulv(a0, a1, (byte)VALUE);
       test_mula(a0, a1, a2);
@@ -88,6 +89,7 @@
       test_divc_n(a0, a1);
       test_divv(a0, a1, (byte)-VALUE);
       test_diva(a0, a1, a3);
+
       test_andc(a0, a1);
       test_andv(a0, a1, (byte)BIT_MASK);
       test_anda(a0, a1, a4);
@@ -97,30 +99,49 @@
       test_xorc(a0, a1);
       test_xorv(a0, a1, (byte)BIT_MASK);
       test_xora(a0, a1, a4);
+
       test_sllc(a0, a1);
       test_sllv(a0, a1, VALUE);
       test_srlc(a0, a1);
       test_srlv(a0, a1, VALUE);
       test_srac(a0, a1);
       test_srav(a0, a1, VALUE);
+
       test_sllc_n(a0, a1);
       test_sllv(a0, a1, -VALUE);
       test_srlc_n(a0, a1);
       test_srlv(a0, a1, -VALUE);
       test_srac_n(a0, a1);
       test_srav(a0, a1, -VALUE);
+
       test_sllc_o(a0, a1);
       test_sllv(a0, a1, SHIFT);
       test_srlc_o(a0, a1);
       test_srlv(a0, a1, SHIFT);
       test_srac_o(a0, a1);
       test_srav(a0, a1, SHIFT);
+
       test_sllc_on(a0, a1);
       test_sllv(a0, a1, -SHIFT);
       test_srlc_on(a0, a1);
       test_srlv(a0, a1, -SHIFT);
       test_srac_on(a0, a1);
       test_srav(a0, a1, -SHIFT);
+
+      test_sllc_add(a0, a1);
+      test_sllv_add(a0, a1, ADD_INIT);
+      test_srlc_add(a0, a1);
+      test_srlv_add(a0, a1, ADD_INIT);
+      test_srac_add(a0, a1);
+      test_srav_add(a0, a1, ADD_INIT);
+
+      test_sllc_and(a0, a1);
+      test_sllv_and(a0, a1, BIT_MASK);
+      test_srlc_and(a0, a1);
+      test_srlv_and(a0, a1, BIT_MASK);
+      test_srac_and(a0, a1);
+      test_srav_and(a0, a1, BIT_MASK);
+
       test_pack2(p2, a1);
       test_unpack2(a0, p2);
       test_pack2_swap(p2, a1);
@@ -369,6 +390,60 @@
         errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
       }
 
+      test_sllc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+      test_sllv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+
+      test_srlc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+      test_srlv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+
+      test_srac_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+      test_srav_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+
+      test_sllc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+      test_sllv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+
+      test_srlc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+      test_srlv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+
+      test_srac_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+      test_srav_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+
       test_pack2(p2, a1);
       for (int i=0; i<ARRLEN/2; i++) {
         errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
@@ -805,6 +880,84 @@
 
     start = System.currentTimeMillis();
     for (int i=0; i<ITERS; i++) {
+      test_sllc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
       test_pack2(p2, a1);
     }
     end = System.currentTimeMillis();
@@ -1036,6 +1189,26 @@
       a0[i] = (byte)(a1[i]<<b);
     }
   }
+  static void test_sllc_add(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE);
+    }
+  }
+  static void test_sllv_add(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + b)<<VALUE);
+    }
+  }
+  static void test_sllc_and(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE);
+    }
+  }
+  static void test_sllv_and(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & b)<<VALUE);
+    }
+  }
 
   static void test_srlc(byte[] a0, byte[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -1062,6 +1235,26 @@
       a0[i] = (byte)(a1[i]>>>b);
     }
   }
+  static void test_srlc_add(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE);
+    }
+  }
+  static void test_srlv_add(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + b)>>>VALUE);
+    }
+  }
+  static void test_srlc_and(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE);
+    }
+  }
+  static void test_srlv_and(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & b)>>>VALUE);
+    }
+  }
 
   static void test_srac(byte[] a0, byte[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -1088,6 +1281,26 @@
       a0[i] = (byte)(a1[i]>>b);
     }
   }
+  static void test_srac_add(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE);
+    }
+  }
+  static void test_srav_add(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + b)>>VALUE);
+    }
+  }
+  static void test_srac_and(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE);
+    }
+  }
+  static void test_srav_and(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & b)>>VALUE);
+    }
+  }
 
   static void test_pack2(short[] p2, byte[] a1) {
     if (p2.length*2 > a1.length) return;
--- a/test/compiler/6340864/TestIntVect.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/test/compiler/6340864/TestIntVect.java	Sun Feb 03 22:43:57 2013 +0100
@@ -74,6 +74,7 @@
       test_subc(a0, a1);
       test_subv(a0, a1, (int)VALUE);
       test_suba(a0, a1, a2);
+
       test_mulc(a0, a1);
       test_mulv(a0, a1, (int)VALUE);
       test_mula(a0, a1, a2);
@@ -86,6 +87,7 @@
       test_divc_n(a0, a1);
       test_divv(a0, a1, (int)-VALUE);
       test_diva(a0, a1, a3);
+
       test_andc(a0, a1);
       test_andv(a0, a1, (int)BIT_MASK);
       test_anda(a0, a1, a4);
@@ -95,30 +97,49 @@
       test_xorc(a0, a1);
       test_xorv(a0, a1, (int)BIT_MASK);
       test_xora(a0, a1, a4);
+
       test_sllc(a0, a1);
       test_sllv(a0, a1, VALUE);
       test_srlc(a0, a1);
       test_srlv(a0, a1, VALUE);
       test_srac(a0, a1);
       test_srav(a0, a1, VALUE);
+
       test_sllc_n(a0, a1);
       test_sllv(a0, a1, -VALUE);
       test_srlc_n(a0, a1);
       test_srlv(a0, a1, -VALUE);
       test_srac_n(a0, a1);
       test_srav(a0, a1, -VALUE);
+
       test_sllc_o(a0, a1);
       test_sllv(a0, a1, SHIFT);
       test_srlc_o(a0, a1);
       test_srlv(a0, a1, SHIFT);
       test_srac_o(a0, a1);
       test_srav(a0, a1, SHIFT);
+
       test_sllc_on(a0, a1);
       test_sllv(a0, a1, -SHIFT);
       test_srlc_on(a0, a1);
       test_srlv(a0, a1, -SHIFT);
       test_srac_on(a0, a1);
       test_srav(a0, a1, -SHIFT);
+
+      test_sllc_add(a0, a1);
+      test_sllv_add(a0, a1, ADD_INIT);
+      test_srlc_add(a0, a1);
+      test_srlv_add(a0, a1, ADD_INIT);
+      test_srac_add(a0, a1);
+      test_srav_add(a0, a1, ADD_INIT);
+
+      test_sllc_and(a0, a1);
+      test_sllv_and(a0, a1, BIT_MASK);
+      test_srlc_and(a0, a1);
+      test_srlv_and(a0, a1, BIT_MASK);
+      test_srac_and(a0, a1);
+      test_srav_and(a0, a1, BIT_MASK);
+
       test_pack2(p2, a1);
       test_unpack2(a0, p2);
       test_pack2_swap(p2, a1);
@@ -359,6 +380,60 @@
         errn += verify("test_srav_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT)));
       }
 
+      test_sllc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+      test_sllv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+
+      test_srlc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+      test_srlv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+
+      test_srac_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+      test_srav_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+
+      test_sllc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+      test_sllv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+
+      test_srlc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+      test_srlv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+
+      test_srac_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+      test_srav_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+
       test_pack2(p2, a1);
       for (int i=0; i<ARRLEN/2; i++) {
         errn += verify("test_pack2: ", i, p2[i], ((long)(ADD_INIT+2*i) & 0xFFFFFFFFl) | ((long)(ADD_INIT+2*i+1) << 32));
@@ -727,6 +802,84 @@
 
     start = System.currentTimeMillis();
     for (int i=0; i<ITERS; i++) {
+      test_sllc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
       test_pack2(p2, a1);
     }
     end = System.currentTimeMillis();
@@ -908,6 +1061,26 @@
       a0[i] = (int)(a1[i]<<b);
     }
   }
+  static void test_sllc_add(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + ADD_INIT)<<VALUE);
+    }
+  }
+  static void test_sllv_add(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + b)<<VALUE);
+    }
+  }
+  static void test_sllc_and(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & BIT_MASK)<<VALUE);
+    }
+  }
+  static void test_sllv_and(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & b)<<VALUE);
+    }
+  }
 
   static void test_srlc(int[] a0, int[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -934,6 +1107,26 @@
       a0[i] = (int)(a1[i]>>>b);
     }
   }
+  static void test_srlc_add(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + ADD_INIT)>>>VALUE);
+    }
+  }
+  static void test_srlv_add(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + b)>>>VALUE);
+    }
+  }
+  static void test_srlc_and(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & BIT_MASK)>>>VALUE);
+    }
+  }
+  static void test_srlv_and(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & b)>>>VALUE);
+    }
+  }
 
   static void test_srac(int[] a0, int[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -960,6 +1153,26 @@
       a0[i] = (int)(a1[i]>>b);
     }
   }
+  static void test_srac_add(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + ADD_INIT)>>VALUE);
+    }
+  }
+  static void test_srav_add(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + b)>>VALUE);
+    }
+  }
+  static void test_srac_and(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & BIT_MASK)>>VALUE);
+    }
+  }
+  static void test_srav_and(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & b)>>VALUE);
+    }
+  }
 
   static void test_pack2(long[] p2, int[] a1) {
     if (p2.length*2 > a1.length) return;
--- a/test/compiler/6340864/TestLongVect.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/test/compiler/6340864/TestLongVect.java	Sun Feb 03 22:43:57 2013 +0100
@@ -73,6 +73,7 @@
       test_subc(a0, a1);
       test_subv(a0, a1, (long)VALUE);
       test_suba(a0, a1, a2);
+
       test_mulc(a0, a1);
       test_mulv(a0, a1, (long)VALUE);
       test_mula(a0, a1, a2);
@@ -85,6 +86,7 @@
       test_divc_n(a0, a1);
       test_divv(a0, a1, (long)-VALUE);
       test_diva(a0, a1, a3);
+
       test_andc(a0, a1);
       test_andv(a0, a1, (long)BIT_MASK);
       test_anda(a0, a1, a4);
@@ -94,30 +96,48 @@
       test_xorc(a0, a1);
       test_xorv(a0, a1, (long)BIT_MASK);
       test_xora(a0, a1, a4);
+
       test_sllc(a0, a1);
       test_sllv(a0, a1, VALUE);
       test_srlc(a0, a1);
       test_srlv(a0, a1, VALUE);
       test_srac(a0, a1);
       test_srav(a0, a1, VALUE);
+
       test_sllc_n(a0, a1);
       test_sllv(a0, a1, -VALUE);
       test_srlc_n(a0, a1);
       test_srlv(a0, a1, -VALUE);
       test_srac_n(a0, a1);
       test_srav(a0, a1, -VALUE);
+
       test_sllc_o(a0, a1);
       test_sllv(a0, a1, SHIFT);
       test_srlc_o(a0, a1);
       test_srlv(a0, a1, SHIFT);
       test_srac_o(a0, a1);
       test_srav(a0, a1, SHIFT);
+
       test_sllc_on(a0, a1);
       test_sllv(a0, a1, -SHIFT);
       test_srlc_on(a0, a1);
       test_srlv(a0, a1, -SHIFT);
       test_srac_on(a0, a1);
       test_srav(a0, a1, -SHIFT);
+
+      test_sllc_add(a0, a1);
+      test_sllv_add(a0, a1, ADD_INIT);
+      test_srlc_add(a0, a1);
+      test_srlv_add(a0, a1, ADD_INIT);
+      test_srac_add(a0, a1);
+      test_srav_add(a0, a1, ADD_INIT);
+
+      test_sllc_and(a0, a1);
+      test_sllv_and(a0, a1, BIT_MASK);
+      test_srlc_and(a0, a1);
+      test_srlv_and(a0, a1, BIT_MASK);
+      test_srac_and(a0, a1);
+      test_srav_and(a0, a1, BIT_MASK);
     }
     // Test and verify results
     System.out.println("Verification");
@@ -354,6 +374,60 @@
         errn += verify("test_srav_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT)));
       }
 
+      test_sllc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+      test_sllv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+
+      test_srlc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+      test_srlv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+
+      test_srac_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+      test_srav_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+
+      test_sllc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+      test_sllv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+
+      test_srlc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+      test_srlv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+
+      test_srac_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+      test_srav_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+
     }
 
     if (errn > 0)
@@ -696,6 +770,84 @@
     end = System.currentTimeMillis();
     System.out.println("test_srav_on: " + (end - start));
 
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_and: " + (end - start));
+
     return errn;
   }
 
@@ -854,6 +1006,26 @@
       a0[i] = (long)(a1[i]<<b);
     }
   }
+  static void test_sllc_add(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + ADD_INIT)<<VALUE);
+    }
+  }
+  static void test_sllv_add(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + b)<<VALUE);
+    }
+  }
+  static void test_sllc_and(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] & BIT_MASK)<<VALUE);
+    }
+  }
+  static void test_sllv_and(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] & b)<<VALUE);
+    }
+  }
 
   static void test_srlc(long[] a0, long[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -880,6 +1052,26 @@
       a0[i] = (long)(a1[i]>>>b);
     }
   }
+  static void test_srlc_add(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + ADD_INIT)>>>VALUE);
+    }
+  }
+  static void test_srlv_add(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + b)>>>VALUE);
+    }
+  }
+  static void test_srlc_and(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] & BIT_MASK)>>>VALUE);
+    }
+  }
+  static void test_srlv_and(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] & b)>>>VALUE);
+    }
+  }
 
   static void test_srac(long[] a0, long[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -906,6 +1098,26 @@
       a0[i] = (long)(a1[i]>>b);
     }
   }
+  static void test_srac_add(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + ADD_INIT)>>VALUE);
+    }
+  }
+  static void test_srav_add(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + b)>>VALUE);
+    }
+  }
+  static void test_srac_and(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] & BIT_MASK)>>VALUE);
+    }
+  }
+  static void test_srav_and(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] & b)>>VALUE);
+    }
+  }
 
   static int verify(String text, int i, long elem, long val) {
     if (elem != val) {
--- a/test/compiler/6340864/TestShortVect.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/test/compiler/6340864/TestShortVect.java	Sun Feb 03 22:43:57 2013 +0100
@@ -75,6 +75,7 @@
       test_subc(a0, a1);
       test_subv(a0, a1, (short)VALUE);
       test_suba(a0, a1, a2);
+
       test_mulc(a0, a1);
       test_mulv(a0, a1, (short)VALUE);
       test_mula(a0, a1, a2);
@@ -87,6 +88,7 @@
       test_divc_n(a0, a1);
       test_divv(a0, a1, (short)-VALUE);
       test_diva(a0, a1, a3);
+
       test_andc(a0, a1);
       test_andv(a0, a1, (short)BIT_MASK);
       test_anda(a0, a1, a4);
@@ -96,30 +98,49 @@
       test_xorc(a0, a1);
       test_xorv(a0, a1, (short)BIT_MASK);
       test_xora(a0, a1, a4);
+
       test_sllc(a0, a1);
       test_sllv(a0, a1, VALUE);
       test_srlc(a0, a1);
       test_srlv(a0, a1, VALUE);
       test_srac(a0, a1);
       test_srav(a0, a1, VALUE);
+
       test_sllc_n(a0, a1);
       test_sllv(a0, a1, -VALUE);
       test_srlc_n(a0, a1);
       test_srlv(a0, a1, -VALUE);
       test_srac_n(a0, a1);
       test_srav(a0, a1, -VALUE);
+
       test_sllc_o(a0, a1);
       test_sllv(a0, a1, SHIFT);
       test_srlc_o(a0, a1);
       test_srlv(a0, a1, SHIFT);
       test_srac_o(a0, a1);
       test_srav(a0, a1, SHIFT);
+
       test_sllc_on(a0, a1);
       test_sllv(a0, a1, -SHIFT);
       test_srlc_on(a0, a1);
       test_srlv(a0, a1, -SHIFT);
       test_srac_on(a0, a1);
       test_srav(a0, a1, -SHIFT);
+
+      test_sllc_add(a0, a1);
+      test_sllv_add(a0, a1, ADD_INIT);
+      test_srlc_add(a0, a1);
+      test_srlv_add(a0, a1, ADD_INIT);
+      test_srac_add(a0, a1);
+      test_srav_add(a0, a1, ADD_INIT);
+
+      test_sllc_and(a0, a1);
+      test_sllv_and(a0, a1, BIT_MASK);
+      test_srlc_and(a0, a1);
+      test_srlv_and(a0, a1, BIT_MASK);
+      test_srac_and(a0, a1);
+      test_srav_and(a0, a1, BIT_MASK);
+
       test_pack2(p2, a1);
       test_unpack2(a0, p2);
       test_pack2_swap(p2, a1);
@@ -364,6 +385,60 @@
         errn += verify("test_srav_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-SHIFT)));
       }
 
+      test_sllc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+      test_sllv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+
+      test_srlc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+      test_srlv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+
+      test_srac_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+      test_srav_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+
+      test_sllc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+      test_sllv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+
+      test_srlc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+      test_srlv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+
+      test_srac_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+      test_srav_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+
       test_pack2(p2, a1);
       for (int i=0; i<ARRLEN/2; i++) {
         errn += verify("test_pack2: ", i, p2[i], ((int)(ADD_INIT+2*i) & 0xFFFF) | ((int)(ADD_INIT+2*i+1) << 16));
@@ -762,6 +837,84 @@
 
     start = System.currentTimeMillis();
     for (int i=0; i<ITERS; i++) {
+      test_sllc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
       test_pack2(p2, a1);
     }
     end = System.currentTimeMillis();
@@ -968,6 +1121,26 @@
       a0[i] = (short)(a1[i]<<b);
     }
   }
+  static void test_sllc_add(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] + ADD_INIT)<<VALUE);
+    }
+  }
+  static void test_sllv_add(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] + b)<<VALUE);
+    }
+  }
+  static void test_sllc_and(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] & BIT_MASK)<<VALUE);
+    }
+  }
+  static void test_sllv_and(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] & b)<<VALUE);
+    }
+  }
 
   static void test_srlc(short[] a0, short[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -994,6 +1167,26 @@
       a0[i] = (short)(a1[i]>>>b);
     }
   }
+  static void test_srlc_add(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] + ADD_INIT)>>>VALUE);
+    }
+  }
+  static void test_srlv_add(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] + b)>>>VALUE);
+    }
+  }
+  static void test_srlc_and(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] & BIT_MASK)>>>VALUE);
+    }
+  }
+  static void test_srlv_and(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] & b)>>>VALUE);
+    }
+  }
 
   static void test_srac(short[] a0, short[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -1020,6 +1213,26 @@
       a0[i] = (short)(a1[i]>>b);
     }
   }
+  static void test_srac_add(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] + ADD_INIT)>>VALUE);
+    }
+  }
+  static void test_srav_add(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] + b)>>VALUE);
+    }
+  }
+  static void test_srac_and(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] & BIT_MASK)>>VALUE);
+    }
+  }
+  static void test_srav_and(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)((a1[i] & b)>>VALUE);
+    }
+  }
 
   static void test_pack2(int[] p2, short[] a1) {
     if (p2.length*2 > a1.length) return;
--- a/test/compiler/6865265/StackOverflowBug.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/test/compiler/6865265/StackOverflowBug.java	Sun Feb 03 22:43:57 2013 +0100
@@ -28,7 +28,7 @@
  * @summary JVM crashes with "missing exception handler" error
  * @author volker.simonis@sap.com
  *
- * @run main/othervm -XX:CompileThreshold=100 -Xbatch -Xss224k StackOverflowBug
+ * @run main/othervm -XX:CompileThreshold=100 -Xbatch -Xss248k StackOverflowBug
  */
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6896617/Test6896617.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6896617
+ * @summary Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() with SSE instructions on x86
+ * @run main/othervm/timeout=1200 -Xbatch -Xmx256m Test6896617
+ *
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class Test6896617 {
+    final static int SIZE = 256;
+
+    public static void main(String[] args) {
+        String csn = "ISO-8859-1";
+        Charset cs = Charset.forName(csn);
+        CharsetEncoder enc = cs.newEncoder();
+        enc.onMalformedInput(CodingErrorAction.REPLACE)
+           .onUnmappableCharacter(CodingErrorAction.REPLACE);
+        CharsetDecoder dec = cs.newDecoder();
+        dec.onMalformedInput(CodingErrorAction.REPLACE)
+           .onUnmappableCharacter(CodingErrorAction.REPLACE);
+
+        byte repl = (byte)'?';
+        enc.replaceWith(new byte[] { repl });
+
+        // Use internal API for tests.
+        sun.nio.cs.ArrayEncoder arrenc = (sun.nio.cs.ArrayEncoder)enc;
+        sun.nio.cs.ArrayDecoder arrdec = (sun.nio.cs.ArrayDecoder)dec;
+
+        // Populate char[] with chars which can be encoded by ISO_8859_1 (<= 0xFF)
+        Random rnd = new Random(0);
+        int maxchar = 0xFF;
+        char[] a = new char[SIZE];
+        byte[] b = new byte[SIZE];
+        char[] at = new char[SIZE];
+        byte[] bt = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            char c = (char) rnd.nextInt(maxchar);
+            if (!enc.canEncode(c)) {
+                System.out.printf("Something wrong: can't encode c=%03x\n", (int)c);
+                System.exit(97);
+            }
+            a[i] = c;
+            b[i] = (byte)c;
+            at[i] = (char)-1;
+            bt[i] = (byte)-1;
+        }
+        if (arrenc.encode(a, 0, SIZE, bt) != SIZE || !Arrays.equals(b, bt)) {
+            System.out.println("Something wrong: ArrayEncoder.encode failed");
+            System.exit(97);
+        }
+        if (arrdec.decode(b, 0, SIZE, at) != SIZE || !Arrays.equals(a, at)) {
+            System.out.println("Something wrong: ArrayDecoder.decode failed");
+            System.exit(97);
+        }
+        for (int i = 0; i < SIZE; i++) {
+            at[i] = (char)-1;
+            bt[i] = (byte)-1;
+        }
+
+        ByteBuffer bb  = ByteBuffer.wrap(b);
+        CharBuffer ba  = CharBuffer.wrap(a);
+        ByteBuffer bbt = ByteBuffer.wrap(bt);
+        CharBuffer bat = CharBuffer.wrap(at);
+        if (!enc.encode(ba, bbt, true).isUnderflow() || !Arrays.equals(b, bt)) {
+            System.out.println("Something wrong: Encoder.encode failed");
+            System.exit(97);
+        }
+        if (!dec.decode(bb, bat, true).isUnderflow() || !Arrays.equals(a, at)) {
+            System.out.println("Something wrong: Decoder.decode failed");
+            System.exit(97);
+        }
+        for (int i = 0; i < SIZE; i++) {
+            at[i] = (char)-1;
+            bt[i] = (byte)-1;
+        }
+
+        // Warm up
+        boolean failed = false;
+        int result = 0;
+        for (int i = 0; i < 10000; i++) {
+            result += arrenc.encode(a, 0, SIZE, bt);
+            result -= arrdec.decode(b, 0, SIZE, at);
+        }
+        for (int i = 0; i < 10000; i++) {
+            result += arrenc.encode(a, 0, SIZE, bt);
+            result -= arrdec.decode(b, 0, SIZE, at);
+        }
+        for (int i = 0; i < 10000; i++) {
+            result += arrenc.encode(a, 0, SIZE, bt);
+            result -= arrdec.decode(b, 0, SIZE, at);
+        }
+        if (result != 0 || !Arrays.equals(b, bt) || !Arrays.equals(a, at)) {
+            failed = true;
+            System.out.println("Failed: ArrayEncoder.encode char[" + SIZE + "] and ArrayDecoder.decode byte[" + SIZE + "]");
+        }
+        for (int i = 0; i < SIZE; i++) {
+            at[i] = (char)-1;
+            bt[i] = (byte)-1;
+        }
+
+        boolean is_underflow = true;
+        for (int i = 0; i < 10000; i++) {
+            ba.clear(); bb.clear(); bat.clear(); bbt.clear();
+            boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
+            boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
+            is_underflow = is_underflow && enc_res && dec_res;
+        }
+        for (int i = 0; i < SIZE; i++) {
+            at[i] = (char)-1;
+            bt[i] = (byte)-1;
+        }
+        for (int i = 0; i < 10000; i++) {
+            ba.clear(); bb.clear(); bat.clear(); bbt.clear();
+            boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
+            boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
+            is_underflow = is_underflow && enc_res && dec_res;
+        }
+        for (int i = 0; i < SIZE; i++) {
+            at[i] = (char)-1;
+            bt[i] = (byte)-1;
+        }
+        for (int i = 0; i < 10000; i++) {
+            ba.clear(); bb.clear(); bat.clear(); bbt.clear();
+            boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
+            boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
+            is_underflow = is_underflow && enc_res && dec_res;
+        }
+        if (!is_underflow || !Arrays.equals(b, bt) || !Arrays.equals(a, at)) {
+            failed = true;
+            System.out.println("Failed: Encoder.encode char[" + SIZE + "] and Decoder.decode byte[" + SIZE + "]");
+        }
+
+        // Test encoder with different source and destination sizes
+        System.out.println("Testing different source and destination sizes");
+        for (int i = 1; i <= SIZE; i++) {
+            for (int j = 1; j <= SIZE; j++) {
+                bt = new byte[j];
+                // vary the source size
+                result = arrenc.encode(a, 0, i, bt);
+                int l = Math.min(i, j);
+                if (result != l) {
+                    failed = true;
+                    System.out.println("Failed: encode char[" + i + "] to byte[" + j + "]: result = " + result + ", expected " + l);
+                }
+                for (int k = 0; k < l; k++) {
+                    if (bt[k] != b[k]) {
+                        failed = true;
+                        System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[k]);
+                    }
+                }
+                // vary the source offset
+                int sz = SIZE - i + 1;
+                result = arrenc.encode(a, i-1, sz, bt);
+                l = Math.min(sz, j);
+                if (result != l) {
+                    failed = true;
+                    System.out.println("Failed: encode char[" + sz + "] to byte[" + j + "]: result = " + result + ", expected " + l);
+                }
+                for (int k = 0; k < l; k++) {
+                    if (bt[k] != b[i+k-1]) {
+                        failed = true;
+                        System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[i+k-1]);
+                    }
+                }
+            }
+        }
+
+        // Test encoder with char > 0xFF
+        System.out.println("Testing big char");
+
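+        // Chars bumped above 0xFF are unmappable in ISO-8859-1 and must be
+        // encoded as the replacement byte '?' set via replaceWith() above.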
+        byte orig = (byte)'A';
+        bt = new byte[SIZE];
+        for (int i = 1; i <= SIZE; i++) {
+            for (int j = 0; j < i; j++) {
+                a[j] += 0x100;
+                // make sure to replace a different byte
+                bt[j] = orig;
+                result = arrenc.encode(a, 0, i, bt);
+                if (result != i) {
+                    failed = true;
+                    System.out.println("Failed: encode char[" + i + "] to byte[" + i + "]: result = " + result + ", expected " + i);
+                }
+                if (bt[j] != repl) {
+                    failed = true;
+                    System.out.println("Failed: encoded replace byte[" + j + "] (" + bt[j] + ") != " + repl);
+                }
+                bt[j] = b[j]; // Restore to compare whole array
+                for (int k = 0; k < i; k++) {
+                    if (bt[k] != b[k]) {
+                        failed = true;
+                        System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[k]);
+                    }
+                }
+                a[j] -= 0x100; // Restore
+            }
+        }
+
+        // Test sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() performance.
+
+        int itrs = Integer.getInteger("iterations", 1000000);
+        int size = Integer.getInteger("size", 256);
+        a  = new char[size];
+        b  = new byte[size];
+        bt = new byte[size];
+        for (int i = 0; i < size; i++) {
+            char c = (char) rnd.nextInt(maxchar);
+            if (!enc.canEncode(c)) {
+                System.out.printf("Something wrong: can't encode c=%03x\n", (int)c);
+                System.exit(97);
+            }
+            a[i] = c;
+            b[i]  = (byte)-1;
+            bt[i] = (byte)c;
+        }
+        ba = CharBuffer.wrap(a);
+        bb = ByteBuffer.wrap(b);
+        boolean enc_res = enc.encode(ba, bb, true).isUnderflow();
+        if (!enc_res || !Arrays.equals(b, bt)) {
+            failed = true;
+            System.out.println("Failed 1: Encoder.encode char[" + size + "]");
+        }
+        for (int i = 0; i < size; i++) {
+            b[i] = (byte)-1;
+        }
+
+        // Make sure to recompile method if needed before performance run.
+        for (int i = 0; i < 10000; i++) {
+            ba.clear(); bb.clear();
+            enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow();
+        }
+        for (int i = 0; i < size; i++) {
+            b[i] = (byte)-1;
+        }
+        for (int i = 0; i < 10000; i++) {
+            ba.clear(); bb.clear();
+            enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow();
+        }
+        if (!enc_res || !Arrays.equals(b, bt)) {
+            failed = true;
+            System.out.println("Failed 2: Encoder.encode char[" + size + "]");
+        }
+        for (int i = 0; i < size; i++) {
+            b[i] = (byte)-1;
+        }
+
+        System.out.println("Testing ISO_8859_1$Encode.encodeArrayLoop() performance");
+        long start = System.currentTimeMillis();
+        for (int i = 0; i < itrs; i++) {
+            ba.clear(); bb.clear();
+            enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow();
+        }
+        long end = System.currentTimeMillis();
+        if (!enc_res || !Arrays.equals(b, bt)) {
+            failed = true;
+            System.out.println("Failed 3: Encoder.encode char[" + size + "]");
+        } else {
+            System.out.println("size: " + size + " time: " + (end - start));
+        }
+
+        // Test sun.nio.cs.ISO_8859_1$Encode.encode() performance.
+
+        // Make sure to recompile method if needed before performance run.
+        result = 0;
+        for (int i = 0; i < size; i++) {
+            b[i] = (byte)-1;
+        }
+        for (int i = 0; i < 10000; i++) {
+            result += arrenc.encode(a, 0, size, b);
+        }
+        for (int i = 0; i < size; i++) {
+            b[i] = (byte)-1;
+        }
+        for (int i = 0; i < 10000; i++) {
+            result += arrenc.encode(a, 0, size, b);
+        }
+        if (result != size*20000 || !Arrays.equals(b, bt)) {
+            failed = true;
+            System.out.println("Failed 1: ArrayEncoder.encode char[" + SIZE + "]");
+        }
+        for (int i = 0; i < size; i++) {
+            b[i] = (byte)-1;
+        }
+
+        System.out.println("Testing ISO_8859_1$Encode.encode() performance");
+        result = 0;
+        start = System.currentTimeMillis();
+        for (int i = 0; i < itrs; i++) {
+            result += arrenc.encode(a, 0, size, b);
+        }
+        end = System.currentTimeMillis();
+        if (!Arrays.equals(b, bt)) {
+            failed = true;
+            System.out.println("Failed 2: ArrayEncoder.encode char[" + size + "]");
+        } else {
+            System.out.println("size: " + size + " time: " + (end - start));
+        }
+
+        if (failed) {
+          System.out.println("FAILED");
+          System.exit(97);
+        }
+        System.out.println("PASSED");
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7184394/TestAESBase.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @author Tom Deneau
+ */
+
+import javax.crypto.Cipher;
+import javax.crypto.KeyGenerator;
+import javax.crypto.SecretKey;
+import javax.crypto.spec.IvParameterSpec;
+import javax.crypto.spec.SecretKeySpec;
+import java.security.AlgorithmParameters;
+
+import java.util.Random;
+import java.util.Arrays;
+
+public abstract class TestAESBase {
+  int msgSize = Integer.getInteger("msgSize", 646);
+  boolean checkOutput = Boolean.getBoolean("checkOutput");
+  boolean noReinit = Boolean.getBoolean("noReinit");
+  int keySize = Integer.getInteger("keySize", 128);
+  String algorithm = System.getProperty("algorithm", "AES");
+  String mode = System.getProperty("mode", "CBC");
+  byte[] input;
+  byte[] encode;
+  byte[] expectedEncode;
+  byte[] decode;
+  byte[] expectedDecode;
+  Random random = new Random(0);
+  Cipher cipher;
+  Cipher dCipher;
+  String paddingStr = "PKCS5Padding";
+  AlgorithmParameters algParams;
+  SecretKey key;
+
+  static int numThreads = 0;
+  int  threadId;
+  static synchronized int getThreadId() {
+    int id = numThreads;
+    numThreads++;
+    return id;
+  }
+
+  public abstract void run();
+
+  public void prepare() {
+    try {
+    System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput);
+
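+      // keySize is given in bits; 0 falls back to a 16-byte (128-bit) key.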
+      int keyLenBytes = (keySize == 0 ? 16 : keySize/8);
+      byte keyBytes[] = new byte[keyLenBytes];
+      if (keySize == 128)
+        keyBytes = new byte[] {-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7};
+      else
+        random.nextBytes(keyBytes);
+
+      key = new SecretKeySpec(keyBytes, algorithm);
+      if (threadId == 0) {
+        System.out.println("Algorithm: " + key.getAlgorithm() + "("
+                           + key.getEncoded().length * 8 + "bit)");
+      }
+      input = new byte[msgSize];
+      for (int i=0; i<input.length; i++) {
+        input[i] = (byte) (i & 0xff);
+      }
+
+      cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
+      dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
+
+      if (mode.equals("CBC")) {
+        int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
+        IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
+        cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
+      } else {
+        algParams = cipher.getParameters();
+        cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
+      }
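+      // Initialize the decrypt cipher from the encrypt cipher's parameters
+      // (including any IV) so both operate on the same settings.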
+      algParams = cipher.getParameters();
+      dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
+      if (threadId == 0) {
+        childShowCipher();
+      }
+
+      // do one encode and decode in preparation
+      // this will also create the encode buffer and decode buffer
+      encode = cipher.doFinal(input);
+      decode = dCipher.doFinal(encode);
+      if (checkOutput) {
+        expectedEncode = (byte[]) encode.clone();
+        expectedDecode = (byte[]) decode.clone();
+        showArray(key.getEncoded()  ,  "key:    ");
+        showArray(input,  "input:  ");
+        showArray(encode, "encode: ");
+        showArray(decode, "decode: ");
+      }
+    }
+    catch (Exception e) {
+      e.printStackTrace();
+      System.exit(1);
+    }
+  }
+
+  void showArray(byte b[], String name) {
+    System.out.format("%s [%d]: ", name, b.length);
+    for (int i=0; i<Math.min(b.length, 32); i++) {
+      System.out.format("%02x ", b[i] & 0xff);
+    }
+    System.out.println();
+  }
+
+  void compareArrays(byte b[], byte exp[]) {
+    if (b.length != exp.length) {
+      System.out.format("different lengths for actual and expected output arrays\n");
+      showArray(b, "test: ");
+      showArray(exp, "exp : ");
+      System.exit(1);
+    }
+    for (int i=0; i< exp.length; i++) {
+      if (b[i] != exp[i]) {
+        System.out.format("output error at index %d: got %02x, expected %02x\n", i, b[i] & 0xff, exp[i] & 0xff);
+        showArray(b, "test: ");
+        showArray(exp, "exp : ");
+        System.exit(1);
+      }
+    }
+  }
+
+
+  void showCipher(Cipher c, String kind) {
+    System.out.println(kind + " cipher provider: " + cipher.getProvider());
+    System.out.println(kind + " cipher algorithm: " + cipher.getAlgorithm());
+  }
+
+  abstract void childShowCipher();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7184394/TestAESDecode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @author Tom Deneau
+ */
+
+import javax.crypto.Cipher;
+
+public class TestAESDecode extends TestAESBase {
+  @Override
+  public void run() {
+    try {
+      if (!noReinit) dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
+      if (checkOutput) {
+        // checked version creates new output buffer each time
+        decode = dCipher.doFinal(encode, 0, encode.length);
+        compareArrays(decode, expectedDecode);
+      } else {
+        // non-checked version outputs to existing encode buffer for maximum speed
+        decode = new byte[dCipher.getOutputSize(encode.length)];
+        dCipher.doFinal(encode, 0, encode.length, decode);
+      }
+    }
+    catch (Exception e) {
+      e.printStackTrace();
+      System.exit(1);
+    }
+  }
+
+  @Override
+  void childShowCipher() {
+    showCipher(dCipher, "Decryption");
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7184394/TestAESEncode.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @author Tom Deneau
+ */
+
+import javax.crypto.Cipher;
+
+public class TestAESEncode extends TestAESBase {
+  @Override
+  public void run() {
+    try {
+      if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
+      if (checkOutput) {
+        // checked version creates new output buffer each time
+        encode = cipher.doFinal(input, 0, msgSize);
+        compareArrays(encode, expectedEncode);
+      } else {
+        // non-checked version outputs to existing encode buffer for maximum speed
+        encode = new byte[cipher.getOutputSize(msgSize)];
+        cipher.doFinal(input, 0, msgSize, encode);
+      }
+    }
+    catch (Exception e) {
+      e.printStackTrace();
+      System.exit(1);
+    }
+  }
+
+  @Override
+  void childShowCipher() {
+    showCipher(cipher, "Encryption");
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7184394/TestAESMain.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7184394
+ * @summary add intrinsics to use AES instructions
+ *
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB TestAESMain
+ *
+ * @author Tom Deneau
+ */
+
+public class TestAESMain {
+  public static void main(String[] args) {
+    int iters = (args.length > 0 ? Integer.valueOf(args[0]) : 1000000);
+    System.out.println(iters + " iterations");
+    TestAESEncode etest = new TestAESEncode();
+    etest.prepare();
+    long start = System.nanoTime();
+    for (int i=0; i<iters; i++) {
+      etest.run();
+    }
+    long end = System.nanoTime();
+    System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000000.0) + " ms");
+
+    TestAESDecode dtest = new TestAESDecode();
+    dtest.prepare();
+    start = System.nanoTime();
+    for (int i=0; i<iters; i++) {
+      dtest.run();
+    }
+    end = System.nanoTime();
+    System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000000.0) + " ms");
+  }
+}
--- a/test/compiler/7190310/Test7190310.java	Mon Nov 05 17:03:33 2012 -0500
+++ b/test/compiler/7190310/Test7190310.java	Sun Feb 03 22:43:57 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,16 @@
  */
 
 /*
- * Manual test
+ * @test
+ * @bug 7190310
+ * @summary Inlining WeakReference.get(), and hoisting $referent may lead to non-terminating loops
+ * @run main/othervm/timeout=600 -Xbatch Test7190310
+ */
+
+/*
+ * Note: the bug exhibits as an infinite loop, so the timeout is helpful.
+ * The test should normally finish quickly, but on especially slow machines
+ * it may not.  The companion _unsafe test lacks a timeout, but that is okay.
  */
 
 import java.lang.ref.*;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/8000805/Test8000805.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8000805
+ * @summary JMM issue: short loads are non-atomic
+ *
+ * @run main/othervm -server -XX:-TieredCompilation -Xcomp -XX:+PrintCompilation -XX:CompileOnly=Test8000805.loadS2LmaskFF,Test8000805.loadS2Lmask16,Test8000805.loadS2Lmask13,Test8000805.loadUS_signExt,Test8000805.loadB2L_mask8 Test8000805
+ */
+
+public class Test8000805 {
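+    // Each load pattern exists twice: the first copy is force-compiled via
+    // -XX:CompileOnly above, while the *_1 copy stays interpreted (CompileOnly
+    // restricts -Xcomp to the listed methods); main() compares the results.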
+    static long loadS2LmaskFF   (short[] sa) { return sa[0] & 0xFF; }
+    static long loadS2LmaskFF_1 (short[] sa) { return sa[0] & 0xFF; }
+
+    static long loadS2Lmask16   (short[] sa) { return sa[0] & 0xFFFE; }
+    static long loadS2Lmask16_1 (short[] sa) { return sa[0] & 0xFFFE; }
+
+    static long loadS2Lmask13   (short[] sa) { return sa[0] & 0x0FFF; }
+    static long loadS2Lmask13_1 (short[] sa) { return sa[0] & 0x0FFF; }
+
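+    // (c << 16) >> 16 sign-extends the unsigned 16-bit char value to int.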
+    static int loadUS_signExt   (char[] ca) { return (ca[0] << 16) >> 16; }
+    static int loadUS_signExt_1 (char[] ca) { return (ca[0] << 16) >> 16; }
+
+    static long loadB2L_mask8   (byte[] ba) { return ba[0] & 0x55; }
+    static long loadB2L_mask8_1 (byte[] ba) { return ba[0] & 0x55; }
+
+    public static void main(String[] args) {
+        for (int i = Byte.MIN_VALUE; i < Byte.MAX_VALUE; i++) {
+            byte[] ba = new byte[]  { (byte) i};
+
+            { long v1 = loadB2L_mask8(ba);
+              long v2 = loadB2L_mask8_1(ba);
+              if (v1 != v2)
+              throw new InternalError(String.format("loadB2L_mask8 failed: %x != %x", v1, v2)); }
+        }
+
+        for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
+            short[] sa = new short[] { (short)i };
+            char[] ca = new char[] { (char)i };
+
+            { long v1 = loadS2LmaskFF(sa);
+              long v2 = loadS2LmaskFF_1(sa);
+              if (v1 != v2)
+              throw new InternalError(String.format("loadS2LmaskFF failed: %x != %x", v1, v2)); }
+
+            { long v1 = loadS2Lmask16(sa);
+              long v2 = loadS2Lmask16_1(sa);
+              if (v1 != v2)
+              throw new InternalError(String.format("loadS2Lmask16 failed: %x != %x", v1, v2)); }
+
+            { long v1 = loadS2Lmask13(sa);
+              long v2 = loadS2Lmask13_1(sa);
+              if (v1 != v2)
+              throw new InternalError(String.format("loadS2Lmask13 failed: %x != %x", v1, v2)); }
+
+            { int v1 = loadUS_signExt(ca);
+              int v2 = loadUS_signExt_1(ca);
+              if (v1 != v2)
+                throw new InternalError(String.format("loadUS_signExt failed: %x != %x", v1, v2)); }
+        }
+
+        System.out.println("TEST PASSED.");
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/8001183/TestCharVect.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,1332 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8001183
+ * @summary incorrect results of char vectors right shift operation
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestCharVect
+ */
+
+public class TestCharVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int ADD_INIT = Character.MAX_VALUE-500;
+  private static final int BIT_MASK = 0xB731;
+  private static final int VALUE = 7;
+  private static final int SHIFT = 16;
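+  // ADD_INIT starts close to Character.MAX_VALUE so additions wrap around,
+  // and SHIFT == 16 exercises shifts by the full width of a 16-bit char.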
+
+  public static void main(String args[]) {
+    System.out.println("Testing Char vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    char[] a0 = new char[ARRLEN];
+    char[] a1 = new char[ARRLEN];
+    short[] a2 = new short[ARRLEN];
+    short[] a3 = new short[ARRLEN];
+    short[] a4 = new short[ARRLEN];
+     int[] p2 = new  int[ARRLEN/2];
+    long[] p4 = new long[ARRLEN/4];
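+    // p2 and p4 receive chars packed two-per-int and four-per-long for the
+    // pack/unpack round-trip tests.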
+    // Initialize
+    int gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      char val = (char)(ADD_INIT+i);
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = VALUE;
+      a3[i] = -VALUE;
+      a4[i] = (short)BIT_MASK;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, (char)VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, (char)VALUE);
+      test_suba(a0, a1, a2);
+
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, (char)VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, (char)-VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, -VALUE);
+      test_diva(a0, a1, a3);
+
+      test_andc(a0, a1);
+      test_andv(a0, a1, (short)BIT_MASK);
+      test_anda(a0, a1, a4);
+      test_orc(a0, a1);
+      test_orv(a0, a1, (short)BIT_MASK);
+      test_ora(a0, a1, a4);
+      test_xorc(a0, a1);
+      test_xorv(a0, a1, (short)BIT_MASK);
+      test_xora(a0, a1, a4);
+
+      test_sllc(a0, a1);
+      test_sllv(a0, a1, VALUE);
+      test_srlc(a0, a1);
+      test_srlv(a0, a1, VALUE);
+      test_srac(a0, a1);
+      test_srav(a0, a1, VALUE);
+
+      test_sllc_n(a0, a1);
+      test_sllv(a0, a1, -VALUE);
+      test_srlc_n(a0, a1);
+      test_srlv(a0, a1, -VALUE);
+      test_srac_n(a0, a1);
+      test_srav(a0, a1, -VALUE);
+
+      test_sllc_o(a0, a1);
+      test_sllv(a0, a1, SHIFT);
+      test_srlc_o(a0, a1);
+      test_srlv(a0, a1, SHIFT);
+      test_srac_o(a0, a1);
+      test_srav(a0, a1, SHIFT);
+
+      test_sllc_on(a0, a1);
+      test_sllv(a0, a1, -SHIFT);
+      test_srlc_on(a0, a1);
+      test_srlv(a0, a1, -SHIFT);
+      test_srac_on(a0, a1);
+      test_srav(a0, a1, -SHIFT);
+
+      test_sllc_add(a0, a1);
+      test_sllv_add(a0, a1, ADD_INIT);
+      test_srlc_add(a0, a1);
+      test_srlv_add(a0, a1, ADD_INIT);
+      test_srac_add(a0, a1);
+      test_srav_add(a0, a1, ADD_INIT);
+
+      test_sllc_and(a0, a1);
+      test_sllv_and(a0, a1, BIT_MASK);
+      test_srlc_and(a0, a1);
+      test_srlv_and(a0, a1, BIT_MASK);
+      test_srac_and(a0, a1);
+      test_srav_and(a0, a1, BIT_MASK);
+
+      test_pack2(p2, a1);
+      test_unpack2(a0, p2);
+      test_pack2_swap(p2, a1);
+      test_unpack2_swap(a0, p2);
+      test_pack4(p4, a1);
+      test_unpack4(a0, p4);
+      test_pack4_swap(p4, a1);
+      test_unpack4_swap(a0, p4);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      int sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+
+      test_addc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], (char)((char)(ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, (char)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], (char)((char)(ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], (char)((char)(ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], (char)((char)(ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, (char)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], (char)((char)(ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], (char)((char)(ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], (char)((char)(ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, (char)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], (char)((char)(ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], (char)((char)(ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], (char)((char)(ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], (char)((char)(ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], (char)((char)(ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, (char)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], (char)((char)(ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], (char)((char)(ADD_INIT+i)/(-VALUE)));
+      }
+
+      test_andc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andc: ", i, a0[i], (char)((char)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_andv(a0, a1, (short)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andv: ", i, a0[i], (char)((char)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_anda(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_anda: ", i, a0[i], (char)((char)(ADD_INIT+i)&BIT_MASK));
+      }
+
+      test_orc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orc: ", i, a0[i], (char)((char)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_orv(a0, a1, (short)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orv: ", i, a0[i], (char)((char)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_ora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ora: ", i, a0[i], (char)((char)(ADD_INIT+i)|BIT_MASK));
+      }
+
+      test_xorc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorc: ", i, a0[i], (char)((char)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xorv(a0, a1, (short)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorv: ", i, a0[i], (char)((char)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xora: ", i, a0[i], (char)((char)(ADD_INIT+i)^BIT_MASK));
+      }
+
+      test_sllc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc: ", i, a0[i], (char)((char)(ADD_INIT+i)<<VALUE));
+      }
+      test_sllv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv: ", i, a0[i], (char)((char)(ADD_INIT+i)<<VALUE));
+      }
+
+      test_srlc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>VALUE));
+      }
+      test_srlv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>VALUE));
+      }
+
+      test_srac(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac: ", i, a0[i], (char)((char)(ADD_INIT+i)>>VALUE));
+      }
+      test_srav(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav: ", i, a0[i], (char)((char)(ADD_INIT+i)>>VALUE));
+      }
+
+      test_sllc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-VALUE)));
+      }
+      test_sllv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-VALUE)));
+      }
+
+      test_srlc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-VALUE)));
+      }
+      test_srlv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-VALUE)));
+      }
+
+      test_srac_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-VALUE)));
+      }
+      test_srav(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-VALUE)));
+      }
+
+      test_sllc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_o: ", i, a0[i], (char)((char)(ADD_INIT+i)<<SHIFT));
+      }
+      test_sllv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_o: ", i, a0[i], (char)((char)(ADD_INIT+i)<<SHIFT));
+      }
+
+      test_srlc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>SHIFT));
+      }
+      test_srlv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>SHIFT));
+      }
+
+      test_srac_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>SHIFT));
+      }
+      test_srav(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>SHIFT));
+      }
+
+      test_sllc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_on: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-SHIFT)));
+      }
+      test_sllv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_on: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-SHIFT)));
+      }
+
+      test_srlc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+      test_srlv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+
+      test_srac_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-SHIFT)));
+      }
+      test_srav(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-SHIFT)));
+      }
+
+      test_sllc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+      test_sllv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+
+      test_srlc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+      test_srlv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+
+      test_srac_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+      test_srav_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+
+      test_sllc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+      test_sllv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+
+      test_srlc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+      test_srlv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+
+      test_srac_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+      test_srav_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+
+      test_pack2(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2: ", i, p2[i], ((int)(ADD_INIT+2*i) & 0xFFFF) | ((int)(ADD_INIT+2*i+1) << 16));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = (char)-1;
+      }
+      test_unpack2(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2: ", i, a0[i], (char)(ADD_INIT+i));
+      }
+
+      test_pack2_swap(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2_swap: ", i, p2[i], ((int)(ADD_INIT+2*i+1) & 0xFFFF) | ((int)(ADD_INIT+2*i) << 16));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = (char)-1;
+      }
+      test_unpack2_swap(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2_swap: ", i, a0[i], (char)(ADD_INIT+i));
+      }
+
+      test_pack4(p4, a1);
+      for (int i=0; i<ARRLEN/4; i++) {
+        errn += verify("test_pack4: ", i, p4[i],  ((long)(ADD_INIT+4*i+0) & 0xFFFFl) |
+                                                 (((long)(ADD_INIT+4*i+1) & 0xFFFFl) << 16)  |
+                                                 (((long)(ADD_INIT+4*i+2) & 0xFFFFl) << 32)  |
+                                                 (((long)(ADD_INIT+4*i+3) & 0xFFFFl) << 48));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = (char)-1;
+      }
+      test_unpack4(a0, p4);
+      for (int i=0; i<(ARRLEN&(-4)); i++) {
+        errn += verify("test_unpack4: ", i, a0[i], (char)(ADD_INIT+i));
+      }
+
+      test_pack4_swap(p4, a1);
+      for (int i=0; i<ARRLEN/4; i++) {
+        errn += verify("test_pack4_swap: ", i, p4[i],  ((long)(ADD_INIT+4*i+3) & 0xFFFFl) |
+                                                      (((long)(ADD_INIT+4*i+2) & 0xFFFFl) << 16)  |
+                                                      (((long)(ADD_INIT+4*i+1) & 0xFFFFl) << 32)  |
+                                                      (((long)(ADD_INIT+4*i+0) & 0xFFFFl) << 48));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = (char)-1;
+      }
+      test_unpack4_swap(a0, p4);
+      for (int i=0; i<(ARRLEN&(-4)); i++) {
+        errn += verify("test_unpack4_swap: ", i, a0[i], (char)(ADD_INIT+i));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, (char)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, (char)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (char)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (char)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andv(a0, a1, (short)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_anda(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_anda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orv(a0, a1, (short)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorv(a0, a1, (short)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2_swap(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2_swap(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2_swap: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack4(p4, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack4: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack4(a0, p4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack4: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack4_swap(p4, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack4_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack4_swap(a0, p4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack4_swap: " + (end - start));
+
+    return errn;
+  }
+
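+  // The kernels below are deliberately single-statement loops over char
+  // arrays: C2's SuperWord pass is expected to vectorize them, while the
+  // timing loops above merely report elapsed wall-clock time per kernel.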
+  static int test_sum(char[] a1) {
+    int sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]+VALUE);
+    }
+  }
+  static void test_addv(char[] a0, char[] a1, char b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]+b);
+    }
+  }
+  static void test_adda(char[] a0, char[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]-VALUE);
+    }
+  }
+  static void test_subv(char[] a0, char[] a1, char b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]-b);
+    }
+  }
+  static void test_suba(char[] a0, char[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(char[] a0, char[] a1, char b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]*b);
+    }
+  }
+  static void test_mula(char[] a0, char[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(char[] a0, char[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]/b);
+    }
+  }
+  static void test_diva(char[] a0, char[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]/a2[i]);
+    }
+  }
+
+  static void test_andc(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]&BIT_MASK);
+    }
+  }
+  static void test_andv(char[] a0, char[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]&b);
+    }
+  }
+  static void test_anda(char[] a0, char[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]&a2[i]);
+    }
+  }
+
+  static void test_orc(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]|BIT_MASK);
+    }
+  }
+  static void test_orv(char[] a0, char[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]|b);
+    }
+  }
+  static void test_ora(char[] a0, char[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]|a2[i]);
+    }
+  }
+
+  static void test_xorc(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]^BIT_MASK);
+    }
+  }
+  static void test_xorv(char[] a0, char[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]^b);
+    }
+  }
+  static void test_xora(char[] a0, char[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]^a2[i]);
+    }
+  }
+
+  static void test_sllc(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]<<VALUE);
+    }
+  }
+  static void test_sllc_n(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]<<(-VALUE));
+    }
+  }
+  static void test_sllc_o(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]<<SHIFT);
+    }
+  }
+  static void test_sllc_on(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]<<(-SHIFT));
+    }
+  }
+  static void test_sllv(char[] a0, char[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]<<b);
+    }
+  }
+  static void test_sllc_add(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] + ADD_INIT)<<VALUE);
+    }
+  }
+  static void test_sllv_add(char[] a0, char[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] + b)<<VALUE);
+    }
+  }
+  static void test_sllc_and(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] & BIT_MASK)<<VALUE);
+    }
+  }
+  static void test_sllv_and(char[] a0, char[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] & b)<<VALUE);
+    }
+  }
+
+  static void test_srlc(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]>>>VALUE);
+    }
+  }
+  static void test_srlc_n(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]>>>(-VALUE));
+    }
+  }
+  static void test_srlc_o(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]>>>SHIFT);
+    }
+  }
+  static void test_srlc_on(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]>>>(-SHIFT));
+    }
+  }
+  static void test_srlv(char[] a0, char[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]>>>b);
+    }
+  }
+  static void test_srlc_add(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] + ADD_INIT)>>>VALUE);
+    }
+  }
+  static void test_srlv_add(char[] a0, char[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] + b)>>>VALUE);
+    }
+  }
+  static void test_srlc_and(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] & BIT_MASK)>>>VALUE);
+    }
+  }
+  static void test_srlv_and(char[] a0, char[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] & b)>>>VALUE);
+    }
+  }
+
+  static void test_srac(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]>>VALUE);
+    }
+  }
+  static void test_srac_n(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]>>(-VALUE));
+    }
+  }
+  static void test_srac_o(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]>>SHIFT);
+    }
+  }
+  static void test_srac_on(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]>>(-SHIFT));
+    }
+  }
+  static void test_srav(char[] a0, char[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)(a1[i]>>b);
+    }
+  }
+  static void test_srac_add(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] + ADD_INIT)>>VALUE);
+    }
+  }
+  static void test_srav_add(char[] a0, char[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] + b)>>VALUE);
+    }
+  }
+  static void test_srac_and(char[] a0, char[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] & BIT_MASK)>>VALUE);
+    }
+  }
+  static void test_srav_and(char[] a0, char[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (char)((a1[i] & b)>>VALUE);
+    }
+  }
+
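+  // Pack/unpack kernels: two chars per int, the low char in bits 0-15 and
+  // the high char in bits 16-31, so a1 = {0x0001, 0x0002} packs to
+  // p2[0] = 0x00020001; the _swap variants store the pair in reverse order.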
+  static void test_pack2(int[] p2, char[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l0 = (int)a1[i*2+0];
+      int l1 = (int)a1[i*2+1];
+      p2[i] = (l1 << 16) | (l0 & 0xFFFF);
+    }
+  }
+  static void test_unpack2(char[] a0, int[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l = p2[i];
+      a0[i*2+0] = (char)(l & 0xFFFF);
+      a0[i*2+1] = (char)(l >> 16);
+    }
+  }
+  static void test_pack2_swap(int[] p2, char[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l0 = (int)a1[i*2+0];
+      int l1 = (int)a1[i*2+1];
+      p2[i] = (l0 << 16) | (l1 & 0xFFFF);
+    }
+  }
+  static void test_unpack2_swap(char[] a0, int[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l = p2[i];
+      a0[i*2+0] = (char)(l >> 16);
+      a0[i*2+1] = (char)(l & 0xFFFF);
+    }
+  }
+
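+  // Same idea with four chars per long: a1 = {1, 2, 3, 4} packs to
+  // p4[0] = 0x0004000300020001L, and the _swap variants reverse the lanes.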
+  static void test_pack4(long[] p4, char[] a1) {
+    if (p4.length*4 > a1.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l0 = (long)a1[i*4+0];
+      long l1 = (long)a1[i*4+1];
+      long l2 = (long)a1[i*4+2];
+      long l3 = (long)a1[i*4+3];
+      p4[i] = (l0 & 0xFFFFL) |
+             ((l1 & 0xFFFFL) << 16) |
+             ((l2 & 0xFFFFL) << 32) |
+             ((l3 & 0xFFFFL) << 48);
+    }
+  }
+  static void test_unpack4(char[] a0, long[] p4) {
+    if (p4.length*4 > a0.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l = p4[i];
+      a0[i*4+0] = (char)(l & 0xFFFFL);
+      a0[i*4+1] = (char)(l >> 16);
+      a0[i*4+2] = (char)(l >> 32);
+      a0[i*4+3] = (char)(l >> 48);
+    }
+  }
+  static void test_pack4_swap(long[] p4, char[] a1) {
+    if (p4.length*4 > a1.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l0 = (long)a1[i*4+0];
+      long l1 = (long)a1[i*4+1];
+      long l2 = (long)a1[i*4+2];
+      long l3 = (long)a1[i*4+3];
+      p4[i] = (l3 & 0xFFFFL) |
+             ((l2 & 0xFFFFL) << 16) |
+             ((l1 & 0xFFFFL) << 32) |
+             ((l0 & 0xFFFFL) << 48);
+    }
+  }
+  static void test_unpack4_swap(char[] a0, long[] p4) {
+    if (p4.length*4 > a0.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l = p4[i];
+      a0[i*4+0] = (char)(l >> 48);
+      a0[i*4+1] = (char)(l >> 32);
+      a0[i*4+2] = (char)(l >> 16);
+      a0[i*4+3] = (char)(l & 0xFFFFL);
+    }
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/8002069/Test8002069.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8002069
+ * @summary Assert failed in C2: assert(field->edge_count() > 0) failed: sanity
+ *
+ * @run main/othervm -Xmx32m -XX:+IgnoreUnrecognizedVMOptions -Xbatch -XX:CompileCommand=exclude,Test8002069.dummy Test8002069
+ */
+
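+// Test shape: test1() warms up O.foo() with receiver type A, then test2()
+// switches to receiver type B; the recompilation this forces is what tripped
+// the escape-analysis sanity assert quoted in the @summary above.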
+abstract class O {
+  int f;
+  public O() { f = 5; }
+  abstract void put(int i);
+  public int foo(int i) {
+    put(i);
+    return i;
+  }
+}
+
+class A extends O {
+  int[] a;
+  public A(int s) {
+    a = new int[s];
+  }
+  public void put(int i) {
+    a[i%a.length] = i;
+  }
+}
+
+class B extends O {
+  int sz;
+  int[] a;
+  public B(int s) {
+    sz = s;
+    a = new int[s];
+  }
+  public void put(int i) {
+    a[i%sz] = i;
+  }
+}
+
+public class Test8002069 {
+  public static void main(String[] args) {
+    int sum = 0;
+    for (int i=0; i<8000; i++) {
+      sum += test1(i);
+    }
+    for (int i=0; i<100000; i++) {
+      sum += test2(i);
+    }
+    System.out.println("PASSED. sum = " + sum);
+  }
+
+  private O o;
+
+  private int foo(int i) {
+    return o.foo(i);
+  }
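+  // foo() is the polymorphic call site: its receiver is an A in test1()
+  // and a B in test2().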
+  static int test1(int i) {
+    Test8002069 t = new Test8002069();
+    t.o = new A(5);
+    return t.foo(i);
+  }
+  static int test2(int i) {
+    Test8002069 t = new Test8002069();
+    t.o = new B(5);
+    dummy(i);
+    return t.foo(i);
+  }
+
+  static int dummy(int i) {
+    return i*2;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/8004741/Test8004741.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test Test8004741.java
+ * @bug 8004741
+ * @summary Missing compiled exception handler table entry for multidimensional array allocation
+ * @run main/othervm -Xmx64m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:+StressCompiledExceptionHandlers Test8004741
+ *
+ */
+
+import java.util.*;
+
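+// run() repeatedly allocates a multidimensional array while main() calls
+// Thread.stop(); the asynchronous ThreadDeath must be dispatched through a
+// compiled exception-handler table entry for the allocation site, which the
+// broken build failed to emit.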
+public class Test8004741 extends Thread {
+
+  static int[][] test(int a, int b) throws Exception {
+    int[][] ar = null;
+    try {
+      ar = new int[a][b];
+    } catch (Error e) {
+      System.out.println("test got Error");
+      passed = true;
+      throw(e);
+    } catch (Exception e) {
+      System.out.println("test got Exception");
+      throw(e);
+    }
+    return ar;
+  }
+
+  static boolean passed = false;
+
+  public void run() {
+      System.out.println("test started");
+      try {
+        while(true) {
+          test(2,20000);
+        }
+      } catch (ThreadDeath e) {
+        System.out.println("test got ThreadDeath");
+        passed = true;
+      } catch (Error e) {
+        e.printStackTrace();
+        System.out.println("test got Error");
+      } catch (Exception e) {
+        e.printStackTrace();
+        System.out.println("test got Exception");
+      }
+  }
+
+  public static void main(String[] args) throws Exception {
+    for (int n = 0; n < 11000; n++) {
+      test(2, 20);
+    }
+
+    // First, test that the asynchronous exception is caught.
+    Test8004741 t = new Test8004741();
+
+    passed = false;
+    t.start();
+    Thread.sleep(1000);
+    t.stop();
+
+    Thread.sleep(5000);
+    t.join();
+    if (passed) {
+      System.out.println("PASSED");
+    } else {
+      System.out.println("FAILED");
+      System.exit(97);
+    }
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/8005033/Test8005033.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2012 SAP AG.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8005033
+ * @summary On sparcv9, C2's intrinsic for Integer.bitCount(OV) returns wrong result if OV is the result of an operation with int overflow.
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test8005033::testBitCount Test8005033
+ * @author Richard Reingruber richard DOT reingruber AT sap DOT com
+ */
+
+public class Test8005033 {
+    public static int MINUS_ONE = -1;
+
+    public static void main(String[] args) {
+        System.out.println("EXECUTING test.");
+        Integer.bitCount(1);   // load class
+        int expectedBitCount = 0;
+        int calculatedBitCount = testBitCount();
+        if (expectedBitCount != calculatedBitCount) {
+            throw new InternalError("got " + calculatedBitCount + " but expected " + expectedBitCount);
+        }
+        System.out.println("SUCCESSFULLY passed test.");
+    }
+
+    // testBitCount will be compiled using the Integer.bitCount() intrinsic if possible
+    private static int testBitCount() {
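+        // On sparcv9, POPC counts bits in the full 64-bit register; if the
+        // 32-bit result of -1 + 1 is not re-sign-extended, the register may
+        // still hold 0x1_0000_0000 and the intrinsic reports 1 instead of 0.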
+        return Integer.bitCount(MINUS_ONE+1);   // -1 + 1 => int overflow
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/8005419/Test8005419.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8005419
+ * @summary Improve intrinsics code performance on x86 by using AVX2
+ * @run main/othervm -Xbatch -Xmx64m Test8005419
+ *
+ */
+
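+// Compares strings at every length up to SIZE so that both the wide
+// vectorized main loop and the scalar tail of the compareTo intrinsic are
+// presumably exercised.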
+public class Test8005419 {
+    public static int SIZE = 64;
+
+    public static void main(String[] args) {
+        char[] a = new char[SIZE];
+        char[] b = new char[SIZE];
+
+        for (int i = 16; i < SIZE; i++) {
+          a[i] = (char)i;
+          b[i] = (char)i;
+        }
+        String s1 = new String(a);
+        String s2 = new String(b);
+
+        // Warm up
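+        // (three identical loops, presumably so the method is hit again after
+        // each tier of recompilation; -Xbatch makes compilation synchronous)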
+        boolean failed = false;
+        int result = 0;
+        for (int i = 0; i < 10000; i++) {
+          result += test(s1, s2);
+        }
+        for (int i = 0; i < 10000; i++) {
+          result += test(s1, s2);
+        }
+        for (int i = 0; i < 10000; i++) {
+          result += test(s1, s2);
+        }
+        if (result != 0) failed = true;
+
+        System.out.println("Start testing");
+        // Compare same string
+        result = test(s1, s1);
+        if (result != 0) {
+          failed = true;
+          System.out.println("Failed same: result = " + result + ", expected 0");
+        }
+        // Compare equal strings
+        for (int i = 1; i <= SIZE; i++) {
+          s1 = new String(a, 0, i);
+          s2 = new String(b, 0, i);
+          result = test(s1, s2);
+          if (result != 0) {
+            failed = true;
+            System.out.println("Failed equals s1[" + i + "], s2[" + i + "]: result = " + result + ", expected 0");
+          }
+        }
+        // Compare equal strings but different sizes
+        for (int i = 1; i <= SIZE; i++) {
+          s1 = new String(a, 0, i);
+          for (int j = 1; j <= SIZE; j++) {
+            s2 = new String(b, 0, j);
+            result = test(s1, s2);
+            if (result != (i-j)) {
+              failed = true;
+              System.out.println("Failed diff size s1[" + i + "], s2[" + j + "]: result = " + result + ", expected " + (i-j));
+            }
+          }
+        }
+        // Compare strings with one char different and different sizes
+        for (int i = 1; i <= SIZE; i++) {
+          s1 = new String(a, 0, i);
+          for (int j = 0; j < i; j++) {
+            b[j] -= 3; // change char
+            s2 = new String(b, 0, i);
+            result = test(s1, s2);
+            int chdiff = a[j] - b[j];
+            if (result != chdiff) {
+              failed = true;
+              System.out.println("Failed diff char s1[" + i + "], s2[" + i + "]: result = " + result + ", expected " + chdiff);
+            }
+            result = test(s2, s1);
+            chdiff = b[j] - a[j];
+            if (result != chdiff) {
+              failed = true;
+              System.out.println("Failed diff char s2[" + i + "], s1[" + i + "]: result = " + result + ", expected " + chdiff);
+            }
+            b[j] += 3; // restore
+          }
+        }
+        if (failed) {
+          System.out.println("FAILED");
+          System.exit(97);
+        }
+        System.out.println("PASSED");
+    }
+
+    private static int test(String str1, String str2) {
+        return str1.compareTo(str2);
+    }
+}
--- a/test/runtime/7158804/Test7158804.sh	Mon Nov 05 17:03:33 2012 -0500
+++ b/test/runtime/7158804/Test7158804.sh	Sun Feb 03 22:43:57 2013 +0100
@@ -1,6 +1,6 @@
 #!/bin/sh
 #
-# Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/8003720/Asmator.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+import jdk.internal.org.objectweb.asm.*;
+
+class Asmator {
+    static byte[] fixup(byte[] buf) throws java.io.IOException {
+        ClassReader cr = new ClassReader(buf);
+        ClassWriter cw = new ClassWriter(0);
+        ClassVisitor cv = new ClassVisitor(Opcodes.ASM4, cw) {
+            public MethodVisitor visitMethod(
+                final int access,
+                final String name,
+                final String desc,
+                final String signature,
+                final String[] exceptions)
+            {
+                MethodVisitor mv = super.visitMethod(access,
+                        name,
+                        desc,
+                        signature,
+                        exceptions);
+                if (mv == null)  return null;
+                if (name.equals("callme")) {
+                    // make receiver go dead!
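+                    // aconst_null / astore_0 overwrite local slot 0 ("this"),
+                    // so the receiver is unreachable for the rest of callme().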
+                    mv.visitInsn(Opcodes.ACONST_NULL);
+                    mv.visitVarInsn(Opcodes.ASTORE, 0);
+                }
+                return mv;
+            }
+        };
+        cr.accept(cv, 0);
+        return cw.toByteArray();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/8003720/Test8003720.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 8003720
+ * @summary Method in interpreter stack frame can be deallocated
+ * @compile -XDignore.symbol.file -source 1.7 -target 1.7 Victim.java
+ * @run main/othervm -Xverify:all -Xint Test8003720
+ */
+
+// Attempts to make the JVM unload a class while still executing one of its methods.
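+// Victim is loaded through a single-use VictimClassLoader, and its callme()
+// bytecode is rewritten so the receiver goes dead on entry; once the method
+// is running, nothing obviously keeps the class or its loader reachable.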
+public class Test8003720 {
+    final static String VICTIM_CLASS_NAME = "Victim";
+    final static boolean QUIET = true;
+    final static long DURATION = 30000;
+
+    public interface CallMe { void callme(); }
+
+    public static void main(String... av) throws Throwable {
+        newVictimClassLoader();
+        System.gc();
+
+        newVictimClass();
+        System.gc();
+
+        newVictimInstance();
+        System.gc();
+
+        ((CallMe)newVictimInstance()).callme();
+    }
+
+    public static Object newVictimInstance() throws Throwable {
+        return newVictimClass().newInstance();
+    }
+
+    public static Class<?> newVictimClass() throws Throwable {
+        return Class.forName(VICTIM_CLASS_NAME, true, new VictimClassLoader());
+    }
+
+    public static ClassLoader newVictimClassLoader() throws Throwable {
+        return new VictimClassLoader();
+    }
+
+    public static void println(String line) {
+        if (!QUIET) {
+            System.out.println(line);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/8003720/Victim.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+public class Victim implements Test8003720.CallMe {
+    public void callme() {
+        // note: Victim.this is dead here
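+        // (Asmator nulled out local slot 0 on entry, so this instance no
+        // longer pins Victim.class or its class loader)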
+        Test8003720.println("executing in loader=" + Victim.class.getClassLoader());
+
+        long now = System.currentTimeMillis();
+
+        while ((System.currentTimeMillis() - now) < Test8003720.DURATION) {
+            long count = VictimClassLoader.counter++;
+            if (count %  1000000 == 0)  System.gc();
+            if (count % 16180000 == 0)  blurb();
+            new Object[1].clone();
+        }
+    }
+    static void blurb() {
+        Test8003720.println("count=" + VictimClassLoader.counter);
+    }
+    static {
+        blather();
+    }
+    static void blather() {
+        new java.util.ArrayList<Object>(1000000);
+        Class<Victim> c = Victim.class;
+        Test8003720.println("initializing " + c + "#" + System.identityHashCode(c) + " in " + c.getClassLoader());
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/8003720/VictimClassLoader.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+public class VictimClassLoader extends ClassLoader {
+    public static long counter = 0;
+
+    private int which = (int) ++counter;
+
+    protected VictimClassLoader() {
+        super(VictimClassLoader.class.getClassLoader());
+    }
+
+    protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
+        Class<?> c;
+        if (!name.endsWith("Victim")) {
+            c = super.loadClass(name, resolve);
+            return c;
+        }
+
+        c = findLoadedClass(name);
+        if (c != null) {
+            return c;
+        }
+
+        byte[] buf = readClassFile(name);
+        c = defineClass(name, buf, 0, buf.length);
+        resolveClass(c);
+
+        if (c.getClassLoader() != this) {
+            throw new AssertionError();
+        }
+
+        Test8003720.println("loaded " + c + "#" + System.identityHashCode(c) + " in " + c.getClassLoader());
+        return c;
+    }
+
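+    // Only Victim itself is defined by this loader; everything else is
+    // delegated to the parent, so each VictimClassLoader keeps exactly one
+    // class alive and can be unloaded once that class is unreachable.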
+    static byte[] readClassFile(String name) {
+        try {
+            String rname = name.substring(name.lastIndexOf('.') + 1) + ".class";
+            java.net.URL url = VictimClassLoader.class.getResource(rname);
+            Test8003720.println("found " + rname + " = " + url);
+
+            java.net.URLConnection connection = url.openConnection();
+            int contentLength = connection.getContentLength();
+            byte[] buf = readFully(connection.getInputStream(), contentLength);
+
+            return Asmator.fixup(buf);
+        } catch (java.io.IOException ex) {
+            throw new Error(ex);
+        }
+    }
+
+    static byte[] readFully(java.io.InputStream in, int len) throws java.io.IOException {
+        // sun.misc.IOUtils is JDK-internal API; javac flags a warning here.
+        return sun.misc.IOUtils.readFully(in, len, true);
+    }
+
+    public void finalize() {
+        Test8003720.println("Goodbye from " + this);
+    }
+
+    public String toString() {
+        return "VictimClassLoader#" + which;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/sanity/ExecuteInternalVMTests.java	Sun Feb 03 22:43:57 2013 +0100
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/* @test ExecuteInternalVMTests
+ * @bug 8004691
+ * @summary Add a jtreg test that exercises the ExecuteInternalVMTests flag
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+ExecuteInternalVMTests ExecuteInternalVMTests
+ */
+public class ExecuteInternalVMTests {
+    public static void main(String[] args) throws Exception {
+        // The tests that are run are the HotSpot internal tests which are
+        // executed only when the flag -XX:+ExecuteInternalVMTests is used.
+
+        // The flag -XX:+ExecuteInternalVMTests can only be used for
+        // non-product builds of HotSpot. Therefore, the flag
+        // -XX:+IgnoreUnrecognizedVMOptions is also used, which means that this
+        // test will do nothing on a product build.
+    }
+}