changeset 6622:9e3ae661284d hs24-b21

Merge
author amurillo
date Fri, 24 Aug 2012 15:51:19 -0700
parents de2aa86e037d (current diff) c32dee9b8023 (diff)
children e8fb566b9466
files src/share/vm/gc_implementation/parNew/parGCAllocBuffer.cpp src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp
diffstat 99 files changed, 13318 insertions(+), 2452 deletions(-) [+]
line wrap: on
line diff
--- a/make/hotspot_version	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/hotspot_version	Fri Aug 24 15:51:19 2012 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=24
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=20
+HS_BUILD_NUMBER=21
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/jprt.properties	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/jprt.properties	Fri Aug 24 15:51:19 2012 -0700
@@ -38,7 +38,7 @@
 
 # This tells jprt what default release we want to build
 
-jprt.hotspot.default.release=jdk7
+jprt.hotspot.default.release=jdk8
 
 jprt.tools.default.release=${jprt.submit.option.release?${jprt.submit.option.release}:${jprt.hotspot.default.release}}
 
@@ -54,77 +54,77 @@
 # Define the Solaris platforms we want for the various releases
 jprt.my.solaris.sparc.jdk8=solaris_sparc_5.10
 jprt.my.solaris.sparc.jdk7=solaris_sparc_5.10
-jprt.my.solaris.sparc.jdk7u6=${jprt.my.solaris.sparc.jdk7}
+jprt.my.solaris.sparc.jdk7u8=${jprt.my.solaris.sparc.jdk7}
 jprt.my.solaris.sparc=${jprt.my.solaris.sparc.${jprt.tools.default.release}}
 
 jprt.my.solaris.sparcv9.jdk8=solaris_sparcv9_5.10
 jprt.my.solaris.sparcv9.jdk7=solaris_sparcv9_5.10
-jprt.my.solaris.sparcv9.jdk7u6=${jprt.my.solaris.sparcv9.jdk7}
+jprt.my.solaris.sparcv9.jdk7u8=${jprt.my.solaris.sparcv9.jdk7}
 jprt.my.solaris.sparcv9=${jprt.my.solaris.sparcv9.${jprt.tools.default.release}}
 
 jprt.my.solaris.i586.jdk8=solaris_i586_5.10
 jprt.my.solaris.i586.jdk7=solaris_i586_5.10
-jprt.my.solaris.i586.jdk7u6=${jprt.my.solaris.i586.jdk7}
+jprt.my.solaris.i586.jdk7u8=${jprt.my.solaris.i586.jdk7}
 jprt.my.solaris.i586=${jprt.my.solaris.i586.${jprt.tools.default.release}}
 
 jprt.my.solaris.x64.jdk8=solaris_x64_5.10
 jprt.my.solaris.x64.jdk7=solaris_x64_5.10
-jprt.my.solaris.x64.jdk7u6=${jprt.my.solaris.x64.jdk7}
+jprt.my.solaris.x64.jdk7u8=${jprt.my.solaris.x64.jdk7}
 jprt.my.solaris.x64=${jprt.my.solaris.x64.${jprt.tools.default.release}}
 
 jprt.my.linux.i586.jdk8=linux_i586_2.6
 jprt.my.linux.i586.jdk7=linux_i586_2.6
-jprt.my.linux.i586.jdk7u6=${jprt.my.linux.i586.jdk7}
+jprt.my.linux.i586.jdk7u8=${jprt.my.linux.i586.jdk7}
 jprt.my.linux.i586=${jprt.my.linux.i586.${jprt.tools.default.release}}
 
 jprt.my.linux.x64.jdk8=linux_x64_2.6
 jprt.my.linux.x64.jdk7=linux_x64_2.6
-jprt.my.linux.x64.jdk7u6=${jprt.my.linux.x64.jdk7}
+jprt.my.linux.x64.jdk7u8=${jprt.my.linux.x64.jdk7}
 jprt.my.linux.x64=${jprt.my.linux.x64.${jprt.tools.default.release}}
 
 jprt.my.linux.ppc.jdk8=linux_ppc_2.6
 jprt.my.linux.ppc.jdk7=linux_ppc_2.6
-jprt.my.linux.ppc.jdk7u6=${jprt.my.linux.ppc.jdk7}
+jprt.my.linux.ppc.jdk7u8=${jprt.my.linux.ppc.jdk7}
 jprt.my.linux.ppc=${jprt.my.linux.ppc.${jprt.tools.default.release}}
 
 jprt.my.linux.ppcv2.jdk8=linux_ppcv2_2.6
 jprt.my.linux.ppcv2.jdk7=linux_ppcv2_2.6
-jprt.my.linux.ppcv2.jdk7u6=${jprt.my.linux.ppcv2.jdk7}
+jprt.my.linux.ppcv2.jdk7u8=${jprt.my.linux.ppcv2.jdk7}
 jprt.my.linux.ppcv2=${jprt.my.linux.ppcv2.${jprt.tools.default.release}}
 
 jprt.my.linux.ppcsflt.jdk8=linux_ppcsflt_2.6
 jprt.my.linux.ppcsflt.jdk7=linux_ppcsflt_2.6
-jprt.my.linux.ppcsflt.jdk7u6=${jprt.my.linux.ppcsflt.jdk7}
+jprt.my.linux.ppcsflt.jdk7u8=${jprt.my.linux.ppcsflt.jdk7}
 jprt.my.linux.ppcsflt=${jprt.my.linux.ppcsflt.${jprt.tools.default.release}}
 
 jprt.my.linux.armvfp.jdk8=linux_armvfp_2.6
 jprt.my.linux.armvfp.jdk7=linux_armvfp_2.6
-jprt.my.linux.armvfp.jdk7u6=${jprt.my.linux.armvfp.jdk7}
+jprt.my.linux.armvfp.jdk7u8=${jprt.my.linux.armvfp.jdk7}
 jprt.my.linux.armvfp=${jprt.my.linux.armvfp.${jprt.tools.default.release}}
 
 jprt.my.linux.armv6.jdk8=linux_armv6_2.6
 jprt.my.linux.armv6.jdk7=linux_armv6_2.6
-jprt.my.linux.armv6.jdk7u6=${jprt.my.linux.armv6.jdk7}
+jprt.my.linux.armv6.jdk7u8=${jprt.my.linux.armv6.jdk7}
 jprt.my.linux.armv6=${jprt.my.linux.armv6.${jprt.tools.default.release}}
 
 jprt.my.linux.armsflt.jdk8=linux_armsflt_2.6
 jprt.my.linux.armsflt.jdk7=linux_armsflt_2.6
-jprt.my.linux.armsflt.jdk7u6=${jprt.my.linux.armsflt.jdk7}
+jprt.my.linux.armsflt.jdk7u8=${jprt.my.linux.armsflt.jdk7}
 jprt.my.linux.armsflt=${jprt.my.linux.armsflt.${jprt.tools.default.release}}
 
 jprt.my.macosx.x64.jdk8=macosx_x64_10.7
 jprt.my.macosx.x64.jdk7=macosx_x64_10.7
-jprt.my.macosx.x64.jdk7u6=${jprt.my.macosx.x64.jdk7}
+jprt.my.macosx.x64.jdk7u8=${jprt.my.macosx.x64.jdk7}
 jprt.my.macosx.x64=${jprt.my.macosx.x64.${jprt.tools.default.release}}
 
 jprt.my.windows.i586.jdk8=windows_i586_5.1
 jprt.my.windows.i586.jdk7=windows_i586_5.1
-jprt.my.windows.i586.jdk7u6=${jprt.my.windows.i586.jdk7}
+jprt.my.windows.i586.jdk7u8=${jprt.my.windows.i586.jdk7}
 jprt.my.windows.i586=${jprt.my.windows.i586.${jprt.tools.default.release}}
 
 jprt.my.windows.x64.jdk8=windows_x64_5.2
 jprt.my.windows.x64.jdk7=windows_x64_5.2
-jprt.my.windows.x64.jdk7u6=${jprt.my.windows.x64.jdk7}
+jprt.my.windows.x64.jdk7u8=${jprt.my.windows.x64.jdk7}
 jprt.my.windows.x64=${jprt.my.windows.x64.${jprt.tools.default.release}}
 
 # Standard list of jprt build targets for this source tree
@@ -159,7 +159,7 @@
 
 jprt.build.targets.jdk8=${jprt.build.targets.all}
 jprt.build.targets.jdk7=${jprt.build.targets.all}
-jprt.build.targets.jdk7u6=${jprt.build.targets.all}
+jprt.build.targets.jdk7u8=${jprt.build.targets.all}
 jprt.build.targets=${jprt.build.targets.${jprt.tools.default.release}}
 
 # Subset lists of test targets for this source tree
@@ -452,7 +452,7 @@
 
 jprt.test.targets.jdk8=${jprt.test.targets.standard}
 jprt.test.targets.jdk7=${jprt.test.targets.standard}
-jprt.test.targets.jdk7u6=${jprt.test.targets.jdk7}
+jprt.test.targets.jdk7u8=${jprt.test.targets.jdk7}
 jprt.test.targets=${jprt.test.targets.${jprt.tools.default.release}}
 
 # The default test/Makefile targets that should be run
@@ -512,7 +512,7 @@
 
 jprt.make.rule.test.targets.jdk8=${jprt.make.rule.test.targets.standard}
 jprt.make.rule.test.targets.jdk7=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk7u6=${jprt.make.rule.test.targets.jdk7}
+jprt.make.rule.test.targets.jdk7u8=${jprt.make.rule.test.targets.jdk7}
 jprt.make.rule.test.targets=${jprt.make.rule.test.targets.${jprt.tools.default.release}}
 
 # 7155453: Work-around to prevent popups on OSX from blocking test completion
--- a/make/linux/makefiles/adlc.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/linux/makefiles/adlc.make	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -133,8 +133,10 @@
 # Note that product files are updated via "mv", which is atomic.
 TEMPDIR := $(OUTDIR)/mktmp$(shell echo $$$$)
 
-# Debuggable by default
-CFLAGS += -g
+ifneq ($(DEBUG_BINARIES), true)
+  # Debuggable by default (unless already done by DEBUG_BINARIES)
+  CFLAGS += -g
+endif
 
 # Pass -D flags into ADLC.
 ADLCFLAGS += $(SYSDEFS)
--- a/make/linux/makefiles/gcc.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/linux/makefiles/gcc.make	Fri Aug 24 15:51:19 2012 -0700
@@ -215,47 +215,46 @@
 #------------------------------------------------------------------------
 # Debug flags
 
-# Use the stabs format for debugging information (this is the default
-# on gcc-2.91). It's good enough, has all the information about line
-# numbers and local variables, and libjvm_g.so is only about 16M.
-# Change this back to "-g" if you want the most expressive format.
-# (warning: that could easily inflate libjvm_g.so to 150M!)
-# Note: The Itanium gcc compiler crashes when using -gstabs.
-DEBUG_CFLAGS/ia64  = -g
-DEBUG_CFLAGS/amd64 = -g
-DEBUG_CFLAGS/arm   = -g
-DEBUG_CFLAGS/ppc   = -g
-DEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
-ifeq ($(DEBUG_CFLAGS/$(BUILDARCH)),)
-DEBUG_CFLAGS += -gstabs
-endif
-
-ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-  FASTDEBUG_CFLAGS/ia64  = -g
-  FASTDEBUG_CFLAGS/amd64 = -g
-  FASTDEBUG_CFLAGS/arm   = -g
-  FASTDEBUG_CFLAGS/ppc   = -g
-  FASTDEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
-  ifeq ($(FASTDEBUG_CFLAGS/$(BUILDARCH)),)
-    FASTDEBUG_CFLAGS += -gstabs
+# DEBUG_BINARIES uses full -g debug information for all configs
+ifeq ($(DEBUG_BINARIES), true)
+  CFLAGS += -g
+else
+  # Use the stabs format for debugging information (this is the default
+  # on gcc-2.91). It's good enough, has all the information about line
+  # numbers and local variables, and libjvm_g.so is only about 16M.
+  # Change this back to "-g" if you want the most expressive format.
+  # (warning: that could easily inflate libjvm_g.so to 150M!)
+  # Note: The Itanium gcc compiler crashes when using -gstabs.
+  DEBUG_CFLAGS/ia64  = -g
+  DEBUG_CFLAGS/amd64 = -g
+  DEBUG_CFLAGS/arm   = -g
+  DEBUG_CFLAGS/ppc   = -g
+  DEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
+  ifeq ($(DEBUG_CFLAGS/$(BUILDARCH)),)
+    DEBUG_CFLAGS += -gstabs
   endif
-
-  OPT_CFLAGS/ia64  = -g
-  OPT_CFLAGS/amd64 = -g
-  OPT_CFLAGS/arm   = -g
-  OPT_CFLAGS/ppc   = -g
-  OPT_CFLAGS += $(OPT_CFLAGS/$(BUILDARCH))
-  ifeq ($(OPT_CFLAGS/$(BUILDARCH)),)
-    OPT_CFLAGS += -gstabs
+  
+  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+    FASTDEBUG_CFLAGS/ia64  = -g
+    FASTDEBUG_CFLAGS/amd64 = -g
+    FASTDEBUG_CFLAGS/arm   = -g
+    FASTDEBUG_CFLAGS/ppc   = -g
+    FASTDEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
+    ifeq ($(FASTDEBUG_CFLAGS/$(BUILDARCH)),)
+      FASTDEBUG_CFLAGS += -gstabs
+    endif
+  
+    OPT_CFLAGS/ia64  = -g
+    OPT_CFLAGS/amd64 = -g
+    OPT_CFLAGS/arm   = -g
+    OPT_CFLAGS/ppc   = -g
+    OPT_CFLAGS += $(OPT_CFLAGS/$(BUILDARCH))
+    ifeq ($(OPT_CFLAGS/$(BUILDARCH)),)
+      OPT_CFLAGS += -gstabs
+    endif
   endif
 endif
 
-# DEBUG_BINARIES overrides everything, use full -g debug information
-ifeq ($(DEBUG_BINARIES), true)
-  DEBUG_CFLAGS = -g
-  CFLAGS += $(DEBUG_CFLAGS)
-endif
-
 # If we are building HEADLESS, pass on to VM
 # so it can set the java.awt.headless property
 ifdef HEADLESS
--- a/make/windows/makefiles/defs.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/windows/makefiles/defs.make	Fri Aug 24 15:51:19 2012 -0700
@@ -188,14 +188,22 @@
   MAKE_ARGS += JDK_BUILD_NUMBER=$(COOKED_BUILD_NUMBER)
 endif
 
-NMAKE= MAKEFLAGS= MFLAGS= nmake /NOLOGO
+NMAKE= MAKEFLAGS= MFLAGS= nmake -NOLOGO
+ifndef SYSTEM_UNAME
+  SYSTEM_UNAME := $(shell uname)
+  export SYSTEM_UNAME
+endif
 
 # Check for CYGWIN
-ifneq (,$(findstring CYGWIN,$(shell uname)))
+ifneq (,$(findstring CYGWIN,$(SYSTEM_UNAME)))
   USING_CYGWIN=true
 else
   USING_CYGWIN=false
 endif
+# Check for MinGW
+ifneq (,$(findstring MINGW,$(SYSTEM_UNAME)))
+  USING_MINGW=true
+endif
 # FIXUP: The subdirectory for a debug build is NOT the same on all platforms
 VM_DEBUG=debug
 
@@ -208,11 +216,16 @@
   ABS_BOOTDIR     := $(subst /,\\,$(shell /bin/cygpath -m -a "$(BOOTDIR)"))
   ABS_GAMMADIR    := $(subst /,\\,$(shell /bin/cygpath -m -a "$(GAMMADIR)"))
   ABS_OS_MAKEFILE := $(shell /bin/cygpath -m -a "$(HS_MAKE_DIR)/$(OSNAME)")/build.make
-else
-  ABS_OUTPUTDIR   := $(subst /,\\,$(shell $(CD) $(OUTPUTDIR);$(PWD)))
-  ABS_BOOTDIR     := $(subst /,\\,$(shell $(CD) $(BOOTDIR);$(PWD)))
-  ABS_GAMMADIR    := $(subst /,\\,$(shell $(CD) $(GAMMADIR);$(PWD)))
-  ABS_OS_MAKEFILE := $(subst /,\\,$(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make)
+else ifeq ($(USING_MINGW), true)
+    ABS_OUTPUTDIR   := $(shell $(CD) $(OUTPUTDIR);$(PWD))
+    ABS_BOOTDIR     := $(shell $(CD) $(BOOTDIR);$(PWD))
+    ABS_GAMMADIR    := $(shell $(CD) $(GAMMADIR);$(PWD))
+    ABS_OS_MAKEFILE := $(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make
+  else
+    ABS_OUTPUTDIR   := $(subst /,\\,$(shell $(CD) $(OUTPUTDIR);$(PWD)))
+    ABS_BOOTDIR     := $(subst /,\\,$(shell $(CD) $(BOOTDIR);$(PWD)))
+    ABS_GAMMADIR    := $(subst /,\\,$(shell $(CD) $(GAMMADIR);$(PWD)))
+    ABS_OS_MAKEFILE := $(subst /,\\,$(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make)
 endif
 
 # Disable building SA on windows until we are sure
--- a/make/windows/makefiles/rules.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/windows/makefiles/rules.make	Fri Aug 24 15:51:19 2012 -0700
@@ -23,14 +23,15 @@
 #
 
 # These are the commands used externally to compile and run.
-
+# The \ are used here for traditional Windows apps and " quoted to get
+# past the Unix-like shell:
 !ifdef BootStrapDir
-RUN_JAVA=$(BootStrapDir)\bin\java
-RUN_JAVAP=$(BootStrapDir)\bin\javap
-RUN_JAVAH=$(BootStrapDir)\bin\javah
-RUN_JAR=$(BootStrapDir)\bin\jar
-COMPILE_JAVAC=$(BootStrapDir)\bin\javac $(BOOTSTRAP_JAVAC_FLAGS)
-COMPILE_RMIC=$(BootStrapDir)\bin\rmic
+RUN_JAVA="$(BootStrapDir)\bin\java"
+RUN_JAVAP="$(BootStrapDir)\bin\javap"
+RUN_JAVAH="$(BootStrapDir)\bin\javah"
+RUN_JAR="$(BootStrapDir)\bin\jar"
+COMPILE_JAVAC="$(BootStrapDir)\bin\javac" $(BOOTSTRAP_JAVAC_FLAGS)
+COMPILE_RMIC="$(BootStrapDir)\bin\rmic"
 BOOT_JAVA_HOME=$(BootStrapDir)
 !else
 RUN_JAVA=java
--- a/make/windows/makefiles/sa.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/windows/makefiles/sa.make	Fri Aug 24 15:51:19 2012 -0700
@@ -36,37 +36,37 @@
 !include $(WorkSpace)/make/windows/makefiles/rules.make
 !include $(WorkSpace)/make/sa.files
 
-GENERATED = ..\generated
+GENERATED = ../generated
 
 # tools.jar is needed by the JDI - SA binding
-SA_CLASSPATH = $(BOOT_JAVA_HOME)\lib\tools.jar
+SA_CLASSPATH = $(BOOT_JAVA_HOME)/lib/tools.jar
 
-SA_CLASSDIR = $(GENERATED)\saclasses
+SA_CLASSDIR = $(GENERATED)/saclasses
 
 SA_BUILD_VERSION_PROP = sun.jvm.hotspot.runtime.VM.saBuildVersion=$(SA_BUILD_VERSION)
 
-SA_PROPERTIES = $(SA_CLASSDIR)\sa.properties
+SA_PROPERTIES = $(SA_CLASSDIR)/sa.properties
 
-default::  $(GENERATED)\sa-jdi.jar
+default::  $(GENERATED)/sa-jdi.jar
 
 # Remove the space between $(SA_BUILD_VERSION_PROP) and > below as it adds a white space
 # at the end of SA version string and causes a version mismatch with the target VM version.
 
-$(GENERATED)\sa-jdi.jar: $(AGENT_FILES:/=\)
-	@if not exist $(SA_CLASSDIR) mkdir $(SA_CLASSDIR)
-	@echo ...Building sa-jdi.jar
+$(GENERATED)/sa-jdi.jar: $(AGENT_FILES)
+	$(QUIETLY) mkdir -p $(SA_CLASSDIR)
+	@echo ...Building sa-jdi.jar into $(SA_CLASSDIR)
 	@echo ...$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -d $(SA_CLASSDIR) ....
-	@$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -sourcepath $(AGENT_SRC_DIR) -d $(SA_CLASSDIR) $(AGENT_FILES:/=\)
+	@$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -sourcepath $(AGENT_SRC_DIR) -d $(SA_CLASSDIR) $(AGENT_FILES)
 	$(COMPILE_RMIC) -classpath $(SA_CLASSDIR) -d $(SA_CLASSDIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer
 	$(QUIETLY) echo $(SA_BUILD_VERSION_PROP)> $(SA_PROPERTIES)
 	$(QUIETLY) rm -f $(SA_CLASSDIR)/sun/jvm/hotspot/utilities/soql/sa.js
 	$(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(SA_CLASSDIR)/sun/jvm/hotspot/utilities/soql
 	$(QUIETLY) rm -rf $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
-	$(QUIETLY) mkdir $(SA_CLASSDIR)\sun\jvm\hotspot\ui\resources
+	$(QUIETLY) mkdir $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
 	$(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/ui/resources/*.png $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
 	$(QUIETLY) cp -r $(AGENT_SRC_DIR)/images/* $(SA_CLASSDIR)
 	$(RUN_JAR) cf $@ -C $(SA_CLASSDIR) .
-	$(RUN_JAR) uf $@ -C $(AGENT_SRC_DIR:/=\) META-INF\services\com.sun.jdi.connect.Connector
+	$(RUN_JAR) uf $@ -C $(AGENT_SRC_DIR) META-INF/services/com.sun.jdi.connect.Connector
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.windbg.WindbgDebuggerLocal
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext 
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.ia64.IA64ThreadContext 
@@ -85,27 +85,27 @@
 # will be useful to have the assertion checks in place
 
 !if "$(BUILDARCH)" == "ia64"
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 $(GX_OPTION) -Od -D "WIN32" -D "WIN64" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -c
 !elseif "$(BUILDARCH)" == "amd64"
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 $(GX_OPTION) -Od -D "WIN32" -D "WIN64" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -c
 !if "$(COMPILER_NAME)" == "VS2005"
 # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line, 
 # otherwise we get missing __security_check_cookie externals at link time. 
 SA_LD_FLAGS = bufferoverflowU.lib
 !endif
 !else
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 -Gm $(GX_OPTION) -Od -D "WIN32" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -GZ -c
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
-SA_CFLAGS = $(SA_CFLAGS) /ZI
+SA_CFLAGS = $(SA_CFLAGS) -ZI
 !endif
 !endif
 !if "$(MT)" != ""
-SA_LD_FLAGS = /manifest $(SA_LD_FLAGS)
+SA_LD_FLAGS = -manifest $(SA_LD_FLAGS)
 !endif
 SASRCFILE = $(AGENT_DIR)/src/os/win32/windbg/sawindbg.cpp
-SA_LFLAGS = $(SA_LD_FLAGS) /nologo /subsystem:console /machine:$(MACHINE)
+SA_LFLAGS = $(SA_LD_FLAGS) -nologo -subsystem:console -machine:$(MACHINE)
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
-SA_LFLAGS = $(SA_LFLAGS) /map /debug
+SA_LFLAGS = $(SA_LFLAGS) -map -debug
 !endif
 
 # Note that we do not keep sawindbj.obj around as it would then
@@ -117,15 +117,15 @@
 $(SAWINDBG): $(SASRCFILE)
 	set INCLUDE=$(SA_INCLUDE)$(INCLUDE)
 	$(CXX) @<<
-	  /I"$(BootStrapDir)/include" /I"$(BootStrapDir)/include/win32" 
-	  /I"$(GENERATED)" $(SA_CFLAGS)
+	  -I"$(BootStrapDir)/include" -I"$(BootStrapDir)/include/win32" 
+	  -I"$(GENERATED)" $(SA_CFLAGS)
 	  $(SASRCFILE)
-	  /out:$*.obj
+	  -out:$*.obj
 <<
 	set LIB=$(SA_LIB)$(LIB)
-	$(LD) /out:$@ /DLL $*.obj dbgeng.lib $(SA_LFLAGS)
+	$(LD) -out:$@ -DLL $*.obj dbgeng.lib $(SA_LFLAGS)
 !if "$(MT)" != ""
-	$(MT) /manifest $(@F).manifest /outputresource:$(@F);#2
+	$(MT) -manifest $(@F).manifest -outputresource:$(@F);#2
 !endif
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
 !if "$(ZIP_DEBUGINFO_FILES)" == "1"
@@ -136,6 +136,6 @@
 	-@rm -f $*.obj
 
 cleanall :
-	rm -rf $(GENERATED:\=/)/saclasses
-	rm -rf $(GENERATED:\=/)/sa-jdi.jar
+	rm -rf $(GENERATED)/saclasses
+	rm -rf $(GENERATED)/sa-jdi.jar
 !endif
--- a/make/windows/makefiles/shared.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/windows/makefiles/shared.make	Fri Aug 24 15:51:19 2012 -0700
@@ -36,11 +36,12 @@
 
 
 !ifdef SUBDIRS
+# \ is used below because $(MAKE) is nmake here, which expects Windows paths
 $(SUBDIRS): FORCE
 	@if not exist $@ mkdir $@
-	@if not exist $@\local.make echo # Empty > $@\local.make
-	@echo nmake $(ACTION) in $(DIR)\$@
-	cd $@ && $(MAKE) /NOLOGO /f $(WorkSpace)\make\windows\makefiles\$@.make $(ACTION) DIR=$(DIR)\$@ BUILD_FLAVOR=$(BUILD_FLAVOR)
+	@if not exist $@/local.make echo # Empty > $@/local.make
+	@echo nmake $(ACTION) in $(DIR)/$@
+	cd $@ && $(MAKE) -NOLOGO -f $(WorkSpace)\make\windows\makefiles\$@.make $(ACTION) DIR=$(DIR)\$@ BUILD_FLAVOR=$(BUILD_FLAVOR)
 !endif
 
 # Creates the needed directory
--- a/make/windows/projectfiles/common/Makefile	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/windows/projectfiles/common/Makefile	Fri Aug 24 15:51:19 2012 -0700
@@ -108,7 +108,7 @@
       -define              HOTSPOT_VM_DISTRO=\\\"$(HOTSPOT_VM_DISTRO)\\\"
 
 $(HOTSPOTBUILDSPACE)/$(ProjectFile): $(HOTSPOTBUILDSPACE)/classes/ProjectCreator.class
-	@$(RUN_JAVA) -Djava.class.path=$(HOTSPOTBUILDSPACE)/classes ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions)
+	@$(RUN_JAVA) -Djava.class.path="$(HOTSPOTBUILDSPACE)/classes" ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions)
 
 clean:
 	@rm -rf $(HOTSPOTBUILDSPACE)/classes
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -435,85 +435,6 @@
 
 }
 
-void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
-  // At this point we know that offset == referent_offset.
-  //
-  // So we might have to emit:
-  //   if (src == null) goto continuation.
-  //
-  // and we definitely have to emit:
-  //   if (klass(src).reference_type == REF_NONE) goto continuation
-  //   if (!marking_active) goto continuation
-  //   if (pre_val == null) goto continuation
-  //   call pre_barrier(pre_val)
-  //   goto continuation
-  //
-  __ bind(_entry);
-
-  assert(src()->is_register(), "sanity");
-  Register src_reg = src()->as_register();
-
-  if (gen_src_check()) {
-    // The original src operand was not a constant.
-    // Generate src == null?
-    if (__ is_in_wdisp16_range(_continuation)) {
-      __ br_null(src_reg, /*annul*/false, Assembler::pt, _continuation);
-    } else {
-      __ cmp(src_reg, G0);
-      __ brx(Assembler::equal, false, Assembler::pt, _continuation);
-    }
-    __ delayed()->nop();
-  }
-
-  // Generate src->_klass->_reference_type() == REF_NONE)?
-  assert(tmp()->is_register(), "sanity");
-  Register tmp_reg = tmp()->as_register();
-
-  __ load_klass(src_reg, tmp_reg);
-
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
-  __ ldub(ref_type_adr, tmp_reg);
-
-  // _reference_type field is of type ReferenceType (enum)
-  assert(REF_NONE == 0, "check this code");
-  __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
-  __ delayed()->nop();
-
-  // Is marking active?
-  assert(thread()->is_register(), "precondition");
-  Register thread_reg = thread()->as_pointer_register();
-
-  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_active()));
-
-  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
-    __ ld(in_progress, tmp_reg);
-  } else {
-    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
-    __ ldsb(in_progress, tmp_reg);
-  }
-
-  __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
-  __ delayed()->nop();
-
-  // val == null?
-  assert(val()->is_register(), "Precondition.");
-  Register val_reg = val()->as_register();
-
-  if (__ is_in_wdisp16_range(_continuation)) {
-    __ br_null(val_reg, /*annul*/false, Assembler::pt, _continuation);
-  } else {
-    __ cmp(val_reg, G0);
-    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
-  }
-  __ delayed()->nop();
-
-  __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
-  __ delayed()->mov(val_reg, G4);
-  __ br(Assembler::always, false, Assembler::pt, _continuation);
-  __ delayed()->nop();
-}
-
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -106,10 +106,10 @@
     if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
       FLAG_SET_DEFAULT(OptoLoopAlignment, 4);
     }
-    // When using CMS, we cannot use memset() in BOT updates because
-    // the sun4v/CMT version in libc_psr uses BIS which exposes
-    // "phantom zeros" to concurrent readers. See 6948537.
-    if (FLAG_IS_DEFAULT(UseMemSetInBOT) && UseConcMarkSweepGC) {
+    // When using CMS or G1, we cannot use memset() in BOT updates
+    // because the sun4v/CMT version in libc_psr uses BIS which
+    // exposes "phantom zeros" to concurrent readers. See 6948537.
+    if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) {
       FLAG_SET_DEFAULT(UseMemSetInBOT, false);
     }
 #ifdef _LP64
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -999,32 +999,22 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::andl(Address dst, int32_t imm32) {
@@ -1052,36 +1042,6 @@
   emit_arith(0x23, 0xC0, dst, src);
 }
 
-void Assembler::andpd(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::andpd(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x54);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::andps(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::andps(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x54);
-  emit_byte(0xC0 | encode);
-}
-
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   emit_byte(0x0F);
@@ -1246,61 +1206,42 @@
   // NOTE: dbx seems to decode this as comiss even though the
   // 0x66 is there. Strangly ucomisd comes out correct
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x2F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x2F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::comiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0xE6);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x5B);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5A);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
@@ -1312,10 +1253,7 @@
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
@@ -1327,25 +1265,17 @@
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5A);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
 }
 
 
@@ -1373,32 +1303,22 @@
 
 void Assembler::divsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::divss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::divss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::emms() {
@@ -1634,16 +1554,12 @@
 
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x28);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x28);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
@@ -1712,24 +1628,17 @@
 
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movdqu(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x6F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movdqu(Address dst, XMMRegister src) {
@@ -1810,10 +1719,7 @@
 // The selection is done in MacroAssembler::movdbl() and movflt().
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x12);
-  emit_operand(dst, src);
+  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movq( MMXRegister dst, Address src ) {
@@ -1870,17 +1776,12 @@
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x10);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F2);
-  emit_byte(0x10);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::movsd(Address dst, XMMRegister src) {
@@ -1893,17 +1794,12 @@
 
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x10);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x10);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movss(Address dst, XMMRegister src) {
@@ -2001,32 +1897,22 @@
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::negl(Register dst) {
@@ -2315,17 +2201,12 @@
 void Assembler::packuswb(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x67);
-  emit_operand(dst, src);
+  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x67);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
@@ -2339,7 +2220,7 @@
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
   emit_byte(0x61);
   emit_byte(0xC0 | encode);
   emit_byte(imm8);
@@ -2355,7 +2236,7 @@
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x30);
   emit_byte(0xC0 | encode);
 }
@@ -2456,28 +2337,10 @@
   a_byte(p);
 }
 
-void Assembler::por(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEB);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::por(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEB);
-  emit_operand(dst, src);
-}
-
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x70);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
   emit_byte(mode & 0xFF);
 
 }
@@ -2496,9 +2359,7 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
-  emit_byte(0x70);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
   emit_byte(mode & 0xFF);
 }
 
@@ -2513,18 +2374,6 @@
   emit_byte(mode & 0xFF);
 }
 
-void Assembler::psrlq(XMMRegister dst, int shift) {
-  // Shift 64 bit value logically right by specified number of bits.
-  // HMM Table D-1 says sse2 or mmx.
-  // Do not confuse it with psrldq SSE2 instruction which
-  // shifts 128 bit value in xmm register by number of bytes.
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift);
-}
-
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -2545,7 +2394,7 @@
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x17);
   emit_byte(0xC0 | encode);
 }
@@ -2553,40 +2402,28 @@
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x60);
-  emit_operand(dst, src);
+  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x60);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpckldq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x62);
-  emit_operand(dst, src);
+  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x62);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x6C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::push(int32_t imm32) {
@@ -2616,22 +2453,6 @@
 }
 #endif
 
-void Assembler::pxor(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEF);
-  emit_operand(dst, src);
-}
-
-void Assembler::pxor(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEF);
-  emit_byte(0xC0 | encode);
-}
-
 void Assembler::rcll(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
@@ -2790,32 +2611,22 @@
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x51);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x51);
-  emit_operand(dst, src);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x51);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x51);
-  emit_operand(dst, src);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::stmxcsr( Address dst) {
@@ -2865,32 +2676,22 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::testb(Register dst, int imm8) {
@@ -2928,32 +2729,22 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x2E);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x2E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2E);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
 }
 
 
@@ -2995,211 +2786,714 @@
   emit_arith(0x33, 0xC0, dst, src);
 }
 
-void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::xorpd(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x57);
-  emit_operand(dst, src);
-}
-
-
-void Assembler::xorps(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::xorps(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x57);
-  emit_operand(dst, src);
-}
-
-// AVX 3-operands non destructive source instructions (encoded with VEX prefix)
+
+// AVX 3-operands scalar float-point arithmetic instructions
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
-  emit_byte(0x54);
-  emit_operand(dst, src);
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
-}
-
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+//====================VECTOR ARITHMETIC=====================================
+
+// Float-point vector arithmetic
+
+void Assembler::addpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::addps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::subpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::subps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::mulps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
-  emit_byte(0x57);
-  emit_operand(dst, src);
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::divpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::divps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::andpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::andps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::andps(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::andpd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::xorps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::xorpd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::xorps(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
-  emit_byte(0x57);
-  emit_operand(dst, src);
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
 }
 
 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, vector256);
-  emit_byte(0x57);
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+
+// Integer vector arithmetic
+void Assembler::paddb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::psubb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sse4_1(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x40);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  emit_byte(0x40);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  InstructionMark im(this);
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  emit_byte(0x40);
+  emit_operand(dst, src);
+}
+
+// Shift packed integers left by specified number of bits.
+void Assembler::psllw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::pslld(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psllq(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x73);
   emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+  emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+// Shift packed integers logically right by specified number of bits.
+void Assembler::psrlw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrld(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrlq(XMMRegister dst, int shift) {
+  // Do not confuse it with psrldq SSE2 instruction which
+  // shifts 128 bit value in xmm register by number of bytes.
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x73);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+// Shift packed integers arithmetically right by specified number of bits.
+void Assembler::psraw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrad(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+
+// AND packed integers
+void Assembler::pand(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::por(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::pxor(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx2() || (!vector256) && VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
-  emit_byte(0xEF);
-  emit_byte(0xC0 | encode);
-}
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
 
 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
@@ -3805,6 +4099,49 @@
   }
 }
 
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, pre);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
+  int encode = simd_prefix_and_encode(dst, dst, src, pre);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
+
+// Versions with no second source register (non-destructive source).
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+  InstructionMark im(this);
+  simd_prefix(dst, xnoreg, src, pre);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
+
+// 3-operands AVX instructions
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                               Address src, VexSimdPrefix pre, bool vector256) {
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, pre, vector256);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
+  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
+
 #ifndef _LP64
 
 void Assembler::incl(Register dst) {
@@ -7968,21 +8305,21 @@
   }
 }
 
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vandpd(dst, nds, as_Address(src));
+    vandpd(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vandpd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+    vandpd(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vandps(dst, nds, as_Address(src));
+    vandps(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vandps(dst, nds, Address(rscratch1, 0));
+    vandps(dst, nds, Address(rscratch1, 0), vector256);
   }
 }
 
@@ -8040,21 +8377,21 @@
   }
 }
 
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vxorpd(dst, nds, as_Address(src));
+    vxorpd(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vxorpd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vxorps(dst, nds, as_Address(src));
+    vxorps(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vxorps(dst, nds, Address(rscratch1, 0));
+    vxorps(dst, nds, Address(rscratch1, 0), vector256);
   }
 }
 
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -617,6 +617,7 @@
                    VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
     simd_prefix(dst, xnoreg, src, pre, opc);
   }
+
   void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
     simd_prefix(src, dst, pre);
   }
@@ -626,16 +627,10 @@
     simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
   }
 
-
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                              VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
                              bool rex_w = false, bool vector256 = false);
 
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
-    return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
-  }
-
   // Move/convert 32-bit integer value.
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
                              VexSimdPrefix pre) {
@@ -677,6 +672,15 @@
   void emit_arith(int op1, int op2, Register dst, jobject obj);
   void emit_arith(int op1, int op2, Register dst, Register src);
 
+  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
+  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
+  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                      Address src, VexSimdPrefix pre, bool vector256);
+  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                      XMMRegister src, VexSimdPrefix pre, bool vector256);
+
   void emit_operand(Register reg,
                     Register base, Register index, Address::ScaleFactor scale,
                     int disp,
@@ -891,12 +895,6 @@
   void andq(Register dst, Address src);
   void andq(Register dst, Register src);
 
-  // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
-  void andpd(XMMRegister dst, XMMRegister src);
-
-  // Bitwise Logical AND of Packed Single-Precision Floating-Point Values
-  void andps(XMMRegister dst, XMMRegister src);
-
   void bsfl(Register dst, Register src);
   void bsrl(Register dst, Register src);
 
@@ -1436,10 +1434,6 @@
   void prefetcht2(Address src);
   void prefetchw(Address src);
 
-  // POR - Bitwise logical OR
-  void por(XMMRegister dst, XMMRegister src);
-  void por(XMMRegister dst, Address src);
-
   // Shuffle Packed Doublewords
   void pshufd(XMMRegister dst, XMMRegister src, int mode);
   void pshufd(XMMRegister dst, Address src,     int mode);
@@ -1448,9 +1442,6 @@
   void pshuflw(XMMRegister dst, XMMRegister src, int mode);
   void pshuflw(XMMRegister dst, Address src,     int mode);
 
-  // Shift Right by bits Logical Quadword Immediate
-  void psrlq(XMMRegister dst, int shift);
-
   // Shift Right by bytes Logical DoubleQuadword Immediate
   void psrldq(XMMRegister dst, int shift);
 
@@ -1475,10 +1466,6 @@
 
   void pushq(Address src);
 
-  // Xor Packed Byte Integer Values
-  void pxor(XMMRegister dst, Address src);
-  void pxor(XMMRegister dst, XMMRegister src);
-
   void rcll(Register dst, int imm8);
 
   void rclq(Register dst, int imm8);
@@ -1601,15 +1588,10 @@
   void xorq(Register dst, Address src);
   void xorq(Register dst, Register src);
 
-  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
-  void xorpd(XMMRegister dst, XMMRegister src);
-
-  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
-  void xorps(XMMRegister dst, XMMRegister src);
-
   void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
 
   // AVX 3-operands scalar instructions (encoded with VEX prefix)
+
   void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vaddss(XMMRegister dst, XMMRegister nds, Address src);
@@ -1627,14 +1609,147 @@
   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
-  // AVX Vector instrucitons.
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vandps(XMMRegister dst, XMMRegister nds, Address src);
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
+
+  //====================VECTOR ARITHMETIC=====================================
+
+  // Add Packed Floating-Point Values
+  void addpd(XMMRegister dst, XMMRegister src);
+  void addps(XMMRegister dst, XMMRegister src);
+  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Subtract Packed Floating-Point Values
+  void subpd(XMMRegister dst, XMMRegister src);
+  void subps(XMMRegister dst, XMMRegister src);
+  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Multiply Packed Floating-Point Values
+  void mulpd(XMMRegister dst, XMMRegister src);
+  void mulps(XMMRegister dst, XMMRegister src);
+  void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Divide Packed Floating-Point Values
+  void divpd(XMMRegister dst, XMMRegister src);
+  void divps(XMMRegister dst, XMMRegister src);
+  void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Bitwise Logical AND of Packed Floating-Point Values
+  void andpd(XMMRegister dst, XMMRegister src);
+  void andps(XMMRegister dst, XMMRegister src);
+  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Bitwise Logical XOR of Packed Floating-Point Values
+  void xorpd(XMMRegister dst, XMMRegister src);
+  void xorps(XMMRegister dst, XMMRegister src);
   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Add packed integers
+  void paddb(XMMRegister dst, XMMRegister src);
+  void paddw(XMMRegister dst, XMMRegister src);
+  void paddd(XMMRegister dst, XMMRegister src);
+  void paddq(XMMRegister dst, XMMRegister src);
+  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Sub packed integers
+  void psubb(XMMRegister dst, XMMRegister src);
+  void psubw(XMMRegister dst, XMMRegister src);
+  void psubd(XMMRegister dst, XMMRegister src);
+  void psubq(XMMRegister dst, XMMRegister src);
+  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Multiply packed integers (only shorts and ints)
+  void pmullw(XMMRegister dst, XMMRegister src);
+  void pmulld(XMMRegister dst, XMMRegister src);
+  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Shift left packed integers
+  void psllw(XMMRegister dst, int shift);
+  void pslld(XMMRegister dst, int shift);
+  void psllq(XMMRegister dst, int shift);
+  void psllw(XMMRegister dst, XMMRegister shift);
+  void pslld(XMMRegister dst, XMMRegister shift);
+  void psllq(XMMRegister dst, XMMRegister shift);
+  void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // Logical shift right packed integers
+  void psrlw(XMMRegister dst, int shift);
+  void psrld(XMMRegister dst, int shift);
+  void psrlq(XMMRegister dst, int shift);
+  void psrlw(XMMRegister dst, XMMRegister shift);
+  void psrld(XMMRegister dst, XMMRegister shift);
+  void psrlq(XMMRegister dst, XMMRegister shift);
+  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
+  void psraw(XMMRegister dst, int shift);
+  void psrad(XMMRegister dst, int shift);
+  void psraw(XMMRegister dst, XMMRegister shift);
+  void psrad(XMMRegister dst, XMMRegister shift);
+  void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // And packed integers
+  void pand(XMMRegister dst, XMMRegister src);
+  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Or packed integers
+  void por(XMMRegister dst, XMMRegister src);
+  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Xor packed integers
+  void pxor(XMMRegister dst, XMMRegister src);
   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Copy low 128bit into high 128bit of YMM registers.
   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
@@ -2532,11 +2647,13 @@
   void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
   void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandpd(dst, nds, src); }
-  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vandps(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandps(dst, nds, src); }
-  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandpd(dst, nds, src, vector256); }
+  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
+  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandps(dst, nds, src, vector256); }
+  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
 
   void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
   void vdivsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivsd(dst, nds, src); }
@@ -2565,12 +2682,12 @@
   // AVX Vector instructions
 
   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
-  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
+  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
 
   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
-  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
+  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
 
   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
     if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
@@ -2578,6 +2695,12 @@
     else
       Assembler::vxorpd(dst, nds, src, vector256);
   }
+  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
+      Assembler::vpxor(dst, nds, src, vector256);
+    else
+      Assembler::vxorpd(dst, nds, src, vector256);
+  }
 
   // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector.
   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -488,68 +488,6 @@
 
 }
 
-void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
-  // At this point we know that offset == referent_offset.
-  //
-  // So we might have to emit:
-  //   if (src == null) goto continuation.
-  //
-  // and we definitely have to emit:
-  //   if (klass(src).reference_type == REF_NONE) goto continuation
-  //   if (!marking_active) goto continuation
-  //   if (pre_val == null) goto continuation
-  //   call pre_barrier(pre_val)
-  //   goto continuation
-  //
-  __ bind(_entry);
-
-  assert(src()->is_register(), "sanity");
-  Register src_reg = src()->as_register();
-
-  if (gen_src_check()) {
-    // The original src operand was not a constant.
-    // Generate src == null?
-    __ cmpptr(src_reg, (int32_t) NULL_WORD);
-    __ jcc(Assembler::equal, _continuation);
-  }
-
-  // Generate src->_klass->_reference_type == REF_NONE)?
-  assert(tmp()->is_register(), "sanity");
-  Register tmp_reg = tmp()->as_register();
-
-  __ load_klass(tmp_reg, src_reg);
-
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
-  __ cmpb(ref_type_adr, REF_NONE);
-  __ jcc(Assembler::equal, _continuation);
-
-  // Is marking active?
-  assert(thread()->is_register(), "precondition");
-  Register thread_reg = thread()->as_pointer_register();
-
-  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_active()));
-
-  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
-    __ cmpl(in_progress, 0);
-  } else {
-    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
-    __ cmpb(in_progress, 0);
-  }
-  __ jcc(Assembler::equal, _continuation);
-
-  // val == null?
-  assert(val()->is_register(), "Precondition.");
-  Register val_reg = val()->as_register();
-
-  __ cmpptr(val_reg, (int32_t) NULL_WORD);
-  __ jcc(Assembler::equal, _continuation);
-
-  ce->store_parameter(val()->as_register(), 0);
-  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
-  __ jmp(_continuation);
-}
-
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/x86/vm/x86.ad	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/x86.ad	Fri Aug 24 15:51:19 2012 -0700
@@ -500,6 +500,24 @@
   0  /*bottom*/
 };
 
+const bool Matcher::match_rule_supported(int opcode) {
+  if (!has_match_rule(opcode))
+    return false;
+
+  switch (opcode) {
+    case Op_PopCountI:
+    case Op_PopCountL:
+      if (!UsePopCountInstruction)
+        return false;
+    case Op_MulVI:
+      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
+        return false;
+    break;
+  }
+
+  return true;  // Per default match rules are supported.
+}
+
 // Max vector size in bytes. 0 if not supported.
 const int Matcher::vector_width_in_bytes(BasicType bt) {
   assert(is_java_primitive(bt), "only primitive type vectors");
@@ -1439,8 +1457,9 @@
   ins_cost(150);
   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
   ins_encode %{
+    bool vector256 = false;
     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(float_signmask()));
+              ExternalAddress(float_signmask()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -1464,8 +1483,9 @@
   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
             "# abs double by sign masking" %}
   ins_encode %{
+    bool vector256 = false;
     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(double_signmask()));
+              ExternalAddress(double_signmask()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -1487,8 +1507,9 @@
   ins_cost(150);
   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
   ins_encode %{
+    bool vector256 = false;
     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(float_signflip()));
+              ExternalAddress(float_signflip()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -1512,8 +1533,9 @@
   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
             "# neg double by sign flipping" %}
   ins_encode %{
+    bool vector256 = false;
     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(double_signflip()));
+              ExternalAddress(double_signflip()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -2382,3 +2404,2416 @@
   ins_pipe( fpu_reg_reg );
 %}
 
+// ====================VECTOR ARITHMETIC=======================================
+
+// --------------------------------- ADD --------------------------------------
+
+// Bytes vector add
+instruct vadd4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVB dst src));
+  format %{ "paddb   $dst,$src\t! add packed4B" %}
+  ins_encode %{
+    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVB dst src));
+  format %{ "paddb   $dst,$src\t! add packed8B" %}
+  ins_encode %{
+    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (AddVB dst src));
+  format %{ "paddb   $dst,$src\t! add packed16B" %}
+  ins_encode %{
+    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Shorts/Chars vector add
+instruct vadd2S(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVS dst src));
+  format %{ "paddw   $dst,$src\t! add packed2S" %}
+  ins_encode %{
+    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst src));
+  format %{ "paddw   $dst,$src\t! add packed4S" %}
+  ins_encode %{
+    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst src));
+  format %{ "paddw   $dst,$src\t! add packed8S" %}
+  ins_encode %{
+    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector add
+instruct vadd2I(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVI dst src));
+  format %{ "paddd   $dst,$src\t! add packed2I" %}
+  ins_encode %{
+    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVI src1 src2));
+  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVI dst src));
+  format %{ "paddd   $dst,$src\t! add packed4I" %}
+  ins_encode %{
+    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVI src1 src2));
+  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVI src (LoadVector mem)));
+  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVI src1 src2));
+  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVI src (LoadVector mem)));
+  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector add
+instruct vadd2L(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVL dst src));
+  format %{ "paddq   $dst,$src\t! add packed2L" %}
+  ins_encode %{
+    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVL src1 src2));
+  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVL src (LoadVector mem)));
+  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVL src1 src2));
+  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVL src (LoadVector mem)));
+  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Floats vector add
+instruct vadd2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVF dst src));
+  format %{ "addps   $dst,$src\t! add packed2F" %}
+  ins_encode %{
+    __ addps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVF src1 src2));
+  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVF dst src));
+  format %{ "addps   $dst,$src\t! add packed4F" %}
+  ins_encode %{
+    __ addps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVF src1 src2));
+  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVF src (LoadVector mem)));
+  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVF src1 src2));
+  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVF src (LoadVector mem)));
+  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector add
+instruct vadd2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVD dst src));
+  format %{ "addpd   $dst,$src\t! add packed2D" %}
+  ins_encode %{
+    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVD src1 src2));
+  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVD src (LoadVector mem)));
+  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVD src1 src2));
+  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVD src (LoadVector mem)));
+  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- SUB --------------------------------------
+
+// Bytes vector sub
+instruct vsub4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVB dst src));
+  format %{ "psubb   $dst,$src\t! sub packed4B" %}
+  ins_encode %{
+    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVB dst src));
+  format %{ "psubb   $dst,$src\t! sub packed8B" %}
+  ins_encode %{
+    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (SubVB dst src));
+  format %{ "psubb   $dst,$src\t! sub packed16B" %}
+  ins_encode %{
+    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Shorts/Chars vector sub
+instruct vsub2S(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVS dst src));
+  format %{ "psubw   $dst,$src\t! sub packed2S" %}
+  ins_encode %{
+    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVS dst src));
+  format %{ "psubw   $dst,$src\t! sub packed4S" %}
+  ins_encode %{
+    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVS dst src));
+  format %{ "psubw   $dst,$src\t! sub packed8S" %}
+  ins_encode %{
+    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector sub
+instruct vsub2I(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVI dst src));
+  format %{ "psubd   $dst,$src\t! sub packed2I" %}
+  ins_encode %{
+    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVI src1 src2));
+  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVI dst src));
+  format %{ "psubd   $dst,$src\t! sub packed4I" %}
+  ins_encode %{
+    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVI src1 src2));
+  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVI src (LoadVector mem)));
+  format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVI src1 src2));
+  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVI src (LoadVector mem)));
+  format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector sub
+instruct vsub2L(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVL dst src));
+  format %{ "psubq   $dst,$src\t! sub packed2L" %}
+  ins_encode %{
+    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVL src1 src2));
+  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVL src (LoadVector mem)));
+  format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVL src1 src2));
+  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVL src (LoadVector mem)));
+  format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Floats vector sub
+instruct vsub2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVF dst src));
+  format %{ "subps   $dst,$src\t! sub packed2F" %}
+  ins_encode %{
+    __ subps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVF src1 src2));
+  format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVF dst src));
+  format %{ "subps   $dst,$src\t! sub packed4F" %}
+  ins_encode %{
+    __ subps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVF src1 src2));
+  format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVF src (LoadVector mem)));
+  format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVF src1 src2));
+  format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVF src (LoadVector mem)));
+  format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector sub
+instruct vsub2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVD dst src));
+  format %{ "subpd   $dst,$src\t! sub packed2D" %}
+  ins_encode %{
+    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVD src1 src2));
+  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVD src (LoadVector mem)));
+  format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVD src1 src2));
+  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVD src (LoadVector mem)));
+  format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- MUL --------------------------------------
+
+// Shorts/Chars vector mul
+instruct vmul2S(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVS dst src));
+  format %{ "pmullw $dst,$src\t! mul packed2S" %}
+  ins_encode %{
+    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVS dst src));
+  format %{ "pmullw  $dst,$src\t! mul packed4S" %}
+  ins_encode %{
+    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (MulVS dst src));
+  format %{ "pmullw  $dst,$src\t! mul packed8S" %}
+  ins_encode %{
+    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector mul (sse4_1)
+instruct vmul2I(vecD dst, vecD src) %{
+  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVI dst src));
+  format %{ "pmulld  $dst,$src\t! mul packed2I" %}
+  ins_encode %{
+    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVI src1 src2));
+  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I(vecX dst, vecX src) %{
+  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVI dst src));
+  format %{ "pmulld  $dst,$src\t! mul packed4I" %}
+  ins_encode %{
+    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVI src1 src2));
+  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVI src (LoadVector mem)));
+  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVI src1 src2));
+  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVI src (LoadVector mem)));
+  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Floats vector mul
+instruct vmul2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVF dst src));
+  format %{ "mulps   $dst,$src\t! mul packed2F" %}
+  ins_encode %{
+    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVF src1 src2));
+  format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVF dst src));
+  format %{ "mulps   $dst,$src\t! mul packed4F" %}
+  ins_encode %{
+    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVF src1 src2));
+  format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVF src (LoadVector mem)));
+  format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVF src1 src2));
+  format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVF src (LoadVector mem)));
+  format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector mul
+instruct vmul2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVD dst src));
+  format %{ "mulpd   $dst,$src\t! mul packed2D" %}
+  ins_encode %{
+    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVD src1 src2));
+  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVD src (LoadVector mem)));
+  format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVD src1 src2));
+  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVD src (LoadVector mem)));
+  format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- DIV --------------------------------------
+
+// Floats vector div
+instruct vdiv2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVF dst src));
+  format %{ "divps   $dst,$src\t! div packed2F" %}
+  ins_encode %{
+    __ divps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (DivVF src1 src2));
+  format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (DivVF dst src));
+  format %{ "divps   $dst,$src\t! div packed4F" %}
+  ins_encode %{
+    __ divps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVF src1 src2));
+  format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVF src (LoadVector mem)));
+  format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (DivVF src1 src2));
+  format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (DivVF src (LoadVector mem)));
+  format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector div
+instruct vdiv2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVD dst src));
+  format %{ "divpd   $dst,$src\t! div packed2D" %}
+  ins_encode %{
+    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (DivVD src1 src2));
+  format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (DivVD src (LoadVector mem)));
+  format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVD src1 src2));
+  format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVD src (LoadVector mem)));
+  format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ------------------------------ LeftShift -----------------------------------
+
+// Shorts/Chars vector left shift
+instruct vsll2S(vecS dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_imm(vecS dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector left shift
+instruct vsll2I(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector left shift
+instruct vsll2L(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL dst shift));
+  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL dst shift));
+  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ----------------------- LogicalRightShift -----------------------------------
+
+// Shorts/Chars vector logical right shift produces incorrect Java result
+// for negative data because java code convert short value into int with
+// sign extension before a shift.
+
+// Integers vector logical right shift
+instruct vsrl2I(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector logical right shift
+instruct vsrl2L(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL dst shift));
+  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL dst shift));
+  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ------------------- ArithmeticRightShift -----------------------------------
+
+// Shorts/Chars vector arithmetic right shift
+instruct vsra2S(vecS dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_imm(vecS dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector arithmetic right shift
+instruct vsra2I(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// There are no longs vector arithmetic right shift instructions.
+
+
+// --------------------------------- AND --------------------------------------
+
+instruct vand4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (AndV dst src));
+  format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
+  ins_encode %{
+    __ pand($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (AndV dst src));
+  format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
+  ins_encode %{
+    __ pand($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV dst src));
+  format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
+  ins_encode %{
+    __ pand($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV src (LoadVector mem)));
+  format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (AndV src (LoadVector mem)));
+  format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- OR ---------------------------------------
+
+instruct vor4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (OrV dst src));
+  format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
+  ins_encode %{
+    __ por($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (OrV dst src));
+  format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
+  ins_encode %{
+    __ por($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV dst src));
+  format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
+  ins_encode %{
+    __ por($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV src (LoadVector mem)));
+  format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (OrV src (LoadVector mem)));
+  format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- XOR --------------------------------------
+
+instruct vxor4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (XorV dst src));
+  format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (XorV dst src));
+  format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV dst src));
+  format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV src (LoadVector mem)));
+  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (XorV src (LoadVector mem)));
+  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
--- a/src/cpu/x86/vm/x86_32.ad	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Aug 24 15:51:19 2012 -0700
@@ -1367,22 +1367,6 @@
   return offset;
 }
 
-
-const bool Matcher::match_rule_supported(int opcode) {
-  if (!has_match_rule(opcode))
-    return false;
-
-  switch (opcode) {
-    case Op_PopCountI:
-    case Op_PopCountL:
-      if (!UsePopCountInstruction)
-        return false;
-    break;
-  }
-  
-  return true;  // Per default match rules are supported.
-}
-
 int Matcher::regnum_to_fpu_offset(int regnum) {
   return regnum - 32; // The FP registers are in the second chunk
 }
--- a/src/cpu/x86/vm/x86_64.ad	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Aug 24 15:51:19 2012 -0700
@@ -1513,22 +1513,6 @@
   return offset;
 }
 
-
-const bool Matcher::match_rule_supported(int opcode) {
-  if (!has_match_rule(opcode))
-    return false;
-
-  switch (opcode) {
-    case Op_PopCountI:
-    case Op_PopCountL:
-      if (!UsePopCountInstruction)
-        return false;
-    break;
-  }
-
-  return true;  // Per default match rules are supported.
-}
-
 int Matcher::regnum_to_fpu_offset(int regnum)
 {
   return regnum - 32; // The FP registers are in the second chunk
@@ -6427,6 +6411,31 @@
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
+// Convert oop into int for vectors alignment masking
+instruct convP2I(rRegI dst, rRegP src)
+%{
+  match(Set dst (ConvL2I (CastP2X src)));
+
+  format %{ "movl    $dst, $src\t# ptr -> int" %}
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg_reg); // XXX
+%}
+
+// Convert compressed oop into int for vectors alignment masking
+// in case of 32bit oops (heap < 4Gb).
+instruct convN2I(rRegI dst, rRegN src)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
+
+  format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg_reg); // XXX
+%}
 
 // Convert oop pointer into compressed form
 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
@@ -10049,11 +10058,10 @@
   ins_pipe( pipe_slow );
 %}
 
-// The next instructions have long latency and use Int unit. Set high cost.
 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
   match(Set dst (MoveI2F src));
   effect(DEF dst, USE src);
-  ins_cost(300);
+  ins_cost(100);
   format %{ "movd    $dst,$src\t# MoveI2F" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
@@ -10064,7 +10072,7 @@
 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
   match(Set dst (MoveL2D src));
   effect(DEF dst, USE src);
-  ins_cost(300);
+  ins_cost(100);
   format %{ "movd    $dst,$src\t# MoveL2D" %}
   ins_encode %{
      __ movdq($dst$$XMMRegister, $src$$Register);
--- a/src/os/solaris/vm/dtraceJSDT_solaris.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/os/solaris/vm/dtraceJSDT_solaris.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -626,45 +626,6 @@
   }
 }
 
-/**
- * This prints out hex data in a 'windbg' or 'xxd' form, where each line is:
- *   <hex-address>: 8 * <hex-halfword> <ascii translation>
- * example:
- * 0000000: 7f44 4f46 0102 0102 0000 0000 0000 0000  .DOF............
- * 0000010: 0000 0000 0000 0040 0000 0020 0000 0005  .......@... ....
- * 0000020: 0000 0000 0000 0040 0000 0000 0000 015d  .......@.......]
- * ...
- */
-static void printDOFRawData(void* dof) {
-  size_t size = ((dof_hdr_t*)dof)->dofh_loadsz;
-  size_t limit = (size + 16) / 16 * 16;
-  for (size_t i = 0; i < limit; ++i) {
-    if (i % 16 == 0) {
-      tty->print("%07x:", i);
-    }
-    if (i % 2 == 0) {
-      tty->print(" ");
-    }
-    if (i < size) {
-      tty->print("%02x", ((unsigned char*)dof)[i]);
-    } else {
-      tty->print("  ");
-    }
-    if ((i + 1) % 16 == 0) {
-      tty->print("  ");
-      for (size_t j = 0; j < 16; ++j) {
-        size_t idx = i + j - 15;
-        char c = ((char*)dof)[idx];
-        if (idx < size) {
-          tty->print("%c", c >= 32 && c <= 126 ? c : '.');
-        }
-      }
-      tty->print_cr("");
-    }
-  }
-  tty->print_cr("");
-}
-
 static void printDOFHelper(dof_helper_t* helper) {
   tty->print_cr("// dof_helper_t {");
   tty->print_cr("//   dofhp_mod = \"%s\"", helper->dofhp_mod);
@@ -672,7 +633,8 @@
   tty->print_cr("//   dofhp_dof = 0x%016llx", helper->dofhp_dof);
   printDOF((void*)helper->dofhp_dof);
   tty->print_cr("// }");
-  printDOFRawData((void*)helper->dofhp_dof);
+  size_t len = ((dof_hdr_t*)helper)->dofh_loadsz;
+  tty->print_data((void*)helper->dofhp_dof, len, true);
 }
 
 #else // ndef HAVE_DTRACE_H
--- a/src/share/vm/c1/c1_CodeStubs.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_CodeStubs.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -574,71 +574,6 @@
 #endif // PRODUCT
 };
 
-// This G1 barrier code stub is used in Unsafe.getObject.
-// It generates a sequence of guards around the SATB
-// barrier code that are used to detect when we have
-// the referent field of a Reference object.
-// The first check is assumed to have been generated
-// in the code generated for Unsafe.getObject().
-
-class G1UnsafeGetObjSATBBarrierStub: public CodeStub {
- private:
-  LIR_Opr _val;
-  LIR_Opr _src;
-
-  LIR_Opr _tmp;
-  LIR_Opr _thread;
-
-  bool _gen_src_check;
-
- public:
-  // A G1 barrier that is guarded by generated guards that determine whether
-  // val (which is the result of Unsafe.getObject() should be recorded in an
-  // SATB log buffer. We could be reading the referent field of a Reference object
-  // using Unsafe.getObject() and we need to record the referent.
-  //
-  // * val is the operand returned by the unsafe.getObject routine.
-  // * src is the base object
-  // * tmp is a temp used to load the klass of src, and then reference type
-  // * thread is the thread object.
-
-  G1UnsafeGetObjSATBBarrierStub(LIR_Opr val, LIR_Opr src,
-                                LIR_Opr tmp, LIR_Opr thread,
-                                bool gen_src_check) :
-    _val(val), _src(src),
-    _tmp(tmp), _thread(thread),
-    _gen_src_check(gen_src_check)
-  {
-    assert(_val->is_register(), "should have already been loaded");
-    assert(_src->is_register(), "should have already been loaded");
-
-    assert(_tmp->is_register(), "should be a temporary register");
-  }
-
-  LIR_Opr val() const { return _val; }
-  LIR_Opr src() const { return _src; }
-
-  LIR_Opr tmp() const { return _tmp; }
-  LIR_Opr thread() const { return _thread; }
-
-  bool gen_src_check() const { return _gen_src_check; }
-
-  virtual void emit_code(LIR_Assembler* e);
-
-  virtual void visit(LIR_OpVisitState* visitor) {
-    visitor->do_slow_case();
-    visitor->do_input(_val);
-    visitor->do_input(_src);
-    visitor->do_input(_thread);
-
-    visitor->do_temp(_tmp);
-  }
-
-#ifndef PRODUCT
-  virtual void print_name(outputStream* out) const { out->print("G1UnsafeGetObjSATBBarrierStub"); }
-#endif // PRODUCT
-};
-
 class G1PostBarrierStub: public CodeStub {
  private:
   LIR_Opr _addr;
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1646,10 +1646,6 @@
 
 
 void GraphBuilder::invoke(Bytecodes::Code code) {
-  const bool has_receiver =
-    code == Bytecodes::_invokespecial   ||
-    code == Bytecodes::_invokevirtual   ||
-    code == Bytecodes::_invokeinterface;
   const bool is_invokedynamic = (code == Bytecodes::_invokedynamic);
 
   bool will_link;
@@ -1690,8 +1686,12 @@
   // convert them directly to an invokespecial or invokestatic.
   if (target->is_loaded() && !target->is_abstract() && target->can_be_statically_bound()) {
     switch (bc_raw) {
-    case Bytecodes::_invokevirtual:  code = Bytecodes::_invokespecial;  break;
-    case Bytecodes::_invokehandle:   code = Bytecodes::_invokestatic;   break;
+    case Bytecodes::_invokevirtual:
+      code = Bytecodes::_invokespecial;
+      break;
+    case Bytecodes::_invokehandle:
+      code = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokespecial;
+      break;
     }
   }
 
@@ -1878,11 +1878,13 @@
   // inlining not successful => standard invoke
   bool is_loaded = target->is_loaded();
   ValueType* result_type = as_ValueType(target->return_type());
-
-  // We require the debug info to be the "state before" because
-  // invokedynamics may deoptimize.
-  ValueStack* state_before = is_invokedynamic ? copy_state_before() : copy_state_exhandling();
-
+  ValueStack* state_before = copy_state_exhandling();
+
+  // The bytecode (code) might change in this method so we are checking this very late.
+  const bool has_receiver =
+    code == Bytecodes::_invokespecial   ||
+    code == Bytecodes::_invokevirtual   ||
+    code == Bytecodes::_invokeinterface;
   Values* args = state()->pop_arguments(target->arg_size_no_receiver());
   Value recv = has_receiver ? apop() : NULL;
   int vtable_index = methodOopDesc::invalid_vtable_index;
@@ -3058,7 +3060,7 @@
 
   case vmIntrinsics::_Reference_get:
     {
-      if (UseG1GC) {
+      {
         // With java.lang.ref.reference.get() we must go through the
         // intrinsic - when G1 is enabled - even when get() is the root
         // method of the compile so that, if necessary, the value in
@@ -3070,6 +3072,9 @@
         // object removed from the list of discovered references during
         // reference processing.
 
+        // Also we need intrinsic to prevent commoning reads from this field
+        // across safepoint since GC can change its value.
+
         // Set up a stream so that appending instructions works properly.
         ciBytecodeStream s(scope->method());
         s.reset_to_bci(0);
@@ -3226,7 +3231,6 @@
 
 
 bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
-  if (!InlineNatives           ) INLINE_BAILOUT("intrinsic method inlining disabled");
   if (callee->is_synchronized()) {
     // We don't currently support any synchronized intrinsics
     return false;
@@ -3234,9 +3238,13 @@
 
   // callee seems like a good candidate
   // determine id
+  vmIntrinsics::ID id = callee->intrinsic_id();
+  if (!InlineNatives && id != vmIntrinsics::_Reference_get) {
+    // InlineNatives does not control Reference.get
+    INLINE_BAILOUT("intrinsic method inlining disabled");
+  }
   bool preserves_state = false;
   bool cantrap = true;
-  vmIntrinsics::ID id = callee->intrinsic_id();
   switch (id) {
     case vmIntrinsics::_arraycopy:
       if (!InlineArrayCopy) return false;
@@ -3376,11 +3384,10 @@
       return true;
 
     case vmIntrinsics::_Reference_get:
-      // It is only when G1 is enabled that we absolutely
-      // need to use the intrinsic version of Reference.get()
-      // so that the value in the referent field, if necessary,
-      // can be registered by the pre-barrier code.
-      if (!UseG1GC) return false;
+      // Use the intrinsic version of Reference.get() so that the value in
+      // the referent field can be registered by the G1 pre-barrier code.
+      // Also to prevent commoning reads from this field across safepoint
+      // since GC can change its value.
       preserves_state = true;
       break;
 
--- a/src/share/vm/c1/c1_Instruction.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_Instruction.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -369,9 +369,6 @@
   _signature = new BasicTypeList(number_of_arguments() + (has_receiver() ? 1 : 0));
   if (has_receiver()) {
     _signature->append(as_BasicType(receiver()->type()));
-  } else if (is_invokedynamic()) {
-    // Add the synthetic MethodHandle argument to the signature.
-    _signature->append(T_OBJECT);
   }
   for (int i = 0; i < number_of_arguments(); i++) {
     ValueType* t = argument_at(i)->type();
--- a/src/share/vm/c1/c1_LIRAssembler.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_LIRAssembler.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -448,10 +448,10 @@
 
   switch (op->code()) {
   case lir_static_call:
+  case lir_dynamic_call:
     call(op, relocInfo::static_call_type);
     break;
   case lir_optvirtual_call:
-  case lir_dynamic_call:
     call(op, relocInfo::opt_virtual_call_type);
     break;
   case lir_icvirtual_call:
@@ -460,7 +460,9 @@
   case lir_virtual_call:
     vtable_call(op);
     break;
-  default: ShouldNotReachHere();
+  default:
+    fatal(err_msg_res("unexpected op code: %s", op->name()));
+    break;
   }
 
   // JSR 292
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -920,7 +920,8 @@
 
 
 LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) {
-  assert(type2size[t] == type2size[value->type()], "size mismatch");
+  assert(type2size[t] == type2size[value->type()],
+         err_msg_res("size mismatch: t=%s, value->type()=%s", type2name(t), type2name(value->type())));
   if (!value->is_register()) {
     // force into a register
     LIR_Opr r = new_register(value->type());
@@ -2176,9 +2177,9 @@
   off.load_item();
   src.load_item();
 
-  LIR_Opr reg = rlock_result(x, x->basic_type());
-
-  get_Object_unsafe(reg, src.result(), off.result(), type, x->is_volatile());
+  LIR_Opr value = rlock_result(x, x->basic_type());
+
+  get_Object_unsafe(value, src.result(), off.result(), type, x->is_volatile());
 
 #ifndef SERIALGC
   // We might be reading the value of the referent field of a
@@ -2191,19 +2192,16 @@
   // if (offset == java_lang_ref_Reference::referent_offset) {
   //   if (src != NULL) {
   //     if (klass(src)->reference_type() != REF_NONE) {
-  //       pre_barrier(..., reg, ...);
+  //       pre_barrier(..., value, ...);
   //     }
   //   }
   // }
-  //
-  // The first non-constant check of either the offset or
-  // the src operand will be done here; the remainder
-  // will take place in the generated code stub.
 
   if (UseG1GC && type == T_OBJECT) {
-    bool gen_code_stub = true;       // Assume we need to generate the slow code stub.
-    bool gen_offset_check = true;       // Assume the code stub has to generate the offset guard.
-    bool gen_source_check = true;       // Assume the code stub has to check the src object for null.
+    bool gen_pre_barrier = true;     // Assume we need to generate pre_barrier.
+    bool gen_offset_check = true;    // Assume we need to generate the offset guard.
+    bool gen_source_check = true;    // Assume we need to check the src object for null.
+    bool gen_type_check = true;      // Assume we need to check the reference_type.
 
     if (off.is_constant()) {
       jlong off_con = (off.type()->is_int() ?
@@ -2215,7 +2213,7 @@
         // The constant offset is something other than referent_offset.
         // We can skip generating/checking the remaining guards and
         // skip generation of the code stub.
-        gen_code_stub = false;
+        gen_pre_barrier = false;
       } else {
         // The constant offset is the same as referent_offset -
         // we do not need to generate a runtime offset check.
@@ -2224,11 +2222,11 @@
     }
 
     // We don't need to generate stub if the source object is an array
-    if (gen_code_stub && src.type()->is_array()) {
-      gen_code_stub = false;
+    if (gen_pre_barrier && src.type()->is_array()) {
+      gen_pre_barrier = false;
     }
 
-    if (gen_code_stub) {
+    if (gen_pre_barrier) {
       // We still need to continue with the checks.
       if (src.is_constant()) {
         ciObject* src_con = src.get_jobject_constant();
@@ -2236,7 +2234,7 @@
         if (src_con->is_null_object()) {
           // The constant src object is null - We can skip
           // generating the code stub.
-          gen_code_stub = false;
+          gen_pre_barrier = false;
         } else {
           // Non-null constant source object. We still have to generate
           // the slow stub - but we don't need to generate the runtime
@@ -2245,20 +2243,28 @@
         }
       }
     }
-
-    if (gen_code_stub) {
-      // Temoraries.
-      LIR_Opr src_klass = new_register(T_OBJECT);
-
-      // Get the thread pointer for the pre-barrier
-      LIR_Opr thread = getThreadPointer();
-
-      CodeStub* stub;
+    if (gen_pre_barrier && !PatchALot) {
+      // Can the klass of object be statically determined to be
+      // a sub-class of Reference?
+      ciType* type = src.value()->declared_type();
+      if ((type != NULL) && type->is_loaded()) {
+        if (type->is_subtype_of(compilation()->env()->Reference_klass())) {
+          gen_type_check = false;
+        } else if (type->is_klass() &&
+                   !compilation()->env()->Object_klass()->is_subtype_of(type->as_klass())) {
+          // Not Reference and not Object klass.
+          gen_pre_barrier = false;
+        }
+      }
+    }
+
+    if (gen_pre_barrier) {
+      LabelObj* Lcont = new LabelObj();
 
       // We can have generate one runtime check here. Let's start with
       // the offset check.
       if (gen_offset_check) {
-        // if (offset == referent_offset) -> slow code stub
+        // if (offset != referent_offset) -> continue
         // If offset is an int then we can do the comparison with the
         // referent_offset constant; otherwise we need to move
         // referent_offset into a temporary register and generate
@@ -2273,43 +2279,36 @@
           referent_off = new_register(T_LONG);
           __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off);
         }
-
-        __ cmp(lir_cond_equal, off.result(), referent_off);
-
-        // Optionally generate "src == null" check.
-        stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
-                                                    src_klass, thread,
-                                                    gen_source_check);
-
-        __ branch(lir_cond_equal, as_BasicType(off.type()), stub);
-      } else {
-        if (gen_source_check) {
-          // offset is a const and equals referent offset
-          // if (source != null) -> slow code stub
-          __ cmp(lir_cond_notEqual, src.result(), LIR_OprFact::oopConst(NULL));
-
-          // Since we are generating the "if src == null" guard here,
-          // there is no need to generate the "src == null" check again.
-          stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
-                                                    src_klass, thread,
-                                                    false);
-
-          __ branch(lir_cond_notEqual, T_OBJECT, stub);
-        } else {
-          // We have statically determined that offset == referent_offset
-          // && src != null so we unconditionally branch to code stub
-          // to perform the guards and record reg in the SATB log buffer.
-
-          stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
-                                                    src_klass, thread,
-                                                    false);
-
-          __ branch(lir_cond_always, T_ILLEGAL, stub);
-        }
+        __ cmp(lir_cond_notEqual, off.result(), referent_off);
+        __ branch(lir_cond_notEqual, as_BasicType(off.type()), Lcont->label());
+      }
+      if (gen_source_check) {
+        // offset is a const and equals referent offset
+        // if (source == null) -> continue
+        __ cmp(lir_cond_equal, src.result(), LIR_OprFact::oopConst(NULL));
+        __ branch(lir_cond_equal, T_OBJECT, Lcont->label());
       }
-
-      // Continuation point
-      __ branch_destination(stub->continuation());
+      LIR_Opr src_klass = new_register(T_OBJECT);
+      if (gen_type_check) {
+        // We have determined that offset == referent_offset && src != null.
+        // if (src->_klass->_reference_type == REF_NONE) -> continue
+        __ move(new LIR_Address(src.result(), oopDesc::klass_offset_in_bytes(), T_OBJECT), src_klass);
+        LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(instanceKlass::reference_type_offset()), T_BYTE);
+        LIR_Opr reference_type = new_register(T_INT);
+        __ move(reference_type_addr, reference_type);
+        __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE));
+        __ branch(lir_cond_equal, T_INT, Lcont->label());
+      }
+      {
+        // We have determined that src->_klass->_reference_type != REF_NONE
+        // so register the value in the referent field with the pre-barrier.
+        pre_barrier(LIR_OprFact::illegalOpr /* addr_opr */,
+                    value  /* pre_val */,
+                    false  /* do_load */,
+                    false  /* patch */,
+                    NULL   /* info */);
+      }
+      __ branch_destination(Lcont->label());
     }
   }
 #endif // SERIALGC
@@ -2664,8 +2663,9 @@
 
 
 void LIRGenerator::invoke_load_arguments(Invoke* x, LIRItemList* args, const LIR_OprList* arg_list) {
-  int i = (x->has_receiver() || x->is_invokedynamic()) ? 1 : 0;
-  for (; i < args->length(); i++) {
+  assert(args->length() == arg_list->length(),
+         err_msg_res("args=%d, arg_list=%d", args->length(), arg_list->length()));
+  for (int i = x->has_receiver() ? 1 : 0; i < args->length(); i++) {
     LIRItem* param = args->at(i);
     LIR_Opr loc = arg_list->at(i);
     if (loc->is_register()) {
@@ -2705,15 +2705,9 @@
     LIRItem* receiver = new LIRItem(x->receiver(), this);
     argument_items->append(receiver);
   }
-  if (x->is_invokedynamic()) {
-    // Insert a dummy for the synthetic MethodHandle argument.
-    argument_items->append(NULL);
-  }
-  int idx = x->has_receiver() ? 1 : 0;
   for (int i = 0; i < x->number_of_arguments(); i++) {
     LIRItem* param = new LIRItem(x->argument_at(i), this);
     argument_items->append(param);
-    idx += (param->type()->is_double_word() ? 2 : 1);
   }
   return argument_items;
 }
@@ -2758,9 +2752,6 @@
 
   CodeEmitInfo* info = state_for(x, x->state());
 
-  // invokedynamics can deoptimize.
-  CodeEmitInfo* deopt_info = x->is_invokedynamic() ? state_for(x, x->state_before()) : NULL;
-
   invoke_load_arguments(x, args, arg_list);
 
   if (x->has_receiver()) {
@@ -2809,41 +2800,8 @@
       }
       break;
     case Bytecodes::_invokedynamic: {
-      ciBytecodeStream bcs(x->scope()->method());
-      bcs.force_bci(x->state()->bci());
-      assert(bcs.cur_bc() == Bytecodes::_invokedynamic, "wrong stream");
-      ciCPCache* cpcache = bcs.get_cpcache();
-
-      // Get CallSite offset from constant pool cache pointer.
-      int index = bcs.get_method_index();
-      size_t call_site_offset = cpcache->get_f1_offset(index);
-
-      // Load CallSite object from constant pool cache.
-      LIR_Opr call_site = new_register(objectType);
-      __ oop2reg(cpcache->constant_encoding(), call_site);
-      __ move_wide(new LIR_Address(call_site, call_site_offset, T_OBJECT), call_site);
-
-      // If this invokedynamic call site hasn't been executed yet in
-      // the interpreter, the CallSite object in the constant pool
-      // cache is still null and we need to deoptimize.
-      if (cpcache->is_f1_null_at(index)) {
-        // Only deoptimize if the CallSite object is still null; we don't
-        // recompile methods in C1 after deoptimization so this call site
-        // might be resolved the next time we execute it after OSR.
-        DeoptimizeStub* deopt_stub = new DeoptimizeStub(deopt_info);
-        __ cmp(lir_cond_equal, call_site, LIR_OprFact::oopConst(NULL));
-        __ branch(lir_cond_equal, T_OBJECT, deopt_stub);
-      }
-
-      // Use the receiver register for the synthetic MethodHandle
-      // argument.
-      receiver = LIR_Assembler::receiverOpr();
-
-      // Load target MethodHandle from CallSite object.
-      __ load(new LIR_Address(call_site, java_lang_invoke_CallSite::target_offset_in_bytes(), T_OBJECT), receiver);
-
       __ call_dynamic(target, receiver, result_register,
-                      SharedRuntime::get_resolve_opt_virtual_call_stub(),
+                      SharedRuntime::get_resolve_static_call_stub(),
                       arg_list, info);
       break;
     }
--- a/src/share/vm/c1/c1_ValueMap.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_ValueMap.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -190,7 +190,7 @@
   LoadField* lf = value->as_LoadField();                                                 \
   bool must_kill = lf != NULL                                                            \
                    && lf->field()->holder() == field->holder()                           \
-                   && lf->field()->offset() == field->offset();
+                   && (all_offsets || lf->field()->offset() == field->offset());
 
 #define MUST_KILL_EXCEPTION(must_kill, entry, value)                                     \
   assert(entry->nesting() < nesting(), "must not find bigger nesting than current");     \
@@ -205,7 +205,7 @@
   GENERIC_KILL_VALUE(MUST_KILL_ARRAY);
 }
 
-void ValueMap::kill_field(ciField* field) {
+void ValueMap::kill_field(ciField* field, bool all_offsets) {
   GENERIC_KILL_VALUE(MUST_KILL_FIELD);
 }
 
@@ -280,9 +280,9 @@
   ValueMap* value_map_of(BlockBegin* block)      { return _gvn->value_map_of(block); }
 
   // implementation for abstract methods of ValueNumberingVisitor
-  void      kill_memory()                        { _too_complicated_loop = true; }
-  void      kill_field(ciField* field)           { current_map()->kill_field(field); };
-  void      kill_array(ValueType* type)          { current_map()->kill_array(type); };
+  void      kill_memory()                                 { _too_complicated_loop = true; }
+  void      kill_field(ciField* field, bool all_offsets)  { current_map()->kill_field(field, all_offsets); };
+  void      kill_array(ValueType* type)                   { current_map()->kill_array(type); };
 
  public:
   ShortLoopOptimizer(GlobalValueNumbering* gvn)
--- a/src/share/vm/c1/c1_ValueMap.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_ValueMap.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -114,7 +114,7 @@
   Value find_insert(Value x);
 
   void kill_memory();
-  void kill_field(ciField* field);
+  void kill_field(ciField* field, bool all_offsets);
   void kill_array(ValueType* type);
   void kill_exception();
   void kill_map(ValueMap* map);
@@ -136,7 +136,7 @@
  protected:
   // called by visitor functions for instructions that kill values
   virtual void kill_memory() = 0;
-  virtual void kill_field(ciField* field) = 0;
+  virtual void kill_field(ciField* field, bool all_offsets) = 0;
   virtual void kill_array(ValueType* type) = 0;
 
   // visitor functions
@@ -148,7 +148,7 @@
         x->field()->is_volatile()) {
       kill_memory();
     } else {
-      kill_field(x->field());
+      kill_field(x->field(), x->needs_patching());
     }
   }
   void do_StoreIndexed   (StoreIndexed*    x) { kill_array(x->type()); }
@@ -214,9 +214,9 @@
 
  public:
   // implementation for abstract methods of ValueNumberingVisitor
-  void          kill_memory()                    { _map->kill_memory(); }
-  void          kill_field(ciField* field)       { _map->kill_field(field); }
-  void          kill_array(ValueType* type)      { _map->kill_array(type); }
+  void          kill_memory()                                 { _map->kill_memory(); }
+  void          kill_field(ciField* field, bool all_offsets)  { _map->kill_field(field, all_offsets); }
+  void          kill_array(ValueType* type)                   { _map->kill_array(type); }
 
   ValueNumberingEffects(ValueMap* map): _map(map) {}
 };
@@ -234,9 +234,9 @@
   void          set_value_map_of(BlockBegin* block, ValueMap* map)   { assert(value_map_of(block) == NULL, ""); _value_maps.at_put(block->linear_scan_number(), map); }
 
   // implementation for abstract methods of ValueNumberingVisitor
-  void          kill_memory()                    { current_map()->kill_memory(); }
-  void          kill_field(ciField* field)       { current_map()->kill_field(field); }
-  void          kill_array(ValueType* type)      { current_map()->kill_array(type); }
+  void          kill_memory()                                 { current_map()->kill_memory(); }
+  void          kill_field(ciField* field, bool all_offsets)  { current_map()->kill_field(field, all_offsets); }
+  void          kill_array(ValueType* type)                   { current_map()->kill_array(type); }
 
   // main entry point that performs global value numbering
   GlobalValueNumbering(IR* ir);
--- a/src/share/vm/classfile/stackMapFrame.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/stackMapFrame.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,9 @@
 #include "utilities/globalDefinitions.hpp"
 
 StackMapFrame::StackMapFrame(u2 max_locals, u2 max_stack, ClassVerifier* v) :
-                      _offset(0), _locals_size(0), _stack_size(0), _flags(0),
-                      _max_locals(max_locals), _max_stack(max_stack),
-                      _verifier(v) {
+                      _offset(0), _locals_size(0), _stack_size(0),
+                      _stack_mark(0), _flags(0), _max_locals(max_locals),
+                      _max_stack(max_stack), _verifier(v) {
   Thread* thr = v->thread();
   _locals = NEW_RESOURCE_ARRAY_IN_THREAD(thr, VerificationType, max_locals);
   _stack = NEW_RESOURCE_ARRAY_IN_THREAD(thr, VerificationType, max_stack);
@@ -157,17 +157,17 @@
   }
 }
 
-
-bool StackMapFrame::is_assignable_to(
+// Returns the location of the first mismatch, or 'len' if there are no
+// mismatches
+int StackMapFrame::is_assignable_to(
     VerificationType* from, VerificationType* to, int32_t len, TRAPS) const {
-  for (int32_t i = 0; i < len; i++) {
-    bool subtype = to[i].is_assignable_from(
-      from[i], verifier(), THREAD);
-    if (!subtype) {
-      return false;
+  int32_t i = 0;
+  for (i = 0; i < len; i++) {
+    if (!to[i].is_assignable_from(from[i], verifier(), THREAD)) {
+      break;
     }
   }
-  return true;
+  return i;
 }
 
 bool StackMapFrame::has_flag_match_exception(
@@ -209,50 +209,84 @@
 }
 
 bool StackMapFrame::is_assignable_to(
-    const StackMapFrame* target, bool is_exception_handler, TRAPS) const {
-  if (_max_locals != target->max_locals() ||
-      _stack_size != target->stack_size()) {
+    const StackMapFrame* target, bool is_exception_handler,
+    ErrorContext* ctx, TRAPS) const {
+  if (_max_locals != target->max_locals()) {
+    *ctx = ErrorContext::locals_size_mismatch(
+        _offset, (StackMapFrame*)this, (StackMapFrame*)target);
+    return false;
+  }
+  if (_stack_size != target->stack_size()) {
+    *ctx = ErrorContext::stack_size_mismatch(
+        _offset, (StackMapFrame*)this, (StackMapFrame*)target);
     return false;
   }
   // Only need to compare type elements up to target->locals() or target->stack().
   // The remaining type elements in this state can be ignored because they are
   // assignable to bogus type.
-  bool match_locals = is_assignable_to(
-    _locals, target->locals(), target->locals_size(), CHECK_false);
-  bool match_stack = is_assignable_to(
-    _stack, target->stack(), _stack_size, CHECK_false);
+  int mismatch_loc;
+  mismatch_loc = is_assignable_to(
+    _locals, target->locals(), target->locals_size(), THREAD);
+  if (mismatch_loc != target->locals_size()) {
+    *ctx = ErrorContext::bad_type(target->offset(),
+        TypeOrigin::local(mismatch_loc, (StackMapFrame*)this),
+        TypeOrigin::sm_local(mismatch_loc, (StackMapFrame*)target));
+    return false;
+  }
+  mismatch_loc = is_assignable_to(_stack, target->stack(), _stack_size, THREAD);
+  if (mismatch_loc != _stack_size) {
+    *ctx = ErrorContext::bad_type(target->offset(),
+        TypeOrigin::stack(mismatch_loc, (StackMapFrame*)this),
+        TypeOrigin::sm_stack(mismatch_loc, (StackMapFrame*)target));
+    return false;
+  }
+
   bool match_flags = (_flags | target->flags()) == target->flags();
-
-  return match_locals && match_stack &&
-    (match_flags || (is_exception_handler && has_flag_match_exception(target)));
+  if (match_flags || is_exception_handler && has_flag_match_exception(target)) {
+    return true;
+  } else {
+    *ctx = ErrorContext::bad_flags(target->offset(),
+        (StackMapFrame*)this, (StackMapFrame*)target);
+    return false;
+  }
 }
 
 VerificationType StackMapFrame::pop_stack_ex(VerificationType type, TRAPS) {
   if (_stack_size <= 0) {
-    verifier()->verify_error(_offset, "Operand stack underflow");
+    verifier()->verify_error(
+        ErrorContext::stack_underflow(_offset, this),
+        "Operand stack underflow");
     return VerificationType::bogus_type();
   }
   VerificationType top = _stack[--_stack_size];
   bool subtype = type.is_assignable_from(
     top, verifier(), CHECK_(VerificationType::bogus_type()));
   if (!subtype) {
-    verifier()->verify_error(_offset, "Bad type on operand stack");
+    verifier()->verify_error(
+        ErrorContext::bad_type(_offset, stack_top_ctx(),
+            TypeOrigin::implicit(type)),
+        "Bad type on operand stack");
     return VerificationType::bogus_type();
   }
-  NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
   return top;
 }
 
 VerificationType StackMapFrame::get_local(
     int32_t index, VerificationType type, TRAPS) {
   if (index >= _max_locals) {
-    verifier()->verify_error(_offset, "Local variable table overflow");
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "Local variable table overflow");
     return VerificationType::bogus_type();
   }
   bool subtype = type.is_assignable_from(_locals[index],
     verifier(), CHECK_(VerificationType::bogus_type()));
   if (!subtype) {
-    verifier()->verify_error(_offset, "Bad local variable type");
+    verifier()->verify_error(
+        ErrorContext::bad_type(_offset,
+          TypeOrigin::local(index, this),
+          TypeOrigin::implicit(type)),
+        "Bad local variable type");
     return VerificationType::bogus_type();
   }
   if(index >= _locals_size) { _locals_size = index + 1; }
@@ -264,23 +298,37 @@
   assert(type1.is_long() || type1.is_double(), "must be long/double");
   assert(type2.is_long2() || type2.is_double2(), "must be long/double_2");
   if (index >= _locals_size - 1) {
-    verifier()->verify_error(_offset, "get long/double overflows locals");
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "get long/double overflows locals");
     return;
   }
-  bool subtype1 = type1.is_assignable_from(
-    _locals[index], verifier(), CHECK);
-  bool subtype2 = type2.is_assignable_from(
-    _locals[index+1], verifier(), CHECK);
-  if (!subtype1 || !subtype2) {
-    verifier()->verify_error(_offset, "Bad local variable type");
-    return;
+  bool subtype = type1.is_assignable_from(_locals[index], verifier(), CHECK);
+  if (!subtype) {
+    verifier()->verify_error(
+        ErrorContext::bad_type(_offset,
+            TypeOrigin::local(index, this), TypeOrigin::implicit(type1)),
+        "Bad local variable type");
+  } else {
+    subtype = type2.is_assignable_from(_locals[index + 1], verifier(), CHECK);
+    if (!subtype) {
+      /* Unreachable? All local store routines convert a split long or double
+       * into a TOP during the store.  So we should never end up seeing an
+       * orphaned half.  */
+      verifier()->verify_error(
+          ErrorContext::bad_type(_offset,
+              TypeOrigin::local(index + 1, this), TypeOrigin::implicit(type2)),
+          "Bad local variable type");
+    }
   }
 }
 
 void StackMapFrame::set_local(int32_t index, VerificationType type, TRAPS) {
   assert(!type.is_check(), "Must be a real type");
   if (index >= _max_locals) {
-    verifier()->verify_error("Local variable table overflow", _offset);
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "Local variable table overflow");
     return;
   }
   // If type at index is double or long, set the next location to be unusable
@@ -310,7 +358,9 @@
   assert(type1.is_long() || type1.is_double(), "must be long/double");
   assert(type2.is_long2() || type2.is_double2(), "must be long/double_2");
   if (index >= _max_locals - 1) {
-    verifier()->verify_error("Local variable table overflow", _offset);
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "Local variable table overflow");
     return;
   }
   // If type at index+1 is double or long, set the next location to be unusable
@@ -336,21 +386,30 @@
   }
 }
 
-#ifndef PRODUCT
-
-void StackMapFrame::print() const {
-  tty->print_cr("stackmap_frame[%d]:", _offset);
-  tty->print_cr("flags = 0x%x", _flags);
-  tty->print("locals[%d] = { ", _locals_size);
-  for (int32_t i = 0; i < _locals_size; i++) {
-    _locals[i].print_on(tty);
-  }
-  tty->print_cr(" }");
-  tty->print("stack[%d] = { ", _stack_size);
-  for (int32_t j = 0; j < _stack_size; j++) {
-    _stack[j].print_on(tty);
-  }
-  tty->print_cr(" }");
+TypeOrigin StackMapFrame::stack_top_ctx() {
+  return TypeOrigin::stack(_stack_size, this);
 }
 
-#endif
+void StackMapFrame::print_on(outputStream* str) const {
+  str->indent().print_cr("bci: @%d", _offset);
+  str->indent().print_cr("flags: {%s }",
+      flag_this_uninit() ? " flagThisUninit" : "");
+  str->indent().print("locals: {");
+  for (int32_t i = 0; i < _locals_size; ++i) {
+    str->print(" ");
+    _locals[i].print_on(str);
+    if (i != _locals_size - 1) {
+      str->print(",");
+    }
+  }
+  str->print_cr(" }");
+  str->indent().print("stack: {");
+  for (int32_t j = 0; j < _stack_size; ++j) {
+    str->print(" ");
+    _stack[j].print_on(str);
+    if (j != _stack_size - 1) {
+      str->print(",");
+    }
+  }
+  str->print_cr(" }");
+}
--- a/src/share/vm/classfile/stackMapFrame.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/stackMapFrame.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,8 @@
 
 // A StackMapFrame represents one frame in the stack map attribute.
 
+class TypeContext;
+
 enum {
   FLAG_THIS_UNINIT = 0x01
 };
@@ -47,6 +49,10 @@
   int32_t _locals_size;  // number of valid type elements in _locals
   int32_t _stack_size;   // number of valid type elements in _stack
 
+  int32_t _stack_mark;   // Records the size of the stack prior to an
+                         // instruction modification, to allow rewinding
+                         // when/if an error occurs.
+
   int32_t _max_locals;
   int32_t _max_stack;
 
@@ -56,6 +62,31 @@
 
   ClassVerifier* _verifier;  // the verifier verifying this method
 
+  StackMapFrame(const StackMapFrame& cp) :
+      _offset(cp._offset), _locals_size(cp._locals_size),
+      _stack_size(cp._stack_size), _stack_mark(cp._stack_mark),
+      _max_locals(cp._max_locals), _max_stack(cp._max_stack),
+      _flags(cp._flags) {
+    _locals = NEW_RESOURCE_ARRAY(VerificationType, _max_locals);
+    for (int i = 0; i < _max_locals; ++i) {
+      if (i < _locals_size) {
+        _locals[i] = cp._locals[i];
+      } else {
+        _locals[i] = VerificationType::bogus_type();
+      }
+    }
+    int ss = MAX2(_stack_size, _stack_mark);
+    _stack = NEW_RESOURCE_ARRAY(VerificationType, _max_stack);
+    for (int i = 0; i < _max_stack; ++i) {
+      if (i < ss) {
+        _stack[i] = cp._stack[i];
+      } else {
+        _stack[i] = VerificationType::bogus_type();
+      }
+    }
+    _verifier = NULL;
+  }
+
  public:
   // constructors
 
@@ -77,16 +108,21 @@
                 ClassVerifier* v) : _offset(offset), _flags(flags),
                                     _locals_size(locals_size),
                                     _stack_size(stack_size),
+                                    _stack_mark(-1),
                                     _max_locals(max_locals),
                                     _max_stack(max_stack),
                                     _locals(locals), _stack(stack),
                                     _verifier(v) { }
 
+  static StackMapFrame* copy(StackMapFrame* smf) {
+    return new StackMapFrame(*smf);
+  }
+
   inline void set_offset(int32_t offset)      { _offset = offset; }
   inline void set_verifier(ClassVerifier* v)  { _verifier = v; }
   inline void set_flags(u1 flags)             { _flags = flags; }
   inline void set_locals_size(u2 locals_size) { _locals_size = locals_size; }
-  inline void set_stack_size(u2 stack_size)   { _stack_size = stack_size; }
+  inline void set_stack_size(u2 stack_size)   { _stack_size = _stack_mark = stack_size; }
   inline void clear_stack()                   { _stack_size = 0; }
   inline int32_t offset()   const             { return _offset; }
   inline ClassVerifier* verifier() const      { return _verifier; }
@@ -134,14 +170,37 @@
   void copy_stack(const StackMapFrame* src);
 
   // Return true if this stack map frame is assignable to target.
-  bool is_assignable_to(const StackMapFrame* target,
-                        bool is_exception_handler, TRAPS) const;
+  bool is_assignable_to(
+      const StackMapFrame* target, bool is_exception_handler,
+      ErrorContext* ctx, TRAPS) const;
+
+  inline void set_mark() {
+#ifdef DEBUG
+    // Put bogus type to indicate it's no longer valid.
+    if (_stack_mark != -1) {
+      for (int i = _stack_mark; i >= _stack_size; --i) {
+        _stack[i] = VerificationType::bogus_type();
+      }
+    }
+#endif // def DEBUG
+    _stack_mark = _stack_size;
+  }
+
+  // Used when an error occurs and we want to reset the stack to the state
+  // it was before operands were popped off.
+  void restore() {
+    if (_stack_mark != -1) {
+      _stack_size = _stack_mark;
+    }
+  }
 
   // Push type into stack type array.
   inline void push_stack(VerificationType type, TRAPS) {
     assert(!type.is_check(), "Must be a real type");
     if (_stack_size >= _max_stack) {
-      verifier()->verify_error(_offset, "Operand stack overflow");
+      verifier()->verify_error(
+          ErrorContext::stack_overflow(_offset, this),
+          "Operand stack overflow");
       return;
     }
     _stack[_stack_size++] = type;
@@ -152,7 +211,9 @@
     assert(type1.is_long() || type1.is_double(), "must be long/double");
     assert(type2.is_long2() || type2.is_double2(), "must be long/double_2");
     if (_stack_size >= _max_stack - 1) {
-      verifier()->verify_error(_offset, "Operand stack overflow");
+      verifier()->verify_error(
+          ErrorContext::stack_overflow(_offset, this),
+          "Operand stack overflow");
       return;
     }
     _stack[_stack_size++] = type1;
@@ -162,13 +223,12 @@
   // Pop and return the top type on stack without verifying.
   inline VerificationType pop_stack(TRAPS) {
     if (_stack_size <= 0) {
-      verifier()->verify_error(_offset, "Operand stack underflow");
+      verifier()->verify_error(
+          ErrorContext::stack_underflow(_offset, this),
+          "Operand stack underflow");
       return VerificationType::bogus_type();
     }
-    // Put bogus type to indicate it's no longer valid.
-    // Added to make it consistent with the other pop_stack method.
     VerificationType top = _stack[--_stack_size];
-    NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
     return top;
   }
 
@@ -180,8 +240,7 @@
       bool subtype = type.is_assignable_from(
         top, verifier(), CHECK_(VerificationType::bogus_type()));
       if (subtype) {
-        _stack_size --;
-        NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
+        --_stack_size;
         return top;
       }
     }
@@ -199,8 +258,6 @@
       bool subtype2 = type2.is_assignable_from(top2, verifier(), CHECK);
       if (subtype1 && subtype2) {
         _stack_size -= 2;
-        NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
-        NOT_PRODUCT( _stack[_stack_size+1] = VerificationType::bogus_type(); )
         return;
       }
     }
@@ -208,6 +265,14 @@
     pop_stack_ex(type2, THREAD);
   }
 
+  VerificationType local_at(int index) {
+    return _locals[index];
+  }
+
+  VerificationType stack_at(int index) {
+    return _stack[index];
+  }
+
   // Uncommon case that throws exceptions.
   VerificationType pop_stack_ex(VerificationType type, TRAPS);
 
@@ -226,13 +291,14 @@
 
   // Private auxiliary method used only in is_assignable_to(StackMapFrame).
   // Returns true if src is assignable to target.
-  bool is_assignable_to(
+  int is_assignable_to(
     VerificationType* src, VerificationType* target, int32_t len, TRAPS) const;
 
   bool has_flag_match_exception(const StackMapFrame* target) const;
 
-  // Debugging
-  void print() const PRODUCT_RETURN;
+  TypeOrigin stack_top_ctx();
+
+  void print_on(outputStream* str) const;
 };
 
 #endif // SHARE_VM_CLASSFILE_STACKMAPFRAME_HPP
--- a/src/share/vm/classfile/stackMapTable.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/stackMapTable.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,7 +46,9 @@
       _frame_array[i] = frame;
       int offset = frame->offset();
       if (offset >= code_len || code_data[offset] == 0) {
-        frame->verifier()->verify_error("StackMapTable error: bad offset");
+        frame->verifier()->verify_error(
+            ErrorContext::bad_stackmap(i, frame),
+            "StackMapTable error: bad offset");
         return;
       }
       pre_frame = frame;
@@ -68,12 +70,9 @@
 
 bool StackMapTable::match_stackmap(
     StackMapFrame* frame, int32_t target,
-    bool match, bool update, TRAPS) const {
+    bool match, bool update, ErrorContext* ctx, TRAPS) const {
   int index = get_index_from_offset(target);
-
-  return match_stackmap(
-    frame, target, index, match,
-    update, CHECK_VERIFY_(frame->verifier(), false));
+  return match_stackmap(frame, target, index, match, update, ctx, THREAD);
 }
 
 // Match and/or update current_frame to the frame in stackmap table with
@@ -88,23 +87,23 @@
 // unconditional branch:                                 true   true
 bool StackMapTable::match_stackmap(
     StackMapFrame* frame, int32_t target, int32_t frame_index,
-    bool match, bool update, TRAPS) const {
+    bool match, bool update, ErrorContext* ctx, TRAPS) const {
   if (frame_index < 0 || frame_index >= _frame_count) {
-    frame->verifier()->verify_error(frame->offset(),
-      "Expecting a stackmap frame at branch target %d", target);
+    *ctx = ErrorContext::missing_stackmap(frame->offset());
+    frame->verifier()->verify_error(
+        *ctx, "Expecting a stackmap frame at branch target %d", target);
     return false;
   }
 
+  StackMapFrame *stackmap_frame = _frame_array[frame_index];
   bool result = true;
-  StackMapFrame *stackmap_frame = _frame_array[frame_index];
   if (match) {
     // when checking handler target, match == true && update == false
     bool is_exception_handler = !update;
     // Has direct control flow from last instruction, need to match the two
     // frames.
-    result = frame->is_assignable_to(
-      stackmap_frame, is_exception_handler,
-      CHECK_VERIFY_(frame->verifier(), false));
+    result = frame->is_assignable_to(stackmap_frame, is_exception_handler,
+        ctx, CHECK_VERIFY_(frame->verifier(), result));
   }
   if (update) {
     // Use the frame in stackmap table as current frame
@@ -125,11 +124,12 @@
 
 void StackMapTable::check_jump_target(
     StackMapFrame* frame, int32_t target, TRAPS) const {
+  ErrorContext ctx;
   bool match = match_stackmap(
-    frame, target, true, false, CHECK_VERIFY(frame->verifier()));
+    frame, target, true, false, &ctx, CHECK_VERIFY(frame->verifier()));
   if (!match || (target < 0 || target >= _code_length)) {
-    frame->verifier()->verify_error(frame->offset(),
-      "Inconsistent stackmap frames at branch target %d", target);
+    frame->verifier()->verify_error(ctx,
+        "Inconsistent stackmap frames at branch target %d", target);
     return;
   }
   // check if uninitialized objects exist on backward branches
@@ -139,25 +139,25 @@
 void StackMapTable::check_new_object(
     const StackMapFrame* frame, int32_t target, TRAPS) const {
   if (frame->offset() > target && frame->has_new_object()) {
-    frame->verifier()->verify_error(frame->offset(),
-      "Uninitialized object exists on backward branch %d", target);
+    frame->verifier()->verify_error(
+        ErrorContext::bad_code(frame->offset()),
+        "Uninitialized object exists on backward branch %d", target);
     return;
   }
 }
 
-#ifndef PRODUCT
-
-void StackMapTable::print() const {
-  tty->print_cr("StackMapTable: frame_count = %d", _frame_count);
-  tty->print_cr("table = { ");
-  for (int32_t i = 0; i < _frame_count; i++) {
-    _frame_array[i]->print();
+void StackMapTable::print_on(outputStream* str) const {
+  str->indent().print_cr("StackMapTable: frame_count = %d", _frame_count);
+  str->indent().print_cr("table = { ");
+  {
+    streamIndentor si(str);
+    for (int32_t i = 0; i < _frame_count; ++i) {
+      _frame_array[i]->print_on(str);
+    }
   }
-  tty->print_cr(" }");
+  str->print_cr(" }");
 }
 
-#endif
-
 int32_t StackMapReader::chop(
     VerificationType* locals, int32_t length, int32_t chops) {
   if (locals == NULL) return -1;
--- a/src/share/vm/classfile/stackMapTable.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/stackMapTable.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #define SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP
 
 #include "classfile/stackMapFrame.hpp"
+#include "classfile/verifier.hpp"
 #include "memory/allocation.hpp"
 #include "oops/constantPoolOop.hpp"
 #include "oops/methodOop.hpp"
@@ -73,12 +74,12 @@
   // specified offset. Return true if the two frames match.
   bool match_stackmap(
     StackMapFrame* current_frame, int32_t offset,
-    bool match, bool update, TRAPS) const;
+    bool match, bool update, ErrorContext* ctx, TRAPS) const;
   // Match and/or update current_frame to the frame in stackmap table with
   // specified offset and frame index. Return true if the two frames match.
   bool match_stackmap(
     StackMapFrame* current_frame, int32_t offset, int32_t frame_index,
-    bool match, bool update, TRAPS) const;
+    bool match, bool update, ErrorContext* ctx, TRAPS) const;
 
   // Check jump instructions. Make sure there are no uninitialized
   // instances on backward branch.
@@ -93,8 +94,7 @@
   void check_new_object(
     const StackMapFrame* frame, int32_t target, TRAPS) const;
 
-  // Debugging
-  void print() const PRODUCT_RETURN;
+  void print_on(outputStream* str) const;
 };
 
 class StackMapStream : StackObj {
--- a/src/share/vm/classfile/stackMapTableFormat.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/stackMapTableFormat.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -135,7 +135,6 @@
                 !is_object() && !is_uninitialized()));
   }
 
-#ifdef ASSERT
   void print_on(outputStream* st) {
     switch (tag()) {
       case ITEM_Top: st->print("Top"); break;
@@ -154,14 +153,13 @@
         assert(false, "Bad verification_type_info");
     }
   }
-#endif
 };
 
 #define FOR_EACH_STACKMAP_FRAME_TYPE(macro, arg1, arg2) \
   macro(same_frame, arg1, arg2) \
   macro(same_frame_extended, arg1, arg2) \
-  macro(same_frame_1_stack_item_frame, arg1, arg2) \
-  macro(same_frame_1_stack_item_extended, arg1, arg2) \
+  macro(same_locals_1_stack_item_frame, arg1, arg2) \
+  macro(same_locals_1_stack_item_extended, arg1, arg2) \
   macro(chop_frame, arg1, arg2) \
   macro(append_frame, arg1, arg2) \
   macro(full_frame, arg1, arg2)
@@ -203,9 +201,8 @@
   // that we don't read past a particular memory limit.  It returns false
   // if any part of the data structure is outside the specified memory bounds.
   inline bool verify(address start, address end) const;
-#ifdef ASSERT
-  inline void print_on(outputStream* st) const;
-#endif
+
+  inline void print_on(outputStream* st, int current_offset) const;
 
   // Create as_xxx and is_xxx methods for the subtypes
 #define FRAME_TYPE_DECL(stackmap_frame_type, arg1, arg2) \
@@ -263,11 +260,9 @@
     return true;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame(%d)", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_frame(@%d)", offset_delta() + current_offset);
   }
-#endif
 };
 
 class same_frame_extended : public stack_map_frame {
@@ -311,14 +306,12 @@
     return frame_type_addr() + size() <= end;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame_extended(%d)", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_frame_extended(@%d)", offset_delta() + current_offset);
   }
-#endif
 };
 
-class same_frame_1_stack_item_frame : public stack_map_frame {
+class same_locals_1_stack_item_frame : public stack_map_frame {
  private:
   address type_addr() const { return frame_type_addr() + sizeof(u1); }
 
@@ -332,14 +325,14 @@
     return tag >= 64 && tag < 128;
   }
 
-  static same_frame_1_stack_item_frame* at(address addr) {
+  static same_locals_1_stack_item_frame* at(address addr) {
     assert(is_frame_type(*addr), "Wrong frame id");
-    return (same_frame_1_stack_item_frame*)addr;
+    return (same_locals_1_stack_item_frame*)addr;
   }
 
-  static same_frame_1_stack_item_frame* create_at(
+  static same_locals_1_stack_item_frame* create_at(
       address addr, int offset_delta, verification_type_info* vti) {
-    same_frame_1_stack_item_frame* sm = (same_frame_1_stack_item_frame*)addr;
+    same_locals_1_stack_item_frame* sm = (same_locals_1_stack_item_frame*)addr;
     sm->set_offset_delta(offset_delta);
     if (vti != NULL) {
       sm->set_type(vti);
@@ -382,16 +375,15 @@
     return types()->verify(start, end);
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame_1_stack_item_frame(%d,", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_locals_1_stack_item_frame(@%d,",
+        offset_delta() + current_offset);
     types()->print_on(st);
     st->print(")");
   }
-#endif
 };
 
-class same_frame_1_stack_item_extended : public stack_map_frame {
+class same_locals_1_stack_item_extended : public stack_map_frame {
  private:
   address offset_delta_addr() const { return frame_type_addr() + sizeof(u1); }
   address type_addr() const { return offset_delta_addr() + sizeof(u2); }
@@ -403,15 +395,15 @@
     return tag == _frame_id;
   }
 
-  static same_frame_1_stack_item_extended* at(address addr) {
+  static same_locals_1_stack_item_extended* at(address addr) {
     assert(is_frame_type(*addr), "Wrong frame id");
-    return (same_frame_1_stack_item_extended*)addr;
+    return (same_locals_1_stack_item_extended*)addr;
   }
 
-  static same_frame_1_stack_item_extended* create_at(
+  static same_locals_1_stack_item_extended* create_at(
       address addr, int offset_delta, verification_type_info* vti) {
-    same_frame_1_stack_item_extended* sm =
-       (same_frame_1_stack_item_extended*)addr;
+    same_locals_1_stack_item_extended* sm =
+       (same_locals_1_stack_item_extended*)addr;
     sm->set_frame_type(_frame_id);
     sm->set_offset_delta(offset_delta);
     if (vti != NULL) {
@@ -448,13 +440,12 @@
     return type_addr() < end && types()->verify(start, end);
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame_1_stack_item_extended(%d,", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_locals_1_stack_item_extended(@%d,",
+        offset_delta() + current_offset);
     types()->print_on(st);
     st->print(")");
   }
-#endif
 };
 
 class chop_frame : public stack_map_frame {
@@ -517,11 +508,9 @@
     return frame_type_addr() + size() <= end;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("chop_frame(%d,%d)", offset_delta(), chops());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("chop_frame(@%d,%d)", offset_delta() + current_offset, chops());
   }
-#endif
 };
 
 class append_frame : public stack_map_frame {
@@ -618,9 +607,8 @@
     return false;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("append_frame(%d,", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("append_frame(@%d,", offset_delta() + current_offset);
     verification_type_info* vti = types();
     for (int i = 0; i < number_of_types(); ++i) {
       vti->print_on(st);
@@ -631,7 +619,6 @@
     }
     st->print(")");
   }
-#endif
 };
 
 class full_frame : public stack_map_frame {
@@ -774,9 +761,8 @@
     return true;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("full_frame(%d,{", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("full_frame(@%d,{", offset_delta() + current_offset);
     verification_type_info* vti = locals();
     for (int i = 0; i < num_locals(); ++i) {
       vti->print_on(st);
@@ -798,7 +784,6 @@
     }
     st->print("})");
   }
-#endif
 };
 
 #define VIRTUAL_DISPATCH(stack_frame_type, func_name, args) \
@@ -852,11 +837,9 @@
   return false;
 }
 
-#ifdef ASSERT
-void stack_map_frame::print_on(outputStream* st) const {
-  FOR_EACH_STACKMAP_FRAME_TYPE(VOID_VIRTUAL_DISPATCH, print_on, (st));
+void stack_map_frame::print_on(outputStream* st, int offs = -1) const {
+  FOR_EACH_STACKMAP_FRAME_TYPE(VOID_VIRTUAL_DISPATCH, print_on, (st, offs));
 }
-#endif
 
 #undef VIRTUAL_DISPATCH
 #undef VOID_VIRTUAL_DISPATCH
@@ -873,16 +856,46 @@
 FOR_EACH_STACKMAP_FRAME_TYPE(AS_SUBTYPE_DEF, x, x)
 #undef AS_SUBTYPE_DEF
 
+class stack_map_table {
+ private:
+  address number_of_entries_addr() const {
+    return (address)this;
+  }
+  address entries_addr() const {
+    return number_of_entries_addr() + sizeof(u2);
+  }
+
+ protected:
+  // No constructors  - should be 'private', but GCC issues a warning if it is
+  stack_map_table() {}
+  stack_map_table(const stack_map_table&) {}
+
+ public:
+
+  static stack_map_table* at(address addr) {
+    return (stack_map_table*)addr;
+  }
+
+  u2 number_of_entries() const {
+    return Bytes::get_Java_u2(number_of_entries_addr());
+  }
+  stack_map_frame* entries() const {
+    return stack_map_frame::at(entries_addr());
+  }
+
+  void set_number_of_entries(u2 num) {
+    Bytes::put_Java_u2(number_of_entries_addr(), num);
+  }
+};
+
 class stack_map_table_attribute {
  private:
   address name_index_addr() const {
       return (address)this; }
   address attribute_length_addr() const {
       return name_index_addr() + sizeof(u2); }
-  address number_of_entries_addr() const {
+  address stack_map_table_addr() const {
       return attribute_length_addr() + sizeof(u4); }
-  address entries_addr() const {
-      return number_of_entries_addr() + sizeof(u2); }
 
  protected:
   // No constructors  - should be 'private', but GCC issues a warning if it is
@@ -896,17 +909,11 @@
   }
 
   u2 name_index() const {
-       return Bytes::get_Java_u2(name_index_addr()); }
+    return Bytes::get_Java_u2(name_index_addr()); }
   u4 attribute_length() const {
-      return Bytes::get_Java_u4(attribute_length_addr()); }
-  u2 number_of_entries() const {
-      return Bytes::get_Java_u2(number_of_entries_addr()); }
-  stack_map_frame* entries() const {
-    return stack_map_frame::at(entries_addr());
-  }
-
-  static size_t header_size() {
-      return sizeof(u2) + sizeof(u4);
+    return Bytes::get_Java_u4(attribute_length_addr()); }
+  stack_map_table* table() const {
+    return stack_map_table::at(stack_map_table_addr());
   }
 
   void set_name_index(u2 idx) {
@@ -915,9 +922,8 @@
   void set_attribute_length(u4 len) {
     Bytes::put_Java_u4(attribute_length_addr(), len);
   }
-  void set_number_of_entries(u2 num) {
-    Bytes::put_Java_u2(number_of_entries_addr(), num);
-  }
 };
 
+#undef FOR_EACH_STACKMAP_FRAME_TYPE
+
 #endif // SHARE_VM_CLASSFILE_STACKMAPTABLEFORMAT_HPP
--- a/src/share/vm/classfile/verificationType.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/verificationType.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -110,34 +110,34 @@
   }
 }
 
-#ifndef PRODUCT
-
 void VerificationType::print_on(outputStream* st) const {
   switch (_u._data) {
-    case Bogus:            st->print(" bogus "); break;
-    case Category1:        st->print(" category1 "); break;
-    case Category2:        st->print(" category2 "); break;
-    case Category2_2nd:    st->print(" category2_2nd "); break;
-    case Boolean:          st->print(" boolean "); break;
-    case Byte:             st->print(" byte "); break;
-    case Short:            st->print(" short "); break;
-    case Char:             st->print(" char "); break;
-    case Integer:          st->print(" integer "); break;
-    case Float:            st->print(" float "); break;
-    case Long:             st->print(" long "); break;
-    case Double:           st->print(" double "); break;
-    case Long_2nd:         st->print(" long_2nd "); break;
-    case Double_2nd:       st->print(" double_2nd "); break;
-    case Null:             st->print(" null "); break;
+    case Bogus:            st->print("top"); break;
+    case Category1:        st->print("category1"); break;
+    case Category2:        st->print("category2"); break;
+    case Category2_2nd:    st->print("category2_2nd"); break;
+    case Boolean:          st->print("boolean"); break;
+    case Byte:             st->print("byte"); break;
+    case Short:            st->print("short"); break;
+    case Char:             st->print("char"); break;
+    case Integer:          st->print("integer"); break;
+    case Float:            st->print("float"); break;
+    case Long:             st->print("long"); break;
+    case Double:           st->print("double"); break;
+    case Long_2nd:         st->print("long_2nd"); break;
+    case Double_2nd:       st->print("double_2nd"); break;
+    case Null:             st->print("null"); break;
+    case ReferenceQuery:   st->print("reference type"); break;
+    case Category1Query:   st->print("category1 type"); break;
+    case Category2Query:   st->print("category2 type"); break;
+    case Category2_2ndQuery: st->print("category2_2nd type"); break;
     default:
       if (is_uninitialized_this()) {
-        st->print(" uninitializedThis ");
+        st->print("uninitializedThis");
       } else if (is_uninitialized()) {
-        st->print(" uninitialized %d ", bci());
+        st->print("uninitialized %d", bci());
       } else {
-        st->print(" class %s ", name()->as_klass_external_name());
+        name()->print_value_on(st);
       }
   }
 }
-
-#endif
--- a/src/share/vm/classfile/verificationType.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/verificationType.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -157,7 +157,7 @@
 
   // For reference types, store the actual Symbol
   static VerificationType reference_type(Symbol* sh) {
-      assert(((uintptr_t)sh & 0x3) == 0, "Oops must be aligned");
+      assert(((uintptr_t)sh & 0x3) == 0, "Symbols must be aligned");
       // If the above assert fails in the future because oop* isn't aligned,
       // then this type encoding system will have to change to have a tag value
       // to descriminate between oops and primitives.
@@ -303,7 +303,7 @@
     return index;
   }
 
-  void print_on(outputStream* st) const PRODUCT_RETURN;
+  void print_on(outputStream* st) const;
 
  private:
 
--- a/src/share/vm/classfile/verifier.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/verifier.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -26,9 +26,12 @@
 #include "classfile/classFileStream.hpp"
 #include "classfile/javaClasses.hpp"
 #include "classfile/stackMapTable.hpp"
+#include "classfile/stackMapFrame.hpp"
+#include "classfile/stackMapTableFormat.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/verifier.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "interpreter/bytecodes.hpp"
 #include "interpreter/bytecodeStream.hpp"
 #include "memory/oopFactory.hpp"
 #include "memory/resourceArea.hpp"
@@ -110,8 +113,11 @@
   Symbol* exception_name = NULL;
   const size_t message_buffer_len = klass->name()->utf8_length() + 1024;
   char* message_buffer = NEW_RESOURCE_ARRAY(char, message_buffer_len);
+  char* exception_message = message_buffer;
 
   const char* klassName = klass->external_name();
+  bool can_failover = FailOverToOldVerifier &&
+      klass->major_version() < NOFAILOVER_MAJOR_VERSION;
 
   // If the class should be verified, first see if we can use the split
   // verifier.  If not, or if verification fails and FailOverToOldVerifier
@@ -122,27 +128,28 @@
     }
     if (UseSplitVerifier &&
         klass->major_version() >= STACKMAP_ATTRIBUTE_MAJOR_VERSION) {
-        ClassVerifier split_verifier(
-          klass, message_buffer, message_buffer_len, THREAD);
-        split_verifier.verify_class(THREAD);
-        exception_name = split_verifier.result();
-      if (klass->major_version() < NOFAILOVER_MAJOR_VERSION &&
-          FailOverToOldVerifier && !HAS_PENDING_EXCEPTION &&
+      ClassVerifier split_verifier(klass, THREAD);
+      split_verifier.verify_class(THREAD);
+      exception_name = split_verifier.result();
+      if (can_failover && !HAS_PENDING_EXCEPTION &&
           (exception_name == vmSymbols::java_lang_VerifyError() ||
            exception_name == vmSymbols::java_lang_ClassFormatError())) {
-        if (TraceClassInitialization) {
+        if (TraceClassInitialization || VerboseVerification) {
           tty->print_cr(
             "Fail over class verification to old verifier for: %s", klassName);
         }
         exception_name = inference_verify(
           klass, message_buffer, message_buffer_len, THREAD);
       }
+      if (exception_name != NULL) {
+        exception_message = split_verifier.exception_message();
+      }
     } else {
       exception_name = inference_verify(
           klass, message_buffer, message_buffer_len, THREAD);
     }
 
-    if (TraceClassInitialization) {
+    if (TraceClassInitialization || VerboseVerification) {
       if (HAS_PENDING_EXCEPTION) {
         tty->print("Verification for %s has", klassName);
         tty->print_cr(" exception pending %s ",
@@ -173,7 +180,7 @@
       kls = kls->super();
     }
     message_buffer[message_buffer_len - 1] = '\0'; // just to be sure
-    THROW_MSG_(exception_name, message_buffer, false);
+    THROW_MSG_(exception_name, exception_message, false);
   }
 }
 
@@ -221,7 +228,7 @@
   }
 
   ResourceMark rm(THREAD);
-  if (ClassVerifier::_verify_verbose) {
+  if (VerboseVerification) {
     tty->print_cr("Verifying class %s with old format", klass->external_name());
   }
 
@@ -265,14 +272,252 @@
   }
 }
 
+TypeOrigin TypeOrigin::null() {
+  return TypeOrigin();
+}
+TypeOrigin TypeOrigin::local(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(CF_LOCALS, index, StackMapFrame::copy(frame),
+     frame->local_at(index));
+}
+TypeOrigin TypeOrigin::stack(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(CF_STACK, index, StackMapFrame::copy(frame),
+      frame->stack_at(index));
+}
+TypeOrigin TypeOrigin::sm_local(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(SM_LOCALS, index, StackMapFrame::copy(frame),
+      frame->local_at(index));
+}
+TypeOrigin TypeOrigin::sm_stack(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(SM_STACK, index, StackMapFrame::copy(frame),
+      frame->stack_at(index));
+}
+TypeOrigin TypeOrigin::bad_index(u2 index) {
+  return TypeOrigin(BAD_INDEX, index, NULL, VerificationType::bogus_type());
+}
+TypeOrigin TypeOrigin::cp(u2 index, VerificationType vt) {
+  return TypeOrigin(CONST_POOL, index, NULL, vt);
+}
+TypeOrigin TypeOrigin::signature(VerificationType vt) {
+  return TypeOrigin(SIG, 0, NULL, vt);
+}
+TypeOrigin TypeOrigin::implicit(VerificationType t) {
+  return TypeOrigin(IMPLICIT, 0, NULL, t);
+}
+TypeOrigin TypeOrigin::frame(StackMapFrame* frame) {
+  return TypeOrigin(FRAME_ONLY, 0, StackMapFrame::copy(frame),
+                    VerificationType::bogus_type());
+}
+
+void TypeOrigin::reset_frame() {
+  if (_frame != NULL) {
+    _frame->restore();
+  }
+}
+
+void TypeOrigin::details(outputStream* ss) const {
+  _type.print_on(ss);
+  switch (_origin) {
+    case CF_LOCALS:
+      ss->print(" (current frame, locals[%d])", _index);
+      break;
+    case CF_STACK:
+      ss->print(" (current frame, stack[%d])", _index);
+      break;
+    case SM_LOCALS:
+      ss->print(" (stack map, locals[%d])", _index);
+      break;
+    case SM_STACK:
+      ss->print(" (stack map, stack[%d])", _index);
+      break;
+    case CONST_POOL:
+      ss->print(" (constant pool %d)", _index);
+      break;
+    case SIG:
+      ss->print(" (from method signature)");
+      break;
+    case IMPLICIT:
+    case FRAME_ONLY:
+    case NONE:
+    default:
+      ;
+  }
+}
+
+#ifdef ASSERT
+void TypeOrigin::print_on(outputStream* str) const {
+  str->print("{%d,%d,%p:", _origin, _index, _frame);
+  if (_frame != NULL) {
+    _frame->print_on(str);
+  } else {
+    str->print("null");
+  }
+  str->print(",");
+  _type.print_on(str);
+  str->print("}");
+}
+#endif
+
+void ErrorContext::details(outputStream* ss, methodOop method) const {
+  if (is_valid()) {
+    ss->print_cr("");
+    ss->print_cr("Exception Details:");
+    location_details(ss, method);
+    reason_details(ss);
+    frame_details(ss);
+    bytecode_details(ss, method);
+    handler_details(ss, method);
+    stackmap_details(ss, method);
+  }
+}
+
+void ErrorContext::reason_details(outputStream* ss) const {
+  streamIndentor si(ss);
+  ss->indent().print_cr("Reason:");
+  streamIndentor si2(ss);
+  ss->indent().print("");
+  switch (_fault) {
+    case INVALID_BYTECODE:
+      ss->print("Error exists in the bytecode");
+      break;
+    case WRONG_TYPE:
+      if (_expected.is_valid()) {
+        ss->print("Type ");
+        _type.details(ss);
+        ss->print(" is not assignable to ");
+        _expected.details(ss);
+      } else {
+        ss->print("Invalid type: ");
+        _type.details(ss);
+      }
+      break;
+    case FLAGS_MISMATCH:
+      if (_expected.is_valid()) {
+        ss->print("Current frame's flags are not assignable "
+                  "to stack map frame's.");
+      } else {
+        ss->print("Current frame's flags are invalid in this context.");
+      }
+      break;
+    case BAD_CP_INDEX:
+      ss->print("Constant pool index %d is invalid", _type.index());
+      break;
+    case BAD_LOCAL_INDEX:
+      ss->print("Local index %d is invalid", _type.index());
+      break;
+    case LOCALS_SIZE_MISMATCH:
+      ss->print("Current frame's local size doesn't match stackmap.");
+      break;
+    case STACK_SIZE_MISMATCH:
+      ss->print("Current frame's stack size doesn't match stackmap.");
+      break;
+    case STACK_OVERFLOW:
+      ss->print("Exceeded max stack size.");
+      break;
+    case STACK_UNDERFLOW:
+      ss->print("Attempt to pop empty stack.");
+      break;
+    case MISSING_STACKMAP:
+      ss->print("Expected stackmap frame at this location.");
+      break;
+    case BAD_STACKMAP:
+      ss->print("Invalid stackmap specification.");
+      break;
+    case UNKNOWN:
+    default:
+      ShouldNotReachHere();
+      ss->print_cr("Unknown");
+  }
+  ss->print_cr("");
+}
+
+void ErrorContext::location_details(outputStream* ss, methodOop method) const {
+  if (_bci != -1 && method != NULL) {
+    streamIndentor si(ss);
+    const char* bytecode_name = "<invalid>";
+    if (method->validate_bci_from_bcx(_bci) != -1) {
+      Bytecodes::Code code = Bytecodes::code_or_bp_at(method->bcp_from(_bci));
+      if (Bytecodes::is_defined(code)) {
+          bytecode_name = Bytecodes::name(code);
+      } else {
+          bytecode_name = "<illegal>";
+      }
+    }
+    instanceKlass* ik = instanceKlass::cast(method->method_holder());
+    ss->indent().print_cr("Location:");
+    streamIndentor si2(ss);
+    ss->indent().print_cr("%s.%s%s @%d: %s",
+        ik->name()->as_C_string(), method->name()->as_C_string(),
+        method->signature()->as_C_string(), _bci, bytecode_name);
+  }
+}
+
+void ErrorContext::frame_details(outputStream* ss) const {
+  streamIndentor si(ss);
+  if (_type.is_valid() && _type.frame() != NULL) {
+    ss->indent().print_cr("Current Frame:");
+    streamIndentor si2(ss);
+    _type.frame()->print_on(ss);
+  }
+  if (_expected.is_valid() && _expected.frame() != NULL) {
+    ss->indent().print_cr("Stackmap Frame:");
+    streamIndentor si2(ss);
+    _expected.frame()->print_on(ss);
+  }
+}
+
+void ErrorContext::bytecode_details(outputStream* ss, methodOop method) const {
+  if (method != NULL) {
+    streamIndentor si(ss);
+    ss->indent().print_cr("Bytecode:");
+    streamIndentor si2(ss);
+    ss->print_data(method->code_base(), method->code_size(), false);
+  }
+}
+
+void ErrorContext::handler_details(outputStream* ss, methodOop method) const {
+  if (method != NULL) {
+    streamIndentor si(ss);
+    ExceptionTable table(method);
+    if (table.length() > 0) {
+      ss->indent().print_cr("Exception Handler Table:");
+      streamIndentor si2(ss);
+      for (int i = 0; i < table.length(); ++i) {
+        ss->indent().print_cr("bci [%d, %d] => handler: %d", table.start_pc(i),
+            table.end_pc(i), table.handler_pc(i));
+      }
+    }
+  }
+}
+
+void ErrorContext::stackmap_details(outputStream* ss, methodOop method) const {
+  if (method != NULL && method->has_stackmap_table()) {
+    streamIndentor si(ss);
+    ss->indent().print_cr("Stackmap Table:");
+    typeArrayOop data = method->stackmap_data();
+    stack_map_table* sm_table =
+        stack_map_table::at((address)data->byte_at_addr(0));
+    stack_map_frame* sm_frame = sm_table->entries();
+    streamIndentor si2(ss);
+    int current_offset = -1;
+    for (u2 i = 0; i < sm_table->number_of_entries(); ++i) {
+      ss->indent();
+      sm_frame->print_on(ss, current_offset);
+      ss->print_cr("");
+      current_offset += sm_frame->offset_delta();
+      sm_frame = sm_frame->next();
+    }
+  }
+}
+
 // Methods in ClassVerifier
 
-bool ClassVerifier::_verify_verbose = false;
-
 ClassVerifier::ClassVerifier(
-    instanceKlassHandle klass, char* msg, size_t msg_len, TRAPS)
-    : _thread(THREAD), _exception_type(NULL), _message(msg),
-      _message_buffer_len(msg_len), _klass(klass) {
+    instanceKlassHandle klass, TRAPS)
+    : _thread(THREAD), _exception_type(NULL), _message(NULL), _klass(klass) {
   _this_type = VerificationType::reference_type(klass->name());
   // Create list to hold symbols in reference area.
   _symbols = new GrowableArray<Symbol*>(100, 0, NULL);
@@ -290,8 +535,14 @@
   return VerificationType::reference_type(vmSymbols::java_lang_Object());
 }
 
+TypeOrigin ClassVerifier::ref_ctx(const char* sig, TRAPS) {
+  VerificationType vt = VerificationType::reference_type(
+      create_temporary_symbol(sig, (int)strlen(sig), THREAD));
+  return TypeOrigin::implicit(vt);
+}
+
 void ClassVerifier::verify_class(TRAPS) {
-  if (_verify_verbose) {
+  if (VerboseVerification) {
     tty->print_cr("Verifying class %s with new format",
       _klass->external_name());
   }
@@ -312,7 +563,7 @@
     verify_method(methodHandle(THREAD, m), CHECK_VERIFY(this));
   }
 
-  if (_verify_verbose || TraceClassInitialization) {
+  if (VerboseVerification || TraceClassInitialization) {
     if (was_recursively_verified())
       tty->print_cr("Recursive verification detected for: %s",
           _klass->external_name());
@@ -321,7 +572,7 @@
 
 void ClassVerifier::verify_method(methodHandle m, TRAPS) {
   _method = m;   // initialize _method
-  if (_verify_verbose) {
+  if (VerboseVerification) {
     tty->print_cr("Verifying method %s", m->name_and_sig_as_C_string());
   }
 
@@ -368,8 +619,8 @@
   StackMapTable stackmap_table(&reader, &current_frame, max_locals, max_stack,
                                code_data, code_length, CHECK_VERIFY(this));
 
-  if (_verify_verbose) {
-    stackmap_table.print();
+  if (VerboseVerification) {
+    stackmap_table.print_on(tty);
   }
 
   RawBytecodeStream bcs(m);
@@ -388,6 +639,7 @@
 
     // Set current frame's offset to bci
     current_frame.set_offset(bci);
+    current_frame.set_mark();
 
     // Make sure every offset in stackmap table point to the beginning to
     // an instruction. Match current_frame to stackmap_table entry with
@@ -396,6 +648,7 @@
       stackmap_index, bci, &current_frame, &stackmap_table,
       no_control_flow, CHECK_VERIFY(this));
 
+
     bool this_uninit = false;  // Set to true when invokespecial <init> initialized 'this'
 
     // Merge with the next instruction
@@ -406,8 +659,8 @@
       VerificationType atype;
 
 #ifndef PRODUCT
-      if (_verify_verbose) {
-        current_frame.print();
+      if (VerboseVerification) {
+        current_frame.print_on(tty);
         tty->print_cr("offset = %d,  opcode = %s", bci, Bytecodes::name(opcode));
       }
 #endif
@@ -420,7 +673,10 @@
             opcode != Bytecodes::_lstore && opcode != Bytecodes::_fload  &&
             opcode != Bytecodes::_dload  && opcode != Bytecodes::_fstore &&
             opcode != Bytecodes::_dstore) {
-          verify_error(bci, "Bad wide instruction");
+          /* Unreachable?  RawBytecodeStream's raw_next() returns 'illegal'
+           * if we encounter a wide instruction that modifies an invalid
+           * opcode (not one of the ones listed above) */
+          verify_error(ErrorContext::bad_code(bci), "Bad wide instruction");
           return;
         }
       }
@@ -532,7 +788,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_int_array()) {
-            verify_error(bci, bad_type_msg, "iaload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[I", THREAD)),
+                bad_type_msg, "iaload");
             return;
           }
           current_frame.push_stack(
@@ -544,7 +802,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_bool_array() && !atype.is_byte_array()) {
-            verify_error(bci, bad_type_msg, "baload");
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "baload");
             return;
           }
           current_frame.push_stack(
@@ -556,7 +816,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_char_array()) {
-            verify_error(bci, bad_type_msg, "caload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[C", THREAD)),
+                bad_type_msg, "caload");
             return;
           }
           current_frame.push_stack(
@@ -568,7 +830,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_short_array()) {
-            verify_error(bci, bad_type_msg, "saload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[S", THREAD)),
+                bad_type_msg, "saload");
             return;
           }
           current_frame.push_stack(
@@ -580,7 +844,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_long_array()) {
-            verify_error(bci, bad_type_msg, "laload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[J", THREAD)),
+                bad_type_msg, "laload");
             return;
           }
           current_frame.push_stack_2(
@@ -593,7 +859,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_float_array()) {
-            verify_error(bci, bad_type_msg, "faload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[F", THREAD)),
+                bad_type_msg, "faload");
             return;
           }
           current_frame.push_stack(
@@ -605,7 +873,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_double_array()) {
-            verify_error(bci, bad_type_msg, "daload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[D", THREAD)),
+                bad_type_msg, "daload");
             return;
           }
           current_frame.push_stack_2(
@@ -618,7 +888,10 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_reference_array()) {
-            verify_error(bci, bad_type_msg, "aaload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(),
+                TypeOrigin::implicit(VerificationType::reference_check())),
+                bad_type_msg, "aaload");
             return;
           }
           if (atype.is_null()) {
@@ -689,7 +962,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_int_array()) {
-            verify_error(bci, bad_type_msg, "iastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[I", THREAD)),
+                bad_type_msg, "iastore");
             return;
           }
           no_control_flow = false; break;
@@ -701,7 +976,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_bool_array() && !atype.is_byte_array()) {
-            verify_error(bci, bad_type_msg, "bastore");
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "bastore");
             return;
           }
           no_control_flow = false; break;
@@ -713,7 +990,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_char_array()) {
-            verify_error(bci, bad_type_msg, "castore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[C", THREAD)),
+                bad_type_msg, "castore");
             return;
           }
           no_control_flow = false; break;
@@ -725,7 +1004,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_short_array()) {
-            verify_error(bci, bad_type_msg, "sastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[S", THREAD)),
+                bad_type_msg, "sastore");
             return;
           }
           no_control_flow = false; break;
@@ -738,7 +1019,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_long_array()) {
-            verify_error(bci, bad_type_msg, "lastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[J", THREAD)),
+                bad_type_msg, "lastore");
             return;
           }
           no_control_flow = false; break;
@@ -750,7 +1033,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_float_array()) {
-            verify_error(bci, bad_type_msg, "fastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[F", THREAD)),
+                bad_type_msg, "fastore");
             return;
           }
           no_control_flow = false; break;
@@ -763,7 +1048,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_double_array()) {
-            verify_error(bci, bad_type_msg, "dastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[D", THREAD)),
+                bad_type_msg, "dastore");
             return;
           }
           no_control_flow = false; break;
@@ -775,7 +1062,10 @@
             VerificationType::reference_check(), CHECK_VERIFY(this));
           // more type-checking is done at runtime
           if (!atype.is_reference_array()) {
-            verify_error(bci, bad_type_msg, "aastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(),
+                TypeOrigin::implicit(VerificationType::reference_check())),
+                bad_type_msg, "aastore");
             return;
           }
           // 4938384: relaxed constraint in JVMS 3nd edition.
@@ -793,7 +1083,11 @@
             current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "pop2");
+            /* Unreachable? Would need a category2_1st on TOS
+             * which does not appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "pop2");
             return;
           }
           no_control_flow = false; break;
@@ -825,7 +1119,10 @@
             type3 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup_x2");
+            /* Unreachable? Would need a category2_1st at stack depth 2 with
+             * a category1 on TOS which does not appear possible. */
+            verify_error(ErrorContext::bad_type(
+                bci, current_frame.stack_top_ctx()), bad_type_msg, "dup_x2");
             return;
           }
           current_frame.push_stack(type, CHECK_VERIFY(this));
@@ -843,7 +1140,11 @@
             type2 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2");
+            /* Unreachable?  Would need a category2_1st on TOS which does not
+             * appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2");
             return;
           }
           current_frame.push_stack(type2, CHECK_VERIFY(this));
@@ -858,11 +1159,15 @@
           if (type.is_category1()) {
             type2 = current_frame.pop_stack(
               VerificationType::category1_check(), CHECK_VERIFY(this));
-          } else if(type.is_category2_2nd()) {
-            type2 = current_frame.pop_stack
-              (VerificationType::category2_check(), CHECK_VERIFY(this));
+          } else if (type.is_category2_2nd()) {
+            type2 = current_frame.pop_stack(
+              VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2_x1");
+            /* Unreachable?  Would need a category2_1st on TOS which does
+             * not appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2_x1");
             return;
           }
           type3 = current_frame.pop_stack(
@@ -885,7 +1190,11 @@
             type2 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2_x2");
+            /* Unreachable?  Would need a category2_1st on TOS which does
+             * not appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2_x2");
             return;
           }
           type3 = current_frame.pop_stack(CHECK_VERIFY(this));
@@ -896,7 +1205,12 @@
             type4 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2_x2");
+            /* Unreachable?  Would need a category2_1st on TOS after popping
+             * a long/double or two category 1's, which does not
+             * appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2_x2");
             return;
           }
           current_frame.push_stack(type2, CHECK_VERIFY(this));
@@ -1176,43 +1490,50 @@
         case Bytecodes::_ireturn :
           type = current_frame.pop_stack(
             VerificationType::integer_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_lreturn :
           type2 = current_frame.pop_stack(
             VerificationType::long2_type(), CHECK_VERIFY(this));
           type = current_frame.pop_stack(
             VerificationType::long_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_freturn :
           type = current_frame.pop_stack(
             VerificationType::float_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_dreturn :
           type2 = current_frame.pop_stack(
             VerificationType::double2_type(),  CHECK_VERIFY(this));
           type = current_frame.pop_stack(
             VerificationType::double_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_areturn :
           type = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_return :
           if (return_type != VerificationType::bogus_type()) {
-            verify_error(bci, "Method expects no return value");
+            verify_error(ErrorContext::bad_code(bci),
+                         "Method expects a return value");
             return;
           }
           // Make sure "this" has been initialized if current method is an
           // <init>
           if (_method->name() == vmSymbols::object_initializer_name() &&
               current_frame.flag_this_uninit()) {
-            verify_error(bci,
-              "Constructor must call super() or this() before return");
+            verify_error(ErrorContext::bad_code(bci),
+                         "Constructor must call super() or this() "
+                         "before return");
             return;
           }
           no_control_flow = true; break;
@@ -1239,11 +1560,13 @@
         case Bytecodes::_new :
         {
           index = bcs.get_index_u2();
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           VerificationType new_class_type =
             cp_index_to_type(index, cp, CHECK_VERIFY(this));
           if (!new_class_type.is_object()) {
-            verify_error(bci, "Illegal new instruction");
+            verify_error(ErrorContext::bad_type(bci,
+                TypeOrigin::cp(index, new_class_type)),
+                "Illegal new instruction");
             return;
           }
           type = VerificationType::uninitialized_type(bci);
@@ -1258,13 +1581,15 @@
           no_control_flow = false; break;
         case Bytecodes::_anewarray :
           verify_anewarray(
-            bcs.get_index_u2(), cp, &current_frame, CHECK_VERIFY(this));
+            bci, bcs.get_index_u2(), cp, &current_frame, CHECK_VERIFY(this));
           no_control_flow = false; break;
         case Bytecodes::_arraylength :
           type = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!(type.is_null() || type.is_array())) {
-            verify_error(bci, bad_type_msg, "arraylength");
+            verify_error(ErrorContext::bad_type(
+                bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "arraylength");
           }
           current_frame.push_stack(
             VerificationType::integer_type(), CHECK_VERIFY(this));
@@ -1272,7 +1597,7 @@
         case Bytecodes::_checkcast :
         {
           index = bcs.get_index_u2();
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           current_frame.pop_stack(object_type(), CHECK_VERIFY(this));
           VerificationType klass_type = cp_index_to_type(
             index, cp, CHECK_VERIFY(this));
@@ -1281,7 +1606,7 @@
         }
         case Bytecodes::_instanceof : {
           index = bcs.get_index_u2();
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           current_frame.pop_stack(object_type(), CHECK_VERIFY(this));
           current_frame.push_stack(
             VerificationType::integer_type(), CHECK_VERIFY(this));
@@ -1296,17 +1621,18 @@
         {
           index = bcs.get_index_u2();
           u2 dim = *(bcs.bcp()+3);
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           VerificationType new_array_type =
             cp_index_to_type(index, cp, CHECK_VERIFY(this));
           if (!new_array_type.is_array()) {
-            verify_error(bci,
-              "Illegal constant pool index in multianewarray instruction");
+            verify_error(ErrorContext::bad_type(bci,
+                TypeOrigin::cp(index, new_array_type)),
+                "Illegal constant pool index in multianewarray instruction");
             return;
           }
           if (dim < 1 || new_array_type.dimensions() < dim) {
-            verify_error(bci,
-              "Illegal dimension in multianewarray instruction");
+            verify_error(ErrorContext::bad_code(bci),
+                "Illegal dimension in multianewarray instruction: %d", dim);
             return;
           }
           for (int i = 0; i < dim; i++) {
@@ -1324,7 +1650,8 @@
         default:
           // We only need to check the valid bytecodes in class file.
           // And jsr and ret are not in the new class file format in JDK1.5.
-          verify_error(bci, "Bad instruction");
+          verify_error(ErrorContext::bad_code(bci),
+              "Bad instruction: %02x", opcode);
           no_control_flow = false;
           return;
       }  // end switch
@@ -1340,7 +1667,8 @@
 
   // Make sure that control flow does not fall through end of the method
   if (!no_control_flow) {
-    verify_error(code_length, "Control flow falls through code end");
+    verify_error(ErrorContext::bad_code(code_length),
+        "Control flow falls through code end");
     return;
   }
 }
@@ -1359,7 +1687,7 @@
         code_data[bci] = BYTECODE_OFFSET;
       }
     } else {
-      verify_error(bcs.bci(), "Bad instruction");
+      verify_error(ErrorContext::bad_code(bcs.bci()), "Bad instruction");
       return NULL;
     }
   }
@@ -1402,9 +1730,11 @@
         catch_type, this, CHECK_VERIFY(this));
       if (!is_subclass) {
         // 4286534: should throw VerifyError according to recent spec change
-        verify_error(
-          "Catch type is not a subclass of Throwable in handler %d",
-          handler_pc);
+        verify_error(ErrorContext::bad_type(handler_pc,
+            TypeOrigin::cp(catch_type_index, catch_type),
+            TypeOrigin::implicit(throwable)),
+            "Catch type is not a subclass "
+            "of Throwable in exception handler %d", handler_pc);
         return;
       }
     }
@@ -1444,19 +1774,21 @@
   if (stackmap_index < stackmap_table->get_frame_count()) {
     u2 this_offset = stackmap_table->get_offset(stackmap_index);
     if (no_control_flow && this_offset > bci) {
-      verify_error(bci, "Expecting a stack map frame");
+      verify_error(ErrorContext::missing_stackmap(bci),
+                   "Expecting a stack map frame");
       return 0;
     }
     if (this_offset == bci) {
+      ErrorContext ctx;
       // See if current stack map can be assigned to the frame in table.
       // current_frame is the stackmap frame got from the last instruction.
       // If matched, current_frame will be updated by this method.
-      bool match = stackmap_table->match_stackmap(
+      bool matches = stackmap_table->match_stackmap(
         current_frame, this_offset, stackmap_index,
-        !no_control_flow, true, CHECK_VERIFY_(this, 0));
-      if (!match) {
+        !no_control_flow, true, &ctx, CHECK_VERIFY_(this, 0));
+      if (!matches) {
         // report type error
-        verify_error(bci, "Instruction type does not match stack map");
+        verify_error(ctx, "Instruction type does not match stack map");
         return 0;
       }
       stackmap_index++;
@@ -1466,7 +1798,7 @@
       return 0;
     }
   } else if (no_control_flow) {
-    verify_error(bci, "Expecting a stack map frame");
+    verify_error(ErrorContext::bad_code(bci), "Expecting a stack map frame");
     return 0;
   }
   return stackmap_index;
@@ -1498,29 +1830,31 @@
           VerificationType::reference_type(vmSymbols::java_lang_Throwable());
         new_frame->push_stack(throwable, CHECK_VERIFY(this));
       }
-      bool match = stackmap_table->match_stackmap(
-        new_frame, handler_pc, true, false, CHECK_VERIFY(this));
-      if (!match) {
-        verify_error(bci,
-          "Stack map does not match the one at exception handler %d",
-          handler_pc);
+      ErrorContext ctx;
+      bool matches = stackmap_table->match_stackmap(
+        new_frame, handler_pc, true, false, &ctx, CHECK_VERIFY(this));
+      if (!matches) {
+        verify_error(ctx, "Stack map does not match the one at "
+            "exception handler %d", handler_pc);
         return;
       }
     }
   }
 }
 
-void ClassVerifier::verify_cp_index(constantPoolHandle cp, int index, TRAPS) {
+void ClassVerifier::verify_cp_index(
+    u2 bci, constantPoolHandle cp, int index, TRAPS) {
   int nconstants = cp->length();
   if ((index <= 0) || (index >= nconstants)) {
-    verify_error("Illegal constant pool index %d in class %s",
-      index, instanceKlass::cast(cp->pool_holder())->external_name());
+    verify_error(ErrorContext::bad_cp_index(bci, index),
+        "Illegal constant pool index %d in class %s",
+        index, instanceKlass::cast(cp->pool_holder())->external_name());
     return;
   }
 }
 
 void ClassVerifier::verify_cp_type(
-    int index, constantPoolHandle cp, unsigned int types, TRAPS) {
+    u2 bci, int index, constantPoolHandle cp, unsigned int types, TRAPS) {
 
   // In some situations, bytecode rewriting may occur while we're verifying.
   // In this case, a constant pool cache exists and some indices refer to that
@@ -1528,10 +1862,10 @@
   // We must check was_recursively_verified() before we get here.
   guarantee(cp->cache() == NULL, "not rewritten yet");
 
-  verify_cp_index(cp, index, CHECK_VERIFY(this));
+  verify_cp_index(bci, cp, index, CHECK_VERIFY(this));
   unsigned int tag = cp->tag_at(index).value();
   if ((types & (1 << tag)) == 0) {
-    verify_error(
+    verify_error(ErrorContext::bad_cp_index(bci, index),
       "Illegal type at constant pool entry %d in class %s",
       index, instanceKlass::cast(cp->pool_holder())->external_name());
     return;
@@ -1539,51 +1873,46 @@
 }
 
 void ClassVerifier::verify_cp_class_type(
-    int index, constantPoolHandle cp, TRAPS) {
-  verify_cp_index(cp, index, CHECK_VERIFY(this));
+    u2 bci, int index, constantPoolHandle cp, TRAPS) {
+  verify_cp_index(bci, cp, index, CHECK_VERIFY(this));
   constantTag tag = cp->tag_at(index);
   if (!tag.is_klass() && !tag.is_unresolved_klass()) {
-    verify_error("Illegal type at constant pool entry %d in class %s",
-      index, instanceKlass::cast(cp->pool_holder())->external_name());
+    verify_error(ErrorContext::bad_cp_index(bci, index),
+        "Illegal type at constant pool entry %d in class %s",
+        index, instanceKlass::cast(cp->pool_holder())->external_name());
     return;
   }
 }
 
-void ClassVerifier::format_error_message(
-    const char* fmt, int offset, va_list va) {
-  ResourceMark rm(_thread);
-  stringStream message(_message, _message_buffer_len);
-  message.vprint(fmt, va);
-  if (!_method.is_null()) {
-    message.print(" in method %s", _method->name_and_sig_as_C_string());
-  }
-  if (offset != -1) {
-    message.print(" at offset %d", offset);
-  }
-}
+void ClassVerifier::verify_error(ErrorContext ctx, const char* msg, ...) {
+  stringStream ss;
 
-void ClassVerifier::verify_error(u2 offset, const char* fmt, ...) {
+  ctx.reset_frames();
   _exception_type = vmSymbols::java_lang_VerifyError();
+  _error_context = ctx;
   va_list va;
-  va_start(va, fmt);
-  format_error_message(fmt, offset, va);
+  va_start(va, msg);
+  ss.vprint(msg, va);
   va_end(va);
-}
-
-void ClassVerifier::verify_error(const char* fmt, ...) {
-  _exception_type = vmSymbols::java_lang_VerifyError();
-  va_list va;
-  va_start(va, fmt);
-  format_error_message(fmt, -1, va);
-  va_end(va);
+  _message = ss.as_string();
+#ifdef ASSERT
+  ResourceMark rm;
+  const char* exception_name = _exception_type->as_C_string();
+  Exceptions::debug_check_abort(exception_name, NULL);
+#endif // ndef ASSERT
 }
 
 void ClassVerifier::class_format_error(const char* msg, ...) {
+  stringStream ss;
   _exception_type = vmSymbols::java_lang_ClassFormatError();
   va_list va;
   va_start(va, msg);
-  format_error_message(msg, -1, va);
+  ss.vprint(msg, va);
   va_end(va);
+  if (!_method.is_null()) {
+    ss.print(" in method %s", _method->name_and_sig_as_C_string());
+  }
+  _message = ss.as_string();
 }
 
 klassOop ClassVerifier::load_class(Symbol* name, TRAPS) {
@@ -1619,7 +1948,7 @@
     }
   } else {
     klassOop member_klass = target_instance->find_field(field_name, field_sig, &fd);
-    if(member_klass != NULL && fd.is_protected()) {
+    if (member_klass != NULL && fd.is_protected()) {
       if (!this_class->is_same_class_package(member_klass)) {
         return true;
       }
@@ -1629,9 +1958,9 @@
 }
 
 void ClassVerifier::verify_ldc(
-    int opcode, u2 index, StackMapFrame *current_frame,
-     constantPoolHandle cp, u2 bci, TRAPS) {
-  verify_cp_index(cp, index, CHECK_VERIFY(this));
+    int opcode, u2 index, StackMapFrame* current_frame,
+    constantPoolHandle cp, u2 bci, TRAPS) {
+  verify_cp_index(bci, cp, index, CHECK_VERIFY(this));
   constantTag tag = cp->tag_at(index);
   unsigned int types;
   if (opcode == Bytecodes::_ldc || opcode == Bytecodes::_ldc_w) {
@@ -1641,12 +1970,12 @@
             | (1 << JVM_CONSTANT_MethodHandle) | (1 << JVM_CONSTANT_MethodType);
       // Note:  The class file parser already verified the legality of
       // MethodHandle and MethodType constants.
-      verify_cp_type(index, cp, types, CHECK_VERIFY(this));
+      verify_cp_type(bci, index, cp, types, CHECK_VERIFY(this));
     }
   } else {
     assert(opcode == Bytecodes::_ldc2_w, "must be ldc2_w");
     types = (1 << JVM_CONSTANT_Double) | (1 << JVM_CONSTANT_Long);
-    verify_cp_type(index, cp, types, CHECK_VERIFY(this));
+    verify_cp_type(bci, index, cp, types, CHECK_VERIFY(this));
   }
   if (tag.is_string() && cp->is_pseudo_string_at(index)) {
     current_frame->push_stack(object_type(), CHECK_VERIFY(this));
@@ -1681,7 +2010,9 @@
       VerificationType::reference_type(
         vmSymbols::java_lang_invoke_MethodType()), CHECK_VERIFY(this));
   } else {
-    verify_error(bci, "Invalid index in ldc");
+    /* Unreachable? verify_cp_type has already validated the cp type. */
+    verify_error(
+        ErrorContext::bad_cp_index(bci, index), "Invalid index in ldc");
     return;
   }
 }
@@ -1697,7 +2028,8 @@
   u2 padding_offset = 1;
   while ((bcp + padding_offset) < aligned_bcp) {
     if(*(bcp + padding_offset) != 0) {
-      verify_error(bci, "Nonzero padding byte in lookswitch or tableswitch");
+      verify_error(ErrorContext::bad_code(bci),
+                   "Nonzero padding byte in lookswitch or tableswitch");
       return;
     }
     padding_offset++;
@@ -1710,20 +2042,21 @@
     jint low = (jint)Bytes::get_Java_u4(aligned_bcp + jintSize);
     jint high = (jint)Bytes::get_Java_u4(aligned_bcp + 2*jintSize);
     if (low > high) {
-      verify_error(bci,
-        "low must be less than or equal to high in tableswitch");
+      verify_error(ErrorContext::bad_code(bci),
+          "low must be less than or equal to high in tableswitch");
       return;
     }
     keys = high - low + 1;
     if (keys < 0) {
-      verify_error(bci, "too many keys in tableswitch");
+      verify_error(ErrorContext::bad_code(bci), "too many keys in tableswitch");
       return;
     }
     delta = 1;
   } else {
     keys = (int)Bytes::get_Java_u4(aligned_bcp + jintSize);
     if (keys < 0) {
-      verify_error(bci, "number of keys in lookupswitch less than 0");
+      verify_error(ErrorContext::bad_code(bci),
+                   "number of keys in lookupswitch less than 0");
       return;
     }
     delta = 2;
@@ -1732,7 +2065,8 @@
       jint this_key = Bytes::get_Java_u4(aligned_bcp + (2+2*i)*jintSize);
       jint next_key = Bytes::get_Java_u4(aligned_bcp + (2+2*i+2)*jintSize);
       if (this_key >= next_key) {
-        verify_error(bci, "Bad lookupswitch instruction");
+        verify_error(ErrorContext::bad_code(bci),
+                     "Bad lookupswitch instruction");
         return;
       }
     }
@@ -1767,7 +2101,8 @@
                                               constantPoolHandle cp,
                                               TRAPS) {
   u2 index = bcs->get_index_u2();
-  verify_cp_type(index, cp, 1 << JVM_CONSTANT_Fieldref, CHECK_VERIFY(this));
+  verify_cp_type(bcs->bci(), index, cp,
+      1 << JVM_CONSTANT_Fieldref, CHECK_VERIFY(this));
 
   // Get field name and signature
   Symbol* field_name = cp->name_ref_at(index);
@@ -1784,9 +2119,11 @@
   VerificationType ref_class_type = cp_ref_index_to_type(
     index, cp, CHECK_VERIFY(this));
   if (!ref_class_type.is_object()) {
-    verify_error(
-      "Expecting reference to class in class %s at constant pool index %d",
-      _klass->external_name(), index);
+    /* Unreachable?  Class file parser verifies Fieldref contents */
+    verify_error(ErrorContext::bad_type(bcs->bci(),
+        TypeOrigin::cp(index, ref_class_type)),
+        "Expecting reference to class in class %s at constant pool index %d",
+        _klass->external_name(), index);
     return;
   }
   VerificationType target_class_type = ref_class_type;
@@ -1844,7 +2181,10 @@
       is_assignable = target_class_type.is_assignable_from(
         stack_object_type, this, CHECK_VERIFY(this));
       if (!is_assignable) {
-        verify_error(bci, "Bad type on operand stack in putfield");
+        verify_error(ErrorContext::bad_type(bci,
+            current_frame->stack_top_ctx(),
+            TypeOrigin::cp(index, target_class_type)),
+            "Bad type on operand stack in putfield");
         return;
       }
     }
@@ -1868,7 +2208,10 @@
         is_assignable = current_type().is_assignable_from(
           stack_object_type, this, CHECK_VERIFY(this));
         if (!is_assignable) {
-          verify_error(bci, "Bad access to protected data in getfield");
+          verify_error(ErrorContext::bad_type(bci,
+              current_frame->stack_top_ctx(),
+              TypeOrigin::implicit(current_type())),
+              "Bad access to protected data in getfield");
           return;
         }
       }
@@ -1879,7 +2222,7 @@
 }
 
 void ClassVerifier::verify_invoke_init(
-    RawBytecodeStream* bcs, VerificationType ref_class_type,
+    RawBytecodeStream* bcs, u2 ref_class_index, VerificationType ref_class_type,
     StackMapFrame* current_frame, u4 code_length, bool *this_uninit,
     constantPoolHandle cp, TRAPS) {
   u2 bci = bcs->bci();
@@ -1890,7 +2233,10 @@
     klassOop superk = current_class()->super();
     if (ref_class_type.name() != current_class()->name() &&
         ref_class_type.name() != superk->klass_part()->name()) {
-      verify_error(bci, "Bad <init> method call");
+      verify_error(ErrorContext::bad_type(bci,
+          TypeOrigin::implicit(ref_class_type),
+          TypeOrigin::implicit(current_type())),
+          "Bad <init> method call");
       return;
     }
     current_frame->initialize_object(type, current_type());
@@ -1899,17 +2245,23 @@
     u2 new_offset = type.bci();
     address new_bcp = bcs->bcp() - bci + new_offset;
     if (new_offset > (code_length - 3) || (*new_bcp) != Bytecodes::_new) {
-      verify_error(new_offset, "Expecting new instruction");
+      /* Unreachable?  Stack map parsing ensures valid type and new
+       * instructions have a valid BCI. */
+      verify_error(ErrorContext::bad_code(new_offset),
+                   "Expecting new instruction");
       return;
     }
     u2 new_class_index = Bytes::get_Java_u2(new_bcp + 1);
-    verify_cp_class_type(new_class_index, cp, CHECK_VERIFY(this));
+    verify_cp_class_type(bci, new_class_index, cp, CHECK_VERIFY(this));
 
     // The method must be an <init> method of the indicated class
     VerificationType new_class_type = cp_index_to_type(
       new_class_index, cp, CHECK_VERIFY(this));
     if (!new_class_type.equals(ref_class_type)) {
-      verify_error(bci, "Call to wrong <init> method");
+      verify_error(ErrorContext::bad_type(bci,
+          TypeOrigin::cp(new_class_index, new_class_type),
+          TypeOrigin::cp(ref_class_index, ref_class_type)),
+          "Call to wrong <init> method");
       return;
     }
     // According to the VM spec, if the referent class is a superclass of the
@@ -1928,14 +2280,18 @@
         bool assignable = current_type().is_assignable_from(
           objectref_type, this, CHECK_VERIFY(this));
         if (!assignable) {
-          verify_error(bci, "Bad access to protected <init> method");
+          verify_error(ErrorContext::bad_type(bci,
+              TypeOrigin::cp(new_class_index, objectref_type),
+              TypeOrigin::implicit(current_type())),
+              "Bad access to protected <init> method");
           return;
         }
       }
     }
     current_frame->initialize_object(type, new_class_type);
   } else {
-    verify_error(bci, "Bad operand type when invoking <init>");
+    verify_error(ErrorContext::bad_type(bci, current_frame->stack_top_ctx()),
+        "Bad operand type when invoking <init>");
     return;
   }
 }
@@ -1952,7 +2308,7 @@
                       : opcode == Bytecodes::_invokedynamic
                                 ? 1 << JVM_CONSTANT_InvokeDynamic
                                 : 1 << JVM_CONSTANT_Methodref);
-  verify_cp_type(index, cp, types, CHECK_VERIFY(this));
+  verify_cp_type(bcs->bci(), index, cp, types, CHECK_VERIFY(this));
 
   // Get method name and signature
   Symbol* method_name = cp->name_ref_at(index);
@@ -2029,11 +2385,13 @@
     // the difference between the size of the operand stack before and after the instruction
     // executes.
     if (*(bcp+3) != (nargs+1)) {
-      verify_error(bci, "Inconsistent args count operand in invokeinterface");
+      verify_error(ErrorContext::bad_code(bci),
+          "Inconsistent args count operand in invokeinterface");
       return;
     }
     if (*(bcp+4) != 0) {
-      verify_error(bci, "Fourth operand byte of invokeinterface must be zero");
+      verify_error(ErrorContext::bad_code(bci),
+          "Fourth operand byte of invokeinterface must be zero");
       return;
     }
   }
@@ -2041,7 +2399,8 @@
   if (opcode == Bytecodes::_invokedynamic) {
     address bcp = bcs->bcp();
     if (*(bcp+3) != 0 || *(bcp+4) != 0) {
-      verify_error(bci, "Third and fourth operand bytes of invokedynamic must be zero");
+      verify_error(ErrorContext::bad_code(bci),
+          "Third and fourth operand bytes of invokedynamic must be zero");
       return;
     }
   }
@@ -2050,7 +2409,8 @@
     // Make sure <init> can only be invoked by invokespecial
     if (opcode != Bytecodes::_invokespecial ||
         method_name != vmSymbols::object_initializer_name()) {
-      verify_error(bci, "Illegal call to internal method");
+      verify_error(ErrorContext::bad_code(bci),
+          "Illegal call to internal method");
       return;
     }
   } else if (opcode == Bytecodes::_invokespecial
@@ -2060,7 +2420,8 @@
     bool subtype = ref_class_type.is_assignable_from(
       current_type(), this, CHECK_VERIFY(this));
     if (!subtype) {
-      verify_error(bci, "Bad invokespecial instruction: "
+      verify_error(ErrorContext::bad_code(bci),
+          "Bad invokespecial instruction: "
           "current class isn't assignable to reference class.");
        return;
     }
@@ -2073,7 +2434,7 @@
   if (opcode != Bytecodes::_invokestatic &&
       opcode != Bytecodes::_invokedynamic) {
     if (method_name == vmSymbols::object_initializer_name()) {  // <init> method
-      verify_invoke_init(bcs, ref_class_type, current_frame,
+      verify_invoke_init(bcs, index, ref_class_type, current_frame,
         code_length, this_uninit, cp, CHECK_VERIFY(this));
     } else {   // other methods
       // Ensures that target class is assignable to method class.
@@ -2103,8 +2464,10 @@
                   // Special case: arrays pretend to implement public Object
                   // clone().
                 } else {
-                  verify_error(bci,
-                    "Bad access to protected data in invokevirtual");
+                  verify_error(ErrorContext::bad_type(bci,
+                      current_frame->stack_top_ctx(),
+                      TypeOrigin::implicit(current_type())),
+                      "Bad access to protected data in invokevirtual");
                   return;
                 }
               }
@@ -2121,7 +2484,10 @@
   if (sig_stream.type() != T_VOID) {
     if (method_name == vmSymbols::object_initializer_name()) {
       // <init> method must have a void return type
-      verify_error(bci, "Return type must be void in <init> method");
+      /* Unreachable?  Class file parser verifies that methods with '<' have
+       * void return */
+      verify_error(ErrorContext::bad_code(bci),
+          "Return type must be void in <init> method");
       return;
     }
     VerificationType return_type[2];
@@ -2139,7 +2505,7 @@
     NULL, NULL, NULL, NULL, "[Z", "[C", "[F", "[D", "[B", "[S", "[I", "[J",
   };
   if (index < T_BOOLEAN || index > T_LONG) {
-    verify_error(bci, "Illegal newarray instruction");
+    verify_error(ErrorContext::bad_code(bci), "Illegal newarray instruction");
     return VerificationType::bogus_type();
   }
 
@@ -2150,8 +2516,9 @@
 }
 
 void ClassVerifier::verify_anewarray(
-    u2 index, constantPoolHandle cp, StackMapFrame* current_frame, TRAPS) {
-  verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+    u2 bci, u2 index, constantPoolHandle cp,
+    StackMapFrame* current_frame, TRAPS) {
+  verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
   current_frame->pop_stack(
     VerificationType::integer_type(), CHECK_VERIFY(this));
 
@@ -2264,14 +2631,19 @@
 }
 
 void ClassVerifier::verify_return_value(
-    VerificationType return_type, VerificationType type, u2 bci, TRAPS) {
+    VerificationType return_type, VerificationType type, u2 bci,
+    StackMapFrame* current_frame, TRAPS) {
   if (return_type == VerificationType::bogus_type()) {
-    verify_error(bci, "Method expects a return value");
+    verify_error(ErrorContext::bad_type(bci,
+        current_frame->stack_top_ctx(), TypeOrigin::signature(return_type)),
+        "Method expects a return value");
     return;
   }
   bool match = return_type.is_assignable_from(type, this, CHECK_VERIFY(this));
   if (!match) {
-    verify_error(bci, "Bad return type");
+    verify_error(ErrorContext::bad_type(bci,
+        current_frame->stack_top_ctx(), TypeOrigin::signature(return_type)),
+        "Bad return type");
     return;
   }
 }
--- a/src/share/vm/classfile/verifier.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/verifier.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -88,18 +88,178 @@
 #define CHECK_VERIFY_(verifier, result) \
   CHECK_(result)); if ((verifier)->has_error()) return (result); (0
 
+class TypeOrigin VALUE_OBJ_CLASS_SPEC {
+ private:
+  typedef enum {
+    CF_LOCALS,  // Comes from the current frame locals
+    CF_STACK,   // Comes from the current frame expression stack
+    SM_LOCALS,  // Comes from stackmap locals
+    SM_STACK,   // Comes from stackmap expression stack
+    CONST_POOL, // Comes from the constant pool
+    SIG,        // Comes from method signature
+    IMPLICIT,   // Comes implicitly from code or context
+    BAD_INDEX,  // No type, but the index is bad
+    FRAME_ONLY, // No type, context just contains the frame
+    NONE
+  } Origin;
+
+  Origin _origin;
+  u2 _index;              // local, stack, or constant pool index
+  StackMapFrame* _frame;  // source frame if CF or SM
+  VerificationType _type; // The actual type
+
+  TypeOrigin(
+      Origin origin, u2 index, StackMapFrame* frame, VerificationType type)
+      : _origin(origin), _index(index), _frame(frame), _type(type) {}
+
+ public:
+  TypeOrigin() : _origin(NONE), _index(0), _frame(NULL) {}
+
+  static TypeOrigin null();
+  static TypeOrigin local(u2 index, StackMapFrame* frame);
+  static TypeOrigin stack(u2 index, StackMapFrame* frame);
+  static TypeOrigin sm_local(u2 index, StackMapFrame* frame);
+  static TypeOrigin sm_stack(u2 index, StackMapFrame* frame);
+  static TypeOrigin cp(u2 index, VerificationType vt);
+  static TypeOrigin signature(VerificationType vt);
+  static TypeOrigin bad_index(u2 index);
+  static TypeOrigin implicit(VerificationType t);
+  static TypeOrigin frame(StackMapFrame* frame);
+
+  void reset_frame();
+  void details(outputStream* ss) const;
+  void print_frame(outputStream* ss) const;
+  const StackMapFrame* frame() const { return _frame; }
+  bool is_valid() const { return _origin != NONE; }
+  u2 index() const { return _index; }
+
+#ifdef ASSERT
+  void print_on(outputStream* str) const;
+#endif
+};
+
+class ErrorContext VALUE_OBJ_CLASS_SPEC {
+ private:
+  typedef enum {
+    INVALID_BYTECODE,     // There was a problem with the bytecode
+    WRONG_TYPE,           // Type value was not as expected
+    FLAGS_MISMATCH,       // Frame flags are not assignable
+    BAD_CP_INDEX,         // Invalid constant pool index
+    BAD_LOCAL_INDEX,      // Invalid local index
+    LOCALS_SIZE_MISMATCH, // Frames have differing local counts
+    STACK_SIZE_MISMATCH,  // Frames have different stack sizes
+    STACK_OVERFLOW,       // Attempt to push onto a full expression stack
+    STACK_UNDERFLOW,      // Attempt to pop and empty expression stack
+    MISSING_STACKMAP,     // No stackmap for this location and there should be
+    BAD_STACKMAP,         // Format error in stackmap
+    NO_FAULT,             // No error
+    UNKNOWN
+  } FaultType;
+
+  int _bci;
+  FaultType _fault;
+  TypeOrigin _type;
+  TypeOrigin _expected;
+
+  ErrorContext(int bci, FaultType fault) :
+      _bci(bci), _fault(fault)  {}
+  ErrorContext(int bci, FaultType fault, TypeOrigin type) :
+      _bci(bci), _fault(fault), _type(type)  {}
+  ErrorContext(int bci, FaultType fault, TypeOrigin type, TypeOrigin exp) :
+      _bci(bci), _fault(fault), _type(type), _expected(exp)  {}
+
+ public:
+  ErrorContext() : _bci(-1), _fault(NO_FAULT) {}
+
+  static ErrorContext bad_code(u2 bci) {
+    return ErrorContext(bci, INVALID_BYTECODE);
+  }
+  static ErrorContext bad_type(u2 bci, TypeOrigin type) {
+    return ErrorContext(bci, WRONG_TYPE, type);
+  }
+  static ErrorContext bad_type(u2 bci, TypeOrigin type, TypeOrigin exp) {
+    return ErrorContext(bci, WRONG_TYPE, type, exp);
+  }
+  static ErrorContext bad_flags(u2 bci, StackMapFrame* frame) {
+    return ErrorContext(bci, FLAGS_MISMATCH, TypeOrigin::frame(frame));
+  }
+  static ErrorContext bad_flags(u2 bci, StackMapFrame* cur, StackMapFrame* sm) {
+    return ErrorContext(bci, FLAGS_MISMATCH,
+                        TypeOrigin::frame(cur), TypeOrigin::frame(sm));
+  }
+  static ErrorContext bad_cp_index(u2 bci, u2 index) {
+    return ErrorContext(bci, BAD_CP_INDEX, TypeOrigin::bad_index(index));
+  }
+  static ErrorContext bad_local_index(u2 bci, u2 index) {
+    return ErrorContext(bci, BAD_LOCAL_INDEX, TypeOrigin::bad_index(index));
+  }
+  static ErrorContext locals_size_mismatch(
+      u2 bci, StackMapFrame* frame0, StackMapFrame* frame1) {
+    return ErrorContext(bci, LOCALS_SIZE_MISMATCH,
+        TypeOrigin::frame(frame0), TypeOrigin::frame(frame1));
+  }
+  static ErrorContext stack_size_mismatch(
+      u2 bci, StackMapFrame* frame0, StackMapFrame* frame1) {
+    return ErrorContext(bci, STACK_SIZE_MISMATCH,
+        TypeOrigin::frame(frame0), TypeOrigin::frame(frame1));
+  }
+  static ErrorContext stack_overflow(u2 bci, StackMapFrame* frame) {
+    return ErrorContext(bci, STACK_OVERFLOW, TypeOrigin::frame(frame));
+  }
+  static ErrorContext stack_underflow(u2 bci, StackMapFrame* frame) {
+    return ErrorContext(bci, STACK_UNDERFLOW, TypeOrigin::frame(frame));
+  }
+  static ErrorContext missing_stackmap(u2 bci) {
+    return ErrorContext(bci, MISSING_STACKMAP);
+  }
+  static ErrorContext bad_stackmap(int index, StackMapFrame* frame) {
+    return ErrorContext(0, BAD_STACKMAP, TypeOrigin::frame(frame));
+  }
+
+  bool is_valid() const { return _fault != NO_FAULT; }
+  int bci() const { return _bci; }
+
+  void reset_frames() {
+    _type.reset_frame();
+    _expected.reset_frame();
+  }
+
+  void details(outputStream* ss, methodOop method) const;
+
+#ifdef ASSERT
+  void print_on(outputStream* str) const {
+    str->print("error_context(%d, %d,", _bci, _fault);
+    _type.print_on(str);
+    str->print(",");
+    _expected.print_on(str);
+    str->print(")");
+  }
+#endif
+
+ private:
+  void location_details(outputStream* ss, methodOop method) const;
+  void reason_details(outputStream* ss) const;
+  void frame_details(outputStream* ss) const;
+  void bytecode_details(outputStream* ss, methodOop method) const;
+  void handler_details(outputStream* ss, methodOop method) const;
+  void stackmap_details(outputStream* ss, methodOop method) const;
+};
+
 // A new instance of this class is created for each class being verified
 class ClassVerifier : public StackObj {
  private:
   Thread* _thread;
+  GrowableArray<Symbol*>* _symbols;  // keep a list of symbols created
+
   Symbol* _exception_type;
   char* _message;
-  size_t _message_buffer_len;
-  GrowableArray<Symbol*>* _symbols;  // keep a list of symbols created
+
+  ErrorContext _error_context;  // contains information about an error
 
   void verify_method(methodHandle method, TRAPS);
   char* generate_code_data(methodHandle m, u4 code_length, TRAPS);
-  void verify_exception_handler_table(u4 code_length, char* code_data, int& min, int& max, TRAPS);
+  void verify_exception_handler_table(u4 code_length, char* code_data,
+                                      int& min, int& max, TRAPS);
   void verify_local_variable_table(u4 code_length, char* code_data, TRAPS);
 
   VerificationType cp_ref_index_to_type(
@@ -111,10 +271,10 @@
     instanceKlassHandle this_class, klassOop target_class,
     Symbol* field_name, Symbol* field_sig, bool is_method);
 
-  void verify_cp_index(constantPoolHandle cp, int index, TRAPS);
-  void verify_cp_type(
-    int index, constantPoolHandle cp, unsigned int types, TRAPS);
-  void verify_cp_class_type(int index, constantPoolHandle cp, TRAPS);
+  void verify_cp_index(u2 bci, constantPoolHandle cp, int index, TRAPS);
+  void verify_cp_type(u2 bci, int index, constantPoolHandle cp,
+      unsigned int types, TRAPS);
+  void verify_cp_class_type(u2 bci, int index, constantPoolHandle cp, TRAPS);
 
   u2 verify_stackmap_table(
     u2 stackmap_index, u2 bci, StackMapFrame* current_frame,
@@ -137,7 +297,7 @@
     constantPoolHandle cp, TRAPS);
 
   void verify_invoke_init(
-    RawBytecodeStream* bcs, VerificationType ref_class_type,
+    RawBytecodeStream* bcs, u2 ref_index, VerificationType ref_class_type,
     StackMapFrame* current_frame, u4 code_length, bool* this_uninit,
     constantPoolHandle cp, TRAPS);
 
@@ -147,10 +307,11 @@
     constantPoolHandle cp, TRAPS);
 
   VerificationType get_newarray_type(u2 index, u2 bci, TRAPS);
-  void verify_anewarray(
-    u2 index, constantPoolHandle cp, StackMapFrame* current_frame, TRAPS);
+  void verify_anewarray(u2 bci, u2 index, constantPoolHandle cp,
+      StackMapFrame* current_frame, TRAPS);
   void verify_return_value(
-    VerificationType return_type, VerificationType type, u2 offset, TRAPS);
+      VerificationType return_type, VerificationType type, u2 offset,
+      StackMapFrame* current_frame, TRAPS);
 
   void verify_iload (u2 index, StackMapFrame* current_frame, TRAPS);
   void verify_lload (u2 index, StackMapFrame* current_frame, TRAPS);
@@ -189,7 +350,7 @@
   };
 
   // constructor
-  ClassVerifier(instanceKlassHandle klass, char* msg, size_t msg_len, TRAPS);
+  ClassVerifier(instanceKlassHandle klass, TRAPS);
 
   // destructor
   ~ClassVerifier();
@@ -207,13 +368,17 @@
   // Return status modes
   Symbol* result() const { return _exception_type; }
   bool has_error() const { return result() != NULL; }
+  char* exception_message() {
+    stringStream ss;
+    ss.print(_message);
+    _error_context.details(&ss, _method());
+    return ss.as_string();
+  }
 
   // Called when verify or class format errors are encountered.
   // May throw an exception based upon the mode.
-  void verify_error(u2 offset, const char* fmt, ...);
-  void verify_error(const char* fmt, ...);
+  void verify_error(ErrorContext ctx, const char* fmt, ...);
   void class_format_error(const char* fmt, ...);
-  void format_error_message(const char* fmt, int offset, va_list args);
 
   klassOop load_class(Symbol* name, TRAPS);
 
@@ -228,10 +393,11 @@
   // their reference counts need to be decrememented when the verifier object
   // goes out of scope.  Since these symbols escape the scope in which they're
   // created, we can't use a TempNewSymbol.
-  Symbol* create_temporary_symbol(const Symbol* s, int begin, int end, TRAPS);
+  Symbol* create_temporary_symbol(
+      const Symbol* s, int begin, int end, TRAPS);
   Symbol* create_temporary_symbol(const char *s, int length, TRAPS);
 
-  static bool _verify_verbose;  // for debugging
+  TypeOrigin ref_ctx(const char* str, TRAPS);
 };
 
 inline int ClassVerifier::change_sig_to_verificationType(
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -159,14 +159,30 @@
            "right address out of range");
     assert(left  < right, "Heap addresses out of order");
     size_t num_cards = pointer_delta(right, left) >> LogN_words;
-    memset(&_offset_array[index_for(left)], offset, num_cards);
+    if (UseMemSetInBOT) {
+      memset(&_offset_array[index_for(left)], offset, num_cards);
+    } else {
+      size_t i = index_for(left);
+      const size_t end = i + num_cards;
+      for (; i < end; i++) {
+        _offset_array[i] = offset;
+      }
+    }
   }
 
   void set_offset_array(size_t left, size_t right, u_char offset) {
     assert(right < _vs.committed_size(), "right address out of range");
-    assert(left  <= right, "indexes out of order");
+    assert(left <= right, "indexes out of order");
     size_t num_cards = right - left + 1;
-    memset(&_offset_array[left], offset, num_cards);
+    if (UseMemSetInBOT) {
+      memset(&_offset_array[left], offset, num_cards);
+    } else {
+      size_t i = left;
+      const size_t end = i + num_cards;
+      for (; i < end; i++) {
+        _offset_array[i] = offset;
+      }
+    }
   }
 
   void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const {
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1891,6 +1891,8 @@
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
   _retained_old_gc_alloc_region(NULL),
+  _survivor_plab_stats(YoungPLABSize, PLABWeight),
+  _old_plab_stats(OldPLABSize, PLABWeight),
   _expand_heap_after_alloc_failure(true),
   _surviving_young_words(NULL),
   _old_marking_cycles_started(0),
@@ -1932,6 +1934,14 @@
   clear_cset_start_regions();
 
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
+#ifdef SPARC
+  // Issue a stern warning, but allow use for experimentation and debugging.
+  if (VM_Version::is_sun4v() && UseMemSetInBOT) {
+    assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error");
+    warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability"
+            " on sun4v; please understand that you are using at your own risk!");
+  }
+#endif
 }
 
 jint G1CollectedHeap::initialize() {
@@ -3580,15 +3590,11 @@
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   size_t buffer_size = dcqs.buffer_size();
   size_t buffer_num = dcqs.completed_buffers_num();
-  return buffer_size * buffer_num + extra_cards;
-}
-
-size_t G1CollectedHeap::max_pending_card_num() {
-  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-  size_t buffer_size = dcqs.buffer_size();
-  size_t buffer_num  = dcqs.completed_buffers_num();
-  int thread_num  = Threads::number_of_threads();
-  return (buffer_num + thread_num) * buffer_size;
+
+  // PtrQueueSet::buffer_size() and PtrQueue:size() return sizes
+  // in bytes - not the number of 'entries'. We need to convert
+  // into a number of cards.
+  return (buffer_size * buffer_num + extra_cards) / oopSize;
 }
 
 size_t G1CollectedHeap::cards_scanned() {
@@ -4099,17 +4105,22 @@
   size_t gclab_word_size;
   switch (purpose) {
     case GCAllocForSurvived:
-      gclab_word_size = YoungPLABSize;
+      gclab_word_size = _survivor_plab_stats.desired_plab_sz();
       break;
     case GCAllocForTenured:
-      gclab_word_size = OldPLABSize;
+      gclab_word_size = _old_plab_stats.desired_plab_sz();
       break;
     default:
       assert(false, "unknown GCAllocPurpose");
-      gclab_word_size = OldPLABSize;
+      gclab_word_size = _old_plab_stats.desired_plab_sz();
       break;
   }
-  return gclab_word_size;
+
+  // Prevent humongous PLAB sizes for two reasons:
+  // * PLABs are allocated using a similar paths as oops, but should
+  //   never be in a humongous region
+  // * Allowing humongous PLABs needlessly churns the region free lists
+  return MIN2(_humongous_object_threshold_in_words, gclab_word_size);
 }
 
 void G1CollectedHeap::init_mutator_alloc_region() {
@@ -4165,6 +4176,11 @@
   // want either way so no reason to check explicitly for either
   // condition.
   _retained_old_gc_alloc_region = _old_gc_alloc_region.release();
+
+  if (ResizePLAB) {
+    _survivor_plab_stats.adjust_desired_plab_sz();
+    _old_plab_stats.adjust_desired_plab_sz();
+  }
 }
 
 void G1CollectedHeap::abandon_gc_alloc_regions() {
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -33,7 +33,7 @@
 #include "gc_implementation/g1/heapRegionSeq.hpp"
 #include "gc_implementation/g1/heapRegionSets.hpp"
 #include "gc_implementation/shared/hSpaceCounters.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "memory/barrierSet.hpp"
 #include "memory/memRegion.hpp"
 #include "memory/sharedHeap.hpp"
@@ -278,10 +278,33 @@
   // survivor objects.
   SurvivorGCAllocRegion _survivor_gc_alloc_region;
 
+  // PLAB sizing policy for survivors.
+  PLABStats _survivor_plab_stats;
+
   // Alloc region used to satisfy allocation requests by the GC for
   // old objects.
   OldGCAllocRegion _old_gc_alloc_region;
 
+  // PLAB sizing policy for tenured objects.
+  PLABStats _old_plab_stats;
+
+  PLABStats* stats_for_purpose(GCAllocPurpose purpose) {
+    PLABStats* stats = NULL;
+
+    switch (purpose) {
+    case GCAllocForSurvived:
+      stats = &_survivor_plab_stats;
+      break;
+    case GCAllocForTenured:
+      stats = &_old_plab_stats;
+      break;
+    default:
+      assert(false, "unrecognized GCAllocPurpose");
+    }
+
+    return stats;
+  }
+
   // The last old region we allocated to during the last GC.
   // Typically, it is not full so we should re-use it during the next GC.
   HeapRegion* _retained_old_gc_alloc_region;
@@ -314,7 +337,7 @@
   G1MonitoringSupport* _g1mm;
 
   // Determines PLAB size for a particular allocation purpose.
-  static size_t desired_plab_sz(GCAllocPurpose purpose);
+  size_t desired_plab_sz(GCAllocPurpose purpose);
 
   // Outside of GC pauses, the number of bytes used in all regions other
   // than the current allocation region.
@@ -1683,7 +1706,6 @@
   void stop_conc_gc_threads();
 
   size_t pending_card_num();
-  size_t max_pending_card_num();
   size_t cards_scanned();
 
 protected:
@@ -1811,19 +1833,19 @@
   }
 
   HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
-
     HeapWord* obj = NULL;
     size_t gclab_word_size = _g1h->desired_plab_sz(purpose);
     if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
       G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
-      assert(gclab_word_size == alloc_buf->word_sz(),
-             "dynamic resizing is not supported");
       add_to_alloc_buffer_waste(alloc_buf->words_remaining());
-      alloc_buf->retire(false, false);
+      alloc_buf->flush_stats_and_retire(_g1h->stats_for_purpose(purpose),
+                                        false /* end_of_gc */,
+                                        false /* retain */);
 
       HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size);
       if (buf == NULL) return NULL; // Let caller handle allocation failure.
       // Otherwise.
+      alloc_buf->set_word_size(gclab_word_size);
       alloc_buf->set_buf(buf);
 
       obj = alloc_buf->allocate(word_sz);
@@ -1908,7 +1930,9 @@
     for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
       size_t waste = _alloc_buffers[ap]->words_remaining();
       add_to_alloc_buffer_waste(waste);
-      _alloc_buffers[ap]->retire(true, false);
+      _alloc_buffers[ap]->flush_stats_and_retire(_g1h->stats_for_purpose((GCAllocPurpose)ap),
+                                                 true /* end_of_gc */,
+                                                 false /* retain */);
     }
   }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -90,7 +90,6 @@
 
   _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _prev_collection_pause_end_ms(0.0),
-  _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
   _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -197,7 +196,6 @@
 
   int index = MIN2(_parallel_gc_threads - 1, 7);
 
-  _pending_card_diff_seq->add(0.0);
   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
   _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]);
   _young_cards_per_entry_ratio_seq->add(
@@ -657,7 +655,7 @@
   for (HeapRegion * r = _recorded_survivor_head;
        r != NULL && r != _recorded_survivor_tail->get_next_young_region();
        r = r->get_next_young_region()) {
-    survivor_regions_evac_time += predict_region_elapsed_time_ms(r, true);
+    survivor_regions_evac_time += predict_region_elapsed_time_ms(r, gcs_are_young());
   }
   return survivor_regions_evac_time;
 }
@@ -801,9 +799,8 @@
   _cur_collection_pause_used_at_start_bytes = start_used;
   _cur_collection_pause_used_regions_at_start = _g1->used_regions();
   _pending_cards = _g1->pending_card_num();
-  _max_pending_cards = _g1->max_pending_card_num();
 
-  _bytes_in_collection_set_before_gc = 0;
+  _collection_set_bytes_used_before = 0;
   _bytes_copied_during_gc = 0;
 
   YoungList* young_list = _g1->young_list();
@@ -1036,12 +1033,6 @@
   // do that for any other surv rate groupsx
 
   if (update_stats) {
-    size_t diff = 0;
-    if (_max_pending_cards >= _pending_cards) {
-      diff = _max_pending_cards - _pending_cards;
-    }
-    _pending_card_diff_seq->add((double) diff);
-
     double cost_per_card_ms = 0.0;
     if (_pending_cards > 0) {
       cost_per_card_ms = phase_times()->_update_rs_time / (double) _pending_cards;
@@ -1126,9 +1117,9 @@
     _constant_other_time_ms_seq->add(constant_other_time_ms);
 
     double survival_ratio = 0.0;
-    if (_bytes_in_collection_set_before_gc > 0) {
+    if (_collection_set_bytes_used_before > 0) {
       survival_ratio = (double) _bytes_copied_during_gc /
-                                   (double) _bytes_in_collection_set_before_gc;
+                                   (double) _collection_set_bytes_used_before;
     }
 
     _pending_cards_seq->add((double) _pending_cards);
@@ -1229,18 +1220,6 @@
 }
 
 double
-G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) {
-  size_t rs_length = predict_rs_length_diff();
-  size_t card_num;
-  if (gcs_are_young()) {
-    card_num = predict_young_card_num(rs_length);
-  } else {
-    card_num = predict_non_young_card_num(rs_length);
-  }
-  return predict_base_elapsed_time_ms(pending_cards, card_num);
-}
-
-double
 G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards,
                                                 size_t scanned_cards) {
   return
@@ -1250,27 +1229,15 @@
 }
 
 double
-G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr,
-                                                  bool young) {
-  size_t rs_length = hr->rem_set()->occupied();
+G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) {
+  size_t rs_length = predict_rs_length_diff();
   size_t card_num;
   if (gcs_are_young()) {
     card_num = predict_young_card_num(rs_length);
   } else {
     card_num = predict_non_young_card_num(rs_length);
   }
-  size_t bytes_to_copy = predict_bytes_to_copy(hr);
-
-  double region_elapsed_time_ms =
-    predict_rs_scan_time_ms(card_num) +
-    predict_object_copy_time_ms(bytes_to_copy);
-
-  if (young)
-    region_elapsed_time_ms += predict_young_other_time_ms(1);
-  else
-    region_elapsed_time_ms += predict_non_young_other_time_ms(1);
-
-  return region_elapsed_time_ms;
+  return predict_base_elapsed_time_ms(pending_cards, card_num);
 }
 
 size_t G1CollectorPolicy::predict_bytes_to_copy(HeapRegion* hr) {
@@ -1286,6 +1253,35 @@
   return bytes_to_copy;
 }
 
+double
+G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr,
+                                                  bool for_young_gc) {
+  size_t rs_length = hr->rem_set()->occupied();
+  size_t card_num;
+
+  // Predicting the number of cards is based on which type of GC
+  // we're predicting for.
+  if (for_young_gc) {
+    card_num = predict_young_card_num(rs_length);
+  } else {
+    card_num = predict_non_young_card_num(rs_length);
+  }
+  size_t bytes_to_copy = predict_bytes_to_copy(hr);
+
+  double region_elapsed_time_ms =
+    predict_rs_scan_time_ms(card_num) +
+    predict_object_copy_time_ms(bytes_to_copy);
+
+  // The prediction of the "other" time for this region is based
+  // upon the region type and NOT the GC type.
+  if (hr->is_young()) {
+    region_elapsed_time_ms += predict_young_other_time_ms(1);
+  } else {
+    region_elapsed_time_ms += predict_non_young_other_time_ms(1);
+  }
+  return region_elapsed_time_ms;
+}
+
 void
 G1CollectorPolicy::init_cset_region_lengths(uint eden_cset_region_length,
                                             uint survivor_cset_region_length) {
@@ -1342,22 +1338,6 @@
   }
 }
 
-class CountCSClosure: public HeapRegionClosure {
-  G1CollectorPolicy* _g1_policy;
-public:
-  CountCSClosure(G1CollectorPolicy* g1_policy) :
-    _g1_policy(g1_policy) {}
-  bool doHeapRegion(HeapRegion* r) {
-    _g1_policy->_bytes_in_collection_set_before_gc += r->used();
-    return false;
-  }
-};
-
-void G1CollectorPolicy::count_CS_bytes_used() {
-  CountCSClosure cs_closure(this);
-  _g1->collection_set_iterate(&cs_closure);
-}
-
 void G1CollectorPolicy::print_tracing_info() const {
   _trace_gen0_time_data.print();
   _trace_gen1_time_data.print();
@@ -1696,7 +1676,7 @@
   // retiring the current allocation region) or a concurrent
   // refine thread (RSet sampling).
 
-  double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true);
+  double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young());
   size_t used_bytes = hr->used();
   _inc_cset_recorded_rs_lengths += rs_length;
   _inc_cset_predicted_elapsed_time_ms += region_elapsed_time_ms;
@@ -1731,7 +1711,7 @@
   _inc_cset_recorded_rs_lengths_diffs += rs_lengths_diff;
 
   double old_elapsed_time_ms = hr->predicted_elapsed_time_ms();
-  double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true);
+  double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young());
   double elapsed_ms_diff = new_region_elapsed_time_ms - old_elapsed_time_ms;
   _inc_cset_predicted_elapsed_time_ms_diffs += elapsed_ms_diff;
 
@@ -1854,8 +1834,7 @@
 }
 
 void G1CollectorPolicy::finalize_cset(double target_pause_time_ms) {
-  // Set this here - in case we're not doing young collections.
-  double non_young_start_time_sec = os::elapsedTime();
+  double young_start_time_sec = os::elapsedTime();
 
   YoungList* young_list = _g1->young_list();
   finalize_incremental_cset_building();
@@ -1869,17 +1848,14 @@
   double predicted_pause_time_ms = base_time_ms;
   double time_remaining_ms = target_pause_time_ms - base_time_ms;
 
-  ergo_verbose3(ErgoCSetConstruction | ErgoHigh,
+  ergo_verbose4(ErgoCSetConstruction | ErgoHigh,
                 "start choosing CSet",
+                ergo_format_size("_pending_cards")
                 ergo_format_ms("predicted base time")
                 ergo_format_ms("remaining time")
                 ergo_format_ms("target pause time"),
-                base_time_ms, time_remaining_ms, target_pause_time_ms);
+                _pending_cards, base_time_ms, time_remaining_ms, target_pause_time_ms);
 
-  HeapRegion* hr;
-  double young_start_time_sec = os::elapsedTime();
-
-  _collection_set_bytes_used_before = 0;
   _last_gc_was_young = gcs_are_young() ? true : false;
 
   if (_last_gc_was_young) {
@@ -1895,7 +1871,8 @@
   uint survivor_region_length = young_list->survivor_length();
   uint eden_region_length = young_list->length() - survivor_region_length;
   init_cset_region_lengths(eden_region_length, survivor_region_length);
-  hr = young_list->first_survivor_region();
+
+  HeapRegion* hr = young_list->first_survivor_region();
   while (hr != NULL) {
     assert(hr->is_survivor(), "badly formed young list");
     hr->set_young();
@@ -1926,8 +1903,8 @@
   phase_times()->_recorded_young_cset_choice_time_ms =
     (young_end_time_sec - young_start_time_sec) * 1000.0;
 
-  // We are doing young collections so reset this.
-  non_young_start_time_sec = young_end_time_sec;
+  // Set the start of the non-young choice time.
+  double non_young_start_time_sec = young_end_time_sec;
 
   if (!gcs_are_young()) {
     CollectionSetChooser* cset_chooser = _collectionSetChooser;
@@ -1937,6 +1914,7 @@
 
     uint expensive_region_num = 0;
     bool check_time_remaining = adaptive_young_list_length();
+
     HeapRegion* hr = cset_chooser->peek();
     while (hr != NULL) {
       if (old_cset_region_length() >= max_old_cset_length) {
@@ -1950,7 +1928,7 @@
         break;
       }
 
-      double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
+      double predicted_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young());
       if (check_time_remaining) {
         if (predicted_time_ms > time_remaining_ms) {
           // Too expensive for the current CSet.
@@ -2025,8 +2003,6 @@
 
   stop_incremental_cset_building();
 
-  count_CS_bytes_used();
-
   ergo_verbose5(ErgoCSetConstruction,
                 "finish choosing CSet",
                 ergo_format_region("eden")
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -228,7 +228,6 @@
   TruncatedSeq* _alloc_rate_ms_seq;
   double        _prev_collection_pause_end_ms;
 
-  TruncatedSeq* _pending_card_diff_seq;
   TruncatedSeq* _rs_length_diff_seq;
   TruncatedSeq* _cost_per_card_ms_seq;
   TruncatedSeq* _young_cards_per_entry_ratio_seq;
@@ -295,7 +294,6 @@
   double _pause_time_target_ms;
 
   size_t _pending_cards;
-  size_t _max_pending_cards;
 
 public:
   // Accessors
@@ -325,28 +323,6 @@
     _max_rs_lengths = rs_lengths;
   }
 
-  size_t predict_pending_card_diff() {
-    double prediction = get_new_neg_prediction(_pending_card_diff_seq);
-    if (prediction < 0.00001) {
-      return 0;
-    } else {
-      return (size_t) prediction;
-    }
-  }
-
-  size_t predict_pending_cards() {
-    size_t max_pending_card_num = _g1->max_pending_card_num();
-    size_t diff = predict_pending_card_diff();
-    size_t prediction;
-    if (diff > max_pending_card_num) {
-      prediction = max_pending_card_num;
-    } else {
-      prediction = max_pending_card_num - diff;
-    }
-
-    return prediction;
-  }
-
   size_t predict_rs_length_diff() {
     return (size_t) get_new_prediction(_rs_length_diff_seq);
   }
@@ -439,7 +415,7 @@
   double predict_base_elapsed_time_ms(size_t pending_cards,
                                       size_t scanned_cards);
   size_t predict_bytes_to_copy(HeapRegion* hr);
-  double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
+  double predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc);
 
   void set_recorded_rs_lengths(size_t rs_lengths);
 
@@ -495,12 +471,6 @@
   }
 
 private:
-  size_t _bytes_in_collection_set_before_gc;
-  size_t _bytes_copied_during_gc;
-
-  // Used to count used bytes in CS.
-  friend class CountCSClosure;
-
   // Statistics kept per GC stoppage, pause or full.
   TruncatedSeq* _recent_prev_end_times_for_all_gcs_sec;
 
@@ -514,9 +484,13 @@
 
   // The number of bytes in the collection set before the pause. Set from
   // the incrementally built collection set at the start of an evacuation
-  // pause.
+  // pause, and incremented in finalize_cset() when adding old regions
+  // (if any) to the collection set.
   size_t _collection_set_bytes_used_before;
 
+  // The number of bytes copied during the GC.
+  size_t _bytes_copied_during_gc;
+
   // The associated information that is maintained while the incremental
   // collection set is being built with young regions. Used to populate
   // the recorded info for the evacuation pause.
@@ -646,9 +620,6 @@
   bool predict_will_fit(uint young_length, double base_time_ms,
                         uint base_free_regions, double target_pause_time_ms);
 
-  // Count the number of bytes used in the CS.
-  void count_CS_bytes_used();
-
 public:
 
   G1CollectorPolicy();
@@ -666,10 +637,6 @@
   // higher, recalculate the young list target length prediction.
   void revise_young_list_target_length_if_necessary();
 
-  size_t bytes_in_collection_set() {
-    return _bytes_in_collection_set_before_gc;
-  }
-
   // This should be called after the heap is resized.
   void record_new_heap_size(uint new_number_of_regions);
 
--- a/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -125,6 +125,7 @@
 #define ergo_format_double(_name_)   ", " _name_ ": %1.2f"
 #define ergo_format_perc(_name_)     ", " _name_ ": %1.2f %%"
 #define ergo_format_ms(_name_)       ", " _name_ ": %1.2f ms"
+#define ergo_format_size(_name_)     ", " _name_ ": "SIZE_FORMAT
 
 // Double parameter format strings
 #define ergo_format_byte_perc(_name_)                                   \
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -287,17 +287,17 @@
           "The number of times we'll force an overflow during "             \
           "concurrent marking")                                             \
                                                                             \
-  develop(uintx, G1DefaultMinNewGenPercent, 20,                             \
+  experimental(uintx, G1DefaultMinNewGenPercent, 20,                        \
           "Percentage (0-100) of the heap size to use as minimum "          \
           "young gen size.")                                                \
                                                                             \
-  develop(uintx, G1DefaultMaxNewGenPercent, 80,                             \
+  experimental(uintx, G1DefaultMaxNewGenPercent, 80,                        \
           "Percentage (0-100) of the heap size to use as maximum "          \
           "young gen size.")                                                \
                                                                             \
-  develop(uintx, G1OldCSetRegionLiveThresholdPercent, 90,                   \
+  experimental(uintx, G1OldCSetRegionLiveThresholdPercent, 90,              \
           "Threshold for regions to be added to the collection set. "       \
-          "Regions with more live bytes that this will not be collected.")  \
+          "Regions with more live bytes than this will not be collected.")  \
                                                                             \
   product(uintx, G1HeapWastePercent, 5,                                     \
           "Amount of space, expressed as a percentage of the heap size, "   \
@@ -306,7 +306,7 @@
   product(uintx, G1MixedGCCountTarget, 4,                                   \
           "The target number of mixed GCs after a marking cycle.")          \
                                                                             \
-  develop(uintx, G1OldCSetRegionThresholdPercent, 10,                       \
+  experimental(uintx, G1OldCSetRegionThresholdPercent, 10,                  \
           "An upper bound for the number of old CSet regions expressed "    \
           "as a percentage of the heap size.")                              \
                                                                             \
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -384,10 +384,17 @@
 }
 
 void HeapRegion::calc_gc_efficiency() {
+  // GC efficiency is the ratio of how much space would be
+  // reclaimed over how long we predict it would take to reclaim it.
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
-  _gc_efficiency = (double) reclaimable_bytes() /
-                            g1p->predict_region_elapsed_time_ms(this, false);
+
+  // Retrieve a prediction of the elapsed time for this region for
+  // a mixed gc because the region will only be evacuated during a
+  // mixed gc.
+  double region_elapsed_time_ms =
+    g1p->predict_region_elapsed_time_ms(this, false /* for_young_gc */);
+  _gc_efficiency = (double) reclaimable_bytes() / region_elapsed_time_ms;
 }
 
 void HeapRegion::set_startsHumongous(HeapWord* new_top, HeapWord* new_end) {
--- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
-#include "memory/sharedHeap.hpp"
-#include "oops/arrayOop.hpp"
-#include "oops/oop.inline.hpp"
-
-ParGCAllocBuffer::ParGCAllocBuffer(size_t desired_plab_sz_) :
-  _word_sz(desired_plab_sz_), _bottom(NULL), _top(NULL),
-  _end(NULL), _hard_end(NULL),
-  _retained(false), _retained_filler(),
-  _allocated(0), _wasted(0)
-{
-  assert (min_size() > AlignmentReserve, "Inconsistency!");
-  // arrayOopDesc::header_size depends on command line initialization.
-  FillerHeaderSize = align_object_size(arrayOopDesc::header_size(T_INT));
-  AlignmentReserve = oopDesc::header_size() > MinObjAlignment ? FillerHeaderSize : 0;
-}
-
-size_t ParGCAllocBuffer::FillerHeaderSize;
-
-// If the minimum object size is greater than MinObjAlignment, we can
-// end up with a shard at the end of the buffer that's smaller than
-// the smallest object.  We can't allow that because the buffer must
-// look like it's full of objects when we retire it, so we make
-// sure we have enough space for a filler int array object.
-size_t ParGCAllocBuffer::AlignmentReserve;
-
-void ParGCAllocBuffer::retire(bool end_of_gc, bool retain) {
-  assert(!retain || end_of_gc, "Can only retain at GC end.");
-  if (_retained) {
-    // If the buffer had been retained shorten the previous filler object.
-    assert(_retained_filler.end() <= _top, "INVARIANT");
-    CollectedHeap::fill_with_object(_retained_filler);
-    // Wasted space book-keeping, otherwise (normally) done in invalidate()
-    _wasted += _retained_filler.word_size();
-    _retained = false;
-  }
-  assert(!end_of_gc || !_retained, "At this point, end_of_gc ==> !_retained.");
-  if (_top < _hard_end) {
-    CollectedHeap::fill_with_object(_top, _hard_end);
-    if (!retain) {
-      invalidate();
-    } else {
-      // Is there wasted space we'd like to retain for the next GC?
-      if (pointer_delta(_end, _top) > FillerHeaderSize) {
-        _retained = true;
-        _retained_filler = MemRegion(_top, FillerHeaderSize);
-        _top = _top + FillerHeaderSize;
-      } else {
-        invalidate();
-      }
-    }
-  }
-}
-
-void ParGCAllocBuffer::flush_stats(PLABStats* stats) {
-  assert(ResizePLAB, "Wasted work");
-  stats->add_allocated(_allocated);
-  stats->add_wasted(_wasted);
-  stats->add_unused(pointer_delta(_end, _top));
-}
-
-// Compute desired plab size and latch result for later
-// use. This should be called once at the end of parallel
-// scavenge; it clears the sensor accumulators.
-void PLABStats::adjust_desired_plab_sz() {
-  assert(ResizePLAB, "Not set");
-  if (_allocated == 0) {
-    assert(_unused == 0, "Inconsistency in PLAB stats");
-    _allocated = 1;
-  }
-  double wasted_frac    = (double)_unused/(double)_allocated;
-  size_t target_refills = (size_t)((wasted_frac*TargetSurvivorRatio)/
-                                   TargetPLABWastePct);
-  if (target_refills == 0) {
-    target_refills = 1;
-  }
-  _used = _allocated - _wasted - _unused;
-  size_t plab_sz = _used/(target_refills*ParallelGCThreads);
-  if (PrintPLAB) gclog_or_tty->print(" (plab_sz = %d ", plab_sz);
-  // Take historical weighted average
-  _filter.sample(plab_sz);
-  // Clip from above and below, and align to object boundary
-  plab_sz = MAX2(min_size(), (size_t)_filter.average());
-  plab_sz = MIN2(max_size(), plab_sz);
-  plab_sz = align_object_size(plab_sz);
-  // Latch the result
-  if (PrintPLAB) gclog_or_tty->print(" desired_plab_sz = %d) ", plab_sz);
-  if (ResizePLAB) {
-    _desired_plab_sz = plab_sz;
-  }
-  // Now clear the accumulators for next round:
-  // note this needs to be fixed in the case where we
-  // are retaining across scavenges. FIX ME !!! XXX
-  _allocated = 0;
-  _wasted    = 0;
-  _unused    = 0;
-}
-
-#ifndef PRODUCT
-void ParGCAllocBuffer::print() {
-  gclog_or_tty->print("parGCAllocBuffer: _bottom: %p  _top: %p  _end: %p  _hard_end: %p"
-             "_retained: %c _retained_filler: [%p,%p)\n",
-             _bottom, _top, _end, _hard_end,
-             "FT"[_retained], _retained_filler.start(), _retained_filler.end());
-}
-#endif // !PRODUCT
-
-const size_t ParGCAllocBufferWithBOT::ChunkSizeInWords =
-MIN2(CardTableModRefBS::par_chunk_heapword_alignment(),
-     ((size_t)Generation::GenGrain)/HeapWordSize);
-const size_t ParGCAllocBufferWithBOT::ChunkSizeInBytes =
-MIN2(CardTableModRefBS::par_chunk_heapword_alignment() * HeapWordSize,
-     (size_t)Generation::GenGrain);
-
-ParGCAllocBufferWithBOT::ParGCAllocBufferWithBOT(size_t word_sz,
-                                                 BlockOffsetSharedArray* bsa) :
-  ParGCAllocBuffer(word_sz),
-  _bsa(bsa),
-  _bt(bsa, MemRegion(_bottom, _hard_end)),
-  _true_end(_hard_end)
-{}
-
-// The buffer comes with its own BOT, with a shared (obviously) underlying
-// BlockOffsetSharedArray. We manipulate this BOT in the normal way
-// as we would for any contiguous space. However, on accasion we
-// need to do some buffer surgery at the extremities before we
-// start using the body of the buffer for allocations. Such surgery
-// (as explained elsewhere) is to prevent allocation on a card that
-// is in the process of being walked concurrently by another GC thread.
-// When such surgery happens at a point that is far removed (to the
-// right of the current allocation point, top), we use the "contig"
-// parameter below to directly manipulate the shared array without
-// modifying the _next_threshold state in the BOT.
-void ParGCAllocBufferWithBOT::fill_region_with_block(MemRegion mr,
-                                                     bool contig) {
-  CollectedHeap::fill_with_object(mr);
-  if (contig) {
-    _bt.alloc_block(mr.start(), mr.end());
-  } else {
-    _bt.BlockOffsetArray::alloc_block(mr.start(), mr.end());
-  }
-}
-
-HeapWord* ParGCAllocBufferWithBOT::allocate_slow(size_t word_sz) {
-  HeapWord* res = NULL;
-  if (_true_end > _hard_end) {
-    assert((HeapWord*)align_size_down(intptr_t(_hard_end),
-                                      ChunkSizeInBytes) == _hard_end,
-           "or else _true_end should be equal to _hard_end");
-    assert(_retained, "or else _true_end should be equal to _hard_end");
-    assert(_retained_filler.end() <= _top, "INVARIANT");
-    CollectedHeap::fill_with_object(_retained_filler);
-    if (_top < _hard_end) {
-      fill_region_with_block(MemRegion(_top, _hard_end), true);
-    }
-    HeapWord* next_hard_end = MIN2(_true_end, _hard_end + ChunkSizeInWords);
-    _retained_filler = MemRegion(_hard_end, FillerHeaderSize);
-    _bt.alloc_block(_retained_filler.start(), _retained_filler.word_size());
-    _top      = _retained_filler.end();
-    _hard_end = next_hard_end;
-    _end      = _hard_end - AlignmentReserve;
-    res       = ParGCAllocBuffer::allocate(word_sz);
-    if (res != NULL) {
-      _bt.alloc_block(res, word_sz);
-    }
-  }
-  return res;
-}
-
-void
-ParGCAllocBufferWithBOT::undo_allocation(HeapWord* obj, size_t word_sz) {
-  ParGCAllocBuffer::undo_allocation(obj, word_sz);
-  // This may back us up beyond the previous threshold, so reset.
-  _bt.set_region(MemRegion(_top, _hard_end));
-  _bt.initialize_threshold();
-}
-
-void ParGCAllocBufferWithBOT::retire(bool end_of_gc, bool retain) {
-  assert(!retain || end_of_gc, "Can only retain at GC end.");
-  if (_retained) {
-    // We're about to make the retained_filler into a block.
-    _bt.BlockOffsetArray::alloc_block(_retained_filler.start(),
-                                      _retained_filler.end());
-  }
-  // Reset _hard_end to _true_end (and update _end)
-  if (retain && _hard_end != NULL) {
-    assert(_hard_end <= _true_end, "Invariant.");
-    _hard_end = _true_end;
-    _end      = MAX2(_top, _hard_end - AlignmentReserve);
-    assert(_end <= _hard_end, "Invariant.");
-  }
-  _true_end = _hard_end;
-  HeapWord* pre_top = _top;
-
-  ParGCAllocBuffer::retire(end_of_gc, retain);
-  // Now any old _retained_filler is cut back to size, the free part is
-  // filled with a filler object, and top is past the header of that
-  // object.
-
-  if (retain && _top < _end) {
-    assert(end_of_gc && retain, "Or else retain should be false.");
-    // If the lab does not start on a card boundary, we don't want to
-    // allocate onto that card, since that might lead to concurrent
-    // allocation and card scanning, which we don't support.  So we fill
-    // the first card with a garbage object.
-    size_t first_card_index = _bsa->index_for(pre_top);
-    HeapWord* first_card_start = _bsa->address_for_index(first_card_index);
-    if (first_card_start < pre_top) {
-      HeapWord* second_card_start =
-        _bsa->inc_by_region_size(first_card_start);
-
-      // Ensure enough room to fill with the smallest block
-      second_card_start = MAX2(second_card_start, pre_top + AlignmentReserve);
-
-      // If the end is already in the first card, don't go beyond it!
-      // Or if the remainder is too small for a filler object, gobble it up.
-      if (_hard_end < second_card_start ||
-          pointer_delta(_hard_end, second_card_start) < AlignmentReserve) {
-        second_card_start = _hard_end;
-      }
-      if (pre_top < second_card_start) {
-        MemRegion first_card_suffix(pre_top, second_card_start);
-        fill_region_with_block(first_card_suffix, true);
-      }
-      pre_top = second_card_start;
-      _top = pre_top;
-      _end = MAX2(_top, _hard_end - AlignmentReserve);
-    }
-
-    // If the lab does not end on a card boundary, we don't want to
-    // allocate onto that card, since that might lead to concurrent
-    // allocation and card scanning, which we don't support.  So we fill
-    // the last card with a garbage object.
-    size_t last_card_index = _bsa->index_for(_hard_end);
-    HeapWord* last_card_start = _bsa->address_for_index(last_card_index);
-    if (last_card_start < _hard_end) {
-
-      // Ensure enough room to fill with the smallest block
-      last_card_start = MIN2(last_card_start, _hard_end - AlignmentReserve);
-
-      // If the top is already in the last card, don't go back beyond it!
-      // Or if the remainder is too small for a filler object, gobble it up.
-      if (_top > last_card_start ||
-          pointer_delta(last_card_start, _top) < AlignmentReserve) {
-        last_card_start = _top;
-      }
-      if (last_card_start < _hard_end) {
-        MemRegion last_card_prefix(last_card_start, _hard_end);
-        fill_region_with_block(last_card_prefix, false);
-      }
-      _hard_end = last_card_start;
-      _end      = MAX2(_top, _hard_end - AlignmentReserve);
-      _true_end = _hard_end;
-      assert(_end <= _hard_end, "Invariant.");
-    }
-
-    // At this point:
-    //   1) we had a filler object from the original top to hard_end.
-    //   2) We've filled in any partial cards at the front and back.
-    if (pre_top < _hard_end) {
-      // Now we can reset the _bt to do allocation in the given area.
-      MemRegion new_filler(pre_top, _hard_end);
-      fill_region_with_block(new_filler, false);
-      _top = pre_top + ParGCAllocBuffer::FillerHeaderSize;
-      // If there's no space left, don't retain.
-      if (_top >= _end) {
-        _retained = false;
-        invalidate();
-        return;
-      }
-      _retained_filler = MemRegion(pre_top, _top);
-      _bt.set_region(MemRegion(_top, _hard_end));
-      _bt.initialize_threshold();
-      assert(_bt.threshold() > _top, "initialize_threshold failed!");
-
-      // There may be other reasons for queries into the middle of the
-      // filler object.  When such queries are done in parallel with
-      // allocation, bad things can happen, if the query involves object
-      // iteration.  So we ensure that such queries do not involve object
-      // iteration, by putting another filler object on the boundaries of
-      // such queries.  One such is the object spanning a parallel card
-      // chunk boundary.
-
-      // "chunk_boundary" is the address of the first chunk boundary less
-      // than "hard_end".
-      HeapWord* chunk_boundary =
-        (HeapWord*)align_size_down(intptr_t(_hard_end-1), ChunkSizeInBytes);
-      assert(chunk_boundary < _hard_end, "Or else above did not work.");
-      assert(pointer_delta(_true_end, chunk_boundary) >= AlignmentReserve,
-             "Consequence of last card handling above.");
-
-      if (_top <= chunk_boundary) {
-        assert(_true_end == _hard_end, "Invariant.");
-        while (_top <= chunk_boundary) {
-          assert(pointer_delta(_hard_end, chunk_boundary) >= AlignmentReserve,
-                 "Consequence of last card handling above.");
-          _bt.BlockOffsetArray::alloc_block(chunk_boundary, _hard_end);
-          CollectedHeap::fill_with_object(chunk_boundary, _hard_end);
-          _hard_end = chunk_boundary;
-          chunk_boundary -= ChunkSizeInWords;
-        }
-        _end = _hard_end - AlignmentReserve;
-        assert(_top <= _end, "Invariant.");
-        // Now reset the initial filler chunk so it doesn't overlap with
-        // the one(s) inserted above.
-        MemRegion new_filler(pre_top, _hard_end);
-        fill_region_with_block(new_filler, false);
-      }
-    } else {
-      _retained = false;
-      invalidate();
-    }
-  } else {
-    assert(!end_of_gc ||
-           (!_retained && _true_end == _hard_end), "Checking.");
-  }
-  assert(_end <= _hard_end, "Invariant.");
-  assert(_top < _end || _top == _hard_end, "Invariant");
-}
--- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
-#define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
-
-#include "memory/allocation.hpp"
-#include "memory/blockOffsetTable.hpp"
-#include "memory/threadLocalAllocBuffer.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-// Forward decl.
-
-class PLABStats;
-
-// A per-thread allocation buffer used during GC.
-class ParGCAllocBuffer: public CHeapObj<mtGC> {
-protected:
-  char head[32];
-  size_t _word_sz;          // in HeapWord units
-  HeapWord* _bottom;
-  HeapWord* _top;
-  HeapWord* _end;       // last allocatable address + 1
-  HeapWord* _hard_end;  // _end + AlignmentReserve
-  bool      _retained;  // whether we hold a _retained_filler
-  MemRegion _retained_filler;
-  // In support of ergonomic sizing of PLAB's
-  size_t    _allocated;     // in HeapWord units
-  size_t    _wasted;        // in HeapWord units
-  char tail[32];
-  static size_t FillerHeaderSize;
-  static size_t AlignmentReserve;
-
-public:
-  // Initializes the buffer to be empty, but with the given "word_sz".
-  // Must get initialized with "set_buf" for an allocation to succeed.
-  ParGCAllocBuffer(size_t word_sz);
-
-  static const size_t min_size() {
-    return ThreadLocalAllocBuffer::min_size();
-  }
-
-  static const size_t max_size() {
-    return ThreadLocalAllocBuffer::max_size();
-  }
-
-  // If an allocation of the given "word_sz" can be satisfied within the
-  // buffer, do the allocation, returning a pointer to the start of the
-  // allocated block.  If the allocation request cannot be satisfied,
-  // return NULL.
-  HeapWord* allocate(size_t word_sz) {
-    HeapWord* res = _top;
-    if (pointer_delta(_end, _top) >= word_sz) {
-      _top = _top + word_sz;
-      return res;
-    } else {
-      return NULL;
-    }
-  }
-
-  // Undo the last allocation in the buffer, which is required to be of the
-  // "obj" of the given "word_sz".
-  void undo_allocation(HeapWord* obj, size_t word_sz) {
-    assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo");
-    assert(pointer_delta(_top, obj)     == word_sz, "Bad undo");
-    _top = obj;
-  }
-
-  // The total (word) size of the buffer, including both allocated and
-  // unallocted space.
-  size_t word_sz() { return _word_sz; }
-
-  // Should only be done if we are about to reset with a new buffer of the
-  // given size.
-  void set_word_size(size_t new_word_sz) {
-    assert(new_word_sz > AlignmentReserve, "Too small");
-    _word_sz = new_word_sz;
-  }
-
-  // The number of words of unallocated space remaining in the buffer.
-  size_t words_remaining() {
-    assert(_end >= _top, "Negative buffer");
-    return pointer_delta(_end, _top, HeapWordSize);
-  }
-
-  bool contains(void* addr) {
-    return (void*)_bottom <= addr && addr < (void*)_hard_end;
-  }
-
-  // Sets the space of the buffer to be [buf, space+word_sz()).
-  void set_buf(HeapWord* buf) {
-    _bottom   = buf;
-    _top      = _bottom;
-    _hard_end = _bottom + word_sz();
-    _end      = _hard_end - AlignmentReserve;
-    assert(_end >= _top, "Negative buffer");
-    // In support of ergonomic sizing
-    _allocated += word_sz();
-  }
-
-  // Flush the stats supporting ergonomic sizing of PLAB's
-  void flush_stats(PLABStats* stats);
-  void flush_stats_and_retire(PLABStats* stats, bool retain) {
-    // We flush the stats first in order to get a reading of
-    // unused space in the last buffer.
-    if (ResizePLAB) {
-      flush_stats(stats);
-    }
-    // Retire the last allocation buffer.
-    retire(true, retain);
-  }
-
-  // Force future allocations to fail and queries for contains()
-  // to return false
-  void invalidate() {
-    assert(!_retained, "Shouldn't retain an invalidated buffer.");
-    _end    = _hard_end;
-    _wasted += pointer_delta(_end, _top);  // unused  space
-    _top    = _end;      // force future allocations to fail
-    _bottom = _end;      // force future contains() queries to return false
-  }
-
-  // Fills in the unallocated portion of the buffer with a garbage object.
-  // If "end_of_gc" is TRUE, is after the last use in the GC.  IF "retain"
-  // is true, attempt to re-use the unused portion in the next GC.
-  void retire(bool end_of_gc, bool retain);
-
-  void print() PRODUCT_RETURN;
-};
-
-// PLAB stats book-keeping
-class PLABStats VALUE_OBJ_CLASS_SPEC {
-  size_t _allocated;      // total allocated
-  size_t _wasted;         // of which wasted (internal fragmentation)
-  size_t _unused;         // Unused in last buffer
-  size_t _used;           // derived = allocated - wasted - unused
-  size_t _desired_plab_sz;// output of filter (below), suitably trimmed and quantized
-  AdaptiveWeightedAverage
-         _filter;         // integrator with decay
-
- public:
-  PLABStats(size_t desired_plab_sz_, unsigned wt) :
-    _allocated(0),
-    _wasted(0),
-    _unused(0),
-    _used(0),
-    _desired_plab_sz(desired_plab_sz_),
-    _filter(wt)
-  {
-    size_t min_sz = min_size();
-    size_t max_sz = max_size();
-    size_t aligned_min_sz = align_object_size(min_sz);
-    size_t aligned_max_sz = align_object_size(max_sz);
-    assert(min_sz <= aligned_min_sz && max_sz >= aligned_max_sz &&
-           min_sz <= max_sz,
-           "PLAB clipping computation in adjust_desired_plab_sz()"
-           " may be incorrect");
-  }
-
-  static const size_t min_size() {
-    return ParGCAllocBuffer::min_size();
-  }
-
-  static const size_t max_size() {
-    return ParGCAllocBuffer::max_size();
-  }
-
-  size_t desired_plab_sz() {
-    return _desired_plab_sz;
-  }
-
-  void adjust_desired_plab_sz(); // filter computation, latches output to
-                                 // _desired_plab_sz, clears sensor accumulators
-
-  void add_allocated(size_t v) {
-    Atomic::add_ptr(v, &_allocated);
-  }
-
-  void add_unused(size_t v) {
-    Atomic::add_ptr(v, &_unused);
-  }
-
-  void add_wasted(size_t v) {
-    Atomic::add_ptr(v, &_wasted);
-  }
-};
-
-class ParGCAllocBufferWithBOT: public ParGCAllocBuffer {
-  BlockOffsetArrayContigSpace _bt;
-  BlockOffsetSharedArray*     _bsa;
-  HeapWord*                   _true_end;  // end of the whole ParGCAllocBuffer
-
-  static const size_t ChunkSizeInWords;
-  static const size_t ChunkSizeInBytes;
-  HeapWord* allocate_slow(size_t word_sz);
-
-  void fill_region_with_block(MemRegion mr, bool contig);
-
-public:
-  ParGCAllocBufferWithBOT(size_t word_sz, BlockOffsetSharedArray* bsa);
-
-  HeapWord* allocate(size_t word_sz) {
-    HeapWord* res = ParGCAllocBuffer::allocate(word_sz);
-    if (res != NULL) {
-      _bt.alloc_block(res, word_sz);
-    } else {
-      res = allocate_slow(word_sz);
-    }
-    return res;
-  }
-
-  void undo_allocation(HeapWord* obj, size_t word_sz);
-
-  void set_buf(HeapWord* buf_start) {
-    ParGCAllocBuffer::set_buf(buf_start);
-    _true_end = _hard_end;
-    _bt.set_region(MemRegion(buf_start, word_sz()));
-    _bt.initialize_threshold();
-  }
-
-  void retire(bool end_of_gc, bool retain);
-
-  MemRegion range() {
-    return MemRegion(_top, _true_end);
-  }
-};
-
-#endif // SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -24,11 +24,11 @@
 
 #include "precompiled.hpp"
 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
 #include "gc_implementation/parNew/parNewGeneration.hpp"
 #include "gc_implementation/parNew/parOopClosures.inline.hpp"
 #include "gc_implementation/shared/adaptiveSizePolicy.hpp"
 #include "gc_implementation/shared/ageTable.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "memory/defNewGeneration.inline.hpp"
 #include "memory/genCollectedHeap.hpp"
@@ -453,7 +453,8 @@
     // retire the last buffer.
     par_scan_state.to_space_alloc_buffer()->
       flush_stats_and_retire(_gen.plab_stats(),
-                             false /* !retain */);
+                             true /* end_of_gc */,
+                             false /* retain */);
 
     // Every thread has its own age table.  We need to merge
     // them all into one.
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARNEWGENERATION_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARNEWGENERATION_HPP
 
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "memory/defNewGeneration.hpp"
 #include "utilities/taskqueue.hpp"
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "memory/sharedHeap.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/oop.inline.hpp"
+
+ParGCAllocBuffer::ParGCAllocBuffer(size_t desired_plab_sz_) :
+  _word_sz(desired_plab_sz_), _bottom(NULL), _top(NULL),
+  _end(NULL), _hard_end(NULL),
+  _retained(false), _retained_filler(),
+  _allocated(0), _wasted(0)
+{
+  assert (min_size() > AlignmentReserve, "Inconsistency!");
+  // arrayOopDesc::header_size depends on command line initialization.
+  FillerHeaderSize = align_object_size(arrayOopDesc::header_size(T_INT));
+  AlignmentReserve = oopDesc::header_size() > MinObjAlignment ? FillerHeaderSize : 0;
+}
+
+size_t ParGCAllocBuffer::FillerHeaderSize;
+
+// If the minimum object size is greater than MinObjAlignment, we can
+// end up with a shard at the end of the buffer that's smaller than
+// the smallest object.  We can't allow that because the buffer must
+// look like it's full of objects when we retire it, so we make
+// sure we have enough space for a filler int array object.
+size_t ParGCAllocBuffer::AlignmentReserve;
+
+void ParGCAllocBuffer::retire(bool end_of_gc, bool retain) {
+  assert(!retain || end_of_gc, "Can only retain at GC end.");
+  if (_retained) {
+    // If the buffer had been retained shorten the previous filler object.
+    assert(_retained_filler.end() <= _top, "INVARIANT");
+    CollectedHeap::fill_with_object(_retained_filler);
+    // Wasted space book-keeping, otherwise (normally) done in invalidate()
+    _wasted += _retained_filler.word_size();
+    _retained = false;
+  }
+  assert(!end_of_gc || !_retained, "At this point, end_of_gc ==> !_retained.");
+  if (_top < _hard_end) {
+    CollectedHeap::fill_with_object(_top, _hard_end);
+    if (!retain) {
+      invalidate();
+    } else {
+      // Is there wasted space we'd like to retain for the next GC?
+      if (pointer_delta(_end, _top) > FillerHeaderSize) {
+        _retained = true;
+        _retained_filler = MemRegion(_top, FillerHeaderSize);
+        _top = _top + FillerHeaderSize;
+      } else {
+        invalidate();
+      }
+    }
+  }
+}
+
+void ParGCAllocBuffer::flush_stats(PLABStats* stats) {
+  assert(ResizePLAB, "Wasted work");
+  stats->add_allocated(_allocated);
+  stats->add_wasted(_wasted);
+  stats->add_unused(pointer_delta(_end, _top));
+}
+
+// Compute desired plab size and latch result for later
+// use. This should be called once at the end of parallel
+// scavenge; it clears the sensor accumulators.
+void PLABStats::adjust_desired_plab_sz() {
+  assert(ResizePLAB, "Not set");
+  if (_allocated == 0) {
+    assert(_unused == 0, "Inconsistency in PLAB stats");
+    _allocated = 1;
+  }
+  double wasted_frac    = (double)_unused/(double)_allocated;
+  size_t target_refills = (size_t)((wasted_frac*TargetSurvivorRatio)/
+                                   TargetPLABWastePct);
+  if (target_refills == 0) {
+    target_refills = 1;
+  }
+  _used = _allocated - _wasted - _unused;
+  size_t plab_sz = _used/(target_refills*ParallelGCThreads);
+  if (PrintPLAB) gclog_or_tty->print(" (plab_sz = %d ", plab_sz);
+  // Take historical weighted average
+  _filter.sample(plab_sz);
+  // Clip from above and below, and align to object boundary
+  plab_sz = MAX2(min_size(), (size_t)_filter.average());
+  plab_sz = MIN2(max_size(), plab_sz);
+  plab_sz = align_object_size(plab_sz);
+  // Latch the result
+  if (PrintPLAB) gclog_or_tty->print(" desired_plab_sz = %d) ", plab_sz);
+  _desired_plab_sz = plab_sz;
+  // Now clear the accumulators for next round:
+  // note this needs to be fixed in the case where we
+  // are retaining across scavenges. FIX ME !!! XXX
+  _allocated = 0;
+  _wasted    = 0;
+  _unused    = 0;
+}
+
+#ifndef PRODUCT
+void ParGCAllocBuffer::print() {
+  gclog_or_tty->print("parGCAllocBuffer: _bottom: %p  _top: %p  _end: %p  _hard_end: %p"
+             "_retained: %c _retained_filler: [%p,%p)\n",
+             _bottom, _top, _end, _hard_end,
+             "FT"[_retained], _retained_filler.start(), _retained_filler.end());
+}
+#endif // !PRODUCT
+
+const size_t ParGCAllocBufferWithBOT::ChunkSizeInWords =
+MIN2(CardTableModRefBS::par_chunk_heapword_alignment(),
+     ((size_t)Generation::GenGrain)/HeapWordSize);
+const size_t ParGCAllocBufferWithBOT::ChunkSizeInBytes =
+MIN2(CardTableModRefBS::par_chunk_heapword_alignment() * HeapWordSize,
+     (size_t)Generation::GenGrain);
+
+ParGCAllocBufferWithBOT::ParGCAllocBufferWithBOT(size_t word_sz,
+                                                 BlockOffsetSharedArray* bsa) :
+  ParGCAllocBuffer(word_sz),
+  _bsa(bsa),
+  _bt(bsa, MemRegion(_bottom, _hard_end)),
+  _true_end(_hard_end)
+{}
+
+// The buffer comes with its own BOT, with a shared (obviously) underlying
+// BlockOffsetSharedArray. We manipulate this BOT in the normal way
+// as we would for any contiguous space. However, on accasion we
+// need to do some buffer surgery at the extremities before we
+// start using the body of the buffer for allocations. Such surgery
+// (as explained elsewhere) is to prevent allocation on a card that
+// is in the process of being walked concurrently by another GC thread.
+// When such surgery happens at a point that is far removed (to the
+// right of the current allocation point, top), we use the "contig"
+// parameter below to directly manipulate the shared array without
+// modifying the _next_threshold state in the BOT.
+void ParGCAllocBufferWithBOT::fill_region_with_block(MemRegion mr,
+                                                     bool contig) {
+  CollectedHeap::fill_with_object(mr);
+  if (contig) {
+    _bt.alloc_block(mr.start(), mr.end());
+  } else {
+    _bt.BlockOffsetArray::alloc_block(mr.start(), mr.end());
+  }
+}
+
+HeapWord* ParGCAllocBufferWithBOT::allocate_slow(size_t word_sz) {
+  HeapWord* res = NULL;
+  if (_true_end > _hard_end) {
+    assert((HeapWord*)align_size_down(intptr_t(_hard_end),
+                                      ChunkSizeInBytes) == _hard_end,
+           "or else _true_end should be equal to _hard_end");
+    assert(_retained, "or else _true_end should be equal to _hard_end");
+    assert(_retained_filler.end() <= _top, "INVARIANT");
+    CollectedHeap::fill_with_object(_retained_filler);
+    if (_top < _hard_end) {
+      fill_region_with_block(MemRegion(_top, _hard_end), true);
+    }
+    HeapWord* next_hard_end = MIN2(_true_end, _hard_end + ChunkSizeInWords);
+    _retained_filler = MemRegion(_hard_end, FillerHeaderSize);
+    _bt.alloc_block(_retained_filler.start(), _retained_filler.word_size());
+    _top      = _retained_filler.end();
+    _hard_end = next_hard_end;
+    _end      = _hard_end - AlignmentReserve;
+    res       = ParGCAllocBuffer::allocate(word_sz);
+    if (res != NULL) {
+      _bt.alloc_block(res, word_sz);
+    }
+  }
+  return res;
+}
+
+void
+ParGCAllocBufferWithBOT::undo_allocation(HeapWord* obj, size_t word_sz) {
+  ParGCAllocBuffer::undo_allocation(obj, word_sz);
+  // This may back us up beyond the previous threshold, so reset.
+  _bt.set_region(MemRegion(_top, _hard_end));
+  _bt.initialize_threshold();
+}
+
+void ParGCAllocBufferWithBOT::retire(bool end_of_gc, bool retain) {
+  assert(!retain || end_of_gc, "Can only retain at GC end.");
+  if (_retained) {
+    // We're about to make the retained_filler into a block.
+    _bt.BlockOffsetArray::alloc_block(_retained_filler.start(),
+                                      _retained_filler.end());
+  }
+  // Reset _hard_end to _true_end (and update _end)
+  if (retain && _hard_end != NULL) {
+    assert(_hard_end <= _true_end, "Invariant.");
+    _hard_end = _true_end;
+    _end      = MAX2(_top, _hard_end - AlignmentReserve);
+    assert(_end <= _hard_end, "Invariant.");
+  }
+  _true_end = _hard_end;
+  HeapWord* pre_top = _top;
+
+  ParGCAllocBuffer::retire(end_of_gc, retain);
+  // Now any old _retained_filler is cut back to size, the free part is
+  // filled with a filler object, and top is past the header of that
+  // object.
+
+  if (retain && _top < _end) {
+    assert(end_of_gc && retain, "Or else retain should be false.");
+    // If the lab does not start on a card boundary, we don't want to
+    // allocate onto that card, since that might lead to concurrent
+    // allocation and card scanning, which we don't support.  So we fill
+    // the first card with a garbage object.
+    size_t first_card_index = _bsa->index_for(pre_top);
+    HeapWord* first_card_start = _bsa->address_for_index(first_card_index);
+    if (first_card_start < pre_top) {
+      HeapWord* second_card_start =
+        _bsa->inc_by_region_size(first_card_start);
+
+      // Ensure enough room to fill with the smallest block
+      second_card_start = MAX2(second_card_start, pre_top + AlignmentReserve);
+
+      // If the end is already in the first card, don't go beyond it!
+      // Or if the remainder is too small for a filler object, gobble it up.
+      if (_hard_end < second_card_start ||
+          pointer_delta(_hard_end, second_card_start) < AlignmentReserve) {
+        second_card_start = _hard_end;
+      }
+      if (pre_top < second_card_start) {
+        MemRegion first_card_suffix(pre_top, second_card_start);
+        fill_region_with_block(first_card_suffix, true);
+      }
+      pre_top = second_card_start;
+      _top = pre_top;
+      _end = MAX2(_top, _hard_end - AlignmentReserve);
+    }
+
+    // If the lab does not end on a card boundary, we don't want to
+    // allocate onto that card, since that might lead to concurrent
+    // allocation and card scanning, which we don't support.  So we fill
+    // the last card with a garbage object.
+    size_t last_card_index = _bsa->index_for(_hard_end);
+    HeapWord* last_card_start = _bsa->address_for_index(last_card_index);
+    if (last_card_start < _hard_end) {
+
+      // Ensure enough room to fill with the smallest block
+      last_card_start = MIN2(last_card_start, _hard_end - AlignmentReserve);
+
+      // If the top is already in the last card, don't go back beyond it!
+      // Or if the remainder is too small for a filler object, gobble it up.
+      if (_top > last_card_start ||
+          pointer_delta(last_card_start, _top) < AlignmentReserve) {
+        last_card_start = _top;
+      }
+      if (last_card_start < _hard_end) {
+        MemRegion last_card_prefix(last_card_start, _hard_end);
+        fill_region_with_block(last_card_prefix, false);
+      }
+      _hard_end = last_card_start;
+      _end      = MAX2(_top, _hard_end - AlignmentReserve);
+      _true_end = _hard_end;
+      assert(_end <= _hard_end, "Invariant.");
+    }
+
+    // At this point:
+    //   1) we had a filler object from the original top to hard_end.
+    //   2) We've filled in any partial cards at the front and back.
+    if (pre_top < _hard_end) {
+      // Now we can reset the _bt to do allocation in the given area.
+      MemRegion new_filler(pre_top, _hard_end);
+      fill_region_with_block(new_filler, false);
+      _top = pre_top + ParGCAllocBuffer::FillerHeaderSize;
+      // If there's no space left, don't retain.
+      if (_top >= _end) {
+        _retained = false;
+        invalidate();
+        return;
+      }
+      _retained_filler = MemRegion(pre_top, _top);
+      _bt.set_region(MemRegion(_top, _hard_end));
+      _bt.initialize_threshold();
+      assert(_bt.threshold() > _top, "initialize_threshold failed!");
+
+      // There may be other reasons for queries into the middle of the
+      // filler object.  When such queries are done in parallel with
+      // allocation, bad things can happen, if the query involves object
+      // iteration.  So we ensure that such queries do not involve object
+      // iteration, by putting another filler object on the boundaries of
+      // such queries.  One such is the object spanning a parallel card
+      // chunk boundary.
+
+      // "chunk_boundary" is the address of the first chunk boundary less
+      // than "hard_end".
+      HeapWord* chunk_boundary =
+        (HeapWord*)align_size_down(intptr_t(_hard_end-1), ChunkSizeInBytes);
+      assert(chunk_boundary < _hard_end, "Or else above did not work.");
+      assert(pointer_delta(_true_end, chunk_boundary) >= AlignmentReserve,
+             "Consequence of last card handling above.");
+
+      if (_top <= chunk_boundary) {
+        assert(_true_end == _hard_end, "Invariant.");
+        while (_top <= chunk_boundary) {
+          assert(pointer_delta(_hard_end, chunk_boundary) >= AlignmentReserve,
+                 "Consequence of last card handling above.");
+          _bt.BlockOffsetArray::alloc_block(chunk_boundary, _hard_end);
+          CollectedHeap::fill_with_object(chunk_boundary, _hard_end);
+          _hard_end = chunk_boundary;
+          chunk_boundary -= ChunkSizeInWords;
+        }
+        _end = _hard_end - AlignmentReserve;
+        assert(_top <= _end, "Invariant.");
+        // Now reset the initial filler chunk so it doesn't overlap with
+        // the one(s) inserted above.
+        MemRegion new_filler(pre_top, _hard_end);
+        fill_region_with_block(new_filler, false);
+      }
+    } else {
+      _retained = false;
+      invalidate();
+    }
+  } else {
+    assert(!end_of_gc ||
+           (!_retained && _true_end == _hard_end), "Checking.");
+  }
+  assert(_end <= _hard_end, "Invariant.");
+  assert(_top < _end || _top == _hard_end, "Invariant");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
+
+#include "memory/allocation.hpp"
+#include "memory/blockOffsetTable.hpp"
+#include "memory/threadLocalAllocBuffer.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// Forward decl.
+
+class PLABStats;
+
+// A per-thread allocation buffer used during GC.
+class ParGCAllocBuffer: public CHeapObj<mtGC> {
+protected:
+  char head[32];
+  size_t _word_sz;          // in HeapWord units
+  HeapWord* _bottom;
+  HeapWord* _top;
+  HeapWord* _end;       // last allocatable address + 1
+  HeapWord* _hard_end;  // _end + AlignmentReserve
+  bool      _retained;  // whether we hold a _retained_filler
+  MemRegion _retained_filler;
+  // In support of ergonomic sizing of PLAB's
+  size_t    _allocated;     // in HeapWord units
+  size_t    _wasted;        // in HeapWord units
+  char tail[32];
+  static size_t FillerHeaderSize;
+  static size_t AlignmentReserve;
+
+public:
+  // Initializes the buffer to be empty, but with the given "word_sz".
+  // Must get initialized with "set_buf" for an allocation to succeed.
+  ParGCAllocBuffer(size_t word_sz);
+
+  static const size_t min_size() {
+    return ThreadLocalAllocBuffer::min_size();
+  }
+
+  static const size_t max_size() {
+    return ThreadLocalAllocBuffer::max_size();
+  }
+
+  // If an allocation of the given "word_sz" can be satisfied within the
+  // buffer, do the allocation, returning a pointer to the start of the
+  // allocated block.  If the allocation request cannot be satisfied,
+  // return NULL.
+  HeapWord* allocate(size_t word_sz) {
+    HeapWord* res = _top;
+    if (pointer_delta(_end, _top) >= word_sz) {
+      _top = _top + word_sz;
+      return res;
+    } else {
+      return NULL;
+    }
+  }
+
+  // Undo the last allocation in the buffer, which is required to be of the
+  // "obj" of the given "word_sz".
+  void undo_allocation(HeapWord* obj, size_t word_sz) {
+    assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo");
+    assert(pointer_delta(_top, obj)     == word_sz, "Bad undo");
+    _top = obj;
+  }
+
+  // The total (word) size of the buffer, including both allocated and
+  // unallocted space.
+  size_t word_sz() { return _word_sz; }
+
+  // Should only be done if we are about to reset with a new buffer of the
+  // given size.
+  void set_word_size(size_t new_word_sz) {
+    assert(new_word_sz > AlignmentReserve, "Too small");
+    _word_sz = new_word_sz;
+  }
+
+  // The number of words of unallocated space remaining in the buffer.
+  size_t words_remaining() {
+    assert(_end >= _top, "Negative buffer");
+    return pointer_delta(_end, _top, HeapWordSize);
+  }
+
+  bool contains(void* addr) {
+    return (void*)_bottom <= addr && addr < (void*)_hard_end;
+  }
+
+  // Sets the space of the buffer to be [buf, space+word_sz()).
+  void set_buf(HeapWord* buf) {
+    _bottom   = buf;
+    _top      = _bottom;
+    _hard_end = _bottom + word_sz();
+    _end      = _hard_end - AlignmentReserve;
+    assert(_end >= _top, "Negative buffer");
+    // In support of ergonomic sizing
+    _allocated += word_sz();
+  }
+
+  // Flush the stats supporting ergonomic sizing of PLAB's
+  void flush_stats(PLABStats* stats);
+  void flush_stats_and_retire(PLABStats* stats, bool end_of_gc, bool retain) {
+    // We flush the stats first in order to get a reading of
+    // unused space in the last buffer.
+    if (ResizePLAB) {
+      flush_stats(stats);
+    }
+    // Retire the last allocation buffer.
+    retire(end_of_gc, retain);
+  }
+
+  // Force future allocations to fail and queries for contains()
+  // to return false
+  void invalidate() {
+    assert(!_retained, "Shouldn't retain an invalidated buffer.");
+    _end    = _hard_end;
+    _wasted += pointer_delta(_end, _top);  // unused  space
+    _top    = _end;      // force future allocations to fail
+    _bottom = _end;      // force future contains() queries to return false
+  }
+
+  // Fills in the unallocated portion of the buffer with a garbage object.
+  // If "end_of_gc" is TRUE, is after the last use in the GC.  IF "retain"
+  // is true, attempt to re-use the unused portion in the next GC.
+  void retire(bool end_of_gc, bool retain);
+
+  void print() PRODUCT_RETURN;
+};
+
+// PLAB stats book-keeping
+class PLABStats VALUE_OBJ_CLASS_SPEC {
+  size_t _allocated;      // total allocated
+  size_t _wasted;         // of which wasted (internal fragmentation)
+  size_t _unused;         // Unused in last buffer
+  size_t _used;           // derived = allocated - wasted - unused
+  size_t _desired_plab_sz;// output of filter (below), suitably trimmed and quantized
+  AdaptiveWeightedAverage
+         _filter;         // integrator with decay
+
+ public:
+  PLABStats(size_t desired_plab_sz_, unsigned wt) :
+    _allocated(0),
+    _wasted(0),
+    _unused(0),
+    _used(0),
+    _desired_plab_sz(desired_plab_sz_),
+    _filter(wt)
+  {
+    size_t min_sz = min_size();
+    size_t max_sz = max_size();
+    size_t aligned_min_sz = align_object_size(min_sz);
+    size_t aligned_max_sz = align_object_size(max_sz);
+    assert(min_sz <= aligned_min_sz && max_sz >= aligned_max_sz &&
+           min_sz <= max_sz,
+           "PLAB clipping computation in adjust_desired_plab_sz()"
+           " may be incorrect");
+  }
+
+  static const size_t min_size() {
+    return ParGCAllocBuffer::min_size();
+  }
+
+  static const size_t max_size() {
+    return ParGCAllocBuffer::max_size();
+  }
+
+  size_t desired_plab_sz() {
+    return _desired_plab_sz;
+  }
+
+  void adjust_desired_plab_sz(); // filter computation, latches output to
+                                 // _desired_plab_sz, clears sensor accumulators
+
+  void add_allocated(size_t v) {
+    Atomic::add_ptr(v, &_allocated);
+  }
+
+  void add_unused(size_t v) {
+    Atomic::add_ptr(v, &_unused);
+  }
+
+  void add_wasted(size_t v) {
+    Atomic::add_ptr(v, &_wasted);
+  }
+};
+
+class ParGCAllocBufferWithBOT: public ParGCAllocBuffer {
+  BlockOffsetArrayContigSpace _bt;
+  BlockOffsetSharedArray*     _bsa;
+  HeapWord*                   _true_end;  // end of the whole ParGCAllocBuffer
+
+  static const size_t ChunkSizeInWords;
+  static const size_t ChunkSizeInBytes;
+  HeapWord* allocate_slow(size_t word_sz);
+
+  void fill_region_with_block(MemRegion mr, bool contig);
+
+public:
+  ParGCAllocBufferWithBOT(size_t word_sz, BlockOffsetSharedArray* bsa);
+
+  HeapWord* allocate(size_t word_sz) {
+    HeapWord* res = ParGCAllocBuffer::allocate(word_sz);
+    if (res != NULL) {
+      _bt.alloc_block(res, word_sz);
+    } else {
+      res = allocate_slow(word_sz);
+    }
+    return res;
+  }
+
+  void undo_allocation(HeapWord* obj, size_t word_sz);
+
+  void set_buf(HeapWord* buf_start) {
+    ParGCAllocBuffer::set_buf(buf_start);
+    _true_end = _hard_end;
+    _bt.set_region(MemRegion(buf_start, word_sz()));
+    _bt.initialize_threshold();
+  }
+
+  void retire(bool end_of_gc, bool retain);
+
+  MemRegion range() {
+    return MemRegion(_top, _true_end);
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
--- a/src/share/vm/memory/tenuredGeneration.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/memory/tenuredGeneration.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,8 +23,8 @@
  */
 
 #include "precompiled.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
 #include "gc_implementation/shared/collectorCounters.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/blockOffsetTable.inline.hpp"
 #include "memory/generation.inline.hpp"
--- a/src/share/vm/opto/callGenerator.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/callGenerator.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -158,74 +158,6 @@
   return kit.transfer_exceptions_into_jvms();
 }
 
-//---------------------------DynamicCallGenerator-----------------------------
-// Internal class which handles all out-of-line invokedynamic calls.
-class DynamicCallGenerator : public CallGenerator {
-public:
-  DynamicCallGenerator(ciMethod* method)
-    : CallGenerator(method)
-  {
-  }
-  virtual JVMState* generate(JVMState* jvms);
-};
-
-JVMState* DynamicCallGenerator::generate(JVMState* jvms) {
-  GraphKit kit(jvms);
-  Compile* C = kit.C;
-  PhaseGVN& gvn = kit.gvn();
-
-  if (C->log() != NULL) {
-    C->log()->elem("dynamic_call bci='%d'", jvms->bci());
-  }
-
-  // Get the constant pool cache from the caller class.
-  ciMethod* caller_method = jvms->method();
-  ciBytecodeStream str(caller_method);
-  str.force_bci(jvms->bci());  // Set the stream to the invokedynamic bci.
-  assert(str.cur_bc() == Bytecodes::_invokedynamic, "wrong place to issue a dynamic call!");
-  ciCPCache* cpcache = str.get_cpcache();
-
-  // Get the offset of the CallSite from the constant pool cache
-  // pointer.
-  int index = str.get_method_index();
-  size_t call_site_offset = cpcache->get_f1_offset(index);
-
-  // Load the CallSite object from the constant pool cache.
-  const TypeOopPtr* cpcache_type   = TypeOopPtr::make_from_constant(cpcache);  // returns TypeAryPtr of type T_OBJECT
-  const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass());
-  Node* cpcache_adr   = kit.makecon(cpcache_type);
-  Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset);
-  // The oops in the constant pool cache are not compressed; load then as raw pointers.
-  Node* call_site     = kit.make_load(kit.control(), call_site_adr, call_site_type, T_ADDRESS, Compile::AliasIdxRaw);
-
-  // Load the target MethodHandle from the CallSite object.
-  const TypeOopPtr* target_type = TypeOopPtr::make_from_klass(C->env()->MethodHandle_klass());
-  Node* target_mh_adr = kit.basic_plus_adr(call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
-  Node* target_mh     = kit.make_load(kit.control(), target_mh_adr, target_type, T_OBJECT);
-
-  address resolve_stub = SharedRuntime::get_resolve_opt_virtual_call_stub();
-
-  CallStaticJavaNode* call = new (C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), resolve_stub, method(), kit.bci());
-  // invokedynamic is treated as an optimized invokevirtual.
-  call->set_optimized_virtual(true);
-  // Take extra care (in the presence of argument motion) not to trash the SP:
-  call->set_method_handle_invoke(true);
-
-  // Pass the target MethodHandle as first argument and shift the
-  // other arguments.
-  call->init_req(0 + TypeFunc::Parms, target_mh);
-  uint nargs = call->method()->arg_size();
-  for (uint i = 1; i < nargs; i++) {
-    Node* arg = kit.argument(i - 1);
-    call->init_req(i + TypeFunc::Parms, arg);
-  }
-
-  kit.set_edges_for_java_call(call);
-  Node* ret = kit.set_results_for_java_call(call);
-  kit.push_node(method()->return_type()->basic_type(), ret);
-  return kit.transfer_exceptions_into_jvms();
-}
-
 //--------------------------VirtualCallGenerator------------------------------
 // Internal class which handles all out-of-line calls checking receiver type.
 class VirtualCallGenerator : public CallGenerator {
@@ -328,12 +260,6 @@
   return new VirtualCallGenerator(m, vtable_index);
 }
 
-CallGenerator* CallGenerator::for_dynamic_call(ciMethod* m) {
-  assert(m->is_compiled_lambda_form(), "for_dynamic_call mismatch");
-  //@@ FIXME: this should be done via a direct call
-  return new DynamicCallGenerator(m);
-}
-
 // Allow inlining decisions to be delayed
 class LateInlineCallGenerator : public DirectCallGenerator {
   CallGenerator* _inline_cg;
@@ -347,7 +273,7 @@
   // Convert the CallStaticJava into an inline
   virtual void do_late_inline();
 
-  JVMState* generate(JVMState* jvms) {
+  virtual JVMState* generate(JVMState* jvms) {
     // Record that this call site should be revisited once the main
     // parse is finished.
     Compile::current()->add_late_inline(this);
--- a/src/share/vm/opto/chaitin.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/chaitin.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -484,24 +484,33 @@
     if (_names[i]) {           // Live range associated with Node?
       LRG &lrg = lrgs(_names[i]);
       if (!lrg.alive()) {
-        _node_regs[i].set_bad();
+        set_bad(i);
       } else if (lrg.num_regs() == 1) {
-        _node_regs[i].set1(lrg.reg());
-      } else {                  // Must be a register-pair
-        if (!lrg._fat_proj) {   // Must be aligned adjacent register pair
+        set1(i, lrg.reg());
+      } else {                  // Must be a register-set
+        if (!lrg._fat_proj) {   // Must be aligned adjacent register set
           // Live ranges record the highest register in their mask.
           // We want the low register for the AD file writer's convenience.
-          _node_regs[i].set2( OptoReg::add(lrg.reg(),(1-lrg.num_regs())) );
+          OptoReg::Name hi = lrg.reg(); // Get hi register
+          OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
+          // We have to use pair [lo,lo+1] even for wide vectors because
+          // the rest of code generation works only with pairs. It is safe
+          // since for registers encoding only 'lo' is used.
+          // Second reg from pair is used in ScheduleAndBundle on SPARC where
+          // vector max size is 8 which corresponds to registers pair.
+          // It is also used in BuildOopMaps but oop operations are not
+          // vectorized.
+          set2(i, lo);
         } else {                // Misaligned; extract 2 bits
           OptoReg::Name hi = lrg.reg(); // Get hi register
           lrg.Remove(hi);       // Yank from mask
           int lo = lrg.mask().find_first_elem(); // Find lo
-          _node_regs[i].set_pair( hi, lo );
+          set_pair(i, hi, lo);
         }
       }
       if( lrg._is_oop ) _node_oops.set(i);
     } else {
-      _node_regs[i].set_bad();
+      set_bad(i);
     }
   }
 
@@ -1121,6 +1130,33 @@
 
 }
 
+//------------------------------is_legal_reg-----------------------------------
+// Is 'reg' register legal for 'lrg'?
+static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
+  if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) &&
+      lrg.mask().Member(OptoReg::add(reg,-chunk))) {
+    // RA uses OptoReg which represent the highest element of a registers set.
+    // For example, vectorX (128bit) on x86 uses [XMM,XMMb,XMMc,XMMd] set
+    // in which XMMd is used by RA to represent such vectors. A double value
+    // uses [XMM,XMMb] pairs and XMMb is used by RA for it.
+    // The register mask uses largest bits set of overlapping register sets.
+    // On x86 with AVX it uses 8 bits for each XMM registers set.
+    //
+    // The 'lrg' already has cleared-to-set register mask (done in Select()
+    // before calling choose_color()). Passing mask.Member(reg) check above
+    // indicates that the size (num_regs) of 'reg' set is less or equal to
+    // 'lrg' set size.
+    // For set size 1 any register which is member of 'lrg' mask is legal.
+    if (lrg.num_regs()==1)
+      return true;
+    // For larger sets only an aligned register with the same set size is legal.
+    int mask = lrg.num_regs()-1;
+    if ((reg&mask) == mask)
+      return true;
+  }
+  return false;
+}
+
 //------------------------------bias_color-------------------------------------
 // Choose a color using the biasing heuristic
 OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
@@ -1137,10 +1173,7 @@
     while ((datum = elements.next()) != 0) {
       OptoReg::Name reg = lrgs(datum).reg();
       // If this LRG's register is legal for us, choose it
-      if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE &&
-          lrg.mask().Member(OptoReg::add(reg,-chunk)) &&
-          (lrg.num_regs()==1 || // either size 1
-           (reg&1) == 1) )      // or aligned (adjacent reg is available since we already cleared-to-pairs)
+      if (is_legal_reg(lrg, reg, chunk))
         return reg;
     }
   }
@@ -1151,10 +1184,7 @@
     if( !(*(_ifg->_yanked))[copy_lrg] ) {
       OptoReg::Name reg = lrgs(copy_lrg).reg();
       //  And it is legal for you,
-      if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE &&
-          lrg.mask().Member(OptoReg::add(reg,-chunk)) &&
-          (lrg.num_regs()==1 || // either size 1
-           (reg&1) == 1) )      // or aligned (adjacent reg is available since we already cleared-to-pairs)
+      if (is_legal_reg(lrg, reg, chunk))
         return reg;
     } else if( chunk == 0 ) {
       // Choose a color which is legal for him
--- a/src/share/vm/opto/classes.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/classes.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -256,6 +256,8 @@
 macro(SubVL)
 macro(SubVF)
 macro(SubVD)
+macro(MulVS)
+macro(MulVI)
 macro(MulVF)
 macro(MulVD)
 macro(DivVF)
@@ -263,9 +265,15 @@
 macro(LShiftVB)
 macro(LShiftVS)
 macro(LShiftVI)
+macro(LShiftVL)
 macro(RShiftVB)
 macro(RShiftVS)
 macro(RShiftVI)
+macro(RShiftVL)
+macro(URShiftVB)
+macro(URShiftVS)
+macro(URShiftVI)
+macro(URShiftVL)
 macro(AndV)
 macro(OrV)
 macro(XorV)
--- a/src/share/vm/opto/compile.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/compile.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -2604,7 +2604,7 @@
     if (n->req()-1 > 2) {
       // Replace many operand PackNodes with a binary tree for matching
       PackNode* p = (PackNode*) n;
-      Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
+      Node* btp = p->binary_tree_pack(Compile::current(), 1, n->req());
       n->subsume_by(btp);
     }
     break;
--- a/src/share/vm/opto/idealKit.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/idealKit.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -295,7 +295,11 @@
   if (_delay_all_transforms) {
     return delay_transform(n);
   } else {
-    return gvn().transform(n);
+    n = gvn().transform(n);
+    if (!gvn().is_IterGVN()) {
+      C->record_for_igvn(n);
+    }
+    return n;
   }
 }
 
--- a/src/share/vm/opto/library_call.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/library_call.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -171,7 +171,7 @@
   // Helper for inline_unsafe_access.
   // Generates the guards that check whether the result of
   // Unsafe.getObject should be recorded in an SATB log buffer.
-  void insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* pre_val);
+  void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, int nargs, bool need_mem_bar);
   bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
   bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
   bool inline_unsafe_allocate();
@@ -291,6 +291,8 @@
     case vmIntrinsics::_equals:
     case vmIntrinsics::_equalsC:
       break;  // InlineNatives does not control String.compareTo
+    case vmIntrinsics::_Reference_get:
+      break;  // InlineNatives does not control Reference.get
     default:
       return NULL;
     }
@@ -361,11 +363,10 @@
     break;
 
   case vmIntrinsics::_Reference_get:
-    // It is only when G1 is enabled that we absolutely
-    // need to use the intrinsic version of Reference.get()
-    // so that the value in the referent field, if necessary,
-    // can be registered by the pre-barrier code.
-    if (!UseG1GC) return NULL;
+    // Use the intrinsic version of Reference.get() so that the value in
+    // the referent field can be registered by the G1 pre-barrier code.
+    // Also add memory barrier to prevent commoning reads from this field
+    // across safepoint since GC can change it value.
     break;
 
  default:
@@ -2195,14 +2196,17 @@
 
 const static BasicType T_ADDRESS_HOLDER = T_LONG;
 
-// Helper that guards and inserts a G1 pre-barrier.
-void LibraryCallKit::insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* pre_val) {
-  assert(UseG1GC, "should not call this otherwise");
-
+// Helper that guards and inserts a pre-barrier.
+void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset,
+                                        Node* pre_val, int nargs, bool need_mem_bar) {
   // We could be accessing the referent field of a reference object. If so, when G1
   // is enabled, we need to log the value in the referent field in an SATB buffer.
   // This routine performs some compile time filters and generates suitable
   // runtime filters that guard the pre-barrier code.
+  // Also add memory barrier for non volatile load from the referent field
+  // to prevent commoning of loads across safepoint.
+  if (!UseG1GC && !need_mem_bar)
+    return;
 
   // Some compile time checks.
 
@@ -2224,11 +2228,12 @@
 
     const TypeInstPtr* itype = btype->isa_instptr();
     if (itype != NULL) {
-      // Can the klass of base_oop be statically determined
-      // to be _not_ a sub-class of Reference?
+      // Can the klass of base_oop be statically determined to be
+      // _not_ a sub-class of Reference and _not_ Object?
       ciKlass* klass = itype->klass();
-      if (klass->is_subtype_of(env()->Reference_klass()) &&
-          !env()->Reference_klass()->is_subtype_of(klass)) {
+      if ( klass->is_loaded() &&
+          !klass->is_subtype_of(env()->Reference_klass()) &&
+          !env()->Object_klass()->is_subtype_of(klass)) {
         return;
       }
     }
@@ -2238,10 +2243,8 @@
   // we need to generate the following runtime filters
   //
   // if (offset == java_lang_ref_Reference::_reference_offset) {
-  //   if (base != null) {
-  //     if (instance_of(base, java.lang.ref.Reference)) {
-  //       pre_barrier(_, pre_val, ...);
-  //     }
+  //   if (instance_of(base, java.lang.ref.Reference)) {
+  //     pre_barrier(_, pre_val, ...);
   //   }
   // }
 
@@ -2254,19 +2257,19 @@
   Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
 
   __ if_then(offset, BoolTest::eq, referent_off, unlikely); {
-    __ if_then(base_oop, BoolTest::ne, null(), likely); {
-
       // Update graphKit memory and control from IdealKit.
       sync_kit(ideal);
 
       Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass()));
+      _sp += nargs;  // gen_instanceof might do an uncommon trap
       Node* is_instof = gen_instanceof(base_oop, ref_klass_con);
+      _sp -= nargs;
 
       // Update IdealKit memory and control from graphKit.
       __ sync_kit(this);
 
       Node* one = __ ConI(1);
-
+      // is_instof == 0 if base_oop == NULL
       __ if_then(is_instof, BoolTest::eq, one, unlikely); {
 
         // Update graphKit from IdeakKit.
@@ -2278,12 +2281,15 @@
                     NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
                     pre_val /* pre_val */,
                     T_OBJECT);
-
+        if (need_mem_bar) {
+          // Add memory barrier to prevent commoning reads from this field
+          // across safepoint since GC can change its value.
+          insert_mem_bar(Op_MemBarCPUOrder);
+        }
         // Update IdealKit from graphKit.
         __ sync_kit(this);
 
       } __ end_if(); // _ref_type != ref_none
-    } __ end_if(); // base  != NULL
   } __ end_if(); // offset == referent_offset
 
   // Final sync IdealKit and GraphKit.
@@ -2418,7 +2424,9 @@
   // object (either by using Unsafe directly or through reflection)
   // then, if G1 is enabled, we need to record the referent in an
   // SATB log buffer using the pre-barrier mechanism.
-  bool need_read_barrier = UseG1GC && !is_native_ptr && !is_store &&
+  // Also we need to add memory barrier to prevent commoning reads
+  // from this field across safepoint since GC can change its value.
+  bool need_read_barrier = !is_native_ptr && !is_store &&
                            offset != top() && heap_base_oop != top();
 
   if (!is_store && type == T_OBJECT) {
@@ -2508,7 +2516,7 @@
       break;
     case T_OBJECT:
       if (need_read_barrier) {
-        insert_g1_pre_barrier(heap_base_oop, offset, p);
+        insert_pre_barrier(heap_base_oop, offset, p, nargs, !(is_volatile || need_mem_bar));
       }
       push(p);
       break;
@@ -5484,6 +5492,10 @@
               result /* pre_val */,
               T_OBJECT);
 
+  // Add memory barrier to prevent commoning reads from this field
+  // across safepoint since GC can change its value.
+  insert_mem_bar(Op_MemBarCPUOrder);
+
   push(result);
   return true;
 }
--- a/src/share/vm/opto/loopnode.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/loopnode.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1773,6 +1773,8 @@
     if (stride_con > 0) tty->print("+");
     tty->print("%d", stride_con);
 
+    tty->print(" (%d iters) ", (int)cl->profile_trip_cnt());
+
     if (cl->is_pre_loop ()) tty->print(" pre" );
     if (cl->is_main_loop()) tty->print(" main");
     if (cl->is_post_loop()) tty->print(" post");
--- a/src/share/vm/opto/output.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/output.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1871,6 +1871,8 @@
   if (!do_scheduling())
     return;
 
+  assert(MaxVectorSize <= 8, "scheduling code works only with pairs");
+
   NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
 
   // Create a data structure for all the scheduling information
--- a/src/share/vm/opto/superword.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/superword.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1058,12 +1058,27 @@
   return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));
 }
 
+//------------------------------same_inputs--------------------------
+// For pack p, are all idx operands the same?
+static bool same_inputs(Node_List* p, int idx) {
+  Node* p0 = p->at(0);
+  uint vlen = p->size();
+  Node* p0_def = p0->in(idx);
+  for (uint i = 1; i < vlen; i++) {
+    Node* pi = p->at(i);
+    Node* pi_def = pi->in(idx);
+    if (p0_def != pi_def)
+      return false;
+  }
+  return true;
+}
+
 //------------------------------profitable---------------------------
 // For pack p, are all operands and all uses (with in the block) vector?
 bool SuperWord::profitable(Node_List* p) {
   Node* p0 = p->at(0);
   uint start, end;
-  vector_opd_range(p0, &start, &end);
+  VectorNode::vector_operands(p0, &start, &end);
 
   // Return false if some input is not vector and inside block
   for (uint i = start; i < end; i++) {
@@ -1071,15 +1086,20 @@
       // For now, return false if not scalar promotion case (inputs are the same.)
       // Later, implement PackNode and allow differing, non-vector inputs
       // (maybe just the ones from outside the block.)
-      Node* p0_def = p0->in(i);
-      for (uint j = 1; j < p->size(); j++) {
-        Node* use = p->at(j);
-        Node* def = use->in(i);
-        if (p0_def != def)
-          return false;
+      if (!same_inputs(p, i)) {
+        return false;
       }
     }
   }
+  if (VectorNode::is_shift(p0)) {
+    // For now, return false if shift count is vector because
+    // hw does not support it.
+    if (is_vector_use(p0, 2))
+      return false;
+    // For the same reason return false if different shift counts.
+    if (!same_inputs(p, 2))
+      return false;
+  }
   if (!p0->is_Store()) {
     // For now, return false if not all uses are vector.
     // Later, implement ExtractNode and allow non-vector uses (maybe
@@ -1357,6 +1377,12 @@
         // Promote operands to vector
         Node* in1 = vector_opd(p, 1);
         Node* in2 = vector_opd(p, 2);
+        if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {
+          // Move invariant vector input into second position to avoid register spilling.
+          Node* tmp = in1;
+          in1 = in2;
+          in2 = tmp;
+        }
         vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
       } else {
         ShouldNotReachHere();
@@ -1386,19 +1412,40 @@
   uint vlen = p->size();
   Node* opd = p0->in(opd_idx);
 
-  bool same_opd = true;
-  for (uint i = 1; i < vlen; i++) {
-    Node* pi = p->at(i);
-    Node* in = pi->in(opd_idx);
-    if (opd != in) {
-      same_opd = false;
-      break;
+  if (same_inputs(p, opd_idx)) {
+    if (opd->is_Vector() || opd->is_LoadVector()) {
+      assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
+      return opd; // input is matching vector
     }
-  }
-
-  if (same_opd) {
-    if (opd->is_Vector() || opd->is_LoadVector()) {
-      return opd; // input is matching vector
+    if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
+      // No vector is needed for shift count.
+      // Vector instructions do not mask shift count, do it here.
+      Compile* C = _phase->C;
+      Node* cnt = opd;
+      juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
+      const TypeInt* t = opd->find_int_type();
+      if (t != NULL && t->is_con()) {
+        juint shift = t->get_con();
+        if (shift > mask) { // Unsigned cmp
+          cnt = ConNode::make(C, TypeInt::make(shift & mask));
+        }
+      } else {
+        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
+          cnt = ConNode::make(C, TypeInt::make(mask));
+          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          cnt = new (C, 3) AndINode(opd, cnt);
+          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+        }
+        assert(opd->bottom_type()->isa_int(), "int type only");
+        // Move non constant shift count into XMM register.
+        cnt = new (_phase->C, 2) MoveI2FNode(cnt);
+      }
+      if (cnt != opd) {
+        _phase->_igvn.register_new_node_with_optimizer(cnt);
+        _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+      }
+      return cnt;
     }
     assert(!opd->is_StoreVector(), "such vector is not expected here");
     // Convert scalar input to vector with the same number of elements as
@@ -1428,7 +1475,7 @@
     Node* in = pi->in(opd_idx);
     assert(my_pack(in) == NULL, "Should already have been unpacked");
     assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
-    pk->add_opd(i, in);
+    pk->add_opd(in);
   }
   _phase->_igvn.register_new_node_with_optimizer(pk);
   _phase->set_ctrl(pk, _phase->get_ctrl(opd));
@@ -1718,37 +1765,27 @@
   for (int i = _block.length() - 1; i >= 0; i--) {
     Node* n = _block.at(i);
     // Only integer types need be examined
-    if (n->bottom_type()->isa_int()) {
+    const Type* vt = velt_type(n);
+    if (vt->basic_type() == T_INT) {
       uint start, end;
-      vector_opd_range(n, &start, &end);
+      VectorNode::vector_operands(n, &start, &end);
       const Type* vt = velt_type(n);
 
       for (uint j = start; j < end; j++) {
         Node* in  = n->in(j);
-        // Don't propagate through a type conversion
-        if (n->bottom_type() != in->bottom_type())
-          continue;
-        switch(in->Opcode()) {
-        case Op_AddI:    case Op_AddL:
-        case Op_SubI:    case Op_SubL:
-        case Op_MulI:    case Op_MulL:
-        case Op_AndI:    case Op_AndL:
-        case Op_OrI:     case Op_OrL:
-        case Op_XorI:    case Op_XorL:
-        case Op_LShiftI: case Op_LShiftL:
-        case Op_CMoveI:  case Op_CMoveL:
-          if (in_bb(in)) {
-            bool same_type = true;
-            for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
-              Node *use = in->fast_out(k);
-              if (!in_bb(use) || !same_velt_type(use, n)) {
-                same_type = false;
-                break;
-              }
+        // Don't propagate through a memory
+        if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT &&
+            data_size(n) < data_size(in)) {
+          bool same_type = true;
+          for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
+            Node *use = in->fast_out(k);
+            if (!in_bb(use) || !same_velt_type(use, n)) {
+              same_type = false;
+              break;
             }
-            if (same_type) {
-              set_velt_type(in, vt);
-            }
+          }
+          if (same_type) {
+            set_velt_type(in, vt);
           }
         }
       }
@@ -1792,10 +1829,8 @@
   }
   const Type* t = _igvn.type(n);
   if (t->basic_type() == T_INT) {
-    if (t->higher_equal(TypeInt::BOOL))  return TypeInt::BOOL;
-    if (t->higher_equal(TypeInt::BYTE))  return TypeInt::BYTE;
-    if (t->higher_equal(TypeInt::CHAR))  return TypeInt::CHAR;
-    if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT;
+    // A narrow type of arithmetic operations will be determined by
+    // propagating the type of memory operations.
     return TypeInt::INT;
   }
   return t;
@@ -1811,38 +1846,6 @@
   return vt1 == vt2;
 }
 
-//-------------------------vector_opd_range-----------------------
-// (Start, end] half-open range defining which operands are vector
-void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) {
-  switch (n->Opcode()) {
-  case Op_LoadB:   case Op_LoadUB:
-  case Op_LoadS:   case Op_LoadUS:
-  case Op_LoadI:   case Op_LoadL:
-  case Op_LoadF:   case Op_LoadD:
-  case Op_LoadP:
-    *start = 0;
-    *end   = 0;
-    return;
-  case Op_StoreB:  case Op_StoreC:
-  case Op_StoreI:  case Op_StoreL:
-  case Op_StoreF:  case Op_StoreD:
-  case Op_StoreP:
-    *start = MemNode::ValueIn;
-    *end   = *start + 1;
-    return;
-  case Op_LShiftI: case Op_LShiftL:
-    *start = 1;
-    *end   = 2;
-    return;
-  case Op_CMoveI:  case Op_CMoveL:  case Op_CMoveF:  case Op_CMoveD:
-    *start = 2;
-    *end   = n->req();
-    return;
-  }
-  *start = 1;
-  *end   = n->req(); // default is all operands
-}
-
 //------------------------------in_packset---------------------------
 // Are s1 and s2 in a pack pair and ordered as s1,s2?
 bool SuperWord::in_packset(Node* s1, Node* s2) {
@@ -1940,7 +1943,7 @@
   //     lim0 == original pre loop limit
   //     V == v_align (power of 2)
   //     invar == extra invariant piece of the address expression
-  //     e == k [ +/- invar ]
+  //     e == offset [ +/- invar ]
   //
   // When reassociating expressions involving '%' the basic rules are:
   //     (a - b) % k == 0   =>  a % k == b % k
@@ -1993,13 +1996,12 @@
   int elt_size = align_to_ref_p.memory_size();
   int v_align  = vw / elt_size;
   assert(v_align > 1, "sanity");
-  int k        = align_to_ref_p.offset_in_bytes() / elt_size;
-
-  Node *kn   = _igvn.intcon(k);
+  int offset   = align_to_ref_p.offset_in_bytes() / elt_size;
+  Node *offsn  = _igvn.intcon(offset);
 
-  Node *e = kn;
+  Node *e = offsn;
   if (align_to_ref_p.invar() != NULL) {
-    // incorporate any extra invariant piece producing k +/- invar >>> log2(elt)
+    // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* aref     = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(aref);
@@ -2014,15 +2016,15 @@
   }
   if (vw > ObjectAlignmentInBytes) {
     // incorporate base e +/- base && Mask >>> log2(elt)
-    Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw)));
     Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
     _phase->_igvn.register_new_node_with_optimizer(xbase);
-    Node* masked_xbase  = new (_phase->C, 3) AndXNode(xbase, mask);
+#ifdef _LP64
+    xbase  = new (_phase->C, 2) ConvL2INode(xbase);
+    _phase->_igvn.register_new_node_with_optimizer(xbase);
+#endif
+    Node* mask = _igvn.intcon(vw-1);
+    Node* masked_xbase  = new (_phase->C, 3) AndINode(xbase, mask);
     _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
-#ifdef _LP64
-    masked_xbase  = new (_phase->C, 2) ConvL2INode(masked_xbase);
-    _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
-#endif
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* bref     = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(bref);
--- a/src/share/vm/opto/vectornode.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/vectornode.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -31,7 +31,7 @@
 // Return the vector operator for the specified scalar operation
 // and vector length.  Also used to check if the code generator
 // supports the vector operation.
-int VectorNode::opcode(int sopc, uint vlen, BasicType bt) {
+int VectorNode::opcode(int sopc, BasicType bt) {
   switch (sopc) {
   case Op_AddI:
     switch (bt) {
@@ -69,6 +69,15 @@
   case Op_SubD:
     assert(bt == T_DOUBLE, "must be");
     return Op_SubVD;
+  case Op_MulI:
+    switch (bt) {
+    case T_BOOLEAN:
+    case T_BYTE:   return 0;   // Unimplemented
+    case T_CHAR:
+    case T_SHORT:  return Op_MulVS;
+    case T_INT:    return Matcher::match_rule_supported(Op_MulVI) ? Op_MulVI : 0; // SSE4_1
+    }
+    ShouldNotReachHere();
   case Op_MulF:
     assert(bt == T_FLOAT, "must be");
     return Op_MulVF;
@@ -90,6 +99,9 @@
     case T_INT:    return Op_LShiftVI;
     }
     ShouldNotReachHere();
+  case Op_LShiftL:
+    assert(bt == T_LONG, "must be");
+    return Op_LShiftVL;
   case Op_RShiftI:
     switch (bt) {
     case T_BOOLEAN:
@@ -99,6 +111,21 @@
     case T_INT:    return Op_RShiftVI;
     }
     ShouldNotReachHere();
+  case Op_RShiftL:
+    assert(bt == T_LONG, "must be");
+    return Op_RShiftVL;
+  case Op_URShiftI:
+    switch (bt) {
+    case T_BOOLEAN:
+    case T_BYTE:   return Op_URShiftVB;
+    case T_CHAR:
+    case T_SHORT:  return Op_URShiftVS;
+    case T_INT:    return Op_URShiftVI;
+    }
+    ShouldNotReachHere();
+  case Op_URShiftL:
+    assert(bt == T_LONG, "must be");
+    return Op_URShiftVL;
   case Op_AndI:
   case Op_AndL:
     return Op_AndV;
@@ -134,16 +161,88 @@
   if (is_java_primitive(bt) &&
       (vlen > 1) && is_power_of_2(vlen) &&
       Matcher::vector_size_supported(bt, vlen)) {
-    int vopc = VectorNode::opcode(opc, vlen, bt);
+    int vopc = VectorNode::opcode(opc, bt);
     return vopc > 0 && Matcher::has_match_rule(vopc);
   }
   return false;
 }
 
+bool VectorNode::is_shift(Node* n) {
+  switch (n->Opcode()) {
+  case Op_LShiftI:
+  case Op_LShiftL:
+  case Op_RShiftI:
+  case Op_RShiftL:
+  case Op_URShiftI:
+  case Op_URShiftL:
+    return true;
+  }
+  return false;
+}
+
+// Check if input is loop invariant vector.
+bool VectorNode::is_invariant_vector(Node* n) {
+  // Only Replicate vector nodes are loop invariant for now.
+  switch (n->Opcode()) {
+  case Op_ReplicateB:
+  case Op_ReplicateS:
+  case Op_ReplicateI:
+  case Op_ReplicateL:
+  case Op_ReplicateF:
+  case Op_ReplicateD:
+    return true;
+  }
+  return false;
+}
+
+// [Start, end) half-open range defining which operands are vectors
+void VectorNode::vector_operands(Node* n, uint* start, uint* end) {
+  switch (n->Opcode()) {
+  case Op_LoadB:   case Op_LoadUB:
+  case Op_LoadS:   case Op_LoadUS:
+  case Op_LoadI:   case Op_LoadL:
+  case Op_LoadF:   case Op_LoadD:
+  case Op_LoadP:   case Op_LoadN:
+    *start = 0;
+    *end   = 0; // no vector operands
+    break;
+  case Op_StoreB:  case Op_StoreC:
+  case Op_StoreI:  case Op_StoreL:
+  case Op_StoreF:  case Op_StoreD:
+  case Op_StoreP:  case Op_StoreN:
+    *start = MemNode::ValueIn;
+    *end   = MemNode::ValueIn + 1; // 1 vector operand
+    break;
+  case Op_LShiftI:  case Op_LShiftL:
+  case Op_RShiftI:  case Op_RShiftL:
+  case Op_URShiftI: case Op_URShiftL:
+    *start = 1;
+    *end   = 2; // 1 vector operand
+    break;
+  case Op_AddI: case Op_AddL: case Op_AddF: case Op_AddD:
+  case Op_SubI: case Op_SubL: case Op_SubF: case Op_SubD:
+  case Op_MulI: case Op_MulL: case Op_MulF: case Op_MulD:
+  case Op_DivF: case Op_DivD:
+  case Op_AndI: case Op_AndL:
+  case Op_OrI:  case Op_OrL:
+  case Op_XorI: case Op_XorL:
+    *start = 1;
+    *end   = 3; // 2 vector operands
+    break;
+  case Op_CMoveI:  case Op_CMoveL:  case Op_CMoveF:  case Op_CMoveD:
+    *start = 2;
+    *end   = n->req();
+    break;
+  default:
+    *start = 1;
+    *end   = n->req(); // default is all operands
+  }
+}
+
 // Return the vector version of a scalar operation node.
 VectorNode* VectorNode::make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt) {
   const TypeVect* vt = TypeVect::make(bt, vlen);
-  int vopc = VectorNode::opcode(opc, vlen, bt);
+  int vopc = VectorNode::opcode(opc, bt);
 
   switch (vopc) {
   case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vt);
@@ -160,6 +259,8 @@
   case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vt);
   case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vt);
 
+  case Op_MulVS: return new (C, 3) MulVSNode(n1, n2, vt);
+  case Op_MulVI: return new (C, 3) MulVINode(n1, n2, vt);
   case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vt);
   case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vt);
 
@@ -169,10 +270,17 @@
   case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vt);
   case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vt);
   case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vt);
+  case Op_LShiftVL: return new (C, 3) LShiftVLNode(n1, n2, vt);
 
   case Op_RShiftVB: return new (C, 3) RShiftVBNode(n1, n2, vt);
   case Op_RShiftVS: return new (C, 3) RShiftVSNode(n1, n2, vt);
   case Op_RShiftVI: return new (C, 3) RShiftVINode(n1, n2, vt);
+  case Op_RShiftVL: return new (C, 3) RShiftVLNode(n1, n2, vt);
+
+  case Op_URShiftVB: return new (C, 3) URShiftVBNode(n1, n2, vt);
+  case Op_URShiftVS: return new (C, 3) URShiftVSNode(n1, n2, vt);
+  case Op_URShiftVI: return new (C, 3) URShiftVINode(n1, n2, vt);
+  case Op_URShiftVL: return new (C, 3) URShiftVLNode(n1, n2, vt);
 
   case Op_AndV: return new (C, 3) AndVNode(n1, n2, vt);
   case Op_OrV:  return new (C, 3) OrVNode (n1, n2, vt);
@@ -214,38 +322,39 @@
   switch (bt) {
   case T_BOOLEAN:
   case T_BYTE:
-    return new (C, vlen+1) PackBNode(s, vt);
+    return new (C, 2) PackBNode(s, vt);
   case T_CHAR:
   case T_SHORT:
-    return new (C, vlen+1) PackSNode(s, vt);
+    return new (C, 2) PackSNode(s, vt);
   case T_INT:
-    return new (C, vlen+1) PackINode(s, vt);
+    return new (C, 2) PackINode(s, vt);
   case T_LONG:
-    return new (C, vlen+1) PackLNode(s, vt);
+    return new (C, 2) PackLNode(s, vt);
   case T_FLOAT:
-    return new (C, vlen+1) PackFNode(s, vt);
+    return new (C, 2) PackFNode(s, vt);
   case T_DOUBLE:
-    return new (C, vlen+1) PackDNode(s, vt);
+    return new (C, 2) PackDNode(s, vt);
   }
   ShouldNotReachHere();
   return NULL;
 }
 
 // Create a binary tree form for Packs. [lo, hi) (half-open) range
-Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) {
+PackNode* PackNode::binary_tree_pack(Compile* C, int lo, int hi) {
   int ct = hi - lo;
   assert(is_power_of_2(ct), "power of 2");
   if (ct == 2) {
     PackNode* pk = PackNode::make(C, in(lo), 2, vect_type()->element_basic_type());
-    pk->add_opd(1, in(lo+1));
+    pk->add_opd(in(lo+1));
     return pk;
 
   } else {
     int mid = lo + ct/2;
-    Node* n1 = binaryTreePack(C, lo,  mid);
-    Node* n2 = binaryTreePack(C, mid, hi );
+    PackNode* n1 = binary_tree_pack(C, lo,  mid);
+    PackNode* n2 = binary_tree_pack(C, mid, hi );
 
-    BasicType bt = vect_type()->element_basic_type();
+    BasicType bt = n1->vect_type()->element_basic_type();
+    assert(bt == n2->vect_type()->element_basic_type(), "should be the same");
     switch (bt) {
     case T_BOOLEAN:
     case T_BYTE:
--- a/src/share/vm/opto/vectornode.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/opto/vectornode.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -46,6 +46,7 @@
 
   const TypeVect* vect_type() const { return type()->is_vect(); }
   uint length() const { return vect_type()->length(); } // Vector length
+  uint length_in_bytes() const { return vect_type()->length_in_bytes(); }
 
   virtual int Opcode() const;
 
@@ -55,9 +56,12 @@
 
   static VectorNode* make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt);
 
-  static int  opcode(int opc, uint vlen, BasicType bt);
+  static int  opcode(int opc, BasicType bt);
   static bool implemented(int opc, uint vlen, BasicType bt);
-
+  static bool is_shift(Node* n);
+  static bool is_invariant_vector(Node* n);
+  // [Start, end) half-open range defining which operands are vectors
+  static void vector_operands(Node* n, uint* start, uint* end);
 };
 
 //===========================Vector=ALU=Operations====================================
@@ -158,6 +162,22 @@
   virtual int Opcode() const;
 };
 
+//------------------------------MulVSNode---------------------------------------
+// Vector multiply short
+class MulVSNode : public VectorNode {
+ public:
+  MulVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------MulVINode---------------------------------------
+// Vector multiply int
+class MulVINode : public VectorNode {
+ public:
+  MulVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
 //------------------------------MulVFNode---------------------------------------
 // Vector multiply float
 class MulVFNode : public VectorNode {
@@ -191,7 +211,7 @@
 };
 
 //------------------------------LShiftVBNode---------------------------------------
-// Vector lshift byte
+// Vector left shift bytes
 class LShiftVBNode : public VectorNode {
  public:
   LShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
@@ -199,7 +219,7 @@
 };
 
 //------------------------------LShiftVSNode---------------------------------------
-// Vector lshift shorts
+// Vector left shift shorts
 class LShiftVSNode : public VectorNode {
  public:
   LShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
@@ -207,39 +227,88 @@
 };
 
 //------------------------------LShiftVINode---------------------------------------
-// Vector lshift ints
+// Vector left shift ints
 class LShiftVINode : public VectorNode {
  public:
   LShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
-//------------------------------URShiftVBNode---------------------------------------
-// Vector urshift bytes
+//------------------------------LShiftVLNode---------------------------------------
+// Vector left shift longs
+class LShiftVLNode : public VectorNode {
+ public:
+  LShiftVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------RShiftVBNode---------------------------------------
+// Vector right arithmetic (signed) shift bytes
 class RShiftVBNode : public VectorNode {
  public:
   RShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
-//------------------------------URShiftVSNode---------------------------------------
-// Vector urshift shorts
+//------------------------------RShiftVSNode---------------------------------------
+// Vector right arithmetic (signed) shift shorts
 class RShiftVSNode : public VectorNode {
  public:
   RShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
-//------------------------------URShiftVINode---------------------------------------
-// Vector urshift ints
+//------------------------------RShiftVINode---------------------------------------
+// Vector right arithmetic (signed) shift ints
 class RShiftVINode : public VectorNode {
  public:
   RShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
   virtual int Opcode() const;
 };
 
+//------------------------------RShiftVLNode---------------------------------------
+// Vector right arithmetic (signed) shift longs
+class RShiftVLNode : public VectorNode {
+ public:
+  RShiftVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------URShiftVBNode---------------------------------------
+// Vector right logical (unsigned) shift bytes
+class URShiftVBNode : public VectorNode {
+ public:
+  URShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------URShiftVSNode---------------------------------------
+// Vector right logical (unsigned) shift shorts
+class URShiftVSNode : public VectorNode {
+ public:
+  URShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------URShiftVINode---------------------------------------
+// Vector right logical (unsigned) shift ints
+class URShiftVINode : public VectorNode {
+ public:
+  URShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+//------------------------------URShiftVLNode---------------------------------------
+// Vector right logical (unsigned) shift longs
+class URShiftVLNode : public VectorNode {
+ public:
+  URShiftVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
+  virtual int Opcode() const;
+};
+
+
 //------------------------------AndVNode---------------------------------------
-// Vector and
+// Vector and integer
 class AndVNode : public VectorNode {
  public:
   AndVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
@@ -247,7 +316,7 @@
 };
 
 //------------------------------OrVNode---------------------------------------
-// Vector or
+// Vector or integer
 class OrVNode : public VectorNode {
  public:
   OrVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
@@ -255,7 +324,7 @@
 };
 
 //------------------------------XorVNode---------------------------------------
-// Vector xor
+// Vector xor integer
 class XorVNode : public VectorNode {
  public:
   XorVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
@@ -373,12 +442,12 @@
   PackNode(Node* in1, Node* n2, const TypeVect* vt) : VectorNode(in1, n2, vt) {}
   virtual int Opcode() const;
 
-  void add_opd(uint i, Node* n) {
-    init_req(i+1, n);
+  void add_opd(Node* n) {
+    add_req(n);
   }
 
   // Create a binary tree form for Packs. [lo, hi) (half-open) range
-  Node* binaryTreePack(Compile* C, int lo, int hi);
+  PackNode* binary_tree_pack(Compile* C, int lo, int hi);
 
   static PackNode* make(Compile* C, Node* s, uint vlen, BasicType bt);
 };
--- a/src/share/vm/precompiled/precompiled.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/precompiled/precompiled.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -306,7 +306,6 @@
 # include "gc_implementation/g1/g1_specialized_oop_closures.hpp"
 # include "gc_implementation/g1/ptrQueue.hpp"
 # include "gc_implementation/g1/satbQueue.hpp"
-# include "gc_implementation/parNew/parGCAllocBuffer.hpp"
 # include "gc_implementation/parNew/parOopClosures.hpp"
 # include "gc_implementation/parallelScavenge/objectStartArray.hpp"
 # include "gc_implementation/parallelScavenge/parMarkBitMap.hpp"
@@ -322,6 +321,7 @@
 # include "gc_implementation/parallelScavenge/psYoungGen.hpp"
 # include "gc_implementation/shared/gcAdaptivePolicyCounters.hpp"
 # include "gc_implementation/shared/gcPolicyCounters.hpp"
+# include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #endif // SERIALGC
 
 #endif // !DONT_USE_PRECOMPILED_HEADER
--- a/src/share/vm/runtime/globals.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/runtime/globals.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -829,6 +829,9 @@
   product(bool, PrintGCApplicationStoppedTime, false,                       \
           "Print the time the application has been stopped")                \
                                                                             \
+  diagnostic(bool, VerboseVerification, false,                              \
+             "Display detailed verification details")                       \
+                                                                            \
   notproduct(uintx, ErrorHandlerTest, 0,                                    \
           "If > 0, provokes an error after VM initialization; the value"    \
           "determines which error to provoke.  See test_error_handler()"    \
--- a/src/share/vm/runtime/relocator.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/runtime/relocator.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -465,13 +465,12 @@
 void Relocator::adjust_stack_map_table(int bci, int delta) {
   if (method()->has_stackmap_table()) {
     typeArrayOop data = method()->stackmap_data();
-    // The data in the array is a classfile representation of the stackmap
-    // table attribute, less the initial u2 tag and u4 attribute_length fields.
-    stack_map_table_attribute* attr = stack_map_table_attribute::at(
-        (address)data->byte_at_addr(0) - (sizeof(u2) + sizeof(u4)));
+    // The data in the array is a classfile representation of the stackmap table
+    stack_map_table* sm_table =
+        stack_map_table::at((address)data->byte_at_addr(0));
 
-    int count = attr->number_of_entries();
-    stack_map_frame* frame = attr->entries();
+    int count = sm_table->number_of_entries();
+    stack_map_frame* frame = sm_table->entries();
     int bci_iter = -1;
     bool offset_adjusted = false; // only need to adjust one offset
 
@@ -486,7 +485,7 @@
           frame->set_offset_delta(new_offset_delta);
         } else {
           assert(frame->is_same_frame() ||
-                 frame->is_same_frame_1_stack_item_frame(),
+                 frame->is_same_locals_1_stack_item_frame(),
                  "Frame must be one of the compressed forms");
           // The new delta exceeds the capacity of the 'same_frame' or
           // 'same_frame_1_stack_item_frame' frame types.  We need to
@@ -513,7 +512,7 @@
           if (frame->is_same_frame()) {
             same_frame_extended::create_at(frame_addr, new_offset_delta);
           } else {
-            same_frame_1_stack_item_extended::create_at(
+            same_locals_1_stack_item_extended::create_at(
               frame_addr, new_offset_delta, NULL);
             // the verification_info_type should already be at the right spot
           }
--- a/src/share/vm/services/memPtr.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/services/memPtr.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -27,7 +27,7 @@
 #include "services/memTracker.hpp"
 
 volatile jint SequenceGenerator::_seq_number = 1;
-DEBUG_ONLY(jint SequenceGenerator::_max_seq_number = 1;)
+NOT_PRODUCT(jint SequenceGenerator::_max_seq_number = 1;)
 DEBUG_ONLY(volatile unsigned long SequenceGenerator::_generation = 0;)
 
 jint SequenceGenerator::next() {
@@ -36,7 +36,7 @@
     MemTracker::shutdown(MemTracker::NMT_sequence_overflow);
   }
   assert(seq > 0, "counter overflow");
-  DEBUG_ONLY(_max_seq_number = (seq > _max_seq_number) ? seq : _max_seq_number;)
+  NOT_PRODUCT(_max_seq_number = (seq > _max_seq_number) ? seq : _max_seq_number;)
   return seq;
 }
 
--- a/src/share/vm/services/memPtr.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/services/memPtr.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -51,11 +51,11 @@
   };
 
   DEBUG_ONLY(static unsigned long current_generation() { return (unsigned long)_generation; })
-  DEBUG_ONLY(static jint max_seq_num() { return _max_seq_number; })
+  NOT_PRODUCT(static jint max_seq_num() { return _max_seq_number; })
 
  private:
   static volatile jint _seq_number;
-  DEBUG_ONLY(static jint _max_seq_number; )
+  NOT_PRODUCT(static jint _max_seq_number; )
   DEBUG_ONLY(static volatile unsigned long _generation; )
 };
 
--- a/src/share/vm/services/memPtrArray.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/services/memPtrArray.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -59,7 +59,7 @@
   virtual size_t instance_size() const = 0;
   virtual bool shrink() = 0;
 
-  debug_only(virtual int capacity() const = 0;)
+  NOT_PRODUCT(virtual int capacity() const = 0;)
 };
 
 // Iterator interface
@@ -205,7 +205,7 @@
     return _size;
   }
 
-  debug_only(int capacity() const { return _max_size; })
+  NOT_PRODUCT(int capacity() const { return _max_size; })
 
   void clear() {
     assert(_data != NULL, "Just check");
--- a/src/share/vm/services/memRecorder.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/services/memRecorder.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -73,7 +73,7 @@
     return sizeof(FixedSizeMemPointerArray<E, SIZE>);
   }
 
-  debug_only(int capacity() const { return SIZE; })
+  NOT_PRODUCT(int capacity() const { return SIZE; })
 
  public:
   // implementation of public interface
--- a/src/share/vm/services/memSnapshot.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/services/memSnapshot.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -338,15 +338,13 @@
             vm_itr.insert_after(cur_vm);
           }
         } else {
-#ifdef ASSERT
           // In theory, we should assert without conditions. However, in case of native
           // thread stack, NMT explicitly releases the thread stack in Thread's destructor,
           // due to platform dependent behaviors. On some platforms, we see uncommit/release
           // native thread stack, but some, we don't.
-          if (!cur_vm->is_uncommit_record() && !cur_vm->is_deallocation_record()) {
-            ShouldNotReachHere();
-          }
-#endif
+          assert(cur_vm->is_uncommit_record() || cur_vm->is_deallocation_record(),
+            err_msg("Should not reach here, pointer addr = [" INTPTR_FORMAT "], flags = [%x]",
+               cur_vm->addr(), cur_vm->flags()));
         }
       }
     } else {
@@ -406,7 +404,7 @@
 }
 
 
-#ifdef ASSERT
+#ifndef PRODUCT
 void MemSnapshot::print_snapshot_stats(outputStream* st) {
   st->print_cr("Snapshot:");
   st->print_cr("\tMalloced: %d/%d [%5.2f%%]  %dKB", _alloc_ptrs->length(), _alloc_ptrs->capacity(),
@@ -434,6 +432,20 @@
   }
 }
 
+bool MemSnapshot::has_allocation_record(address addr) {
+  MemPointerArrayIteratorImpl itr(_staging_area);
+  MemPointerRecord* cur = (MemPointerRecord*)itr.current();
+  while (cur != NULL) {
+    if (cur->addr() == addr && cur->is_allocation_record()) {
+      return true;
+    }
+    cur = (MemPointerRecord*)itr.next();
+  }
+  return false;
+}
+#endif // PRODUCT
+
+#ifdef ASSERT
 void MemSnapshot::check_staging_data() {
   MemPointerArrayIteratorImpl itr(_staging_area);
   MemPointerRecord* cur = (MemPointerRecord*)itr.current();
@@ -447,17 +459,5 @@
     next = (MemPointerRecord*)itr.next();
   }
 }
+#endif // ASSERT
 
-bool MemSnapshot::has_allocation_record(address addr) {
-  MemPointerArrayIteratorImpl itr(_staging_area);
-  MemPointerRecord* cur = (MemPointerRecord*)itr.current();
-  while (cur != NULL) {
-    if (cur->addr() == addr && cur->is_allocation_record()) {
-      return true;
-    }
-    cur = (MemPointerRecord*)itr.next();
-  }
-  return false;
-}
-
-#endif
--- a/src/share/vm/services/memSnapshot.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/services/memSnapshot.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -63,13 +63,13 @@
       MemPointer* p1 = (MemPointer*)ptr;
       MemPointer* p2 = (MemPointer*)_array->at(_pos - 1);
       assert(!is_dup_pointer(p1, p2),
-        "dup pointer");
+        err_msg("duplicated pointer, flag = [%x]", (unsigned int)((MemPointerRecord*)p1)->flags()));
     }
      if (_pos < _array->length() -1) {
       MemPointer* p1 = (MemPointer*)ptr;
       MemPointer* p2 = (MemPointer*)_array->at(_pos + 1);
       assert(!is_dup_pointer(p1, p2),
-        "dup pointer");
+        err_msg("duplicated pointer, flag = [%x]", (unsigned int)((MemPointerRecord*)p1)->flags()));
      }
     return _array->insert_at(ptr, _pos);
   }
@@ -79,14 +79,14 @@
       MemPointer* p1 = (MemPointer*)ptr;
       MemPointer* p2 = (MemPointer*)_array->at(_pos - 1);
       assert(!is_dup_pointer(p1, p2),
-        "dup pointer");
+        err_msg("duplicated pointer, flag = [%x]", (unsigned int)((MemPointerRecord*)p1)->flags()));
     }
     if (_pos < _array->length() - 1) {
       MemPointer* p1 = (MemPointer*)ptr;
       MemPointer* p2 = (MemPointer*)_array->at(_pos + 1);
 
       assert(!is_dup_pointer(p1, p2),
-        "dup pointer");
+        err_msg("duplicated pointer, flag = [%x]", (unsigned int)((MemPointerRecord*)p1)->flags()));
      }
     if (_array->insert_at(ptr, _pos + 1)) {
       _pos ++;
--- a/src/share/vm/services/memTrackWorker.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/services/memTrackWorker.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -99,9 +99,11 @@
     }
     if (rec != NULL) {
       // merge the recorder into staging area
-      bool result = snapshot->merge(rec);
-      assert(result, "merge failed");
-      debug_only(_merge_count ++;)
+      if (!snapshot->merge(rec)) {
+        MemTracker::shutdown(MemTracker::NMT_out_of_memory);
+      } else {
+        NOT_PRODUCT(_merge_count ++;)
+      }
       MemTracker::release_thread_recorder(rec);
     } else {
       // no more recorder to merge, promote staging area
@@ -129,7 +131,7 @@
   }
   assert(MemTracker::shutdown_in_progress(), "just check");
 
-  // transites to final shutdown
+  // transits to final shutdown
   MemTracker::final_shutdown();
 }
 
--- a/src/share/vm/services/memTracker.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/services/memTracker.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -65,7 +65,7 @@
 int                             MemTracker::_thread_count = 255;
 volatile jint                   MemTracker::_pooled_recorder_count = 0;
 debug_only(intx                 MemTracker::_main_thread_tid = 0;)
-debug_only(volatile jint        MemTracker::_pending_recorder_count = 0;)
+NOT_PRODUCT(volatile jint       MemTracker::_pending_recorder_count = 0;)
 
 void MemTracker::init_tracking_options(const char* option_line) {
   _tracking_level = NMT_off;
@@ -291,7 +291,7 @@
     (void*)cur_head)) {
     cur_head = const_cast<MemRecorder*>(_merge_pending_queue);
   }
-  debug_only(Atomic::store(0, &_pending_recorder_count));
+  NOT_PRODUCT(Atomic::store(0, &_pending_recorder_count));
   return cur_head;
 }
 
@@ -420,7 +420,7 @@
     cur_head = const_cast<MemRecorder*>(_merge_pending_queue);
     rec->set_next(cur_head);
   }
-  debug_only(Atomic::inc(&_pending_recorder_count);)
+  NOT_PRODUCT(Atomic::inc(&_pending_recorder_count);)
 }
 
 /*
--- a/src/share/vm/services/memTracker.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/services/memTracker.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -184,7 +184,6 @@
   // record a 'malloc' call
   static inline void record_malloc(address addr, size_t size, MEMFLAGS flags,
                             address pc = 0, Thread* thread = NULL) {
-    assert(is_on(), "check by caller");
     if (NMT_CAN_TRACK(flags)) {
       create_memory_record(addr, (flags|MemPointerRecord::malloc_tag()), size, pc, thread);
     }
@@ -285,7 +284,6 @@
 
   // retrieve global snapshot
   static MemSnapshot* get_snapshot() {
-    assert(is_on(), "native memory tracking is off");
     if (shutdown_in_progress()) {
       return NULL;
     }
--- a/src/share/vm/utilities/ostream.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/utilities/ostream.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -237,8 +237,9 @@
   return;
 }
 
-void outputStream::indent() {
+outputStream& outputStream::indent() {
   while (_position < _indentation) sp();
+  return *this;
 }
 
 void outputStream::print_jlong(jlong value) {
@@ -251,6 +252,47 @@
   print(os::julong_format_specifier(), value);
 }
 
+/**
+ * This prints out hex data in a 'windbg' or 'xxd' form, where each line is:
+ *   <hex-address>: 8 * <hex-halfword> <ascii translation (optional)>
+ * example:
+ * 0000000: 7f44 4f46 0102 0102 0000 0000 0000 0000  .DOF............
+ * 0000010: 0000 0000 0000 0040 0000 0020 0000 0005  .......@... ....
+ * 0000020: 0000 0000 0000 0040 0000 0000 0000 015d  .......@.......]
+ * ...
+ *
+ * indent is applied to each line.  Ends with a CR.
+ */
+void outputStream::print_data(void* data, size_t len, bool with_ascii) {
+  size_t limit = (len + 16) / 16 * 16;
+  for (size_t i = 0; i < limit; ++i) {
+    if (i % 16 == 0) {
+      indent().print("%07x:", i);
+    }
+    if (i % 2 == 0) {
+      print(" ");
+    }
+    if (i < len) {
+      print("%02x", ((unsigned char*)data)[i]);
+    } else {
+      print("  ");
+    }
+    if ((i + 1) % 16 == 0) {
+      if (with_ascii) {
+        print("  ");
+        for (size_t j = 0; j < 16; ++j) {
+          size_t idx = i + j - 15;
+          if (idx < len) {
+            char c = ((char*)data)[idx];
+            print("%c", c >= 32 && c <= 126 ? c : '.');
+          }
+        }
+      }
+      print_cr("");
+    }
+  }
+}
+
 stringStream::stringStream(size_t initial_size) : outputStream() {
   buffer_length = initial_size;
   buffer        = NEW_RESOURCE_ARRAY(char, buffer_length);
--- a/src/share/vm/utilities/ostream.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/utilities/ostream.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -59,9 +59,11 @@
    outputStream(int width, bool has_time_stamps);
 
    // indentation
-   void indent();
+   outputStream& indent();
    void inc() { _indentation++; };
    void dec() { _indentation--; };
+   void inc(int n) { _indentation += n; };
+   void dec(int n) { _indentation -= n; };
    int  indentation() const    { return _indentation; }
    void set_indentation(int i) { _indentation = i;    }
    void fill_to(int col);
@@ -84,6 +86,7 @@
    void print_raw(const char* str, int len)   { write(str,         len); }
    void print_raw_cr(const char* str)         { write(str, strlen(str)); cr(); }
    void print_raw_cr(const char* str, int len){ write(str,         len); cr(); }
+   void print_data(void* data, size_t len, bool with_ascii);
    void put(char ch);
    void sp(int count = 1);
    void cr();
@@ -122,6 +125,19 @@
 extern outputStream* tty;           // tty output
 extern outputStream* gclog_or_tty;  // stream for gc log if -Xloggc:<f>, or tty
 
+class streamIndentor : public StackObj {
+ private:
+  outputStream* _str;
+  int _amount;
+
+ public:
+  streamIndentor(outputStream* str, int amt = 2) : _str(str), _amount(amt) {
+    _str->inc(_amount);
+  }
+  ~streamIndentor() { _str->dec(_amount); }
+};
+
+
 // advisory locking for the shared tty stream:
 class ttyLocker: StackObj {
   friend class ttyUnlocker;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestByteVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,1274 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestByteVect
+ */
+
+public class TestByteVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int ADD_INIT = 0;
+  private static final int BIT_MASK = 0xB7;
+  private static final int VALUE = 3;
+  private static final int SHIFT = 8;
+
+  public static void main(String args[]) {
+    System.out.println("Testing Byte vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    byte[] a0 = new byte[ARRLEN];
+    byte[] a1 = new byte[ARRLEN];
+    byte[] a2 = new byte[ARRLEN];
+    byte[] a3 = new byte[ARRLEN];
+    byte[] a4 = new byte[ARRLEN];
+    short[] p2 = new short[ARRLEN/2];
+      int[] p4 = new   int[ARRLEN/4];
+     long[] p8 = new  long[ARRLEN/8];
+    // Initialize
+    int gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      byte val = (byte)(ADD_INIT+i);
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = (byte)VALUE;
+      a3[i] = (byte)-VALUE;
+      a4[i] = (byte)BIT_MASK;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, (byte)VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, (byte)VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, (byte)VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, (byte)VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, (byte)-VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, (byte)-VALUE);
+      test_diva(a0, a1, a3);
+      test_andc(a0, a1);
+      test_andv(a0, a1, (byte)BIT_MASK);
+      test_anda(a0, a1, a4);
+      test_orc(a0, a1);
+      test_orv(a0, a1, (byte)BIT_MASK);
+      test_ora(a0, a1, a4);
+      test_xorc(a0, a1);
+      test_xorv(a0, a1, (byte)BIT_MASK);
+      test_xora(a0, a1, a4);
+      test_sllc(a0, a1);
+      test_sllv(a0, a1, VALUE);
+      test_srlc(a0, a1);
+      test_srlv(a0, a1, VALUE);
+      test_srac(a0, a1);
+      test_srav(a0, a1, VALUE);
+      test_sllc_n(a0, a1);
+      test_sllv(a0, a1, -VALUE);
+      test_srlc_n(a0, a1);
+      test_srlv(a0, a1, -VALUE);
+      test_srac_n(a0, a1);
+      test_srav(a0, a1, -VALUE);
+      test_sllc_o(a0, a1);
+      test_sllv(a0, a1, SHIFT);
+      test_srlc_o(a0, a1);
+      test_srlv(a0, a1, SHIFT);
+      test_srac_o(a0, a1);
+      test_srav(a0, a1, SHIFT);
+      test_sllc_on(a0, a1);
+      test_sllv(a0, a1, -SHIFT);
+      test_srlc_on(a0, a1);
+      test_srlv(a0, a1, -SHIFT);
+      test_srac_on(a0, a1);
+      test_srav(a0, a1, -SHIFT);
+      test_pack2(p2, a1);
+      test_unpack2(a0, p2);
+      test_pack2_swap(p2, a1);
+      test_unpack2_swap(a0, p2);
+      test_pack4(p4, a1);
+      test_unpack4(a0, p4);
+      test_pack4_swap(p4, a1);
+      test_unpack4_swap(a0, p4);
+      test_pack8(p8, a1);
+      test_unpack8(a0, p8);
+      test_pack8_swap(p8, a1);
+      test_unpack8_swap(a0, p8);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      int sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+
+      test_addc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, (byte)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, (byte)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, (byte)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, (byte)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, (byte)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, (byte)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
+      }
+
+      test_andc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_andv(a0, a1, (byte)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_anda(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_anda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
+      }
+
+      test_orc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_orv(a0, a1, (byte)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_ora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
+      }
+
+      test_xorc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xorv(a0, a1, (byte)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
+      }
+
+      test_sllc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
+      }
+      test_sllv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
+      }
+
+      test_srlc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
+      }
+      test_srlv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
+      }
+
+      test_srac(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
+      }
+      test_srav(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
+      }
+
+      test_sllc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
+      }
+      test_sllv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
+      }
+
+      test_srlc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
+      }
+      test_srlv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
+      }
+
+      test_srac_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
+      }
+      test_srav(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
+      }
+
+      test_sllc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
+      }
+      test_sllv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
+      }
+
+      test_srlc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
+      }
+      test_srlv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
+      }
+
+      test_srac_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
+      }
+      test_srav(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
+      }
+
+      test_sllc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
+      }
+      test_sllv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
+      }
+
+      test_srlc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+      test_srlv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+
+      test_srac_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
+      }
+      test_srav(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
+      }
+
+      test_pack2(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+      test_pack2_swap(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2_swap: ", i, p2[i], (short)(((short)(ADD_INIT+2*i+1) & 0xFF) | ((short)(ADD_INIT+2*i) << 8)));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2_swap(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2_swap: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+      test_pack4(p4, a1);
+      for (int i=0; i<ARRLEN/4; i++) {
+        errn += verify("test_pack4: ", i, p4[i],  ((int)(ADD_INIT+4*i+0) & 0xFF) |
+                                                 (((int)(ADD_INIT+4*i+1) & 0xFF) <<  8)  |
+                                                 (((int)(ADD_INIT+4*i+2) & 0xFF) << 16)  |
+                                                 (((int)(ADD_INIT+4*i+3) & 0xFF) << 24));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack4(a0, p4);
+      for (int i=0; i<(ARRLEN&(-4)); i++) {
+        errn += verify("test_unpack4: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+      test_pack4_swap(p4, a1);
+      for (int i=0; i<ARRLEN/4; i++) {
+        errn += verify("test_pack4_swap: ", i, p4[i],  ((int)(ADD_INIT+4*i+3) & 0xFF) |
+                                                      (((int)(ADD_INIT+4*i+2) & 0xFF) <<  8)  |
+                                                      (((int)(ADD_INIT+4*i+1) & 0xFF) << 16)  |
+                                                      (((int)(ADD_INIT+4*i+0) & 0xFF) << 24));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack4_swap(a0, p4);
+      for (int i=0; i<(ARRLEN&(-4)); i++) {
+        errn += verify("test_unpack4_swap: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+      test_pack8(p8, a1);
+      for (int i=0; i<ARRLEN/8; i++) {
+        errn += verify("test_pack8: ", i, p8[i],  ((long)(ADD_INIT+8*i+0) & 0xFFl) |
+                                                 (((long)(ADD_INIT+8*i+1) & 0xFFl) <<  8)  |
+                                                 (((long)(ADD_INIT+8*i+2) & 0xFFl) << 16)  |
+                                                 (((long)(ADD_INIT+8*i+3) & 0xFFl) << 24)  |
+                                                 (((long)(ADD_INIT+8*i+4) & 0xFFl) << 32)  |
+                                                 (((long)(ADD_INIT+8*i+5) & 0xFFl) << 40)  |
+                                                 (((long)(ADD_INIT+8*i+6) & 0xFFl) << 48)  |
+                                                 (((long)(ADD_INIT+8*i+7) & 0xFFl) << 56));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack8(a0, p8);
+      for (int i=0; i<(ARRLEN&(-8)); i++) {
+        errn += verify("test_unpack8: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+      test_pack8_swap(p8, a1);
+      for (int i=0; i<ARRLEN/8; i++) {
+        errn += verify("test_pack8_swap: ", i, p8[i],  ((long)(ADD_INIT+8*i+7) & 0xFFl) |
+                                                      (((long)(ADD_INIT+8*i+6) & 0xFFl) <<  8)  |
+                                                      (((long)(ADD_INIT+8*i+5) & 0xFFl) << 16)  |
+                                                      (((long)(ADD_INIT+8*i+4) & 0xFFl) << 24)  |
+                                                      (((long)(ADD_INIT+8*i+3) & 0xFFl) << 32)  |
+                                                      (((long)(ADD_INIT+8*i+2) & 0xFFl) << 40)  |
+                                                      (((long)(ADD_INIT+8*i+1) & 0xFFl) << 48)  |
+                                                      (((long)(ADD_INIT+8*i+0) & 0xFFl) << 56));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack8_swap(a0, p8);
+      for (int i=0; i<(ARRLEN&(-8)); i++) {
+        errn += verify("test_unpack8_swap: ", i, a0[i], (byte)(ADD_INIT+i));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, (byte)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, (byte)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (byte)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (byte)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (byte)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (byte)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andv(a0, a1, (byte)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_anda(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_anda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orv(a0, a1, (byte)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorv(a0, a1, (byte)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2_swap(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2_swap(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2_swap: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack4(p4, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack4: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack4(a0, p4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack4: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack4_swap(p4, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack4_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack4_swap(a0, p4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack4_swap: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack8(p8, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack8: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack8(a0, p8);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack8: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack8_swap(p8, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack8_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack8_swap(a0, p8);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack8_swap: " + (end - start));
+
+    return errn;
+  }
+
+  static int test_sum(byte[] a1) {
+    int sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]+VALUE);
+    }
+  }
+  static void test_addv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]+b);
+    }
+  }
+  static void test_adda(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]-VALUE);
+    }
+  }
+  static void test_subv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]-b);
+    }
+  }
+  static void test_suba(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]*b);
+    }
+  }
+  static void test_mula(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]/b);
+    }
+  }
+  static void test_diva(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]/a2[i]);
+    }
+  }
+
+  static void test_andc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]&BIT_MASK);
+    }
+  }
+  static void test_andv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]&b);
+    }
+  }
+  static void test_anda(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]&a2[i]);
+    }
+  }
+
+  static void test_orc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]|BIT_MASK);
+    }
+  }
+  static void test_orv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]|b);
+    }
+  }
+  static void test_ora(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]|a2[i]);
+    }
+  }
+
+  static void test_xorc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]^BIT_MASK);
+    }
+  }
+  static void test_xorv(byte[] a0, byte[] a1, byte b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]^b);
+    }
+  }
+  static void test_xora(byte[] a0, byte[] a1, byte[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]^a2[i]);
+    }
+  }
+
+  static void test_sllc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<VALUE);
+    }
+  }
+  static void test_sllc_n(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<(-VALUE));
+    }
+  }
+  static void test_sllc_o(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<SHIFT);
+    }
+  }
+  static void test_sllc_on(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<(-SHIFT));
+    }
+  }
+  static void test_sllv(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<b);
+    }
+  }
+
+  static void test_srlc(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>>VALUE);
+    }
+  }
+  static void test_srlc_n(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>>(-VALUE));
+    }
+  }
+  static void test_srlc_o(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>>SHIFT);
+    }
+  }
+  static void test_srlc_on(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>>(-SHIFT));
+    }
+  }
+  static void test_srlv(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>>b);
+    }
+  }
+
+  static void test_srac(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>VALUE);
+    }
+  }
+  static void test_srac_n(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>(-VALUE));
+    }
+  }
+  static void test_srac_o(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>SHIFT);
+    }
+  }
+  static void test_srac_on(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>(-SHIFT));
+    }
+  }
+  static void test_srav(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>b);
+    }
+  }
+
+  static void test_pack2(short[] p2, byte[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      short l0 = (short)a1[i*2+0];
+      short l1 = (short)a1[i*2+1];
+      p2[i] = (short)((l1 << 8) | (l0 & 0xFF));
+    }
+  }
+  static void test_unpack2(byte[] a0, short[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      short l = p2[i];
+      a0[i*2+0] = (byte)(l & 0xFF);
+      a0[i*2+1] = (byte)(l >> 8);
+    }
+  }
+  static void test_pack2_swap(short[] p2, byte[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      short l0 = (short)a1[i*2+0];
+      short l1 = (short)a1[i*2+1];
+      p2[i] = (short)((l0 << 8) | (l1 & 0xFF));
+    }
+  }
+  static void test_unpack2_swap(byte[] a0, short[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      short l = p2[i];
+      a0[i*2+0] = (byte)(l >> 8);
+      a0[i*2+1] = (byte)(l & 0xFF);
+    }
+  }
+
+  static void test_pack4(int[] p4, byte[] a1) {
+    if (p4.length*4 > a1.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      int l0 = (int)a1[i*4+0];
+      int l1 = (int)a1[i*4+1];
+      int l2 = (int)a1[i*4+2];
+      int l3 = (int)a1[i*4+3];
+      p4[i] = (l0 & 0xFF) |
+             ((l1 & 0xFF) <<  8) |
+             ((l2 & 0xFF) << 16) |
+             ((l3 & 0xFF) << 24);
+    }
+  }
+  static void test_unpack4(byte[] a0, int[] p4) {
+    if (p4.length*4 > a0.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      int l = p4[i];
+      a0[i*4+0] = (byte)(l & 0xFF);
+      a0[i*4+1] = (byte)(l >>  8);
+      a0[i*4+2] = (byte)(l >> 16);
+      a0[i*4+3] = (byte)(l >> 24);
+    }
+  }
+  static void test_pack4_swap(int[] p4, byte[] a1) {
+    if (p4.length*4 > a1.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      int l0 = (int)a1[i*4+0];
+      int l1 = (int)a1[i*4+1];
+      int l2 = (int)a1[i*4+2];
+      int l3 = (int)a1[i*4+3];
+      p4[i] = (l3 & 0xFF) |
+             ((l2 & 0xFF) <<  8) |
+             ((l1 & 0xFF) << 16) |
+             ((l0 & 0xFF) << 24);
+    }
+  }
+  static void test_unpack4_swap(byte[] a0, int[] p4) {
+    if (p4.length*4 > a0.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      int l = p4[i];
+      a0[i*4+0] = (byte)(l >> 24);
+      a0[i*4+1] = (byte)(l >> 16);
+      a0[i*4+2] = (byte)(l >>  8);
+      a0[i*4+3] = (byte)(l & 0xFF);
+    }
+  }
+
+  static void test_pack8(long[] p8, byte[] a1) {
+    if (p8.length*8 > a1.length) return;
+    for (int i = 0; i < p8.length; i+=1) {
+      long l0 = (long)a1[i*8+0];
+      long l1 = (long)a1[i*8+1];
+      long l2 = (long)a1[i*8+2];
+      long l3 = (long)a1[i*8+3];
+      long l4 = (long)a1[i*8+4];
+      long l5 = (long)a1[i*8+5];
+      long l6 = (long)a1[i*8+6];
+      long l7 = (long)a1[i*8+7];
+      p8[i] = (l0 & 0xFFl) |
+             ((l1 & 0xFFl) <<  8) |
+             ((l2 & 0xFFl) << 16) |
+             ((l3 & 0xFFl) << 24) |
+             ((l4 & 0xFFl) << 32) |
+             ((l5 & 0xFFl) << 40) |
+             ((l6 & 0xFFl) << 48) |
+             ((l7 & 0xFFl) << 56);
+    }
+  }
+  static void test_unpack8(byte[] a0, long[] p8) {
+    if (p8.length*8 > a0.length) return;
+    for (int i = 0; i < p8.length; i+=1) {
+      long l = p8[i];
+      a0[i*8+0] = (byte)(l & 0xFFl);
+      a0[i*8+1] = (byte)(l >>  8);
+      a0[i*8+2] = (byte)(l >> 16);
+      a0[i*8+3] = (byte)(l >> 24);
+      a0[i*8+4] = (byte)(l >> 32);
+      a0[i*8+5] = (byte)(l >> 40);
+      a0[i*8+6] = (byte)(l >> 48);
+      a0[i*8+7] = (byte)(l >> 56);
+    }
+  }
+  static void test_pack8_swap(long[] p8, byte[] a1) {
+    if (p8.length*8 > a1.length) return;
+    for (int i = 0; i < p8.length; i+=1) {
+      long l0 = (long)a1[i*8+0];
+      long l1 = (long)a1[i*8+1];
+      long l2 = (long)a1[i*8+2];
+      long l3 = (long)a1[i*8+3];
+      long l4 = (long)a1[i*8+4];
+      long l5 = (long)a1[i*8+5];
+      long l6 = (long)a1[i*8+6];
+      long l7 = (long)a1[i*8+7];
+      p8[i] = (l7 & 0xFFl) |
+             ((l6 & 0xFFl) <<  8) |
+             ((l5 & 0xFFl) << 16) |
+             ((l4 & 0xFFl) << 24) |
+             ((l3 & 0xFFl) << 32) |
+             ((l2 & 0xFFl) << 40) |
+             ((l1 & 0xFFl) << 48) |
+             ((l0 & 0xFFl) << 56);
+    }
+  }
+  static void test_unpack8_swap(byte[] a0, long[] p8) {
+    if (p8.length*8 > a0.length) return;
+    for (int i = 0; i < p8.length; i+=1) {
+      long l = p8[i];
+      a0[i*8+0] = (byte)(l >> 56);
+      a0[i*8+1] = (byte)(l >> 48);
+      a0[i*8+2] = (byte)(l >> 40);
+      a0[i*8+3] = (byte)(l >> 32);
+      a0[i*8+4] = (byte)(l >> 24);
+      a0[i*8+5] = (byte)(l >> 16);
+      a0[i*8+6] = (byte)(l >>  8);
+      a0[i*8+7] = (byte)(l & 0xFFl);
+    }
+  }
+
+  static int verify(String text, int i, byte elem, byte val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestDoubleVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestDoubleVect
+ */
+
+public class TestDoubleVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final double ADD_INIT = -7500.;
+  private static final double VALUE = 15.;
+
+  public static void main(String args[]) {
+    System.out.println("Testing Double vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    double[] a0 = new double[ARRLEN];
+    double[] a1 = new double[ARRLEN];
+    double[] a2 = new double[ARRLEN];
+    double[] a3 = new double[ARRLEN];
+    // Initialize
+    double gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      double val = ADD_INIT+(double)i;
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = VALUE;
+      a3[i] = -VALUE;
+    }
+
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, -VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, -VALUE);
+      test_diva(a0, a1, a3);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      double sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+      // Overwrite with NaN values
+      a1[0] = Double.NaN;
+      a1[1] = Double.POSITIVE_INFINITY;
+      a1[2] = Double.NEGATIVE_INFINITY;
+      a1[3] = Double.MAX_VALUE;
+      a1[4] = Double.MIN_VALUE;
+      a1[5] = Double.MIN_NORMAL;
+
+      a2[6] = a1[0];
+      a2[7] = a1[1];
+      a2[8] = a1[2];
+      a2[9] = a1[3];
+      a2[10] = a1[4];
+      a2[11] = a1[5];
+
+      a3[6] = -a2[6];
+      a3[7] = -a2[7];
+      a3[8] = -a2[8];
+      a3[9] = -a2[9];
+      a3[10] = -a2[10];
+      a3[11] = -a2[11];
+
+      test_addc(a0, a1);
+      errn += verify("test_addc: ", 0, a0[0], (Double.NaN+VALUE));
+      errn += verify("test_addc: ", 1, a0[1], (Double.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_addc: ", 2, a0[2], (Double.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_addc: ", 3, a0[3], (Double.MAX_VALUE+VALUE));
+      errn += verify("test_addc: ", 4, a0[4], (Double.MIN_VALUE+VALUE));
+      errn += verify("test_addc: ", 5, a0[5], (Double.MIN_NORMAL+VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, VALUE);
+      errn += verify("test_addv: ", 0, a0[0], (Double.NaN+VALUE));
+      errn += verify("test_addv: ", 1, a0[1], (Double.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_addv: ", 2, a0[2], (Double.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_addv: ", 3, a0[3], (Double.MAX_VALUE+VALUE));
+      errn += verify("test_addv: ", 4, a0[4], (Double.MIN_VALUE+VALUE));
+      errn += verify("test_addv: ", 5, a0[5], (Double.MIN_NORMAL+VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      errn += verify("test_adda: ", 0, a0[0], (Double.NaN+VALUE));
+      errn += verify("test_adda: ", 1, a0[1], (Double.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_adda: ", 2, a0[2], (Double.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_adda: ", 3, a0[3], (Double.MAX_VALUE+VALUE));
+      errn += verify("test_adda: ", 4, a0[4], (Double.MIN_VALUE+VALUE));
+      errn += verify("test_adda: ", 5, a0[5], (Double.MIN_NORMAL+VALUE));
+      errn += verify("test_adda: ", 6, a0[6], ((ADD_INIT+6)+Double.NaN));
+      errn += verify("test_adda: ", 7, a0[7], ((ADD_INIT+7)+Double.POSITIVE_INFINITY));
+      errn += verify("test_adda: ", 8, a0[8], ((ADD_INIT+8)+Double.NEGATIVE_INFINITY));
+      errn += verify("test_adda: ", 9, a0[9], ((ADD_INIT+9)+Double.MAX_VALUE));
+      errn += verify("test_adda: ", 10, a0[10], ((ADD_INIT+10)+Double.MIN_VALUE));
+      errn += verify("test_adda: ", 11, a0[11], ((ADD_INIT+11)+Double.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      errn += verify("test_subc: ", 0, a0[0], (Double.NaN-VALUE));
+      errn += verify("test_subc: ", 1, a0[1], (Double.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_subc: ", 2, a0[2], (Double.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_subc: ", 3, a0[3], (Double.MAX_VALUE-VALUE));
+      errn += verify("test_subc: ", 4, a0[4], (Double.MIN_VALUE-VALUE));
+      errn += verify("test_subc: ", 5, a0[5], (Double.MIN_NORMAL-VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, VALUE);
+      errn += verify("test_subv: ", 0, a0[0], (Double.NaN-VALUE));
+      errn += verify("test_subv: ", 1, a0[1], (Double.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_subv: ", 2, a0[2], (Double.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_subv: ", 3, a0[3], (Double.MAX_VALUE-VALUE));
+      errn += verify("test_subv: ", 4, a0[4], (Double.MIN_VALUE-VALUE));
+      errn += verify("test_subv: ", 5, a0[5], (Double.MIN_NORMAL-VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      errn += verify("test_suba: ", 0, a0[0], (Double.NaN-VALUE));
+      errn += verify("test_suba: ", 1, a0[1], (Double.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_suba: ", 2, a0[2], (Double.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_suba: ", 3, a0[3], (Double.MAX_VALUE-VALUE));
+      errn += verify("test_suba: ", 4, a0[4], (Double.MIN_VALUE-VALUE));
+      errn += verify("test_suba: ", 5, a0[5], (Double.MIN_NORMAL-VALUE));
+      errn += verify("test_suba: ", 6, a0[6], ((ADD_INIT+6)-Double.NaN));
+      errn += verify("test_suba: ", 7, a0[7], ((ADD_INIT+7)-Double.POSITIVE_INFINITY));
+      errn += verify("test_suba: ", 8, a0[8], ((ADD_INIT+8)-Double.NEGATIVE_INFINITY));
+      errn += verify("test_suba: ", 9, a0[9], ((ADD_INIT+9)-Double.MAX_VALUE));
+      errn += verify("test_suba: ", 10, a0[10], ((ADD_INIT+10)-Double.MIN_VALUE));
+      errn += verify("test_suba: ", 11, a0[11], ((ADD_INIT+11)-Double.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      errn += verify("test_mulc: ", 0, a0[0], (Double.NaN*VALUE));
+      errn += verify("test_mulc: ", 1, a0[1], (Double.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mulc: ", 2, a0[2], (Double.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mulc: ", 3, a0[3], (Double.MAX_VALUE*VALUE));
+      errn += verify("test_mulc: ", 4, a0[4], (Double.MIN_VALUE*VALUE));
+      errn += verify("test_mulc: ", 5, a0[5], (Double.MIN_NORMAL*VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, VALUE);
+      errn += verify("test_mulv: ", 0, a0[0], (Double.NaN*VALUE));
+      errn += verify("test_mulv: ", 1, a0[1], (Double.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mulv: ", 2, a0[2], (Double.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mulv: ", 3, a0[3], (Double.MAX_VALUE*VALUE));
+      errn += verify("test_mulv: ", 4, a0[4], (Double.MIN_VALUE*VALUE));
+      errn += verify("test_mulv: ", 5, a0[5], (Double.MIN_NORMAL*VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      errn += verify("test_mula: ", 0, a0[0], (Double.NaN*VALUE));
+      errn += verify("test_mula: ", 1, a0[1], (Double.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mula: ", 2, a0[2], (Double.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mula: ", 3, a0[3], (Double.MAX_VALUE*VALUE));
+      errn += verify("test_mula: ", 4, a0[4], (Double.MIN_VALUE*VALUE));
+      errn += verify("test_mula: ", 5, a0[5], (Double.MIN_NORMAL*VALUE));
+      errn += verify("test_mula: ", 6, a0[6], ((ADD_INIT+6)*Double.NaN));
+      errn += verify("test_mula: ", 7, a0[7], ((ADD_INIT+7)*Double.POSITIVE_INFINITY));
+      errn += verify("test_mula: ", 8, a0[8], ((ADD_INIT+8)*Double.NEGATIVE_INFINITY));
+      errn += verify("test_mula: ", 9, a0[9], ((ADD_INIT+9)*Double.MAX_VALUE));
+      errn += verify("test_mula: ", 10, a0[10], ((ADD_INIT+10)*Double.MIN_VALUE));
+      errn += verify("test_mula: ", 11, a0[11], ((ADD_INIT+11)*Double.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      errn += verify("test_divc: ", 0, a0[0], (Double.NaN/VALUE));
+      errn += verify("test_divc: ", 1, a0[1], (Double.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_divc: ", 2, a0[2], (Double.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_divc: ", 3, a0[3], (Double.MAX_VALUE/VALUE));
+      errn += verify("test_divc: ", 4, a0[4], (Double.MIN_VALUE/VALUE));
+      errn += verify("test_divc: ", 5, a0[5], (Double.MIN_NORMAL/VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, VALUE);
+      errn += verify("test_divv: ", 0, a0[0], (Double.NaN/VALUE));
+      errn += verify("test_divv: ", 1, a0[1], (Double.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_divv: ", 2, a0[2], (Double.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_divv: ", 3, a0[3], (Double.MAX_VALUE/VALUE));
+      errn += verify("test_divv: ", 4, a0[4], (Double.MIN_VALUE/VALUE));
+      errn += verify("test_divv: ", 5, a0[5], (Double.MIN_NORMAL/VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      errn += verify("test_diva: ", 0, a0[0], (Double.NaN/VALUE));
+      errn += verify("test_diva: ", 1, a0[1], (Double.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_diva: ", 2, a0[2], (Double.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_diva: ", 3, a0[3], (Double.MAX_VALUE/VALUE));
+      errn += verify("test_diva: ", 4, a0[4], (Double.MIN_VALUE/VALUE));
+      errn += verify("test_diva: ", 5, a0[5], (Double.MIN_NORMAL/VALUE));
+      errn += verify("test_diva: ", 6, a0[6], ((ADD_INIT+6)/Double.NaN));
+      errn += verify("test_diva: ", 7, a0[7], ((ADD_INIT+7)/Double.POSITIVE_INFINITY));
+      errn += verify("test_diva: ", 8, a0[8], ((ADD_INIT+8)/Double.NEGATIVE_INFINITY));
+      errn += verify("test_diva: ", 9, a0[9], ((ADD_INIT+9)/Double.MAX_VALUE));
+      errn += verify("test_diva: ", 10, a0[10], ((ADD_INIT+10)/Double.MIN_VALUE));
+      errn += verify("test_diva: ", 11, a0[11], ((ADD_INIT+11)/Double.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      errn += verify("test_mulc_n: ", 0, a0[0], (Double.NaN*(-VALUE)));
+      errn += verify("test_mulc_n: ", 1, a0[1], (Double.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulc_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulc_n: ", 3, a0[3], (Double.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mulc_n: ", 4, a0[4], (Double.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mulc_n: ", 5, a0[5], (Double.MIN_NORMAL*(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, -VALUE);
+      errn += verify("test_mulv_n: ", 0, a0[0], (Double.NaN*(-VALUE)));
+      errn += verify("test_mulv_n: ", 1, a0[1], (Double.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulv_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulv_n: ", 3, a0[3], (Double.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mulv_n: ", 4, a0[4], (Double.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mulv_n: ", 5, a0[5], (Double.MIN_NORMAL*(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      errn += verify("test_mula_n: ", 0, a0[0], (Double.NaN*(-VALUE)));
+      errn += verify("test_mula_n: ", 1, a0[1], (Double.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mula_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mula_n: ", 3, a0[3], (Double.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mula_n: ", 4, a0[4], (Double.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mula_n: ", 5, a0[5], (Double.MIN_NORMAL*(-VALUE)));
+      errn += verify("test_mula_n: ", 6, a0[6], ((ADD_INIT+6)*(-Double.NaN)));
+      errn += verify("test_mula_n: ", 7, a0[7], ((ADD_INIT+7)*(-Double.POSITIVE_INFINITY)));
+      errn += verify("test_mula_n: ", 8, a0[8], ((ADD_INIT+8)*(-Double.NEGATIVE_INFINITY)));
+      errn += verify("test_mula_n: ", 9, a0[9], ((ADD_INIT+9)*(-Double.MAX_VALUE)));
+      errn += verify("test_mula_n: ", 10, a0[10], ((ADD_INIT+10)*(-Double.MIN_VALUE)));
+      errn += verify("test_mula_n: ", 11, a0[11], ((ADD_INIT+11)*(-Double.MIN_NORMAL)));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      errn += verify("test_divc_n: ", 0, a0[0], (Double.NaN/(-VALUE)));
+      errn += verify("test_divc_n: ", 1, a0[1], (Double.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divc_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divc_n: ", 3, a0[3], (Double.MAX_VALUE/(-VALUE)));
+      errn += verify("test_divc_n: ", 4, a0[4], (Double.MIN_VALUE/(-VALUE)));
+      errn += verify("test_divc_n: ", 5, a0[5], (Double.MIN_NORMAL/(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, -VALUE);
+      errn += verify("test_divv_n: ", 0, a0[0], (Double.NaN/(-VALUE)));
+      errn += verify("test_divv_n: ", 1, a0[1], (Double.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divv_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divv_n: ", 3, a0[3], (Double.MAX_VALUE/(-VALUE)));
+      errn += verify("test_divv_n: ", 4, a0[4], (Double.MIN_VALUE/(-VALUE)));
+      errn += verify("test_divv_n: ", 5, a0[5], (Double.MIN_NORMAL/(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      errn += verify("test_diva_n: ", 0, a0[0], (Double.NaN/(-VALUE)));
+      errn += verify("test_diva_n: ", 1, a0[1], (Double.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_diva_n: ", 2, a0[2], (Double.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_diva_n: ", 3, a0[3], (Double.MAX_VALUE/(-VALUE)));
+      errn += verify("test_diva_n: ", 4, a0[4], (Double.MIN_VALUE/(-VALUE)));
+      errn += verify("test_diva_n: ", 5, a0[5], (Double.MIN_NORMAL/(-VALUE)));
+      errn += verify("test_diva_n: ", 6, a0[6], ((ADD_INIT+6)/(-Double.NaN)));
+      errn += verify("test_diva_n: ", 7, a0[7], ((ADD_INIT+7)/(-Double.POSITIVE_INFINITY)));
+      errn += verify("test_diva_n: ", 8, a0[8], ((ADD_INIT+8)/(-Double.NEGATIVE_INFINITY)));
+      errn += verify("test_diva_n: ", 9, a0[9], ((ADD_INIT+9)/(-Double.MAX_VALUE)));
+      errn += verify("test_diva_n: ", 10, a0[10], ((ADD_INIT+10)/(-Double.MIN_VALUE)));
+      errn += verify("test_diva_n: ", 11, a0[11], ((ADD_INIT+11)/(-Double.MIN_NORMAL)));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    return errn;
+  }
+
+  static double test_sum(double[] a1) {
+    double sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+VALUE);
+    }
+  }
+  static void test_addv(double[] a0, double[] a1, double b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+b);
+    }
+  }
+  static void test_adda(double[] a0, double[] a1, double[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-VALUE);
+    }
+  }
+  static void test_subv(double[] a0, double[] a1, double b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-b);
+    }
+  }
+  static void test_suba(double[] a0, double[] a1, double[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(double[] a0, double[] a1, double b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*b);
+    }
+  }
+  static void test_mula(double[] a0, double[] a1, double[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(double[] a0, double[] a1, double b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/b);
+    }
+  }
+  static void test_diva(double[] a0, double[] a1, double[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/a2[i]);
+    }
+  }
+
+  static int verify(String text, int i, double elem, double val) {
+    if (elem != val && !(Double.isNaN(elem) && Double.isNaN(val))) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestFloatVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestFloatVect
+ */
+
+public class TestFloatVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final float ADD_INIT = -7500.f;
+  private static final float VALUE = 15.f;
+
+  public static void main(String args[]) {
+    System.out.println("Testing Float vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    float[] a0 = new float[ARRLEN];
+    float[] a1 = new float[ARRLEN];
+    float[] a2 = new float[ARRLEN];
+    float[] a3 = new float[ARRLEN];
+    // Initialize
+    float gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      float val = ADD_INIT+(float)i;
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = VALUE;
+      a3[i] = -VALUE;
+    }
+
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, -VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, -VALUE);
+      test_diva(a0, a1, a3);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      float sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+      // Overwrite with NaN values
+      a1[0] = Float.NaN;
+      a1[1] = Float.POSITIVE_INFINITY;
+      a1[2] = Float.NEGATIVE_INFINITY;
+      a1[3] = Float.MAX_VALUE;
+      a1[4] = Float.MIN_VALUE;
+      a1[5] = Float.MIN_NORMAL;
+
+      a2[6] = a1[0];
+      a2[7] = a1[1];
+      a2[8] = a1[2];
+      a2[9] = a1[3];
+      a2[10] = a1[4];
+      a2[11] = a1[5];
+
+      a3[6] = -a2[6];
+      a3[7] = -a2[7];
+      a3[8] = -a2[8];
+      a3[9] = -a2[9];
+      a3[10] = -a2[10];
+      a3[11] = -a2[11];
+
+      test_addc(a0, a1);
+      errn += verify("test_addc: ", 0, a0[0], (Float.NaN+VALUE));
+      errn += verify("test_addc: ", 1, a0[1], (Float.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_addc: ", 2, a0[2], (Float.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_addc: ", 3, a0[3], (Float.MAX_VALUE+VALUE));
+      errn += verify("test_addc: ", 4, a0[4], (Float.MIN_VALUE+VALUE));
+      errn += verify("test_addc: ", 5, a0[5], (Float.MIN_NORMAL+VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, VALUE);
+      errn += verify("test_addv: ", 0, a0[0], (Float.NaN+VALUE));
+      errn += verify("test_addv: ", 1, a0[1], (Float.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_addv: ", 2, a0[2], (Float.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_addv: ", 3, a0[3], (Float.MAX_VALUE+VALUE));
+      errn += verify("test_addv: ", 4, a0[4], (Float.MIN_VALUE+VALUE));
+      errn += verify("test_addv: ", 5, a0[5], (Float.MIN_NORMAL+VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      errn += verify("test_adda: ", 0, a0[0], (Float.NaN+VALUE));
+      errn += verify("test_adda: ", 1, a0[1], (Float.POSITIVE_INFINITY+VALUE));
+      errn += verify("test_adda: ", 2, a0[2], (Float.NEGATIVE_INFINITY+VALUE));
+      errn += verify("test_adda: ", 3, a0[3], (Float.MAX_VALUE+VALUE));
+      errn += verify("test_adda: ", 4, a0[4], (Float.MIN_VALUE+VALUE));
+      errn += verify("test_adda: ", 5, a0[5], (Float.MIN_NORMAL+VALUE));
+      errn += verify("test_adda: ", 6, a0[6], ((ADD_INIT+6)+Float.NaN));
+      errn += verify("test_adda: ", 7, a0[7], ((ADD_INIT+7)+Float.POSITIVE_INFINITY));
+      errn += verify("test_adda: ", 8, a0[8], ((ADD_INIT+8)+Float.NEGATIVE_INFINITY));
+      errn += verify("test_adda: ", 9, a0[9], ((ADD_INIT+9)+Float.MAX_VALUE));
+      errn += verify("test_adda: ", 10, a0[10], ((ADD_INIT+10)+Float.MIN_VALUE));
+      errn += verify("test_adda: ", 11, a0[11], ((ADD_INIT+11)+Float.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], ((ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      errn += verify("test_subc: ", 0, a0[0], (Float.NaN-VALUE));
+      errn += verify("test_subc: ", 1, a0[1], (Float.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_subc: ", 2, a0[2], (Float.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_subc: ", 3, a0[3], (Float.MAX_VALUE-VALUE));
+      errn += verify("test_subc: ", 4, a0[4], (Float.MIN_VALUE-VALUE));
+      errn += verify("test_subc: ", 5, a0[5], (Float.MIN_NORMAL-VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, VALUE);
+      errn += verify("test_subv: ", 0, a0[0], (Float.NaN-VALUE));
+      errn += verify("test_subv: ", 1, a0[1], (Float.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_subv: ", 2, a0[2], (Float.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_subv: ", 3, a0[3], (Float.MAX_VALUE-VALUE));
+      errn += verify("test_subv: ", 4, a0[4], (Float.MIN_VALUE-VALUE));
+      errn += verify("test_subv: ", 5, a0[5], (Float.MIN_NORMAL-VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      errn += verify("test_suba: ", 0, a0[0], (Float.NaN-VALUE));
+      errn += verify("test_suba: ", 1, a0[1], (Float.POSITIVE_INFINITY-VALUE));
+      errn += verify("test_suba: ", 2, a0[2], (Float.NEGATIVE_INFINITY-VALUE));
+      errn += verify("test_suba: ", 3, a0[3], (Float.MAX_VALUE-VALUE));
+      errn += verify("test_suba: ", 4, a0[4], (Float.MIN_VALUE-VALUE));
+      errn += verify("test_suba: ", 5, a0[5], (Float.MIN_NORMAL-VALUE));
+      errn += verify("test_suba: ", 6, a0[6], ((ADD_INIT+6)-Float.NaN));
+      errn += verify("test_suba: ", 7, a0[7], ((ADD_INIT+7)-Float.POSITIVE_INFINITY));
+      errn += verify("test_suba: ", 8, a0[8], ((ADD_INIT+8)-Float.NEGATIVE_INFINITY));
+      errn += verify("test_suba: ", 9, a0[9], ((ADD_INIT+9)-Float.MAX_VALUE));
+      errn += verify("test_suba: ", 10, a0[10], ((ADD_INIT+10)-Float.MIN_VALUE));
+      errn += verify("test_suba: ", 11, a0[11], ((ADD_INIT+11)-Float.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], ((ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      errn += verify("test_mulc: ", 0, a0[0], (Float.NaN*VALUE));
+      errn += verify("test_mulc: ", 1, a0[1], (Float.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mulc: ", 2, a0[2], (Float.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mulc: ", 3, a0[3], (Float.MAX_VALUE*VALUE));
+      errn += verify("test_mulc: ", 4, a0[4], (Float.MIN_VALUE*VALUE));
+      errn += verify("test_mulc: ", 5, a0[5], (Float.MIN_NORMAL*VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, VALUE);
+      errn += verify("test_mulv: ", 0, a0[0], (Float.NaN*VALUE));
+      errn += verify("test_mulv: ", 1, a0[1], (Float.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mulv: ", 2, a0[2], (Float.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mulv: ", 3, a0[3], (Float.MAX_VALUE*VALUE));
+      errn += verify("test_mulv: ", 4, a0[4], (Float.MIN_VALUE*VALUE));
+      errn += verify("test_mulv: ", 5, a0[5], (Float.MIN_NORMAL*VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      errn += verify("test_mula: ", 0, a0[0], (Float.NaN*VALUE));
+      errn += verify("test_mula: ", 1, a0[1], (Float.POSITIVE_INFINITY*VALUE));
+      errn += verify("test_mula: ", 2, a0[2], (Float.NEGATIVE_INFINITY*VALUE));
+      errn += verify("test_mula: ", 3, a0[3], (Float.MAX_VALUE*VALUE));
+      errn += verify("test_mula: ", 4, a0[4], (Float.MIN_VALUE*VALUE));
+      errn += verify("test_mula: ", 5, a0[5], (Float.MIN_NORMAL*VALUE));
+      errn += verify("test_mula: ", 6, a0[6], ((ADD_INIT+6)*Float.NaN));
+      errn += verify("test_mula: ", 7, a0[7], ((ADD_INIT+7)*Float.POSITIVE_INFINITY));
+      errn += verify("test_mula: ", 8, a0[8], ((ADD_INIT+8)*Float.NEGATIVE_INFINITY));
+      errn += verify("test_mula: ", 9, a0[9], ((ADD_INIT+9)*Float.MAX_VALUE));
+      errn += verify("test_mula: ", 10, a0[10], ((ADD_INIT+10)*Float.MIN_VALUE));
+      errn += verify("test_mula: ", 11, a0[11], ((ADD_INIT+11)*Float.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], ((ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      errn += verify("test_divc: ", 0, a0[0], (Float.NaN/VALUE));
+      errn += verify("test_divc: ", 1, a0[1], (Float.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_divc: ", 2, a0[2], (Float.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_divc: ", 3, a0[3], (Float.MAX_VALUE/VALUE));
+      errn += verify("test_divc: ", 4, a0[4], (Float.MIN_VALUE/VALUE));
+      errn += verify("test_divc: ", 5, a0[5], (Float.MIN_NORMAL/VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, VALUE);
+      errn += verify("test_divv: ", 0, a0[0], (Float.NaN/VALUE));
+      errn += verify("test_divv: ", 1, a0[1], (Float.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_divv: ", 2, a0[2], (Float.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_divv: ", 3, a0[3], (Float.MAX_VALUE/VALUE));
+      errn += verify("test_divv: ", 4, a0[4], (Float.MIN_VALUE/VALUE));
+      errn += verify("test_divv: ", 5, a0[5], (Float.MIN_NORMAL/VALUE));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      errn += verify("test_diva: ", 0, a0[0], (Float.NaN/VALUE));
+      errn += verify("test_diva: ", 1, a0[1], (Float.POSITIVE_INFINITY/VALUE));
+      errn += verify("test_diva: ", 2, a0[2], (Float.NEGATIVE_INFINITY/VALUE));
+      errn += verify("test_diva: ", 3, a0[3], (Float.MAX_VALUE/VALUE));
+      errn += verify("test_diva: ", 4, a0[4], (Float.MIN_VALUE/VALUE));
+      errn += verify("test_diva: ", 5, a0[5], (Float.MIN_NORMAL/VALUE));
+      errn += verify("test_diva: ", 6, a0[6], ((ADD_INIT+6)/Float.NaN));
+      errn += verify("test_diva: ", 7, a0[7], ((ADD_INIT+7)/Float.POSITIVE_INFINITY));
+      errn += verify("test_diva: ", 8, a0[8], ((ADD_INIT+8)/Float.NEGATIVE_INFINITY));
+      errn += verify("test_diva: ", 9, a0[9], ((ADD_INIT+9)/Float.MAX_VALUE));
+      errn += verify("test_diva: ", 10, a0[10], ((ADD_INIT+10)/Float.MIN_VALUE));
+      errn += verify("test_diva: ", 11, a0[11], ((ADD_INIT+11)/Float.MIN_NORMAL));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], ((ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      errn += verify("test_mulc_n: ", 0, a0[0], (Float.NaN*(-VALUE)));
+      errn += verify("test_mulc_n: ", 1, a0[1], (Float.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulc_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulc_n: ", 3, a0[3], (Float.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mulc_n: ", 4, a0[4], (Float.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mulc_n: ", 5, a0[5], (Float.MIN_NORMAL*(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, -VALUE);
+      errn += verify("test_mulv_n: ", 0, a0[0], (Float.NaN*(-VALUE)));
+      errn += verify("test_mulv_n: ", 1, a0[1], (Float.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulv_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mulv_n: ", 3, a0[3], (Float.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mulv_n: ", 4, a0[4], (Float.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mulv_n: ", 5, a0[5], (Float.MIN_NORMAL*(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      errn += verify("test_mula_n: ", 0, a0[0], (Float.NaN*(-VALUE)));
+      errn += verify("test_mula_n: ", 1, a0[1], (Float.POSITIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mula_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY*(-VALUE)));
+      errn += verify("test_mula_n: ", 3, a0[3], (Float.MAX_VALUE*(-VALUE)));
+      errn += verify("test_mula_n: ", 4, a0[4], (Float.MIN_VALUE*(-VALUE)));
+      errn += verify("test_mula_n: ", 5, a0[5], (Float.MIN_NORMAL*(-VALUE)));
+      errn += verify("test_mula_n: ", 6, a0[6], ((ADD_INIT+6)*(-Float.NaN)));
+      errn += verify("test_mula_n: ", 7, a0[7], ((ADD_INIT+7)*(-Float.POSITIVE_INFINITY)));
+      errn += verify("test_mula_n: ", 8, a0[8], ((ADD_INIT+8)*(-Float.NEGATIVE_INFINITY)));
+      errn += verify("test_mula_n: ", 9, a0[9], ((ADD_INIT+9)*(-Float.MAX_VALUE)));
+      errn += verify("test_mula_n: ", 10, a0[10], ((ADD_INIT+10)*(-Float.MIN_VALUE)));
+      errn += verify("test_mula_n: ", 11, a0[11], ((ADD_INIT+11)*(-Float.MIN_NORMAL)));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], ((ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      errn += verify("test_divc_n: ", 0, a0[0], (Float.NaN/(-VALUE)));
+      errn += verify("test_divc_n: ", 1, a0[1], (Float.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divc_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divc_n: ", 3, a0[3], (Float.MAX_VALUE/(-VALUE)));
+      errn += verify("test_divc_n: ", 4, a0[4], (Float.MIN_VALUE/(-VALUE)));
+      errn += verify("test_divc_n: ", 5, a0[5], (Float.MIN_NORMAL/(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, -VALUE);
+      errn += verify("test_divv_n: ", 0, a0[0], (Float.NaN/(-VALUE)));
+      errn += verify("test_divv_n: ", 1, a0[1], (Float.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divv_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_divv_n: ", 3, a0[3], (Float.MAX_VALUE/(-VALUE)));
+      errn += verify("test_divv_n: ", 4, a0[4], (Float.MIN_VALUE/(-VALUE)));
+      errn += verify("test_divv_n: ", 5, a0[5], (Float.MIN_NORMAL/(-VALUE)));
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      errn += verify("test_diva_n: ", 0, a0[0], (Float.NaN/(-VALUE)));
+      errn += verify("test_diva_n: ", 1, a0[1], (Float.POSITIVE_INFINITY/(-VALUE)));
+      errn += verify("test_diva_n: ", 2, a0[2], (Float.NEGATIVE_INFINITY/(-VALUE)));
+      errn += verify("test_diva_n: ", 3, a0[3], (Float.MAX_VALUE/(-VALUE)));
+      errn += verify("test_diva_n: ", 4, a0[4], (Float.MIN_VALUE/(-VALUE)));
+      errn += verify("test_diva_n: ", 5, a0[5], (Float.MIN_NORMAL/(-VALUE)));
+      errn += verify("test_diva_n: ", 6, a0[6], ((ADD_INIT+6)/(-Float.NaN)));
+      errn += verify("test_diva_n: ", 7, a0[7], ((ADD_INIT+7)/(-Float.POSITIVE_INFINITY)));
+      errn += verify("test_diva_n: ", 8, a0[8], ((ADD_INIT+8)/(-Float.NEGATIVE_INFINITY)));
+      errn += verify("test_diva_n: ", 9, a0[9], ((ADD_INIT+9)/(-Float.MAX_VALUE)));
+      errn += verify("test_diva_n: ", 10, a0[10], ((ADD_INIT+10)/(-Float.MIN_VALUE)));
+      errn += verify("test_diva_n: ", 11, a0[11], ((ADD_INIT+11)/(-Float.MIN_NORMAL)));
+      for (int i=12; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    return errn;
+  }
+
+  static float test_sum(float[] a1) {
+    float sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+VALUE);
+    }
+  }
+  static void test_addv(float[] a0, float[] a1, float b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+b);
+    }
+  }
+  static void test_adda(float[] a0, float[] a1, float[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-VALUE);
+    }
+  }
+  static void test_subv(float[] a0, float[] a1, float b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-b);
+    }
+  }
+  static void test_suba(float[] a0, float[] a1, float[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(float[] a0, float[] a1, float b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*b);
+    }
+  }
+  static void test_mula(float[] a0, float[] a1, float[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(float[] a0, float[] a1, float b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/b);
+    }
+  }
+  static void test_diva(float[] a0, float[] a1, float[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (a1[i]/a2[i]);
+    }
+  }
+
+  static int verify(String text, int i, float elem, float val) {
+    if (elem != val && !(Float.isNaN(elem) && Float.isNaN(val))) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestIntVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,1012 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestIntVect
+ */
+
+public class TestIntVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int ADD_INIT = Integer.MAX_VALUE-500;
+  private static final int BIT_MASK = 0xEC80F731;
+  private static final int VALUE = 15;
+  private static final int SHIFT = 32;
+
+  public static void main(String args[]) {
+    System.out.println("Testing Integer vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    int[] a0 = new int[ARRLEN];
+    int[] a1 = new int[ARRLEN];
+    int[] a2 = new int[ARRLEN];
+    int[] a3 = new int[ARRLEN];
+    int[] a4 = new int[ARRLEN];
+    long[] p2 = new long[ARRLEN/2];
+    // Initialize
+    int gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      int val = (int)(ADD_INIT+i);
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = (int)VALUE;
+      a3[i] = (int)-VALUE;
+      a4[i] = (int)BIT_MASK;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, (int)VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, (int)VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, (int)VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, (int)VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, (int)-VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, (int)-VALUE);
+      test_diva(a0, a1, a3);
+      test_andc(a0, a1);
+      test_andv(a0, a1, (int)BIT_MASK);
+      test_anda(a0, a1, a4);
+      test_orc(a0, a1);
+      test_orv(a0, a1, (int)BIT_MASK);
+      test_ora(a0, a1, a4);
+      test_xorc(a0, a1);
+      test_xorv(a0, a1, (int)BIT_MASK);
+      test_xora(a0, a1, a4);
+      test_sllc(a0, a1);
+      test_sllv(a0, a1, VALUE);
+      test_srlc(a0, a1);
+      test_srlv(a0, a1, VALUE);
+      test_srac(a0, a1);
+      test_srav(a0, a1, VALUE);
+      test_sllc_n(a0, a1);
+      test_sllv(a0, a1, -VALUE);
+      test_srlc_n(a0, a1);
+      test_srlv(a0, a1, -VALUE);
+      test_srac_n(a0, a1);
+      test_srav(a0, a1, -VALUE);
+      test_sllc_o(a0, a1);
+      test_sllv(a0, a1, SHIFT);
+      test_srlc_o(a0, a1);
+      test_srlv(a0, a1, SHIFT);
+      test_srac_o(a0, a1);
+      test_srav(a0, a1, SHIFT);
+      test_sllc_on(a0, a1);
+      test_sllv(a0, a1, -SHIFT);
+      test_srlc_on(a0, a1);
+      test_srlv(a0, a1, -SHIFT);
+      test_srac_on(a0, a1);
+      test_srav(a0, a1, -SHIFT);
+      test_pack2(p2, a1);
+      test_unpack2(a0, p2);
+      test_pack2_swap(p2, a1);
+      test_unpack2_swap(a0, p2);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      int sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+
+      test_addc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], (int)((int)(ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, (int)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], (int)((int)(ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], (int)((int)(ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], (int)((int)(ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, (int)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], (int)((int)(ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], (int)((int)(ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], (int)((int)(ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, (int)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], (int)((int)(ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], (int)((int)(ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], (int)((int)(ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, (int)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], (int)((int)(ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], (int)((int)(ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], (int)((int)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, (int)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], (int)((int)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], (int)((int)(ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], (int)((int)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, (int)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], (int)((int)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], (int)((int)(ADD_INIT+i)/(-VALUE)));
+      }
+
+      test_andc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andc: ", i, a0[i], (int)((int)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_andv(a0, a1, (int)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andv: ", i, a0[i], (int)((int)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_anda(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_anda: ", i, a0[i], (int)((int)(ADD_INIT+i)&BIT_MASK));
+      }
+
+      test_orc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orc: ", i, a0[i], (int)((int)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_orv(a0, a1, (int)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orv: ", i, a0[i], (int)((int)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_ora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ora: ", i, a0[i], (int)((int)(ADD_INIT+i)|BIT_MASK));
+      }
+
+      test_xorc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorc: ", i, a0[i], (int)((int)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xorv(a0, a1, (int)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorv: ", i, a0[i], (int)((int)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xora: ", i, a0[i], (int)((int)(ADD_INIT+i)^BIT_MASK));
+      }
+
+      test_sllc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc: ", i, a0[i], (int)((int)(ADD_INIT+i)<<VALUE));
+      }
+      test_sllv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv: ", i, a0[i], (int)((int)(ADD_INIT+i)<<VALUE));
+      }
+
+      test_srlc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>VALUE));
+      }
+      test_srlv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>VALUE));
+      }
+
+      test_srac(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac: ", i, a0[i], (int)((int)(ADD_INIT+i)>>VALUE));
+      }
+      test_srav(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav: ", i, a0[i], (int)((int)(ADD_INIT+i)>>VALUE));
+      }
+
+      test_sllc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_n: ", i, a0[i], (int)((int)(ADD_INIT+i)<<(-VALUE)));
+      }
+      test_sllv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_n: ", i, a0[i], (int)((int)(ADD_INIT+i)<<(-VALUE)));
+      }
+
+      test_srlc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_n: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>(-VALUE)));
+      }
+      test_srlv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_n: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>(-VALUE)));
+      }
+
+      test_srac_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_n: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-VALUE)));
+      }
+      test_srav(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_n: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-VALUE)));
+      }
+
+      test_sllc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_o: ", i, a0[i], (int)((int)(ADD_INIT+i)<<SHIFT));
+      }
+      test_sllv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_o: ", i, a0[i], (int)((int)(ADD_INIT+i)<<SHIFT));
+      }
+
+      test_srlc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_o: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>SHIFT));
+      }
+      test_srlv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_o: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>SHIFT));
+      }
+
+      test_srac_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_o: ", i, a0[i], (int)((int)(ADD_INIT+i)>>SHIFT));
+      }
+      test_srav(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_o: ", i, a0[i], (int)((int)(ADD_INIT+i)>>SHIFT));
+      }
+
+      test_sllc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_on: ", i, a0[i], (int)((int)(ADD_INIT+i)<<(-SHIFT)));
+      }
+      test_sllv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_on: ", i, a0[i], (int)((int)(ADD_INIT+i)<<(-SHIFT)));
+      }
+
+      test_srlc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+      test_srlv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+
+      test_srac_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT)));
+      }
+      test_srav(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT)));
+      }
+
+      test_pack2(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2: ", i, p2[i], ((long)(ADD_INIT+2*i) & 0xFFFFFFFFl) | ((long)(ADD_INIT+2*i+1) << 32));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2: ", i, a0[i], (ADD_INIT+i));
+      }
+
+      test_pack2_swap(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2_swap: ", i, p2[i], ((long)(ADD_INIT+2*i+1) & 0xFFFFFFFFl) | ((long)(ADD_INIT+2*i) << 32));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2_swap(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2_swap: ", i, a0[i], (ADD_INIT+i));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, (int)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, (int)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (int)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (int)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (int)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (int)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andv(a0, a1, (int)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_anda(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_anda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orv(a0, a1, (int)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorv(a0, a1, (int)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2_swap(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2_swap(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2_swap: " + (end - start));
+
+    return errn;
+  }
+
+  static int test_sum(int[] a1) {
+    int sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]+VALUE);
+    }
+  }
+  static void test_addv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]+b);
+    }
+  }
+  static void test_adda(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]-VALUE);
+    }
+  }
+  static void test_subv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]-b);
+    }
+  }
+  static void test_suba(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]*b);
+    }
+  }
+  static void test_mula(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]/b);
+    }
+  }
+  static void test_diva(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]/a2[i]);
+    }
+  }
+
+  static void test_andc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]&BIT_MASK);
+    }
+  }
+  static void test_andv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]&b);
+    }
+  }
+  static void test_anda(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]&a2[i]);
+    }
+  }
+
+  static void test_orc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]|BIT_MASK);
+    }
+  }
+  static void test_orv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]|b);
+    }
+  }
+  static void test_ora(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]|a2[i]);
+    }
+  }
+
+  static void test_xorc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]^BIT_MASK);
+    }
+  }
+  static void test_xorv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]^b);
+    }
+  }
+  static void test_xora(int[] a0, int[] a1, int[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]^a2[i]);
+    }
+  }
+
+  static void test_sllc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]<<VALUE);
+    }
+  }
+  static void test_sllc_n(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]<<(-VALUE));
+    }
+  }
+  static void test_sllc_o(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]<<SHIFT);
+    }
+  }
+  static void test_sllc_on(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]<<(-SHIFT));
+    }
+  }
+  static void test_sllv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]<<b);
+    }
+  }
+
+  static void test_srlc(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>>VALUE);
+    }
+  }
+  static void test_srlc_n(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>>(-VALUE));
+    }
+  }
+  static void test_srlc_o(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>>SHIFT);
+    }
+  }
+  static void test_srlc_on(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>>(-SHIFT));
+    }
+  }
+  static void test_srlv(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>>b);
+    }
+  }
+
+  static void test_srac(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>VALUE);
+    }
+  }
+  static void test_srac_n(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>(-VALUE));
+    }
+  }
+  static void test_srac_o(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>SHIFT);
+    }
+  }
+  static void test_srac_on(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>(-SHIFT));
+    }
+  }
+  static void test_srav(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)(a1[i]>>b);
+    }
+  }
+
+  static void test_pack2(long[] p2, int[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      long l0 = (long)a1[i*2+0];
+      long l1 = (long)a1[i*2+1];
+      p2[i] = (l1 << 32) | (l0 & 0xFFFFFFFFl);
+    }
+  }
+  static void test_unpack2(int[] a0, long[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      long l = p2[i];
+      a0[i*2+0] = (int)(l & 0xFFFFFFFFl);
+      a0[i*2+1] = (int)(l >> 32);
+    }
+  }
+  static void test_pack2_swap(long[] p2, int[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      long l0 = (long)a1[i*2+0];
+      long l1 = (long)a1[i*2+1];
+      p2[i] = (l0 << 32) | (l1 & 0xFFFFFFFFl);
+    }
+  }
+  static void test_unpack2_swap(int[] a0, long[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      long l = p2[i];
+      a0[i*2+0] = (int)(l >> 32);
+      a0[i*2+1] = (int)(l & 0xFFFFFFFFl);
+    }
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestLongVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,917 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestLongVect
+ */
+
+public class TestLongVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final long ADD_INIT = Long.MAX_VALUE-500;
+  private static final long BIT_MASK = 0xEC80F731EC80F731L;
+  private static final int VALUE = 31;
+  private static final int SHIFT = 64;
+
+  public static void main(String args[]) {
+    System.out.println("Testing Long vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    long[] a0 = new long[ARRLEN];
+    long[] a1 = new long[ARRLEN];
+    long[] a2 = new long[ARRLEN];
+    long[] a3 = new long[ARRLEN];
+    long[] a4 = new long[ARRLEN];
+    // Initialize
+    long gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      long val = (long)(ADD_INIT+i);
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = (long)VALUE;
+      a3[i] = (long)-VALUE;
+      a4[i] = (long)BIT_MASK;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, (long)VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, (long)VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, (long)VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, (long)VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, (long)-VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, (long)-VALUE);
+      test_diva(a0, a1, a3);
+      test_andc(a0, a1);
+      test_andv(a0, a1, (long)BIT_MASK);
+      test_anda(a0, a1, a4);
+      test_orc(a0, a1);
+      test_orv(a0, a1, (long)BIT_MASK);
+      test_ora(a0, a1, a4);
+      test_xorc(a0, a1);
+      test_xorv(a0, a1, (long)BIT_MASK);
+      test_xora(a0, a1, a4);
+      test_sllc(a0, a1);
+      test_sllv(a0, a1, VALUE);
+      test_srlc(a0, a1);
+      test_srlv(a0, a1, VALUE);
+      test_srac(a0, a1);
+      test_srav(a0, a1, VALUE);
+      test_sllc_n(a0, a1);
+      test_sllv(a0, a1, -VALUE);
+      test_srlc_n(a0, a1);
+      test_srlv(a0, a1, -VALUE);
+      test_srac_n(a0, a1);
+      test_srav(a0, a1, -VALUE);
+      test_sllc_o(a0, a1);
+      test_sllv(a0, a1, SHIFT);
+      test_srlc_o(a0, a1);
+      test_srlv(a0, a1, SHIFT);
+      test_srac_o(a0, a1);
+      test_srav(a0, a1, SHIFT);
+      test_sllc_on(a0, a1);
+      test_sllv(a0, a1, -SHIFT);
+      test_srlc_on(a0, a1);
+      test_srlv(a0, a1, -SHIFT);
+      test_srac_on(a0, a1);
+      test_srav(a0, a1, -SHIFT);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      long sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+
+      test_addc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], (long)((long)(ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, (long)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], (long)((long)(ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], (long)((long)(ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], (long)((long)(ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, (long)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], (long)((long)(ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], (long)((long)(ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], (long)((long)(ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, (long)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], (long)((long)(ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], (long)((long)(ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], (long)((long)(ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, (long)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], (long)((long)(ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], (long)((long)(ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], (long)((long)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, (long)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], (long)((long)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], (long)((long)(ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], (long)((long)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, (long)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], (long)((long)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], (long)((long)(ADD_INIT+i)/(-VALUE)));
+      }
+
+      test_andc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andc: ", i, a0[i], (long)((long)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_andv(a0, a1, (long)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andv: ", i, a0[i], (long)((long)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_anda(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_anda: ", i, a0[i], (long)((long)(ADD_INIT+i)&BIT_MASK));
+      }
+
+      test_orc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orc: ", i, a0[i], (long)((long)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_orv(a0, a1, (long)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orv: ", i, a0[i], (long)((long)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_ora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ora: ", i, a0[i], (long)((long)(ADD_INIT+i)|BIT_MASK));
+      }
+
+      test_xorc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorc: ", i, a0[i], (long)((long)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xorv(a0, a1, (long)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorv: ", i, a0[i], (long)((long)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xora: ", i, a0[i], (long)((long)(ADD_INIT+i)^BIT_MASK));
+      }
+
+      test_sllc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc: ", i, a0[i], (long)((long)(ADD_INIT+i)<<VALUE));
+      }
+      test_sllv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv: ", i, a0[i], (long)((long)(ADD_INIT+i)<<VALUE));
+      }
+
+      test_srlc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>VALUE));
+      }
+      test_srlv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>VALUE));
+      }
+
+      test_srac(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac: ", i, a0[i], (long)((long)(ADD_INIT+i)>>VALUE));
+      }
+      test_srav(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav: ", i, a0[i], (long)((long)(ADD_INIT+i)>>VALUE));
+      }
+
+      test_sllc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_n: ", i, a0[i], (long)((long)(ADD_INIT+i)<<(-VALUE)));
+      }
+      test_sllv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_n: ", i, a0[i], (long)((long)(ADD_INIT+i)<<(-VALUE)));
+      }
+
+      test_srlc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_n: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>(-VALUE)));
+      }
+      test_srlv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_n: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>(-VALUE)));
+      }
+
+      test_srac_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_n: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-VALUE)));
+      }
+      test_srav(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_n: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-VALUE)));
+      }
+
+      test_sllc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_o: ", i, a0[i], (long)((long)(ADD_INIT+i)<<SHIFT));
+      }
+      test_sllv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_o: ", i, a0[i], (long)((long)(ADD_INIT+i)<<SHIFT));
+      }
+
+      test_srlc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_o: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>SHIFT));
+      }
+      test_srlv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_o: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>SHIFT));
+      }
+
+      test_srac_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_o: ", i, a0[i], (long)((long)(ADD_INIT+i)>>SHIFT));
+      }
+      test_srav(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_o: ", i, a0[i], (long)((long)(ADD_INIT+i)>>SHIFT));
+      }
+
+      test_sllc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_on: ", i, a0[i], (long)((long)(ADD_INIT+i)<<(-SHIFT)));
+      }
+      test_sllv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_on: ", i, a0[i], (long)((long)(ADD_INIT+i)<<(-SHIFT)));
+      }
+
+      test_srlc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+      test_srlv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+
+      test_srac_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT)));
+      }
+      test_srav(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT)));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, (long)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, (long)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (long)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (long)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (long)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (long)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andv(a0, a1, (long)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_anda(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_anda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orv(a0, a1, (long)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorv(a0, a1, (long)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_on: " + (end - start));
+
+    return errn;
+  }
+
+  static long test_sum(long[] a1) {
+    long sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]+VALUE);
+    }
+  }
+  static void test_addv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]+b);
+    }
+  }
+  static void test_adda(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]-VALUE);
+    }
+  }
+  static void test_subv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]-b);
+    }
+  }
+  static void test_suba(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]*b);
+    }
+  }
+  static void test_mula(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]/b);
+    }
+  }
+  static void test_diva(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]/a2[i]);
+    }
+  }
+
+  static void test_andc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]&BIT_MASK);
+    }
+  }
+  static void test_andv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]&b);
+    }
+  }
+  static void test_anda(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]&a2[i]);
+    }
+  }
+
+  static void test_orc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]|BIT_MASK);
+    }
+  }
+  static void test_orv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]|b);
+    }
+  }
+  static void test_ora(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]|a2[i]);
+    }
+  }
+
+  static void test_xorc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]^BIT_MASK);
+    }
+  }
+  static void test_xorv(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]^b);
+    }
+  }
+  static void test_xora(long[] a0, long[] a1, long[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]^a2[i]);
+    }
+  }
+
+  static void test_sllc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<VALUE);
+    }
+  }
+  static void test_sllc_n(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<(-VALUE));
+    }
+  }
+  static void test_sllc_o(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<SHIFT);
+    }
+  }
+  static void test_sllc_on(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<(-SHIFT));
+    }
+  }
+  static void test_sllv(long[] a0, long[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<b);
+    }
+  }
+
+  static void test_srlc(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>>VALUE);
+    }
+  }
+  static void test_srlc_n(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>>(-VALUE));
+    }
+  }
+  static void test_srlc_o(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>>SHIFT);
+    }
+  }
+  static void test_srlc_on(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>>(-SHIFT));
+    }
+  }
+  static void test_srlv(long[] a0, long[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>>b);
+    }
+  }
+
+  static void test_srac(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>VALUE);
+    }
+  }
+  static void test_srac_n(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>(-VALUE));
+    }
+  }
+  static void test_srac_o(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>SHIFT);
+    }
+  }
+  static void test_srac_on(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>(-SHIFT));
+    }
+  }
+  static void test_srav(long[] a0, long[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]>>b);
+    }
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6340864/TestShortVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,1127 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6340864
+ * @summary Implement vectorization optimizations in hotspot-server
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestShortVect
+ */
+
+public class TestShortVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  private static final int ADD_INIT = Short.MAX_VALUE-500;
+  private static final int BIT_MASK = 0xB731;
+  private static final int VALUE = 7;
+  private static final int SHIFT = 16;
+
+  public static void main(String args[]) {
+    System.out.println("Testing Short vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    short[] a0 = new short[ARRLEN];
+    short[] a1 = new short[ARRLEN];
+    short[] a2 = new short[ARRLEN];
+    short[] a3 = new short[ARRLEN];
+    short[] a4 = new short[ARRLEN];
+     int[] p2 = new  int[ARRLEN/2];
+    long[] p4 = new long[ARRLEN/4];
+    // Initialize
+    int gold_sum = 0;
+    for (int i=0; i<ARRLEN; i++) {
+      short val = (short)(ADD_INIT+i);
+      gold_sum += val;
+      a1[i] = val;
+      a2[i] = (short)VALUE;
+      a3[i] = (short)-VALUE;
+      a4[i] = (short)BIT_MASK;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+      test_addc(a0, a1);
+      test_addv(a0, a1, (short)VALUE);
+      test_adda(a0, a1, a2);
+      test_subc(a0, a1);
+      test_subv(a0, a1, (short)VALUE);
+      test_suba(a0, a1, a2);
+      test_mulc(a0, a1);
+      test_mulv(a0, a1, (short)VALUE);
+      test_mula(a0, a1, a2);
+      test_divc(a0, a1);
+      test_divv(a0, a1, (short)VALUE);
+      test_diva(a0, a1, a2);
+      test_mulc_n(a0, a1);
+      test_mulv(a0, a1, (short)-VALUE);
+      test_mula(a0, a1, a3);
+      test_divc_n(a0, a1);
+      test_divv(a0, a1, (short)-VALUE);
+      test_diva(a0, a1, a3);
+      test_andc(a0, a1);
+      test_andv(a0, a1, (short)BIT_MASK);
+      test_anda(a0, a1, a4);
+      test_orc(a0, a1);
+      test_orv(a0, a1, (short)BIT_MASK);
+      test_ora(a0, a1, a4);
+      test_xorc(a0, a1);
+      test_xorv(a0, a1, (short)BIT_MASK);
+      test_xora(a0, a1, a4);
+      test_sllc(a0, a1);
+      test_sllv(a0, a1, VALUE);
+      test_srlc(a0, a1);
+      test_srlv(a0, a1, VALUE);
+      test_srac(a0, a1);
+      test_srav(a0, a1, VALUE);
+      test_sllc_n(a0, a1);
+      test_sllv(a0, a1, -VALUE);
+      test_srlc_n(a0, a1);
+      test_srlv(a0, a1, -VALUE);
+      test_srac_n(a0, a1);
+      test_srav(a0, a1, -VALUE);
+      test_sllc_o(a0, a1);
+      test_sllv(a0, a1, SHIFT);
+      test_srlc_o(a0, a1);
+      test_srlv(a0, a1, SHIFT);
+      test_srac_o(a0, a1);
+      test_srav(a0, a1, SHIFT);
+      test_sllc_on(a0, a1);
+      test_sllv(a0, a1, -SHIFT);
+      test_srlc_on(a0, a1);
+      test_srlv(a0, a1, -SHIFT);
+      test_srac_on(a0, a1);
+      test_srav(a0, a1, -SHIFT);
+      test_pack2(p2, a1);
+      test_unpack2(a0, p2);
+      test_pack2_swap(p2, a1);
+      test_unpack2_swap(a0, p2);
+      test_pack4(p4, a1);
+      test_unpack4(a0, p4);
+      test_pack4_swap(p4, a1);
+      test_unpack4_swap(a0, p4);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      int sum = test_sum(a1);
+      if (sum != gold_sum) {
+        System.err.println("test_sum:  " + sum + " != " + gold_sum);
+        errn++;
+      }
+
+      test_addc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addc: ", i, a0[i], (short)((short)(ADD_INIT+i)+VALUE));
+      }
+      test_addv(a0, a1, (short)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addv: ", i, a0[i], (short)((short)(ADD_INIT+i)+VALUE));
+      }
+      test_adda(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_adda: ", i, a0[i], (short)((short)(ADD_INIT+i)+VALUE));
+      }
+
+      test_subc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subc: ", i, a0[i], (short)((short)(ADD_INIT+i)-VALUE));
+      }
+      test_subv(a0, a1, (short)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_subv: ", i, a0[i], (short)((short)(ADD_INIT+i)-VALUE));
+      }
+      test_suba(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_suba: ", i, a0[i], (short)((short)(ADD_INIT+i)-VALUE));
+      }
+
+      test_mulc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc: ", i, a0[i], (short)((short)(ADD_INIT+i)*VALUE));
+      }
+      test_mulv(a0, a1, (short)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv: ", i, a0[i], (short)((short)(ADD_INIT+i)*VALUE));
+      }
+      test_mula(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula: ", i, a0[i], (short)((short)(ADD_INIT+i)*VALUE));
+      }
+
+      test_divc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc: ", i, a0[i], (short)((short)(ADD_INIT+i)/VALUE));
+      }
+      test_divv(a0, a1, (short)VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv: ", i, a0[i], (short)((short)(ADD_INIT+i)/VALUE));
+      }
+      test_diva(a0, a1, a2);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva: ", i, a0[i], (short)((short)(ADD_INIT+i)/VALUE));
+      }
+
+      test_mulc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulc_n: ", i, a0[i], (short)((short)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mulv(a0, a1, (short)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mulv_n: ", i, a0[i], (short)((short)(ADD_INIT+i)*(-VALUE)));
+      }
+      test_mula(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_mula_n: ", i, a0[i], (short)((short)(ADD_INIT+i)*(-VALUE)));
+      }
+
+      test_divc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divc_n: ", i, a0[i], (short)((short)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_divv(a0, a1, (short)-VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divv_n: ", i, a0[i], (short)((short)(ADD_INIT+i)/(-VALUE)));
+      }
+      test_diva(a0, a1, a3);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_diva_n: ", i, a0[i], (short)((short)(ADD_INIT+i)/(-VALUE)));
+      }
+
+      test_andc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andc: ", i, a0[i], (short)((short)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_andv(a0, a1, (short)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_andv: ", i, a0[i], (short)((short)(ADD_INIT+i)&BIT_MASK));
+      }
+      test_anda(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_anda: ", i, a0[i], (short)((short)(ADD_INIT+i)&BIT_MASK));
+      }
+
+      test_orc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orc: ", i, a0[i], (short)((short)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_orv(a0, a1, (short)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_orv: ", i, a0[i], (short)((short)(ADD_INIT+i)|BIT_MASK));
+      }
+      test_ora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_ora: ", i, a0[i], (short)((short)(ADD_INIT+i)|BIT_MASK));
+      }
+
+      test_xorc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorc: ", i, a0[i], (short)((short)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xorv(a0, a1, (short)BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xorv: ", i, a0[i], (short)((short)(ADD_INIT+i)^BIT_MASK));
+      }
+      test_xora(a0, a1, a4);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_xora: ", i, a0[i], (short)((short)(ADD_INIT+i)^BIT_MASK));
+      }
+
+      test_sllc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc: ", i, a0[i], (short)((short)(ADD_INIT+i)<<VALUE));
+      }
+      test_sllv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv: ", i, a0[i], (short)((short)(ADD_INIT+i)<<VALUE));
+      }
+
+      test_srlc(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>VALUE));
+      }
+      test_srlv(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>VALUE));
+      }
+
+      test_srac(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac: ", i, a0[i], (short)((short)(ADD_INIT+i)>>VALUE));
+      }
+      test_srav(a0, a1, VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav: ", i, a0[i], (short)((short)(ADD_INIT+i)>>VALUE));
+      }
+
+      test_sllc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_n: ", i, a0[i], (short)((short)(ADD_INIT+i)<<(-VALUE)));
+      }
+      test_sllv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_n: ", i, a0[i], (short)((short)(ADD_INIT+i)<<(-VALUE)));
+      }
+
+      test_srlc_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_n: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>(-VALUE)));
+      }
+      test_srlv(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_n: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>(-VALUE)));
+      }
+
+      test_srac_n(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_n: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-VALUE)));
+      }
+      test_srav(a0, a1, -VALUE);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_n: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-VALUE)));
+      }
+
+      test_sllc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_o: ", i, a0[i], (short)((short)(ADD_INIT+i)<<SHIFT));
+      }
+      test_sllv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_o: ", i, a0[i], (short)((short)(ADD_INIT+i)<<SHIFT));
+      }
+
+      test_srlc_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_o: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>SHIFT));
+      }
+      test_srlv(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_o: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>SHIFT));
+      }
+
+      test_srac_o(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_o: ", i, a0[i], (short)((short)(ADD_INIT+i)>>SHIFT));
+      }
+      test_srav(a0, a1, SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_o: ", i, a0[i], (short)((short)(ADD_INIT+i)>>SHIFT));
+      }
+
+      test_sllc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_on: ", i, a0[i], (short)((short)(ADD_INIT+i)<<(-SHIFT)));
+      }
+      test_sllv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_on: ", i, a0[i], (short)((short)(ADD_INIT+i)<<(-SHIFT)));
+      }
+
+      test_srlc_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+      test_srlv(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>>(-SHIFT)));
+      }
+
+      test_srac_on(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-SHIFT)));
+      }
+      test_srav(a0, a1, -SHIFT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-SHIFT)));
+      }
+
+      test_pack2(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2: ", i, p2[i], ((int)(ADD_INIT+2*i) & 0xFFFF) | ((int)(ADD_INIT+2*i+1) << 16));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2: ", i, a0[i], (short)(ADD_INIT+i));
+      }
+
+      test_pack2_swap(p2, a1);
+      for (int i=0; i<ARRLEN/2; i++) {
+        errn += verify("test_pack2_swap: ", i, p2[i], ((int)(ADD_INIT+2*i+1) & 0xFFFF) | ((int)(ADD_INIT+2*i) << 16));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack2_swap(a0, p2);
+      for (int i=0; i<(ARRLEN&(-2)); i++) {
+        errn += verify("test_unpack2_swap: ", i, a0[i], (short)(ADD_INIT+i));
+      }
+
+      test_pack4(p4, a1);
+      for (int i=0; i<ARRLEN/4; i++) {
+        errn += verify("test_pack4: ", i, p4[i],  ((long)(ADD_INIT+4*i+0) & 0xFFFFl) |
+                                                 (((long)(ADD_INIT+4*i+1) & 0xFFFFl) << 16)  |
+                                                 (((long)(ADD_INIT+4*i+2) & 0xFFFFl) << 32)  |
+                                                 (((long)(ADD_INIT+4*i+3) & 0xFFFFl) << 48));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack4(a0, p4);
+      for (int i=0; i<(ARRLEN&(-4)); i++) {
+        errn += verify("test_unpack4: ", i, a0[i], (short)(ADD_INIT+i));
+      }
+
+      test_pack4_swap(p4, a1);
+      for (int i=0; i<ARRLEN/4; i++) {
+        errn += verify("test_pack4_swap: ", i, p4[i],  ((long)(ADD_INIT+4*i+3) & 0xFFFFl) |
+                                                      (((long)(ADD_INIT+4*i+2) & 0xFFFFl) << 16)  |
+                                                      (((long)(ADD_INIT+4*i+1) & 0xFFFFl) << 32)  |
+                                                      (((long)(ADD_INIT+4*i+0) & 0xFFFFl) << 48));
+      }
+      for (int i=0; i<ARRLEN; i++) {
+        a0[i] = -1;
+      }
+      test_unpack4_swap(a0, p4);
+      for (int i=0; i<(ARRLEN&(-4)); i++) {
+        errn += verify("test_unpack4_swap: ", i, a0[i], (short)(ADD_INIT+i));
+      }
+
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sum(a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sum: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addv(a0, a1, (short)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_adda(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_adda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_subv(a0, a1, (short)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_subv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_suba(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_suba: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (short)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (short)VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mulv(a0, a1, (short)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mulv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_mula(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_mula_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divv(a0, a1, (short)-VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divv_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_diva(a0, a1, a3);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_diva_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_andv(a0, a1, (short)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_andv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_anda(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_anda: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_orv(a0, a1, (short)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_orv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_ora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_ora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xorv(a0, a1, (short)BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xorv: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_xora(a0, a1, a4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_xora: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_n(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_n: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -VALUE);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_n: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_o(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_o: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_o: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_on(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_on: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav(a0, a1, -SHIFT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_on: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack2_swap(p2, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack2_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack2_swap(a0, p2);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack2_swap: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack4(p4, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack4: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack4(a0, p4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack4: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_pack4_swap(p4, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_pack4_swap: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unpack4_swap(a0, p4);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unpack4_swap: " + (end - start));
+
+    return errn;
+  }
+
+  static int test_sum(short[] a1) {
+    int sum = 0;
+    for (int i = 0; i < a1.length; i+=1) {
+      sum += a1[i];
+    }
+    return sum;
+  }
+
+  static void test_addc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]+VALUE);
+    }
+  }
+  static void test_addv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]+b);
+    }
+  }
+  static void test_adda(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]+a2[i]);
+    }
+  }
+
+  static void test_subc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]-VALUE);
+    }
+  }
+  static void test_subv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]-b);
+    }
+  }
+  static void test_suba(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]-a2[i]);
+    }
+  }
+
+  static void test_mulc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]*VALUE);
+    }
+  }
+  static void test_mulc_n(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]*(-VALUE));
+    }
+  }
+  static void test_mulv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]*b);
+    }
+  }
+  static void test_mula(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]*a2[i]);
+    }
+  }
+
+  static void test_divc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]/VALUE);
+    }
+  }
+  static void test_divc_n(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]/(-VALUE));
+    }
+  }
+  static void test_divv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]/b);
+    }
+  }
+  static void test_diva(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]/a2[i]);
+    }
+  }
+
+  static void test_andc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]&BIT_MASK);
+    }
+  }
+  static void test_andv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]&b);
+    }
+  }
+  static void test_anda(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]&a2[i]);
+    }
+  }
+
+  static void test_orc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]|BIT_MASK);
+    }
+  }
+  static void test_orv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]|b);
+    }
+  }
+  static void test_ora(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]|a2[i]);
+    }
+  }
+
+  static void test_xorc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]^BIT_MASK);
+    }
+  }
+  static void test_xorv(short[] a0, short[] a1, short b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]^b);
+    }
+  }
+  static void test_xora(short[] a0, short[] a1, short[] a2) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]^a2[i]);
+    }
+  }
+
+  static void test_sllc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<VALUE);
+    }
+  }
+  static void test_sllc_n(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<(-VALUE));
+    }
+  }
+  static void test_sllc_o(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<SHIFT);
+    }
+  }
+  static void test_sllc_on(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<(-SHIFT));
+    }
+  }
+  static void test_sllv(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<b);
+    }
+  }
+
+  static void test_srlc(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>>VALUE);
+    }
+  }
+  static void test_srlc_n(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>>(-VALUE));
+    }
+  }
+  static void test_srlc_o(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>>SHIFT);
+    }
+  }
+  static void test_srlc_on(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>>(-SHIFT));
+    }
+  }
+  static void test_srlv(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>>b);
+    }
+  }
+
+  static void test_srac(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>VALUE);
+    }
+  }
+  static void test_srac_n(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>(-VALUE));
+    }
+  }
+  static void test_srac_o(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>SHIFT);
+    }
+  }
+  static void test_srac_on(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>(-SHIFT));
+    }
+  }
+  static void test_srav(short[] a0, short[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]>>b);
+    }
+  }
+
+  static void test_pack2(int[] p2, short[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l0 = (int)a1[i*2+0];
+      int l1 = (int)a1[i*2+1];
+      p2[i] = (l1 << 16) | (l0 & 0xFFFF);
+    }
+  }
+  static void test_unpack2(short[] a0, int[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l = p2[i];
+      a0[i*2+0] = (short)(l & 0xFFFF);
+      a0[i*2+1] = (short)(l >> 16);
+    }
+  }
+  static void test_pack2_swap(int[] p2, short[] a1) {
+    if (p2.length*2 > a1.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l0 = (int)a1[i*2+0];
+      int l1 = (int)a1[i*2+1];
+      p2[i] = (l0 << 16) | (l1 & 0xFFFF);
+    }
+  }
+  static void test_unpack2_swap(short[] a0, int[] p2) {
+    if (p2.length*2 > a0.length) return;
+    for (int i = 0; i < p2.length; i+=1) {
+      int l = p2[i];
+      a0[i*2+0] = (short)(l >> 16);
+      a0[i*2+1] = (short)(l & 0xFFFF);
+    }
+  }
+
+  static void test_pack4(long[] p4, short[] a1) {
+    if (p4.length*4 > a1.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l0 = (long)a1[i*4+0];
+      long l1 = (long)a1[i*4+1];
+      long l2 = (long)a1[i*4+2];
+      long l3 = (long)a1[i*4+3];
+      p4[i] = (l0 & 0xFFFFl) |
+             ((l1 & 0xFFFFl) << 16) |
+             ((l2 & 0xFFFFl) << 32) |
+             ((l3 & 0xFFFFl) << 48);
+    }
+  }
+  static void test_unpack4(short[] a0, long[] p4) {
+    if (p4.length*4 > a0.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l = p4[i];
+      a0[i*4+0] = (short)(l & 0xFFFFl);
+      a0[i*4+1] = (short)(l >> 16);
+      a0[i*4+2] = (short)(l >> 32);
+      a0[i*4+3] = (short)(l >> 48);
+    }
+  }
+  static void test_pack4_swap(long[] p4, short[] a1) {
+    if (p4.length*4 > a1.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l0 = (long)a1[i*4+0];
+      long l1 = (long)a1[i*4+1];
+      long l2 = (long)a1[i*4+2];
+      long l3 = (long)a1[i*4+3];
+      p4[i] = (l3 & 0xFFFFl) |
+             ((l2 & 0xFFFFl) << 16) |
+             ((l1 & 0xFFFFl) << 32) |
+             ((l0 & 0xFFFFl) << 48);
+    }
+  }
+  static void test_unpack4_swap(short[] a0, long[] p4) {
+    if (p4.length*4 > a0.length) return;
+    for (int i = 0; i < p4.length; i+=1) {
+      long l = p4[i];
+      a0[i*4+0] = (short)(l >> 48);
+      a0[i*4+1] = (short)(l >> 32);
+      a0[i*4+2] = (short)(l >> 16);
+      a0[i*4+3] = (short)(l & 0xFFFFl);
+    }
+  }
+
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+}
--- a/test/compiler/6894807/Test6894807.sh	Thu Aug 23 12:27:33 2012 -0700
+++ b/test/compiler/6894807/Test6894807.sh	Fri Aug 24 15:51:19 2012 -0700
@@ -21,7 +21,7 @@
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7190310/Test7190310.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * Manual test
+ */
+
+import java.lang.ref.*;
+
+public class Test7190310 {
+  private static Object str = new Object() {
+    public String toString() {
+      return "The Object";
+    }
+
+    protected void finalize() throws Throwable {
+      System.out.println("The Object is being finalized");
+      super.finalize();
+    }
+  };
+  private final static ReferenceQueue<Object> rq =
+      new ReferenceQueue<Object>();
+  private final static WeakReference<Object> wr =
+      new WeakReference<Object>(str, rq);
+
+  public static void main(String[] args)
+      throws InterruptedException {
+    Thread reader = new Thread() {
+      public void run() {
+        while (wr.get() != null) {
+        }
+        System.out.println("wr.get() returned null");
+      }
+    };
+
+    Thread queueReader = new Thread() {
+      public void run() {
+        try {
+          Reference<? extends Object> ref = rq.remove();
+          System.out.println(ref);
+          System.out.println("queueReader returned, ref==wr is "
+              + (ref == wr));
+        } catch (InterruptedException e) {
+          System.err.println("Sleep interrupted - exiting");
+        }
+      }
+    };
+
+    reader.start();
+    queueReader.start();
+
+    Thread.sleep(1000);
+    str = null;
+    System.gc();
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7190310/Test7190310_unsafe.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 7190310
+ * @summary Inlining WeakReference.get(), and hoisting $referent may lead to non-terminating loops
+ * @run main/othervm -Xbatch Test7190310_unsafe
+ */
+
+import java.lang.ref.*;
+import java.lang.reflect.*;
+import sun.misc.Unsafe;
+
+public class Test7190310_unsafe {
+
+  static class TestObject {
+    public String toString() {
+      return "TestObject";
+    }
+  };
+
+  private static TestObject str = new TestObject();
+  private static final WeakReference ref = new WeakReference(str);
+
+  private TestObject obj;
+
+  public static void main(String[] args) throws Exception {
+    Class c = Test7190310_unsafe.class.getClassLoader().loadClass("sun.misc.Unsafe");
+    Field f = c.getDeclaredField("theUnsafe");
+    f.setAccessible(true);
+    Unsafe unsafe = (Unsafe)f.get(c);
+
+    f = Reference.class.getDeclaredField("referent");
+    f.setAccessible(true);
+    long referent_offset = unsafe.objectFieldOffset(f);
+
+    Test7190310_unsafe t = new Test7190310_unsafe();
+    TestObject o = new TestObject();
+    t.obj = o;
+
+    // Warmup (compile methods)
+    System.err.println("Warmup");
+    Object obj = null;
+    for (int i = 0; i < 11000; i++) {
+      obj = getRef0(ref);
+    }
+    for (int i = 0; i < 11000; i++) {
+      obj = getRef1(unsafe, ref, referent_offset);
+    }
+    for (int i = 0; i < 11000; i++) {
+      obj = getRef2(unsafe, ref, referent_offset);
+    }
+    for (int i = 0; i < 11000; i++) {
+      obj = getRef3(unsafe, ref, referent_offset);
+    }
+    for (int i = 0; i < 11000; i++) {
+      obj = getRef4(unsafe, t, referent_offset);
+    }
+
+    // Access verification
+    System.err.println("Verification");
+    if (!verifyGet(referent_offset, unsafe)) {
+      System.exit(97);
+    }
+
+    obj = getRef3(unsafe, t, referent_offset);
+    if (obj != o) {
+      System.out.println("FAILED: unsafe.getObject(Object, " + referent_offset + ") " + obj + " != " + o);
+      System.exit(97);
+    }
+    obj = getRef4(unsafe, t, referent_offset);
+    if (obj != o) {
+      System.out.println("FAILED: unsafe.getObject(Test7190310, " + referent_offset + ") " + obj + " != " + o);
+      System.exit(97);
+    }
+  }
+
+  static boolean verifyGet(long referent_offset, Unsafe unsafe) throws Exception {
+    // Access verification
+    System.out.println("referent: " + str);
+    Object obj = getRef0(ref);
+    if (obj != str) {
+      System.out.println("FAILED: weakRef.get() " + obj + " != " + str);
+      return false;
+    }
+    obj = getRef1(unsafe, ref, referent_offset);
+    if (obj != str) {
+      System.out.println("FAILED: unsafe.getObject(weakRef, " + referent_offset + ") " + obj + " != " + str);
+      return false;
+    }
+    obj = getRef2(unsafe, ref, referent_offset);
+    if (obj != str) {
+      System.out.println("FAILED: unsafe.getObject(abstRef, " + referent_offset + ") " + obj + " != " + str);
+      return false;
+    }
+    obj = getRef3(unsafe, ref, referent_offset);
+    if (obj != str) {
+      System.out.println("FAILED: unsafe.getObject(Object, " + referent_offset + ") " + obj + " != " + str);
+      return false;
+    }
+    return true;
+  }
+
+  static Object getRef0(WeakReference ref) throws Exception {
+    return ref.get();
+  }
+  static Object getRef1(Unsafe unsafe, WeakReference ref, long referent_offset) throws Exception {
+    return unsafe.getObject(ref, referent_offset);
+  }
+  static Object getRef2(Unsafe unsafe, Reference ref, long referent_offset) throws Exception {
+    return unsafe.getObject(ref, referent_offset);
+  }
+  static Object getRef3(Unsafe unsafe, Object ref, long referent_offset) throws Exception {
+    return unsafe.getObject(ref, referent_offset);
+  }
+  static Object getRef4(Unsafe unsafe, Test7190310_unsafe ref, long referent_offset) throws Exception {
+    return unsafe.getObject(ref, referent_offset);
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestByteVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestByteVect
+ */
+
+public class TestByteVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Byte vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    byte[] a0 = new byte[ARRLEN];
+    byte[] a1 = new byte[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (byte)i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_lsai(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_lsai(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (byte)(i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (byte)(i+(i&3)));
+      }
+      test_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_lsai: ", i, a0[i], (byte)(i<<(i&3)));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (byte)(i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (byte)(i+(i&3)));
+      }
+      test_unrl_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_lsai: ", i, a0[i], (byte)(i<<(i&3)));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_lsai: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_lsai: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(byte[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(i&3);
+    }
+  }
+  static void test_addi(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]+(i&3));
+    }
+  }
+  static void test_lsai(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]<<(i&3));
+    }
+  }
+  static void test_unrl_init(byte[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0;
+      a0[i+1] = 1;
+      a0[i+2] = 2;
+      a0[i+3] = 3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (byte)(i&3);
+    }
+  }
+  static void test_unrl_addi(byte[] a0, byte[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (byte)(a1[i+0]+0);
+      a0[i+1] = (byte)(a1[i+1]+1);
+      a0[i+2] = (byte)(a1[i+2]+2);
+      a0[i+3] = (byte)(a1[i+3]+3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (byte)(a1[i]+(i&3));
+    }
+  }
+  static void test_unrl_lsai(byte[] a0, byte[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (byte)(a1[i+0]<<0);
+      a0[i+1] = (byte)(a1[i+1]<<1);
+      a0[i+2] = (byte)(a1[i+2]<<2);
+      a0[i+3] = (byte)(a1[i+3]<<3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (byte)(a1[i]<<(i&3));
+    }
+  }
+
+  static int verify(String text, int i, byte elem, byte val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestDoubleVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestDoubleVect
+ */
+
+public class TestDoubleVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Double vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    double[] a0 = new double[ARRLEN];
+    double[] a1 = new double[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (double)i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_divi(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_divi(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (double)(i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (double)(i+(i&3)));
+      }
+      test_divi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divi: ", i, a0[i], (double)i/(double)((i&3)+1));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (double)(i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (double)(i+(i&3)));
+      }
+      test_unrl_divi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_divi: ", i, a0[i], (double)i/(double)((i&3)+1));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_divi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_divi: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(double[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (double)(i&3);
+    }
+  }
+  static void test_addi(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]+(double)(i&3);
+    }
+  }
+  static void test_divi(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]/(double)((i&3)+1);
+    }
+  }
+  static void test_unrl_init(double[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0.;
+      a0[i+1] = 1.;
+      a0[i+2] = 2.;
+      a0[i+3] = 3.;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (double)(i&3);
+    }
+  }
+  static void test_unrl_addi(double[] a0, double[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]+0.;
+      a0[i+1] = a1[i+1]+1.;
+      a0[i+2] = a1[i+2]+2.;
+      a0[i+3] = a1[i+3]+3.;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]+(double)(i&3);
+    }
+  }
+  static void test_unrl_divi(double[] a0, double[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]/1.;
+      a0[i+1] = a1[i+1]/2.;
+      a0[i+2] = a1[i+2]/3.;
+      a0[i+3] = a1[i+3]/4.;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]/(double)((i&3)+1);
+    }
+  }
+
+  static int verify(String text, int i, double elem, double val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestFloatVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestFloatVect
+ */
+
+public class TestFloatVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Float vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    float[] a0 = new float[ARRLEN];
+    float[] a1 = new float[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (float)i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_divi(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_divi(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (float)(i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (float)(i+(i&3)));
+      }
+      test_divi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_divi: ", i, a0[i], (float)i/(float)((i&3)+1));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (float)(i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (float)(i+(i&3)));
+      }
+      test_unrl_divi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_divi: ", i, a0[i], (float)i/(float)((i&3)+1));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_divi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_divi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_divi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_divi: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(float[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (float)(i&3);
+    }
+  }
+  static void test_addi(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]+(float)(i&3);
+    }
+  }
+  static void test_divi(float[] a0, float[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]/(float)((i&3)+1);
+    }
+  }
+  static void test_unrl_init(float[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0.f;
+      a0[i+1] = 1.f;
+      a0[i+2] = 2.f;
+      a0[i+3] = 3.f;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (float)(i&3);
+    }
+  }
+  static void test_unrl_addi(float[] a0, float[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]+0.f;
+      a0[i+1] = a1[i+1]+1.f;
+      a0[i+2] = a1[i+2]+2.f;
+      a0[i+3] = a1[i+3]+3.f;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]+(float)(i&3);
+    }
+  }
+  static void test_unrl_divi(float[] a0, float[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]/1.f;
+      a0[i+1] = a1[i+1]/2.f;
+      a0[i+2] = a1[i+2]/3.f;
+      a0[i+3] = a1[i+3]/4.f;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]/(float)((i&3)+1);
+    }
+  }
+
+  static int verify(String text, int i, float elem, float val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestIntVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestIntVect
+ */
+
+public class TestIntVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Integer vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    int[] a0 = new int[ARRLEN];
+    int[] a1 = new int[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_lsai(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_lsai(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (i+(i&3)));
+      }
+      test_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_lsai: ", i, a0[i], (i<<(i&3)));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (i+(i&3)));
+      }
+      test_unrl_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_lsai: ", i, a0[i], (i<<(i&3)));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_lsai: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_lsai: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(int[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (i&3);
+    }
+  }
+  static void test_addi(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]+(i&3);
+    }
+  }
+  static void test_lsai(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = a1[i]<<(i&3);
+    }
+  }
+  static void test_unrl_init(int[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0;
+      a0[i+1] = 1;
+      a0[i+2] = 2;
+      a0[i+3] = 3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (i&3);
+    }
+  }
+  static void test_unrl_addi(int[] a0, int[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]+0;
+      a0[i+1] = a1[i+1]+1;
+      a0[i+2] = a1[i+2]+2;
+      a0[i+3] = a1[i+3]+3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]+(i&3);
+    }
+  }
+  static void test_unrl_lsai(int[] a0, int[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = a1[i+0]<<0;
+      a0[i+1] = a1[i+1]<<1;
+      a0[i+2] = a1[i+2]<<2;
+      a0[i+3] = a1[i+3]<<3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = a1[i]<<(i&3);
+    }
+  }
+
+  static int verify(String text, int i, int elem, int val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestLongVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestLongVect
+ */
+
+public class TestLongVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Long vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    long[] a0 = new long[ARRLEN];
+    long[] a1 = new long[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (long)i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_lsai(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_lsai(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (long)(i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (long)(i+(i&3)));
+      }
+      test_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_lsai: ", i, a0[i], (long)(i<<(i&3)));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (long)(i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (long)(i+(i&3)));
+      }
+      test_unrl_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_lsai: ", i, a0[i], (long)(i<<(i&3)));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_lsai: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_lsai: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(long[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(i&3);
+    }
+  }
+  static void test_addi(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]+(i&3));
+    }
+  }
+  static void test_lsai(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)(a1[i]<<(i&3));
+    }
+  }
+  static void test_unrl_init(long[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0;
+      a0[i+1] = 1;
+      a0[i+2] = 2;
+      a0[i+3] = 3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (long)(i&3);
+    }
+  }
+  static void test_unrl_addi(long[] a0, long[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (long)(a1[i+0]+0);
+      a0[i+1] = (long)(a1[i+1]+1);
+      a0[i+2] = (long)(a1[i+2]+2);
+      a0[i+3] = (long)(a1[i+3]+3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (long)(a1[i]+(i&3));
+    }
+  }
+  static void test_unrl_lsai(long[] a0, long[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (long)(a1[i+0]<<0);
+      a0[i+1] = (long)(a1[i+1]<<1);
+      a0[i+2] = (long)(a1[i+2]<<2);
+      a0[i+3] = (long)(a1[i+3]<<3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (long)(a1[i]<<(i&3));
+    }
+  }
+
+  static int verify(String text, int i, long elem, long val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7192963/TestShortVect.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7192963
+ * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new'
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestShortVect
+ */
+
+public class TestShortVect {
+  private static final int ARRLEN = 997;
+  private static final int ITERS  = 11000;
+  public static void main(String args[]) {
+    System.out.println("Testing Short vectors");
+    int errn = test();
+    if (errn > 0) {
+      System.err.println("FAILED: " + errn + " errors");
+      System.exit(97);
+    }
+    System.out.println("PASSED");
+  }
+
+  static int test() {
+    short[] a0 = new short[ARRLEN];
+    short[] a1 = new short[ARRLEN];
+    // Initialize
+    for (int i=0; i<ARRLEN; i++) {
+      a1[i] = (short)i;
+    }
+    System.out.println("Warmup");
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+      test_addi(a0, a1);
+      test_lsai(a0, a1);
+      test_unrl_init(a0);
+      test_unrl_addi(a0, a1);
+      test_unrl_lsai(a0, a1);
+    }
+    // Test and verify results
+    System.out.println("Verification");
+    int errn = 0;
+    {
+      test_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_init: ", i, a0[i], (short)(i&3));
+      }
+      test_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_addi: ", i, a0[i], (short)(i+(i&3)));
+      }
+      test_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_lsai: ", i, a0[i], (short)(i<<(i&3)));
+      }
+      test_unrl_init(a0);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_init: ", i, a0[i], (short)(i&3));
+      }
+      test_unrl_addi(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_addi: ", i, a0[i], (short)(i+(i&3)));
+      }
+      test_unrl_lsai(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_unrl_lsai: ", i, a0[i], (short)(i<<(i&3)));
+      }
+    }
+
+    if (errn > 0)
+      return errn;
+
+    System.out.println("Time");
+    long start, end;
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_lsai: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_init(a0);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_init: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_addi(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_addi: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_unrl_lsai(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_unrl_lsai: " + (end - start));
+
+    return errn;
+  }
+
+  static void test_init(short[] a0) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(i&3);
+    }
+  }
+  static void test_addi(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]+(i&3));
+    }
+  }
+  static void test_lsai(short[] a0, short[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (short)(a1[i]<<(i&3));
+    }
+  }
+  static void test_unrl_init(short[] a0) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = 0;
+      a0[i+1] = 1;
+      a0[i+2] = 2;
+      a0[i+3] = 3;
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (short)(i&3);
+    }
+  }
+  static void test_unrl_addi(short[] a0, short[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (short)(a1[i+0]+0);
+      a0[i+1] = (short)(a1[i+1]+1);
+      a0[i+2] = (short)(a1[i+2]+2);
+      a0[i+3] = (short)(a1[i+3]+3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (short)(a1[i]+(i&3));
+    }
+  }
+  static void test_unrl_lsai(short[] a0, short[] a1) {
+    int i = 0;
+    for (; i < a0.length-4; i+=4) {
+      a0[i+0] = (short)(a1[i+0]<<0);
+      a0[i+1] = (short)(a1[i+1]<<1);
+      a0[i+2] = (short)(a1[i+2]<<2);
+      a0[i+3] = (short)(a1[i+3]<<3);
+    }
+    for (; i < a0.length; i++) {
+      a0[i] = (short)(a1[i]<<(i&3));
+    }
+  }
+
+  static int verify(String text, int i, short elem, short val) {
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+}
+
--- a/test/gc/6941923/test6941923.sh	Thu Aug 23 12:27:33 2012 -0700
+++ b/test/gc/6941923/test6941923.sh	Fri Aug 24 15:51:19 2012 -0700
@@ -9,7 +9,7 @@
 ## skip on windows
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
--- a/test/runtime/6626217/Test6626217.sh	Thu Aug 23 12:27:33 2012 -0700
+++ b/test/runtime/6626217/Test6626217.sh	Fri Aug 24 15:51:19 2012 -0700
@@ -49,7 +49,7 @@
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
--- a/test/runtime/6878713/Test6878713.sh	Thu Aug 23 12:27:33 2012 -0700
+++ b/test/runtime/6878713/Test6878713.sh	Fri Aug 24 15:51:19 2012 -0700
@@ -28,7 +28,7 @@
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
--- a/test/runtime/6929067/Test6929067.sh	Thu Aug 23 12:27:33 2012 -0700
+++ b/test/runtime/6929067/Test6929067.sh	Fri Aug 24 15:51:19 2012 -0700
@@ -27,17 +27,10 @@
     PS=":"
     FS="/"
     ;;
-  SunOS | Windows_* | *BSD)
-    NULL=NUL
-    PS=";"
-    FS="\\"
+  * )
     echo "Test passed; only valid for Linux"
     exit 0;
     ;;
-  * )
-    echo "Unrecognized system!"
-    exit 1;
-    ;;
 esac
 
 # Choose arch: i386 or amd64 (test is Linux-specific)
--- a/test/runtime/7051189/Xchecksig.sh	Thu Aug 23 12:27:33 2012 -0700
+++ b/test/runtime/7051189/Xchecksig.sh	Fri Aug 24 15:51:19 2012 -0700
@@ -43,7 +43,7 @@
 
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     FS="/"
     ;;
   Windows_* )
--- a/test/runtime/7110720/Test7110720.sh	Thu Aug 23 12:27:33 2012 -0700
+++ b/test/runtime/7110720/Test7110720.sh	Fri Aug 24 15:51:19 2012 -0700
@@ -37,7 +37,7 @@
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     FS="/"
     RM=/bin/rm
     CP=/bin/cp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/7116786/Test7116786.java	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,486 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test Test7116786
+ * @summary verify that VerifyError messages are as expected
+ * @library testcases.jar
+ * @run main/othervm -Xverify:all Test7116786
+ */
+
+
+/**
+ * This class contains information regarding when a VerifyError is thrown
+ * in the verifier.  Most of the data is informational-only, and can be
+ * used to track down where and why VerifyErrors are thrown.  As such it
+ * is possible the information may go out-of-date.
+ *
+ * The only fields used for the purpose of testing is the 'caseName' and
+ * the 'message'.  The 'caseName' corresponds to a classfile which exhibits
+ * the VerifyError, and the 'message' is a regular expression which we expect
+ * to match the verify error message.  If the 'message' doesn't match what
+ * we expect, it warrents investigation to see if we are still triggering
+ * the VerifyError that we expect.  It could simply just be that the message
+ * changed, which is fine.
+ *
+ * Some cases are not testable, either because the code is probably unreachable
+ * or the test classfile would be too onerous to create.  These cases are
+ * marked with 'testable' == false, and the test runner will skip them.
+ */
+class Case {
+    private String caseName;    // Name of the case
+    private String file;        // Source file where VerifyError is thrown
+    private String location;    // enclosing function or switch case
+    private String description; // What causes this VerifyError
+    private String message;     // The VerifyError message used.
+
+    private boolean testable;   // Whether this case is testable or not.
+
+    public Case(String caseName, String file, boolean testable,
+                String location, String description, String message) {
+        this.caseName = caseName;
+        this.file = file;
+        this.testable = testable;
+        this.location = location;
+        this.description = description;
+        this.message = message;
+    }
+
+    String getCaseName() { return this.caseName; }
+    String getFile() { return this.file; }
+    String getLocation() { return this.location; }
+    String getDescription() { return this.description; }
+    String getMessage() { return this.message; }
+
+    boolean isTestable() { return this.testable; }
+}
+
+/**
+ * These are the locations in the source code where VerifyErrors are thrown
+ * as of today, 2012/07/18.  These may change as the verification code is
+ * modified, which is ok.  This test is trying to provide coverage for all
+ * VerifyErrors (just to make sure there are no crashes) and it's probably
+ * not necessary to update it every time the VM changes.
+ */
+class VerifyErrorCases {
+    public static final Case[] cases = {
+
+        new Case("case00", "stackMapFrame.cpp", true, "pop_stack_ex",
+                 "stack underflow",
+                 "Operand stack underflow"),
+
+        new Case("case01", "stackMapFrame.cpp", true, "pop_stack_ex",
+                 "stack pop not assignable to expected",
+                 "Bad type on operand stack"),
+
+        new Case("case02", "stackMapFrame.cpp", true, "get_local",
+                 "local index out-of-bounds",
+                 "Local variable table overflow"),
+
+        new Case("case03", "stackMapFrame.cpp", true, "get_local",
+                 "local not assignable to expected",
+                 "Bad local variable type"),
+
+        new Case("case04", "stackMapFrame.cpp", true, "get_local_2",
+                 "local index out-of-bounds [type2]",
+                 "get long/double overflows locals"),
+
+        new Case("case05", "stackMapFrame.cpp", true, "get_local_2",
+                 "local not assignabled to expected [type2]",
+                 "Bad local variable type"),
+
+        /* Unreachable: Can't split long/double on stack */
+        new Case("case06", "stackMapFrame.cpp", false, "get_local_2",
+                 "local second-word not assignabled to expected",
+                 "Bad local variable type"),
+
+        new Case("case07", "stackMapFrame.cpp", true, "set_local",
+                 "local index out-of-bounds",
+                 "Local variable table overflow"),
+
+        new Case("case08", "stackMapFrame.cpp", true, "set_local_2",
+                 "local index out-of-bounds [type2]",
+                 "Local variable table overflow"),
+
+        new Case("case09", "stackMapFrame.hpp", true, "push_stack",
+                 "stack overflow",
+                 "Operand stack overflow"),
+
+        new Case("case10", "stackMapFrame.hpp", true, "push_stack_2",
+                 "stack overflow [type2]",
+                 "Operand stack overflow"),
+
+        new Case("case11", "stackMapFrame.hpp", true, "pop_stack",
+                 "stack underflow",
+                 "Operand stack underflow"),
+
+        new Case("case12", "stackMapTable.cpp", true, "StackMapTable ctor",
+                 "stackmap offset beyond code size",
+                 "StackMapTable error: bad offset"),
+
+        new Case("case13", "stackMapTable.cpp", true, "match_stackmap",
+                 "no stackmap frame at expected location",
+                 "Expecting a stackmap frame at branch target "),
+
+        new Case("case14", "stackMapTable.cpp", true, "check_jump_target",
+                 "no stackmap frame at jump location or bad jump",
+                 "Inconsistent stackmap frames at branch target "),
+
+        new Case("case15", "stackMapTable.cpp", true, "check_new_object",
+                 "backward jump with uninit",
+                 "Uninitialized object exists on backward branch "),
+
+        /* Unreachable: wide instructions verified during bytecode analysis */
+        new Case("case16", "verifier.cpp", false, "loop header",
+                 "bad op in wide instruction",
+                 "Bad wide instruction"),
+
+        new Case("case17", "verifier.cpp", true, "case iaload",
+                 "TOS not X array",
+                 "Bad type on operand stack in iaload"),
+
+        new Case("case18", "verifier.cpp", true, "case baload",
+                 "TOS not X array",
+                 "Bad type on operand stack in baload"),
+
+        new Case("case19", "verifier.cpp", true, "case caload",
+                 "TOS not X array",
+                 "Bad type on operand stack in caload"),
+
+        new Case("case20", "verifier.cpp", true, "case saload",
+                 "TOS not X array",
+                 "Bad type on operand stack in saload"),
+
+        new Case("case21", "verifier.cpp", true, "case laload",
+                 "TOS not X array",
+                 "Bad type on operand stack in laload"),
+
+        new Case("case22", "verifier.cpp", true, "case faload",
+                 "TOS not X array",
+                 "Bad type on operand stack in faload"),
+
+        new Case("case23", "verifier.cpp", true, "case daload",
+                 "TOS not X array",
+                 "Bad type on operand stack in daload"),
+
+        new Case("case24", "verifier.cpp", true, "case aaload",
+                 "TOS not X array",
+                 "Bad type on operand stack in aaload"),
+
+        new Case("case25", "verifier.cpp", true, "case iastore",
+                 "TOS not int array",
+                 "Bad type on operand stack in iastore"),
+
+        new Case("case26", "verifier.cpp", true, "case bastore",
+                 "TOS not byte array",
+                 "Bad type on operand stack in bastore"),
+
+        new Case("case27", "verifier.cpp", true, "case castore",
+                 "TOS not char array",
+                 "Bad type on operand stack in castore"),
+
+        new Case("case28", "verifier.cpp", true, "case sastore",
+                 "TOS not short array",
+                 "Bad type on operand stack in sastore"),
+
+        new Case("case29", "verifier.cpp", true, "case lastore",
+                 "TOS not long array",
+                 "Bad type on operand stack in lastore"),
+
+        new Case("case30", "verifier.cpp", true, "case fastore",
+                 "TOS not float array",
+                 "Bad type on operand stack in fastore"),
+
+        new Case("case31", "verifier.cpp", true, "case dastore",
+                 "TOS not double array",
+                 "Bad type on operand stack in dastore"),
+
+        new Case("case32", "verifier.cpp", true, "case aastore",
+                 "TOS not object array",
+                 "Bad type on operand stack in aastore"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at TOS which is not possible. */
+        new Case("case33", "verifier.cpp", false, "case pop2",
+                 "TOS is category2_1st (would split)",
+                 "Bad type on operand stack in pop2"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at stack depth 2 with category_1 on TOS which is not
+         * possible. */
+        new Case("case34", "verifier.cpp", false, "case dup_x2",
+                 "TOS-1 is category2_1st (would split)",
+                 "Bad type on operand stack in dup_x2"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at TOS which is not possible. */
+        new Case("case35", "verifier.cpp", false, "case dup2",
+                 "TOS-1 is category2_1st (would split)",
+                 "Bad type on operand stack in dup2"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at TOS which is not possible. */
+        new Case("case36", "verifier.cpp", false, "case dup2_x1",
+                 "TOS-1 is category2_1st (would split)",
+                 "Bad type on operand stack in dup2_x1"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at TOS which is not possible. */
+        new Case("case37", "verifier.cpp", false, "case dup2_x2",
+                 "TOS-1 is category2_1st (would split)",
+                 "Bad type on operand stack in dup2_x2"),
+
+        /* Unreachable: In order to hit this case, we would need a
+         * category2_1st at stack depth 3 with either 2 category_1 or 1
+         * category_2 on TOS, which is not possible. */
+        new Case("case38", "verifier.cpp", false, "case dup2_x2",
+                 "TOS-3 is category2_1st (would split)",
+                 "Bad type on operand stack in dup2_x2"),
+
+        new Case("case39", "verifier.cpp", true, "case return",
+                 "return type of method is not void",
+                 "Method expects a return value"),
+
+        new Case("case40", "verifier.cpp", true, "case return",
+                 "return with uninitialized this ",
+                 "Constructor must call super() or this() before return"),
+
+        new Case("case41", "verifier.cpp", true, "case new",
+                 "cp index not a class type",
+                 "Illegal new instruction"),
+
+        new Case("case42", "verifier.cpp", true, "case arraylength",
+                 "TOS is not an array",
+                 "Bad type on operand stack in arraylength"),
+
+        new Case("case43", "verifier.cpp", true, "case multianewarray",
+                 "CP index does not refer to array type",
+                 "Illegal constant pool index in multianewarray instruction"),
+
+        new Case("case44", "verifier.cpp", true, "case multianewarray",
+                 "Bad dimension (<1) or does not match CP signature",
+                 "Illegal dimension in multianewarray instruction: "),
+
+        new Case("case45", "verifier.cpp", true, "case default",
+                 "Unrecognized bytecode",
+                 "Bad instruction: "),
+
+        new Case("case46", "verifier.cpp", true, "loop end",
+                 "control flow falls off method",
+                 "Control flow falls through code end"),
+
+        new Case("case47", "verifier.cpp", true, "generate_code_data",
+                 "illegal bytecode via RawBytecodeStream (breakpoint)",
+                 "Bad instruction"),
+
+        new Case("case48", "verifier.cpp", true, "generate_code_data",
+                 "illegal bytecode via RawBytecodeStream (other illegal)",
+                 "Bad instruction"),
+
+        new Case("case49", "verifier.cpp", true,
+                 "verify_exception_handler_table",
+                 "catch_type is not throwable",
+                 "Catch type is not a subclass of Throwable in " +
+                 "exception handler "),
+
+        new Case("case50", "verifier.cpp", true, "verify_stackmap_table",
+                 "missing a stack map frame @ target location (mid table)",
+                 "Expecting a stack map frame"),
+
+        new Case("case51", "verifier.cpp", true, "verify_stackmap_table",
+                 "stack map does not match?",
+                 "Instruction type does not match stack map"),
+
+        new Case("case52", "verifier.cpp", true, "verify_stackmap_table",
+                 "missing a stack map frame @ target location (end of table)",
+                 "Expecting a stack map frame"),
+
+        new Case("case53", "verifier.cpp", true,
+                 "verify_exception_handler_targets",
+                 "stackmap mismatch at exception handler",
+                 "Stack map does not match the one at exception handler "),
+
+        new Case("case54", "verifier.cpp", true, "verify_cp_index",
+                 "constant pool index is out-of-bounds",
+                 "Illegal constant pool index "),
+
+        new Case("case55", "verifier.cpp", true, "verify_cp_type",
+                 "constant pool entry is not expected type",
+                 "Illegal type at constant pool entry "),
+
+        new Case("case56", "verifier.cpp", true, "verify_cp_class_type",
+                 "constant pool entry is not an object type",
+                 "Illegal type at constant pool entry "),
+
+        /* Unreachable: verify_cp_type gates this case */
+        new Case("case57", "verifier.cpp", false, "verify_ldc",
+                 "invalid constant pool index in ldc",
+                 "Invalid index in ldc"),
+
+        new Case("case58", "verifier.cpp", true, "verify_switch",
+                 "bad switch padding",
+                 "Nonzero padding byte in lookswitch or tableswitch"),
+
+        new Case("case59", "verifier.cpp", true, "verify_switch",
+                 "tableswitch low is greater than high",
+                 "low must be less than or equal to high in tableswitch"),
+
+        /* Unreachable on 64-bit?  Only way to get here is to overflow
+         * the 'keys' variable which can't happen on 64-bit since we're dealing
+         * with 32-bit values.  Perhaps reachable on 32-bit but the
+         * triggering class would be quite large */
+        new Case("case60", "verifier.cpp", false, "verify_switch",
+                 "high - low + 1 < 0 (overflow?)",
+                 "too many keys in tableswitch"),
+
+        /* Would have to create a 16G classfile to trip this.  Possible but
+         * not reasonable to do in a test.  */
+        new Case("case61", "verifier.cpp", false, "verify_switch",
+                 "lookupswitch keys < 0",
+                 "number of keys in lookupswitch less than 0"),
+
+        new Case("case62", "verifier.cpp", true, "verify_switch",
+                 "lookupswitch keys out-of-order",
+                 "Bad lookupswitch instruction"),
+
+        /* Unreachable: Class file parser verifies Fieldref contents */
+        new Case("case63", "verifier.cpp", false, "verify_field_instructions",
+                 "referenced class is not an CP object",
+                 "Expecting reference to class in class "),
+
+        new Case("case64", "verifier.cpp", true, "verify_field_instructions",
+                 "TOS not assignable to field type in putfield",
+                 "Bad type on operand stack in putfield"),
+
+        new Case("case65", "verifier.cpp", true, "verify_field_instructions",
+                 "TOS not assignable to class when accessing protected field",
+                 "Bad access to protected data in getfield"),
+
+        new Case("case66", "verifier.cpp", true, "verify_invoke_init",
+                 "Uninit_this is not of the current type or it's supertype",
+                 "Bad <init> method call"),
+
+        /* Unreachable:  Stack map parsing ensures valid type and new
+         * instructions have a valid BCI. */
+        new Case("case67", "verifier.cpp", false, "verify_invoke_init",
+                 "Uninit type with bad new instruction index",
+                 "Expecting new instruction"),
+
+        new Case("case68", "verifier.cpp", true, "verify_invoke_init",
+                 "calling other class's <init> method",
+                 "Call to wrong <init> method"),
+
+        new Case("case69", "verifier.cpp", true, "verify_invoke_init",
+                 "Calling protected <init> and type unassignable from current",
+                 "Bad access to protected <init> method"),
+
+        new Case("case70", "verifier.cpp", true, "verify_invoke_init",
+                 "TOS is not an uninitialized (or Uninit_this) type",
+                 "Bad operand type when invoking <init>"),
+
+        new Case("case71", "verifier.cpp", true, "verify_invoke_instructions",
+                 "Arg count in instruction doesn't match signature",
+                 "Inconsistent args count operand in invokeinterface"),
+
+        new Case("case72", "verifier.cpp", true, "verify_invoke_instructions",
+                 "Non-zero pad in invokeinterface",
+                 "Fourth operand byte of invokeinterface must be zero"),
+
+        new Case("case73", "verifier.cpp", true, "verify_invoke_instructions",
+                 "Non-zero pad in invokedynamic",
+                 "Third and fourth operand bytes of " +
+                 "invokedynamic must be zero"),
+
+        new Case("case74", "verifier.cpp", true, "verify_invoke_instructions",
+                 "Non-invokespecial trying to invoke a '<' method",
+                 "Illegal call to internal method"),
+
+        new Case("case75", "verifier.cpp", true, "verify_invoke_instructions",
+                 "invokespecial and current unassignable from referenced type",
+                 "Bad invokespecial instruction: current class isn't " +
+                 "assignable to reference class."),
+
+        new Case("case76", "verifier.cpp", true, "verify_invoke_instructions",
+                 "TOS not assignable to current when calling protected method",
+                 "Bad access to protected data in invokevirtual"),
+
+        /* Unreachable:  class file parser enforces void signature */
+        new Case("case77", "verifier.cpp", false, "verify_invoke_instructions",
+                 "<init> method is not void return",
+                 "Return type must be void in <init> method"),
+
+        new Case("case78", "verifier.cpp", true, "get_newarray_type",
+                 "newarray type invalid",
+                 "Illegal newarray instruction"),
+
+        new Case("case79", "verifier.cpp", true, "verify_return_value",
+                 "void return from method which has a return value",
+                 "Method expects a return value"),
+
+        new Case("case80", "verifier.cpp", true, "verify_return_value",
+                 "TOS type does not match signature",
+                 "Bad return type"),
+
+        new Case("case81", "verifier.cpp", true, "verify_stackmap_table",
+                 "stack map does not match (flags)",
+                 "Instruction type does not match stack map")
+    };
+}
+
+public class Test7116786 {
+    public static void main(String argv[]) throws Exception {
+        for (Case c : VerifyErrorCases.cases) {
+            System.out.println("******** " + c.getCaseName() + " ********");
+            if (c.isTestable()) {
+                try {
+                    ClassLoader cl = Test7116786.class.getClassLoader();
+                    Class<?> cls = Class.forName(c.getCaseName(), true, cl);
+                    throw new RuntimeException(
+                        "++ FAIL: No verify error encountered");
+                } catch (VerifyError ve) {
+                    String message = c.getMessage();
+                    String veMessage = ve.getMessage();
+                    System.out.print(veMessage);
+                    if (!veMessage.startsWith(message)) {
+                        // We're not seeing the message we expect.  Could be
+                        // that we've gotten the wrong VerifyError case, or
+                        // maybe the message changed.
+                        System.out.println("++ FAIL? " +
+                            "Message does not match what was expected: " +
+                            message);
+                        continue;
+                    }
+                    if (!veMessage.contains("Exception Details:") &&
+                        !veMessage.contains("Reason:")) {
+                        System.out.println("++ FAIL: No details found");
+                        throw new RuntimeException("FAIL: No details found");
+                    }
+                    System.out.println("++ PASS");
+                }
+            } else {
+               System.out.println("++ SKIPPED");
+            }
+        }
+    }
+}
Binary file test/runtime/7116786/testcases.jar has changed
--- a/test/runtime/7158800/Test7158800.sh	Thu Aug 23 12:27:33 2012 -0700
+++ b/test/runtime/7158800/Test7158800.sh	Fri Aug 24 15:51:19 2012 -0700
@@ -46,7 +46,7 @@
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
@@ -67,13 +67,13 @@
 
 THIS_DIR=`pwd`
 
-${TESTJAVA}${FS}bin${FS}java -fullversion
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -fullversion
 
 ${TESTJAVA}${FS}bin${FS}javac -d . ${TESTSRC}${FS}InternTest.java
 
 cp ${TESTSRC}${FS}badstrings.txt .
 
-${TESTJAVA}${FS}bin${FS}java -XX:+PrintStringTableStatistics -XX:+TraceSafepointCleanupTime InternTest bad > test.out 2>&1 &
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -XX:+PrintStringTableStatistics -XX:+TraceSafepointCleanupTime InternTest bad > test.out 2>&1 &
 C_PID=$!
 
 sleep 60
--- a/test/runtime/7158988/TestFieldMonitor.sh	Thu Aug 23 12:27:33 2012 -0700
+++ b/test/runtime/7158988/TestFieldMonitor.sh	Fri Aug 24 15:51:19 2012 -0700
@@ -21,7 +21,7 @@
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin)
     NULL=/dev/null
     PS=":"
     FS="/"
@@ -52,30 +52,7 @@
 
 ${TESTJAVA}${FS}bin${FS}javac -classpath .${PS}$TESTJAVA${FS}lib${FS}tools.jar *.java
 
-${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -classpath .${PS}$TESTJAVA${FS}lib${FS}tools.jar FieldMonitor > test.out 2>&1 &
-
-P_PID=$!
-
-sleep 60
-STATUS=0
-
-case "$OS" in
-    SunOS | Linux )
-        ps -ef | grep $P_PID | grep -v grep > ${NULL}
-        if [ $? = 0 ]; then
-            kill -9 $P_PID
-            STATUS=1
-        fi
-        ;;
-      * )
-        ps | grep -i "FieldMonitor" | grep -v grep > ${NULL}
-        if [ $? = 0 ]; then
-            C_PID=`ps | grep -i "FieldMonitor" | awk '{print $1}'`
-            kill -s 9 $C_PID
-            STATUS=1
-        fi
-        ;;
-esac
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -classpath .${PS}$TESTJAVA${FS}lib${FS}tools.jar FieldMonitor > test.out
 
 grep "A fatal error has been detected" test.out > ${NULL}
 if [ $? = 0 ]; then