changeset 14969:a6c144380ce7

HSAIL: added UseHSAILDeoptimization VM option for disabling HSAIL deopt support. Contributed-by: Eric Caspole <eric.caspole@amd.com>
author Doug Simon <doug.simon@oracle.com>
date Fri, 04 Apr 2014 12:22:49 +0200
parents 169caf662ac7
children fe00ea6439be
files graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/BoundsCatchBase.java graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/SingleExceptionTestBase.java graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java src/gpu/hsail/vm/gpu_hsail.cpp src/gpu/hsail/vm/hsailKernelArguments.hpp src/share/vm/graal/graalGlobals.hpp
diffstat 8 files changed, 197 insertions(+), 160 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java	Fri Apr 04 12:05:41 2014 +0200
+++ b/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java	Fri Apr 04 12:22:49 2014 +0200
@@ -34,6 +34,7 @@
 import java.lang.reflect.*;
 
 import org.junit.*;
+import static org.junit.Assume.*;
 
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
@@ -54,7 +55,7 @@
         super(getHSAILBackend().isDeviceInitialized());
     }
 
-    private static HSAILHotSpotBackend getHSAILBackend() {
+    protected static HSAILHotSpotBackend getHSAILBackend() {
         Backend backend = runtime().getBackend(HSAIL.class);
         Assume.assumeTrue(backend instanceof HSAILHotSpotBackend);
         return (HSAILHotSpotBackend) backend;
@@ -102,6 +103,13 @@
         return (canGenerateCalls && canExecuteCalls);
     }
 
+    /**
+     * Determines if the runtime has the capabilities required by this test.
+     */
+    protected boolean supportsRequiredCapabilities() {
+        return true;
+    }
+
     @Override
     protected void dispatchKernelOkra(int range, Object... args) {
         HSAILHotSpotBackend backend = getHSAILBackend();
@@ -140,6 +148,7 @@
     @Override
     public void testGeneratedHsail() {
         try (OverrideScope s = getOverrideScope()) {
+            assumeTrue(supportsRequiredCapabilities());
             super.testGeneratedHsail();
         }
     }
--- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/BoundsCatchBase.java	Fri Apr 04 12:05:41 2014 +0200
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/BoundsCatchBase.java	Fri Apr 04 12:22:49 2014 +0200
@@ -55,6 +55,11 @@
     }
 
     @Override
+    protected boolean supportsRequiredCapabilities() {
+        return getHSAILBackend().getRuntime().getConfig().useHSAILDeoptimization;
+    }
+
+    @Override
     public void runTest() {
         setupArrays();
 
--- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/SingleExceptionTestBase.java	Fri Apr 04 12:05:41 2014 +0200
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/SingleExceptionTestBase.java	Fri Apr 04 12:22:49 2014 +0200
@@ -26,7 +26,7 @@
 import com.oracle.graal.compiler.hsail.test.infra.GraalKernelTester;
 
 /**
- * 
+ *
  * @author ecaspole
  */
 public abstract class SingleExceptionTestBase extends GraalKernelTester {
@@ -35,6 +35,11 @@
     @Result String exceptionString;
     @Result StackTraceElement firstStackTraceElement;
 
+    @Override
+    protected boolean supportsRequiredCapabilities() {
+        return getHSAILBackend().getRuntime().getConfig().useHSAILDeoptimization;
+    }
+
     void recordException(Exception e) {
         // for now we just test that the class the of the exception
         // matches for the java and gpu side
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Fri Apr 04 12:05:41 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Fri Apr 04 12:22:49 2014 +0200
@@ -431,7 +431,7 @@
     public void emitCode(CompilationResultBuilder crb, LIR lir, ResolvedJavaMethod method) {
         assert method != null : lir + " is not associated with a method";
 
-        boolean usesDeoptInfo = true;     // TODO: make this conditional on something?
+        boolean useHSAILDeoptimization = getRuntime().getConfig().useHSAILDeoptimization;
 
         // Emit the prologue.
         HSAILAssembler asm = (HSAILAssembler) crb.asm;
@@ -513,13 +513,13 @@
         for (int i = 0; i < totalParamCount; i++) {
             String str = "align 8 kernarg_" + paramHsailSizes[i] + " " + paramNames[i];
 
-            if (usesDeoptInfo || (i != totalParamCount - 1)) {
+            if (useHSAILDeoptimization || (i != totalParamCount - 1)) {
                 str += ",";
             }
             asm.emitString(str);
         }
 
-        if (usesDeoptInfo) {
+        if (useHSAILDeoptimization) {
             // add in the deoptInfo parameter
             asm.emitString("kernarg_u64 " + asm.getDeoptInfoName());
         }
@@ -542,14 +542,10 @@
         String workItemReg = "$s" + Integer.toString(asRegister(cc.getArgument(nonConstantParamCount)).encoding());
         asm.emitString("workitemabsid_u32 " + workItemReg + ", 0;");
 
-        final int offsetToDeoptSaveStates = getRuntime().getConfig().hsailSaveStatesOffset0;
-        final int sizeofKernelDeopt = getRuntime().getConfig().hsailSaveStatesOffset1 - getRuntime().getConfig().hsailSaveStatesOffset0;
         final int offsetToDeopt = getRuntime().getConfig().hsailDeoptOffset;
-        final int offsetToNeverRanArray = getRuntime().getConfig().hsailNeverRanArrayOffset;
-        final int offsetToDeoptNextIndex = getRuntime().getConfig().hsailDeoptNextIndexOffset;
         final String deoptInProgressLabel = "@LHandleDeoptInProgress";
 
-        if (usesDeoptInfo) {
+        if (useHSAILDeoptimization) {
             AllocatableValue scratch64 = HSAIL.d16.asValue(Kind.Object);
             AllocatableValue scratch32 = HSAIL.s34.asValue(Kind.Int);
             HSAILAddress deoptInfoAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToDeopt).toAddress();
@@ -631,15 +627,19 @@
         asm.emitString(spillsegStringFinal, spillsegDeclarationPosition);
         // Emit the epilogue.
 
-        final int offsetToDeoptimizationWorkItem = getRuntime().getConfig().hsailDeoptimizationWorkItem;
-        final int offsetToDeoptimizationReason = getRuntime().getConfig().hsailDeoptimizationReason;
-        final int offsetToDeoptimizationFrame = getRuntime().getConfig().hsailDeoptimizationFrame;
-        final int offsetToFramePc = getRuntime().getConfig().hsailFramePcOffset;
-        final int offsetToNumSaves = getRuntime().getConfig().hsailFrameNumSRegOffset;
-        final int offsetToSaveArea = getRuntime().getConfig().hsailFrameSaveAreaOffset;
+        // TODO: keep track of whether we need it
+        if (useHSAILDeoptimization) {
+            final int offsetToDeoptSaveStates = getRuntime().getConfig().hsailSaveStatesOffset0;
+            final int sizeofKernelDeopt = getRuntime().getConfig().hsailSaveStatesOffset1 - getRuntime().getConfig().hsailSaveStatesOffset0;
+            final int offsetToNeverRanArray = getRuntime().getConfig().hsailNeverRanArrayOffset;
+            final int offsetToDeoptNextIndex = getRuntime().getConfig().hsailDeoptNextIndexOffset;
+            final int offsetToDeoptimizationWorkItem = getRuntime().getConfig().hsailDeoptimizationWorkItem;
+            final int offsetToDeoptimizationReason = getRuntime().getConfig().hsailDeoptimizationReason;
+            final int offsetToDeoptimizationFrame = getRuntime().getConfig().hsailDeoptimizationFrame;
+            final int offsetToFramePc = getRuntime().getConfig().hsailFramePcOffset;
+            final int offsetToNumSaves = getRuntime().getConfig().hsailFrameNumSRegOffset;
+            final int offsetToSaveArea = getRuntime().getConfig().hsailFrameSaveAreaOffset;
 
-        // TODO: keep track of whether we need it
-        if (usesDeoptInfo) {
             AllocatableValue scratch64 = HSAIL.d16.asValue(Kind.Object);
             AllocatableValue cuSaveAreaPtr = HSAIL.d17.asValue(Kind.Object);
             AllocatableValue waveMathScratch1 = HSAIL.d18.asValue(Kind.Object);
@@ -773,6 +773,11 @@
             // and emit the return
             crb.frameContext.leave(crb);
             asm.exit();
+        } else {
+            // Deoptimization is explicitly off, so emit simple return
+            asm.emitString0(asm.getDeoptLabelName() + ":\n");
+            asm.emitComment("// No deoptimization");
+            asm.emitString("ret;");
         }
 
         asm.emitString0("}; \n");
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java	Fri Apr 04 12:05:41 2014 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java	Fri Apr 04 12:22:49 2014 +0200
@@ -1004,6 +1004,8 @@
     @HotSpotVMField(name = "ThreadShadow::_pending_deoptimization", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int pendingDeoptimizationOffset;
     @HotSpotVMField(name = "ThreadShadow::_pending_failed_speculation", type = "oop", get = HotSpotVMField.Type.OFFSET) @Stable public int pendingFailedSpeculationOffset;
 
+    @HotSpotVMFlag(name = "UseHSAILDeoptimization") @Stable public boolean useHSAILDeoptimization;
+
     /**
      * Offsets of Hsail deoptimization fields (defined in gpu_hsail.hpp). Used to propagate
      * exceptions from Hsail back to C++ runtime.
--- a/src/gpu/hsail/vm/gpu_hsail.cpp	Fri Apr 04 12:05:41 2014 +0200
+++ b/src/gpu/hsail/vm/gpu_hsail.cpp	Fri Apr 04 12:22:49 2014 +0200
@@ -133,10 +133,6 @@
   }
 }
 
-
-// for experimentation
-static bool useDeoptInfo = true;
-
 jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args_handle, methodHandle& mh, nmethod *nm, jobject oops_save_handle, TRAPS) {
 
   ResourceMark rm(THREAD);
@@ -145,9 +141,8 @@
   // Reset the kernel arguments
   _okra_clearargs(kernel);
 
-  
   HSAILDeoptimizationInfo* e;
-  if (useDeoptInfo) {
+  if (UseHSAILDeoptimization) {
     e = new (ResourceObj::C_HEAP, mtInternal) HSAILDeoptimizationInfo();
     e->set_never_ran_array(NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal));
     memset(e->never_ran_array(), 0, dimX * sizeof(jboolean));
@@ -168,150 +163,161 @@
   }
 
   // Run the kernel
-  bool success = _okra_execute_with_range(kernel, dimX);
-  // check if any workitem requested a deopt
-  // currently we only support at most one such workitem
-
-
-  int deoptcode = e->deopt_occurred();
-  if (useDeoptInfo &&  deoptcode != 0) {
-    if (deoptcode != 1) {
-      // error condition detected in deopt code
-      char msg[200];
-      sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1*(deoptcode + 1));
-      guarantee(deoptcode == 1, msg);
-    }
-    if (TraceGPUInteraction) {
-      tty->print_cr("deopt happened.");
-      HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[0];
-      tty->print_cr("first deopter was workitem %d", pdeopt->workitem());
-    }
+  bool success = false;
+  {
+    TraceTime t1("execute kernel", TraceGPUInteraction);
+    success = _okra_execute_with_range(kernel, dimX);
+  }
 
-    // Before handling any deopting workitems, save the pointers from
-    // the hsail frames in oops_save so they get adjusted by any
-    // GC. Need to do this before leaving thread_in_vm mode.
-    // resolve handle only needed once here (not exiting vm mode)
-    objArrayOop oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle);
+  if (UseHSAILDeoptimization) {
+    // check if any workitem requested a deopt
+    // currently we only support at most one such workitem
+    int deoptcode = e->deopt_occurred();
+    if (deoptcode != 0) {
+      if (deoptcode != 1) {
+        // error condition detected in deopt code
+        char msg[200];
+        sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1 * (deoptcode + 1));
+        guarantee(deoptcode == 1, msg);
+      }
 
-    // since slots are allocated from the beginning, we know how far to look
-    assert(e->num_deopts() < MAX_DEOPT_SAVE_STATES_SIZE, "deopt save state overflow");
-    for (int k = 0; k < e->num_deopts(); k++) {
-      HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k];
-      jint workitem = pdeopt->workitem();
-      if (workitem != -1) {      
-        // this is a workitem that deopted
-        HSAILFrame *hsailFrame = pdeopt->first_frame();
-        int dregOopMap = hsailFrame->dreg_oops_map();
-        for (int bit = 0; bit < 16; bit++) {
-          if ((dregOopMap & (1 << bit)) != 0) {
-            // the dregister at this bit is an oop, save it in the array
-            int index = k * 16 + bit;
-            void* saved_oop = (void*) hsailFrame->get_d_reg(bit);
-            oopsSaveArray->obj_at_put(index, (oop) saved_oop);
-          }
-        }
-      }
-    }   
+      {
+        TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction);
 
-    // Handle any deopting workitems. 
-    int count_deoptimized = 0;
-    for (int k = 0; k < e->num_deopts(); k++) {
-      HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k];
-    
-      jint workitem = pdeopt->workitem();
-      if (workitem != -1) {      
-        int deoptId = pdeopt->pc_offset();
-        HSAILFrame *hsailFrame = pdeopt->first_frame();
+        if (TraceGPUInteraction) {
+          tty->print_cr("deopt happened.");
+          HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[0];
+          tty->print_cr("first deopter was workitem %d", pdeopt->workitem());
+        }
 
-        // update the hsailFrame from the oopsSaveArray
-        // re-resolve the handle
-        oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle);
+        // Before handling any deopting workitems, save the pointers from
+        // the hsail frames in oops_save so they get adjusted by any
+        // GC. Need to do this before leaving thread_in_vm mode.
+        // resolve handle only needed once here (not exiting vm mode)
+        objArrayOop oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle);
 
-        int dregOopMap = hsailFrame->dreg_oops_map();
-        for (int bit = 0; bit < 16; bit++) {
-          if ((dregOopMap & (1 << bit)) != 0) {
-            // the dregister at this bit is an oop, retrieve it from array and put back in frame
-            int index = k * 16 + bit;
-            void * dregValue = (void *) oopsSaveArray->obj_at(index);
-            void * oldDregValue = (void *) hsailFrame->get_d_reg(bit);
-            assert((oldDregValue != 0 ? dregValue != 0 : dregValue == 0) , "bad dregValue retrieved");
-            if (TraceGPUInteraction) {
-              if (dregValue != oldDregValue) {
-                tty->print_cr("oop moved for $d%d, workitem %d, slot %d, old=%p, new=%p", bit, workitem, k, oldDregValue, dregValue);
+        // since slots are allocated from the beginning, we know how far to look
+        assert(e->num_deopts() < MAX_DEOPT_SAVE_STATES_SIZE, "deopt save state overflow");
+        for (int k = 0; k < e->num_deopts(); k++) {
+          HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k];
+          jint workitem = pdeopt->workitem();
+          if (workitem != -1) {
+            // this is a workitem that deopted
+            HSAILFrame *hsailFrame = pdeopt->first_frame();
+            int dregOopMap = hsailFrame->dreg_oops_map();
+            for (int bit = 0; bit < 16; bit++) {
+              if ((dregOopMap & (1 << bit)) != 0) {
+                // the dregister at this bit is an oop, save it in the array
+                int index = k * 16 + bit;
+                void* saved_oop = (void*) hsailFrame->get_d_reg(bit);
+                oopsSaveArray->obj_at_put(index, (oop) saved_oop);
               }
             }
-            hsailFrame->put_d_reg(bit, (jlong) dregValue);
-          }
-        }
-       
-        JavaValue result(T_VOID);
-        JavaCallArguments javaArgs;
-        javaArgs.set_alternative_target(nm);
-        javaArgs.push_int(deoptId);
-        javaArgs.push_long((jlong) hsailFrame);
-
-        // override the deoptimization action with Action_none until we decide
-        // how to handle the other actions.
-        int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none);
-        javaArgs.push_int(myActionReason);
-        javaArgs.push_oop((oop)NULL);
-        if (TraceGPUInteraction) {
-          int dregOopMap = hsailFrame->dreg_oops_map();
-          tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d, dregOopMap=%04x", workitem, k, deoptId, hsailFrame, myActionReason, dregOopMap);
-          // show the registers containing references
-          for (int bit = 0; bit < 16; bit++) {
-            if ((dregOopMap & (1 << bit)) != 0) {
-              tty->print_cr("  oop $d%d = %p", bit, hsailFrame->get_d_reg(bit));
-            }
           }
         }
-        JavaCalls::call(&result, mh, &javaArgs, THREAD);
-        count_deoptimized++;
+
+        // Handle any deopting workitems.
+        int count_deoptimized = 0;
+        for (int k = 0; k < e->num_deopts(); k++) {
+          HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k];
+
+          jint workitem = pdeopt->workitem();
+          if (workitem != -1) {
+            int deoptId = pdeopt->pc_offset();
+            HSAILFrame *hsailFrame = pdeopt->first_frame();
+
+            // update the hsailFrame from the oopsSaveArray
+            // re-resolve the handle
+            oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle);
+
+            int dregOopMap = hsailFrame->dreg_oops_map();
+            for (int bit = 0; bit < 16; bit++) {
+              if ((dregOopMap & (1 << bit)) != 0) {
+                // the dregister at this bit is an oop, retrieve it from array and put back in frame
+                int index = k * 16 + bit;
+                void * dregValue = (void *) oopsSaveArray->obj_at(index);
+                void * oldDregValue = (void *) hsailFrame->get_d_reg(bit);
+                assert((oldDregValue != 0 ? dregValue != 0 : dregValue == 0), "bad dregValue retrieved");
+                if (TraceGPUInteraction) {
+                  if (dregValue != oldDregValue) {
+                    tty->print_cr("oop moved for $d%d, workitem %d, slot %d, old=%p, new=%p", bit, workitem, k, oldDregValue, dregValue);
+                  }
+                }
+                hsailFrame->put_d_reg(bit, (jlong) dregValue);
+              }
+            }
+
+            JavaValue result(T_VOID);
+            JavaCallArguments javaArgs;
+            javaArgs.set_alternative_target(nm);
+            javaArgs.push_int(deoptId);
+            javaArgs.push_long((jlong) hsailFrame);
+
+            // override the deoptimization action with Action_none until we decide
+            // how to handle the other actions.
+            int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none);
+            javaArgs.push_int(myActionReason);
+            javaArgs.push_oop((oop) NULL);
+            if (TraceGPUInteraction) {
+              int dregOopMap = hsailFrame->dreg_oops_map();
+              tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d, dregOopMap=%04x", workitem, k, deoptId, hsailFrame, myActionReason, dregOopMap);
+              // show the registers containing references
+              for (int bit = 0; bit < 16; bit++) {
+                if ((dregOopMap & (1 << bit)) != 0) {
+                  tty->print_cr("  oop $d%d = %p", bit, hsailFrame->get_d_reg(bit));
+                }
+              }
+            }
+            JavaCalls::call(&result, mh, &javaArgs, THREAD);
+            count_deoptimized++;
+          }
+        }
+        if (TraceGPUInteraction) {
+          tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized);
+        }
       }
-    }    
-    if (TraceGPUInteraction) {
-      tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized);
+
+      {
+        TraceTime t3("handle never-rans", TraceGPUInteraction);
+
+        // Handle any never_ran workitems if there were any
+        int count_never_ran = 0;
+        bool handleNeverRansHere = true;
+        // turn off verbose trace stuff for javacall arg setup
+        bool savedTraceGPUInteraction = TraceGPUInteraction;
+        TraceGPUInteraction = false;
+        jboolean *never_ran_array = e->never_ran_array();
+        if (handleNeverRansHere) {
+          for (int k = 0; k < dimX; k++) {
+            if (never_ran_array[k]) {
+              // run it as a javaCall
+              KlassHandle methKlass = mh->method_holder();
+              Thread* THREAD = Thread::current();
+              JavaValue result(T_VOID);
+              JavaCallArguments javaArgs;
+              // re-resolve the args_handle here
+              objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args_handle);
+              // This object sets up the javaCall arguments
+              // the way argsArray is set up, this should work for instance methods as well
+              // (the receiver will be the first oop pushed)
+              HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static());
+              if (mh->is_static()) {
+                JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
+              } else {
+                JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
+              }
+              count_never_ran++;
+            }
+          }
+          TraceGPUInteraction = savedTraceGPUInteraction;
+          if (TraceGPUInteraction) {
+            tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran);
+            showRanges(never_ran_array, dimX);
+          }
+        } // end of never-ran handling
+      }
     }
 
-    // Handle any never_ran workitems if there were any
-    int count_never_ran = 0;
-    bool handleNeverRansHere = true;
-    // turn off verbose trace stuff for javacall arg setup
-    bool savedTraceGPUInteraction = TraceGPUInteraction;
-    TraceGPUInteraction = false;
-    jboolean *never_ran_array = e->never_ran_array();
-    if (handleNeverRansHere) {
-      for (int k = 0; k < dimX; k++) {
-        if (never_ran_array[k]) {
-          // run it as a javaCall
-          KlassHandle methKlass = mh->method_holder();
-          Thread* THREAD = Thread::current();
-          JavaValue result(T_VOID);
-          JavaCallArguments javaArgs;
-          // re-resolve the args_handle here
-          objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args_handle);
-          // This object sets up the javaCall arguments
-          // the way argsArray is set up, this should work for instance methods as well
-          // (the receiver will be the first oop pushed)
-          HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static());
-          if (mh->is_static()) {
-            JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
-          } else {
-            JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
-          }
-          count_never_ran++;
-        }
-      }
-      TraceGPUInteraction = savedTraceGPUInteraction;
-      if (TraceGPUInteraction) {
-        tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran);
-        showRanges(never_ran_array, dimX);
-      }
-    } // end of never-ran handling
-
-  }
-
-  if (useDeoptInfo) {
     FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal);
     delete e;
   }
--- a/src/gpu/hsail/vm/hsailKernelArguments.hpp	Fri Apr 04 12:05:41 2014 +0200
+++ b/src/gpu/hsail/vm/hsailKernelArguments.hpp	Fri Apr 04 12:22:49 2014 +0200
@@ -79,11 +79,13 @@
         assert(pushed == true, "arg push failed");
     }
     virtual void pushTrailingArgs() {
-        // Last argument is the exception info block
-        if (TraceGPUInteraction) {
-            tty->print_cr("[HSAIL] exception block=" PTR_FORMAT, _exceptionHolder);
+        if (UseHSAILDeoptimization) {
+            // Last argument is the exception info block
+            if (TraceGPUInteraction) {
+                tty->print_cr("[HSAIL] exception block=" PTR_FORMAT, _exceptionHolder);
+            }
+            pushObject(_exceptionHolder);
         }
-        pushObject(_exceptionHolder);
     }
 
     // For kernel arguments we don't pass the final int parameter
--- a/src/share/vm/graal/graalGlobals.hpp	Fri Apr 04 12:05:41 2014 +0200
+++ b/src/share/vm/graal/graalGlobals.hpp	Fri Apr 04 12:22:49 2014 +0200
@@ -85,6 +85,9 @@
   develop(bool, TraceUncollectedSpeculations, false,                        \
           "Print message when a failed speculation was not collected")      \
                                                                             \
+  product(bool, UseHSAILDeoptimization, true,                               \
+          "Code gen and runtime support for deoptimizing HSAIL kernels")    \
+                                                                            \
   product(bool, GPUOffload, false,                                          \
           "Offload execution to GPU whenever possible")                     \
                                                                             \