# HG changeset patch # User Doug Simon # Date 1396606969 -7200 # Node ID a6c144380ce7398a024fdec8d3ef69d56da1a54d # Parent 169caf662ac7923da6b8001580769e5e1597bcf1 HSAIL: added UseHSAILDeoptimization VM option for disabling HSAIL deopt support Contributed-by: Eric Caspole diff -r 169caf662ac7 -r a6c144380ce7 graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java --- a/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java Fri Apr 04 12:05:41 2014 +0200 +++ b/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java Fri Apr 04 12:22:49 2014 +0200 @@ -34,6 +34,7 @@ import java.lang.reflect.*; import org.junit.*; +import static org.junit.Assume.*; import com.oracle.graal.api.code.*; import com.oracle.graal.api.meta.*; @@ -54,7 +55,7 @@ super(getHSAILBackend().isDeviceInitialized()); } - private static HSAILHotSpotBackend getHSAILBackend() { + protected static HSAILHotSpotBackend getHSAILBackend() { Backend backend = runtime().getBackend(HSAIL.class); Assume.assumeTrue(backend instanceof HSAILHotSpotBackend); return (HSAILHotSpotBackend) backend; @@ -102,6 +103,13 @@ return (canGenerateCalls && canExecuteCalls); } + /** + * Determines if the runtime has the capabilities required by this test. + */ + protected boolean supportsRequiredCapabilities() { + return true; + } + @Override protected void dispatchKernelOkra(int range, Object... args) { HSAILHotSpotBackend backend = getHSAILBackend(); @@ -140,6 +148,7 @@ @Override public void testGeneratedHsail() { try (OverrideScope s = getOverrideScope()) { + assumeTrue(supportsRequiredCapabilities()); super.testGeneratedHsail(); } } diff -r 169caf662ac7 -r a6c144380ce7 graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/BoundsCatchBase.java --- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/BoundsCatchBase.java Fri Apr 04 12:05:41 2014 +0200 +++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/BoundsCatchBase.java Fri Apr 04 12:22:49 2014 +0200 @@ -55,6 +55,11 @@ } @Override + protected boolean supportsRequiredCapabilities() { + return getHSAILBackend().getRuntime().getConfig().useHSAILDeoptimization; + } + + @Override public void runTest() { setupArrays(); diff -r 169caf662ac7 -r a6c144380ce7 graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/SingleExceptionTestBase.java --- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/SingleExceptionTestBase.java Fri Apr 04 12:05:41 2014 +0200 +++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/SingleExceptionTestBase.java Fri Apr 04 12:22:49 2014 +0200 @@ -26,7 +26,7 @@ import com.oracle.graal.compiler.hsail.test.infra.GraalKernelTester; /** - * + * * @author ecaspole */ public abstract class SingleExceptionTestBase extends GraalKernelTester { @@ -35,6 +35,11 @@ @Result String exceptionString; @Result StackTraceElement firstStackTraceElement; + @Override + protected boolean supportsRequiredCapabilities() { + return getHSAILBackend().getRuntime().getConfig().useHSAILDeoptimization; + } + void recordException(Exception e) { // for now we just test that the class the of the exception // matches for the java and gpu side diff -r 169caf662ac7 -r a6c144380ce7 graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java --- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java Fri Apr 04 12:05:41 2014 +0200 +++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java Fri Apr 04 12:22:49 2014 +0200 @@ -431,7 +431,7 @@ public void emitCode(CompilationResultBuilder crb, LIR lir, ResolvedJavaMethod method) { assert method != null : lir + " is not associated with a method"; - boolean usesDeoptInfo = true; // TODO: make this conditional on something? + boolean useHSAILDeoptimization = getRuntime().getConfig().useHSAILDeoptimization; // Emit the prologue. HSAILAssembler asm = (HSAILAssembler) crb.asm; @@ -513,13 +513,13 @@ for (int i = 0; i < totalParamCount; i++) { String str = "align 8 kernarg_" + paramHsailSizes[i] + " " + paramNames[i]; - if (usesDeoptInfo || (i != totalParamCount - 1)) { + if (useHSAILDeoptimization || (i != totalParamCount - 1)) { str += ","; } asm.emitString(str); } - if (usesDeoptInfo) { + if (useHSAILDeoptimization) { // add in the deoptInfo parameter asm.emitString("kernarg_u64 " + asm.getDeoptInfoName()); } @@ -542,14 +542,10 @@ String workItemReg = "$s" + Integer.toString(asRegister(cc.getArgument(nonConstantParamCount)).encoding()); asm.emitString("workitemabsid_u32 " + workItemReg + ", 0;"); - final int offsetToDeoptSaveStates = getRuntime().getConfig().hsailSaveStatesOffset0; - final int sizeofKernelDeopt = getRuntime().getConfig().hsailSaveStatesOffset1 - getRuntime().getConfig().hsailSaveStatesOffset0; final int offsetToDeopt = getRuntime().getConfig().hsailDeoptOffset; - final int offsetToNeverRanArray = getRuntime().getConfig().hsailNeverRanArrayOffset; - final int offsetToDeoptNextIndex = getRuntime().getConfig().hsailDeoptNextIndexOffset; final String deoptInProgressLabel = "@LHandleDeoptInProgress"; - if (usesDeoptInfo) { + if (useHSAILDeoptimization) { AllocatableValue scratch64 = HSAIL.d16.asValue(Kind.Object); AllocatableValue scratch32 = HSAIL.s34.asValue(Kind.Int); HSAILAddress deoptInfoAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToDeopt).toAddress(); @@ -631,15 +627,19 @@ asm.emitString(spillsegStringFinal, spillsegDeclarationPosition); // Emit the epilogue. - final int offsetToDeoptimizationWorkItem = getRuntime().getConfig().hsailDeoptimizationWorkItem; - final int offsetToDeoptimizationReason = getRuntime().getConfig().hsailDeoptimizationReason; - final int offsetToDeoptimizationFrame = getRuntime().getConfig().hsailDeoptimizationFrame; - final int offsetToFramePc = getRuntime().getConfig().hsailFramePcOffset; - final int offsetToNumSaves = getRuntime().getConfig().hsailFrameNumSRegOffset; - final int offsetToSaveArea = getRuntime().getConfig().hsailFrameSaveAreaOffset; + // TODO: keep track of whether we need it + if (useHSAILDeoptimization) { + final int offsetToDeoptSaveStates = getRuntime().getConfig().hsailSaveStatesOffset0; + final int sizeofKernelDeopt = getRuntime().getConfig().hsailSaveStatesOffset1 - getRuntime().getConfig().hsailSaveStatesOffset0; + final int offsetToNeverRanArray = getRuntime().getConfig().hsailNeverRanArrayOffset; + final int offsetToDeoptNextIndex = getRuntime().getConfig().hsailDeoptNextIndexOffset; + final int offsetToDeoptimizationWorkItem = getRuntime().getConfig().hsailDeoptimizationWorkItem; + final int offsetToDeoptimizationReason = getRuntime().getConfig().hsailDeoptimizationReason; + final int offsetToDeoptimizationFrame = getRuntime().getConfig().hsailDeoptimizationFrame; + final int offsetToFramePc = getRuntime().getConfig().hsailFramePcOffset; + final int offsetToNumSaves = getRuntime().getConfig().hsailFrameNumSRegOffset; + final int offsetToSaveArea = getRuntime().getConfig().hsailFrameSaveAreaOffset; - // TODO: keep track of whether we need it - if (usesDeoptInfo) { AllocatableValue scratch64 = HSAIL.d16.asValue(Kind.Object); AllocatableValue cuSaveAreaPtr = HSAIL.d17.asValue(Kind.Object); AllocatableValue waveMathScratch1 = HSAIL.d18.asValue(Kind.Object); @@ -773,6 +773,11 @@ // and emit the return crb.frameContext.leave(crb); asm.exit(); + } else { + // Deoptimization is explicitly off, so emit simple return + asm.emitString0(asm.getDeoptLabelName() + ":\n"); + asm.emitComment("// No deoptimization"); + asm.emitString("ret;"); } asm.emitString0("}; \n"); diff -r 169caf662ac7 -r a6c144380ce7 graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java --- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java Fri Apr 04 12:05:41 2014 +0200 +++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java Fri Apr 04 12:22:49 2014 +0200 @@ -1004,6 +1004,8 @@ @HotSpotVMField(name = "ThreadShadow::_pending_deoptimization", type = "int", get = HotSpotVMField.Type.OFFSET) @Stable public int pendingDeoptimizationOffset; @HotSpotVMField(name = "ThreadShadow::_pending_failed_speculation", type = "oop", get = HotSpotVMField.Type.OFFSET) @Stable public int pendingFailedSpeculationOffset; + @HotSpotVMFlag(name = "UseHSAILDeoptimization") @Stable public boolean useHSAILDeoptimization; + /** * Offsets of Hsail deoptimization fields (defined in gpu_hsail.hpp). Used to propagate * exceptions from Hsail back to C++ runtime. diff -r 169caf662ac7 -r a6c144380ce7 src/gpu/hsail/vm/gpu_hsail.cpp --- a/src/gpu/hsail/vm/gpu_hsail.cpp Fri Apr 04 12:05:41 2014 +0200 +++ b/src/gpu/hsail/vm/gpu_hsail.cpp Fri Apr 04 12:22:49 2014 +0200 @@ -133,10 +133,6 @@ } } - -// for experimentation -static bool useDeoptInfo = true; - jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args_handle, methodHandle& mh, nmethod *nm, jobject oops_save_handle, TRAPS) { ResourceMark rm(THREAD); @@ -145,9 +141,8 @@ // Reset the kernel arguments _okra_clearargs(kernel); - HSAILDeoptimizationInfo* e; - if (useDeoptInfo) { + if (UseHSAILDeoptimization) { e = new (ResourceObj::C_HEAP, mtInternal) HSAILDeoptimizationInfo(); e->set_never_ran_array(NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal)); memset(e->never_ran_array(), 0, dimX * sizeof(jboolean)); @@ -168,150 +163,161 @@ } // Run the kernel - bool success = _okra_execute_with_range(kernel, dimX); - // check if any workitem requested a deopt - // currently we only support at most one such workitem - - - int deoptcode = e->deopt_occurred(); - if (useDeoptInfo && deoptcode != 0) { - if (deoptcode != 1) { - // error condition detected in deopt code - char msg[200]; - sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1*(deoptcode + 1)); - guarantee(deoptcode == 1, msg); - } - if (TraceGPUInteraction) { - tty->print_cr("deopt happened."); - HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[0]; - tty->print_cr("first deopter was workitem %d", pdeopt->workitem()); - } + bool success = false; + { + TraceTime t1("execute kernel", TraceGPUInteraction); + success = _okra_execute_with_range(kernel, dimX); + } - // Before handling any deopting workitems, save the pointers from - // the hsail frames in oops_save so they get adjusted by any - // GC. Need to do this before leaving thread_in_vm mode. - // resolve handle only needed once here (not exiting vm mode) - objArrayOop oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); + if (UseHSAILDeoptimization) { + // check if any workitem requested a deopt + // currently we only support at most one such workitem + int deoptcode = e->deopt_occurred(); + if (deoptcode != 0) { + if (deoptcode != 1) { + // error condition detected in deopt code + char msg[200]; + sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1 * (deoptcode + 1)); + guarantee(deoptcode == 1, msg); + } - // since slots are allocated from the beginning, we know how far to look - assert(e->num_deopts() < MAX_DEOPT_SAVE_STATES_SIZE, "deopt save state overflow"); - for (int k = 0; k < e->num_deopts(); k++) { - HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; - jint workitem = pdeopt->workitem(); - if (workitem != -1) { - // this is a workitem that deopted - HSAILFrame *hsailFrame = pdeopt->first_frame(); - int dregOopMap = hsailFrame->dreg_oops_map(); - for (int bit = 0; bit < 16; bit++) { - if ((dregOopMap & (1 << bit)) != 0) { - // the dregister at this bit is an oop, save it in the array - int index = k * 16 + bit; - void* saved_oop = (void*) hsailFrame->get_d_reg(bit); - oopsSaveArray->obj_at_put(index, (oop) saved_oop); - } - } - } - } + { + TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction); - // Handle any deopting workitems. - int count_deoptimized = 0; - for (int k = 0; k < e->num_deopts(); k++) { - HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; - - jint workitem = pdeopt->workitem(); - if (workitem != -1) { - int deoptId = pdeopt->pc_offset(); - HSAILFrame *hsailFrame = pdeopt->first_frame(); + if (TraceGPUInteraction) { + tty->print_cr("deopt happened."); + HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[0]; + tty->print_cr("first deopter was workitem %d", pdeopt->workitem()); + } - // update the hsailFrame from the oopsSaveArray - // re-resolve the handle - oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); + // Before handling any deopting workitems, save the pointers from + // the hsail frames in oops_save so they get adjusted by any + // GC. Need to do this before leaving thread_in_vm mode. + // resolve handle only needed once here (not exiting vm mode) + objArrayOop oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); - int dregOopMap = hsailFrame->dreg_oops_map(); - for (int bit = 0; bit < 16; bit++) { - if ((dregOopMap & (1 << bit)) != 0) { - // the dregister at this bit is an oop, retrieve it from array and put back in frame - int index = k * 16 + bit; - void * dregValue = (void *) oopsSaveArray->obj_at(index); - void * oldDregValue = (void *) hsailFrame->get_d_reg(bit); - assert((oldDregValue != 0 ? dregValue != 0 : dregValue == 0) , "bad dregValue retrieved"); - if (TraceGPUInteraction) { - if (dregValue != oldDregValue) { - tty->print_cr("oop moved for $d%d, workitem %d, slot %d, old=%p, new=%p", bit, workitem, k, oldDregValue, dregValue); + // since slots are allocated from the beginning, we know how far to look + assert(e->num_deopts() < MAX_DEOPT_SAVE_STATES_SIZE, "deopt save state overflow"); + for (int k = 0; k < e->num_deopts(); k++) { + HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; + jint workitem = pdeopt->workitem(); + if (workitem != -1) { + // this is a workitem that deopted + HSAILFrame *hsailFrame = pdeopt->first_frame(); + int dregOopMap = hsailFrame->dreg_oops_map(); + for (int bit = 0; bit < 16; bit++) { + if ((dregOopMap & (1 << bit)) != 0) { + // the dregister at this bit is an oop, save it in the array + int index = k * 16 + bit; + void* saved_oop = (void*) hsailFrame->get_d_reg(bit); + oopsSaveArray->obj_at_put(index, (oop) saved_oop); } } - hsailFrame->put_d_reg(bit, (jlong) dregValue); - } - } - - JavaValue result(T_VOID); - JavaCallArguments javaArgs; - javaArgs.set_alternative_target(nm); - javaArgs.push_int(deoptId); - javaArgs.push_long((jlong) hsailFrame); - - // override the deoptimization action with Action_none until we decide - // how to handle the other actions. - int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none); - javaArgs.push_int(myActionReason); - javaArgs.push_oop((oop)NULL); - if (TraceGPUInteraction) { - int dregOopMap = hsailFrame->dreg_oops_map(); - tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d, dregOopMap=%04x", workitem, k, deoptId, hsailFrame, myActionReason, dregOopMap); - // show the registers containing references - for (int bit = 0; bit < 16; bit++) { - if ((dregOopMap & (1 << bit)) != 0) { - tty->print_cr(" oop $d%d = %p", bit, hsailFrame->get_d_reg(bit)); - } } } - JavaCalls::call(&result, mh, &javaArgs, THREAD); - count_deoptimized++; + + // Handle any deopting workitems. + int count_deoptimized = 0; + for (int k = 0; k < e->num_deopts(); k++) { + HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; + + jint workitem = pdeopt->workitem(); + if (workitem != -1) { + int deoptId = pdeopt->pc_offset(); + HSAILFrame *hsailFrame = pdeopt->first_frame(); + + // update the hsailFrame from the oopsSaveArray + // re-resolve the handle + oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); + + int dregOopMap = hsailFrame->dreg_oops_map(); + for (int bit = 0; bit < 16; bit++) { + if ((dregOopMap & (1 << bit)) != 0) { + // the dregister at this bit is an oop, retrieve it from array and put back in frame + int index = k * 16 + bit; + void * dregValue = (void *) oopsSaveArray->obj_at(index); + void * oldDregValue = (void *) hsailFrame->get_d_reg(bit); + assert((oldDregValue != 0 ? dregValue != 0 : dregValue == 0), "bad dregValue retrieved"); + if (TraceGPUInteraction) { + if (dregValue != oldDregValue) { + tty->print_cr("oop moved for $d%d, workitem %d, slot %d, old=%p, new=%p", bit, workitem, k, oldDregValue, dregValue); + } + } + hsailFrame->put_d_reg(bit, (jlong) dregValue); + } + } + + JavaValue result(T_VOID); + JavaCallArguments javaArgs; + javaArgs.set_alternative_target(nm); + javaArgs.push_int(deoptId); + javaArgs.push_long((jlong) hsailFrame); + + // override the deoptimization action with Action_none until we decide + // how to handle the other actions. + int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none); + javaArgs.push_int(myActionReason); + javaArgs.push_oop((oop) NULL); + if (TraceGPUInteraction) { + int dregOopMap = hsailFrame->dreg_oops_map(); + tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d, dregOopMap=%04x", workitem, k, deoptId, hsailFrame, myActionReason, dregOopMap); + // show the registers containing references + for (int bit = 0; bit < 16; bit++) { + if ((dregOopMap & (1 << bit)) != 0) { + tty->print_cr(" oop $d%d = %p", bit, hsailFrame->get_d_reg(bit)); + } + } + } + JavaCalls::call(&result, mh, &javaArgs, THREAD); + count_deoptimized++; + } + } + if (TraceGPUInteraction) { + tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized); + } } - } - if (TraceGPUInteraction) { - tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized); + + { + TraceTime t3("handle never-rans", TraceGPUInteraction); + + // Handle any never_ran workitems if there were any + int count_never_ran = 0; + bool handleNeverRansHere = true; + // turn off verbose trace stuff for javacall arg setup + bool savedTraceGPUInteraction = TraceGPUInteraction; + TraceGPUInteraction = false; + jboolean *never_ran_array = e->never_ran_array(); + if (handleNeverRansHere) { + for (int k = 0; k < dimX; k++) { + if (never_ran_array[k]) { + // run it as a javaCall + KlassHandle methKlass = mh->method_holder(); + Thread* THREAD = Thread::current(); + JavaValue result(T_VOID); + JavaCallArguments javaArgs; + // re-resolve the args_handle here + objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args_handle); + // This object sets up the javaCall arguments + // the way argsArray is set up, this should work for instance methods as well + // (the receiver will be the first oop pushed) + HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static()); + if (mh->is_static()) { + JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); + } else { + JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); + } + count_never_ran++; + } + } + TraceGPUInteraction = savedTraceGPUInteraction; + if (TraceGPUInteraction) { + tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran); + showRanges(never_ran_array, dimX); + } + } // end of never-ran handling + } } - // Handle any never_ran workitems if there were any - int count_never_ran = 0; - bool handleNeverRansHere = true; - // turn off verbose trace stuff for javacall arg setup - bool savedTraceGPUInteraction = TraceGPUInteraction; - TraceGPUInteraction = false; - jboolean *never_ran_array = e->never_ran_array(); - if (handleNeverRansHere) { - for (int k = 0; k < dimX; k++) { - if (never_ran_array[k]) { - // run it as a javaCall - KlassHandle methKlass = mh->method_holder(); - Thread* THREAD = Thread::current(); - JavaValue result(T_VOID); - JavaCallArguments javaArgs; - // re-resolve the args_handle here - objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args_handle); - // This object sets up the javaCall arguments - // the way argsArray is set up, this should work for instance methods as well - // (the receiver will be the first oop pushed) - HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static()); - if (mh->is_static()) { - JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); - } else { - JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); - } - count_never_ran++; - } - } - TraceGPUInteraction = savedTraceGPUInteraction; - if (TraceGPUInteraction) { - tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran); - showRanges(never_ran_array, dimX); - } - } // end of never-ran handling - - } - - if (useDeoptInfo) { FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal); delete e; } diff -r 169caf662ac7 -r a6c144380ce7 src/gpu/hsail/vm/hsailKernelArguments.hpp --- a/src/gpu/hsail/vm/hsailKernelArguments.hpp Fri Apr 04 12:05:41 2014 +0200 +++ b/src/gpu/hsail/vm/hsailKernelArguments.hpp Fri Apr 04 12:22:49 2014 +0200 @@ -79,11 +79,13 @@ assert(pushed == true, "arg push failed"); } virtual void pushTrailingArgs() { - // Last argument is the exception info block - if (TraceGPUInteraction) { - tty->print_cr("[HSAIL] exception block=" PTR_FORMAT, _exceptionHolder); + if (UseHSAILDeoptimization) { + // Last argument is the exception info block + if (TraceGPUInteraction) { + tty->print_cr("[HSAIL] exception block=" PTR_FORMAT, _exceptionHolder); + } + pushObject(_exceptionHolder); } - pushObject(_exceptionHolder); } // For kernel arguments we don't pass the final int parameter diff -r 169caf662ac7 -r a6c144380ce7 src/share/vm/graal/graalGlobals.hpp --- a/src/share/vm/graal/graalGlobals.hpp Fri Apr 04 12:05:41 2014 +0200 +++ b/src/share/vm/graal/graalGlobals.hpp Fri Apr 04 12:22:49 2014 +0200 @@ -85,6 +85,9 @@ develop(bool, TraceUncollectedSpeculations, false, \ "Print message when a failed speculation was not collected") \ \ + product(bool, UseHSAILDeoptimization, true, \ + "Code gen and runtime support for deoptimizing HSAIL kernels") \ + \ product(bool, GPUOffload, false, \ "Offload execution to GPU whenever possible") \ \