Mercurial > hg > truffle
diff src/gpu/hsail/vm/gpu_hsail.cpp @ 14969:a6c144380ce7
HSAIL: added UseHSAILDeoptimization VM option for disabling HSAIL deopt support
Contributed-by: Eric Caspole <eric.caspole@amd.com>
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Fri, 04 Apr 2014 12:22:49 +0200 |
parents | 3e9a960f0da1 |
children | 2cae21d9f122 |
line wrap: on
line diff
--- a/src/gpu/hsail/vm/gpu_hsail.cpp Fri Apr 04 12:05:41 2014 +0200 +++ b/src/gpu/hsail/vm/gpu_hsail.cpp Fri Apr 04 12:22:49 2014 +0200 @@ -133,10 +133,6 @@ } } - -// for experimentation -static bool useDeoptInfo = true; - jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args_handle, methodHandle& mh, nmethod *nm, jobject oops_save_handle, TRAPS) { ResourceMark rm(THREAD); @@ -145,9 +141,8 @@ // Reset the kernel arguments _okra_clearargs(kernel); - HSAILDeoptimizationInfo* e; - if (useDeoptInfo) { + if (UseHSAILDeoptimization) { e = new (ResourceObj::C_HEAP, mtInternal) HSAILDeoptimizationInfo(); e->set_never_ran_array(NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal)); memset(e->never_ran_array(), 0, dimX * sizeof(jboolean)); @@ -168,150 +163,161 @@ } // Run the kernel - bool success = _okra_execute_with_range(kernel, dimX); - // check if any workitem requested a deopt - // currently we only support at most one such workitem - - - int deoptcode = e->deopt_occurred(); - if (useDeoptInfo && deoptcode != 0) { - if (deoptcode != 1) { - // error condition detected in deopt code - char msg[200]; - sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1*(deoptcode + 1)); - guarantee(deoptcode == 1, msg); - } - if (TraceGPUInteraction) { - tty->print_cr("deopt happened."); - HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[0]; - tty->print_cr("first deopter was workitem %d", pdeopt->workitem()); - } + bool success = false; + { + TraceTime t1("execute kernel", TraceGPUInteraction); + success = _okra_execute_with_range(kernel, dimX); + } - // Before handling any deopting workitems, save the pointers from - // the hsail frames in oops_save so they get adjusted by any - // GC. Need to do this before leaving thread_in_vm mode. - // resolve handle only needed once here (not exiting vm mode) - objArrayOop oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); + if (UseHSAILDeoptimization) { + // check if any workitem requested a deopt + // currently we only support at most one such workitem + int deoptcode = e->deopt_occurred(); + if (deoptcode != 0) { + if (deoptcode != 1) { + // error condition detected in deopt code + char msg[200]; + sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1 * (deoptcode + 1)); + guarantee(deoptcode == 1, msg); + } - // since slots are allocated from the beginning, we know how far to look - assert(e->num_deopts() < MAX_DEOPT_SAVE_STATES_SIZE, "deopt save state overflow"); - for (int k = 0; k < e->num_deopts(); k++) { - HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; - jint workitem = pdeopt->workitem(); - if (workitem != -1) { - // this is a workitem that deopted - HSAILFrame *hsailFrame = pdeopt->first_frame(); - int dregOopMap = hsailFrame->dreg_oops_map(); - for (int bit = 0; bit < 16; bit++) { - if ((dregOopMap & (1 << bit)) != 0) { - // the dregister at this bit is an oop, save it in the array - int index = k * 16 + bit; - void* saved_oop = (void*) hsailFrame->get_d_reg(bit); - oopsSaveArray->obj_at_put(index, (oop) saved_oop); - } - } - } - } + { + TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction); - // Handle any deopting workitems. - int count_deoptimized = 0; - for (int k = 0; k < e->num_deopts(); k++) { - HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; - - jint workitem = pdeopt->workitem(); - if (workitem != -1) { - int deoptId = pdeopt->pc_offset(); - HSAILFrame *hsailFrame = pdeopt->first_frame(); + if (TraceGPUInteraction) { + tty->print_cr("deopt happened."); + HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[0]; + tty->print_cr("first deopter was workitem %d", pdeopt->workitem()); + } - // update the hsailFrame from the oopsSaveArray - // re-resolve the handle - oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); + // Before handling any deopting workitems, save the pointers from + // the hsail frames in oops_save so they get adjusted by any + // GC. Need to do this before leaving thread_in_vm mode. + // resolve handle only needed once here (not exiting vm mode) + objArrayOop oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); - int dregOopMap = hsailFrame->dreg_oops_map(); - for (int bit = 0; bit < 16; bit++) { - if ((dregOopMap & (1 << bit)) != 0) { - // the dregister at this bit is an oop, retrieve it from array and put back in frame - int index = k * 16 + bit; - void * dregValue = (void *) oopsSaveArray->obj_at(index); - void * oldDregValue = (void *) hsailFrame->get_d_reg(bit); - assert((oldDregValue != 0 ? dregValue != 0 : dregValue == 0) , "bad dregValue retrieved"); - if (TraceGPUInteraction) { - if (dregValue != oldDregValue) { - tty->print_cr("oop moved for $d%d, workitem %d, slot %d, old=%p, new=%p", bit, workitem, k, oldDregValue, dregValue); + // since slots are allocated from the beginning, we know how far to look + assert(e->num_deopts() < MAX_DEOPT_SAVE_STATES_SIZE, "deopt save state overflow"); + for (int k = 0; k < e->num_deopts(); k++) { + HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; + jint workitem = pdeopt->workitem(); + if (workitem != -1) { + // this is a workitem that deopted + HSAILFrame *hsailFrame = pdeopt->first_frame(); + int dregOopMap = hsailFrame->dreg_oops_map(); + for (int bit = 0; bit < 16; bit++) { + if ((dregOopMap & (1 << bit)) != 0) { + // the dregister at this bit is an oop, save it in the array + int index = k * 16 + bit; + void* saved_oop = (void*) hsailFrame->get_d_reg(bit); + oopsSaveArray->obj_at_put(index, (oop) saved_oop); } } - hsailFrame->put_d_reg(bit, (jlong) dregValue); - } - } - - JavaValue result(T_VOID); - JavaCallArguments javaArgs; - javaArgs.set_alternative_target(nm); - javaArgs.push_int(deoptId); - javaArgs.push_long((jlong) hsailFrame); - - // override the deoptimization action with Action_none until we decide - // how to handle the other actions. - int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none); - javaArgs.push_int(myActionReason); - javaArgs.push_oop((oop)NULL); - if (TraceGPUInteraction) { - int dregOopMap = hsailFrame->dreg_oops_map(); - tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d, dregOopMap=%04x", workitem, k, deoptId, hsailFrame, myActionReason, dregOopMap); - // show the registers containing references - for (int bit = 0; bit < 16; bit++) { - if ((dregOopMap & (1 << bit)) != 0) { - tty->print_cr(" oop $d%d = %p", bit, hsailFrame->get_d_reg(bit)); - } } } - JavaCalls::call(&result, mh, &javaArgs, THREAD); - count_deoptimized++; + + // Handle any deopting workitems. + int count_deoptimized = 0; + for (int k = 0; k < e->num_deopts(); k++) { + HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; + + jint workitem = pdeopt->workitem(); + if (workitem != -1) { + int deoptId = pdeopt->pc_offset(); + HSAILFrame *hsailFrame = pdeopt->first_frame(); + + // update the hsailFrame from the oopsSaveArray + // re-resolve the handle + oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); + + int dregOopMap = hsailFrame->dreg_oops_map(); + for (int bit = 0; bit < 16; bit++) { + if ((dregOopMap & (1 << bit)) != 0) { + // the dregister at this bit is an oop, retrieve it from array and put back in frame + int index = k * 16 + bit; + void * dregValue = (void *) oopsSaveArray->obj_at(index); + void * oldDregValue = (void *) hsailFrame->get_d_reg(bit); + assert((oldDregValue != 0 ? dregValue != 0 : dregValue == 0), "bad dregValue retrieved"); + if (TraceGPUInteraction) { + if (dregValue != oldDregValue) { + tty->print_cr("oop moved for $d%d, workitem %d, slot %d, old=%p, new=%p", bit, workitem, k, oldDregValue, dregValue); + } + } + hsailFrame->put_d_reg(bit, (jlong) dregValue); + } + } + + JavaValue result(T_VOID); + JavaCallArguments javaArgs; + javaArgs.set_alternative_target(nm); + javaArgs.push_int(deoptId); + javaArgs.push_long((jlong) hsailFrame); + + // override the deoptimization action with Action_none until we decide + // how to handle the other actions. + int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none); + javaArgs.push_int(myActionReason); + javaArgs.push_oop((oop) NULL); + if (TraceGPUInteraction) { + int dregOopMap = hsailFrame->dreg_oops_map(); + tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d, dregOopMap=%04x", workitem, k, deoptId, hsailFrame, myActionReason, dregOopMap); + // show the registers containing references + for (int bit = 0; bit < 16; bit++) { + if ((dregOopMap & (1 << bit)) != 0) { + tty->print_cr(" oop $d%d = %p", bit, hsailFrame->get_d_reg(bit)); + } + } + } + JavaCalls::call(&result, mh, &javaArgs, THREAD); + count_deoptimized++; + } + } + if (TraceGPUInteraction) { + tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized); + } } - } - if (TraceGPUInteraction) { - tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized); + + { + TraceTime t3("handle never-rans", TraceGPUInteraction); + + // Handle any never_ran workitems if there were any + int count_never_ran = 0; + bool handleNeverRansHere = true; + // turn off verbose trace stuff for javacall arg setup + bool savedTraceGPUInteraction = TraceGPUInteraction; + TraceGPUInteraction = false; + jboolean *never_ran_array = e->never_ran_array(); + if (handleNeverRansHere) { + for (int k = 0; k < dimX; k++) { + if (never_ran_array[k]) { + // run it as a javaCall + KlassHandle methKlass = mh->method_holder(); + Thread* THREAD = Thread::current(); + JavaValue result(T_VOID); + JavaCallArguments javaArgs; + // re-resolve the args_handle here + objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args_handle); + // This object sets up the javaCall arguments + // the way argsArray is set up, this should work for instance methods as well + // (the receiver will be the first oop pushed) + HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static()); + if (mh->is_static()) { + JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); + } else { + JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); + } + count_never_ran++; + } + } + TraceGPUInteraction = savedTraceGPUInteraction; + if (TraceGPUInteraction) { + tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran); + showRanges(never_ran_array, dimX); + } + } // end of never-ran handling + } } - // Handle any never_ran workitems if there were any - int count_never_ran = 0; - bool handleNeverRansHere = true; - // turn off verbose trace stuff for javacall arg setup - bool savedTraceGPUInteraction = TraceGPUInteraction; - TraceGPUInteraction = false; - jboolean *never_ran_array = e->never_ran_array(); - if (handleNeverRansHere) { - for (int k = 0; k < dimX; k++) { - if (never_ran_array[k]) { - // run it as a javaCall - KlassHandle methKlass = mh->method_holder(); - Thread* THREAD = Thread::current(); - JavaValue result(T_VOID); - JavaCallArguments javaArgs; - // re-resolve the args_handle here - objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args_handle); - // This object sets up the javaCall arguments - // the way argsArray is set up, this should work for instance methods as well - // (the receiver will be the first oop pushed) - HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static()); - if (mh->is_static()) { - JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); - } else { - JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); - } - count_never_ran++; - } - } - TraceGPUInteraction = savedTraceGPUInteraction; - if (TraceGPUInteraction) { - tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran); - showRanges(never_ran_array, dimX); - } - } // end of never-ran handling - - } - - if (useDeoptInfo) { FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal); delete e; }