# HG changeset patch # User Gilles Duboscq # Date 1403799935 -7200 # Node ID e9998e2be7f5a3b1650fc55c50dd27433412698a # Parent c6ebc1997a558ae1205b3f9e694ce11b8d08e87a use oops_do to modify saved hsail state Contributed-by: Tom Deneau diff -r c6ebc1997a55 -r e9998e2be7f5 graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/BoundsCatchMost20000StressGCTest.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/BoundsCatchMost20000StressGCTest.java Thu Jun 26 18:25:35 2014 +0200 @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package com.oracle.graal.compiler.hsail.test; + +import org.junit.*; + +/** + * A version which is likely to get a GC while running the never_rans, and so is good for oops_do + * testing. 
+ */ +public class BoundsCatchMost20000StressGCTest extends BoundsCatchManyBase { + + @Override + int getGlobalSize() { + return 20000; + } + + boolean isMyDeoptGid(int gid) { + return (gid > 100 && gid % 100 != 1); + } + + int[] dummyArray; + + // copied run routine here because otherwise polymorphic calls to isDeoptGid + @Override + public void run(int gid) { + int outval = getOutval(gid); + try { + int index = (isMyDeoptGid(gid) ? num + 1 : gid); + outArray[index] = outval; + } catch (ArrayIndexOutOfBoundsException e) { + // set up so we can detect if we go thru here twice + outArray[gid] += outval; + // note: cannot record the exceptiongid here for many deopts in parallel + + // allocate something so GCs happen more often + dummyArray = new int[1000]; + } + } + + @Override + public void runTest() { + setupArrays(); + + for (int i = 0; i < 10; i++) { + // we should not get an exception escaping from the kernel + dispatchMethodKernel(num); + } + } + + @Test + public void test() { + testGeneratedHsail(); + } +} diff -r c6ebc1997a55 -r e9998e2be7f5 graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/ArrayListSetTest.java --- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/ArrayListSetTest.java Thu Jun 26 13:42:29 2014 +0200 +++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/ArrayListSetTest.java Thu Jun 26 18:25:35 2014 +0200 @@ -43,10 +43,6 @@ dispatchLambdaKernel(NUM, (gid) -> { aryList.set(gid, gid); }); - - // for (int i = 0; i < NUM; i++) { - // System.out.println(aryList.get(i)); - // } } @Override @@ -55,6 +51,7 @@ } @Test + @Ignore public void testUsingLambdaMethod() { testGeneratedHsailUsingLambdaMethod(); } diff -r c6ebc1997a55 -r e9998e2be7f5 graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java --- 
a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java Thu Jun 26 13:42:29 2014 +0200 +++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java Thu Jun 26 18:25:35 2014 +0200 @@ -379,22 +379,13 @@ throw new GraalInternalError("Cannot execute GPU kernel if device is not initialized"); } int[] oopMapArray = ((HSAILHotSpotNmethod) kernel).getOopMapArray(); - Object[] oopsSaveArea; - if (getRuntime().getConfig().useHSAILDeoptimization) { - int saveAreaCounts = OopMapArrayBuilder.getSaveAreaCounts(oopMapArray); - int numDRegs = (saveAreaCounts >> 8) & 0xff; - int numStackSlots = (saveAreaCounts >> 16); - // pessimistically assume that any of the DRegs or stackslots could be oops - oopsSaveArea = new Object[maxDeoptIndex * (numDRegs + numStackSlots)]; - } else { - oopsSaveArea = null; - } + // Pass donorThreadPoolArray if this kernel uses allocation, otherwise null Thread[] donorThreadArray = ((HSAILHotSpotNmethod) kernel).getUsesAllocationFlag() ? 
donorThreadPool.get().getThreads() : null; - return executeKernel0(kernel, jobSize, args, oopsSaveArea, donorThreadArray, HsailAllocBytesPerWorkitem.getValue(), oopMapArray); + return executeKernel0(kernel, jobSize, args, donorThreadArray, HsailAllocBytesPerWorkitem.getValue(), oopMapArray); } - private static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args, Object[] oopsSave, Thread[] donorThreads, int allocBytesPerWorkitem, int[] oopMapArray) + private static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args, Thread[] donorThreads, int allocBytesPerWorkitem, int[] oopMapArray) throws InvalidInstalledCodeException; /** @@ -1069,10 +1060,6 @@ int arrIndex = HEADERSIZE + infoIndex * intsPerInfopoint + 1 + intIndex; return array[arrIndex]; } - - public static int getSaveAreaCounts(int[] array) { - return array[SAVEAREACOUNTS_OFST]; - } } private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List deopts, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, int numDRegs) { diff -r c6ebc1997a55 -r e9998e2be7f5 src/gpu/hsail/vm/gpu_hsail.cpp --- a/src/gpu/hsail/vm/gpu_hsail.cpp Thu Jun 26 13:42:29 2014 +0200 +++ b/src/gpu/hsail/vm/gpu_hsail.cpp Thu Jun 26 18:25:35 2014 +0200 @@ -66,7 +66,7 @@ JNINativeMethod Hsail::HSAIL_methods[] = { {CC"initialize", CC"()Z", FN_PTR(Hsail::initialize)}, {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(Hsail::generate_kernel)}, - {CC"executeKernel0", CC"("HS_INSTALLED_CODE"I["OBJECT"["OBJECT"["JLTHREAD"I[I)Z", FN_PTR(Hsail::execute_kernel_void_1d)}, + {CC"executeKernel0", CC"("HS_INSTALLED_CODE"I["OBJECT"["JLTHREAD"I[I)Z", FN_PTR(Hsail::execute_kernel_void_1d)}, }; void* Hsail::_device_context = NULL; @@ -108,7 +108,7 @@ _okra_register_heap(Universe::heap()->base(), Universe::heap()->capacity()); } -GPU_VMENTRY(jboolean, Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args, jobject 
oops_save, +GPU_VMENTRY(jboolean, Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args, jobject donor_threads, jint allocBytesPerWorkitem, jobject oop_map_array)) ResourceMark rm; @@ -125,7 +125,7 @@ SharedRuntime::throw_and_post_jvmti_exception(JavaThread::current(), vmSymbols::com_oracle_graal_api_code_InvalidInstalledCodeException(), NULL); } -return execute_kernel_void_1d_internal((address) kernel, dimX, args, mh, nm, oops_save, donor_threads, allocBytesPerWorkitem, oop_map_array, CHECK_0); +return execute_kernel_void_1d_internal((address) kernel, dimX, args, mh, nm, donor_threads, allocBytesPerWorkitem, oop_map_array, CHECK_0); GPU_END static void showRanges(jboolean* a, int len) { @@ -145,143 +145,11 @@ } } -class OopSaver : public StackObj { -private: - objArrayOop _oopsSaveArray; - typeArrayOop _oopMapArray; - jobject _oops_save; - jobject _oop_map_array; - int _last_pcoffset; - int _last_idx; - int _saveAreaCounts; - - enum { - SAVEAREACOUNTS_OFST=0, - SPAN_OFST=1, - HEADERSIZE=2 - }; - int mapPcOffsetToIndex(int pcOffset) { - if (pcOffset == _last_pcoffset) { - return _last_idx; - } - int span = _oopMapArray->int_at(SPAN_OFST); - for (int idx = HEADERSIZE; idx < _oopMapArray->length(); idx += span) { - int ofst = _oopMapArray->int_at(idx); - if (ofst == pcOffset) { - _last_pcoffset = pcOffset; - _last_idx = idx + 1; - return _last_idx; - } - } - ShouldNotReachHere(); - return -1; - } - -public: - OopSaver(jobject oops_save, jobject oop_map_array) { - _oops_save = oops_save; - _oop_map_array = oop_map_array; - _last_pcoffset = -1; - _saveAreaCounts = getSaveAreaCounts(oop_map_array); - resolveArrays(); - } - - void resolveArrays() { - _oopsSaveArray = (objArrayOop) JNIHandles::resolve(_oops_save); - _oopMapArray = (typeArrayOop) JNIHandles::resolve(_oop_map_array); - } - - void* getOopForBit(HSAILFrame* hsailFrame, int bit) { - assert(isOop(hsailFrame, bit), ""); - void* oop; - if (bit < 
hsailFrame->num_d_regs()) { - // d register - oop = (void*) hsailFrame->get_d_reg(bit); - } else { - // stack slot - int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot - oop = (void*) hsailFrame->get_stackslot64(stackOffset); - } - return oop; - } - - void putOopForBit(HSAILFrame* hsailFrame, int bit, void* oop) { - assert(isOop(hsailFrame, bit), ""); - if (bit < hsailFrame->num_d_regs()) { - // d register - hsailFrame->put_d_reg(bit, (jlong) oop); - } else { - // stack slot - int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot - hsailFrame->put_stackslot64(stackOffset, (jlong) oop); - } - } - - void saveOopsFromFrame(HSAILFrame* hsailFrame, int deoptSlot){ - // as used, no need to resolve arrays on each call - int oopsPerDeopt = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots(); - - // handle the dregister and stackSlot based oops - for (int bit = 0; bit < oopsPerDeopt; bit++) { - if (isOop(hsailFrame, bit)) { - void* saved_oop = getOopForBit(hsailFrame, bit); - int saveArrayIndex = deoptSlot * oopsPerDeopt + bit; - _oopsSaveArray->obj_at_put(saveArrayIndex, (oop) saved_oop); - } - } - } - - void restoreOopsToFrame(HSAILFrame* hsailFrame, int deoptSlot, int workitem){ - // need to re-resolve on each restore - resolveArrays(); - int oopsPerDeopt = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots(); - - // handle the dregister and stackSlot based oops - for (int bit = 0; bit < oopsPerDeopt; bit++) { - if (isOop(hsailFrame, bit)) { - // the dregister or stack slot at this bit is an oop, retrieve it from array and put back in frame - int saveArrayIndex = deoptSlot * oopsPerDeopt + bit; - void* newValue = (void*) _oopsSaveArray->obj_at(saveArrayIndex); - void* oldValue = getOopForBit(hsailFrame, bit); - assert((oldValue != 0 ? 
newValue != 0 : newValue == 0), "bad dregValue retrieved"); - if (newValue != oldValue) { - if (TraceGPUInteraction) { - int numDRegs = hsailFrame->num_d_regs(); - const char* name = (bit < numDRegs ? "$d" : "stk"); - int num = (bit < numDRegs ? bit : bit - numDRegs); - tty->print_cr("oop moved for %s%d, workitem %d, slot %d, old=%p, new=%p", - name, num, workitem, deoptSlot, oldValue, newValue); - } - putOopForBit(hsailFrame, bit, newValue); - } - } - } - } - - bool isOop(HSAILFrame* hsailFrame, int bit){ - // re-resolve on each access - resolveArrays(); - if (bit > hsailFrame->num_d_regs() + hsailFrame->num_stack_slots()) { - return false; - } - int pcOffset = hsailFrame->pc_offset(); - int bits_int_idx = mapPcOffsetToIndex(pcOffset) + (bit / 32); - int bitpos = bit % 32; - int bits = _oopMapArray->int_at(bits_int_idx); - return ((bits & (1 << bitpos)) != 0); - } - - static int getSaveAreaCounts(jobject oopMapArrayObject) { - typeArrayOop oopMapArray = (typeArrayOop) JNIHandles::resolve(oopMapArrayObject); - return oopMapArray->int_at(SAVEAREACOUNTS_OFST); - } - -}; - -jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm, jobject oops_save, +jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm, jobject donor_threads, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS) { ResourceMark rm(THREAD); objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args); + assert(THREAD->is_Java_thread(), "must be a JavaThread"); // We avoid HSAILAllocationInfo logic if kernel does not allocate // in which case the donor_thread array passed in will be null @@ -290,20 +158,23 @@ // Reset the kernel arguments _okra_clearargs(kernel); + JavaThread* thread = (JavaThread*)THREAD; HSAILDeoptimizationInfo* e; if (UseHSAILDeoptimization) { // get how many bytes per deopt save area are required - int saveAreaCounts = 
OopSaver::getSaveAreaCounts(oop_map_array); + int saveAreaCounts = HSAILOopMapHelper::get_save_area_counts(oop_map_array); int numSRegs = saveAreaCounts & 0xff; int numDRegs = (saveAreaCounts >> 8) & 0xff; int numStackSlots = (saveAreaCounts >> 16); int bytesPerSaveArea = numSRegs * 4 + (numDRegs + numStackSlots) * 8; - e = new (MAX_DEOPT_SLOTS, bytesPerSaveArea) HSAILDeoptimizationInfo(MAX_DEOPT_SLOTS, bytesPerSaveArea, dimX, allocInfo); + e = new (MAX_DEOPT_SLOTS, bytesPerSaveArea) HSAILDeoptimizationInfo(MAX_DEOPT_SLOTS, bytesPerSaveArea, dimX, allocInfo, oop_map_array); // copy cur_tlab_infos if (allocInfo != NULL) { - e->setCurTlabInfos(allocInfo->getCurTlabInfos()); + e->set_cur_tlabInfos(allocInfo->getCurTlabInfos()); } + // set deopt info in thread so gc oops_do processing can find it + thread->set_gpu_hsail_deopt_info(e); } // This object sets up the kernel arguments @@ -317,7 +188,6 @@ if (hka.getFirstNullParameterIndex() >= 0) { char buf[64]; sprintf(buf, "Null Kernel Parameter seen, Parameter Index: %d", hka.getFirstNullParameterIndex()); - JavaThread* thread = (JavaThread*)THREAD; thread->set_gpu_exception_bci(0); thread->set_gpu_exception_method(mh()); THROW_MSG_0(vmSymbols::java_lang_NullPointerException(), buf); @@ -362,22 +232,6 @@ tty->print_cr("first deopter was workitem %d", pdeopt->workitem()); } - // Before handling any deopting workitems, save the pointers from - // the hsail frames in oops_save so they get adjusted by any - // GC. Need to do this before leaving thread_in_vm mode. 
- OopSaver oopSaver(oops_save, oop_map_array); - // resolve handle only needed once here (not exiting vm mode) - oopSaver.resolveArrays(); - - // since slots are allocated from the beginning, we know how far to look - assert(e->num_deopts() < e->num_slots(), "deopt save state overflow"); - for (int k = 0; k < e->num_deopts(); k++) { - HSAILKernelDeoptimization* pdeopt = e->get_deopt_save_state(k); - assert (pdeopt->workitem() >= 0, "bad workitem in deopt"); - // this is a workitem that deopted - oopSaver.saveOopsFromFrame(pdeopt->first_frame(), k); - } - // Handle any deopting workitems. int count_deoptimized = 0; for (int k = 0; k < e->num_deopts(); k++) { @@ -388,10 +242,6 @@ int deoptId = pdeopt->pc_offset(); HSAILFrame* hsailFrame = pdeopt->first_frame(); - // Update the hsailFrame from the oopsSaveArray - // will re-resolve the handles each time. - oopSaver.restoreOopsToFrame(hsailFrame, k, workitem); - JavaValue result(T_VOID); JavaCallArguments javaArgs; javaArgs.set_alternative_target(nm); @@ -407,21 +257,24 @@ tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d", workitem, k, deoptId, hsailFrame, myActionReason); // show the $d registers or stack slots containing references int maxOopBits = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots(); + HSAILOopMapHelper oopMapHelper(oop_map_array); + int pc_offset = hsailFrame->pc_offset(); for (int bit = 0; bit < maxOopBits; bit++) { - if (oopSaver.isOop(hsailFrame, bit)) { + if (oopMapHelper.is_oop(pc_offset, bit)) { if (bit < hsailFrame->num_d_regs()) { // show $d reg oop - tty->print_cr(" oop $d%d = %p", bit, oopSaver.getOopForBit(hsailFrame, bit)); + tty->print_cr(" oop $d%d = %p", bit, hsailFrame->get_oop_for_bit(bit)); } else { // show stack slot oop int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot - tty->print_cr(" oop stk:%d = %p", stackOffset, oopSaver.getOopForBit(hsailFrame, bit)); + 
tty->print_cr(" oop stk:%d = %p", stackOffset, hsailFrame->get_oop_for_bit(bit)); } } } } JavaCalls::call(&result, mh, &javaArgs, THREAD); count_deoptimized++; + e->set_deopt_work_index(k + 1); } } if (TraceGPUInteraction) { @@ -429,6 +282,9 @@ } } } + // when we are done with the deopts, we don't need to oops_do anything + // in the saved state anymore + thread->set_gpu_hsail_deopt_info(NULL); // Handle any never_ran workitems if there were any { @@ -595,3 +451,27 @@ } return true; } + + +void Hsail::HSAILDeoptimizationInfo::oops_do(OopClosure* f) { + int unprocessed_deopts = num_deopts() - deopt_work_index(); + if (TraceGPUInteraction) { + tty->print_cr("HSAILDeoptimizationInfo::oops_do deopt_occurred=%d, total_deopts=%d, unprocessed_deopts=%d, oop_map_array=%p", _deopt_occurred, num_deopts(), unprocessed_deopts, _oop_map_array); + } + if (num_deopts() == 0 || unprocessed_deopts <= 0) { + return; // nothing to do + } + HSAILOopMapHelper oopMapHelper(_oop_map_array); + oopMapHelper.resolve_arrays(); // resolve once before processing + + // go thru the unprocessed deopt frames, finding each oop and applying the closure + for (int k = deopt_work_index(); k < num_deopts(); k++) { + HSAILKernelDeoptimization* pdeopt = get_deopt_save_state(k); + assert (pdeopt->workitem() >= 0, "bad workitem in deopt"); + if (TraceGPUInteraction) { + tty->print_cr(" deopt %d, workitem %d, pc %d", k, pdeopt->workitem(), pdeopt->pc_offset()); + } + HSAILFrame* hsailFrame = pdeopt->first_frame(); + hsailFrame->oops_do(f, &oopMapHelper); + } +} diff -r c6ebc1997a55 -r e9998e2be7f5 src/gpu/hsail/vm/gpu_hsail.hpp --- a/src/gpu/hsail/vm/gpu_hsail.hpp Thu Jun 26 13:42:29 2014 +0200 +++ b/src/gpu/hsail/vm/gpu_hsail.hpp Thu Jun 26 18:25:35 2014 +0200 @@ -28,6 +28,7 @@ #include "runtime/gpu.hpp" #include "utilities/exceptions.hpp" #include "graal/graalEnv.hpp" +#include "gpu_hsail_OopMapHelper.hpp" #include "gpu_hsail_Frame.hpp" #include "gpu_hsail_Tlab.hpp" @@ -101,9 +102,11 @@ jint 
_deopt_next_index; jint _num_slots; jint _deopt_span; + jint _deopt_work_index; // how far we are in processing the deopts HSAILTlabInfo** _cur_tlab_info; // copy of what was in the HSAILAllocationInfo, to avoid an extra indirection HSAILAllocationInfo* _alloc_info; char _ignore; + jobject _oop_map_array; // keep a pointer last so save area following it is word aligned jboolean* _never_ran_array; @@ -119,14 +122,16 @@ return (jbyte*) (this) + hdr_size(); } - inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea, int dimX, HSAILAllocationInfo* allocInfo) { + inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea, int dimX, HSAILAllocationInfo* allocInfo, jobject oop_map_array) { _notice_safepoints = &Hsail::_notice_safepoints; _deopt_occurred = 0; _deopt_next_index = 0; + _deopt_work_index = 0; _num_slots = numSlots; _never_ran_array = NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal); memset(_never_ran_array, 0, dimX * sizeof(jboolean)); _alloc_info = allocInfo; + _oop_map_array = oop_map_array; _deopt_span = sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea; if (TraceGPUInteraction) { tty->print_cr("HSAILDeoptimizationInfo allocated, %d slots of size %d, total size = 0x%lx bytes", _num_slots, _deopt_span, (_num_slots * _deopt_span + sizeof(HSAILDeoptimizationInfo))); @@ -143,16 +148,20 @@ inline jint num_deopts() { return _deopt_next_index; } inline jboolean* never_ran_array() { return _never_ran_array; } inline jint num_slots() {return _num_slots;} + inline void set_deopt_work_index(int val) { _deopt_work_index = val; } + inline jint deopt_work_index() { return _deopt_work_index; } inline HSAILKernelDeoptimization* get_deopt_save_state(int slot) { // use _deopt_span to index into _deopt_states return (HSAILKernelDeoptimization*) (save_area_start() + _deopt_span * slot); } - void setCurTlabInfos(HSAILTlabInfo** ptlabInfos) { + void set_cur_tlabInfos(HSAILTlabInfo** ptlabInfos) { _cur_tlab_info = ptlabInfos; } + void 
oops_do(OopClosure* f); + void* operator new (size_t hdrSize, int numSlots, int bytesPerSaveArea) { assert(hdrSize <= hdr_size(), ""); size_t totalSizeBytes = hdr_size() + numSlots * (sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea); @@ -175,10 +184,10 @@ JNIEXPORT static jlong generate_kernel(JNIEnv* env, jclass, jbyteArray code_handle, jstring name_handle); // static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args); - JNIEXPORT static jboolean execute_kernel_void_1d(JNIEnv* env, jclass, jobject hotspotInstalledCode, jint dimX, jobject args, jobject oopsSave, + JNIEXPORT static jboolean execute_kernel_void_1d(JNIEnv* env, jclass, jobject hotspotInstalledCode, jint dimX, jobject args, jobject donorThreads, int allocBytesPerWorkitem, jobject oop_map_array); - static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm, jobject oopsSave, + static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm, jobject donorThreads, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS); static void register_heap(); diff -r c6ebc1997a55 -r e9998e2be7f5 src/gpu/hsail/vm/gpu_hsail_Frame.hpp --- a/src/gpu/hsail/vm/gpu_hsail_Frame.hpp Thu Jun 26 13:42:29 2014 +0200 +++ b/src/gpu/hsail/vm/gpu_hsail_Frame.hpp Thu Jun 26 18:25:35 2014 +0200 @@ -37,36 +37,85 @@ jbyte _num_d_regs; jshort _num_stack_slots; + jbyte* data_start() {return (jbyte*) this + sizeof(*this); } + int sreg_ofst_start() { return 0; } + int dreg_ofst_start() { return sreg_ofst_start() + num_s_regs() * sizeof(jint); } + int stackslot_ofst_start() { return dreg_ofst_start() + num_d_regs() * sizeof(jlong); } + + int sreg_ofst(int idx) { + assert(idx >= 0 && idx < num_s_regs(), "bad sreg index"); + return sreg_ofst_start() + idx * sizeof(jint); + } + + int dreg_ofst(int idx) { + assert(idx >= 0 && idx < num_d_regs(), "bad dreg index"); + 
return dreg_ofst_start() + idx * sizeof(jlong); + } + + int stackslot_ofst(int stackOffset) { + assert(stackOffset >= 0 && (unsigned int) stackOffset < num_stack_slots() * sizeof(jlong), "bad stackoffset"); + return stackslot_ofst_start() + stackOffset; + } + + // the _ptr versions just return a pointer to the indicated d reg or stackslot64 + // some of these are used for oops_do processing + jint* get_s_reg_ptr(int idx) { + return((jint*) (data_start() + sreg_ofst(idx))); + } + + jlong* get_d_reg_ptr(int idx) { + return((jlong*) (data_start() + dreg_ofst(idx))); + } + + jlong* get_stackslot64_ptr(int stackOffset) { + return((jlong*) (data_start() + stackslot_ofst(stackOffset))); + } + + jint* get_stackslot32_ptr(int stackOffset) { + return((jint*) (data_start() + stackslot_ofst(stackOffset))); + } + + void* get_oop_ptr_for_bit(int bit) { + void* oop_ptr; + if (bit < num_d_regs()) { + // d register + oop_ptr = (void*) get_d_reg_ptr(bit); + } else { + // stack slot + int stackOffset = (bit - num_d_regs()) * 8; // 8 bytes per stack slot + oop_ptr = (void*) get_stackslot64_ptr(stackOffset); + } + return oop_ptr; + } + public: // Accessors jint pc_offset() { return _pc_offset; } jint num_s_regs() {return _num_s_regs; } jint num_d_regs() {return _num_d_regs; } jint num_stack_slots() {return _num_stack_slots; } - jbyte* data_start() {return (jbyte*) this + sizeof(*this); } - jlong get_d_reg(int idx) { - int ofst = num_s_regs() * 4 + idx * 8; - return(*(jlong*) (data_start() + ofst)); - } - jint get_s_reg(int idx) { - int ofst = idx * 4; - return(*(jint*) (data_start() + ofst)); + + jlong get_oop_for_bit(int bit) { + return * (jlong *) get_oop_ptr_for_bit(bit); } - void put_d_reg(int idx, jlong val) { - int ofst = num_s_regs() * 4 + idx * 8; - (*(jlong*) (data_start() + ofst)) = val; - } - jint get_stackslot32(int stackOffset) { - int ofst = num_s_regs() * 4 + num_d_regs() * 8 + stackOffset; - return(*(jint*) (data_start() + ofst)); - } - jlong get_stackslot64(int 
stackOffset) { - int ofst = num_s_regs() * 4 + num_d_regs() * 8 + stackOffset; - return(*(jlong*) (data_start() + ofst)); - } - void put_stackslot64(int stackOffset, jlong val) { - int ofst = num_s_regs() * 4 + num_d_regs() * 8 + stackOffset; - (*(jlong*) (data_start() + ofst)) = val; + + // do the oops from this frame + void oops_do(OopClosure* f, HSAILOopMapHelper* oopMapHelper) { + int oops_per_deopt = num_d_regs() + num_stack_slots(); + + // handle the dregister and stackSlot based oops + for (int bit = 0; bit < oops_per_deopt; bit++) { + if (oopMapHelper->is_oop(pc_offset(), bit)) { + void* oop_ptr = get_oop_ptr_for_bit(bit); + // the oops we are dealing with here in the hsailFrame are always uncompressed + oop old_oop = oopDesc::load_heap_oop((oop *)oop_ptr); + f->do_oop((oop*) oop_ptr); + if (TraceGPUInteraction) { + oop new_oop = oopDesc::load_heap_oop((oop *)oop_ptr); + tty->print_cr("bit=%d, oop_ptr=%p, old=%p, new=%p", bit, oop_ptr, (void *)old_oop, (void *)new_oop); + } + } + } } }; diff -r c6ebc1997a55 -r e9998e2be7f5 src/gpu/hsail/vm/gpu_hsail_OopMapHelper.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/gpu/hsail/vm/gpu_hsail_OopMapHelper.hpp Thu Jun 26 18:25:35 2014 +0200 @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef GPU_HSAIL_VM_GPU_HSAIL_OOPMAPHELPER_HPP +#define GPU_HSAIL_VM_GPU_HSAIL_OOPMAPHELPER_HPP + +#include "graal/graalEnv.hpp" +#include "code/debugInfo.hpp" +#include "code/location.hpp" + +// Takes the jobject for the array of ints created by the java side +// and decodes the information based on pc_offset to find oops +class HSAILOopMapHelper : public StackObj { +private: + jobject _oop_map_array_jobject; + typeArrayOop _oop_map_array; + int _last_pcoffset; + int _last_idx; + + enum { + SAVEAREACOUNTS_OFST=0, + SPAN_OFST=1, + HEADERSIZE=2 + }; + int mapPcOffsetToIndex(int pcOffset) { + if (pcOffset == _last_pcoffset) { + return _last_idx; + } + int span = _oop_map_array->int_at(SPAN_OFST); + for (int idx = HEADERSIZE; idx < _oop_map_array->length(); idx += span) { + int ofst = _oop_map_array->int_at(idx); + if (ofst == pcOffset) { + _last_pcoffset = pcOffset; + _last_idx = idx + 1; + return _last_idx; + } + } + ShouldNotReachHere(); + return -1; + } + +public: + HSAILOopMapHelper(jobject oop_map_array_jobject) { + _oop_map_array_jobject = oop_map_array_jobject; + _last_pcoffset = -1; + resolve_arrays(); + } + + void resolve_arrays() { + _oop_map_array = (typeArrayOop) JNIHandles::resolve(_oop_map_array_jobject); + } + + static int get_save_area_counts(jobject oop_map_array_jobject) { + typeArrayOop oop_map_array_resolved = (typeArrayOop) JNIHandles::resolve(oop_map_array_jobject); + return oop_map_array_resolved->int_at(SAVEAREACOUNTS_OFST); + } + + bool is_oop(int pcOffset, int bit){ + int bits_int_idx = mapPcOffsetToIndex(pcOffset) + (bit / 32); + int bitpos = 
bit % 32; + int bits = _oop_map_array->int_at(bits_int_idx); + return ((bits & (1 << bitpos)) != 0); + } + +}; + +#endif // GPU_HSAIL_VM_GPU_HSAIL_OOPMAPHELPER_HPP diff -r c6ebc1997a55 -r e9998e2be7f5 src/share/vm/classfile/javaClasses.cpp --- a/src/share/vm/classfile/javaClasses.cpp Thu Jun 26 13:42:29 2014 +0200 +++ b/src/share/vm/classfile/javaClasses.cpp Thu Jun 26 18:25:35 2014 +0200 @@ -1532,6 +1532,7 @@ return; } +#ifdef GRAAL // Check for gpu exception to add as top frame Method* gpu_method = thread->get_gpu_exception_method(); if (gpu_method != NULL) { @@ -1541,6 +1542,7 @@ thread->set_gpu_exception_bci(0); thread->set_gpu_exception_method(NULL); } +#endif // Instead of using vframe directly, this version of fill_in_stack_trace // basically handles everything by hand. This significantly improved the diff -r c6ebc1997a55 -r e9998e2be7f5 src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Thu Jun 26 13:42:29 2014 +0200 +++ b/src/share/vm/runtime/thread.cpp Thu Jun 26 18:25:35 2014 +0200 @@ -54,6 +54,9 @@ #include "runtime/fprofiler.hpp" #include "runtime/frame.inline.hpp" #include "runtime/gpu.hpp" +#ifdef GRAAL +# include "hsail/vm/gpu_hsail.hpp" +#endif #include "runtime/init.hpp" #include "runtime/interfaceSupport.hpp" #include "runtime/java.hpp" @@ -1467,8 +1470,11 @@ clear_must_deopt_id(); set_monitor_chunks(NULL); set_next(NULL); +#ifdef GRAAL set_gpu_exception_bci(0); set_gpu_exception_method(NULL); + set_gpu_hsail_deopt_info(NULL); +#endif set_thread_state(_thread_new); #if INCLUDE_NMT set_recorder(NULL); @@ -2853,6 +2859,13 @@ // a scan. 
cf->do_code_blob(_scanned_nmethod); } + +#ifdef GRAAL + Hsail::HSAILDeoptimizationInfo* gpu_hsail_deopt_info = (Hsail::HSAILDeoptimizationInfo*) get_gpu_hsail_deopt_info(); + if (gpu_hsail_deopt_info != NULL) { + gpu_hsail_deopt_info->oops_do(f); + } +#endif } void JavaThread::nmethods_do(CodeBlobClosure* cf) { diff -r c6ebc1997a55 -r e9998e2be7f5 src/share/vm/runtime/thread.hpp --- a/src/share/vm/runtime/thread.hpp Thu Jun 26 13:42:29 2014 +0200 +++ b/src/share/vm/runtime/thread.hpp Thu Jun 26 18:25:35 2014 +0200 @@ -944,15 +944,24 @@ volatile address _exception_handler_pc; // PC for handler of exception volatile int _is_method_handle_return; // true (== 1) if the current exception PC is a MethodHandle call site. +#ifdef GRAAL // Record the method and bci from a gpu kernel exception so // it can be added into the exception stack trace jint _gpu_exception_bci; Method* _gpu_exception_method; + // Record the hsailDeoptimization info so gc oops_do processing can find it + void* _gpu_hsail_deopt_info; +#endif + public: +#ifdef GRAAL void set_gpu_exception_bci(jint bci) { _gpu_exception_bci = bci; } jint get_gpu_exception_bci() { return _gpu_exception_bci; } void set_gpu_exception_method(Method* method) { _gpu_exception_method = method; } Method* get_gpu_exception_method() { return _gpu_exception_method; } + void set_gpu_hsail_deopt_info(void * deoptInfo) { _gpu_hsail_deopt_info = deoptInfo; } + void* get_gpu_hsail_deopt_info() { return _gpu_hsail_deopt_info; } +#endif private: // support for JNI critical regions