# HG changeset patch # User Doug Simon # Date 1399984515 -7200 # Node ID 66d31e70bd792f901cc6bedcf55b3317315e65bb # Parent 4e12cac4e51eb0bdd141a2f7777ea5e84a6c8c12 HSAIL: fixed deopt bug; cleaned up C++ code Contributed-by: Tom Deneau diff -r 4e12cac4e51e -r 66d31e70bd79 graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java --- a/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java Tue May 13 11:55:11 2014 +0200 +++ b/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java Tue May 13 14:35:15 2014 +0200 @@ -139,14 +139,6 @@ } /** - * Determines if the runtime supports {@link StackSlot}s in {@link DebugInfo} associated with - * HSAIL code. - */ - public boolean canHandleDeoptStackSlots() { - return false; - } - - /** * Determines if the runtime has the capabilities required by this test. */ protected boolean supportsRequiredCapabilities() { diff -r 4e12cac4e51e -r 66d31e70bd79 graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/ObjSpillDeoptBase.java --- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/ObjSpillDeoptBase.java Tue May 13 11:55:11 2014 +0200 +++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/ObjSpillDeoptBase.java Tue May 13 14:35:15 2014 +0200 @@ -86,4 +86,8 @@ dispatchMethodKernel(getSize()); } + @Override + protected boolean supportsRequiredCapabilities() { + return canDeoptimize(); + } } diff -r 4e12cac4e51e -r 66d31e70bd79 graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchOneTest.java --- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchOneTest.java Tue May 13 11:55:11 2014 +0200 +++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchOneTest.java Tue May 13 14:35:15 2014 +0200 @@ -130,9 +130,13 @@ dispatchMethodKernel(size, out, in, aux); } + @Override + protected boolean supportsRequiredCapabilities() { + return canDeoptimize(); + } + @Test public void test() { testGeneratedHsail(); } - } diff -r 4e12cac4e51e -r 66d31e70bd79 graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchTest.java --- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchTest.java Tue May 13 11:55:11 2014 +0200 +++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchTest.java Tue May 13 14:35:15 2014 +0200 @@ -133,6 +133,11 @@ dispatchMethodKernel(size, out, in, aux); } + @Override + protected boolean supportsRequiredCapabilities() { + return canDeoptimize(); + } + @Test public void test() { testGeneratedHsail(); diff -r 4e12cac4e51e -r 66d31e70bd79 graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java --- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java Tue May 13 11:55:11 2014 +0200 +++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java Tue May 13 14:35:15 2014 +0200 @@ -374,11 +374,16 @@ throw new GraalInternalError("Cannot execute GPU kernel if device is not initialized"); } int[] oopMapArray = ((HSAILHotSpotNmethod) kernel).getOopMapArray(); - int saveAreaCounts = OopMapArrayBuilder.getSaveAreaCounts(oopMapArray); - int numDRegs = (saveAreaCounts >> 8) & 0xff; - int numStackSlots = (saveAreaCounts >> 16); - // pessimistically assume that any of the DRegs or stackslots could be oops - Object[] oopsSaveArea = new Object[maxDeoptIndex * (numDRegs + numStackSlots)]; + Object[] oopsSaveArea; + if (getRuntime().getConfig().useHSAILDeoptimization) { + int saveAreaCounts = OopMapArrayBuilder.getSaveAreaCounts(oopMapArray); + int numDRegs = (saveAreaCounts >> 8) & 0xff; + int numStackSlots = (saveAreaCounts >> 16); + // pessimistically assume that any of the DRegs or stackslots could be oops + oopsSaveArea = new Object[maxDeoptIndex * (numDRegs + numStackSlots)]; + } else { + oopsSaveArea = null; + } return executeKernel0(kernel, jobSize, args, oopsSaveArea, donorThreadPool.get().getThreads(), HsailAllocBytesPerWorkitem.getValue(), oopMapArray); } @@ -765,7 +770,7 @@ // numStackSlots is the number of 8-byte locations used for stack variables int numStackSlots = (numStackSlotBytes + 7) / 8; - final int offsetToDeoptSaveStates = config.hsailSaveStatesOffset0; + final int offsetToDeoptSaveStates = config.hsailDeoptimizationInfoHeaderSize; final int bytesPerSaveArea = 4 * numSRegs + 8 * numDRegs + 8 * numStackSlots; final int sizeofKernelDeopt = config.hsailKernelDeoptimizationHeaderSize + config.hsailFrameHeaderSize + bytesPerSaveArea; final int offsetToNeverRanArray = config.hsailNeverRanArrayOffset; @@ -854,7 +859,7 @@ asm.emitComment("// store PC"); asm.emitStore(Kind.Int, codeBufferOffsetReg, pcStoreAddr); - asm.emitComment("// store regCounts (" + numSRegs + " $s registers and " + numDRegs + " $d registers" + numStackSlots + " stack slots)"); + asm.emitComment("// store regCounts (" + numSRegs + " $s registers, " + numDRegs + " $d registers, " + numStackSlots + " stack slots)"); asm.emitStore(Kind.Int, Constant.forInt(numSRegs + (numDRegs << 8) + (numStackSlots << 16)), regCountsAddr); // loop thru the usedValues storing each of the registers that are used. diff -r 4e12cac4e51e -r 66d31e70bd79 graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java --- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java Tue May 13 11:55:11 2014 +0200 +++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java Tue May 13 14:35:15 2014 +0200 @@ -1035,7 +1035,7 @@ @HotSpotVMField(name = "HSAILFrame::_num_d_regs", type = "jbyte", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailFrameNumDRegOffset; @HotSpotVMConstant(name = "sizeof(HSAILFrame)") @Stable public int hsailFrameHeaderSize; @HotSpotVMConstant(name = "sizeof(Hsail::HSAILKernelDeoptimization)") @Stable public int hsailKernelDeoptimizationHeaderSize; - @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_save_states[0]", type = "Hsail::HSAILKernelDeoptimization", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailSaveStatesOffset0; + @HotSpotVMConstant(name = "sizeof(Hsail::HSAILDeoptimizationInfo)") @Stable public int hsailDeoptimizationInfoHeaderSize; /** * Mark word right shift to get identity hash code. diff -r 4e12cac4e51e -r 66d31e70bd79 src/gpu/hsail/vm/gpu_hsail.cpp --- a/src/gpu/hsail/vm/gpu_hsail.cpp Tue May 13 11:55:11 2014 +0200 +++ b/src/gpu/hsail/vm/gpu_hsail.cpp Tue May 13 14:35:15 2014 +0200 @@ -392,19 +392,16 @@ // Reset the kernel arguments _okra_clearargs(kernel); - // get how many bytes per deopt save area are required - int saveAreaCounts = OopSaver::getSaveAreaCounts(oop_map_array); - int numSRegs = saveAreaCounts & 0xff; - int numDRegs = (saveAreaCounts >> 8) & 0xff; - int numStackSlots = (saveAreaCounts >> 16); - int bytesPerSaveArea = numSRegs * 4 + (numDRegs + numStackSlots) * 8; - HSAILDeoptimizationInfo* e; if (UseHSAILDeoptimization) { - e = new (MAX_DEOPT_SLOTS, bytesPerSaveArea) HSAILDeoptimizationInfo(MAX_DEOPT_SLOTS, bytesPerSaveArea); - e->set_never_ran_array(NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal)); - memset(e->never_ran_array(), 0, dimX * sizeof(jboolean)); - e->set_donor_threads(donorThreads); + // get how many bytes per deopt save area are required + int saveAreaCounts = OopSaver::getSaveAreaCounts(oop_map_array); + int numSRegs = saveAreaCounts & 0xff; + int numDRegs = (saveAreaCounts >> 8) & 0xff; + int numStackSlots = (saveAreaCounts >> 16); + int bytesPerSaveArea = numSRegs * 4 + (numDRegs + numStackSlots) * 8; + + e = new (MAX_DEOPT_SLOTS, bytesPerSaveArea) HSAILDeoptimizationInfo(MAX_DEOPT_SLOTS, bytesPerSaveArea, dimX, donorThreads); } // This object sets up the kernel arguments @@ -455,7 +452,6 @@ } if (UseHSAILDeoptimization) { - kernelStats.incDeopts(); // check if any workitem requested a deopt int deoptcode = e->deopt_occurred(); if (deoptcode != 1) { @@ -470,6 +466,7 @@ guarantee(deoptcode == 1, msg); } } else { + kernelStats.incDeopts(); { TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction); @@ -586,7 +583,6 @@ } // end of never-ran handling } - FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal); delete e; } kernelStats.finishDispatch(); diff -r 4e12cac4e51e -r 66d31e70bd79 src/gpu/hsail/vm/gpu_hsail.hpp --- a/src/gpu/hsail/vm/gpu_hsail.hpp Tue May 13 11:55:11 2014 +0200 +++ b/src/gpu/hsail/vm/gpu_hsail.hpp Tue May 13 14:35:15 2014 +0200 @@ -37,7 +37,7 @@ private: // TODO: separate workitemid and actionAndReason out // since they are there only once even if there are multiple frames - // for now, though we only ever have one hsail fram + // for now, though we only ever have one hsail frame jint _workitemid; jint _actionAndReason; // the first (innermost) "hsail frame" starts after the above fields @@ -56,51 +56,63 @@ // TODO: query the device to get this number #define MAX_DEOPT_SLOTS (8 * 40 * 64) - class HSAILDeoptimizationInfo : public ResourceObj { + class HSAILDeoptimizationInfo : public CHeapObj { friend class VMStructs; private: jint* _notice_safepoints; jint _deopt_occurred; jint _deopt_next_index; JavaThread** _donor_threads; - jboolean * _never_ran_array; jint _num_slots; - jint _bytesPerSaveArea; jint _deopt_span; + char _ignore; + // keep a pointer last so save area following it is word aligned + jboolean * _never_ran_array; public: HSAILKernelDeoptimization _deopt_save_states[1]; // number and size of these can vary per kernel - inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea) { + static inline size_t hdr_size() { + return sizeof(HSAILDeoptimizationInfo); + } + + inline jbyte * save_area_start() { + return (jbyte*) (this) + hdr_size(); + } + + inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea, int dimX, JavaThread** donorThreads) { _notice_safepoints = &Hsail::_notice_safepoints; _deopt_occurred = 0; _deopt_next_index = 0; _num_slots = numSlots; - _bytesPerSaveArea = bytesPerSaveArea; + _never_ran_array = NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal); + memset(_never_ran_array, 0, dimX * sizeof(jboolean)); + _donor_threads = donorThreads; _deopt_span = sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea; if (TraceGPUInteraction) { tty->print_cr("HSAILDeoptimizationInfo allocated, %d slots of size %d, total size = 0x%lx bytes", _num_slots, _deopt_span, (_num_slots * _deopt_span + sizeof(HSAILDeoptimizationInfo))); } } + inline ~HSAILDeoptimizationInfo() { + FREE_C_HEAP_ARRAY(jboolean, _never_ran_array, mtInternal); + } + inline jint deopt_occurred() { return _deopt_occurred; } inline jint num_deopts() { return _deopt_next_index; } inline jboolean *never_ran_array() { return _never_ran_array; } - inline void set_never_ran_array(jboolean *p) { _never_ran_array = p; } - inline void set_donor_threads(JavaThread **threads) { _donor_threads = threads; } inline jint num_slots() {return _num_slots;} inline HSAILKernelDeoptimization * get_deopt_save_state(int slot) { // use _deopt_span to index into _deopt_states - char *p = (char *) _deopt_save_states; - p += _deopt_span * slot; - return (HSAILKernelDeoptimization *) p; + return (HSAILKernelDeoptimization *) (save_area_start() + _deopt_span * slot); } void * operator new (size_t hdrSize, int numSlots, int bytesPerSaveArea) { - size_t totalSizeBytes = hdrSize + numSlots * (sizeof(HSAILKernelDeoptimization) + bytesPerSaveArea); + assert(hdrSize <= hdr_size(), ""); + size_t totalSizeBytes = hdr_size() + numSlots * (sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea); return NEW_C_HEAP_ARRAY(char, totalSizeBytes, mtInternal); } @@ -109,7 +121,6 @@ } }; - private: static JNINativeMethod HSAIL_methods[]; diff -r 4e12cac4e51e -r 66d31e70bd79 src/gpu/hsail/vm/vmStructs_hsail.hpp --- a/src/gpu/hsail/vm/vmStructs_hsail.hpp Tue May 13 11:55:11 2014 +0200 +++ b/src/gpu/hsail/vm/vmStructs_hsail.hpp Tue May 13 14:35:15 2014 +0200 @@ -46,7 +46,6 @@ nonstatic_field(Hsail::HSAILDeoptimizationInfo, _deopt_next_index, jint) \ nonstatic_field(Hsail::HSAILDeoptimizationInfo, _donor_threads, JavaThread**) \ nonstatic_field(Hsail::HSAILDeoptimizationInfo, _never_ran_array, jboolean *) \ - nonstatic_field(Hsail::HSAILDeoptimizationInfo, _deopt_save_states[0], Hsail::HSAILKernelDeoptimization) \ #define VM_TYPES_GPU_HSAIL(declare_type, declare_toplevel_type) \ declare_toplevel_type(HSAILFrame) \ @@ -57,5 +56,6 @@ #define VM_INT_CONSTANTS_GPU_HSAIL(declare_constant) \ declare_constant(sizeof(HSAILFrame)) \ declare_constant(sizeof(Hsail::HSAILKernelDeoptimization)) \ + declare_constant(sizeof(Hsail::HSAILDeoptimizationInfo)) \ #endif // GPU_HSAIL_VM_VMSTRUCTS_HSAIL_HPP