changeset 15609:66d31e70bd79

HSAIL: fixed deopt bug; cleaned up C++ code. Contributed-by: Tom Deneau <tom.deneau@amd.com>
author Doug Simon <doug.simon@oracle.com>
date Tue, 13 May 2014 14:35:15 +0200
parents 4e12cac4e51e
children c44cf62d1c97 cf430b3e838b
files graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/ObjSpillDeoptBase.java graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchOneTest.java graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchTest.java graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java src/gpu/hsail/vm/gpu_hsail.cpp src/gpu/hsail/vm/gpu_hsail.hpp src/gpu/hsail/vm/vmStructs_hsail.hpp
diffstat 9 files changed, 61 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java	Tue May 13 11:55:11 2014 +0200
+++ b/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java	Tue May 13 14:35:15 2014 +0200
@@ -139,14 +139,6 @@
     }
 
     /**
-     * Determines if the runtime supports {@link StackSlot}s in {@link DebugInfo} associated with
-     * HSAIL code.
-     */
-    public boolean canHandleDeoptStackSlots() {
-        return false;
-    }
-
-    /**
      * Determines if the runtime has the capabilities required by this test.
      */
     protected boolean supportsRequiredCapabilities() {
--- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/ObjSpillDeoptBase.java	Tue May 13 11:55:11 2014 +0200
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/ObjSpillDeoptBase.java	Tue May 13 14:35:15 2014 +0200
@@ -86,4 +86,8 @@
         dispatchMethodKernel(getSize());
     }
 
+    @Override
+    protected boolean supportsRequiredCapabilities() {
+        return canDeoptimize();
+    }
 }
--- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchOneTest.java	Tue May 13 11:55:11 2014 +0200
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchOneTest.java	Tue May 13 14:35:15 2014 +0200
@@ -130,9 +130,13 @@
         dispatchMethodKernel(size, out, in, aux);
     }
 
+    @Override
+    protected boolean supportsRequiredCapabilities() {
+        return canDeoptimize();
+    }
+
     @Test
     public void test() {
         testGeneratedHsail();
     }
-
 }
--- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchTest.java	Tue May 13 11:55:11 2014 +0200
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/StaticDoubleSpillBoundsCatchTest.java	Tue May 13 14:35:15 2014 +0200
@@ -133,6 +133,11 @@
         dispatchMethodKernel(size, out, in, aux);
     }
 
+    @Override
+    protected boolean supportsRequiredCapabilities() {
+        return canDeoptimize();
+    }
+
     @Test
     public void test() {
         testGeneratedHsail();
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Tue May 13 11:55:11 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Tue May 13 14:35:15 2014 +0200
@@ -374,11 +374,16 @@
             throw new GraalInternalError("Cannot execute GPU kernel if device is not initialized");
         }
         int[] oopMapArray = ((HSAILHotSpotNmethod) kernel).getOopMapArray();
-        int saveAreaCounts = OopMapArrayBuilder.getSaveAreaCounts(oopMapArray);
-        int numDRegs = (saveAreaCounts >> 8) & 0xff;
-        int numStackSlots = (saveAreaCounts >> 16);
-        // pessimistically assume that any of the DRegs or stackslots could be oops
-        Object[] oopsSaveArea = new Object[maxDeoptIndex * (numDRegs + numStackSlots)];
+        Object[] oopsSaveArea;
+        if (getRuntime().getConfig().useHSAILDeoptimization) {
+            int saveAreaCounts = OopMapArrayBuilder.getSaveAreaCounts(oopMapArray);
+            int numDRegs = (saveAreaCounts >> 8) & 0xff;
+            int numStackSlots = (saveAreaCounts >> 16);
+            // pessimistically assume that any of the DRegs or stackslots could be oops
+            oopsSaveArea = new Object[maxDeoptIndex * (numDRegs + numStackSlots)];
+        } else {
+            oopsSaveArea = null;
+        }
         return executeKernel0(kernel, jobSize, args, oopsSaveArea, donorThreadPool.get().getThreads(), HsailAllocBytesPerWorkitem.getValue(), oopMapArray);
     }
 
@@ -765,7 +770,7 @@
             // numStackSlots is the number of 8-byte locations used for stack variables
             int numStackSlots = (numStackSlotBytes + 7) / 8;
 
-            final int offsetToDeoptSaveStates = config.hsailSaveStatesOffset0;
+            final int offsetToDeoptSaveStates = config.hsailDeoptimizationInfoHeaderSize;
             final int bytesPerSaveArea = 4 * numSRegs + 8 * numDRegs + 8 * numStackSlots;
             final int sizeofKernelDeopt = config.hsailKernelDeoptimizationHeaderSize + config.hsailFrameHeaderSize + bytesPerSaveArea;
             final int offsetToNeverRanArray = config.hsailNeverRanArrayOffset;
@@ -854,7 +859,7 @@
             asm.emitComment("// store PC");
             asm.emitStore(Kind.Int, codeBufferOffsetReg, pcStoreAddr);
 
-            asm.emitComment("// store regCounts (" + numSRegs + " $s registers and " + numDRegs + " $d registers" + numStackSlots + " stack slots)");
+            asm.emitComment("// store regCounts (" + numSRegs + " $s registers, " + numDRegs + " $d registers, " + numStackSlots + " stack slots)");
             asm.emitStore(Kind.Int, Constant.forInt(numSRegs + (numDRegs << 8) + (numStackSlots << 16)), regCountsAddr);
 
             // loop thru the usedValues storing each of the registers that are used.
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java	Tue May 13 11:55:11 2014 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java	Tue May 13 14:35:15 2014 +0200
@@ -1035,7 +1035,7 @@
     @HotSpotVMField(name = "HSAILFrame::_num_d_regs", type = "jbyte", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailFrameNumDRegOffset;
     @HotSpotVMConstant(name = "sizeof(HSAILFrame)") @Stable public int hsailFrameHeaderSize;
     @HotSpotVMConstant(name = "sizeof(Hsail::HSAILKernelDeoptimization)") @Stable public int hsailKernelDeoptimizationHeaderSize;
-    @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_save_states[0]", type = "Hsail::HSAILKernelDeoptimization", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailSaveStatesOffset0;
+    @HotSpotVMConstant(name = "sizeof(Hsail::HSAILDeoptimizationInfo)") @Stable public int hsailDeoptimizationInfoHeaderSize;
 
     /**
      * Mark word right shift to get identity hash code.
--- a/src/gpu/hsail/vm/gpu_hsail.cpp	Tue May 13 11:55:11 2014 +0200
+++ b/src/gpu/hsail/vm/gpu_hsail.cpp	Tue May 13 14:35:15 2014 +0200
@@ -392,19 +392,16 @@
   // Reset the kernel arguments
   _okra_clearargs(kernel);
 
-  // get how many bytes per deopt save area are required
-  int saveAreaCounts = OopSaver::getSaveAreaCounts(oop_map_array);
-  int numSRegs = saveAreaCounts & 0xff;
-  int numDRegs = (saveAreaCounts >> 8) & 0xff;
-  int numStackSlots = (saveAreaCounts >> 16);
-  int bytesPerSaveArea = numSRegs * 4 + (numDRegs + numStackSlots) * 8;
-
   HSAILDeoptimizationInfo* e;
   if (UseHSAILDeoptimization) {
-    e = new (MAX_DEOPT_SLOTS, bytesPerSaveArea) HSAILDeoptimizationInfo(MAX_DEOPT_SLOTS, bytesPerSaveArea);
-    e->set_never_ran_array(NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal));
-    memset(e->never_ran_array(), 0, dimX * sizeof(jboolean));
-    e->set_donor_threads(donorThreads);
+    // get how many bytes per deopt save area are required
+    int saveAreaCounts = OopSaver::getSaveAreaCounts(oop_map_array);
+    int numSRegs = saveAreaCounts & 0xff;
+    int numDRegs = (saveAreaCounts >> 8) & 0xff;
+    int numStackSlots = (saveAreaCounts >> 16);
+    int bytesPerSaveArea = numSRegs * 4 + (numDRegs + numStackSlots) * 8;
+
+    e = new (MAX_DEOPT_SLOTS, bytesPerSaveArea) HSAILDeoptimizationInfo(MAX_DEOPT_SLOTS, bytesPerSaveArea, dimX, donorThreads);
   }
 
   // This object sets up the kernel arguments
@@ -455,7 +452,6 @@
   }
 
   if (UseHSAILDeoptimization) {
-    kernelStats.incDeopts();
     // check if any workitem requested a deopt
     int deoptcode = e->deopt_occurred();
     if (deoptcode != 1) {
@@ -470,6 +466,7 @@
         guarantee(deoptcode == 1, msg);
       }
     } else {
+      kernelStats.incDeopts();
 
       {
         TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction);
@@ -586,7 +583,6 @@
       } // end of never-ran handling
     }
 
-    FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal);
     delete e;
   }
   kernelStats.finishDispatch();
--- a/src/gpu/hsail/vm/gpu_hsail.hpp	Tue May 13 11:55:11 2014 +0200
+++ b/src/gpu/hsail/vm/gpu_hsail.hpp	Tue May 13 14:35:15 2014 +0200
@@ -37,7 +37,7 @@
    private:
     // TODO: separate workitemid and actionAndReason out
     // since they are there only once even if there are multiple frames
-    // for now, though we only ever have one hsail fram
+    // for now, though we only ever have one hsail frame
     jint  _workitemid;
     jint  _actionAndReason;
     // the first (innermost) "hsail frame" starts after the above fields
@@ -56,51 +56,63 @@
 // TODO: query the device to get this number
 #define MAX_DEOPT_SLOTS    (8 * 40 * 64)
 
-  class HSAILDeoptimizationInfo : public ResourceObj {
+  class HSAILDeoptimizationInfo : public CHeapObj<mtInternal> {
     friend class VMStructs;
    private:
     jint* _notice_safepoints;
     jint _deopt_occurred;
     jint _deopt_next_index;
     JavaThread** _donor_threads;
-    jboolean * _never_ran_array;
     jint _num_slots;
-    jint _bytesPerSaveArea;
     jint _deopt_span;
+    char _ignore;
+    // keep a pointer last so save area following it is word aligned
+    jboolean * _never_ran_array; 
 
    public:
     HSAILKernelDeoptimization _deopt_save_states[1];  // number and size of these can vary per kernel
 
-    inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea) {
+    static inline size_t hdr_size() {
+      return sizeof(HSAILDeoptimizationInfo);
+    }
+
+    inline jbyte * save_area_start() {
+      return (jbyte*) (this) + hdr_size();
+    }
+
+    inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea, int dimX, JavaThread** donorThreads) {
       _notice_safepoints = &Hsail::_notice_safepoints;
       _deopt_occurred = 0;
       _deopt_next_index = 0;
       _num_slots = numSlots;
-      _bytesPerSaveArea = bytesPerSaveArea;
+      _never_ran_array = NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal);
+      memset(_never_ran_array, 0, dimX * sizeof(jboolean));
+      _donor_threads = donorThreads;
       _deopt_span = sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea;
       if (TraceGPUInteraction) {
         tty->print_cr("HSAILDeoptimizationInfo allocated, %d slots of size %d, total size = 0x%lx bytes", _num_slots, _deopt_span, (_num_slots * _deopt_span + sizeof(HSAILDeoptimizationInfo)));
       }
     }
 
+    inline ~HSAILDeoptimizationInfo() {
+      FREE_C_HEAP_ARRAY(jboolean, _never_ran_array, mtInternal);
+    }
+
     inline jint deopt_occurred() {
       return _deopt_occurred;
     }
     inline jint num_deopts() { return _deopt_next_index; }
     inline jboolean *never_ran_array() { return _never_ran_array; }
-    inline void  set_never_ran_array(jboolean *p) { _never_ran_array = p; }
-    inline void  set_donor_threads(JavaThread **threads) { _donor_threads = threads; }
     inline jint num_slots() {return _num_slots;}
 
     inline HSAILKernelDeoptimization * get_deopt_save_state(int slot) {
       // use _deopt_span to index into _deopt_states
-      char *p = (char *) _deopt_save_states;
-      p += _deopt_span * slot;
-      return (HSAILKernelDeoptimization *) p;
+      return (HSAILKernelDeoptimization *) (save_area_start() + _deopt_span * slot);
     }
 
     void * operator new (size_t hdrSize, int numSlots, int bytesPerSaveArea) {
-      size_t totalSizeBytes = hdrSize + numSlots * (sizeof(HSAILKernelDeoptimization) + bytesPerSaveArea);
+      assert(hdrSize <= hdr_size(), "");
+      size_t totalSizeBytes = hdr_size()  + numSlots * (sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea);
       return NEW_C_HEAP_ARRAY(char, totalSizeBytes, mtInternal);
     }
 
@@ -109,7 +121,6 @@
     }
   };
 
-
 private:
 
   static JNINativeMethod HSAIL_methods[];
--- a/src/gpu/hsail/vm/vmStructs_hsail.hpp	Tue May 13 11:55:11 2014 +0200
+++ b/src/gpu/hsail/vm/vmStructs_hsail.hpp	Tue May 13 14:35:15 2014 +0200
@@ -46,7 +46,6 @@
   nonstatic_field(Hsail::HSAILDeoptimizationInfo, _deopt_next_index,                       jint)                                      \
   nonstatic_field(Hsail::HSAILDeoptimizationInfo, _donor_threads,                          JavaThread**)                              \
   nonstatic_field(Hsail::HSAILDeoptimizationInfo, _never_ran_array,                        jboolean *)                                \
-  nonstatic_field(Hsail::HSAILDeoptimizationInfo, _deopt_save_states[0],                   Hsail::HSAILKernelDeoptimization)          \
 
 #define VM_TYPES_GPU_HSAIL(declare_type, declare_toplevel_type)                 \
   declare_toplevel_type(HSAILFrame)                                  \
@@ -57,5 +56,6 @@
 #define VM_INT_CONSTANTS_GPU_HSAIL(declare_constant)                                                                                  \
   declare_constant(sizeof(HSAILFrame))                                                                                                \
   declare_constant(sizeof(Hsail::HSAILKernelDeoptimization))                                                                          \
+  declare_constant(sizeof(Hsail::HSAILDeoptimizationInfo))                                                                           \
 
 #endif // GPU_HSAIL_VM_VMSTRUCTS_HSAIL_HPP