diff src/gpu/hsail/vm/gpu_hsail.hpp @ 16076:06eedda53e14

HSAIL: add support to allocate new TLAB from GPU Contributed-by: Tom Deneau <tom.deneau@amd.com>
author Doug Simon <doug.simon@oracle.com>
date Tue, 10 Jun 2014 22:36:26 +0200
parents 66d31e70bd79
children 310994c667a7
line wrap: on
line diff
--- a/src/gpu/hsail/vm/gpu_hsail.hpp	Tue Jun 10 19:08:33 2014 +0200
+++ b/src/gpu/hsail/vm/gpu_hsail.hpp	Tue Jun 10 22:36:26 2014 +0200
@@ -22,12 +22,47 @@
  *
  */
 
-#ifndef GPU_HSAIL_HPP
-#define GPU_HSAIL_HPP
+#ifndef GPU_HSAIL_VM_GPU_HSAIL_HPP
+#define GPU_HSAIL_VM_GPU_HSAIL_HPP
 
 #include "utilities/exceptions.hpp"
 #include "graal/graalEnv.hpp"
 #include "gpu_hsail_Frame.hpp"
+#include "gpu_hsail_Tlab.hpp"
+
+struct HSAILKernelStats {
+  int _dispatches;
+  int _deopts;
+  int _overflows;
+  bool _changeSeen;
+  
+public:
+  HSAILKernelStats() {
+    _dispatches = _deopts = _overflows = 0;
+    _changeSeen = false;
+  }
+  
+  void incDeopts() {
+    _deopts++;
+    _changeSeen = true;
+  }
+  void incOverflows() {
+    _overflows++;
+    _changeSeen = true;
+  }
+  
+  void finishDispatch() {
+    _dispatches++;
+    if (_changeSeen) {
+      // print();
+      _changeSeen = false;
+    }
+  }
+  
+  void print() {
+    tty->print_cr("Disp=%d, Deopts=%d, Ovflows=%d", _dispatches, _deopts, _overflows);
+  }
+};
 
 class Hsail : public Gpu {
 
@@ -46,9 +81,9 @@
     inline jint workitem() { return _workitemid; }
     inline jint reason() { return _actionAndReason; }
     inline jint pc_offset() { return first_frame()->pc_offset(); }
-    inline HSAILFrame *first_frame() {
+    inline HSAILFrame* first_frame() {
       // starts after the "header" fields
-      return (HSAILFrame *) (((jbyte *) this) + sizeof(*this));
+      return (HSAILFrame*) (((jbyte*) this) + sizeof(*this));
     }
   };
 
@@ -56,38 +91,41 @@
 // TODO: query the device to get this number
 #define MAX_DEOPT_SLOTS    (8 * 40 * 64)
 
+
   class HSAILDeoptimizationInfo : public CHeapObj<mtInternal> {
     friend class VMStructs;
    private:
     jint* _notice_safepoints;
     jint _deopt_occurred;
     jint _deopt_next_index;
-    JavaThread** _donor_threads;
     jint _num_slots;
     jint _deopt_span;
+    HSAILTlabInfo** _cur_tlab_info;   // copy of what was in the HSAILAllocationInfo, to avoid an extra indirection
+    HSAILAllocationInfo* _alloc_info;
     char _ignore;
     // keep a pointer last so save area following it is word aligned
-    jboolean * _never_ran_array; 
+    jboolean* _never_ran_array; 
 
    public:
+    // static HSAILKernelStats kernelStats;
     HSAILKernelDeoptimization _deopt_save_states[1];  // number and size of these can vary per kernel
 
     static inline size_t hdr_size() {
       return sizeof(HSAILDeoptimizationInfo);
     }
 
-    inline jbyte * save_area_start() {
+    inline jbyte* save_area_start() {
       return (jbyte*) (this) + hdr_size();
     }
 
-    inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea, int dimX, JavaThread** donorThreads) {
+    inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea, int dimX, HSAILAllocationInfo* allocInfo) {
       _notice_safepoints = &Hsail::_notice_safepoints;
       _deopt_occurred = 0;
       _deopt_next_index = 0;
       _num_slots = numSlots;
       _never_ran_array = NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal);
       memset(_never_ran_array, 0, dimX * sizeof(jboolean));
-      _donor_threads = donorThreads;
+      _alloc_info = allocInfo;
       _deopt_span = sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea;
       if (TraceGPUInteraction) {
         tty->print_cr("HSAILDeoptimizationInfo allocated, %d slots of size %d, total size = 0x%lx bytes", _num_slots, _deopt_span, (_num_slots * _deopt_span + sizeof(HSAILDeoptimizationInfo)));
@@ -102,21 +140,25 @@
       return _deopt_occurred;
     }
     inline jint num_deopts() { return _deopt_next_index; }
-    inline jboolean *never_ran_array() { return _never_ran_array; }
+    inline jboolean* never_ran_array() { return _never_ran_array; }
     inline jint num_slots() {return _num_slots;}
 
-    inline HSAILKernelDeoptimization * get_deopt_save_state(int slot) {
+    inline HSAILKernelDeoptimization* get_deopt_save_state(int slot) {
       // use _deopt_span to index into _deopt_states
-      return (HSAILKernelDeoptimization *) (save_area_start() + _deopt_span * slot);
+      return (HSAILKernelDeoptimization*) (save_area_start() + _deopt_span * slot);
     }
 
-    void * operator new (size_t hdrSize, int numSlots, int bytesPerSaveArea) {
+    void setCurTlabInfos(HSAILTlabInfo** ptlabInfos) {
+      _cur_tlab_info = ptlabInfos;
+    }
+
+    void* operator new (size_t hdrSize, int numSlots, int bytesPerSaveArea) {
       assert(hdrSize <= hdr_size(), "");
       size_t totalSizeBytes = hdr_size()  + numSlots * (sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea);
       return NEW_C_HEAP_ARRAY(char, totalSizeBytes, mtInternal);
     }
 
-    void operator delete (void *ptr) {
+    void operator delete (void* ptr) {
       FREE_C_HEAP_ARRAY(char, ptr, mtInternal);
     }
   };
@@ -126,21 +168,16 @@
   static JNINativeMethod HSAIL_methods[];
 
   // static native boolean initialize();
-  JNIEXPORT static jboolean initialize(JNIEnv *env, jclass);
+  JNIEXPORT static jboolean initialize(JNIEnv* env, jclass);
 
   // static native long generateKernel(byte[] targetCode, String name);
-  JNIEXPORT static jlong generate_kernel(JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle);
+  JNIEXPORT static jlong generate_kernel(JNIEnv* env, jclass, jbyteArray code_handle, jstring name_handle);
 
   // static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args);
-  JNIEXPORT static jboolean execute_kernel_void_1d(JNIEnv *env, jclass, jobject hotspotInstalledCode, jint dimX, jobject args, jobject oopsSave,
+  JNIEXPORT static jboolean execute_kernel_void_1d(JNIEnv* env, jclass, jobject hotspotInstalledCode, jint dimX, jobject args, jobject oopsSave,
                                                    jobject donorThreads, int allocBytesPerWorkitem, jobject oop_map_array);
 
-  // static native void getThreadPointers(Object[] donorThreads, long[] threadPointersOut);
-  JNIEXPORT static void get_thread_pointers(JNIEnv *env, jclass, jobject donor_threads_handle, jobject thread_ptrs_handle);
-
-  static void getNewTlabForDonorThread(ThreadLocalAllocBuffer* tlab, size_t tlabMinHsail);
-
-  static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oopsSave,
+  static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm, jobject oopsSave,
                                                   jobject donorThreads, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS);
 
   static void register_heap();
@@ -165,7 +202,7 @@
 
 private:
   typedef void* (*okra_create_context_func_t)();
-  typedef void* (*okra_create_kernel_func_t)(void*, unsigned char *, const char *);
+  typedef void* (*okra_create_kernel_func_t)(void*, unsigned char*, const char*);
   typedef bool (*okra_push_object_func_t)(void*, void*);
   typedef bool (*okra_push_boolean_func_t)(void*, jboolean);
   typedef bool (*okra_push_byte_func_t)(void*, jbyte);
@@ -197,4 +234,4 @@
   // true if safepoints are activated
   static jint _notice_safepoints;
 };
-#endif // GPU_HSAIL_HPP
+#endif // GPU_HSAIL_VM_GPU_HSAIL_HPP