comparison src/gpu/hsail/vm/gpu_hsail.hpp @ 16242:e9998e2be7f5

use oops_do to modify saved hsail state. Contributed-by: Tom Deneau <tom.deneau@amd.com>
author Gilles Duboscq <duboscq@ssw.jku.at>
date Thu, 26 Jun 2014 18:25:35 +0200
parents 310994c667a7
children d5c4bb0039d8
comparison
equal deleted inserted replaced
16241:c6ebc1997a55 16242:e9998e2be7f5
26 #define GPU_HSAIL_VM_GPU_HSAIL_HPP 26 #define GPU_HSAIL_VM_GPU_HSAIL_HPP
27 27
28 #include "runtime/gpu.hpp" 28 #include "runtime/gpu.hpp"
29 #include "utilities/exceptions.hpp" 29 #include "utilities/exceptions.hpp"
30 #include "graal/graalEnv.hpp" 30 #include "graal/graalEnv.hpp"
31 #include "gpu_hsail_OopMapHelper.hpp"
31 #include "gpu_hsail_Frame.hpp" 32 #include "gpu_hsail_Frame.hpp"
32 #include "gpu_hsail_Tlab.hpp" 33 #include "gpu_hsail_Tlab.hpp"
33 34
34 struct HSAILKernelStats { 35 struct HSAILKernelStats {
35 int _dispatches; 36 int _dispatches;
99 jint* _notice_safepoints; 100 jint* _notice_safepoints;
100 jint _deopt_occurred; 101 jint _deopt_occurred;
101 jint _deopt_next_index; 102 jint _deopt_next_index;
102 jint _num_slots; 103 jint _num_slots;
103 jint _deopt_span; 104 jint _deopt_span;
105 jint _deopt_work_index; // how far we are in processing the deopts
104 HSAILTlabInfo** _cur_tlab_info; // copy of what was in the HSAILAllocationInfo, to avoid an extra indirection 106 HSAILTlabInfo** _cur_tlab_info; // copy of what was in the HSAILAllocationInfo, to avoid an extra indirection
105 HSAILAllocationInfo* _alloc_info; 107 HSAILAllocationInfo* _alloc_info;
106 char _ignore; 108 char _ignore;
109 jobject _oop_map_array;
107 // keep a pointer last so save area following it is word aligned 110 // keep a pointer last so save area following it is word aligned
108 jboolean* _never_ran_array; 111 jboolean* _never_ran_array;
109 112
110 public: 113 public:
111 // static HSAILKernelStats kernelStats; 114 // static HSAILKernelStats kernelStats;
117 120
118 inline jbyte* save_area_start() { 121 inline jbyte* save_area_start() {
119 return (jbyte*) (this) + hdr_size(); 122 return (jbyte*) (this) + hdr_size();
120 } 123 }
121 124
122 inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea, int dimX, HSAILAllocationInfo* allocInfo) { 125 inline HSAILDeoptimizationInfo(int numSlots, int bytesPerSaveArea, int dimX, HSAILAllocationInfo* allocInfo, jobject oop_map_array) {
123 _notice_safepoints = &Hsail::_notice_safepoints; 126 _notice_safepoints = &Hsail::_notice_safepoints;
124 _deopt_occurred = 0; 127 _deopt_occurred = 0;
125 _deopt_next_index = 0; 128 _deopt_next_index = 0;
129 _deopt_work_index = 0;
126 _num_slots = numSlots; 130 _num_slots = numSlots;
127 _never_ran_array = NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal); 131 _never_ran_array = NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal);
128 memset(_never_ran_array, 0, dimX * sizeof(jboolean)); 132 memset(_never_ran_array, 0, dimX * sizeof(jboolean));
129 _alloc_info = allocInfo; 133 _alloc_info = allocInfo;
134 _oop_map_array = oop_map_array;
130 _deopt_span = sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea; 135 _deopt_span = sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea;
131 if (TraceGPUInteraction) { 136 if (TraceGPUInteraction) {
132 tty->print_cr("HSAILDeoptimizationInfo allocated, %d slots of size %d, total size = 0x%lx bytes", _num_slots, _deopt_span, (_num_slots * _deopt_span + sizeof(HSAILDeoptimizationInfo))); 137 tty->print_cr("HSAILDeoptimizationInfo allocated, %d slots of size %d, total size = 0x%lx bytes", _num_slots, _deopt_span, (_num_slots * _deopt_span + sizeof(HSAILDeoptimizationInfo)));
133 } 138 }
134 } 139 }
141 return _deopt_occurred; 146 return _deopt_occurred;
142 } 147 }
143 inline jint num_deopts() { return _deopt_next_index; } 148 inline jint num_deopts() { return _deopt_next_index; }
144 inline jboolean* never_ran_array() { return _never_ran_array; } 149 inline jboolean* never_ran_array() { return _never_ran_array; }
145 inline jint num_slots() {return _num_slots;} 150 inline jint num_slots() {return _num_slots;}
151 inline void set_deopt_work_index(int val) { _deopt_work_index = val; }
152 inline jint deopt_work_index() { return _deopt_work_index; }
146 153
147 inline HSAILKernelDeoptimization* get_deopt_save_state(int slot) { 154 inline HSAILKernelDeoptimization* get_deopt_save_state(int slot) {
148 // use _deopt_span to index into _deopt_states 155 // use _deopt_span to index into _deopt_states
149 return (HSAILKernelDeoptimization*) (save_area_start() + _deopt_span * slot); 156 return (HSAILKernelDeoptimization*) (save_area_start() + _deopt_span * slot);
150 } 157 }
151 158
152 void setCurTlabInfos(HSAILTlabInfo** ptlabInfos) { 159 void set_cur_tlabInfos(HSAILTlabInfo** ptlabInfos) {
153 _cur_tlab_info = ptlabInfos; 160 _cur_tlab_info = ptlabInfos;
154 } 161 }
162
163 void oops_do(OopClosure* f);
155 164
156 void* operator new (size_t hdrSize, int numSlots, int bytesPerSaveArea) { 165 void* operator new (size_t hdrSize, int numSlots, int bytesPerSaveArea) {
157 assert(hdrSize <= hdr_size(), ""); 166 assert(hdrSize <= hdr_size(), "");
158 size_t totalSizeBytes = hdr_size() + numSlots * (sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea); 167 size_t totalSizeBytes = hdr_size() + numSlots * (sizeof(HSAILKernelDeoptimization) + sizeof(HSAILFrame) + bytesPerSaveArea);
159 return NEW_C_HEAP_ARRAY(char, totalSizeBytes, mtInternal); 168 return NEW_C_HEAP_ARRAY(char, totalSizeBytes, mtInternal);
173 182
174 // static native long generateKernel(byte[] targetCode, String name); 183 // static native long generateKernel(byte[] targetCode, String name);
175 JNIEXPORT static jlong generate_kernel(JNIEnv* env, jclass, jbyteArray code_handle, jstring name_handle); 184 JNIEXPORT static jlong generate_kernel(JNIEnv* env, jclass, jbyteArray code_handle, jstring name_handle);
176 185
177 // static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args); 186 // static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args);
178 JNIEXPORT static jboolean execute_kernel_void_1d(JNIEnv* env, jclass, jobject hotspotInstalledCode, jint dimX, jobject args, jobject oopsSave, 187 JNIEXPORT static jboolean execute_kernel_void_1d(JNIEnv* env, jclass, jobject hotspotInstalledCode, jint dimX, jobject args,
179 jobject donorThreads, int allocBytesPerWorkitem, jobject oop_map_array); 188 jobject donorThreads, int allocBytesPerWorkitem, jobject oop_map_array);
180 189
181 static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm, jobject oopsSave, 190 static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm,
182 jobject donorThreads, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS); 191 jobject donorThreads, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS);
183 192
184 static void register_heap(); 193 static void register_heap();
185 194
186 static GraalEnv::CodeInstallResult install_code(Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle triggered_deoptimizations); 195 static GraalEnv::CodeInstallResult install_code(Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle triggered_deoptimizations);