comparison src/gpu/hsail/vm/gpu_hsail.cpp @ 16242:e9998e2be7f5

use oops_do to modify saved hsail state Contributed-by: Tom Deneau <tom.deneau@amd.com>
author Gilles Duboscq <duboscq@ssw.jku.at>
date Thu, 26 Jun 2014 18:25:35 +0200
parents 06eedda53e14
children d5c4bb0039d8
comparison
equal deleted inserted replaced
16241:c6ebc1997a55 16242:e9998e2be7f5
64 #define HS_NMETHOD "Lcom/oracle/graal/hotspot/meta/HotSpotNmethod;" 64 #define HS_NMETHOD "Lcom/oracle/graal/hotspot/meta/HotSpotNmethod;"
65 65
66 JNINativeMethod Hsail::HSAIL_methods[] = { 66 JNINativeMethod Hsail::HSAIL_methods[] = {
67 {CC"initialize", CC"()Z", FN_PTR(Hsail::initialize)}, 67 {CC"initialize", CC"()Z", FN_PTR(Hsail::initialize)},
68 {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(Hsail::generate_kernel)}, 68 {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(Hsail::generate_kernel)},
69 {CC"executeKernel0", CC"("HS_INSTALLED_CODE"I["OBJECT"["OBJECT"["JLTHREAD"I[I)Z", FN_PTR(Hsail::execute_kernel_void_1d)}, 69 {CC"executeKernel0", CC"("HS_INSTALLED_CODE"I["OBJECT"["JLTHREAD"I[I)Z", FN_PTR(Hsail::execute_kernel_void_1d)},
70 }; 70 };
71 71
72 void* Hsail::_device_context = NULL; 72 void* Hsail::_device_context = NULL;
73 jint Hsail::_notice_safepoints = false; 73 jint Hsail::_notice_safepoints = false;
74 74
106 tty->print_cr("[HSAIL] base=0x%08x, capacity=%ld", Universe::heap()->base(), Universe::heap()->capacity()); 106 tty->print_cr("[HSAIL] base=0x%08x, capacity=%ld", Universe::heap()->base(), Universe::heap()->capacity());
107 } 107 }
108 _okra_register_heap(Universe::heap()->base(), Universe::heap()->capacity()); 108 _okra_register_heap(Universe::heap()->base(), Universe::heap()->capacity());
109 } 109 }
110 110
111 GPU_VMENTRY(jboolean, Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args, jobject oops_save, 111 GPU_VMENTRY(jboolean, Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args,
112 jobject donor_threads, jint allocBytesPerWorkitem, jobject oop_map_array)) 112 jobject donor_threads, jint allocBytesPerWorkitem, jobject oop_map_array))
113 113
114 ResourceMark rm; 114 ResourceMark rm;
115 jlong nmethodValue = InstalledCode::address(kernel_handle); 115 jlong nmethodValue = InstalledCode::address(kernel_handle);
116 if (nmethodValue == 0) { 116 if (nmethodValue == 0) {
123 void* kernel = (void*) HotSpotInstalledCode::codeStart(kernel_handle); 123 void* kernel = (void*) HotSpotInstalledCode::codeStart(kernel_handle);
124 if (kernel == NULL) { 124 if (kernel == NULL) {
125 SharedRuntime::throw_and_post_jvmti_exception(JavaThread::current(), vmSymbols::com_oracle_graal_api_code_InvalidInstalledCodeException(), NULL); 125 SharedRuntime::throw_and_post_jvmti_exception(JavaThread::current(), vmSymbols::com_oracle_graal_api_code_InvalidInstalledCodeException(), NULL);
126 } 126 }
127 127
128 return execute_kernel_void_1d_internal((address) kernel, dimX, args, mh, nm, oops_save, donor_threads, allocBytesPerWorkitem, oop_map_array, CHECK_0); 128 return execute_kernel_void_1d_internal((address) kernel, dimX, args, mh, nm, donor_threads, allocBytesPerWorkitem, oop_map_array, CHECK_0);
129 GPU_END 129 GPU_END
130 130
131 static void showRanges(jboolean* a, int len) { 131 static void showRanges(jboolean* a, int len) {
132 // show ranges 132 // show ranges
133 bool lookFor = true; 133 bool lookFor = true;
143 if (lookFor == false) { 143 if (lookFor == false) {
144 tty->print_cr("-%d", len-1); 144 tty->print_cr("-%d", len-1);
145 } 145 }
146 } 146 }
147 147
148 class OopSaver : public StackObj { 148 jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm,
149 private:
150 objArrayOop _oopsSaveArray;
151 typeArrayOop _oopMapArray;
152 jobject _oops_save;
153 jobject _oop_map_array;
154 int _last_pcoffset;
155 int _last_idx;
156 int _saveAreaCounts;
157
158 enum {
159 SAVEAREACOUNTS_OFST=0,
160 SPAN_OFST=1,
161 HEADERSIZE=2
162 };
163 int mapPcOffsetToIndex(int pcOffset) {
164 if (pcOffset == _last_pcoffset) {
165 return _last_idx;
166 }
167 int span = _oopMapArray->int_at(SPAN_OFST);
168 for (int idx = HEADERSIZE; idx < _oopMapArray->length(); idx += span) {
169 int ofst = _oopMapArray->int_at(idx);
170 if (ofst == pcOffset) {
171 _last_pcoffset = pcOffset;
172 _last_idx = idx + 1;
173 return _last_idx;
174 }
175 }
176 ShouldNotReachHere();
177 return -1;
178 }
179
180 public:
181 OopSaver(jobject oops_save, jobject oop_map_array) {
182 _oops_save = oops_save;
183 _oop_map_array = oop_map_array;
184 _last_pcoffset = -1;
185 _saveAreaCounts = getSaveAreaCounts(oop_map_array);
186 resolveArrays();
187 }
188
189 void resolveArrays() {
190 _oopsSaveArray = (objArrayOop) JNIHandles::resolve(_oops_save);
191 _oopMapArray = (typeArrayOop) JNIHandles::resolve(_oop_map_array);
192 }
193
194 void* getOopForBit(HSAILFrame* hsailFrame, int bit) {
195 assert(isOop(hsailFrame, bit), "");
196 void* oop;
197 if (bit < hsailFrame->num_d_regs()) {
198 // d register
199 oop = (void*) hsailFrame->get_d_reg(bit);
200 } else {
201 // stack slot
202 int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot
203 oop = (void*) hsailFrame->get_stackslot64(stackOffset);
204 }
205 return oop;
206 }
207
208 void putOopForBit(HSAILFrame* hsailFrame, int bit, void* oop) {
209 assert(isOop(hsailFrame, bit), "");
210 if (bit < hsailFrame->num_d_regs()) {
211 // d register
212 hsailFrame->put_d_reg(bit, (jlong) oop);
213 } else {
214 // stack slot
215 int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot
216 hsailFrame->put_stackslot64(stackOffset, (jlong) oop);
217 }
218 }
219
220 void saveOopsFromFrame(HSAILFrame* hsailFrame, int deoptSlot){
221 // as used, no need to resolve arrays on each call
222 int oopsPerDeopt = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots();
223
224 // handle the dregister and stackSlot based oops
225 for (int bit = 0; bit < oopsPerDeopt; bit++) {
226 if (isOop(hsailFrame, bit)) {
227 void* saved_oop = getOopForBit(hsailFrame, bit);
228 int saveArrayIndex = deoptSlot * oopsPerDeopt + bit;
229 _oopsSaveArray->obj_at_put(saveArrayIndex, (oop) saved_oop);
230 }
231 }
232 }
233
234 void restoreOopsToFrame(HSAILFrame* hsailFrame, int deoptSlot, int workitem){
235 // need to re-resolve on each restore
236 resolveArrays();
237 int oopsPerDeopt = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots();
238
239 // handle the dregister and stackSlot based oops
240 for (int bit = 0; bit < oopsPerDeopt; bit++) {
241 if (isOop(hsailFrame, bit)) {
242 // the dregister or stack slot at this bit is an oop, retrieve it from array and put back in frame
243 int saveArrayIndex = deoptSlot * oopsPerDeopt + bit;
244 void* newValue = (void*) _oopsSaveArray->obj_at(saveArrayIndex);
245 void* oldValue = getOopForBit(hsailFrame, bit);
246 assert((oldValue != 0 ? newValue != 0 : newValue == 0), "bad dregValue retrieved");
247 if (newValue != oldValue) {
248 if (TraceGPUInteraction) {
249 int numDRegs = hsailFrame->num_d_regs();
250 const char* name = (bit < numDRegs ? "$d" : "stk");
251 int num = (bit < numDRegs ? bit : bit - numDRegs);
252 tty->print_cr("oop moved for %s%d, workitem %d, slot %d, old=%p, new=%p",
253 name, num, workitem, deoptSlot, oldValue, newValue);
254 }
255 putOopForBit(hsailFrame, bit, newValue);
256 }
257 }
258 }
259 }
260
261 bool isOop(HSAILFrame* hsailFrame, int bit){
262 // re-resolve on each access
263 resolveArrays();
264 if (bit > hsailFrame->num_d_regs() + hsailFrame->num_stack_slots()) {
265 return false;
266 }
267 int pcOffset = hsailFrame->pc_offset();
268 int bits_int_idx = mapPcOffsetToIndex(pcOffset) + (bit / 32);
269 int bitpos = bit % 32;
270 int bits = _oopMapArray->int_at(bits_int_idx);
271 return ((bits & (1 << bitpos)) != 0);
272 }
273
274 static int getSaveAreaCounts(jobject oopMapArrayObject) {
275 typeArrayOop oopMapArray = (typeArrayOop) JNIHandles::resolve(oopMapArrayObject);
276 return oopMapArray->int_at(SAVEAREACOUNTS_OFST);
277 }
278
279 };
280
281 jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm, jobject oops_save,
282 jobject donor_threads, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS) { 149 jobject donor_threads, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS) {
283 ResourceMark rm(THREAD); 150 ResourceMark rm(THREAD);
284 objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args); 151 objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args);
152 assert(THREAD->is_Java_thread(), "must be a JavaThread");
285 153
286 // We avoid HSAILAllocationInfo logic if kernel does not allocate 154 // We avoid HSAILAllocationInfo logic if kernel does not allocate
287 // in which case the donor_thread array passed in will be null 155 // in which case the donor_thread array passed in will be null
288 HSAILAllocationInfo* allocInfo = (donor_threads == NULL ? NULL : new HSAILAllocationInfo(donor_threads, dimX, allocBytesPerWorkitem)); 156 HSAILAllocationInfo* allocInfo = (donor_threads == NULL ? NULL : new HSAILAllocationInfo(donor_threads, dimX, allocBytesPerWorkitem));
289 157
290 // Reset the kernel arguments 158 // Reset the kernel arguments
291 _okra_clearargs(kernel); 159 _okra_clearargs(kernel);
292 160
161 JavaThread* thread = (JavaThread*)THREAD;
293 HSAILDeoptimizationInfo* e; 162 HSAILDeoptimizationInfo* e;
294 if (UseHSAILDeoptimization) { 163 if (UseHSAILDeoptimization) {
295 // get how many bytes per deopt save area are required 164 // get how many bytes per deopt save area are required
296 int saveAreaCounts = OopSaver::getSaveAreaCounts(oop_map_array); 165 int saveAreaCounts = HSAILOopMapHelper::get_save_area_counts(oop_map_array);
297 int numSRegs = saveAreaCounts & 0xff; 166 int numSRegs = saveAreaCounts & 0xff;
298 int numDRegs = (saveAreaCounts >> 8) & 0xff; 167 int numDRegs = (saveAreaCounts >> 8) & 0xff;
299 int numStackSlots = (saveAreaCounts >> 16); 168 int numStackSlots = (saveAreaCounts >> 16);
300 int bytesPerSaveArea = numSRegs * 4 + (numDRegs + numStackSlots) * 8; 169 int bytesPerSaveArea = numSRegs * 4 + (numDRegs + numStackSlots) * 8;
301 170
302 e = new (MAX_DEOPT_SLOTS, bytesPerSaveArea) HSAILDeoptimizationInfo(MAX_DEOPT_SLOTS, bytesPerSaveArea, dimX, allocInfo); 171 e = new (MAX_DEOPT_SLOTS, bytesPerSaveArea) HSAILDeoptimizationInfo(MAX_DEOPT_SLOTS, bytesPerSaveArea, dimX, allocInfo, oop_map_array);
303 // copy cur_tlab_infos 172 // copy cur_tlab_infos
304 if (allocInfo != NULL) { 173 if (allocInfo != NULL) {
305 e->setCurTlabInfos(allocInfo->getCurTlabInfos()); 174 e->set_cur_tlabInfos(allocInfo->getCurTlabInfos());
306 } 175 }
176 // set deopt info in thread so gc oops_do processing can find it
177 thread->set_gpu_hsail_deopt_info(e);
307 } 178 }
308 179
309 // This object sets up the kernel arguments 180 // This object sets up the kernel arguments
310 HSAILKernelArguments hka((address) kernel, mh->signature(), argsArray, mh->is_static(), e); 181 HSAILKernelArguments hka((address) kernel, mh->signature(), argsArray, mh->is_static(), e);
311 if (TraceGPUInteraction) { 182 if (TraceGPUInteraction) {
315 // If any object passed was null, throw an exception here. Doing this 186 // If any object passed was null, throw an exception here. Doing this
316 // means the kernel code can avoid null checks on the object parameters. 187 // means the kernel code can avoid null checks on the object parameters.
317 if (hka.getFirstNullParameterIndex() >= 0) { 188 if (hka.getFirstNullParameterIndex() >= 0) {
318 char buf[64]; 189 char buf[64];
319 sprintf(buf, "Null Kernel Parameter seen, Parameter Index: %d", hka.getFirstNullParameterIndex()); 190 sprintf(buf, "Null Kernel Parameter seen, Parameter Index: %d", hka.getFirstNullParameterIndex());
320 JavaThread* thread = (JavaThread*)THREAD;
321 thread->set_gpu_exception_bci(0); 191 thread->set_gpu_exception_bci(0);
322 thread->set_gpu_exception_method(mh()); 192 thread->set_gpu_exception_method(mh());
323 THROW_MSG_0(vmSymbols::java_lang_NullPointerException(), buf); 193 THROW_MSG_0(vmSymbols::java_lang_NullPointerException(), buf);
324 } 194 }
325 195
360 tty->print_cr("deopt happened."); 230 tty->print_cr("deopt happened.");
361 HSAILKernelDeoptimization* pdeopt = e->get_deopt_save_state(0); 231 HSAILKernelDeoptimization* pdeopt = e->get_deopt_save_state(0);
362 tty->print_cr("first deopter was workitem %d", pdeopt->workitem()); 232 tty->print_cr("first deopter was workitem %d", pdeopt->workitem());
363 } 233 }
364 234
365 // Before handling any deopting workitems, save the pointers from
366 // the hsail frames in oops_save so they get adjusted by any
367 // GC. Need to do this before leaving thread_in_vm mode.
368 OopSaver oopSaver(oops_save, oop_map_array);
369 // resolve handle only needed once here (not exiting vm mode)
370 oopSaver.resolveArrays();
371
372 // since slots are allocated from the beginning, we know how far to look
373 assert(e->num_deopts() < e->num_slots(), "deopt save state overflow");
374 for (int k = 0; k < e->num_deopts(); k++) {
375 HSAILKernelDeoptimization* pdeopt = e->get_deopt_save_state(k);
376 assert (pdeopt->workitem() >= 0, "bad workitem in deopt");
377 // this is a workitem that deopted
378 oopSaver.saveOopsFromFrame(pdeopt->first_frame(), k);
379 }
380
381 // Handle any deopting workitems. 235 // Handle any deopting workitems.
382 int count_deoptimized = 0; 236 int count_deoptimized = 0;
383 for (int k = 0; k < e->num_deopts(); k++) { 237 for (int k = 0; k < e->num_deopts(); k++) {
384 HSAILKernelDeoptimization* pdeopt = e->get_deopt_save_state(k); 238 HSAILKernelDeoptimization* pdeopt = e->get_deopt_save_state(k);
385 239
386 jint workitem = pdeopt->workitem(); 240 jint workitem = pdeopt->workitem();
387 if (workitem != -1) { 241 if (workitem != -1) {
388 int deoptId = pdeopt->pc_offset(); 242 int deoptId = pdeopt->pc_offset();
389 HSAILFrame* hsailFrame = pdeopt->first_frame(); 243 HSAILFrame* hsailFrame = pdeopt->first_frame();
390
391 // Update the hsailFrame from the oopsSaveArray
392 // will re-resolve the handles each time.
393 oopSaver.restoreOopsToFrame(hsailFrame, k, workitem);
394 244
395 JavaValue result(T_VOID); 245 JavaValue result(T_VOID);
396 JavaCallArguments javaArgs; 246 JavaCallArguments javaArgs;
397 javaArgs.set_alternative_target(nm); 247 javaArgs.set_alternative_target(nm);
398 javaArgs.push_int(deoptId); 248 javaArgs.push_int(deoptId);
405 javaArgs.push_oop((oop) NULL); 255 javaArgs.push_oop((oop) NULL);
406 if (TraceGPUInteraction) { 256 if (TraceGPUInteraction) {
407 tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d", workitem, k, deoptId, hsailFrame, myActionReason); 257 tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d", workitem, k, deoptId, hsailFrame, myActionReason);
408 // show the $d registers or stack slots containing references 258 // show the $d registers or stack slots containing references
409 int maxOopBits = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots(); 259 int maxOopBits = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots();
260 HSAILOopMapHelper oopMapHelper(oop_map_array);
261 int pc_offset = hsailFrame->pc_offset();
410 for (int bit = 0; bit < maxOopBits; bit++) { 262 for (int bit = 0; bit < maxOopBits; bit++) {
411 if (oopSaver.isOop(hsailFrame, bit)) { 263 if (oopMapHelper.is_oop(pc_offset, bit)) {
412 if (bit < hsailFrame->num_d_regs()) { 264 if (bit < hsailFrame->num_d_regs()) {
413 // show $d reg oop 265 // show $d reg oop
414 tty->print_cr(" oop $d%d = %p", bit, oopSaver.getOopForBit(hsailFrame, bit)); 266 tty->print_cr(" oop $d%d = %p", bit, hsailFrame->get_oop_for_bit(bit));
415 } else { 267 } else {
416 // show stack slot oop 268 // show stack slot oop
417 int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot 269 int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot
418 tty->print_cr(" oop stk:%d = %p", stackOffset, oopSaver.getOopForBit(hsailFrame, bit)); 270 tty->print_cr(" oop stk:%d = %p", stackOffset, hsailFrame->get_oop_for_bit(bit));
419 } 271 }
420 } 272 }
421 } 273 }
422 } 274 }
423 JavaCalls::call(&result, mh, &javaArgs, THREAD); 275 JavaCalls::call(&result, mh, &javaArgs, THREAD);
424 count_deoptimized++; 276 count_deoptimized++;
277 e->set_deopt_work_index(k + 1);
425 } 278 }
426 } 279 }
427 if (TraceGPUInteraction) { 280 if (TraceGPUInteraction) {
428 tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized); 281 tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized);
429 } 282 }
430 } 283 }
431 } 284 }
285 // when we are done with the deopts, we don't need to oops_do anything
286 // in the saved state anymore
287 thread->set_gpu_hsail_deopt_info(NULL);
432 288
433 // Handle any never_ran workitems if there were any 289 // Handle any never_ran workitems if there were any
434 { 290 {
435 TraceTime t("handle never-rans ", TraceGPUInteraction); 291 TraceTime t("handle never-rans ", TraceGPUInteraction);
436 int count_never_ran = 0; 292 int count_never_ran = 0;
593 } 449 }
594 return false; 450 return false;
595 } 451 }
596 return true; 452 return true;
597 } 453 }
454
455
456 void Hsail::HSAILDeoptimizationInfo::oops_do(OopClosure* f) {
457 int unprocessed_deopts = num_deopts() - deopt_work_index();
458 if (TraceGPUInteraction) {
459 tty->print_cr("HSAILDeoptimizationInfo::oops_do deopt_occurred=%d, total_deopts=%d, unprocessed_deopts=%d, oop_map_array=%p", _deopt_occurred, num_deopts(), unprocessed_deopts, _oop_map_array);
460 }
461 if (num_deopts() == 0 || unprocessed_deopts <= 0) {
462 return; // nothing to do
463 }
464 HSAILOopMapHelper oopMapHelper(_oop_map_array);
465 oopMapHelper.resolve_arrays(); // resolve once before processing
466
467 // go thru the unprocessed deopt frames, finding each oop and applying the closure
468 for (int k = deopt_work_index(); k < num_deopts(); k++) {
469 HSAILKernelDeoptimization* pdeopt = get_deopt_save_state(k);
470 assert (pdeopt->workitem() >= 0, "bad workitem in deopt");
471 if (TraceGPUInteraction) {
472 tty->print_cr(" deopt %d, workitem %d, pc %d", k, pdeopt->workitem(), pdeopt->pc_offset());
473 }
474 HSAILFrame* hsailFrame = pdeopt->first_frame();
475 hsailFrame->oops_do(f, &oopMapHelper);
476 }
477 }