comparison src/gpu/hsail/vm/gpu_hsail.cpp @ 16076:06eedda53e14

HSAIL: add support to allocate new TLAB from GPU
Contributed-by: Tom Deneau <tom.deneau@amd.com>
author Doug Simon <doug.simon@oracle.com>
date Tue, 10 Jun 2014 22:36:26 +0200
parents 66a9286203a2
children e9998e2be7f5
comparison
equal deleted inserted replaced
16074:b6ab7e7fa0a5 16076:06eedda53e14
67 {CC"initialize", CC"()Z", FN_PTR(Hsail::initialize)}, 67 {CC"initialize", CC"()Z", FN_PTR(Hsail::initialize)},
68 {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(Hsail::generate_kernel)}, 68 {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(Hsail::generate_kernel)},
69 {CC"executeKernel0", CC"("HS_INSTALLED_CODE"I["OBJECT"["OBJECT"["JLTHREAD"I[I)Z", FN_PTR(Hsail::execute_kernel_void_1d)}, 69 {CC"executeKernel0", CC"("HS_INSTALLED_CODE"I["OBJECT"["OBJECT"["JLTHREAD"I[I)Z", FN_PTR(Hsail::execute_kernel_void_1d)},
70 }; 70 };
71 71
72 void * Hsail::_device_context = NULL; 72 void* Hsail::_device_context = NULL;
73 jint Hsail::_notice_safepoints = false; 73 jint Hsail::_notice_safepoints = false;
74 74
75 Hsail::okra_create_context_func_t Hsail::_okra_create_context; 75 Hsail::okra_create_context_func_t Hsail::_okra_create_context;
76 Hsail::okra_create_kernel_func_t Hsail::_okra_create_kernel; 76 Hsail::okra_create_kernel_func_t Hsail::_okra_create_kernel;
77 Hsail::okra_push_object_func_t Hsail::_okra_push_object; 77 Hsail::okra_push_object_func_t Hsail::_okra_push_object;
78 Hsail::okra_push_boolean_func_t Hsail::_okra_push_boolean; 78 Hsail::okra_push_boolean_func_t Hsail::_okra_push_boolean;
83 Hsail::okra_push_long_func_t Hsail::_okra_push_long; 83 Hsail::okra_push_long_func_t Hsail::_okra_push_long;
84 Hsail::okra_execute_with_range_func_t Hsail::_okra_execute_with_range; 84 Hsail::okra_execute_with_range_func_t Hsail::_okra_execute_with_range;
85 Hsail::okra_clearargs_func_t Hsail::_okra_clearargs; 85 Hsail::okra_clearargs_func_t Hsail::_okra_clearargs;
86 Hsail::okra_register_heap_func_t Hsail::_okra_register_heap; 86 Hsail::okra_register_heap_func_t Hsail::_okra_register_heap;
87 87
88 struct Stats {
89 int _dispatches;
90 int _deopts;
91 int _overflows;
92 bool _changeSeen;
93
94 public:
95 Stats() {
96 _dispatches = _deopts = _overflows = 0;
97 _changeSeen = false;
98 }
99
100 void incDeopts() {
101 _deopts++;
102 _changeSeen = true;
103 }
104 void incOverflows() {
105 _overflows++;
106 _changeSeen = true;
107 }
108
109 void finishDispatch() {
110 _dispatches++;
111 if (_changeSeen) {
112 // print();
113 _changeSeen = false;
114 }
115 }
116
117 void print() {
118 tty->print_cr("Disp=%d, Deopts=%d, Ovflows=%d", _dispatches, _deopts, _overflows);
119 }
120
121 };
122
123 static Stats kernelStats;
124
125 //static jint in_kernel = 0; 88 //static jint in_kernel = 0;
126 89
127 void Hsail::notice_safepoints() { 90 void Hsail::notice_safepoints() {
128 _notice_safepoints = true; 91 _notice_safepoints = true;
129 // if (TraceGPUInteraction) { 92 // if (TraceGPUInteraction) {
163 } 126 }
164 127
165 return execute_kernel_void_1d_internal((address) kernel, dimX, args, mh, nm, oops_save, donor_threads, allocBytesPerWorkitem, oop_map_array, CHECK_0); 128 return execute_kernel_void_1d_internal((address) kernel, dimX, args, mh, nm, oops_save, donor_threads, allocBytesPerWorkitem, oop_map_array, CHECK_0);
166 GPU_END 129 GPU_END
167 130
168 static void showRanges(jboolean *a, int len) { 131 static void showRanges(jboolean* a, int len) {
169 // show ranges 132 // show ranges
170 bool lookFor = true; 133 bool lookFor = true;
171 for (int i = 0; i < len; i++) { 134 for (int i = 0; i < len; i++) {
172 if ((lookFor == true) && (a[i] != 0)) { 135 if ((lookFor == true) && (a[i] != 0)) {
173 tty->print("%d", i); 136 tty->print("%d", i);
180 if (lookFor == false) { 143 if (lookFor == false) {
181 tty->print_cr("-%d", len-1); 144 tty->print_cr("-%d", len-1);
182 } 145 }
183 } 146 }
184 147
185 // fill and retire old tlab and get a new one
186 // if we can't get one, no problem someone will eventually do a gc
187 void Hsail::getNewTlabForDonorThread(ThreadLocalAllocBuffer* tlab, size_t tlabMinHsail) {
188 tlab->clear_before_allocation(); // fill and retire old tlab (will also check for null)
189
190 // get a size for a new tlab that is at least tlabMinHsail.
191 size_t new_tlab_size = tlab->compute_size(tlabMinHsail);
192 if (new_tlab_size == 0) return;
193
194 HeapWord* tlab_start = Universe::heap()->allocate_new_tlab(new_tlab_size);
195 if (tlab_start == NULL) return;
196
197 // ..and clear it if required
198 if (ZeroTLAB) {
199 Copy::zero_to_words(tlab_start, new_tlab_size);
200 }
201 // and init the tlab pointers
202 tlab->fill(tlab_start, tlab_start, new_tlab_size);
203 }
204
205 static void printTlabInfo (ThreadLocalAllocBuffer* tlab) {
206 HeapWord *start = tlab->start();
207 HeapWord *top = tlab->top();
208 HeapWord *end = tlab->end();
209 // sizes are in bytes
210 size_t tlabFree = tlab->free() * HeapWordSize;
211 size_t tlabUsed = tlab->used() * HeapWordSize;
212 size_t tlabSize = tlabFree + tlabUsed;
213 double freePct = 100.0 * (double) tlabFree/(double) tlabSize;
214 tty->print_cr("(%p, %p, %p), siz=%ld, free=%ld (%f%%)", start, top, end, tlabSize, tlabFree, freePct);
215 }
216
217 class OopSaver : public StackObj { 148 class OopSaver : public StackObj {
218 private: 149 private:
219 objArrayOop _oopsSaveArray; 150 objArrayOop _oopsSaveArray;
220 typeArrayOop _oopMapArray; 151 typeArrayOop _oopMapArray;
221 jobject _oops_save; 152 jobject _oops_save;
258 void resolveArrays() { 189 void resolveArrays() {
259 _oopsSaveArray = (objArrayOop) JNIHandles::resolve(_oops_save); 190 _oopsSaveArray = (objArrayOop) JNIHandles::resolve(_oops_save);
260 _oopMapArray = (typeArrayOop) JNIHandles::resolve(_oop_map_array); 191 _oopMapArray = (typeArrayOop) JNIHandles::resolve(_oop_map_array);
261 } 192 }
262 193
263 void * getOopForBit(HSAILFrame * hsailFrame, int bit) { 194 void* getOopForBit(HSAILFrame* hsailFrame, int bit) {
264 assert(isOop(hsailFrame, bit), ""); 195 assert(isOop(hsailFrame, bit), "");
265 void *oop; 196 void* oop;
266 if (bit < hsailFrame->num_d_regs()) { 197 if (bit < hsailFrame->num_d_regs()) {
267 // d register 198 // d register
268 oop = (void*) hsailFrame->get_d_reg(bit); 199 oop = (void*) hsailFrame->get_d_reg(bit);
269 } else { 200 } else {
270 // stack slot 201 // stack slot
271 int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot 202 int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot
272 oop = (void *) hsailFrame->get_stackslot64(stackOffset); 203 oop = (void*) hsailFrame->get_stackslot64(stackOffset);
273 } 204 }
274 return oop; 205 return oop;
275 } 206 }
276 207
277 void putOopForBit(HSAILFrame * hsailFrame, int bit, void *oop) { 208 void putOopForBit(HSAILFrame* hsailFrame, int bit, void* oop) {
278 assert(isOop(hsailFrame, bit), ""); 209 assert(isOop(hsailFrame, bit), "");
279 if (bit < hsailFrame->num_d_regs()) { 210 if (bit < hsailFrame->num_d_regs()) {
280 // d register 211 // d register
281 hsailFrame->put_d_reg(bit, (jlong) oop); 212 hsailFrame->put_d_reg(bit, (jlong) oop);
282 } else { 213 } else {
284 int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot 215 int stackOffset = (bit - hsailFrame->num_d_regs()) * 8; // 8 bytes per stack slot
285 hsailFrame->put_stackslot64(stackOffset, (jlong) oop); 216 hsailFrame->put_stackslot64(stackOffset, (jlong) oop);
286 } 217 }
287 } 218 }
288 219
289 void saveOopsFromFrame(HSAILFrame * hsailFrame, int deoptSlot){ 220 void saveOopsFromFrame(HSAILFrame* hsailFrame, int deoptSlot){
290 // as used, no need to resolve arrays on each call 221 // as used, no need to resolve arrays on each call
291 int oopsPerDeopt = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots(); 222 int oopsPerDeopt = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots();
292 223
293 // handle the dregister and stackSlot based oops 224 // handle the dregister and stackSlot based oops
294 for (int bit = 0; bit < oopsPerDeopt; bit++) { 225 for (int bit = 0; bit < oopsPerDeopt; bit++) {
298 _oopsSaveArray->obj_at_put(saveArrayIndex, (oop) saved_oop); 229 _oopsSaveArray->obj_at_put(saveArrayIndex, (oop) saved_oop);
299 } 230 }
300 } 231 }
301 } 232 }
302 233
303 void restoreOopsToFrame(HSAILFrame * hsailFrame, int deoptSlot, int workitem){ 234 void restoreOopsToFrame(HSAILFrame* hsailFrame, int deoptSlot, int workitem){
304 // need to re-resolve on each restore 235 // need to re-resolve on each restore
305 resolveArrays(); 236 resolveArrays();
306 int oopsPerDeopt = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots(); 237 int oopsPerDeopt = hsailFrame->num_d_regs() + hsailFrame->num_stack_slots();
307 238
308 // handle the dregister and stackSlot based oops 239 // handle the dregister and stackSlot based oops
309 for (int bit = 0; bit < oopsPerDeopt; bit++) { 240 for (int bit = 0; bit < oopsPerDeopt; bit++) {
310 if (isOop(hsailFrame, bit)) { 241 if (isOop(hsailFrame, bit)) {
311 // the dregister or stack slot at this bit is an oop, retrieve it from array and put back in frame 242 // the dregister or stack slot at this bit is an oop, retrieve it from array and put back in frame
312 int saveArrayIndex = deoptSlot * oopsPerDeopt + bit; 243 int saveArrayIndex = deoptSlot * oopsPerDeopt + bit;
313 void * newValue = (void *) _oopsSaveArray->obj_at(saveArrayIndex); 244 void* newValue = (void*) _oopsSaveArray->obj_at(saveArrayIndex);
314 void * oldValue = getOopForBit(hsailFrame, bit); 245 void* oldValue = getOopForBit(hsailFrame, bit);
315 assert((oldValue != 0 ? newValue != 0 : newValue == 0), "bad dregValue retrieved"); 246 assert((oldValue != 0 ? newValue != 0 : newValue == 0), "bad dregValue retrieved");
316 if (newValue != oldValue) { 247 if (newValue != oldValue) {
317 if (TraceGPUInteraction) { 248 if (TraceGPUInteraction) {
318 int numDRegs = hsailFrame->num_d_regs(); 249 int numDRegs = hsailFrame->num_d_regs();
319 const char *name = (bit < numDRegs ? "$d" : "stk"); 250 const char* name = (bit < numDRegs ? "$d" : "stk");
320 int num = (bit < numDRegs ? bit : bit - numDRegs); 251 int num = (bit < numDRegs ? bit : bit - numDRegs);
321 tty->print_cr("oop moved for %s%d, workitem %d, slot %d, old=%p, new=%p", 252 tty->print_cr("oop moved for %s%d, workitem %d, slot %d, old=%p, new=%p",
322 name, num, workitem, deoptSlot, oldValue, newValue); 253 name, num, workitem, deoptSlot, oldValue, newValue);
323 } 254 }
324 putOopForBit(hsailFrame, bit, newValue); 255 putOopForBit(hsailFrame, bit, newValue);
325 } 256 }
326 } 257 }
327 } 258 }
328 } 259 }
329 260
330 bool isOop(HSAILFrame * hsailFrame, int bit){ 261 bool isOop(HSAILFrame* hsailFrame, int bit){
331 // re-resolve on each access 262 // re-resolve on each access
332 resolveArrays(); 263 resolveArrays();
333 if (bit > hsailFrame->num_d_regs() + hsailFrame->num_stack_slots()) { 264 if (bit > hsailFrame->num_d_regs() + hsailFrame->num_stack_slots()) {
334 return false; 265 return false;
335 } 266 }
345 return oopMapArray->int_at(SAVEAREACOUNTS_OFST); 276 return oopMapArray->int_at(SAVEAREACOUNTS_OFST);
346 } 277 }
347 278
348 }; 279 };
349 280
350 jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oops_save, 281 jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod* nm, jobject oops_save,
351 jobject donor_threads, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS) { 282 jobject donor_threads, int allocBytesPerWorkitem, jobject oop_map_array, TRAPS) {
352 ResourceMark rm(THREAD); 283 ResourceMark rm(THREAD);
353 objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args); 284 objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args);
354 285
355 // TODO: avoid donor thread logic if kernel does not allocate 286 // We avoid HSAILAllocationInfo logic if kernel does not allocate
356 objArrayOop donorThreadObjects = (objArrayOop) JNIHandles::resolve(donor_threads); 287 // in which case the donor_thread array passed in will be null
357 int numDonorThreads = donorThreadObjects->length(); 288 HSAILAllocationInfo* allocInfo = (donor_threads == NULL ? NULL : new HSAILAllocationInfo(donor_threads, dimX, allocBytesPerWorkitem));
358 guarantee(numDonorThreads > 0, "need at least one donor thread"); 289
359 JavaThread** donorThreads = NEW_RESOURCE_ARRAY(JavaThread*, numDonorThreads);
360 for (int i = 0; i < numDonorThreads; i++) {
361 donorThreads[i] = java_lang_Thread::thread(donorThreadObjects->obj_at(i));
362 }
363
364
365 // compute tlabMinHsail based on number of workitems, number of donor
366 // threads, allocBytesPerWorkitem rounded up
367 size_t tlabMinHsail = (allocBytesPerWorkitem * dimX + (numDonorThreads - 1)) / numDonorThreads;
368 if (TraceGPUInteraction) {
369 tty->print_cr("computed tlabMinHsail = %d", tlabMinHsail);
370 }
371
372 for (int i = 0; i < numDonorThreads; i++) {
373 JavaThread* donorThread = donorThreads[i];
374 ThreadLocalAllocBuffer* tlab = &donorThread->tlab();
375 if (TraceGPUInteraction) {
376 tty->print("donorThread %d, is %p, tlab at %p -> ", i, donorThread, tlab);
377 printTlabInfo(tlab);
378 }
379
380 // note: this used vs. free limit checking should be based on some
381 // heuristic where we see how much this kernel tends to allocate
382 if ((tlab->end() == NULL) || (tlab->free() * HeapWordSize < tlabMinHsail)) {
383 getNewTlabForDonorThread(tlab, tlabMinHsail);
384 if (TraceGPUInteraction) {
385 tty->print("donorThread %d, refilled tlab, -> ", i);
386 printTlabInfo(tlab);
387 }
388 }
389 }
390
391 // Reset the kernel arguments 290 // Reset the kernel arguments
392 _okra_clearargs(kernel); 291 _okra_clearargs(kernel);
393 292
394 HSAILDeoptimizationInfo* e; 293 HSAILDeoptimizationInfo* e;
395 if (UseHSAILDeoptimization) { 294 if (UseHSAILDeoptimization) {
398 int numSRegs = saveAreaCounts & 0xff; 297 int numSRegs = saveAreaCounts & 0xff;
399 int numDRegs = (saveAreaCounts >> 8) & 0xff; 298 int numDRegs = (saveAreaCounts >> 8) & 0xff;
400 int numStackSlots = (saveAreaCounts >> 16); 299 int numStackSlots = (saveAreaCounts >> 16);
401 int bytesPerSaveArea = numSRegs * 4 + (numDRegs + numStackSlots) * 8; 300 int bytesPerSaveArea = numSRegs * 4 + (numDRegs + numStackSlots) * 8;
402 301
403 e = new (MAX_DEOPT_SLOTS, bytesPerSaveArea) HSAILDeoptimizationInfo(MAX_DEOPT_SLOTS, bytesPerSaveArea, dimX, donorThreads); 302 e = new (MAX_DEOPT_SLOTS, bytesPerSaveArea) HSAILDeoptimizationInfo(MAX_DEOPT_SLOTS, bytesPerSaveArea, dimX, allocInfo);
303 // copy cur_tlab_infos
304 if (allocInfo != NULL) {
305 e->setCurTlabInfos(allocInfo->getCurTlabInfos());
306 }
404 } 307 }
405 308
406 // This object sets up the kernel arguments 309 // This object sets up the kernel arguments
407 HSAILKernelArguments hka((address) kernel, mh->signature(), argsArray, mh->is_static(), e); 310 HSAILKernelArguments hka((address) kernel, mh->signature(), argsArray, mh->is_static(), e);
408 if (TraceGPUInteraction) { 311 if (TraceGPUInteraction) {
409 tty->print_cr("[HSAIL] range=%d", dimX); 312 tty->print_cr("[HSAIL] range=%d", dimX);
410 } 313 }
411 314
412 // if any object passed was null, throw an exception here 315 // If any object passed was null, throw an exception here. Doing this
413 // doing this means the kernel code can avoid null checks on the object parameters. 316 // means the kernel code can avoid null checks on the object parameters.
414 if (hka.getFirstNullParameterIndex() >= 0) { 317 if (hka.getFirstNullParameterIndex() >= 0) {
415 char buf[64]; 318 char buf[64];
416 sprintf(buf, "Null Kernel Parameter seen, Parameter Index: %d", hka.getFirstNullParameterIndex()); 319 sprintf(buf, "Null Kernel Parameter seen, Parameter Index: %d", hka.getFirstNullParameterIndex());
417 JavaThread* thread = (JavaThread*)THREAD; 320 JavaThread* thread = (JavaThread*)THREAD;
418 thread->set_gpu_exception_bci(0); 321 thread->set_gpu_exception_bci(0);
429 // Run the kernel 332 // Run the kernel
430 success = _okra_execute_with_range(kernel, dimX); 333 success = _okra_execute_with_range(kernel, dimX);
431 //in_kernel = 0; 334 //in_kernel = 0;
432 } 335 }
433 336
434 // fix up any tlab tops that overflowed 337 // avoid HSAILAllocationInfo logic if kernel does not allocate
435 bool anyOverflows = false; 338 if (allocInfo != NULL) {
436 for (int i = 0; i < numDonorThreads; i++) { 339 allocInfo->postKernelCleanup();
437 JavaThread * donorThread = donorThreads[i];
438 ThreadLocalAllocBuffer* tlab = &donorThread->tlab();
439 if (tlab->top() > tlab->end()) {
440 anyOverflows = true;
441 long overflowAmount = (long) tlab->top() - (long) tlab->pf_top();
442 // tlab->set_top is private this ugly hack gets around that
443 *(long *)((char *)tlab + in_bytes(tlab->top_offset())) = (long) tlab->pf_top();
444 if (TraceGPUInteraction) {
445 tty->print_cr("donorThread %d at %p overflowed by %ld bytes, setting last good top to %p", i, donorThread, overflowAmount, tlab->top());
446 }
447 }
448 }
449 if (anyOverflows) {
450 kernelStats.incOverflows();
451 } 340 }
452 341
453 if (UseHSAILDeoptimization) { 342 if (UseHSAILDeoptimization) {
454 // check if any workitem requested a deopt 343 // check if any workitem requested a deopt
455 int deoptcode = e->deopt_occurred(); 344 int deoptcode = e->deopt_occurred();
463 char msg[200]; 352 char msg[200];
464 sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1 * (deoptcode + 1)); 353 sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1 * (deoptcode + 1));
465 guarantee(deoptcode == 1, msg); 354 guarantee(deoptcode == 1, msg);
466 } 355 }
467 } else { 356 } else {
468 kernelStats.incDeopts();
469
470 { 357 {
471 TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction); 358 TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction);
472 if (TraceGPUInteraction) { 359 if (TraceGPUInteraction) {
473 tty->print_cr("deopt happened."); 360 tty->print_cr("deopt happened.");
474 HSAILKernelDeoptimization * pdeopt = e->get_deopt_save_state(0); 361 HSAILKernelDeoptimization* pdeopt = e->get_deopt_save_state(0);
475 tty->print_cr("first deopter was workitem %d", pdeopt->workitem()); 362 tty->print_cr("first deopter was workitem %d", pdeopt->workitem());
476 } 363 }
477 364
478 // Before handling any deopting workitems, save the pointers from 365 // Before handling any deopting workitems, save the pointers from
479 // the hsail frames in oops_save so they get adjusted by any 366 // the hsail frames in oops_save so they get adjusted by any
483 oopSaver.resolveArrays(); 370 oopSaver.resolveArrays();
484 371
485 // since slots are allocated from the beginning, we know how far to look 372 // since slots are allocated from the beginning, we know how far to look
486 assert(e->num_deopts() < e->num_slots(), "deopt save state overflow"); 373 assert(e->num_deopts() < e->num_slots(), "deopt save state overflow");
487 for (int k = 0; k < e->num_deopts(); k++) { 374 for (int k = 0; k < e->num_deopts(); k++) {
488 HSAILKernelDeoptimization * pdeopt = e->get_deopt_save_state(k); 375 HSAILKernelDeoptimization* pdeopt = e->get_deopt_save_state(k);
489 assert (pdeopt->workitem() >= 0, "bad workitem in deopt"); 376 assert (pdeopt->workitem() >= 0, "bad workitem in deopt");
490 // this is a workitem that deopted 377 // this is a workitem that deopted
491 oopSaver.saveOopsFromFrame(pdeopt->first_frame(), k); 378 oopSaver.saveOopsFromFrame(pdeopt->first_frame(), k);
492 } 379 }
493 380
494 // Handle any deopting workitems. 381 // Handle any deopting workitems.
495 int count_deoptimized = 0; 382 int count_deoptimized = 0;
496 for (int k = 0; k < e->num_deopts(); k++) { 383 for (int k = 0; k < e->num_deopts(); k++) {
497 HSAILKernelDeoptimization * pdeopt = e->get_deopt_save_state(k); 384 HSAILKernelDeoptimization* pdeopt = e->get_deopt_save_state(k);
498 385
499 jint workitem = pdeopt->workitem(); 386 jint workitem = pdeopt->workitem();
500 if (workitem != -1) { 387 if (workitem != -1) {
501 int deoptId = pdeopt->pc_offset(); 388 int deoptId = pdeopt->pc_offset();
502 HSAILFrame *hsailFrame = pdeopt->first_frame(); 389 HSAILFrame* hsailFrame = pdeopt->first_frame();
503 390
504 // update the hsailFrame from the oopsSaveArray 391 // Update the hsailFrame from the oopsSaveArray
505 // will re-resolve the handles each time 392 // will re-resolve the handles each time.
506 oopSaver.restoreOopsToFrame(hsailFrame, k, workitem); 393 oopSaver.restoreOopsToFrame(hsailFrame, k, workitem);
507 394
508 JavaValue result(T_VOID); 395 JavaValue result(T_VOID);
509 JavaCallArguments javaArgs; 396 JavaCallArguments javaArgs;
510 javaArgs.set_alternative_target(nm); 397 javaArgs.set_alternative_target(nm);
511 javaArgs.push_int(deoptId); 398 javaArgs.push_int(deoptId);
512 javaArgs.push_long((jlong) hsailFrame); 399 javaArgs.push_long((jlong) hsailFrame);
513 400
514 // override the deoptimization action with Action_none until we decide 401 // Override the deoptimization action with Action_none until we decide
515 // how to handle the other actions. 402 // how to handle the other actions.
516 int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none); 403 int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none);
517 javaArgs.push_int(myActionReason); 404 javaArgs.push_int(myActionReason);
518 javaArgs.push_oop((oop) NULL); 405 javaArgs.push_oop((oop) NULL);
519 if (TraceGPUInteraction) { 406 if (TraceGPUInteraction) {
549 int count_never_ran = 0; 436 int count_never_ran = 0;
550 bool handleNeverRansHere = true; 437 bool handleNeverRansHere = true;
551 // turn off verbose trace stuff for javacall arg setup 438 // turn off verbose trace stuff for javacall arg setup
552 bool savedTraceGPUInteraction = TraceGPUInteraction; 439 bool savedTraceGPUInteraction = TraceGPUInteraction;
553 TraceGPUInteraction = false; 440 TraceGPUInteraction = false;
554 jboolean *never_ran_array = e->never_ran_array(); 441 jboolean* never_ran_array = e->never_ran_array();
555 if (handleNeverRansHere) { 442 if (handleNeverRansHere) {
556 for (int k = 0; k < dimX; k++) { 443 for (int k = 0; k < dimX; k++) {
557 if (never_ran_array[k]) { 444 if (never_ran_array[k]) {
558 // run it as a javaCall 445 // run it as a javaCall
559 KlassHandle methKlass = mh->method_holder(); 446 KlassHandle methKlass = mh->method_holder();
560 Thread* THREAD = Thread::current(); 447 Thread* THREAD = Thread::current();
561 JavaValue result(T_VOID); 448 JavaValue result(T_VOID);
562 JavaCallArguments javaArgs; 449 JavaCallArguments javaArgs;
563 // re-resolve the args_handle here 450 // re-resolve the args_handle here
564 objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args); 451 objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args);
565 // This object sets up the javaCall arguments 452
566 // the way argsArray is set up, this should work for instance methods as well 453 // This object sets up the javaCall arguments. The way
567 // (the receiver will be the first oop pushed) 454 // argsArray is set up, this should work for instance
455 // methods as well (the receiver will be the first oop pushed)
568 HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static()); 456 HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static());
569 if (mh->is_static()) { 457 if (mh->is_static()) {
570 JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); 458 JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
571 } else { 459 } else {
572 JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); 460 JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
581 } 469 }
582 } // end of never-ran handling 470 } // end of never-ran handling
583 } 471 }
584 472
585 delete e; 473 delete e;
586 } 474 delete allocInfo;
587 kernelStats.finishDispatch(); 475 }
588 return success; 476 return success;
589 } 477 }
590 478
591 GPU_ENTRY(jlong, Hsail::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle)) 479 GPU_ENTRY(jlong, Hsail::generate_kernel, (JNIEnv* env, jclass, jbyteArray code_handle, jstring name_handle))
592 guarantee(_okra_create_kernel != NULL, "[HSAIL] Okra not linked"); 480 guarantee(_okra_create_kernel != NULL, "[HSAIL] Okra not linked");
593 ResourceMark rm; 481 ResourceMark rm;
594 jsize name_len = env->GetStringLength(name_handle); 482 jsize name_len = env->GetStringLength(name_handle);
595 jsize code_len = env->GetArrayLength(code_handle); 483 jsize code_len = env->GetArrayLength(code_handle);
596 484
597 char* name = NEW_RESOURCE_ARRAY(char, name_len + 1); 485 char* name = NEW_RESOURCE_ARRAY(char, name_len + 1);
598 unsigned char *code = NEW_RESOURCE_ARRAY(unsigned char, code_len + 1); 486 unsigned char* code = NEW_RESOURCE_ARRAY(unsigned char, code_len + 1);
599 487
600 code[code_len] = 0; 488 code[code_len] = 0;
601 name[name_len] = 0; 489 name[name_len] = 0;
602 490
603 env->GetByteArrayRegion(code_handle, 0, code_len, (jbyte*) code); 491 env->GetByteArrayRegion(code_handle, 0, code_len, (jbyte*) code);
629 if (_##alias == NULL) { \ 517 if (_##alias == NULL) { \
630 tty->print_cr("[HSAIL] ***** Error: Failed to lookup %s in %s, wrong version of OKRA?", STRINGIFY(name), okra_library_name); \ 518 tty->print_cr("[HSAIL] ***** Error: Failed to lookup %s in %s, wrong version of OKRA?", STRINGIFY(name), okra_library_name); \
631 return false; \ 519 return false; \
632 } \ 520 } \
633 521
634 GPU_ENTRY(jboolean, Hsail::initialize, (JNIEnv *env, jclass)) 522 GPU_ENTRY(jboolean, Hsail::initialize, (JNIEnv* env, jclass))
635 if (okra_library_name == NULL) { 523 if (okra_library_name == NULL) {
636 if (TraceGPUInteraction) { 524 if (TraceGPUInteraction) {
637 tty->print_cr("Unsupported HSAIL platform"); 525 tty->print_cr("Unsupported HSAIL platform");
638 } 526 }
639 return false; 527 return false;
640 } 528 }
641 529
642 // here we know we have a valid okra_library_name to try to load 530 // here we know we have a valid okra_library_name to try to load
643 char ebuf[O_BUFLEN]; 531 char ebuf[O_BUFLEN];
644 char *okra_lib_name_from_env_var = getenv("_OKRA_SIM_LIB_PATH_"); 532 char* okra_lib_name_from_env_var = getenv("_OKRA_SIM_LIB_PATH_");
645 if (okra_lib_name_from_env_var != NULL) { 533 if (okra_lib_name_from_env_var != NULL) {
646 okra_library_name = okra_lib_name_from_env_var; 534 okra_library_name = okra_lib_name_from_env_var;
647 } 535 }
648 if (TraceGPUInteraction) { 536 if (TraceGPUInteraction) {
649 tty->print_cr("[HSAIL] library is %s", okra_library_name); 537 tty->print_cr("[HSAIL] library is %s", okra_library_name);
650 } 538 }
651 void *okra_lib_handle = NULL; 539 void* okra_lib_handle = NULL;
652 #if defined(LINUX) 540 #if defined(LINUX)
653 // Check first if the Okra library is already loaded. 541 // Check first if the Okra library is already loaded.
654 // TODO: Figure out how to do this on other OSes. 542 // TODO: Figure out how to do this on other OSes.
655 okra_lib_handle = ::dlopen(okra_library_name, RTLD_LAZY | RTLD_NOLOAD); 543 okra_lib_handle = ::dlopen(okra_library_name, RTLD_LAZY | RTLD_NOLOAD);
656 #endif 544 #endif
666 return false; 554 return false;
667 } 555 }
668 556
669 guarantee(_okra_create_context == NULL, "cannot repeat GPU initialization"); 557 guarantee(_okra_create_context == NULL, "cannot repeat GPU initialization");
670 558
671 // at this point we know okra_lib_handle is valid whether we loaded 559 // At this point we know okra_lib_handle is valid whether we loaded
672 // here or earlier. In either case, we can lookup the functions 560 // here or earlier. In either case, we can lookup the functions.
673 LOOKUP_OKRA_FUNCTION(okra_create_context, okra_create_context); 561 LOOKUP_OKRA_FUNCTION(okra_create_context, okra_create_context);
674 LOOKUP_OKRA_FUNCTION(okra_create_kernel, okra_create_kernel); 562 LOOKUP_OKRA_FUNCTION(okra_create_kernel, okra_create_kernel);
675 LOOKUP_OKRA_FUNCTION(okra_push_object, okra_push_object); 563 LOOKUP_OKRA_FUNCTION(okra_push_object, okra_push_object);
676 LOOKUP_OKRA_FUNCTION(okra_push_boolean, okra_push_boolean); 564 LOOKUP_OKRA_FUNCTION(okra_push_boolean, okra_push_boolean);
677 LOOKUP_OKRA_FUNCTION(okra_push_byte, okra_push_byte); 565 LOOKUP_OKRA_FUNCTION(okra_push_byte, okra_push_byte);