comparison src/gpu/hsail/vm/gpu_hsail.cpp @ 16533:d5c4bb0039d8

HSAIL: update simulator. Contributed-by: Eric Caspole <eric.caspole@amd.com>
author Bernhard Urban <bernhard.urban@jku.at>
date Wed, 16 Jul 2014 11:32:34 +0200
parents e9998e2be7f5
children a18c229b9a0b
comparison
Legend: equal, deleted, inserted, replaced
16532:2dd966b157e8 16533:d5c4bb0039d8
70 }; 70 };
71 71
72 void* Hsail::_device_context = NULL; 72 void* Hsail::_device_context = NULL;
73 jint Hsail::_notice_safepoints = false; 73 jint Hsail::_notice_safepoints = false;
74 74
75 Hsail::okra_create_context_func_t Hsail::_okra_create_context; 75 Hsail::okra_get_context_func_t Hsail::_okra_get_context;
76 Hsail::okra_create_kernel_func_t Hsail::_okra_create_kernel; 76 Hsail::okra_create_kernel_func_t Hsail::_okra_create_kernel;
77 Hsail::okra_push_object_func_t Hsail::_okra_push_object; 77 Hsail::okra_push_pointer_func_t Hsail::_okra_push_pointer;
78 Hsail::okra_push_boolean_func_t Hsail::_okra_push_boolean; 78 Hsail::okra_push_boolean_func_t Hsail::_okra_push_boolean;
79 Hsail::okra_push_byte_func_t Hsail::_okra_push_byte; 79 Hsail::okra_push_byte_func_t Hsail::_okra_push_byte;
80 Hsail::okra_push_double_func_t Hsail::_okra_push_double; 80 Hsail::okra_push_double_func_t Hsail::_okra_push_double;
81 Hsail::okra_push_float_func_t Hsail::_okra_push_float; 81 Hsail::okra_push_float_func_t Hsail::_okra_push_float;
82 Hsail::okra_push_int_func_t Hsail::_okra_push_int; 82 Hsail::okra_push_int_func_t Hsail::_okra_push_int;
83 Hsail::okra_push_long_func_t Hsail::_okra_push_long; 83 Hsail::okra_push_long_func_t Hsail::_okra_push_long;
84 Hsail::okra_execute_with_range_func_t Hsail::_okra_execute_with_range; 84 Hsail::okra_execute_kernel_func_t Hsail::_okra_execute_kernel;
85 Hsail::okra_clearargs_func_t Hsail::_okra_clearargs; 85 Hsail::okra_clear_args_func_t Hsail::_okra_clear_args;
86 Hsail::okra_register_heap_func_t Hsail::_okra_register_heap; 86 Hsail::okra_dispose_kernel_func_t Hsail::_okra_dispose_kernel;
87 Hsail::okra_dispose_context_func_t Hsail::_okra_dispose_context;
87 88
88 //static jint in_kernel = 0; 89 //static jint in_kernel = 0;
89 90
90 void Hsail::notice_safepoints() { 91 void Hsail::notice_safepoints() {
91 _notice_safepoints = true; 92 _notice_safepoints = true;
94 // } 95 // }
95 } 96 }
96 97
97 void Hsail::ignore_safepoints() { 98 void Hsail::ignore_safepoints() {
98 _notice_safepoints = false; 99 _notice_safepoints = false;
99 }
100
101 void Hsail::register_heap() {
102 // After the okra functions are set up and the heap is initialized, register the java heap with HSA
103 guarantee(Universe::heap() != NULL, "heap should be there by now.");
104 if (TraceGPUInteraction) {
105 tty->print_cr("[HSAIL] heap=" PTR_FORMAT, Universe::heap());
106 tty->print_cr("[HSAIL] base=0x%08x, capacity=%ld", Universe::heap()->base(), Universe::heap()->capacity());
107 }
108 _okra_register_heap(Universe::heap()->base(), Universe::heap()->capacity());
109 } 100 }
110 101
111 GPU_VMENTRY(jboolean, Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args, 102 GPU_VMENTRY(jboolean, Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args,
112 jobject donor_threads, jint allocBytesPerWorkitem, jobject oop_map_array)) 103 jobject donor_threads, jint allocBytesPerWorkitem, jobject oop_map_array))
113 104
154 // We avoid HSAILAllocationInfo logic if kernel does not allocate 145 // We avoid HSAILAllocationInfo logic if kernel does not allocate
155 // in which case the donor_thread array passed in will be null 146 // in which case the donor_thread array passed in will be null
156 HSAILAllocationInfo* allocInfo = (donor_threads == NULL ? NULL : new HSAILAllocationInfo(donor_threads, dimX, allocBytesPerWorkitem)); 147 HSAILAllocationInfo* allocInfo = (donor_threads == NULL ? NULL : new HSAILAllocationInfo(donor_threads, dimX, allocBytesPerWorkitem));
157 148
158 // Reset the kernel arguments 149 // Reset the kernel arguments
159 _okra_clearargs(kernel); 150 _okra_clear_args(kernel);
160 151
161 JavaThread* thread = (JavaThread*)THREAD; 152 JavaThread* thread = (JavaThread*)THREAD;
162 HSAILDeoptimizationInfo* e; 153 HSAILDeoptimizationInfo* e;
163 if (UseHSAILDeoptimization) { 154 if (UseHSAILDeoptimization) {
164 // get how many bytes per deopt save area are required 155 // get how many bytes per deopt save area are required
195 186
196 // Run the kernel 187 // Run the kernel
197 bool success = false; 188 bool success = false;
198 { 189 {
199 TraceTime t("execute kernel", TraceGPUInteraction); 190 TraceTime t("execute kernel", TraceGPUInteraction);
191 graal_okra_range_t kernel_range = {0};
200 192
201 //in_kernel = 1; 193 //in_kernel = 1;
202 // Run the kernel 194 // Run the kernel
203 success = _okra_execute_with_range(kernel, dimX); 195 kernel_range.dimension = 1;
196 kernel_range.global_size[0] = dimX;
197 success = _okra_execute_kernel(_device_context, kernel, &kernel_range);
204 //in_kernel = 0; 198 //in_kernel = 0;
205 } 199 }
206 200
207 // avoid HSAILAllocationInfo logic if kernel does not allocate 201 // avoid HSAILAllocationInfo logic if kernel does not allocate
208 if (allocInfo != NULL) { 202 if (allocInfo != NULL) {
345 name[name_len] = 0; 339 name[name_len] = 0;
346 340
347 env->GetByteArrayRegion(code_handle, 0, code_len, (jbyte*) code); 341 env->GetByteArrayRegion(code_handle, 0, code_len, (jbyte*) code);
348 env->GetStringUTFRegion(name_handle, 0, name_len, name); 342 env->GetStringUTFRegion(name_handle, 0, name_len, name);
349 343
350 register_heap();
351
352 // The kernel entrypoint is always run for the time being 344 // The kernel entrypoint is always run for the time being
353 const char* entryPointName = "&run"; 345 const char* entryPointName = "&run";
354 346 jlong okra_kernel;
355 _device_context = _okra_create_context(); 347 jint okra_status = _okra_create_kernel(_device_context, code, entryPointName, (void**)&okra_kernel);
356 348 guarantee(okra_status==0, "_okra_create_kernel failed");
357 return (jlong) _okra_create_kernel(_device_context, code, entryPointName); 349 return (jlong) okra_kernel;
358 GPU_END 350 GPU_END
359 351
360 #if defined(LINUX) 352 #if defined(LINUX)
361 static const char* okra_library_name = "libokra_x86_64.so"; 353 static const char* okra_library_name = "libokra_x86_64.so";
362 #elif defined(_WINDOWS) 354 #elif defined(_WINDOWS)
408 tty->print_cr("[HSAIL] library load failed."); 400 tty->print_cr("[HSAIL] library load failed.");
409 } 401 }
410 return false; 402 return false;
411 } 403 }
412 404
413 guarantee(_okra_create_context == NULL, "cannot repeat GPU initialization"); 405 guarantee(_okra_get_context == NULL, "cannot repeat GPU initialization");
414 406
415 // At this point we know okra_lib_handle is valid whether we loaded 407 // At this point we know okra_lib_handle is valid whether we loaded
416 // here or earlier. In either case, we can lookup the functions. 408 // here or earlier. In either case, we can lookup the functions.
417 LOOKUP_OKRA_FUNCTION(okra_create_context, okra_create_context); 409 LOOKUP_OKRA_FUNCTION(okra_get_context, okra_get_context);
418 LOOKUP_OKRA_FUNCTION(okra_create_kernel, okra_create_kernel); 410 LOOKUP_OKRA_FUNCTION(okra_create_kernel, okra_create_kernel);
419 LOOKUP_OKRA_FUNCTION(okra_push_object, okra_push_object); 411 LOOKUP_OKRA_FUNCTION(okra_push_pointer, okra_push_pointer);
420 LOOKUP_OKRA_FUNCTION(okra_push_boolean, okra_push_boolean); 412 LOOKUP_OKRA_FUNCTION(okra_push_boolean, okra_push_boolean);
421 LOOKUP_OKRA_FUNCTION(okra_push_byte, okra_push_byte); 413 LOOKUP_OKRA_FUNCTION(okra_push_byte, okra_push_byte);
422 LOOKUP_OKRA_FUNCTION(okra_push_double, okra_push_double); 414 LOOKUP_OKRA_FUNCTION(okra_push_double, okra_push_double);
423 LOOKUP_OKRA_FUNCTION(okra_push_float, okra_push_float); 415 LOOKUP_OKRA_FUNCTION(okra_push_float, okra_push_float);
424 LOOKUP_OKRA_FUNCTION(okra_push_int, okra_push_int); 416 LOOKUP_OKRA_FUNCTION(okra_push_int, okra_push_int);
425 LOOKUP_OKRA_FUNCTION(okra_push_long, okra_push_long); 417 LOOKUP_OKRA_FUNCTION(okra_push_long, okra_push_long);
426 LOOKUP_OKRA_FUNCTION(okra_execute_with_range, okra_execute_with_range); 418 LOOKUP_OKRA_FUNCTION(okra_execute_kernel, okra_execute_kernel);
427 LOOKUP_OKRA_FUNCTION(okra_clearargs, okra_clearargs); 419 LOOKUP_OKRA_FUNCTION(okra_clear_args, okra_clear_args);
428 LOOKUP_OKRA_FUNCTION(okra_register_heap, okra_register_heap); 420 LOOKUP_OKRA_FUNCTION(okra_dispose_kernel, okra_dispose_kernel);
421 LOOKUP_OKRA_FUNCTION(okra_dispose_context, okra_dispose_context);
429 // if we made it this far, real success 422 // if we made it this far, real success
430 423
431 Gpu::initialized_gpu(new Hsail()); 424 Gpu::initialized_gpu(new Hsail());
425
426 // There is 1 context per process
427 jint result = _okra_get_context(&_device_context);
428 guarantee(result==0, "get context failed");
432 429
433 return true; 430 return true;
434 GPU_END 431 GPU_END
435 432
436 433