Mercurial > hg > truffle
comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 13888:51584f76462d
pulled Ptx and Hsail classes out of gpu class namespace
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Thu, 06 Feb 2014 11:14:19 +0100 |
parents | b3e69d1f5da0 |
children | 0995dcbd6dd8 |
comparison legend (row markings used in the side-by-side diff below):
equal
deleted
inserted
replaced
13887:f3e4f746e9c6 | 13888:51584f76462d |
---|---|
23 */ | 23 */ |
24 | 24 |
25 #include "precompiled.hpp" | 25 #include "precompiled.hpp" |
26 #include "runtime/javaCalls.hpp" | 26 #include "runtime/javaCalls.hpp" |
27 #include "runtime/gpu.hpp" | 27 #include "runtime/gpu.hpp" |
28 #include "ptx/vm/gpu_ptx.hpp" | |
28 #include "utilities/globalDefinitions.hpp" | 29 #include "utilities/globalDefinitions.hpp" |
29 #include "utilities/ostream.hpp" | 30 #include "utilities/ostream.hpp" |
30 #include "memory/allocation.hpp" | 31 #include "memory/allocation.hpp" |
31 #include "memory/allocation.inline.hpp" | 32 #include "memory/allocation.inline.hpp" |
32 #include "memory/gcLocker.inline.hpp" | 33 #include "memory/gcLocker.inline.hpp" |
45 #define T_ARRAY_BYTE_SIZE sizeof(intptr_t) | 46 #define T_ARRAY_BYTE_SIZE sizeof(intptr_t) |
46 | 47 |
47 // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'. | 48 // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'. |
48 #define GPU_VMENTRY(result_type, name, signature) \ | 49 #define GPU_VMENTRY(result_type, name, signature) \ |
49 JNIEXPORT result_type JNICALL name signature { \ | 50 JNIEXPORT result_type JNICALL name signature { \ |
51 if (TraceGPUInteraction) tty->print_cr("[CUDA] Ptx::" #name); \ | |
50 GRAAL_VM_ENTRY_MARK; \ | 52 GRAAL_VM_ENTRY_MARK; \ |
51 | 53 |
52 // Entry to GPU native method implementation that calls a JNI function | 54 // Entry to GPU native method implementation that calls a JNI function |
53 // and hence cannot transition current thread to '_thread_in_vm'. | 55 // and hence cannot transition current thread to '_thread_in_vm'. |
54 #define GPU_ENTRY(result_type, name, signature) \ | 56 #define GPU_ENTRY(result_type, name, signature) \ |
55 JNIEXPORT result_type JNICALL name signature { \ | 57 JNIEXPORT result_type JNICALL name signature { \ |
58 if (TraceGPUInteraction) tty->print_cr("[CUDA] Ptx::" #name); \ | |
56 | 59 |
57 #define GPU_END } | 60 #define GPU_END } |
58 | 61 |
59 #define CC (char*) /*cast a literal from (const char*)*/ | 62 #define CC (char*) /*cast a literal from (const char*)*/ |
60 #define FN_PTR(f) CAST_FROM_FN_PTR(void*, &(f)) | 63 #define FN_PTR(f) CAST_FROM_FN_PTR(void*, &(f)) |
61 | 64 |
62 #define STRING "Ljava/lang/String;" | 65 #define STRING "Ljava/lang/String;" |
63 | 66 |
64 JNINativeMethod gpu::Ptx::PTX_methods[] = { | 67 JNINativeMethod Ptx::PTX_methods[] = { |
65 {CC"initialize", CC"()Z", FN_PTR(gpu::Ptx::initialize)}, | 68 {CC"initialize", CC"()Z", FN_PTR(Ptx::initialize)}, |
66 {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(gpu::Ptx::generate_kernel)}, | 69 {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(Ptx::generate_kernel)}, |
67 {CC"getLaunchKernelAddress", CC"()J", FN_PTR(gpu::Ptx::get_execute_kernel_from_vm_address)}, | 70 {CC"getLaunchKernelAddress", CC"()J", FN_PTR(Ptx::get_execute_kernel_from_vm_address)}, |
68 {CC"getAvailableProcessors0", CC"()I", FN_PTR(gpu::Ptx::get_total_cores)}, | 71 {CC"getAvailableProcessors0", CC"()I", FN_PTR(Ptx::get_total_cores)}, |
69 }; | 72 }; |
70 | 73 |
71 void * gpu::Ptx::_device_context; | 74 void * Ptx::_device_context; |
72 int gpu::Ptx::_cu_device = 0; | 75 int Ptx::_cu_device = 0; |
73 | 76 |
74 gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init; | 77 Ptx::cuda_cu_init_func_t Ptx::_cuda_cu_init; |
75 gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create; | 78 Ptx::cuda_cu_ctx_create_func_t Ptx::_cuda_cu_ctx_create; |
76 gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy; | 79 Ptx::cuda_cu_ctx_destroy_func_t Ptx::_cuda_cu_ctx_destroy; |
77 gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize; | 80 Ptx::cuda_cu_ctx_synchronize_func_t Ptx::_cuda_cu_ctx_synchronize; |
78 gpu::Ptx::cuda_cu_ctx_get_current_func_t gpu::Ptx::_cuda_cu_ctx_get_current; | 81 Ptx::cuda_cu_ctx_get_current_func_t Ptx::_cuda_cu_ctx_get_current; |
79 gpu::Ptx::cuda_cu_ctx_set_current_func_t gpu::Ptx::_cuda_cu_ctx_set_current; | 82 Ptx::cuda_cu_ctx_set_current_func_t Ptx::_cuda_cu_ctx_set_current; |
80 gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count; | 83 Ptx::cuda_cu_device_get_count_func_t Ptx::_cuda_cu_device_get_count; |
81 gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name; | 84 Ptx::cuda_cu_device_get_name_func_t Ptx::_cuda_cu_device_get_name; |
82 gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get; | 85 Ptx::cuda_cu_device_get_func_t Ptx::_cuda_cu_device_get; |
83 gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability; | 86 Ptx::cuda_cu_device_compute_capability_func_t Ptx::_cuda_cu_device_compute_capability; |
84 gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute; | 87 Ptx::cuda_cu_device_get_attribute_func_t Ptx::_cuda_cu_device_get_attribute; |
85 gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel; | 88 Ptx::cuda_cu_launch_kernel_func_t Ptx::_cuda_cu_launch_kernel; |
86 gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function; | 89 Ptx::cuda_cu_module_get_function_func_t Ptx::_cuda_cu_module_get_function; |
87 gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex; | 90 Ptx::cuda_cu_module_load_data_ex_func_t Ptx::_cuda_cu_module_load_data_ex; |
88 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; | 91 Ptx::cuda_cu_memcpy_htod_func_t Ptx::_cuda_cu_memcpy_htod; |
89 gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh; | 92 Ptx::cuda_cu_memcpy_dtoh_func_t Ptx::_cuda_cu_memcpy_dtoh; |
90 gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc; | 93 Ptx::cuda_cu_memalloc_func_t Ptx::_cuda_cu_memalloc; |
91 gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree; | 94 Ptx::cuda_cu_memfree_func_t Ptx::_cuda_cu_memfree; |
92 gpu::Ptx::cuda_cu_mem_host_register_func_t gpu::Ptx::_cuda_cu_mem_host_register; | 95 Ptx::cuda_cu_mem_host_register_func_t Ptx::_cuda_cu_mem_host_register; |
93 gpu::Ptx::cuda_cu_mem_host_get_device_pointer_func_t gpu::Ptx::_cuda_cu_mem_host_get_device_pointer; | 96 Ptx::cuda_cu_mem_host_get_device_pointer_func_t Ptx::_cuda_cu_mem_host_get_device_pointer; |
94 gpu::Ptx::cuda_cu_mem_host_unregister_func_t gpu::Ptx::_cuda_cu_mem_host_unregister; | 97 Ptx::cuda_cu_mem_host_unregister_func_t Ptx::_cuda_cu_mem_host_unregister; |
95 | 98 |
96 #define STRINGIFY(x) #x | 99 #define STRINGIFY(x) #x |
97 | 100 |
98 #define LOOKUP_CUDA_FUNCTION(name, alias) \ | 101 #define LOOKUP_CUDA_FUNCTION(name, alias) \ |
99 _##alias = \ | 102 _##alias = \ |
106 #define LOOKUP_CUDA_V2_FUNCTION(name, alias) LOOKUP_CUDA_FUNCTION(name##_v2, alias) | 109 #define LOOKUP_CUDA_V2_FUNCTION(name, alias) LOOKUP_CUDA_FUNCTION(name##_v2, alias) |
107 | 110 |
108 /* | 111 /* |
109 * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs | 112 * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs |
110 */ | 113 */ |
111 int gpu::Ptx::ncores(int major, int minor) { | 114 int Ptx::ncores(int major, int minor) { |
112 int device_type = (major << 4) + minor; | 115 int device_type = (major << 4) + minor; |
113 | 116 |
114 switch (device_type) { | 117 switch (device_type) { |
115 case 0x10: return 8; | 118 case 0x10: return 8; |
116 case 0x11: return 8; | 119 case 0x11: return 8; |
124 tty->print_cr("[CUDA] Warning: Unhandled device %x", device_type); | 127 tty->print_cr("[CUDA] Warning: Unhandled device %x", device_type); |
125 return 0; | 128 return 0; |
126 } | 129 } |
127 } | 130 } |
128 | 131 |
129 bool gpu::Ptx::register_natives(JNIEnv* env) { | 132 bool Ptx::register_natives(JNIEnv* env) { |
130 jclass klass = env->FindClass("com/oracle/graal/hotspot/ptx/PTXHotSpotBackend"); | 133 jclass klass = env->FindClass("com/oracle/graal/hotspot/ptx/PTXHotSpotBackend"); |
131 if (klass == NULL) { | 134 if (klass == NULL) { |
132 if (TraceGPUInteraction) { | 135 if (TraceGPUInteraction) { |
133 tty->print_cr("PTXHotSpotBackend class not found"); | 136 tty->print_cr("PTXHotSpotBackend class not found"); |
134 } | 137 } |
135 return false; | 138 return false; |
136 } | 139 } |
137 jint status = env->RegisterNatives(klass, PTX_methods, sizeof(PTX_methods) / sizeof(JNINativeMethod)); | 140 jint status = env->RegisterNatives(klass, PTX_methods, sizeof(PTX_methods) / sizeof(JNINativeMethod)); |
138 if (status != JNI_OK) { | 141 if (status != JNI_OK) { |
139 if (TraceGPUInteraction) { | 142 if (true || TraceGPUInteraction) { |
140 tty->print_cr("Error registering natives for PTXHotSpotBackend: %d", status); | 143 tty->print_cr("Error registering natives for PTXHotSpotBackend: %d", status); |
141 } | 144 } |
142 return false; | 145 return false; |
143 } | 146 } |
144 return true; | 147 return true; |
145 } | 148 } |
146 | 149 |
147 GPU_ENTRY(jboolean, gpu::Ptx::initialize, (JNIEnv *env, jclass)) | 150 GPU_ENTRY(jboolean, Ptx::initialize, (JNIEnv *env, jclass)) |
148 | 151 |
149 if (!link()) { | 152 if (!link()) { |
150 return false; | 153 return false; |
151 } | 154 } |
152 | 155 |
253 gpu::initialized_gpu(device_name); | 256 gpu::initialized_gpu(device_name); |
254 | 257 |
255 return true; | 258 return true; |
256 GPU_END | 259 GPU_END |
257 | 260 |
258 GPU_ENTRY(jint, gpu::Ptx::get_total_cores, (JNIEnv *env, jobject)) | 261 GPU_ENTRY(jint, Ptx::get_total_cores, (JNIEnv *env, jobject)) |
259 | 262 |
260 int minor, major, nmp; | 263 int minor, major, nmp; |
261 int status = _cuda_cu_device_get_attribute(&minor, | 264 int status = _cuda_cu_device_get_attribute(&minor, |
262 GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, | 265 GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, |
263 _cu_device); | 266 _cu_device); |
340 tty->print_cr("[CUDA] Max threads per block: %d warp size: %d", max_threads_per_block, warp_size); | 343 tty->print_cr("[CUDA] Max threads per block: %d warp size: %d", max_threads_per_block, warp_size); |
341 } | 344 } |
342 return total; | 345 return total; |
343 GPU_END | 346 GPU_END |
344 | 347 |
345 GPU_ENTRY(jlong, gpu::Ptx::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle)) | 348 GPU_ENTRY(jlong, Ptx::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle)) |
346 ResourceMark rm; | 349 ResourceMark rm; |
347 jsize name_len = env->GetStringLength(name_handle); | 350 jsize name_len = env->GetStringLength(name_handle); |
348 jsize code_len = env->GetArrayLength(code_handle); | 351 jsize code_len = env->GetArrayLength(code_handle); |
349 | 352 |
350 char* name = NEW_RESOURCE_ARRAY(char, name_len + 1); | 353 char* name = NEW_RESOURCE_ARRAY(char, name_len + 1); |
438 JavaThread* _thread; // the thread on which this call is made | 441 JavaThread* _thread; // the thread on which this call is made |
439 address _buffer; // buffer containing parameters and _return_value | 442 address _buffer; // buffer containing parameters and _return_value |
440 int _buffer_size; // size (in bytes) of _buffer | 443 int _buffer_size; // size (in bytes) of _buffer |
441 oop* _pinned; // objects that have been pinned with cuMemHostRegister | 444 oop* _pinned; // objects that have been pinned with cuMemHostRegister |
442 int _pinned_length; // length of _pinned | 445 int _pinned_length; // length of _pinned |
443 gpu::Ptx::CUdeviceptr _ret_value; // pointer to slot in GPU memory holding the return value | 446 Ptx::CUdeviceptr _ret_value; // pointer to slot in GPU memory holding the return value |
444 int _ret_type_size; // size of the return type value | 447 int _ret_type_size; // size of the return type value |
445 bool _ret_is_object; // specifies if the return type is Object | 448 bool _ret_is_object; // specifies if the return type is Object |
446 bool _gc_locked; // denotes when execution has locked GC | 449 bool _gc_locked; // denotes when execution has locked GC |
447 | 450 |
448 bool check(int status, const char *action) { | 451 bool check(int status, const char *action) { |
472 | 475 |
473 bool is_object_return() { return _ret_is_object; } | 476 bool is_object_return() { return _ret_is_object; } |
474 | 477 |
475 void alloc_return_value() { | 478 void alloc_return_value() { |
476 if (_ret_type_size != 0) { | 479 if (_ret_type_size != 0) { |
477 if (check(gpu::Ptx::_cuda_cu_memalloc(&_ret_value, _ret_type_size), "Allocate device memory for return value")) { | 480 if (check(Ptx::_cuda_cu_memalloc(&_ret_value, _ret_type_size), "Allocate device memory for return value")) { |
478 gpu::Ptx::CUdeviceptr* retValuePtr = (gpu::Ptx::CUdeviceptr*) ((_buffer + _buffer_size) - sizeof(_ret_value)); | 481 Ptx::CUdeviceptr* retValuePtr = (Ptx::CUdeviceptr*) ((_buffer + _buffer_size) - sizeof(_ret_value)); |
479 *retValuePtr = _ret_value; | 482 *retValuePtr = _ret_value; |
480 } | 483 } |
481 } | 484 } |
482 } | 485 } |
483 | 486 |
501 oop obj = *argPtr; | 504 oop obj = *argPtr; |
502 if (obj != NULL) { | 505 if (obj != NULL) { |
503 // Size (in bytes) of object | 506 // Size (in bytes) of object |
504 int objSize = obj->size() * HeapWordSize; | 507 int objSize = obj->size() * HeapWordSize; |
505 //tty->print_cr("Pinning object %d at offset %d: %p", i, offset, obj); | 508 //tty->print_cr("Pinning object %d at offset %d: %p", i, offset, obj); |
506 if (!check(gpu::Ptx::_cuda_cu_mem_host_register(obj, objSize, GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP), "Pin object")) { | 509 if (!check(Ptx::_cuda_cu_mem_host_register(obj, objSize, GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP), "Pin object")) { |
507 return; | 510 return; |
508 } | 511 } |
509 | 512 |
510 // Record original oop so that its memory can be unpinned | 513 // Record original oop so that its memory can be unpinned |
511 _pinned[_pinned_length++] = obj; | 514 _pinned[_pinned_length++] = obj; |
512 | 515 |
513 // Replace host pointer to object with device pointer | 516 // Replace host pointer to object with device pointer |
514 // to object in kernel parameters buffer | 517 // to object in kernel parameters buffer |
515 if (!check(gpu::Ptx::_cuda_cu_mem_host_get_device_pointer((gpu::Ptx::CUdeviceptr*) argPtr, obj, 0), "Get device pointer for pinned object")) { | 518 if (!check(Ptx::_cuda_cu_mem_host_get_device_pointer((Ptx::CUdeviceptr*) argPtr, obj, 0), "Get device pointer for pinned object")) { |
516 return; | 519 return; |
517 } | 520 } |
518 } | 521 } |
519 } | 522 } |
520 } | 523 } |
527 void * config[] = { | 530 void * config[] = { |
528 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, (char*) (address) _buffer, | 531 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, (char*) (address) _buffer, |
529 GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &_buffer_size, | 532 GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &_buffer_size, |
530 GRAAL_CU_LAUNCH_PARAM_END | 533 GRAAL_CU_LAUNCH_PARAM_END |
531 }; | 534 }; |
532 if (check(gpu::Ptx::_cuda_cu_launch_kernel((struct CUfunc_st*) (address) kernel, | 535 if (check(Ptx::_cuda_cu_launch_kernel((struct CUfunc_st*) (address) kernel, |
533 gridX, gridY, gridZ, | 536 gridX, gridY, gridZ, |
534 dimX, dimY, dimZ, | 537 dimX, dimY, dimZ, |
535 0, NULL, NULL, (void**) &config), "Launch kernel")) { | 538 0, NULL, NULL, (void**) &config), "Launch kernel")) { |
536 } | 539 } |
537 } | 540 } |
538 | 541 |
539 void synchronize() { | 542 void synchronize() { |
540 check(gpu::Ptx::_cuda_cu_ctx_synchronize(), "Synchronize kernel"); | 543 check(Ptx::_cuda_cu_ctx_synchronize(), "Synchronize kernel"); |
541 } | 544 } |
542 | 545 |
543 void unpin_objects() { | 546 void unpin_objects() { |
544 while (_pinned_length > 0) { | 547 while (_pinned_length > 0) { |
545 oop obj = _pinned[--_pinned_length]; | 548 oop obj = _pinned[--_pinned_length]; |
546 assert(obj != NULL, "npe"); | 549 assert(obj != NULL, "npe"); |
547 //tty->print_cr("Unpinning object %d: %p", _pinned_length, obj); | 550 //tty->print_cr("Unpinning object %d: %p", _pinned_length, obj); |
548 if (!check(gpu::Ptx::_cuda_cu_mem_host_unregister(obj), "Unpin object")) { | 551 if (!check(Ptx::_cuda_cu_mem_host_unregister(obj), "Unpin object")) { |
549 return; | 552 return; |
550 } | 553 } |
551 } | 554 } |
552 } | 555 } |
553 | 556 |
554 oop get_object_return_value() { | 557 oop get_object_return_value() { |
555 oop return_val; | 558 oop return_val; |
556 check(gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, T_OBJECT_BYTE_SIZE), "Copy return value from device"); | 559 check(Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, T_OBJECT_BYTE_SIZE), "Copy return value from device"); |
557 return return_val; | 560 return return_val; |
558 } | 561 } |
559 | 562 |
560 jlong get_primitive_return_value() { | 563 jlong get_primitive_return_value() { |
561 jlong return_val; | 564 jlong return_val; |
562 check(gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, _ret_type_size), "Copy return value from device"); | 565 check(Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, _ret_type_size), "Copy return value from device"); |
563 return return_val; | 566 return return_val; |
564 } | 567 } |
565 | 568 |
566 void free_return_value() { | 569 void free_return_value() { |
567 if (_ret_value != 0) { | 570 if (_ret_value != 0) { |
568 check(gpu::Ptx::_cuda_cu_memfree(_ret_value), "Free device memory"); | 571 check(Ptx::_cuda_cu_memfree(_ret_value), "Free device memory"); |
569 _ret_value = 0; | 572 _ret_value = 0; |
570 } | 573 } |
571 } | 574 } |
572 | 575 |
573 void destroy_context() { | 576 void destroy_context() { |
574 if (gpu::Ptx::_device_context != NULL) { | 577 if (Ptx::_device_context != NULL) { |
575 check(gpu::Ptx::_cuda_cu_ctx_destroy(gpu::Ptx::_device_context), "Destroy context"); | 578 check(Ptx::_cuda_cu_ctx_destroy(Ptx::_device_context), "Destroy context"); |
576 gpu::Ptx::_device_context = NULL; | 579 Ptx::_device_context = NULL; |
577 } | 580 } |
578 } | 581 } |
579 | 582 |
580 ~PtxCall() { | 583 ~PtxCall() { |
581 unpin_objects(); | 584 unpin_objects(); |
664 return; | 667 return; |
665 } | 668 } |
666 } | 669 } |
667 } | 670 } |
668 | 671 |
669 GPU_VMENTRY(jlong, gpu::Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass)) | 672 GPU_VMENTRY(jlong, Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass)) |
670 return (jlong) gpu::Ptx::execute_kernel_from_vm; | 673 return (jlong) Ptx::execute_kernel_from_vm; |
671 GPU_END | 674 GPU_END |
672 | 675 |
673 JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ, | 676 JRT_ENTRY(jlong, Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ, |
674 jlong buffer, | 677 jlong buffer, |
675 jint bufferSize, | 678 jint bufferSize, |
676 jint objectParametersCount, | 679 jint objectParametersCount, |
677 jlong objectParametersOffsets, | 680 jlong objectParametersOffsets, |
678 jlong pinnedObjects, | 681 jlong pinnedObjects, |
722 static char const cuda_library_name[] = "/usr/local/cuda/lib/libcuda.dylib"; | 725 static char const cuda_library_name[] = "/usr/local/cuda/lib/libcuda.dylib"; |
723 #else | 726 #else |
724 static char const cuda_library_name[] = ""; | 727 static char const cuda_library_name[] = ""; |
725 #endif | 728 #endif |
726 | 729 |
727 bool gpu::Ptx::link() { | 730 bool Ptx::link() { |
728 if (cuda_library_name == NULL) { | 731 if (cuda_library_name == NULL) { |
729 if (TraceGPUInteraction) { | 732 if (TraceGPUInteraction) { |
730 tty->print_cr("Failed to find CUDA linkage"); | 733 tty->print_cr("Failed to find CUDA linkage"); |
731 } | 734 } |
732 return false; | 735 return false; |