# HG changeset patch # User Doug Simon # Date 1391681659 -3600 # Node ID 51584f76462de63c59ee1fdeca5766a0deadcf13 # Parent f3e4f746e9c6a36251d0ce2ced48a4c87be5128c pulled Ptx and Hsail classes out of gpu class namespace diff -r f3e4f746e9c6 -r 51584f76462d graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java --- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java Thu Feb 06 00:21:10 2014 -0800 +++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java Thu Feb 06 11:14:19 2014 +0100 @@ -141,7 +141,7 @@ } /** - * Gets the address of {@code gpu::Ptx::execute_kernel_from_vm()}. + * Gets the address of {@code Ptx::execute_kernel_from_vm()}. */ private static native long getLaunchKernelAddress(); diff -r f3e4f746e9c6 -r 51584f76462d src/gpu/hsail/vm/gpu_hsail.cpp --- a/src/gpu/hsail/vm/gpu_hsail.cpp Thu Feb 06 00:21:10 2014 -0800 +++ b/src/gpu/hsail/vm/gpu_hsail.cpp Thu Feb 06 11:14:19 2014 +0100 @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "runtime/javaCalls.hpp" #include "runtime/gpu.hpp" +#include "hsail/vm/gpu_hsail.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/ostream.hpp" #include "memory/allocation.hpp" @@ -55,29 +56,29 @@ // public native void executeKernel(HotSpotNmethod kernel, int jobSize, int i, int j, Object[] args) throws InvalidInstalledCodeException; -JNINativeMethod gpu::Hsail::HSAIL_methods[] = { - {CC"initialize", CC"()Z", FN_PTR(gpu::Hsail::initialize)}, - {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(gpu::Hsail::generate_kernel)}, - {CC"executeKernel0", CC"("HS_INSTALLED_CODE"I["OBJECT")Z", FN_PTR(gpu::Hsail::execute_kernel_void_1d)}, +JNINativeMethod Hsail::HSAIL_methods[] = { + {CC"initialize", CC"()Z", FN_PTR(Hsail::initialize)}, + {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(Hsail::generate_kernel)}, + {CC"executeKernel0", CC"("HS_INSTALLED_CODE"I["OBJECT")Z", 
FN_PTR(Hsail::execute_kernel_void_1d)}, }; -void * gpu::Hsail::_device_context = NULL; +void * Hsail::_device_context = NULL; -gpu::Hsail::okra_create_context_func_t gpu::Hsail::_okra_create_context; -gpu::Hsail::okra_create_kernel_func_t gpu::Hsail::_okra_create_kernel; -gpu::Hsail::okra_push_object_func_t gpu::Hsail::_okra_push_object; -gpu::Hsail::okra_push_boolean_func_t gpu::Hsail::_okra_push_boolean; -gpu::Hsail::okra_push_byte_func_t gpu::Hsail::_okra_push_byte; -gpu::Hsail::okra_push_double_func_t gpu::Hsail::_okra_push_double; -gpu::Hsail::okra_push_float_func_t gpu::Hsail::_okra_push_float; -gpu::Hsail::okra_push_int_func_t gpu::Hsail::_okra_push_int; -gpu::Hsail::okra_push_long_func_t gpu::Hsail::_okra_push_long; -gpu::Hsail::okra_execute_with_range_func_t gpu::Hsail::_okra_execute_with_range; -gpu::Hsail::okra_clearargs_func_t gpu::Hsail::_okra_clearargs; -gpu::Hsail::okra_register_heap_func_t gpu::Hsail::_okra_register_heap; +Hsail::okra_create_context_func_t Hsail::_okra_create_context; +Hsail::okra_create_kernel_func_t Hsail::_okra_create_kernel; +Hsail::okra_push_object_func_t Hsail::_okra_push_object; +Hsail::okra_push_boolean_func_t Hsail::_okra_push_boolean; +Hsail::okra_push_byte_func_t Hsail::_okra_push_byte; +Hsail::okra_push_double_func_t Hsail::_okra_push_double; +Hsail::okra_push_float_func_t Hsail::_okra_push_float; +Hsail::okra_push_int_func_t Hsail::_okra_push_int; +Hsail::okra_push_long_func_t Hsail::_okra_push_long; +Hsail::okra_execute_with_range_func_t Hsail::_okra_execute_with_range; +Hsail::okra_clearargs_func_t Hsail::_okra_clearargs; +Hsail::okra_register_heap_func_t Hsail::_okra_register_heap; -void gpu::Hsail::register_heap() { +void Hsail::register_heap() { // After the okra functions are set up and the heap is initialized, register the java heap with HSA guarantee(Universe::heap() != NULL, "heap should be there by now."); if (TraceGPUInteraction) { @@ -87,7 +88,7 @@ _okra_register_heap(Universe::heap()->base(), 
Universe::heap()->capacity()); } -GPU_VMENTRY(jboolean, gpu::Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args_handle)) +GPU_VMENTRY(jboolean, Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args_handle)) ResourceMark rm; jlong nmethodValue = HotSpotInstalledCode::codeBlob(kernel_handle); @@ -115,7 +116,7 @@ return _okra_execute_with_range(kernel, dimX); GPU_END -GPU_ENTRY(jlong, gpu::Hsail::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle)) +GPU_ENTRY(jlong, Hsail::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle)) guarantee(_okra_create_kernel != NULL, "[HSAIL] Okra not linked"); ResourceMark rm; jsize name_len = env->GetStringLength(name_handle); @@ -158,7 +159,7 @@ return false; \ } \ -GPU_ENTRY(jboolean, gpu::Hsail::initialize, (JNIEnv *env, jclass)) +GPU_ENTRY(jboolean, Hsail::initialize, (JNIEnv *env, jclass)) if (okra_library_name == NULL) { if (TraceGPUInteraction) { tty->print_cr("Unsupported HSAIL platform"); @@ -211,7 +212,7 @@ return true; GPU_END -bool gpu::Hsail::register_natives(JNIEnv* env) { +bool Hsail::register_natives(JNIEnv* env) { jclass klass = env->FindClass("com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend"); if (klass == NULL) { if (TraceGPUInteraction) { diff -r f3e4f746e9c6 -r 51584f76462d src/gpu/hsail/vm/hsailKernelArguments.cpp --- a/src/gpu/hsail/vm/hsailKernelArguments.cpp Thu Feb 06 00:21:10 2014 -0800 +++ b/src/gpu/hsail/vm/hsailKernelArguments.cpp Thu Feb 06 11:14:19 2014 +0100 @@ -46,7 +46,7 @@ jvalue jValue; java_lang_boxing_object::get_value(arg, &jValue); - bool pushed = gpu::Hsail::_okra_push_boolean(_kernel, jValue.z); + bool pushed = Hsail::_okra_push_boolean(_kernel, jValue.z); assert(pushed == true, "arg push failed"); } @@ -58,7 +58,7 @@ jvalue jValue; java_lang_boxing_object::get_value(arg, &jValue); - bool pushed = gpu::Hsail::_okra_push_byte(_kernel, 
jValue.b); + bool pushed = Hsail::_okra_push_byte(_kernel, jValue.b); assert(pushed == true, "arg push failed"); } @@ -72,7 +72,7 @@ if (TraceGPUInteraction) { tty->print_cr("[HSAIL] HSAILKernelArguments::double value = %e", jValue.d); } - bool pushed = gpu::Hsail::_okra_push_double(_kernel, jValue.d); + bool pushed = Hsail::_okra_push_double(_kernel, jValue.d); assert(pushed == true, "arg push failed"); } @@ -86,7 +86,7 @@ if (TraceGPUInteraction) { tty->print_cr("[HSAIL] HSAILKernelArguments::float value = %f", jValue.f); } - bool pushed = gpu::Hsail::_okra_push_float(_kernel, jValue.f); + bool pushed = Hsail::_okra_push_float(_kernel, jValue.f); assert(pushed == true, "float push failed"); } @@ -107,7 +107,7 @@ jvalue jValue; java_lang_boxing_object::get_value(arg, &jValue); - bool pushed = gpu::Hsail::_okra_push_int(_kernel, jValue.i); + bool pushed = Hsail::_okra_push_int(_kernel, jValue.i); assert(pushed == true, "arg push failed"); } @@ -119,7 +119,7 @@ jvalue jValue; java_lang_boxing_object::get_value(arg, &jValue); - bool pushed = gpu::Hsail::_okra_push_long(_kernel, jValue.j); + bool pushed = Hsail::_okra_push_long(_kernel, jValue.j); assert(pushed == true, "arg push failed"); } @@ -130,7 +130,7 @@ tty->print_cr("[HSAIL] HSAILKernelArguments::do_array 0x%08x, is a %s", (address) arg, arg->klass()->external_name()); } - bool pushed = gpu::Hsail::_okra_push_object(_kernel, arg); + bool pushed = Hsail::_okra_push_object(_kernel, arg); assert(pushed == true, "arg push failed"); } @@ -153,7 +153,7 @@ tty->print_cr("[HSAIL] HSAILKernelArguments::do_object, 0x%08x is a %s", (address) arg, arg->klass()->external_name()); } - bool pushed = gpu::Hsail::_okra_push_object(_kernel, arg); + bool pushed = Hsail::_okra_push_object(_kernel, arg); assert(pushed == true, "arg push failed"); } diff -r f3e4f746e9c6 -r 51584f76462d src/gpu/hsail/vm/hsailKernelArguments.hpp --- a/src/gpu/hsail/vm/hsailKernelArguments.hpp Thu Feb 06 00:21:10 2014 -0800 +++ 
b/src/gpu/hsail/vm/hsailKernelArguments.hpp Thu Feb 06 11:14:19 2014 +0100 @@ -26,10 +26,11 @@ #define KERNEL_ARGUMENTS_HSAIL_HPP #include "runtime/gpu.hpp" +#include "hsail/vm/gpu_hsail.hpp" #include "runtime/signature.hpp" class HSAILKernelArguments : public SignatureIterator { - friend class gpu::Hsail; + friend class Hsail; public: @@ -71,7 +72,7 @@ if (TraceGPUInteraction) { tty->print_cr("[HSAIL] instance method, this 0x%08x, is a %s", (address) arg, arg->klass()->external_name()); } - bool pushed = gpu::Hsail::_okra_push_object(kernel, arg); + bool pushed = Hsail::_okra_push_object(kernel, arg); assert(pushed == true, "'this' push failed"); } else { if (TraceGPUInteraction) { diff -r f3e4f746e9c6 -r 51584f76462d src/gpu/ptx/vm/gpu_ptx.cpp --- a/src/gpu/ptx/vm/gpu_ptx.cpp Thu Feb 06 00:21:10 2014 -0800 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Thu Feb 06 11:14:19 2014 +0100 @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "runtime/javaCalls.hpp" #include "runtime/gpu.hpp" +#include "ptx/vm/gpu_ptx.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/ostream.hpp" #include "memory/allocation.hpp" @@ -47,12 +48,14 @@ // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'. #define GPU_VMENTRY(result_type, name, signature) \ JNIEXPORT result_type JNICALL name signature { \ + if (TraceGPUInteraction) tty->print_cr("[CUDA] Ptx::" #name); \ GRAAL_VM_ENTRY_MARK; \ // Entry to GPU native method implementation that calls a JNI function // and hence cannot transition current thread to '_thread_in_vm'. 
#define GPU_ENTRY(result_type, name, signature) \ JNIEXPORT result_type JNICALL name signature { \ + if (TraceGPUInteraction) tty->print_cr("[CUDA] Ptx::" #name); \ #define GPU_END } @@ -61,37 +64,37 @@ #define STRING "Ljava/lang/String;" -JNINativeMethod gpu::Ptx::PTX_methods[] = { - {CC"initialize", CC"()Z", FN_PTR(gpu::Ptx::initialize)}, - {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(gpu::Ptx::generate_kernel)}, - {CC"getLaunchKernelAddress", CC"()J", FN_PTR(gpu::Ptx::get_execute_kernel_from_vm_address)}, - {CC"getAvailableProcessors0", CC"()I", FN_PTR(gpu::Ptx::get_total_cores)}, +JNINativeMethod Ptx::PTX_methods[] = { + {CC"initialize", CC"()Z", FN_PTR(Ptx::initialize)}, + {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(Ptx::generate_kernel)}, + {CC"getLaunchKernelAddress", CC"()J", FN_PTR(Ptx::get_execute_kernel_from_vm_address)}, + {CC"getAvailableProcessors0", CC"()I", FN_PTR(Ptx::get_total_cores)}, }; -void * gpu::Ptx::_device_context; -int gpu::Ptx::_cu_device = 0; +void * Ptx::_device_context; +int Ptx::_cu_device = 0; -gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init; -gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create; -gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy; -gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize; -gpu::Ptx::cuda_cu_ctx_get_current_func_t gpu::Ptx::_cuda_cu_ctx_get_current; -gpu::Ptx::cuda_cu_ctx_set_current_func_t gpu::Ptx::_cuda_cu_ctx_set_current; -gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count; -gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name; -gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get; -gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability; -gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute; -gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel; 
-gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function; -gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex; -gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; -gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh; -gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc; -gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree; -gpu::Ptx::cuda_cu_mem_host_register_func_t gpu::Ptx::_cuda_cu_mem_host_register; -gpu::Ptx::cuda_cu_mem_host_get_device_pointer_func_t gpu::Ptx::_cuda_cu_mem_host_get_device_pointer; -gpu::Ptx::cuda_cu_mem_host_unregister_func_t gpu::Ptx::_cuda_cu_mem_host_unregister; +Ptx::cuda_cu_init_func_t Ptx::_cuda_cu_init; +Ptx::cuda_cu_ctx_create_func_t Ptx::_cuda_cu_ctx_create; +Ptx::cuda_cu_ctx_destroy_func_t Ptx::_cuda_cu_ctx_destroy; +Ptx::cuda_cu_ctx_synchronize_func_t Ptx::_cuda_cu_ctx_synchronize; +Ptx::cuda_cu_ctx_get_current_func_t Ptx::_cuda_cu_ctx_get_current; +Ptx::cuda_cu_ctx_set_current_func_t Ptx::_cuda_cu_ctx_set_current; +Ptx::cuda_cu_device_get_count_func_t Ptx::_cuda_cu_device_get_count; +Ptx::cuda_cu_device_get_name_func_t Ptx::_cuda_cu_device_get_name; +Ptx::cuda_cu_device_get_func_t Ptx::_cuda_cu_device_get; +Ptx::cuda_cu_device_compute_capability_func_t Ptx::_cuda_cu_device_compute_capability; +Ptx::cuda_cu_device_get_attribute_func_t Ptx::_cuda_cu_device_get_attribute; +Ptx::cuda_cu_launch_kernel_func_t Ptx::_cuda_cu_launch_kernel; +Ptx::cuda_cu_module_get_function_func_t Ptx::_cuda_cu_module_get_function; +Ptx::cuda_cu_module_load_data_ex_func_t Ptx::_cuda_cu_module_load_data_ex; +Ptx::cuda_cu_memcpy_htod_func_t Ptx::_cuda_cu_memcpy_htod; +Ptx::cuda_cu_memcpy_dtoh_func_t Ptx::_cuda_cu_memcpy_dtoh; +Ptx::cuda_cu_memalloc_func_t Ptx::_cuda_cu_memalloc; +Ptx::cuda_cu_memfree_func_t Ptx::_cuda_cu_memfree; +Ptx::cuda_cu_mem_host_register_func_t Ptx::_cuda_cu_mem_host_register; 
+Ptx::cuda_cu_mem_host_get_device_pointer_func_t Ptx::_cuda_cu_mem_host_get_device_pointer; +Ptx::cuda_cu_mem_host_unregister_func_t Ptx::_cuda_cu_mem_host_unregister; #define STRINGIFY(x) #x @@ -108,7 +111,7 @@ /* * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs */ -int gpu::Ptx::ncores(int major, int minor) { +int Ptx::ncores(int major, int minor) { int device_type = (major << 4) + minor; switch (device_type) { @@ -126,7 +129,7 @@ } } -bool gpu::Ptx::register_natives(JNIEnv* env) { +bool Ptx::register_natives(JNIEnv* env) { jclass klass = env->FindClass("com/oracle/graal/hotspot/ptx/PTXHotSpotBackend"); if (klass == NULL) { if (TraceGPUInteraction) { @@ -136,7 +139,7 @@ } jint status = env->RegisterNatives(klass, PTX_methods, sizeof(PTX_methods) / sizeof(JNINativeMethod)); if (status != JNI_OK) { - if (TraceGPUInteraction) { + if (true || TraceGPUInteraction) { tty->print_cr("Error registering natives for PTXHotSpotBackend: %d", status); } return false; @@ -144,7 +147,7 @@ return true; } -GPU_ENTRY(jboolean, gpu::Ptx::initialize, (JNIEnv *env, jclass)) +GPU_ENTRY(jboolean, Ptx::initialize, (JNIEnv *env, jclass)) if (!link()) { return false; @@ -255,7 +258,7 @@ return true; GPU_END -GPU_ENTRY(jint, gpu::Ptx::get_total_cores, (JNIEnv *env, jobject)) +GPU_ENTRY(jint, Ptx::get_total_cores, (JNIEnv *env, jobject)) int minor, major, nmp; int status = _cuda_cu_device_get_attribute(&minor, @@ -342,7 +345,7 @@ return total; GPU_END -GPU_ENTRY(jlong, gpu::Ptx::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle)) +GPU_ENTRY(jlong, Ptx::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle)) ResourceMark rm; jsize name_len = env->GetStringLength(name_handle); jsize code_len = env->GetArrayLength(code_handle); @@ -440,7 +443,7 @@ int _buffer_size; // size (in bytes) of _buffer oop* _pinned; // objects that have been pinned with cuMemHostRegister int _pinned_length; // length of _pinned - gpu::Ptx::CUdeviceptr 
_ret_value; // pointer to slot in GPU memory holding the return value + Ptx::CUdeviceptr _ret_value; // pointer to slot in GPU memory holding the return value int _ret_type_size; // size of the return type value bool _ret_is_object; // specifies if the return type is Object bool _gc_locked; // denotes when execution has locked GC @@ -474,8 +477,8 @@ void alloc_return_value() { if (_ret_type_size != 0) { - if (check(gpu::Ptx::_cuda_cu_memalloc(&_ret_value, _ret_type_size), "Allocate device memory for return value")) { - gpu::Ptx::CUdeviceptr* retValuePtr = (gpu::Ptx::CUdeviceptr*) ((_buffer + _buffer_size) - sizeof(_ret_value)); + if (check(Ptx::_cuda_cu_memalloc(&_ret_value, _ret_type_size), "Allocate device memory for return value")) { + Ptx::CUdeviceptr* retValuePtr = (Ptx::CUdeviceptr*) ((_buffer + _buffer_size) - sizeof(_ret_value)); *retValuePtr = _ret_value; } } @@ -503,7 +506,7 @@ // Size (in bytes) of object int objSize = obj->size() * HeapWordSize; //tty->print_cr("Pinning object %d at offset %d: %p", i, offset, obj); - if (!check(gpu::Ptx::_cuda_cu_mem_host_register(obj, objSize, GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP), "Pin object")) { + if (!check(Ptx::_cuda_cu_mem_host_register(obj, objSize, GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP), "Pin object")) { return; } @@ -512,7 +515,7 @@ // Replace host pointer to object with device pointer // to object in kernel parameters buffer - if (!check(gpu::Ptx::_cuda_cu_mem_host_get_device_pointer((gpu::Ptx::CUdeviceptr*) argPtr, obj, 0), "Get device pointer for pinned object")) { + if (!check(Ptx::_cuda_cu_mem_host_get_device_pointer((Ptx::CUdeviceptr*) argPtr, obj, 0), "Get device pointer for pinned object")) { return; } } @@ -529,7 +532,7 @@ GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &_buffer_size, GRAAL_CU_LAUNCH_PARAM_END }; - if (check(gpu::Ptx::_cuda_cu_launch_kernel((struct CUfunc_st*) (address) kernel, + if (check(Ptx::_cuda_cu_launch_kernel((struct CUfunc_st*) (address) kernel, gridX, gridY, gridZ, dimX, dimY, dimZ, 0, NULL, 
NULL, (void**) &config), "Launch kernel")) { @@ -537,7 +540,7 @@ } void synchronize() { - check(gpu::Ptx::_cuda_cu_ctx_synchronize(), "Synchronize kernel"); + check(Ptx::_cuda_cu_ctx_synchronize(), "Synchronize kernel"); } void unpin_objects() { @@ -545,7 +548,7 @@ oop obj = _pinned[--_pinned_length]; assert(obj != NULL, "npe"); //tty->print_cr("Unpinning object %d: %p", _pinned_length, obj); - if (!check(gpu::Ptx::_cuda_cu_mem_host_unregister(obj), "Unpin object")) { + if (!check(Ptx::_cuda_cu_mem_host_unregister(obj), "Unpin object")) { return; } } @@ -553,27 +556,27 @@ oop get_object_return_value() { oop return_val; - check(gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, T_OBJECT_BYTE_SIZE), "Copy return value from device"); + check(Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, T_OBJECT_BYTE_SIZE), "Copy return value from device"); return return_val; } jlong get_primitive_return_value() { jlong return_val; - check(gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, _ret_type_size), "Copy return value from device"); + check(Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, _ret_type_size), "Copy return value from device"); return return_val; } void free_return_value() { if (_ret_value != 0) { - check(gpu::Ptx::_cuda_cu_memfree(_ret_value), "Free device memory"); + check(Ptx::_cuda_cu_memfree(_ret_value), "Free device memory"); _ret_value = 0; } } void destroy_context() { - if (gpu::Ptx::_device_context != NULL) { - check(gpu::Ptx::_cuda_cu_ctx_destroy(gpu::Ptx::_device_context), "Destroy context"); - gpu::Ptx::_device_context = NULL; + if (Ptx::_device_context != NULL) { + check(Ptx::_cuda_cu_ctx_destroy(Ptx::_device_context), "Destroy context"); + Ptx::_device_context = NULL; } } @@ -666,11 +669,11 @@ } } -GPU_VMENTRY(jlong, gpu::Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass)) - return (jlong) gpu::Ptx::execute_kernel_from_vm; +GPU_VMENTRY(jlong, Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass)) + return (jlong) 
Ptx::execute_kernel_from_vm; GPU_END -JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ, +JRT_ENTRY(jlong, Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ, jlong buffer, jint bufferSize, jint objectParametersCount, @@ -724,7 +727,7 @@ static char const cuda_library_name[] = ""; #endif -bool gpu::Ptx::link() { +bool Ptx::link() { if (cuda_library_name == NULL) { if (TraceGPUInteraction) { tty->print_cr("Failed to find CUDA linkage"); diff -r f3e4f746e9c6 -r 51584f76462d src/gpu/ptx/vm/gpu_ptx.hpp --- a/src/gpu/ptx/vm/gpu_ptx.hpp Thu Feb 06 00:21:10 2014 -0800 +++ b/src/gpu/ptx/vm/gpu_ptx.hpp Thu Feb 06 11:14:19 2014 +0100 @@ -155,12 +155,12 @@ unsigned int, void*, void**, void**); typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*); typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**); - typedef int (*cuda_cu_memalloc_func_t)(gpu::Ptx::CUdeviceptr*, size_t); - typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr); - typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int); - typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr, unsigned int); + typedef int (*cuda_cu_memalloc_func_t)(Ptx::CUdeviceptr*, size_t); + typedef int (*cuda_cu_memfree_func_t)(Ptx::CUdeviceptr); + typedef int (*cuda_cu_memcpy_htod_func_t)(Ptx::CUdeviceptr, const void*, unsigned int); + typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, Ptx::CUdeviceptr, unsigned int); typedef int (*cuda_cu_mem_host_register_func_t)(void*, size_t, unsigned int); - typedef int (*cuda_cu_mem_host_get_device_pointer_func_t)(gpu::Ptx::CUdeviceptr*, void*, unsigned int); + typedef int (*cuda_cu_mem_host_get_device_pointer_func_t)(Ptx::CUdeviceptr*, void*, unsigned int); typedef int (*cuda_cu_mem_host_unregister_func_t)(void*); public: diff -r f3e4f746e9c6 -r 51584f76462d 
src/os/bsd/vm/gpu_bsd.cpp --- a/src/os/bsd/vm/gpu_bsd.cpp Thu Feb 06 00:21:10 2014 -0800 +++ b/src/os/bsd/vm/gpu_bsd.cpp Thu Feb 06 11:14:19 2014 +0100 @@ -23,6 +23,8 @@ */ #include "runtime/gpu.hpp" +#include "ptx/vm/gpu_ptx.hpp" +#include "hsail/vm/gpu_hsail.hpp" #include "utilities/ostream.hpp" jobject gpu::probe_gpus(JNIEnv* env) { @@ -31,7 +33,7 @@ * Let the CUDA driver initialization be the gate to GPU for now, pending * a better detection solution for NVIDA PTX and AMD HSAIL. */ - if (gpu::Ptx::register_natives(env)) { + if (Ptx::register_natives(env)) { if (TraceGPUInteraction) { tty->print_cr("Assuming NVidia/PTX support (APPLE)"); } diff -r f3e4f746e9c6 -r 51584f76462d src/os/linux/vm/gpu_linux.cpp --- a/src/os/linux/vm/gpu_linux.cpp Thu Feb 06 00:21:10 2014 -0800 +++ b/src/os/linux/vm/gpu_linux.cpp Thu Feb 06 11:14:19 2014 +0100 @@ -23,6 +23,8 @@ */ #include "runtime/gpu.hpp" +#include "ptx/vm/gpu_ptx.hpp" +#include "hsail/vm/gpu_hsail.hpp" #include "utilities/ostream.hpp" /* @@ -40,7 +42,7 @@ bool hsail = false; bool ptx = false; - if (UseHSAILSimulator && gpu::Hsail::register_natives(env)) { + if (UseHSAILSimulator && Hsail::register_natives(env)) { hsail = true; } @@ -71,7 +73,7 @@ if (TraceGPUInteraction) { tty->print_cr("Found supported nVidia device [vendor=0x%04x, device=0x%04x]", vendor, device); } - if (!ptx && gpu::Ptx::register_natives(env)) { + if (!ptx && Ptx::register_natives(env)) { ptx = true; } } diff -r f3e4f746e9c6 -r 51584f76462d src/os/windows/vm/gpu_windows.cpp --- a/src/os/windows/vm/gpu_windows.cpp Thu Feb 06 00:21:10 2014 -0800 +++ b/src/os/windows/vm/gpu_windows.cpp Thu Feb 06 11:14:19 2014 +0100 @@ -24,11 +24,12 @@ #include "precompiled.hpp" #include "runtime/gpu.hpp" +#include "hsail/vm/gpu_hsail.hpp" #include "utilities/ostream.hpp" jobject gpu::probe_gpus(JNIEnv* env) { // TODO: add detection of PTX/NVidia - if (UseHSAILSimulator && gpu::Hsail::register_natives(env)) { + if (UseHSAILSimulator && Hsail::register_natives(env)) 
{ return env->NewStringUTF("HSAIL"); } return env->NewStringUTF(""); diff -r f3e4f746e9c6 -r 51584f76462d src/share/vm/runtime/gpu.hpp --- a/src/share/vm/runtime/gpu.hpp Thu Feb 06 00:21:10 2014 -0800 +++ b/src/share/vm/runtime/gpu.hpp Thu Feb 06 11:14:19 2014 +0100 @@ -32,21 +32,18 @@ // Defines the interface to the graphics processor(s). class gpu : AllStatic { private: - static int _initialized_gpus; - - // Notifies that a GPU device has been initialized. - static void initialized_gpu(const char* name); + static int _initialized_gpus; // number of initialized GPU devices public: + // Notification of a GPU device that has been initialized. + static void initialized_gpu(const char* name); + // Gets a comma separated list of supported GPU architecture names. static jobject probe_gpus(JNIEnv* env); + // Gets the number of GPU devices that have been initialized. static int initialized_gpus() { return _initialized_gpus; } - -# include "ptx/vm/gpu_ptx.hpp" -# include "hsail/vm/gpu_hsail.hpp" - }; #endif // SHARE_VM_RUNTIME_GPU_HPP