truffle: src/gpu/ptx/vm/gpu_ptx.cpp comparison

comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 11589:2afda67175e9

Merge

author	Mick Jordan <mick.jordan@oracle.com>
date	Tue, 03 Sep 2013 16:48:17 -0700
parents	49bb1bc983c6
children	c99e65785936

comparison

equal deleted inserted replaced

-:12f1d5fe0133
+:2afda67175e9
 #include "runtime/gpu.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ostream.hpp"
 #include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
+#include "kernelArguments.hpp"
 void * gpu::Ptx::_device_context;
+int    gpu::Ptx::_cu_device = 0;
 gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init;
 gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create;
-gpu::Ptx::cuda_cu_ctx_detach_func_t gpu::Ptx::_cuda_cu_ctx_detach;
+gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy;
 gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize;
 gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count;
 gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name;
 gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get;
 gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability;
 gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute;
 gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel;
 gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function;
 gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex;
+gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh;
+gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree;
 void gpu::probe_linkage() {
 #if defined(__APPLE__) || defined(LINUX)
 set_gpu_linkage(gpu::Ptx::probe_linkage());
 #else
 } else {
 return NULL;
 }
 }
-bool gpu::execute_kernel(address kernel, JavaCallArguments * jca) {
+bool gpu::execute_kernel(address kernel, PTXKernelArguments & ptxka, JavaValue& ret) {
 if (gpu::has_gpu_linkage()) {
-return (gpu::Ptx::execute_kernel(kernel, jca));
+return (gpu::Ptx::execute_kernel(kernel, ptxka, ret));
 } else {
 return false;
 }
 }
 }
 /* Get the handle to the first compute device */
 int device_id = 0;
 /* Compute-capable device handle */
-int cu_device = 0;
+status = _cuda_cu_device_get(&_cu_device, device_id);
-status = _cuda_cu_device_get(&cu_device, device_id);
 if (status != GRAAL_CUDA_SUCCESS) {
 tty->print_cr("[CUDA] Failed to get handle of first compute-capable device i.e., the one at ordinal: %d", device_id);
 return false;
 }
 tty->print_cr("[CUDA] Got the handle of first compute-device");
 }
 /* Get device attributes */
 int minor, major, unified_addressing;
-status = _cuda_cu_device_get_attribute(&minor, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cu_device);
+status = _cuda_cu_device_get_attribute(&minor, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, _cu_device);
 if (status != GRAAL_CUDA_SUCCESS) {
-tty->print_cr("[CUDA] Failed to get minor attribute of device: %d", cu_device);
+tty->print_cr("[CUDA] Failed to get minor attribute of device: %d", _cu_device);
 return false;
 }
-status = _cuda_cu_device_get_attribute(&major, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cu_device);
+status = _cuda_cu_device_get_attribute(&major, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, _cu_device);
 if (status != GRAAL_CUDA_SUCCESS) {
-tty->print_cr("[CUDA] Failed to get major attribute of device: %d", cu_device);
+tty->print_cr("[CUDA] Failed to get major attribute of device: %d", _cu_device);
 return false;
 }
 if (TraceGPUInteraction) {
-tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", cu_device, major, minor);
+tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", _cu_device, major, minor);
 }
-status = _cuda_cu_device_get_attribute(&unified_addressing, GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cu_device);
+status = _cuda_cu_device_get_attribute(&unified_addressing, GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, _cu_device);
 if (status != GRAAL_CUDA_SUCCESS) {
-tty->print_cr("[CUDA] Failed to query unified addressing mode of device: %d", cu_device);
+tty->print_cr("[CUDA] Failed to query unified addressing mode of device: %d", _cu_device);
 return false;
 }
 if (TraceGPUInteraction) {
-tty->print_cr("[CUDA] Unified addressing support on device %d: %d", cu_device, unified_addressing);
+tty->print_cr("[CUDA] Unified addressing support on device %d: %d", _cu_device, unified_addressing);
 }
 /* Get device name */
 char device_name[256];
-status = _cuda_cu_device_get_name(device_name, 256, cu_device);
+status = _cuda_cu_device_get_name(device_name, 256, _cu_device);
 if (status != GRAAL_CUDA_SUCCESS) {
-tty->print_cr("[CUDA] Failed to get name of device: %d", cu_device);
+tty->print_cr("[CUDA] Failed to get name of device: %d", _cu_device);
 return false;
 }
 if (TraceGPUInteraction) {
 tty->print_cr("[CUDA] Using %s", device_name);
-}
-/* Create CUDA context */
-status = _cuda_cu_ctx_create(&_device_context, 0, cu_device);
-if (status != GRAAL_CUDA_SUCCESS) {
-tty->print_cr("[CUDA] Failed to create CUDA context for device: %d", cu_device);
-return false;
-}
-if (TraceGPUInteraction) {
-tty->print_cr("[CUDA] Success: Created context for device: %d", cu_device);
 }
 return true;
 }
 tty->print_cr("[CUDA] PTX Kernel\n%s", code);
 tty->print_cr("[CUDA] Function name : %s", name);
 }
+/* Create CUDA context to compile and execute the kernel */
+int status = _cuda_cu_ctx_create(&_device_context, 0, _cu_device);
+if (status != GRAAL_CUDA_SUCCESS) {
+tty->print_cr("[CUDA] Failed to create CUDA context for device: %d", _cu_device);
+return NULL;
+}
+if (TraceGPUInteraction) {
+tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device);
+}
 /* Load module's data with compiler options */
-int status = _cuda_cu_module_load_data_ex(&cu_module, code, jit_num_options,
+status = _cuda_cu_module_load_data_ex(&cu_module, (void*) code, jit_num_options,
 jit_options, (void **)jit_option_values);
 if (status != GRAAL_CUDA_SUCCESS) {
 if (status == GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU) {
 tty->print_cr("[CUDA] Check for malformed PTX kernel or incorrect PTX compilation options");
 }
 }
 if (TraceGPUInteraction) {
 tty->print_cr("[CUDA] Got function handle for %s", name);
 }
 return cu_function;
 }
-bool gpu::Ptx::execute_kernel(address kernel, JavaCallArguments * jca) {
+bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
 // grid dimensionality
 unsigned int gridX = 1;
 unsigned int gridY = 1;
 unsigned int gridZ = 1;
 // thread dimensionality
 unsigned int blockX = 1;
 unsigned int blockY = 1;
 unsigned int blockZ = 1;
-int *cu_function = (int *)kernel;
+struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel;
-char * paramBuffer = (char *) jca->parameters();
+void * config[5] = {
-size_t paramBufferSz = (size_t) jca->size_of_parameters();
+GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, ptxka._kernelArgBuffer,
+GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &(ptxka._bufferOffset),
-void * config[] = {
-GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, paramBuffer,
-GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &paramBufferSz,
 GRAAL_CU_LAUNCH_PARAM_END
 };
 if (kernel == NULL) {
 return false;
 }
 if (TraceGPUInteraction) {
 tty->print_cr("[CUDA] launching kernel");
 }
 int status = _cuda_cu_launch_kernel(cu_function,
 gridX, gridY, gridZ,
 blockX, blockY, blockZ,
-0, NULL, NULL, config);
+0, NULL, NULL, (void **) &config);
 if (status != GRAAL_CUDA_SUCCESS) {
 tty->print_cr("[CUDA] Failed to launch kernel");
 return false;
 }
 if (TraceGPUInteraction) {
 tty->print_cr("[CUDA] Success: Kernel Launch");
 }
-return status == 0;  // GRAAL_CUDA_SUCCESS
+status = _cuda_cu_ctx_synchronize();
+if (status != GRAAL_CUDA_SUCCESS) {
+tty->print_cr("[CUDA] Failed to synchronize launched kernel (%d)", status);
+return false;
+}
+if (TraceGPUInteraction) {
+tty->print_cr("[CUDA] Success: Synchronized launch kernel");
+}
+// Get the result. TODO: Move this code to get_return_oop()
+BasicType return_type = ptxka.get_ret_type();
+switch (return_type) {
+case T_INT :
+{
+int return_val;
+status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_INT_BYTE_SIZE);
+if (status != GRAAL_CUDA_SUCCESS) {
+tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
+return false;
+}
+ret.set_jint(return_val);
+}
+break;
+case T_LONG :
+{
+long return_val;
+status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_LONG_BYTE_SIZE);
+if (status != GRAAL_CUDA_SUCCESS) {
+tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
+return false;
+}
+ret.set_jlong(return_val);
+}
+break;
+default:
+tty->print_cr("[CUDA] TODO *** Unhandled return type");
+}
+// Free device memory allocated for result
+status = gpu::Ptx::_cuda_cu_memfree(ptxka._return_value_ptr);
+if (status != GRAAL_CUDA_SUCCESS) {
+tty->print_cr("[CUDA] *** Error (%d) Failed to free device memory of return value", status);
+return false;
+}
+if (TraceGPUInteraction) {
+tty->print_cr("[CUDA] Success: Freed device memory of return value");
+}
+// Destroy context
+status = gpu::Ptx::_cuda_cu_ctx_destroy(_device_context);
+if (status != GRAAL_CUDA_SUCCESS) {
+tty->print_cr("[CUDA] *** Error (%d) Failed to destroy context", status);
+return false;
+}
+if (TraceGPUInteraction) {
+tty->print_cr("[CUDA] Success: Destroy context");
+}
+return (status == GRAAL_CUDA_SUCCESS);
 }
 #if defined(LINUX)
 static const char cuda_library_name[] = "libcuda.so";
 #elif defined(__APPLE__)
 if (handle != NULL) {
 _cuda_cu_init =
 CAST_TO_FN_PTR(cuda_cu_init_func_t, os::dll_lookup(handle, "cuInit"));
 _cuda_cu_ctx_create =
 CAST_TO_FN_PTR(cuda_cu_ctx_create_func_t, os::dll_lookup(handle, "cuCtxCreate"));
-_cuda_cu_ctx_detach =
+_cuda_cu_ctx_destroy =
-CAST_TO_FN_PTR(cuda_cu_ctx_detach_func_t, os::dll_lookup(handle, "cuCtxDetach"));
+CAST_TO_FN_PTR(cuda_cu_ctx_destroy_func_t, os::dll_lookup(handle, "cuCtxDestroy"));
 _cuda_cu_ctx_synchronize =
 CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, os::dll_lookup(handle, "cuCtxSynchronize"));
 _cuda_cu_device_get_count =
 CAST_TO_FN_PTR(cuda_cu_device_get_count_func_t, os::dll_lookup(handle, "cuDeviceGetCount"));
 _cuda_cu_device_get_name =
 CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, os::dll_lookup(handle, "cuModuleGetFunction"));
 _cuda_cu_module_load_data_ex =
 CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, os::dll_lookup(handle, "cuModuleLoadDataEx"));
 _cuda_cu_launch_kernel =
 CAST_TO_FN_PTR(cuda_cu_launch_kernel_func_t, os::dll_lookup(handle, "cuLaunchKernel"));
+_cuda_cu_memalloc =
+CAST_TO_FN_PTR(cuda_cu_memalloc_func_t, os::dll_lookup(handle, "cuMemAlloc"));
+_cuda_cu_memfree =
+CAST_TO_FN_PTR(cuda_cu_memfree_func_t, os::dll_lookup(handle, "cuMemFree"));
+_cuda_cu_memcpy_htod =
+CAST_TO_FN_PTR(cuda_cu_memcpy_htod_func_t, os::dll_lookup(handle, "cuMemcpyHtoD"));
+_cuda_cu_memcpy_dtoh =
+CAST_TO_FN_PTR(cuda_cu_memcpy_dtoh_func_t, os::dll_lookup(handle, "cuMemcpyDtoH"));
 if (TraceGPUInteraction) {
 tty->print_cr("[CUDA] Success: library linkage");
 }
 return true;
 } else {

Mercurial > hg > truffle

comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 11589:2afda67175e9