truffle: src/gpu/ptx/vm/ptxKernelArguments.cpp comparison

comparison src/gpu/ptx/vm/ptxKernelArguments.cpp @ 11821:d8659ad83fcc

PTX single-threaded array store, Warp annotation

author	Morris Meyer <morris.meyer@oracle.com>
date	Sat, 28 Sep 2013 21:06:12 -0400
parents	91e5f927af63
children	c7abc8411011

comparison

Legend: equal | deleted | inserted | replaced

-:aeeab846e98c
+:d8659ad83fcc
 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod;
 // Get next java argument
 oop PTXKernelArguments::next_arg(BasicType expectedType) {
 assert(_index < _args->length(), "out of bounds");
-oop arg=((objArrayOop) (_args))->obj_at(_index++);
-assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
+oop arg = ((objArrayOop) (_args))->obj_at(_index++);
+assert(expectedType == T_OBJECT ||
+java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
 return arg;
 }
-void PTXKernelArguments::do_int()    {
+void PTXKernelArguments::do_int() {
+if (is_after_invocation()) {
+return;
+}
 // If the parameter is a return value,
 if (is_return_type()) {
 // Allocate device memory for T_INT return value pointer on device. Size in bytes
 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE);
 if (status != GRAAL_CUDA_SUCCESS) {
 return;
 }
 // Push _return_value_ptr to _kernelArgBuffer
 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
 _bufferOffset += sizeof(_return_value_ptr);
-}
+} else {
-else {
 // Get the next java argument and its value which should be a T_INT
 oop arg = next_arg(T_INT);
 // Copy the java argument value to kernelArgBuffer
 jvalue intval;
 if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
 _bufferOffset += sizeof(intval.i);
 }
 return;
 }
-void PTXKernelArguments::do_long()    {
+void PTXKernelArguments::do_long() {
+if (is_after_invocation()) {
+return;
+}
 // If the parameter is a return value,
 if (is_return_type()) {
 // Allocate device memory for T_LONG return value pointer on device. Size in bytes
 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE);
 if (status != GRAAL_CUDA_SUCCESS) {
 return;
 }
 // Push _return_value_ptr to _kernelArgBuffer
 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
 _bufferOffset += sizeof(_return_value_ptr);
-}
+} else {
-else {
 // Get the next java argument and its value which should be a T_LONG
 oop arg = next_arg(T_LONG);
 // Copy the java argument value to kernelArgBuffer
 jvalue val;
 if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) {
 _bufferOffset += sizeof(val.j);
 }
 return;
 }
-void PTXKernelArguments::do_byte()    {
+void PTXKernelArguments::do_byte() {
-// If the parameter is a return value,
+if (is_after_invocation()) {
-if (is_return_type()) {
+return;
-// Allocate device memory for T_BYTE return value pointer on device. Size in bytes
+}
-int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
+// If the parameter is a return value,
+if (is_return_type()) {
+// Allocate device memory for T_BYTE return value pointer on device. Size in bytes
+int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
+if (status != GRAAL_CUDA_SUCCESS) {
+tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+_success = false;
+return;
+}
+// Push _return_value_ptr to _kernelArgBuffer
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+_bufferOffset += sizeof(_return_value_ptr);
+} else {
+// Get the next java argument and its value which should be a T_BYTE
+oop arg = next_arg(T_BYTE);
+// Copy the java argument value to kernelArgBuffer
+jvalue val;
+if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
+tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
+_success = false;
+return;
+}
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
+_bufferOffset += sizeof(val.b);
+}
+return;
+}
+void PTXKernelArguments::do_array(int begin, int end) {
+gpu::Ptx::CUdeviceptr _array_ptr;
+int status;
+// Get the next java argument and its value which should be a T_ARRAY
+oop arg = next_arg(T_OBJECT);
+int array_size = arg->size() * HeapWordSize;
+if (is_after_invocation()) {
+_array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);
+status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size);
+if (status != GRAAL_CUDA_SUCCESS) {
+tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
+_success = false;
+return;
+} else {
+// tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size);
+}
+return;
+}
+// Allocate device memory for the array argument; the device pointer is kept in _return_value_ptr. Size in bytes
+status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size);
 if (status != GRAAL_CUDA_SUCCESS) {
 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
 _success = false;
 return;
+}
+status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size);
+if (status != GRAAL_CUDA_SUCCESS) {
+tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status);
+_success = false;
+return;
+} else {
+// tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size);
 }
 // Push _return_value_ptr to _kernelArgBuffer
 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
 _bufferOffset += sizeof(_return_value_ptr);
-}
+return;
-else {
+}
-// Get the next java argument and its value which should be a T_BYTE
-oop arg = next_arg(T_BYTE);
+void PTXKernelArguments::do_void() {
-// Copy the java argument value to kernelArgBuffer
+return;
-jvalue val;
-if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
-tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
-_success = false;
-return;
-}
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
-_bufferOffset += sizeof(val.b);
-}
-return;
 }
 // TODO implement other do_*

Mercurial > hg > truffle

comparison src/gpu/ptx/vm/ptxKernelArguments.cpp @ 11821:d8659ad83fcc