truffle: src/gpu/ptx/vm/ptxKernelArguments.cpp comparison

comparison src/gpu/ptx/vm/ptxKernelArguments.cpp @ 12566:c17bfad2fa98

Merge.

author	Christian Humer <christian.humer@gmail.com>
date	Thu, 24 Oct 2013 15:56:08 +0200
parents	11b086b1bae4
children	1a7e7011a341

comparison

equal deleted inserted replaced

-:e47f373499ec
+:c17bfad2fa98
 // Out-of-class definition of the static gpu::Ptx member _cuda_cu_memcpy_htod.
 // do_array() in this file calls it as _cuda_cu_memcpy_htod(device_ptr, host_src, byte_count)
 // and compares the int result against GRAAL_CUDA_SUCCESS.
 // NOTE(review): presumably bound at load time to the CUDA driver host-to-device copy
 // entry point -- confirm where it is assigned.
 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod;
 // Get next java argument
 // Returns the element of _args at position _index and post-increments _index.
 // expectedType: T_OBJECT accepts any element; any other BasicType requires the element
 //   to be a boxing object of that type.
 // Both the bounds check and the type check are expressed as assert only.
 // NOTE(review): if assert is compiled out in product builds, neither check is enforced
 // here and callers rely on java_lang_boxing_object::get_value for validation -- confirm.
 oop PTXKernelArguments::next_arg(BasicType expectedType) {
 assert(_index < _args->length(), "out of bounds");
 oop arg = ((objArrayOop) (_args))->obj_at(_index++);
 assert(expectedType == T_OBJECT ||
 java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
 return arg;
 }
 // Handles one T_INT slot of the kernel signature.
 // This listing interleaves two revisions: minus-prefixed lines are the old text
 // (e47f373499ec), plus-prefixed lines are the new text (c17bfad2fa98).
 // New revision, return-type slot: only while is_kernel_arg_setup() is true, allocate
 // T_INT_BYTE_SIZE bytes on the device into _dev_return_value and store that device
 // pointer at _kernelArgBuffer[_bufferOffset]; _bufferOffset then advances by
 // sizeof(_dev_return_value) whether or not the setup branch ran.
 // New revision, argument slot: take the next boxed Java argument, unbox it as T_INT,
 // store intval.i into the buffer only during setup, and always advance _bufferOffset
 // by sizeof(intval.i).
 // On any failure a message is printed to tty, _success is set to false and the
 // function returns before _bufferOffset is advanced.
 // The old revision instead returned immediately when is_after_invocation() was true.
 void PTXKernelArguments::do_int() {
-if (is_after_invocation()) {
+// If the parameter is a return value,
-return;
+if (is_return_type()) {
-}
+if (is_kernel_arg_setup()) {
-// If the parameter is a return value,
+// Allocate device memory for T_INT return value pointer on device. Size in bytes
-if (is_return_type()) {
+int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_INT_BYTE_SIZE);
-// Allocate device memory for T_INT return value pointer on device. Size in bytes
+if (status != GRAAL_CUDA_SUCCESS) {
-int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE);
+tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-if (status != GRAAL_CUDA_SUCCESS) {
+_success = false;
-tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+return;
-_success = false;
+}
-return;
+// Push _dev_return_value to _kernelBuffer
-}
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-// Push _return_value_ptr to _kernelBuffer
+}
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+_bufferOffset += sizeof(_dev_return_value);
-_bufferOffset += sizeof(_return_value_ptr);
+} else {
-} else {
+// Get the next java argument and its value which should be a T_INT
-// Get the next java argument and its value which should be a T_INT
+oop arg = next_arg(T_INT);
-oop arg = next_arg(T_INT);
+// Copy the java argument value to kernelArgBuffer
-// Copy the java argument value to kernelArgBuffer
+jvalue intval;
-jvalue intval;
+if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
-if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
+tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
+_success = false;
-_success = false;
+return;
-return;
+}
-}
+if (is_kernel_arg_setup()) {
 // NOTE(review): the store below goes through a CUdeviceptr pointer while the offset
 // advances by sizeof(intval.i); if CUdeviceptr is wider than jint the store touches
 // bytes that belong to the next slot -- confirm the CUdeviceptr width.
 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i;
-_bufferOffset += sizeof(intval.i);
+}
-}
+// Advance _bufferOffset
-return;
+_bufferOffset += sizeof(intval.i);
+}
+return;
 }
 // Handles one T_FLOAT slot of the kernel signature.
 // Minus-prefixed lines are the old revision, plus-prefixed lines the new one.
 // New revision, return-type slot: during is_kernel_arg_setup() allocate
 // T_FLOAT_BYTE_SIZE bytes on the device into _dev_return_value and store that pointer
 // at _kernelArgBuffer[_bufferOffset]; the offset advances by sizeof(_dev_return_value)
 // in either phase.
 // New revision, argument slot: unbox the next Java argument as T_FLOAT, store it only
 // during setup, and always advance the offset by sizeof(floatval.f).
 // Failures print to tty, set _success to false and return before the offset advances.
 // NOTE(review): the allocation comment in both revisions still says T_INT although the
 // size constant is T_FLOAT_BYTE_SIZE.
 void PTXKernelArguments::do_float() {
-if (is_after_invocation()) {
+// If the parameter is a return value,
-return;
+if (is_return_type()) {
-}
+if (is_kernel_arg_setup()) {
-// If the parameter is a return value,
+// Allocate device memory for T_INT return value pointer on device. Size in bytes
-if (is_return_type()) {
+int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_FLOAT_BYTE_SIZE);
-// Allocate device memory for T_INT return value pointer on device. Size in bytes
+if (status != GRAAL_CUDA_SUCCESS) {
-int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_FLOAT_BYTE_SIZE);
+tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-if (status != GRAAL_CUDA_SUCCESS) {
+_success = false;
-tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+return;
-_success = false;
+}
-return;
+// Push _dev_return_value to _kernelBuffer
-}
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-// Push _return_value_ptr to _kernelBuffer
+}
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+// Advance _bufferOffset
-_bufferOffset += sizeof(_return_value_ptr);
+_bufferOffset += sizeof(_dev_return_value);
 } else {
-// Get the next java argument and its value which should be a T_INT
+// Get the next java argument and its value which should be a T_FLOAT
 oop arg = next_arg(T_FLOAT);
 // Copy the java argument value to kernelArgBuffer
 jvalue floatval;
 if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) {
-tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
+tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_FLOAT");
 _success = false;
 return;
 }
 // NOTE(review): the cast to CUdeviceptr in the store below is a value conversion; if
 // CUdeviceptr is an integer type the fraction is dropped and the float bit pattern is
 // not what lands in the buffer -- confirm the intended encoding.
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f;
+if (is_kernel_arg_setup()) {
-_bufferOffset += sizeof(floatval.f);
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f;
 }
-return;
+// Advance _bufferOffset
+_bufferOffset += sizeof(floatval.f);
+}
+return;
 }
 // Handles one T_DOUBLE slot of the kernel signature.
 // Minus-prefixed lines are the old revision, plus-prefixed lines the new one.
 // New revision, return-type slot: during is_kernel_arg_setup() allocate
 // T_DOUBLE_BYTE_SIZE bytes on the device into _dev_return_value and store that pointer
 // at _kernelArgBuffer[_bufferOffset]; the offset then advances by sizeof(doubleval.d).
 // New revision, argument slot: unbox the next Java argument, require T_DOUBLE, store
 // it only during setup, and always advance the offset by sizeof(doubleval.d).
 // doubleval is declared ahead of the branch because both branches use it in sizeof.
 // Failures print to tty, set _success to false and return before the offset advances.
 // NOTE(review), all present in both revisions:
 //  - next_arg is called with T_FLOAT while get_value is checked against T_DOUBLE, so
 //    the assert inside next_arg tests for a different box type than the code accepts.
 //  - the mismatch message and the nearby comments say T_INT rather than T_DOUBLE.
 //  - the return-type slot advances by sizeof(doubleval.d), whereas the other do_*
 //    functions in this file advance by sizeof(_dev_return_value); the old revision
 //    carries the pointer-sized advance as a commented-out line.
 void PTXKernelArguments::do_double() {
-if (is_after_invocation()) {
+// If the parameter is a return value,
-return;
+jvalue doubleval;
-}
+if (is_return_type()) {
-// If the parameter is a return value,
+if (is_kernel_arg_setup()) {
-jvalue doubleval;
+// Allocate device memory for T_INT return value pointer on device. Size in bytes
-if (is_return_type()) {
+int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_DOUBLE_BYTE_SIZE);
-// Allocate device memory for T_INT return value pointer on device. Size in bytes
+if (status != GRAAL_CUDA_SUCCESS) {
-int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_DOUBLE_BYTE_SIZE);
+tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-if (status != GRAAL_CUDA_SUCCESS) {
+_success = false;
-tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+return;
-_success = false;
+}
-return;
+// Push _dev_return_value to _kernelBuffer
-}
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-// Push _return_value_ptr to _kernelBuffer
+}
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+// Advance _bufferOffset
-// _bufferOffset += sizeof(_return_value_ptr);
+_bufferOffset += sizeof(doubleval.d);
-_bufferOffset += sizeof(doubleval.d);
+} else {
-} else {
+// Get the next java argument and its value which should be a T_INT
-// Get the next java argument and its value which should be a T_INT
+oop arg = next_arg(T_FLOAT);
-oop arg = next_arg(T_FLOAT);
+// Copy the java argument value to kernelArgBuffer
-// Copy the java argument value to kernelArgBuffer
+if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) {
-if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) {
+tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
+_success = false;
-_success = false;
+return;
-return;
+}
-}
+if (is_kernel_arg_setup()) {
 // NOTE(review): the cast to CUdeviceptr below is a value conversion; if CUdeviceptr
 // is an integer type the double is truncated to an integer instead of being stored
 // bit for bit -- confirm the intended encoding.
 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d;
-_bufferOffset += sizeof(doubleval.d);
+}
-}
+// Advance _bufferOffset
-return;
+_bufferOffset += sizeof(doubleval.d);
+}
+return;
 }
 // Handles one T_LONG slot of the kernel signature.
 // Minus-prefixed lines are the old revision, plus-prefixed lines the new one.
 // New revision, return-type slot: during is_kernel_arg_setup() allocate
 // T_LONG_BYTE_SIZE bytes on the device into _dev_return_value and store that pointer
 // at _kernelArgBuffer[_bufferOffset]; the offset advances by sizeof(_dev_return_value)
 // in either phase.
 // New revision, argument slot: unbox the next Java argument as T_LONG, store val.j
 // only during setup, and always advance the offset by sizeof(val.j).
 // Failures print to tty, set _success to false and return before the offset advances.
 // The old revision returned immediately when is_after_invocation() was true and
 // stored the value unconditionally otherwise.
 void PTXKernelArguments::do_long() {
-if (is_after_invocation()) {
+// If the parameter is a return value,
-return;
+if (is_return_type()) {
-}
+if (is_kernel_arg_setup()) {
-// If the parameter is a return value,
+// Allocate device memory for T_LONG return value pointer on device. Size in bytes
-if (is_return_type()) {
+int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_LONG_BYTE_SIZE);
-// Allocate device memory for T_LONG return value pointer on device. Size in bytes
+if (status != GRAAL_CUDA_SUCCESS) {
-int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE);
+tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-if (status != GRAAL_CUDA_SUCCESS) {
+_success = false;
-tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+return;
-_success = false;
+}
-return;
+// Push _dev_return_value to _kernelBuffer
-}
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-// Push _return_value_ptr to _kernelBuffer
+}
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+// Advance _bufferOffset
-_bufferOffset += sizeof(_return_value_ptr);
+_bufferOffset += sizeof(_dev_return_value);
 } else {
 // Get the next java argument and its value which should be a T_LONG
 oop arg = next_arg(T_LONG);
 // Copy the java argument value to kernelArgBuffer
 jvalue val;
 if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) {
 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_LONG");
 _success = false;
 return;
 }
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j;
+if (is_kernel_arg_setup()) {
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j;
+}
+// Advance _bufferOffset
 _bufferOffset += sizeof(val.j);
 }
 return;
 }
 // Handles one T_BYTE slot of the kernel signature.
 // Minus-prefixed lines are the old revision, plus-prefixed lines the new one.
 // New revision, return-type slot: during is_kernel_arg_setup() allocate T_BYTE_SIZE
 // bytes on the device into _dev_return_value and store that pointer at
 // _kernelArgBuffer[_bufferOffset]; the offset advances by sizeof(_dev_return_value)
 // in either phase.
 // New revision, argument slot: unbox the next Java argument as T_BYTE, store val.b
 // only during setup, and always advance the offset by sizeof(val.b).
 // Failures print to tty, set _success to false and return before the offset advances.
 // NOTE(review): the argument store goes through a CUdeviceptr pointer while the
 // offset advances by sizeof(val.b); if CUdeviceptr is wider than one byte the store
 // covers bytes of the following slots -- confirm this is intended.
 void PTXKernelArguments::do_byte() {
-if (is_after_invocation()) {
+// If the parameter is a return value,
-return;
+if (is_return_type()) {
-}
+if (is_kernel_arg_setup()) {
-// If the parameter is a return value,
+// Allocate device memory for T_BYTE return value pointer on device. Size in bytes
-if (is_return_type()) {
+int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BYTE_SIZE);
-// Allocate device memory for T_BYTE return value pointer on device. Size in bytes
+if (status != GRAAL_CUDA_SUCCESS) {
-int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
+tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-if (status != GRAAL_CUDA_SUCCESS) {
+_success = false;
-tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+return;
-_success = false;
+}
-return;
+// Push _dev_return_value to _kernelBuffer
-}
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-// Push _return_value_ptr to _kernelBuffer
+}
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+// Advance _bufferOffset
-_bufferOffset += sizeof(_return_value_ptr);
+_bufferOffset += sizeof(_dev_return_value);
 } else {
 // Get the next java argument and its value which should be a T_BYTE
 oop arg = next_arg(T_BYTE);
 // Copy the java argument value to kernelArgBuffer
 jvalue val;
 if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
 _success = false;
 return;
 }
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
+if (is_kernel_arg_setup()) {
-_bufferOffset += sizeof(val.b);
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
 }
-return;
+// Advance _bufferOffset
+_bufferOffset += sizeof(val.b);
+}
+return;
 }
 // Handles one T_BOOLEAN slot of the kernel signature.
 // Minus-prefixed lines are the old revision, plus-prefixed lines the new one.
 // New revision, return-type slot: during is_kernel_arg_setup() allocate
 // T_BOOLEAN_SIZE bytes on the device into _dev_return_value and store that pointer at
 // _kernelArgBuffer[_bufferOffset]; the offset advances by sizeof(_dev_return_value)
 // in either phase.
 // New revision, argument slot: unbox the next Java argument, require T_BOOLEAN, store
 // val.z only during setup, and always advance the offset by sizeof(val.z).
 // Failures print to tty, set _success to false and return before the offset advances.
 // NOTE(review), present in both revisions:
 //  - next_arg is called with T_BYTE while get_value is checked against T_BOOLEAN, so
 //    the assert inside next_arg tests for a different box type than the code accepts.
 //  - the comments and the mismatch message say T_BYTE rather than T_BOOLEAN.
 void PTXKernelArguments::do_bool() {
-if (is_after_invocation()) {
+// If the parameter is a return value,
-return;
+if (is_return_type()) {
-}
+if (is_kernel_arg_setup()) {
-// If the parameter is a return value,
+// Allocate device memory for T_BYTE return value pointer on device. Size in bytes
-if (is_return_type()) {
+int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BOOLEAN_SIZE);
-// Allocate device memory for T_BYTE return value pointer on device. Size in bytes
+if (status != GRAAL_CUDA_SUCCESS) {
-int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BOOLEAN_SIZE);
+tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-if (status != GRAAL_CUDA_SUCCESS) {
+_success = false;
-tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+return;
-_success = false;
+}
-return;
+// Push _dev_return_value to _kernelBuffer
-}
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-// Push _return_value_ptr to _kernelBuffer
+}
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+// Advance _bufferOffset
-_bufferOffset += sizeof(_return_value_ptr);
+_bufferOffset += sizeof(_dev_return_value);
 } else {
 // Get the next java argument and its value which should be a T_BYTE
 oop arg = next_arg(T_BYTE);
 // Copy the java argument value to kernelArgBuffer
 jvalue val;
 if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) {
 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
 _success = false;
 return;
 }
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z;
+if (is_kernel_arg_setup()) {
-_bufferOffset += sizeof(val.z);
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z;
 }
-return;
+// Advance _bufferOffset
+_bufferOffset += sizeof(val.z);
+}
+return;
 }
 // Handles one array slot of the kernel signature.
 // Minus-prefixed lines are the old revision, plus-prefixed lines the new one.
 // New revision: fetch the next Java argument (asserted to be an array) and compute
 // its byte count as arg->size() * HeapWordSize.
 //  - While is_kernel_arg_setup() is true: allocate that many bytes on the device into
 //    the local arrayArgOnDev, copy the argument host-to-device, and store the device
 //    pointer at _kernelArgBuffer[_bufferOffset].
 //  - Otherwise: read the device pointer back from _kernelArgBuffer[_bufferOffset] and
 //    copy device-to-host into the same argument object.
 //  - In both cases _bufferOffset then advances by sizeof(arrayArgOnDev).
 // Failures print to tty, set _success to false and return before the offset advances.
 // The begin and end parameters are not referenced in the visible body.
 // The old revision kept the device pointer in the member _return_value_ptr and did
 // not advance _bufferOffset on its after-invocation path.
 // NOTE(review): no call releasing the device allocation appears in this function --
 // confirm it is freed elsewhere.
 // NOTE(review): the oop itself is passed as the host address and arg->size() is
 // presumably the whole object size in heap words, so the object header would be
 // copied to the device and back along with the elements -- confirm.
 void PTXKernelArguments::do_array(int begin, int end) {
-gpu::Ptx::CUdeviceptr _array_ptr;
+// Get the next java argument and its value which should be a T_ARRAY
-int status;
+oop arg = next_arg(T_OBJECT);
+assert(arg->is_array(), "argument value not an array");
-// Get the next java argument and its value which should be a T_ARRAY
+// Size of array argument
-oop arg = next_arg(T_OBJECT);
+int argSize = arg->size() * HeapWordSize;
-int array_size = arg->size() * HeapWordSize;
+// Device pointer to array argument.
+gpu::Ptx::CUdeviceptr arrayArgOnDev;
-if (is_after_invocation()) {
+int status;
-_array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);
-status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size);
+if (is_kernel_arg_setup()) {
-if (status != GRAAL_CUDA_SUCCESS) {
+// Allocate device memory for array argument on device. Size in bytes
-tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
+status = gpu::Ptx::_cuda_cu_memalloc(&arrayArgOnDev, argSize);
-_success = false;
-return;
-} else {
-// tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size);
-}
-return;
-}
-// Allocate device memory for T_ARRAY return value pointer on device. Size in bytes
-status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size);
 if (status != GRAAL_CUDA_SUCCESS) {
-tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for array argument on device",
-_success = false;
+status);
-return;
+_success = false;
-}
+return;
-status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size);
+}
+// Copy array argument to device
+status = gpu::Ptx::_cuda_cu_memcpy_htod(arrayArgOnDev, arg, argSize);
 if (status != GRAAL_CUDA_SUCCESS) {
-tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status);
+tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument content to device memory",
-_success = false;
+status);
-return;
+_success = false;
-} else {
+return;
-// tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size);
+}
-}
-// Push _return_value_ptr to _kernelBuffer
+// Push device array argument to _kernelBuffer
-*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+*((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = arrayArgOnDev;
-_bufferOffset += sizeof(_return_value_ptr);
+} else {
-return;
+arrayArgOnDev = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);
+status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, arrayArgOnDev, argSize);
+if (status != GRAAL_CUDA_SUCCESS) {
+tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
+_success = false;
+return;
+}
+}
+// Advance _bufferOffset
+_bufferOffset += sizeof(arrayArgOnDev);
+return;
 }
 // Handles a void slot of the kernel signature: nothing is allocated, nothing is
 // written to _kernelArgBuffer and _bufferOffset is left unchanged.
 void PTXKernelArguments::do_void() {
 return;
 }
 // TODO implement other do_*

Mercurial > hg > truffle

comparison src/gpu/ptx/vm/ptxKernelArguments.cpp @ 12566:c17bfad2fa98