Mercurial > hg > graal-jvmci-8
diff src/gpu/ptx/vm/ptxKernelArguments.cpp @ 11821:d8659ad83fcc
PTX single-threaded array store, Warp annotation
author | Morris Meyer <morris.meyer@oracle.com> |
---|---|
date | Sat, 28 Sep 2013 21:06:12 -0400 |
parents | 91e5f927af63 |
children | c7abc8411011 |
line wrap: on
line diff
--- a/src/gpu/ptx/vm/ptxKernelArguments.cpp Fri Sep 27 19:51:01 2013 +0200
+++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp Sat Sep 28 21:06:12 2013 -0400
@@ -32,12 +32,18 @@
 // Get next java argument
 oop PTXKernelArguments::next_arg(BasicType expectedType) {
   assert(_index < _args->length(), "out of bounds");
-  oop arg=((objArrayOop) (_args))->obj_at(_index++);
-  assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
+
+  oop arg = ((objArrayOop) (_args))->obj_at(_index++);
+  assert(expectedType == T_OBJECT ||
+         java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
+
   return arg;
 }
 
-void PTXKernelArguments::do_int() {
+void PTXKernelArguments::do_int() {
+  if (is_after_invocation()) {
+    return;
+  }
   // If the parameter is a return value,
   if (is_return_type()) {
     // Allocate device memory for T_INT return value pointer on device. Size in bytes
@@ -50,8 +56,7 @@
     // Push _return_value_ptr to _kernelBuffer
     *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
     _bufferOffset += sizeof(_return_value_ptr);
-  }
-  else {
+  } else {
     // Get the next java argument and its value which should be a T_INT
     oop arg = next_arg(T_INT);
     // Copy the java argument value to kernelArgBuffer
@@ -67,7 +72,10 @@
   return;
 }
 
-void PTXKernelArguments::do_long() {
+void PTXKernelArguments::do_long() {
+  if (is_after_invocation()) {
+    return;
+  }
   // If the parameter is a return value,
   if (is_return_type()) {
     // Allocate device memory for T_LONG return value pointer on device. Size in bytes
@@ -80,8 +88,7 @@
     // Push _return_value_ptr to _kernelBuffer
     *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
     _bufferOffset += sizeof(_return_value_ptr);
-  }
-  else {
+  } else {
     // Get the next java argument and its value which should be a T_LONG
     oop arg = next_arg(T_LONG);
     // Copy the java argument value to kernelArgBuffer
@@ -97,34 +104,81 @@
   return;
 }
 
-void PTXKernelArguments::do_byte() {
-  // If the parameter is a return value,
-  if (is_return_type()) {
-    // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
+void PTXKernelArguments::do_byte() {
+    if (is_after_invocation()) {
+        return;
+    }
+    // If the parameter is a return value,
+    if (is_return_type()) {
+        // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
+        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
+        if (status != GRAAL_CUDA_SUCCESS) {
+            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+            _success = false;
+            return;
+        }
+        // Push _return_value_ptr to _kernelBuffer
+        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+        _bufferOffset += sizeof(_return_value_ptr);
+    } else {
+        // Get the next java argument and its value which should be a T_BYTE
+        oop arg = next_arg(T_BYTE);
+        // Copy the java argument value to kernelArgBuffer
+        jvalue val;
+        if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
+            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
+            _success = false;
+            return;
+        }
+        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
+        _bufferOffset += sizeof(val.b);
+    }
+    return;
+}
+
+void PTXKernelArguments::do_array(int begin, int end) {
+    gpu::Ptx::CUdeviceptr _array_ptr;
+    int status;
+
+    // Get the next java argument and its value which should be a T_ARRAY
+    oop arg = next_arg(T_OBJECT);
+    int array_size = arg->size() * HeapWordSize;
+
+    if (is_after_invocation()) {
+        _array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);
+        status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size);
+        if (status != GRAAL_CUDA_SUCCESS) {
+            tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
+            _success = false;
+            return;
+        } else {
+            // tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size);
+        }
+        return;
+    }
+    // Allocate device memory for T_ARRAY return value pointer on device. Size in bytes
+    status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size);
     if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
+        return;
+    }
+    status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size);
+    if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status);
+        _success = false;
+        return;
+    } else {
+        // tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size);
     }
     // Push _return_value_ptr to _kernelBuffer
     *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
     _bufferOffset += sizeof(_return_value_ptr);
-  }
-  else {
-    // Get the next java argument and its value which should be a T_BYTE
-    oop arg = next_arg(T_BYTE);
-    // Copy the java argument value to kernelArgBuffer
-    jvalue val;
-    if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
-      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
-      _success = false;
-      return;
-    }
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
-    _bufferOffset += sizeof(val.b);
-  }
-  return;
+    return;
+}
+
+void PTXKernelArguments::do_void() {
+    return;
 }
 
 // TODO implement other do_*