Mercurial > hg > truffle
diff src/gpu/ptx/vm/ptxKernelArguments.cpp @ 12566:c17bfad2fa98
Merge.
author | Christian Humer <christian.humer@gmail.com> |
---|---|
date | Thu, 24 Oct 2013 15:56:08 +0200 |
parents | 11b086b1bae4 |
children | 1a7e7011a341 |
line wrap: on
line diff
--- a/src/gpu/ptx/vm/ptxKernelArguments.cpp Mon Oct 21 11:07:47 2013 +0200 +++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp Thu Oct 24 15:56:08 2013 +0200 @@ -32,127 +32,132 @@ // Get next java argument oop PTXKernelArguments::next_arg(BasicType expectedType) { assert(_index < _args->length(), "out of bounds"); - oop arg = ((objArrayOop) (_args))->obj_at(_index++); assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch"); - return arg; } void PTXKernelArguments::do_int() { - if (is_after_invocation()) { + // If the parameter is a return value, + if (is_return_type()) { + if (is_kernel_arg_setup()) { + // Allocate device memory for T_INT return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_INT_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; return; + } + // Push _dev_return_value to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_INT return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } else { - // Get the next java argument and its value which should be a T_INT - oop arg = next_arg(T_INT); - // Copy the java argument value to kernelArgBuffer - jvalue intval; - if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; - _bufferOffset += sizeof(intval.i); + _bufferOffset += sizeof(_dev_return_value); + } else { + // Get the next java argument and its value which should be a T_INT + oop arg = next_arg(T_INT); + // Copy the java argument value to kernelArgBuffer + jvalue intval; + if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); + _success = false; + return; } - return; + if (is_kernel_arg_setup()) { + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; + } + // Advance _bufferOffset + _bufferOffset += sizeof(intval.i); + } + return; } void PTXKernelArguments::do_float() { - if (is_after_invocation()) { + // If the parameter is a return value, + if (is_return_type()) { + if (is_kernel_arg_setup()) { + // Allocate device memory for T_INT return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_FLOAT_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; return; + } + // Push _dev_return_value to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_INT return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_FLOAT_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } else { - // Get the next java argument and its value which should be a T_INT - oop arg = next_arg(T_FLOAT); - // Copy the java argument value to kernelArgBuffer - jvalue floatval; - if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f; - _bufferOffset += sizeof(floatval.f); + // Advance _bufferOffset + _bufferOffset += sizeof(_dev_return_value); + } else { + // Get the next java argument and its value which should be a T_FLOAT + oop arg = next_arg(T_FLOAT); + // Copy the java argument value to kernelArgBuffer + jvalue floatval; + if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_FLOAT"); + _success = false; + return; } - return; + if (is_kernel_arg_setup()) { + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f; + } + // Advance _bufferOffset + _bufferOffset += sizeof(floatval.f); + } + return; } void PTXKernelArguments::do_double() { - if (is_after_invocation()) { + // If the parameter is a return value, + jvalue doubleval; + if (is_return_type()) { + if (is_kernel_arg_setup()) { + // Allocate device memory for T_INT return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_DOUBLE_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; return; + } + // Push _dev_return_value to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // If the parameter is a return value, - jvalue doubleval; - if (is_return_type()) { - // Allocate device memory for T_INT return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_DOUBLE_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - // _bufferOffset += sizeof(_return_value_ptr); - _bufferOffset += sizeof(doubleval.d); - } else { - // Get the next java argument and its value which should be a T_INT - oop arg = next_arg(T_FLOAT); - // Copy the java argument value to kernelArgBuffer - if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d; - _bufferOffset += sizeof(doubleval.d); + // Advance _bufferOffset + _bufferOffset += sizeof(doubleval.d); + } else { + // Get the next java argument and its value which should be a T_INT + oop arg = next_arg(T_FLOAT); + // Copy the java argument value to kernelArgBuffer + if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); + _success = false; + return; } - return; + if (is_kernel_arg_setup()) { + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d; + } + // Advance _bufferOffset + _bufferOffset += sizeof(doubleval.d); + } + return; } void PTXKernelArguments::do_long() { - if (is_after_invocation()) { - return; - } // If the parameter is a return value, if (is_return_type()) { - // Allocate device memory for T_LONG return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; + if (is_kernel_arg_setup()) { + // Allocate device memory for T_LONG return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_LONG_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; + return; + } + // Push _dev_return_value to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); + // Advance _bufferOffset + _bufferOffset += sizeof(_dev_return_value); } else { // Get the next java argument and its value which should be a T_LONG oop arg = next_arg(T_LONG); @@ -163,119 +168,132 @@ _success = false; return; } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j; + if (is_kernel_arg_setup()) { + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j; + } + // Advance _bufferOffset _bufferOffset += sizeof(val.j); } return; } void PTXKernelArguments::do_byte() { - if (is_after_invocation()) { + // If the parameter is a return value, + if (is_return_type()) { + if (is_kernel_arg_setup()) { + // Allocate device memory for T_BYTE return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; return; + } + // Push _dev_return_value to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_BYTE return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } else { - // Get the next java argument and its value which should be a T_BYTE - oop arg = next_arg(T_BYTE); - // Copy the java argument value to kernelArgBuffer - jvalue val; - if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; - _bufferOffset += sizeof(val.b); + // Advance _bufferOffset + _bufferOffset += sizeof(_dev_return_value); + } else { + // Get the next java argument and its value which should be a T_BYTE + oop arg = next_arg(T_BYTE); + // Copy the java argument value to kernelArgBuffer + jvalue val; + if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); + _success = false; + return; } - return; + if (is_kernel_arg_setup()) { + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; + } + // Advance _bufferOffset + _bufferOffset += sizeof(val.b); + } + return; } void PTXKernelArguments::do_bool() { - if (is_after_invocation()) { + // If the parameter is a return value, + if (is_return_type()) { + if (is_kernel_arg_setup()) { + // Allocate device memory for T_BYTE return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BOOLEAN_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; return; + } + // Push _dev_return_value to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_BYTE return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BOOLEAN_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } else { - // Get the next java argument and its value which should be a T_BYTE - oop arg = next_arg(T_BYTE); - // Copy the java argument value to kernelArgBuffer - jvalue val; - if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z; - _bufferOffset += sizeof(val.z); + // Advance _bufferOffset + _bufferOffset += sizeof(_dev_return_value); + } else { + // Get the next java argument and its value which should be a T_BYTE + oop arg = next_arg(T_BYTE); + // Copy the java argument value to kernelArgBuffer + jvalue val; + if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); + _success = false; + return; } - return; + if (is_kernel_arg_setup()) { + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z; + } + // Advance _bufferOffset + _bufferOffset += sizeof(val.z); + } + return; } void PTXKernelArguments::do_array(int begin, int end) { - gpu::Ptx::CUdeviceptr _array_ptr; - int status; - - // Get the next java argument and its value which should be a T_ARRAY - oop arg = next_arg(T_OBJECT); - int array_size = arg->size() * HeapWordSize; + // Get the next java argument and its value which should be a T_ARRAY + oop arg = next_arg(T_OBJECT); + assert(arg->is_array(), "argument value not an array"); + // Size of array argument + int argSize = arg->size() * HeapWordSize; + // Device pointer to array argument. + gpu::Ptx::CUdeviceptr arrayArgOnDev; + int status; - if (is_after_invocation()) { - _array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]); - status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status); - _success = false; - return; - } else { - // tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size); - } - return; + if (is_kernel_arg_setup()) { + // Allocate device memory for array argument on device. Size in bytes + status = gpu::Ptx::_cuda_cu_memalloc(&arrayArgOnDev, argSize); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for array argument on device", + status); + _success = false; + return; } - // Allocate device memory for T_ARRAY return value pointer on device. Size in bytes - status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size); + // Copy array argument to device + status = gpu::Ptx::_cuda_cu_memcpy_htod(arrayArgOnDev, arg, argSize); if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status); - _success = false; - return; - } else { - // tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size); + tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument content to device memory", + status); + _success = false; + return; } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - return; + + // Push device array argument to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = arrayArgOnDev; + } else { + arrayArgOnDev = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]); + status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, arrayArgOnDev, argSize); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status); + _success = false; + return; + } + } + + // Advance _bufferOffset + _bufferOffset += sizeof(arrayArgOnDev); + return; } void PTXKernelArguments::do_void() { - return; + return; } // TODO implement other do_*