# HG changeset patch # User S.Bharadwaj Yadavalli # Date 1382466941 14400 # Node ID f020e149c1b6be311be84c8c6950044fbf4a3bd0 # Parent 0916da3633acf54b32aebde0b44ea71c1515da68 PTX codegen enhancements; fixes to PTX test regressions. diff -r 0916da3633ac -r f020e149c1b6 graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java --- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java Tue Oct 22 17:03:01 2013 +0200 +++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java Tue Oct 22 14:35:41 2013 -0400 @@ -360,18 +360,18 @@ assert var instanceof Variable; assert val instanceof Constant; Constant constant = (Constant) val; - return ("[" + emitRegister((Variable) var, false) + " + " + constant.asBoxedValue() + "]"); + return ("[" + ((space == PTXStateSpace.Parameter) ? emitParameter((Variable) var) : emitRegister((Variable) var, false)) + " + " + constant.asBoxedValue() + "]"); } @Override public String emitRegister(Variable var, boolean comma) { - /* - * if (space == Parameter) { return ("param" + var.index); } else { return ("%r" + - * var.index); } - */ return ("%r" + var.index); } + public String emitParameter(Variable v) { + return ("param" + v.index); + } + public String emit(boolean isLoad) { if (isLoad) { return (space.getStateName() + "." 
+ typeForKind(valueKind) + " " + emitRegister(dest, false) + ", " + emitAddress(source1, source2) + ";"); @@ -671,7 +671,7 @@ } public String emitParameter(Variable v) { - return (" %r" + v.index); + return (" param" + v.index); } public void emit(PTXAssembler asm) { diff -r 0916da3633ac -r f020e149c1b6 graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/GuardNode.java --- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/GuardNode.java Tue Oct 22 17:03:01 2013 +0200 +++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/GuardNode.java Tue Oct 22 14:35:41 2013 -0400 @@ -110,5 +110,5 @@ public void setAction(DeoptimizationAction invalidaterecompile) { this.action = invalidaterecompile; + } } -} diff -r 0916da3633ac -r f020e149c1b6 src/gpu/ptx/vm/gpu_ptx.cpp --- a/src/gpu/ptx/vm/gpu_ptx.cpp Tue Oct 22 17:03:01 2013 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Tue Oct 22 14:35:41 2013 -0400 @@ -385,7 +385,7 @@ case T_INT: { int return_val; - status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_INT_BYTE_SIZE); + status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_INT_BYTE_SIZE); if (status != GRAAL_CUDA_SUCCESS) { tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status); return false; @@ -396,7 +396,7 @@ case T_BOOLEAN: { int return_val; - status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_INT_BYTE_SIZE); + status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_INT_BYTE_SIZE); if (status != GRAAL_CUDA_SUCCESS) { tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status); return false; @@ -407,7 +407,7 @@ case T_FLOAT: { float return_val; - status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_FLOAT_BYTE_SIZE); + status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_FLOAT_BYTE_SIZE); if (status != GRAAL_CUDA_SUCCESS) { tty->print_cr("[CUDA] 
*** Error (%d) Failed to copy value to device argument", status); return false; @@ -418,7 +418,7 @@ case T_DOUBLE: { double return_val; - status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_DOUBLE_BYTE_SIZE); + status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_DOUBLE_BYTE_SIZE); if (status != GRAAL_CUDA_SUCCESS) { tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status); return false; @@ -429,7 +429,7 @@ case T_LONG: { long return_val; - status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_LONG_BYTE_SIZE); + status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_LONG_BYTE_SIZE); if (status != GRAAL_CUDA_SUCCESS) { tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status); return false; @@ -443,11 +443,11 @@ tty->print_cr("[CUDA] TODO *** Unhandled return type: %d", return_type); } - // handle post-invocation object and array arguemtn - ptxka.reiterate(); + // Copy all reference arguments from device to host memory. 
+ ptxka.copyRefArgsFromDtoH(); // Free device memory allocated for result - status = gpu::Ptx::_cuda_cu_memfree(ptxka._return_value_ptr); + status = gpu::Ptx::_cuda_cu_memfree(ptxka._dev_return_value); if (status != GRAAL_CUDA_SUCCESS) { tty->print_cr("[CUDA] *** Error (%d) Failed to free device memory of return value", status); return false; diff -r 0916da3633ac -r f020e149c1b6 src/gpu/ptx/vm/ptxKernelArguments.cpp --- a/src/gpu/ptx/vm/ptxKernelArguments.cpp Tue Oct 22 17:03:01 2013 +0200 +++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp Tue Oct 22 14:35:41 2013 -0400 @@ -32,127 +32,132 @@ // Get next java argument oop PTXKernelArguments::next_arg(BasicType expectedType) { assert(_index < _args->length(), "out of bounds"); - oop arg = ((objArrayOop) (_args))->obj_at(_index++); assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch"); - return arg; } void PTXKernelArguments::do_int() { - if (is_after_invocation()) { + // If the parameter is a return value, + if (is_return_type()) { + if (is_kernel_arg_setup()) { + // Allocate device memory for T_INT return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_INT_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; return; + } + // Push _dev_return_value to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_INT return value pointer on device. 
Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } else { - // Get the next java argument and its value which should be a T_INT - oop arg = next_arg(T_INT); - // Copy the java argument value to kernelArgBuffer - jvalue intval; - if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; - _bufferOffset += sizeof(intval.i); + _bufferOffset += sizeof(_dev_return_value); + } else { + // Get the next java argument and its value which should be a T_INT + oop arg = next_arg(T_INT); + // Copy the java argument value to kernelArgBuffer + jvalue intval; + if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); + _success = false; + return; } - return; + if (is_kernel_arg_setup()) { + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; + } + // Advance _bufferOffset + _bufferOffset += sizeof(intval.i); + } + return; } void PTXKernelArguments::do_float() { - if (is_after_invocation()) { + // If the parameter is a return value, + if (is_return_type()) { + if (is_kernel_arg_setup()) { + // Allocate device memory for T_FLOAT return value pointer on device. 
Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_FLOAT_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; return; + } + // Push _dev_return_value to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_INT return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_FLOAT_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } else { - // Get the next java argument and its value which should be a T_INT - oop arg = next_arg(T_FLOAT); - // Copy the java argument value to kernelArgBuffer - jvalue floatval; - if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f; - _bufferOffset += sizeof(floatval.f); + // Advance _bufferOffset + _bufferOffset += sizeof(_dev_return_value); + } else { + // Get the next java argument and its value which should be a T_FLOAT + oop arg = next_arg(T_FLOAT); + // Copy the java argument value to kernelArgBuffer + jvalue floatval; + if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_FLOAT"); + _success = false; + return; } - return; + if (is_kernel_arg_setup()) { + 
*((jfloat*) &_kernelArgBuffer[_bufferOffset]) = floatval.f; + } + // Advance _bufferOffset past the jfloat slot (written bit-for-bit, not converted to an integer) + _bufferOffset += sizeof(floatval.f); + } + return; } void PTXKernelArguments::do_double() { - if (is_after_invocation()) { + // If the parameter is a return value, + jvalue doubleval; + if (is_return_type()) { + if (is_kernel_arg_setup()) { + // Allocate device memory for T_DOUBLE return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_DOUBLE_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; return; + } + // Push _dev_return_value to _kernelArgBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // If the parameter is a return value, - jvalue doubleval; - if (is_return_type()) { - // Allocate device memory for T_INT return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_DOUBLE_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - // _bufferOffset += sizeof(_return_value_ptr); - _bufferOffset += sizeof(doubleval.d); - } else { - // Get the next java argument and its value which should be a T_INT - oop arg = next_arg(T_FLOAT); - // Copy the java argument value to kernelArgBuffer - if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d; - _bufferOffset += sizeof(doubleval.d); + // Advance 
_bufferOffset + _bufferOffset += sizeof(doubleval.d); + } else { + // Get the next java argument and its value which should be a T_DOUBLE + oop arg = next_arg(T_DOUBLE); + // Copy the java argument value to kernelArgBuffer + if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_DOUBLE"); + _success = false; + return; } - return; + if (is_kernel_arg_setup()) { + *((jdouble*) &_kernelArgBuffer[_bufferOffset]) = doubleval.d; + } + // Advance _bufferOffset past the jdouble slot (written bit-for-bit, not converted to an integer) + _bufferOffset += sizeof(doubleval.d); + } + return; } void PTXKernelArguments::do_long() { - if (is_after_invocation()) { - return; - } // If the parameter is a return value, if (is_return_type()) { - // Allocate device memory for T_LONG return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; + if (is_kernel_arg_setup()) { + // Allocate device memory for T_LONG return value pointer on device. 
Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_LONG_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; + return; + } + // Push _dev_return_value to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); + // Advance _bufferOffset + _bufferOffset += sizeof(_dev_return_value); } else { // Get the next java argument and its value which should be a T_LONG oop arg = next_arg(T_LONG); @@ -163,119 +168,132 @@ _success = false; return; } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j; + if (is_kernel_arg_setup()) { + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j; + } + // Advance _bufferOffset _bufferOffset += sizeof(val.j); } return; } void PTXKernelArguments::do_byte() { - if (is_after_invocation()) { + // If the parameter is a return value, + if (is_return_type()) { + if (is_kernel_arg_setup()) { + // Allocate device memory for T_BYTE return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; return; + } + // Push _dev_return_value to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_BYTE return value pointer on device. 
Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } else { - // Get the next java argument and its value which should be a T_BYTE - oop arg = next_arg(T_BYTE); - // Copy the java argument value to kernelArgBuffer - jvalue val; - if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; - _bufferOffset += sizeof(val.b); + // Advance _bufferOffset + _bufferOffset += sizeof(_dev_return_value); + } else { + // Get the next java argument and its value which should be a T_BYTE + oop arg = next_arg(T_BYTE); + // Copy the java argument value to kernelArgBuffer + jvalue val; + if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); + _success = false; + return; } - return; + if (is_kernel_arg_setup()) { + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; + } + // Advance _bufferOffset + _bufferOffset += sizeof(val.b); + } + return; } void PTXKernelArguments::do_bool() { - if (is_after_invocation()) { + // If the parameter is a return value, + if (is_return_type()) { + if (is_kernel_arg_setup()) { + // Allocate device memory for T_BOOLEAN return value pointer on device. 
Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BOOLEAN_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; return; + } + // Push _dev_return_value to _kernelArgBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; } - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_BYTE return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BOOLEAN_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } else { - // Get the next java argument and its value which should be a T_BYTE - oop arg = next_arg(T_BYTE); - // Copy the java argument value to kernelArgBuffer - jvalue val; - if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z; - _bufferOffset += sizeof(val.z); + // Advance _bufferOffset + _bufferOffset += sizeof(_dev_return_value); + } else { + // Get the next java argument and its value which should be a T_BOOLEAN + oop arg = next_arg(T_BOOLEAN); + // Copy the java argument value to kernelArgBuffer + jvalue val; + if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BOOLEAN"); + _success = false; + return; } - return; + if (is_kernel_arg_setup()) { + *((gpu::Ptx::CUdeviceptr*) 
&_kernelArgBuffer[_bufferOffset]) = val.z; + } + // Advance _bufferOffset + _bufferOffset += sizeof(val.z); + } + return; } void PTXKernelArguments::do_array(int begin, int end) { - gpu::Ptx::CUdeviceptr _array_ptr; - int status; - - // Get the next java argument and its value which should be a T_ARRAY - oop arg = next_arg(T_OBJECT); - int array_size = arg->size() * HeapWordSize; + // Get the next java argument and its value which should be a T_ARRAY + oop arg = next_arg(T_OBJECT); + assert(arg->is_array(), "argument value not an array"); + // Size of array argument + int argSize = arg->size() * HeapWordSize; + // Device pointer to array argument. + gpu::Ptx::CUdeviceptr arrayArgOnDev; + int status; - if (is_after_invocation()) { - _array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]); - status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status); - _success = false; - return; - } else { - // tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size); - } - return; + if (is_kernel_arg_setup()) { + // Allocate device memory for array argument on device. Size in bytes + status = gpu::Ptx::_cuda_cu_memalloc(&arrayArgOnDev, argSize); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for array argument on device", + status); + _success = false; + return; } - // Allocate device memory for T_ARRAY return value pointer on device. 
Size in bytes - status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size); + // Copy array argument to device + status = gpu::Ptx::_cuda_cu_memcpy_htod(arrayArgOnDev, arg, argSize); if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status); - _success = false; - return; - } else { - // tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size); + tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument content to device memory", + status); + _success = false; + return; } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - return; + + // Push device array argument to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = arrayArgOnDev; + } else { + arrayArgOnDev = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]); + status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, arrayArgOnDev, argSize); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status); + _success = false; + return; + } + } + + // Advance _bufferOffset + _bufferOffset += sizeof(arrayArgOnDev); + return; } void PTXKernelArguments::do_void() { - return; + return; } // TODO implement other do_* diff -r 0916da3633ac -r f020e149c1b6 src/gpu/ptx/vm/ptxKernelArguments.hpp --- a/src/gpu/ptx/vm/ptxKernelArguments.hpp Tue Oct 22 17:03:01 2013 +0200 +++ b/src/gpu/ptx/vm/ptxKernelArguments.hpp Tue Oct 22 14:35:41 2013 -0400 @@ -42,7 +42,13 @@ char _kernelArgBuffer[1024]; // Current offset into _kernelArgBuffer size_t _bufferOffset; - 
gpu::Ptx::CUdeviceptr _return_value_ptr; + // Device pointer holding return value + gpu::Ptx::CUdeviceptr _dev_return_value; + + // Indicates if signature iteration is being done during kernel + // setup i.e., java arguments are being copied to device pointers. + bool _kernelArgSetup; + private: // Array of java argument oops arrayOop _args; @@ -51,7 +57,6 @@ // Flag to indicate successful creation of kernel argument buffer bool _success; - bool _afterInvoocation; // Get next java argument oop next_arg(BasicType expectedType); @@ -62,7 +67,9 @@ _args = args; _success = true; _bufferOffset = 0; - _return_value_ptr = 0; + _dev_return_value = 0; + _kernelArgSetup = true; + //_dev_call_by_reference_args_index = 0; if (!is_static) { // TODO : Create a device argument for receiver object and add it to _kernelBuffer tty->print_cr("{CUDA] ****** TODO: Support for execution of non-static java methods not implemented yet."); @@ -80,23 +87,23 @@ return _bufferOffset; } - void reiterate() { - _afterInvoocation = true; - _bufferOffset = 0; - _index = 0; - iterate(); - } + void copyRefArgsFromDtoH() { + _kernelArgSetup = false; + _bufferOffset = 0; + _index = 0; + iterate(); + } - inline bool is_after_invocation() { - return _afterInvoocation; - } + inline bool is_kernel_arg_setup() { + return _kernelArgSetup; + } // Get the return oop value oop get_return_oop(); // get device return value ptr - gpu::Ptx::CUdeviceptr get_return_value_ptr() { - return _return_value_ptr; + gpu::Ptx::CUdeviceptr get_dev_return_value() { + return _dev_return_value; }