# HG changeset patch # User Morris Meyer # Date 1380983478 14400 # Node ID c7abc84110112d4216c6f43ccb26e37802588dae # Parent ec267141f753dab40bb4c8a05d9ca93cd4863f46 Fixed BasicPTXTest and IntegerPTXTest diff -r ec267141f753 -r c7abc8411011 graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java --- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java Fri Oct 04 11:56:18 2013 +0200 +++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java Sat Oct 05 10:31:18 2013 -0400 @@ -185,7 +185,7 @@ assert v != null; if (isConstant(v)) { - return (emitConstant(v)); + return (emitConstant(v, comma)); } else { return (emitRegister((Variable) v, comma)); } @@ -195,21 +195,31 @@ return (" %r" + v.index + (comma ? "," : "")); } - public String emitConstant(Value v) { + public String emitConstant(Value v, boolean comma) { Constant constant = (Constant) v; + String str = null; switch (v.getKind().getTypeChar()) { case 'i': - return (String.valueOf((int) constant.asLong())); + str = String.valueOf((int) constant.asLong()); + break; case 'f': - return (String.valueOf(constant.asFloat())); + str = String.valueOf(constant.asFloat()); + break; case 'j': - return (String.valueOf(constant.asLong())); + str = String.valueOf(constant.asLong()); + break; case 'd': - return (String.valueOf(constant.asDouble())); + str = String.valueOf(constant.asDouble()); + break; default: throw GraalInternalError.shouldNotReachHere(); } + if (comma) { + return (str + ","); + } else { + return str; + } } } @@ -581,8 +591,33 @@ } public void emit(PTXAssembler asm) { - asm.emitString(".param ." + typeForKind(dest.getKind()) + emitParameter(dest) + (lastParameter ? "" : ",")); + asm.emitString(".param ." + paramForKind(dest.getKind()) + emitParameter(dest) + (lastParameter ? 
"" : ",")); } + + public String paramForKind(Kind k) { + switch (k.getTypeChar()) { + case 'z': + case 'f': + return "s32"; + case 'b': + return "s8"; + case 's': + return "s16"; + case 'c': + return "u16"; + case 'i': + return "s32"; + case 'j': + return "s64"; + case 'd': + return "f64"; + case 'a': + return "u64"; + default: + throw GraalInternalError.shouldNotReachHere(); + } + } + } public final void popc_b32(Register d, Register a) { diff -r ec267141f753 -r c7abc8411011 graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java Fri Oct 04 11:56:18 2013 +0200 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java Sat Oct 05 10:31:18 2013 -0400 @@ -30,7 +30,6 @@ /** * Test class for small Java methods compiled to PTX kernels. */ -@Ignore public class BasicPTXTest extends PTXTestBase { @Test diff -r ec267141f753 -r c7abc8411011 graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java Fri Oct 04 11:56:18 2013 +0200 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java Sat Oct 05 10:31:18 2013 -0400 @@ -29,7 +29,6 @@ import com.oracle.graal.api.code.CompilationResult; /* PTX ISA 3.1 - 8.7.3 Floating-Point Instructions */ -@Ignore public class FloatPTXTest extends PTXTestBase { @Ignore diff -r ec267141f753 -r c7abc8411011 graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java Fri Oct 04 11:56:18 2013 +0200 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java Sat Oct 05 10:31:18 2013 
-0400 @@ -30,8 +30,8 @@ @Test public void testAdd() { - /* - Integer r4 = (Integer) invoke(compile("testAdd2B"), (byte) 6, (byte) 4); + + /* Integer r4 = (Integer) invoke(compile("testAdd2B"), (byte) 6, (byte) 4); if (r4 == null) { printReport("testAdd2B FAILED"); } else if (r4.intValue() == testAdd2B((byte) 6, (byte) 4)) { @@ -49,14 +49,14 @@ printReport("testAdd2I FAILED"); } - /* Long r2 = (Long) invoke(compile("testAdd2L"), (long) 12, (long) 6); + Long r2 = (Long) invoke(compile("testAdd2L"), (long) 12, (long) 6); if (r2 == null) { printReport("testAdd2L FAILED"); } else if (r2.longValue() == testAdd2L(12, 6)) { printReport("testAdd2L PASSED"); } else { printReport("testAdd2L FAILED"); - } + } r4 = (Integer) invoke(compile("testAddIConst"), 5); if (r4 == null) { @@ -74,7 +74,7 @@ printReport("testAddConstI PASSED"); } else { printReport("testAddConstI FAILED"); - } */ + } } public static int testAdd2I(int a, int b) { @@ -97,7 +97,6 @@ return 32 + a; } - @Ignore @Test public void testSub() { @@ -155,7 +154,6 @@ return 32 - a; } - @Ignore @Test public void testMul() { diff -r ec267141f753 -r c7abc8411011 src/gpu/ptx/vm/gpu_ptx.cpp --- a/src/gpu/ptx/vm/gpu_ptx.cpp Fri Oct 04 11:56:18 2013 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Sat Oct 05 10:31:18 2013 -0400 @@ -180,9 +180,58 @@ int total = nmp * ncores(major, minor); + int max_threads_per_block, warp_size, async_engines, can_map_host_memory, concurrent_kernels; + + status = _cuda_cu_device_get_attribute(&max_threads_per_block, + GRAAL_CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, + _cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: %d", _cu_device); + return 0; + } + + status = _cuda_cu_device_get_attribute(&warp_size, + GRAAL_CU_DEVICE_ATTRIBUTE_WARP_SIZE, + _cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_WARP_SIZE: %d", _cu_device); + return 0; + } + + status 
= _cuda_cu_device_get_attribute(&async_engines, + GRAAL_CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, + _cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT: %d", _cu_device); + return 0; + } + + status = _cuda_cu_device_get_attribute(&can_map_host_memory, + GRAAL_CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, + _cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: %d", _cu_device); + return 0; + } + + status = _cuda_cu_device_get_attribute(&concurrent_kernels, + GRAAL_CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, + _cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: %d", _cu_device); + return 0; + } + if (TraceGPUInteraction) { tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", _cu_device, major, minor); - tty->print_cr("[CUDA] Number of cores: %d", total); + tty->print_cr("[CUDA] Number of cores: %d async engines: %d can map host mem: %d concurrent kernels: %d", + total, async_engines, can_map_host_memory, concurrent_kernels); + tty->print_cr("[CUDA] Max threads per block: %d warp size: %d", max_threads_per_block, warp_size); } return (total); @@ -344,6 +393,17 @@ ret.set_jint(return_val); } break; + case T_FLOAT: + { + float return_val; + status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_FLOAT_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status); + return false; + } + ret.set_jfloat(return_val); + } + break; case T_LONG: { long return_val; diff -r ec267141f753 -r c7abc8411011 src/gpu/ptx/vm/gpu_ptx.hpp --- a/src/gpu/ptx/vm/gpu_ptx.hpp Fri Oct 04 11:56:18 2013 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.hpp Sat Oct 05 10:31:18 2013 -0400 @@ -31,10 +31,15 @@ */ #define GRAAL_CUDA_SUCCESS 0 /**< Device shares a unified 
address space with the host */ +#define GRAAL_CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK 1 #define GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING 41 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76 #define GRAAL_CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT 16 +#define GRAAL_CU_DEVICE_ATTRIBUTE_WARP_SIZE 10 +#define GRAAL_CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY 19 +#define GRAAL_CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS 31 +#define GRAAL_CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT 40 #define GRAAL_CU_JIT_MAX_REGISTERS 0 #define GRAAL_CU_JIT_THREADS_PER_BLOCK 1 #define GRAAL_CU_JIT_INFO_LOG_BUFFER 3 diff -r ec267141f753 -r c7abc8411011 src/gpu/ptx/vm/ptxKernelArguments.cpp --- a/src/gpu/ptx/vm/ptxKernelArguments.cpp Fri Oct 04 11:56:18 2013 +0200 +++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp Sat Oct 05 10:31:18 2013 -0400 @@ -41,35 +41,67 @@ } void PTXKernelArguments::do_int() { - if (is_after_invocation()) { - return; - } - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_INT return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; + if (is_after_invocation()) { + return; + } + // If the parameter is a return value, + if (is_return_type()) { + // Allocate device memory for T_INT return value pointer on device. 
Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; + return; + } + // Push _return_value_ptr to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; + _bufferOffset += sizeof(_return_value_ptr); + } else { + // Get the next java argument and its value which should be a T_INT + oop arg = next_arg(T_INT); + // Copy the java argument value to kernelArgBuffer + jvalue intval; + if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); + _success = false; + return; + } + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; + _bufferOffset += sizeof(intval.i); } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } else { - // Get the next java argument and its value which should be a T_INT - oop arg = next_arg(T_INT); - // Copy the java argument value to kernelArgBuffer - jvalue intval; - if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); - _success = false; - return; + return; +} + +void PTXKernelArguments::do_float() { + if (is_after_invocation()) { + return; } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; - _bufferOffset += sizeof(intval.i); - } - return; + // If the parameter is a return value, + if (is_return_type()) { + // Allocate device memory for T_FLOAT return value pointer on device. 
Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_FLOAT_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; + return; + } + // Push _return_value_ptr to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; + _bufferOffset += sizeof(_return_value_ptr); + } else { + // Get the next java argument and its value which should be a T_FLOAT + oop arg = next_arg(T_FLOAT); + // Copy the java argument value to kernelArgBuffer + jvalue floatval; + if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_FLOAT"); + _success = false; + return; + } + *((jfloat*) &_kernelArgBuffer[_bufferOffset]) = floatval.f; // store the float's bit pattern, not an integer conversion + _bufferOffset += sizeof(floatval.f); + } + return; } void PTXKernelArguments::do_long() { diff -r ec267141f753 -r c7abc8411011 src/gpu/ptx/vm/ptxKernelArguments.hpp --- a/src/gpu/ptx/vm/ptxKernelArguments.hpp Fri Oct 04 11:56:18 2013 +0200 +++ b/src/gpu/ptx/vm/ptxKernelArguments.hpp Sat Oct 05 10:31:18 2013 -0400 @@ -30,6 +30,7 @@ #define T_BYTE_SIZE 1 #define T_INT_BYTE_SIZE 4 +#define T_FLOAT_BYTE_SIZE 4 #define T_LONG_BYTE_SIZE 8 #define T_ARRAY_BYTE_SIZE 8 @@ -99,6 +100,7 @@ void do_byte(); void do_int(); + void do_float(); void do_long(); void do_array(int begin, int end); void do_void(); @@ -115,10 +117,6 @@ /* TODO : To be implemented */ guarantee(false, "do_short:NYI"); } - inline void do_float() { - /* TODO : To be implemented */ - guarantee(false, "do_float:NYI"); - } inline void do_double() { /* TODO : To be implemented */ guarantee(false, "do_double:NYI");