# HG changeset patch # User Morris Meyer # Date 1381111643 14400 # Node ID 67a1e27a8dbb0945dc974b3ee3d8ac8af04743d3 # Parent 61767ccd4600af886a36723048e29dc1f37e02e4 PTX initial float and double diff -r 61767ccd4600 -r 67a1e27a8dbb graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java --- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java Sun Oct 06 18:15:56 2013 -0400 +++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java Sun Oct 06 22:07:23 2013 -0400 @@ -22,6 +22,7 @@ */ package com.oracle.graal.asm.ptx; +import static com.oracle.graal.asm.ptx.PTXStateSpace.*; import static com.oracle.graal.api.code.ValueUtil.*; import com.oracle.graal.asm.Label; @@ -536,7 +537,13 @@ } public void emit(PTXAssembler asm) { - asm.emitString("cvt." + super.emit()); + if (dest.getKind() == Kind.Float || + dest.getKind() == Kind.Double) { + // round-to-nearest-even: Java int->float and double->float conversions round to nearest + asm.emitString("cvt.rn." + super.emit()); + } else { + asm.emitString("cvt."
+ super.emit()); + } } } diff -r 61767ccd4600 -r 67a1e27a8dbb graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java Sun Oct 06 18:15:56 2013 -0400 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java Sun Oct 06 22:07:23 2013 -0400 @@ -31,41 +31,44 @@ /* PTX ISA 3.1 - 8.7.3 Floating-Point Instructions */ public class FloatPTXTest extends PTXTestBase { - @Ignore @Test public void testAdd() { - CompilationResult r = compile("testAdd2F"); - if (r.getTargetCode() == null) { - printReport("Compilation of testAdd2F FAILED"); + Float ret = (Float) invoke(compile("testAdd2I"), 42, 43); + if (ret != null) { + printReport("testAdd2I: " + ret); + } else { + printReport("testAdd2I: no VALUE"); } - /* - r = compile("testAdd2D"); - if (r.getTargetCode() == null) { - printReport("Compilation of testAdd2D FAILED"); + ret = (Float) invoke(compile("testAdd2F"), 42.1F, 43.5F); + if (ret != null) { + printReport("testAdd2F: " + ret); + } else { + printReport("testAdd2F: no VALUE"); } - r = compile("testAddFConst"); - if (r.getTargetCode() == null) { - printReport("Compilation of testAddFConst FAILED"); - } - r = compile("testAddConstF"); - if (r.getTargetCode() == null) { - printReport("Compilation of testConstF FAILED"); + ret = (Float) invoke(compile("testAddFConst"), 42.1F); + if (ret != null) { + printReport("testAddFConst: " + ret); + } else { + printReport("testAddFConst: no VALUE"); } - r = compile("testAddDConst"); - if (r.getTargetCode() == null) { - printReport("Compilation of testAddDConst FAILED"); + + Double dret = (Double) invoke(compile("testAdd2D"), 42.1, 43.5); + if (dret != null) { + printReport("testAdd2D: " + dret); + } else { + printReport("testAdd2D: no VALUE"); } - r = compile("testAddConstD"); - if (r.getTargetCode() == null) { - printReport("Compilation of 
testConstD FAILED"); - } - */ + + } + + public static float testAdd2I(int a, int b) { + return (float) (a + b); } public static float testAdd2F(float a, float b) { - return a + b; + return (a + b); } public static double testAdd2D(double a, double b) { diff -r 61767ccd4600 -r 67a1e27a8dbb graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java --- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java Sun Oct 06 18:15:56 2013 -0400 +++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java Sun Oct 06 22:07:23 2013 -0400 @@ -85,7 +85,8 @@ } @Override - public TargetMethodAssembler newAssembler(LIRGenerator lirGen, CompilationResult compilationResult) { + public TargetMethodAssembler newAssembler(LIRGenerator lirGen, + CompilationResult compilationResult) { // Omit the frame of the method: // - has no spill slots or other slots allocated during register allocation // - has no callee-saved registers @@ -99,7 +100,9 @@ return tasm; } - private static void emitKernelEntry(TargetMethodAssembler tasm, LIRGenerator lirGen, ResolvedJavaMethod codeCacheOwner) { + private static void emitKernelEntry(TargetMethodAssembler tasm, + LIRGenerator lirGen, + ResolvedJavaMethod codeCacheOwner) { // Emit PTX kernel entry text based on PTXParameterOp // instructions in the start block. Remove the instructions // once kernel entry text and directives are emitted to @@ -109,8 +112,8 @@ Buffer codeBuffer = tasm.asm.codeBuffer; // Emit initial boiler-plate directives. 
- codeBuffer.emitString(".version 2.1"); - codeBuffer.emitString(".target sm_20"); + codeBuffer.emitString(".version 3.0"); + codeBuffer.emitString(".target sm_30"); codeBuffer.emitString0(".entry " + name + " ("); codeBuffer.emitString(""); @@ -140,9 +143,13 @@ } // Emit .reg space declarations - private static void emitRegisterDecl(TargetMethodAssembler tasm, LIRGenerator lirGen, + private static void emitRegisterDecl(TargetMethodAssembler tasm, + LIRGenerator lirGen, ResolvedJavaMethod codeCacheOwner) { - assert codeCacheOwner != null : lirGen.getGraph() + " is not associated with a method"; + + assert codeCacheOwner != null : + lirGen.getGraph() + " is not associated with a method"; + Buffer codeBuffer = tasm.asm.codeBuffer; final SortedSet signed32 = new TreeSet<>(); diff -r 61767ccd4600 -r 67a1e27a8dbb graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java --- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java Sun Oct 06 18:15:56 2013 -0400 +++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java Sun Oct 06 22:07:23 2013 -0400 @@ -912,24 +912,46 @@ throw GraalInternalError.unimplemented("PTXLIRGenerator.visitInfopointNode()"); } - public Variable emitLoadParam(Kind kind, Value address, DeoptimizingNode deopting) { + public Variable emitLoadParam(Kind kind, Value address, + DeoptimizingNode deopting) { + PTXAddressValue loadAddress = asAddress(address); Variable result = newVariable(kind); - append(new LoadParamOp(kind, result, loadAddress, deopting != null ? state(deopting) : null)); + append(new LoadParamOp(kind, result, loadAddress, + deopting != null ? 
state(deopting) : null)); + return result; } - public Variable emitLoadReturnAddress(Kind kind, Value address, DeoptimizingNode deopting) { + public Variable emitLoadReturnAddress(Kind kind, Value address, + DeoptimizingNode deopting) { + PTXAddressValue loadAddress = asAddress(address); - Variable result = newVariable(kind); - append(new LoadReturnAddrOp(kind, result, loadAddress, deopting != null ? state(deopting) : null)); + Variable result; + switch (kind) { + case Float: + result = newVariable(Kind.Int); + break; + case Double: + result = newVariable(Kind.Long); + break; + default: + result = newVariable(kind); + + } + append(new LoadReturnAddrOp(kind, result, loadAddress, + deopting != null ? state(deopting) : null)); + return result; } - public void emitStoreReturnValue(Kind kind, Value address, Value inputVal, DeoptimizingNode deopting) { + public void emitStoreReturnValue(Kind kind, Value address, Value inputVal, + DeoptimizingNode deopting) { + PTXAddressValue storeAddress = asAddress(address); Variable input = load(inputVal); - append(new StoreReturnValOp(kind, storeAddress, input, deopting != null ? state(deopting) : null)); + append(new StoreReturnValOp(kind, storeAddress, input, + deopting != null ? 
state(deopting) : null)); } @Override diff -r 61767ccd4600 -r 67a1e27a8dbb graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java --- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java Sun Oct 06 18:15:56 2013 -0400 +++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java Sun Oct 06 22:07:23 2013 -0400 @@ -33,10 +33,13 @@ public class PTXTargetMethodAssembler extends TargetMethodAssembler { - private static CompilerToGPU toGPU = HotSpotGraalRuntime.graalRuntime().getCompilerToGPU(); + private static CompilerToGPU toGPU = + HotSpotGraalRuntime.graalRuntime().getCompilerToGPU(); + private static boolean validDevice = toGPU.deviceInit(); - private static final int totalProcessors = (validDevice ? toGPU.availableProcessors() : 0); + private static final int totalProcessors = + (validDevice ? toGPU.availableProcessors() : 0); public static int getAvailableProcessors() { return totalProcessors; @@ -44,8 +47,12 @@ // detach ?? 
- public PTXTargetMethodAssembler(TargetDescription target, CodeCacheProvider runtime, FrameMap frameMap, - AbstractAssembler asm, FrameContext frameContext, CompilationResult compilationResult) { + public PTXTargetMethodAssembler(TargetDescription target, + CodeCacheProvider runtime, + FrameMap frameMap, + AbstractAssembler asm, + FrameContext frameContext, + CompilationResult compilationResult) { super(target, runtime, frameMap, asm, frameContext, compilationResult); } @@ -53,11 +60,14 @@ public CompilationResult finishTargetMethod(StructuredGraph graph) { ResolvedJavaMethod method = graph.method(); assert method != null : graph + " is not associated wth a method"; - ExternalCompilationResult graalCompile = (ExternalCompilationResult) super.finishTargetMethod(graph); + + ExternalCompilationResult graalCompile = + (ExternalCompilationResult) super.finishTargetMethod(graph); try { if ((validDevice) && (graalCompile.getTargetCode() != null)) { - long kernel = toGPU.generateKernel(graalCompile.getTargetCode(), method.getName()); + long kernel = toGPU.generateKernel(graalCompile.getTargetCode(), + method.getName()); graalCompile.setEntryPoint(kernel); } } catch (Throwable th) { diff -r 61767ccd4600 -r 67a1e27a8dbb graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java --- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java Sun Oct 06 18:15:56 2013 -0400 +++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java Sun Oct 06 22:07:23 2013 -0400 @@ -104,7 +104,14 @@ int currentStackOffset = 0; Kind returnKind = returnType == null ? Kind.Void : returnType.getKind(); - AllocatableValue returnLocation = returnKind == Kind.Void ? 
Value.ILLEGAL : new Variable(returnKind, currentGeneral++); + + AllocatableValue returnLocation; + if (returnKind == Kind.Void) { + returnLocation = Value.ILLEGAL; + } else { + returnLocation = new Variable(returnKind, currentGeneral++); + } + AllocatableValue[] locations = new AllocatableValue[parameterTypes.length]; for (int i = 0; i < parameterTypes.length; i++) { diff -r 61767ccd4600 -r 67a1e27a8dbb graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java --- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java Sun Oct 06 18:15:56 2013 -0400 +++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java Sun Oct 06 22:07:23 2013 -0400 @@ -43,7 +43,8 @@ @Use({COMPOSITE}) protected PTXAddressValue address; @State protected LIRFrameState state; - public LoadOp(Kind kind, Variable result, PTXAddressValue address, LIRFrameState state) { + public LoadOp(Kind kind, Variable result, PTXAddressValue address, + LIRFrameState state) { this.kind = kind; this.result = result; this.address = address; @@ -62,7 +63,8 @@ case Float: case Double: case Object: - new Ld(Global, result, addr.getBase(), Constant.forLong(addr.getDisplacement())).emit(masm); + new Ld(Global, result, addr.getBase(), + Constant.forLong(addr.getDisplacement())).emit(masm); break; default: throw GraalInternalError.shouldNotReachHere(); @@ -97,7 +99,8 @@ case Float: case Double: case Object: - new St(Global, input, addr.getBase(), Constant.forLong(addr.getDisplacement())).emit(masm); + new St(Global, input, addr.getBase(), + Constant.forLong(addr.getDisplacement())).emit(masm); break; default: throw GraalInternalError.shouldNotReachHere("missing: " + address.getKind()); @@ -114,7 +117,8 @@ @Use({COMPOSITE}) protected PTXAddressValue address; @State protected LIRFrameState state; - public LoadParamOp(Kind kind, Variable result, PTXAddressValue address, LIRFrameState state) { + public LoadParamOp(Kind kind, Variable result, PTXAddressValue address, + 
LIRFrameState state) { this.kind = kind; this.result = result; this.address = address; @@ -133,7 +137,8 @@ case Float: case Double: case Object: - new Ld(Parameter, result, addr.getBase(), Constant.forLong(addr.getDisplacement())).emit(masm); + new Ld(Parameter, result, addr.getBase(), + Constant.forLong(addr.getDisplacement())).emit(masm); break; default: throw GraalInternalError.shouldNotReachHere(); @@ -151,7 +156,8 @@ @Use({COMPOSITE}) protected PTXAddressValue address; @State protected LIRFrameState state; - public LoadReturnAddrOp(Kind kind, Variable result, PTXAddressValue address, LIRFrameState state) { + public LoadReturnAddrOp(Kind kind, Variable result, + PTXAddressValue address, LIRFrameState state) { this.kind = kind; this.result = result; this.address = address; @@ -166,7 +172,8 @@ case Long: case Float: case Double: - new Ld(Parameter, result, addr.getBase(), Constant.forLong(addr.getDisplacement())).emit(masm); + new Ld(Parameter, result, addr.getBase(), + Constant.forLong(addr.getDisplacement())).emit(masm); break; default: throw GraalInternalError.shouldNotReachHere(); @@ -183,7 +190,8 @@ @Use({REG}) protected Variable input; @State protected LIRFrameState state; - public StoreReturnValOp(Kind kind, PTXAddressValue address, Variable input, LIRFrameState state) { + public StoreReturnValOp(Kind kind, PTXAddressValue address, + Variable input, LIRFrameState state) { this.kind = kind; this.address = address; this.input = input; @@ -202,7 +210,8 @@ case Float: case Double: case Object: - new St(Global, input, addr.getBase(), Constant.forLong(addr.getDisplacement())).emit(masm); + new St(Global, input, addr.getBase(), + Constant.forLong(addr.getDisplacement())).emit(masm); break; default: throw GraalInternalError.shouldNotReachHere("missing: " + address.getKind()); diff -r 61767ccd4600 -r 67a1e27a8dbb src/gpu/ptx/vm/gpu_ptx.cpp --- a/src/gpu/ptx/vm/gpu_ptx.cpp Sun Oct 06 18:15:56 2013 -0400 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Sun Oct 06 22:07:23 2013 -0400 
@@ -415,6 +415,17 @@ ret.set_jfloat(return_val); } break; + case T_DOUBLE: + { + double return_val; + status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_DOUBLE_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status); + return false; + } + ret.set_jdouble(return_val); + } + break; case T_LONG: { long return_val; diff -r 61767ccd4600 -r 67a1e27a8dbb src/gpu/ptx/vm/ptxKernelArguments.cpp --- a/src/gpu/ptx/vm/ptxKernelArguments.cpp Sun Oct 06 18:15:56 2013 -0400 +++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp Sun Oct 06 22:07:23 2013 -0400 @@ -104,6 +104,39 @@ return; } +void PTXKernelArguments::do_double() { + if (is_after_invocation()) { + return; + } + // If the parameter is a return value, + jvalue doubleval; + if (is_return_type()) { + // Allocate device memory for T_DOUBLE return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_DOUBLE_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; + return; + } + // Push _return_value_ptr to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; + // _bufferOffset += sizeof(_return_value_ptr); + _bufferOffset += sizeof(doubleval.d); + } else { + // Get the next java argument and its value which should be a T_DOUBLE + oop arg = next_arg(T_DOUBLE); + // Copy the java argument value to kernelArgBuffer + if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_DOUBLE"); + _success = false; + return; + } + *((jdouble*) &_kernelArgBuffer[_bufferOffset]) = doubleval.d; + _bufferOffset += sizeof(doubleval.d); + } + return; +} + void PTXKernelArguments::do_long() { if (is_after_invocation()) { return; diff -r
61767ccd4600 -r 67a1e27a8dbb src/gpu/ptx/vm/ptxKernelArguments.hpp --- a/src/gpu/ptx/vm/ptxKernelArguments.hpp Sun Oct 06 18:15:56 2013 -0400 +++ b/src/gpu/ptx/vm/ptxKernelArguments.hpp Sun Oct 06 22:07:23 2013 -0400 @@ -28,12 +28,13 @@ #include "runtime/gpu.hpp" #include "runtime/signature.hpp" -#define T_BYTE_SIZE 1 -#define T_BOOLEAN_SIZE 4 -#define T_INT_BYTE_SIZE 4 -#define T_FLOAT_BYTE_SIZE 4 -#define T_LONG_BYTE_SIZE 8 -#define T_ARRAY_BYTE_SIZE 8 +#define T_BYTE_SIZE 1 +#define T_BOOLEAN_SIZE 4 +#define T_INT_BYTE_SIZE 4 +#define T_FLOAT_BYTE_SIZE 4 +#define T_DOUBLE_BYTE_SIZE 8 +#define T_LONG_BYTE_SIZE 8 +#define T_ARRAY_BYTE_SIZE 8 class PTXKernelArguments : public SignatureIterator { public: @@ -103,6 +104,7 @@ void do_bool(); void do_int(); void do_float(); + void do_double(); void do_long(); void do_array(int begin, int end); void do_void(); @@ -115,11 +117,6 @@ /* TODO : To be implemented */ guarantee(false, "do_short:NYI"); } - inline void do_double() { - /* TODO : To be implemented */ - guarantee(false, "do_double:NYI"); - } - inline void do_object() { /* TODO : To be implemented */ guarantee(false, "do_object:NYI"); diff -r 61767ccd4600 -r 67a1e27a8dbb src/share/vm/graal/graalCompilerToGPU.cpp --- a/src/share/vm/graal/graalCompilerToGPU.cpp Sun Oct 06 18:15:56 2013 -0400 +++ b/src/share/vm/graal/graalCompilerToGPU.cpp Sun Oct 06 22:07:23 2013 -0400 @@ -96,7 +96,20 @@ } else { oop o = java_lang_boxing_object::create(ptxka.get_ret_type(), (jvalue *) result.get_value_addr(), CHECK_NULL); if (TraceGPUInteraction) { - tty->print_cr("GPU execution returned %d", result.get_jint()); + switch (ptxka.get_ret_type()) { + case T_INT: + tty->print_cr("GPU execution returned %d", result.get_jint()); + break; + case T_FLOAT: + tty->print_cr("GPU execution returned %f", result.get_jfloat()); + break; + case T_DOUBLE: + tty->print_cr("GPU execution returned %f", result.get_jdouble()); + break; + default: + tty->print_cr("GPU returned unhandled"); + break; + } } 
return JNIHandles::make_local(o); } @@ -135,7 +148,20 @@ } else { oop o = java_lang_boxing_object::create(ptxka.get_ret_type(), (jvalue *) result.get_value_addr(), CHECK_NULL); if (TraceGPUInteraction) { - tty->print_cr("GPU execution returned %d", result.get_jint()); + switch (ptxka.get_ret_type()) { + case T_INT: + tty->print_cr("GPU execution returned %d", result.get_jint()); + break; + case T_FLOAT: + tty->print_cr("GPU execution returned %f", result.get_jfloat()); + break; + case T_DOUBLE: + tty->print_cr("GPU execution returned %g", result.get_jdouble()); + break; + default: + tty->print_cr("GPU returned unhandled"); + break; + } } return JNIHandles::make_local(o); }