changeset 11908:cf4dd10ced32

Merge with 67a1e27a8dbb0945dc974b3ee3d8ac8af04743d3
author Michael Van De Vanter <michael.van.de.vanter@oracle.com>
date Sun, 06 Oct 2013 21:19:22 -0700
parents 873da100d113 (current diff) 67a1e27a8dbb (diff)
children 43bf803203c0 9fe53a7b42b8
files
diffstat 11 files changed, 199 insertions(+), 67 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Sun Oct 06 21:17:51 2013 -0700
+++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Sun Oct 06 21:19:22 2013 -0700
@@ -22,6 +22,7 @@
  */
 package com.oracle.graal.asm.ptx;
 
+import static com.oracle.graal.asm.ptx.PTXStateSpace.*;
 import static com.oracle.graal.api.code.ValueUtil.*;
 
 import com.oracle.graal.asm.Label;
@@ -536,7 +537,13 @@
         }
 
         public void emit(PTXAssembler asm) {
-            asm.emitString("cvt." + super.emit());
+            // Java rounds to nearest when a conversion yields float/double
+            // (JLS 5.1.2, 5.1.3); PTX spells that rounding modifier ".rn".
+            if (dest.getKind() == Kind.Float || dest.getKind() == Kind.Double) {
+                asm.emitString("cvt.rn." + super.emit());
+            } else {
+                asm.emitString("cvt." + super.emit());
+            }
         }
     }
     
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java	Sun Oct 06 21:17:51 2013 -0700
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java	Sun Oct 06 21:19:22 2013 -0700
@@ -31,41 +31,44 @@
 /* PTX ISA 3.1 - 8.7.3 Floating-Point Instructions */
 public class FloatPTXTest extends PTXTestBase {
 
-    @Ignore
     @Test
     public void testAdd() {
-        CompilationResult r = compile("testAdd2F");
-        if (r.getTargetCode() == null) {
-            printReport("Compilation of testAdd2F FAILED");
+        Float ret = (Float) invoke(compile("testAdd2I"), 42, 43);
+        if (ret != null) {
+            printReport("testAdd2I: " + ret);
+        } else {
+            printReport("testAdd2I: no VALUE");
         }
 
-        /*
-        r = compile("testAdd2D");
-        if (r.getTargetCode() == null) {
-            printReport("Compilation of testAdd2D FAILED");
+        ret = (Float) invoke(compile("testAdd2F"), 42.1F, 43.5F);
+        if (ret != null) {
+            printReport("testAdd2F: " + ret);
+        } else {
+            printReport("testAdd2F: no VALUE");
         }
 
-        r = compile("testAddFConst");
-        if (r.getTargetCode() == null) {
-            printReport("Compilation of testAddFConst FAILED");
-        }
-        r = compile("testAddConstF");
-        if (r.getTargetCode() == null) {
-            printReport("Compilation of testConstF FAILED");
+        ret = (Float) invoke(compile("testAddFConst"), 42.1F);
+        if (ret != null) {
+            printReport("testAddFConst: " + ret);
+        } else {
+            printReport("testAddFConst: no VALUE");
         }
-        r = compile("testAddDConst");
-        if (r.getTargetCode() == null) {
-            printReport("Compilation of testAddDConst FAILED");
+
+        Double dret = (Double) invoke(compile("testAdd2D"), 42.1, 43.5);
+        if (dret != null) {
+            printReport("testAdd2D: " + dret);
+        } else {
+            printReport("testAdd2D: no VALUE");
         }
-        r = compile("testAddConstD");
-        if (r.getTargetCode() == null) {
-            printReport("Compilation of testConstD FAILED");
-        }
-        */
+
+    }
+
+    public static float testAdd2I(int a, int b) {
+        return (float) (a + b);
     }
 
     public static float testAdd2F(float a, float b) {
-        return a + b;
+        return (a + b);
     }
 
     public static double testAdd2D(double a, double b) {
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Sun Oct 06 21:17:51 2013 -0700
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Sun Oct 06 21:19:22 2013 -0700
@@ -85,7 +85,8 @@
     }
 
     @Override
-    public TargetMethodAssembler newAssembler(LIRGenerator lirGen, CompilationResult compilationResult) {
+    public TargetMethodAssembler newAssembler(LIRGenerator lirGen,
+                                              CompilationResult compilationResult) {
         // Omit the frame of the method:
         // - has no spill slots or other slots allocated during register allocation
         // - has no callee-saved registers
@@ -99,7 +100,9 @@
         return tasm;
     }
 
-    private static void emitKernelEntry(TargetMethodAssembler tasm, LIRGenerator lirGen, ResolvedJavaMethod codeCacheOwner) {
+    private static void emitKernelEntry(TargetMethodAssembler tasm,
+                                        LIRGenerator lirGen,
+                                        ResolvedJavaMethod codeCacheOwner) {
         // Emit PTX kernel entry text based on PTXParameterOp
         // instructions in the start block. Remove the instructions
         // once kernel entry text and directives are emitted to
@@ -109,8 +112,8 @@
         Buffer codeBuffer = tasm.asm.codeBuffer;
 
         // Emit initial boiler-plate directives.
-        codeBuffer.emitString(".version 2.1");
-        codeBuffer.emitString(".target sm_20");
+        codeBuffer.emitString(".version 3.0");
+        codeBuffer.emitString(".target sm_30");
         codeBuffer.emitString0(".entry " + name + " (");
         codeBuffer.emitString("");
 
@@ -140,9 +143,13 @@
     }
 
     // Emit .reg space declarations
-    private static void emitRegisterDecl(TargetMethodAssembler tasm, LIRGenerator lirGen,
+    private static void emitRegisterDecl(TargetMethodAssembler tasm,
+                                         LIRGenerator lirGen,
                                          ResolvedJavaMethod codeCacheOwner) {
-        assert codeCacheOwner != null : lirGen.getGraph() + " is not associated with a method";
+
+        assert codeCacheOwner != null :
+               lirGen.getGraph() + " is not associated with a method";
+
         Buffer codeBuffer = tasm.asm.codeBuffer;
 
         final SortedSet<Integer> signed32 = new TreeSet<>();
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java	Sun Oct 06 21:17:51 2013 -0700
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java	Sun Oct 06 21:19:22 2013 -0700
@@ -912,24 +912,46 @@
         throw GraalInternalError.unimplemented("PTXLIRGenerator.visitInfopointNode()");
     }
 
-    public Variable emitLoadParam(Kind kind, Value address, DeoptimizingNode deopting) {
+    public Variable emitLoadParam(Kind kind, Value address,
+                                  DeoptimizingNode deopting) {
+        // Appends a LoadParamOp that loads from 'address' into a new variable of 'kind'.
         PTXAddressValue loadAddress = asAddress(address);
         Variable result = newVariable(kind);
-        append(new LoadParamOp(kind, result, loadAddress, deopting != null ? state(deopting) : null));
+        append(new LoadParamOp(kind, result, loadAddress,
+                               deopting != null ? state(deopting) : null));
+        // Frame state is attached only when 'deopting' is non-null; the new variable is returned.
         return result;
     }
 
-    public Variable emitLoadReturnAddress(Kind kind, Value address, DeoptimizingNode deopting) {
+    public Variable emitLoadReturnAddress(Kind kind, Value address,
+                                          DeoptimizingNode deopting) {
+        // Appends a LoadReturnAddrOp for 'address' and returns the variable it loads into.
         PTXAddressValue loadAddress = asAddress(address);
-        Variable result = newVariable(kind);
-        append(new LoadReturnAddrOp(kind, result, loadAddress, deopting != null ? state(deopting) : null));
+        Variable result;
+        switch (kind) {
+            case Float:
+                result = newVariable(Kind.Int);
+                break;
+            case Double:
+                result = newVariable(Kind.Long);
+                break;
+            default:
+                result = newVariable(kind);
+                // NOTE(review): Float/Double use Int/Long variables; presumably same-width integer registers - confirm
+        }
+        append(new LoadReturnAddrOp(kind, result, loadAddress,
+                                    deopting != null ? state(deopting) : null));
+        // The op still receives the original 'kind'; frame state is null when 'deopting' is null.
         return result;
     }
 
-    public void emitStoreReturnValue(Kind kind, Value address, Value inputVal, DeoptimizingNode deopting) {
+    public void emitStoreReturnValue(Kind kind, Value address, Value inputVal,
+                                     DeoptimizingNode deopting) {
+        // Loads 'inputVal' into a variable and appends a StoreReturnValOp storing it to 'address'.
         PTXAddressValue storeAddress = asAddress(address);
         Variable input = load(inputVal);
-        append(new StoreReturnValOp(kind, storeAddress, input, deopting != null ? state(deopting) : null));
+        append(new StoreReturnValOp(kind, storeAddress, input,
+                                    deopting != null ? state(deopting) : null));
     }
 
     @Override
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java	Sun Oct 06 21:17:51 2013 -0700
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java	Sun Oct 06 21:19:22 2013 -0700
@@ -33,10 +33,13 @@
 
 public class PTXTargetMethodAssembler extends TargetMethodAssembler {
 
-    private static CompilerToGPU toGPU = HotSpotGraalRuntime.graalRuntime().getCompilerToGPU();
+    private static CompilerToGPU toGPU =
+                    HotSpotGraalRuntime.graalRuntime().getCompilerToGPU();
+
     private static boolean validDevice = toGPU.deviceInit();
 
-    private static final int totalProcessors = (validDevice ? toGPU.availableProcessors() : 0);
+    private static final int totalProcessors =
+                            (validDevice ? toGPU.availableProcessors() : 0);
 
     public static int getAvailableProcessors() {
         return totalProcessors;
@@ -44,8 +47,12 @@
 
     // detach ??
 
-    public PTXTargetMethodAssembler(TargetDescription target, CodeCacheProvider runtime, FrameMap frameMap,
-                                    AbstractAssembler asm, FrameContext frameContext, CompilationResult compilationResult) {
+    public PTXTargetMethodAssembler(TargetDescription target,
+                                    CodeCacheProvider runtime,
+                                    FrameMap frameMap,
+                                    AbstractAssembler asm,
+                                    FrameContext frameContext,
+                                    CompilationResult compilationResult) {
         super(target, runtime, frameMap, asm, frameContext, compilationResult);
     }
 
@@ -53,11 +60,14 @@
     public CompilationResult finishTargetMethod(StructuredGraph graph) {
         ResolvedJavaMethod method = graph.method();
         assert method != null : graph + " is not associated wth a method";
-        ExternalCompilationResult graalCompile = (ExternalCompilationResult) super.finishTargetMethod(graph);
+
+        ExternalCompilationResult graalCompile =
+            (ExternalCompilationResult) super.finishTargetMethod(graph);
 
         try {
             if ((validDevice) && (graalCompile.getTargetCode() != null)) {
-                long kernel = toGPU.generateKernel(graalCompile.getTargetCode(), method.getName());
+                long kernel = toGPU.generateKernel(graalCompile.getTargetCode(),
+                                                   method.getName());
                 graalCompile.setEntryPoint(kernel);
             }
         } catch (Throwable th) {
--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java	Sun Oct 06 21:17:51 2013 -0700
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java	Sun Oct 06 21:19:22 2013 -0700
@@ -104,7 +104,14 @@
         int currentStackOffset = 0;
 
         Kind returnKind = returnType == null ? Kind.Void : returnType.getKind();
-        AllocatableValue returnLocation = returnKind == Kind.Void ? Value.ILLEGAL : new Variable(returnKind, currentGeneral++);
+
+        AllocatableValue returnLocation;
+        if (returnKind == Kind.Void) {
+            returnLocation = Value.ILLEGAL;
+        } else {
+            returnLocation = new Variable(returnKind, currentGeneral++);
+        }
+
         AllocatableValue[] locations = new AllocatableValue[parameterTypes.length];
 
         for (int i = 0; i < parameterTypes.length; i++) {
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java	Sun Oct 06 21:17:51 2013 -0700
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java	Sun Oct 06 21:19:22 2013 -0700
@@ -43,7 +43,8 @@
         @Use({COMPOSITE}) protected PTXAddressValue address;
         @State protected LIRFrameState state;
 
-        public LoadOp(Kind kind, Variable result, PTXAddressValue address, LIRFrameState state) {
+        public LoadOp(Kind kind, Variable result, PTXAddressValue address,
+                      LIRFrameState state) {
             this.kind = kind;
             this.result = result;
             this.address = address;
@@ -62,7 +63,8 @@
                 case Float:
                 case Double:
                 case Object:
-                    new Ld(Global, result, addr.getBase(), Constant.forLong(addr.getDisplacement())).emit(masm);
+                    new Ld(Global, result, addr.getBase(),
+                           Constant.forLong(addr.getDisplacement())).emit(masm);
                     break;
                 default:
                     throw GraalInternalError.shouldNotReachHere();
@@ -97,7 +99,8 @@
                 case Float:
                 case Double:
                 case Object:
-                    new St(Global, input, addr.getBase(), Constant.forLong(addr.getDisplacement())).emit(masm);
+                    new St(Global, input, addr.getBase(),
+                           Constant.forLong(addr.getDisplacement())).emit(masm);
                     break;
                 default:
                     throw GraalInternalError.shouldNotReachHere("missing: " + address.getKind());
@@ -114,7 +117,8 @@
         @Use({COMPOSITE}) protected PTXAddressValue address;
         @State protected LIRFrameState state;
 
-        public LoadParamOp(Kind kind, Variable result, PTXAddressValue address, LIRFrameState state) {
+        public LoadParamOp(Kind kind, Variable result, PTXAddressValue address,
+                           LIRFrameState state) {
             this.kind = kind;
             this.result = result;
             this.address = address;
@@ -133,7 +137,8 @@
                 case Float:
                 case Double:
                 case Object:
-                    new Ld(Parameter, result, addr.getBase(), Constant.forLong(addr.getDisplacement())).emit(masm);
+                    new Ld(Parameter, result, addr.getBase(),
+                           Constant.forLong(addr.getDisplacement())).emit(masm);
                     break;
                 default:
                     throw GraalInternalError.shouldNotReachHere();
@@ -151,7 +156,8 @@
         @Use({COMPOSITE}) protected PTXAddressValue address;
         @State protected LIRFrameState state;
 
-        public LoadReturnAddrOp(Kind kind, Variable result, PTXAddressValue address, LIRFrameState state) {
+        public LoadReturnAddrOp(Kind kind, Variable result,
+                                PTXAddressValue address, LIRFrameState state) {
             this.kind = kind;
             this.result = result;
             this.address = address;
@@ -166,7 +172,8 @@
                 case Long:
                 case Float:
                 case Double:
-                    new Ld(Parameter, result, addr.getBase(), Constant.forLong(addr.getDisplacement())).emit(masm);
+                    new Ld(Parameter, result, addr.getBase(),
+                           Constant.forLong(addr.getDisplacement())).emit(masm);
                     break;
                 default:
                     throw GraalInternalError.shouldNotReachHere();
@@ -183,7 +190,8 @@
         @Use({REG}) protected Variable input;
         @State protected LIRFrameState state;
 
-        public StoreReturnValOp(Kind kind, PTXAddressValue address, Variable input, LIRFrameState state) {
+        public StoreReturnValOp(Kind kind, PTXAddressValue address,
+                                Variable input, LIRFrameState state) {
             this.kind = kind;
             this.address = address;
             this.input = input;
@@ -202,7 +210,8 @@
                 case Float:
                 case Double:
                 case Object:
-                    new St(Global, input, addr.getBase(), Constant.forLong(addr.getDisplacement())).emit(masm);
+                    new St(Global, input, addr.getBase(),
+                           Constant.forLong(addr.getDisplacement())).emit(masm);
                     break;
                 default:
                     throw GraalInternalError.shouldNotReachHere("missing: " + address.getKind());
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Sun Oct 06 21:17:51 2013 -0700
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Sun Oct 06 21:19:22 2013 -0700
@@ -415,6 +415,17 @@
          ret.set_jfloat(return_val);
        }
        break;
+     case T_DOUBLE:
+       {
+         double return_val;
+         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_DOUBLE_BYTE_SIZE);
+         if (status != GRAAL_CUDA_SUCCESS) {
+           tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
+           return false;
+         }
+         ret.set_jdouble(return_val);
+       }
+       break;
      case T_LONG:
        {
          long return_val;
--- a/src/gpu/ptx/vm/ptxKernelArguments.cpp	Sun Oct 06 21:17:51 2013 -0700
+++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp	Sun Oct 06 21:19:22 2013 -0700
@@ -104,6 +104,39 @@
     return;
 }
 
+void PTXKernelArguments::do_double() {
+    // Marshals one double: the return-value slot or the next boxed java argument.
+    if (is_after_invocation()) {
+        return;
+    }
+    jvalue doubleval;
+    if (is_return_type()) {
+        // Allocate device memory for the T_DOUBLE return value. Size in bytes
+        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_DOUBLE_BYTE_SIZE);
+        if (status != GRAAL_CUDA_SUCCESS) {
+            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+            _success = false;
+            return;
+        }
+        // Push _return_value_ptr to _kernelArgBuffer
+        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+        // NOTE(review): offset advances by sizeof(double), not sizeof(_return_value_ptr) - confirm
+        _bufferOffset += sizeof(doubleval.d);
+    } else {
+        // Get the next java argument, which should be a boxed T_DOUBLE, and unbox it
+        oop arg = next_arg(T_DOUBLE);
+        if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) {
+            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_DOUBLE");
+            _success = false;
+            return;
+        }
+        // Store the double itself; assigning through CUdeviceptr* converted it to an integer
+        *((jdouble*) &_kernelArgBuffer[_bufferOffset]) = doubleval.d;
+        _bufferOffset += sizeof(doubleval.d);
+    }
+    return;
+}
+
 void PTXKernelArguments::do_long() {
   if (is_after_invocation()) {
     return;
--- a/src/gpu/ptx/vm/ptxKernelArguments.hpp	Sun Oct 06 21:17:51 2013 -0700
+++ b/src/gpu/ptx/vm/ptxKernelArguments.hpp	Sun Oct 06 21:19:22 2013 -0700
@@ -28,12 +28,13 @@
 #include "runtime/gpu.hpp"
 #include "runtime/signature.hpp"
 
-#define T_BYTE_SIZE       1
-#define T_BOOLEAN_SIZE    4
-#define T_INT_BYTE_SIZE   4
-#define T_FLOAT_BYTE_SIZE 4
-#define T_LONG_BYTE_SIZE  8
-#define T_ARRAY_BYTE_SIZE 8
+#define T_BYTE_SIZE        1
+#define T_BOOLEAN_SIZE     4
+#define T_INT_BYTE_SIZE    4
+#define T_FLOAT_BYTE_SIZE  4
+#define T_DOUBLE_BYTE_SIZE 8
+#define T_LONG_BYTE_SIZE   8
+#define T_ARRAY_BYTE_SIZE  8
 
 class PTXKernelArguments : public SignatureIterator {
 public:
@@ -103,6 +104,7 @@
   void do_bool();
   void do_int();
   void do_float();
+  void do_double();
   void do_long();
   void do_array(int begin, int end);
   void do_void();
@@ -115,11 +117,6 @@
     /* TODO : To be implemented */
     guarantee(false, "do_short:NYI");
   }
-  inline void do_double() {
-    /* TODO : To be implemented */
-    guarantee(false, "do_double:NYI");
-  }
-
   inline void do_object() {
     /* TODO : To be implemented */
     guarantee(false, "do_object:NYI");
--- a/src/share/vm/graal/graalCompilerToGPU.cpp	Sun Oct 06 21:17:51 2013 -0700
+++ b/src/share/vm/graal/graalCompilerToGPU.cpp	Sun Oct 06 21:19:22 2013 -0700
@@ -96,7 +96,20 @@
   } else {
     oop o = java_lang_boxing_object::create(ptxka.get_ret_type(), (jvalue *) result.get_value_addr(), CHECK_NULL);
     if (TraceGPUInteraction) {
-      tty->print_cr("GPU execution returned %d", result.get_jint());
+      switch (ptxka.get_ret_type()) {  // trace the result with a format matching its type
+        case T_INT:
+          tty->print_cr("GPU execution returned %d", result.get_jint());
+          break;
+        case T_FLOAT:
+          tty->print_cr("GPU execution returned %f", result.get_jfloat());
+          break;
+        case T_DOUBLE:
+          tty->print_cr("GPU execution returned %f", result.get_jdouble());
+          break;
+        default:
+          tty->print_cr("GPU returned unhandled");
+          break;
+      }
     }
     return JNIHandles::make_local(o);
   }
@@ -135,7 +148,20 @@
   } else {
     oop o = java_lang_boxing_object::create(ptxka.get_ret_type(), (jvalue *) result.get_value_addr(), CHECK_NULL);
     if (TraceGPUInteraction) {
-      tty->print_cr("GPU execution returned %d", result.get_jint());
+      switch (ptxka.get_ret_type()) {  // trace the result with a format matching its type
+        case T_INT:
+          tty->print_cr("GPU execution returned %d", result.get_jint());
+          break;
+        case T_FLOAT:
+          tty->print_cr("GPU execution returned %f", result.get_jfloat());
+          break;
+        case T_DOUBLE:
+          tty->print_cr("GPU execution returned %f", result.get_jdouble());
+          break;
+        default:
+          tty->print_cr("GPU returned unhandled");
+          break;
+      }
     }
     return JNIHandles::make_local(o);
   }