changeset 13683:de839ec35cc7

schedule lambda method compilation and execution on GPU (PTX) when possible; fix a couple of bugs.
author S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
date Fri, 17 Jan 2014 16:03:13 -0500
parents c4ff08d2aa0d
children 72f85504e79e
files graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotCodeCacheProvider.java graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotForeignCallsProvider.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/VMToCompilerImpl.java graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMove.java src/gpu/ptx/vm/gpu_ptx.cpp src/share/vm/oops/method.cpp src/share/vm/oops/method.hpp src/share/vm/runtime/compilationPolicy.cpp
diffstat 12 files changed, 115 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Fri Jan 17 17:26:47 2014 +0100
+++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Fri Jan 17 16:03:13 2014 -0500
@@ -700,4 +700,7 @@
         bra(str);
     }
 
+    public void nullCheck(Register r) {
+        // TODO: not implemented yet - no instruction is emitted for r; intended form: setp(....);
+    }
 }
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Fri Jan 17 17:26:47 2014 +0100
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Fri Jan 17 16:03:13 2014 -0500
@@ -43,15 +43,15 @@
         test("staticIntKernel", 'a', 42);
     }
 
+    public static int staticIntKernel(char p0, int p1) {
+        return p1 + p0;
+    }
+
     @Test
     public void testVirtualIntKernel() {
         test("virtualIntKernel", 'a', 42);
     }
 
-    public static int staticIntKernel(char p0, int p1) {
-        return p1 + p0;
-    }
-
     public int virtualIntKernel(char p0, int p1) {
         return p1 + p0;
     }
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java	Fri Jan 17 17:26:47 2014 +0100
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java	Fri Jan 17 16:03:13 2014 -0500
@@ -827,7 +827,8 @@
 
     @Override
     public void emitNullCheck(ValueNode v, DeoptimizingNode deopting) {
-        throw GraalInternalError.unimplemented("PTXLIRGenerator.emitNullCheck()");
+        assert v.kind() == Kind.Object;
+        append(new PTXMove.NullCheckOp(load(operand(v)), state(deopting)));
     }
 
     @Override
--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java	Fri Jan 17 17:26:47 2014 +0100
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java	Fri Jan 17 16:03:13 2014 -0500
@@ -94,6 +94,8 @@
 
     public PTXHotSpotBackend(HotSpotGraalRuntime runtime, HotSpotProviders providers) {
         super(runtime, providers);
+        CompilerToGPU compilerToGPU = getRuntime().getCompilerToGPU();
+        deviceInitialized = OmitDeviceInit || compilerToGPU.deviceInit();
     }
 
     @Override
@@ -110,7 +112,6 @@
     public void completeInitialization() {
         HotSpotHostForeignCallsProvider hostForeignCalls = (HotSpotHostForeignCallsProvider) getRuntime().getHostProviders().getForeignCalls();
         CompilerToGPU compilerToGPU = getRuntime().getCompilerToGPU();
-        deviceInitialized = OmitDeviceInit || compilerToGPU.deviceInit();
         if (deviceInitialized) {
             long launchKernel = compilerToGPU.getLaunchKernelAddress();
             hostForeignCalls.registerForeignCall(LAUNCH_KERNEL, launchKernel, NativeCall, DESTROYS_REGISTERS, NOT_LEAF, NOT_REEXECUTABLE, ANY_LOCATION);
--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotCodeCacheProvider.java	Fri Jan 17 17:26:47 2014 +0100
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotCodeCacheProvider.java	Fri Jan 17 16:03:13 2014 -0500
@@ -25,12 +25,18 @@
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.hotspot.*;
 import com.oracle.graal.hotspot.meta.*;
+import java.util.Arrays;
 
 public class PTXHotSpotCodeCacheProvider extends HotSpotCodeCacheProvider {
 
     public PTXHotSpotCodeCacheProvider(HotSpotGraalRuntime runtime, TargetDescription target) {
         super(runtime, target);
+    }
 
+    @Override
+    public String disassemble(CompilationResult compResult, InstalledCode installedCode) {
+        // Prefer the installed code; otherwise use only the first getTargetCodeSize() bytes of the compilation buffer.
+        return new String(installedCode != null ? installedCode.getCode() : Arrays.copyOf(compResult.getTargetCode(), compResult.getTargetCodeSize()));
     }
 
     @Override
--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotForeignCallsProvider.java	Fri Jan 17 17:26:47 2014 +0100
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotForeignCallsProvider.java	Fri Jan 17 16:03:13 2014 -0500
@@ -33,10 +33,12 @@
         throw GraalInternalError.unimplemented();
     }
 
+    @Override
     public LocationIdentity[] getKilledLocations(ForeignCallDescriptor descriptor) {
         throw GraalInternalError.unimplemented();
     }
 
+    @Override
     public boolean canDeoptimize(ForeignCallDescriptor descriptor) {
         throw GraalInternalError.unimplemented();
     }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/VMToCompilerImpl.java	Fri Jan 17 17:26:47 2014 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/VMToCompilerImpl.java	Fri Jan 17 16:03:13 2014 -0500
@@ -556,8 +556,9 @@
                 int id = allocateCompileTaskId(method, entryBCI);
                 HotSpotBackend backend = runtime.getHostBackend();
                 CompilationTask task = new CompilationTask(backend, method, entryBCI, id);
+                boolean isLambdaMethod = method.getName().contains("lambda$main$") && method.isSynthetic(); // short-circuit: flag is only read when the name matches
 
-                if (blocking) {
+                if (blocking || isLambdaMethod) {
                     task.runCompilation(true);
                 } else {
                     try {
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMove.java	Fri Jan 17 17:26:47 2014 +0100
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMove.java	Fri Jan 17 16:03:13 2014 -0500
@@ -32,7 +32,7 @@
 import com.oracle.graal.asm.ptx.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.lir.*;
-import com.oracle.graal.lir.StandardOp.MoveOp;
+import com.oracle.graal.lir.StandardOp.*;
 import com.oracle.graal.lir.asm.*;
 
 public class PTXMove {
@@ -239,4 +239,29 @@
     protected static void compareAndSwap(CompilationResultBuilder crb, PTXAssembler masm, AllocatableValue result, PTXAddressValue address, AllocatableValue cmpValue, AllocatableValue newValue) {
         throw new InternalError("NYI");
     }
+
+    public static class NullCheckOp extends PTXLIRInstruction implements NullCheck {
+        // LIR instruction that null-checks a register value and carries the frame state for the implicit exception.
+        @Use({REG}) protected AllocatableValue input; // value being checked
+        @State protected LIRFrameState state; // state recorded together with the implicit exception
+
+        public NullCheckOp(Variable input, LIRFrameState state) {
+            this.input = input;
+            this.state = state;
+        }
+
+        @Override
+        public void emitCode(CompilationResultBuilder crb, PTXMacroAssembler masm) {
+            crb.recordImplicitException(masm.codeBuffer.position(), state); // exception site = buffer position before the check
+            masm.nullCheck(asRegister(input));
+        }
+
+        public Value getCheckedValue() { // returns the value this op checks
+            return input;
+        }
+
+        public LIRFrameState getState() { // returns the state given to recordImplicitException
+            return state;
+        }
+    }
 }
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Fri Jan 17 17:26:47 2014 +0100
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Fri Jan 17 16:03:13 2014 -0500
@@ -331,7 +331,7 @@
   }
 
   if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] Got function handle for %s", name);
+    tty->print_cr("[CUDA] Got function handle for %s kernel address %p", name, cu_function);
   }
 
   return cu_function;
@@ -375,7 +375,9 @@
       return 0L;
     }
     // Push device_return_value to kernelParams
-    gpu::Ptx::CUdeviceptr* returnValuePtr = (gpu::Ptx::CUdeviceptr*) (address) parametersAndReturnValueBuffer + parametersAndReturnValueBufferSize - sizeof(device_return_value);
+    gpu::Ptx::CUdeviceptr* returnValuePtr = (gpu::Ptx::CUdeviceptr*)
+                                               ((address) parametersAndReturnValueBuffer +
+                                                parametersAndReturnValueBufferSize - sizeof(device_return_value));
     *returnValuePtr = device_return_value;
   }
 
@@ -417,7 +419,7 @@
     }
     thread->set_vm_result(return_val);
   } else if (returnTypeSize > 0) {
-    status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&primitiveReturnValue, device_return_value, T_LONG_BYTE_SIZE);
+    status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&primitiveReturnValue, device_return_value, returnTypeSize);
     if (status != GRAAL_CUDA_SUCCESS) {
       tty->print_cr("[CUDA] *** Error (%d) Failed to copy value from device argument", status);
       SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to copy value from device argument");
--- a/src/share/vm/oops/method.cpp	Fri Jan 17 17:26:47 2014 +0100
+++ b/src/share/vm/oops/method.cpp	Fri Jan 17 16:03:13 2014 -0500
@@ -2025,3 +2025,32 @@
   guarantee(md == NULL ||
       md->is_methodData(), "should be method data");
 }
+
+#ifdef GRAAL
+
+// Return true if this method is synthetic and its name contains the
+// marker "lambda$main$". Return false in every other case, including
+// when the holder's name or the method's name is not available.
+
+bool Method::is_lambda() const {
+  Symbol* klass_name = method_holder()->name();
+  Symbol* method_name = name();
+  // Every path returns explicitly: flowing off the end of a non-void
+  // function is undefined behavior in C++.
+  if (klass_name == NULL || method_name == NULL) {
+    return false;
+  }
+
+  // Declared before as_C_string() so the mark covers that call.
+  ResourceMark rm;
+  const char* lambda_marker = "lambda$main$";
+
+  // strstr() returns a pointer to the match itself, so comparing the
+  // match against the marker a second time could never fail; the
+  // substring search alone decides whether the name qualifies.
+  if (strstr(method_name->as_C_string(), lambda_marker) == NULL) {
+    return false;
+  }
+  return is_synthetic();
+}
+#endif
--- a/src/share/vm/oops/method.hpp	Fri Jan 17 17:26:47 2014 +0100
+++ b/src/share/vm/oops/method.hpp	Fri Jan 17 16:03:13 2014 -0500
@@ -617,6 +617,11 @@
   // simultaneously. Use with caution.
   bool has_compiled_code() const                 { return code() != NULL; }
 
+#ifdef GRAAL
+  // Return true if the name of the method indicates that this is a
+  // lambda method other than <init>.
+  bool is_lambda() const;
+#endif
   // sizing
   static int header_size()                       { return sizeof(Method)/HeapWordSize; }
   static int size(bool is_native);
--- a/src/share/vm/runtime/compilationPolicy.cpp	Fri Jan 17 17:26:47 2014 +0100
+++ b/src/share/vm/runtime/compilationPolicy.cpp	Fri Jan 17 16:03:13 2014 -0500
@@ -45,6 +45,9 @@
 #include "runtime/vm_operations.hpp"
 #include "utilities/events.hpp"
 #include "utilities/globalDefinitions.hpp"
+#ifdef GRAAL
+#include "runtime/gpu.hpp"
+#endif
 
 CompilationPolicy* CompilationPolicy::_policy;
 elapsedTimer       CompilationPolicy::_accumulated_time;
@@ -101,6 +104,32 @@
   if (ReplayCompiles) return false;
 
   if (m->has_compiled_code()) return false;       // already compiled
+
+#ifdef GRAAL
+  // Check if this is a Lambda method that can be compiled to a GPU.
+  if (m->is_lambda()) {
+    // If GPU is available and the necessary linkage is available
+    // return true indicating that this method must be compiled.
+    if (gpu::is_available() && gpu::has_gpu_linkage()) {
+      if (TraceGPUInteraction) {
+        tty->print("Compiling Lambda method");
+        m->print_short_name();
+        switch (gpu::get_target_il_type()) {
+        case gpu::PTX :
+          tty->print_cr(" to PTX");
+          break;
+        case gpu::HSAIL :
+          tty->print_cr(" to HSAIL");
+          break;
+        default :
+          tty->print_cr(" to Unknown GPU!!!");
+        }
+      }
+      return true;
+    }
+  }
+#endif
+
   if (!can_be_compiled(m, comp_level)) return false;
 
   return !UseInterpreter ||                                              // must compile all methods