changeset 10879:d55f24eac4b1

PTX support for Linux
author Morris Meyer <morris.meyer@oracle.com>
date Thu, 25 Jul 2013 22:15:30 -0400
parents d9fcc82766da
children 7bd19a37f764
files graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java make/bsd/makefiles/buildtree.make make/bsd/makefiles/vm.make make/linux/makefiles/buildtree.make make/linux/makefiles/vm.make src/gpu/ptx/gpu_ptx.cpp src/gpu/ptx/gpu_ptx.hpp src/gpu/ptx/vm/gpu_ptx.cpp src/gpu/ptx/vm/gpu_ptx.hpp src/os/bsd/vm/gpu_bsd.cpp src/os/bsd/vm/gpu_bsd.hpp src/os_gpu/bsd_ptx/vm/gpu_bsd.cpp src/os_gpu/bsd_ptx/vm/gpu_bsd.hpp src/os_gpu/linux_ptx/vm/gpu_linux.cpp src/os_gpu/linux_ptx/vm/gpu_linux.hpp src/share/vm/code/nmethod.cpp src/share/vm/graal/graalCompilerToGPU.cpp src/share/vm/graal/graalCompilerToVM.cpp src/share/vm/runtime/globals.hpp src/share/vm/runtime/gpu.cpp src/share/vm/runtime/gpu.hpp src/share/vm/runtime/thread.cpp
diffstat 23 files changed, 727 insertions(+), 440 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Thu Jul 25 18:02:29 2013 -0700
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Thu Jul 25 22:15:30 2013 -0400
@@ -24,7 +24,6 @@
 
 import java.lang.reflect.Method;
 
-import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -34,10 +33,10 @@
 
     @Test
     public void testAdd() {
-        compile("testConstI");
+        invoke(compile("testConstI"));
     }
 
-    @Ignore
+    @Test
     public void testInvoke() {
         invoke(compile("testConstI"));
     }
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Thu Jul 25 18:02:29 2013 -0700
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Thu Jul 25 22:15:30 2013 -0400
@@ -90,8 +90,43 @@
         codeBuffer.emitString("");
 
         Signature signature = codeCacheOwner.getSignature();
-        for (int i = 0; i < signature.getParameterCount(false); i++) {
-            String param = ".param .u32 param" + i;
+        int paramCount = signature.getParameterCount(false);
+        // TODO - Revisit this.
+        // Bit-size of registers to be declared and used by the kernel.
+        int regSize = 32;
+        for (int i = 0; i < paramCount; i++) {
+            String param;
+            // No unsigned types in Java. So using .s specifier
+            switch (signature.getParameterKind(i)) {
+                case Boolean:
+                case Byte:
+                    param = ".param .s8 param" + i;
+                    regSize = 8;
+                    break;
+                case Char:
+                case Short:
+                    param = ".param .s16 param" + i;
+                    regSize = 16;
+                    break;
+                case Int:
+                    param = ".param .s32 param" + i;
+                    regSize = 32;
+                    break;
+                case Long:
+                case Float:
+                case Double:
+                case Void:
+                    param = ".param .s64 param" + i;
+                    regSize = 32;
+                    break;
+                default:
+                    // Not sure but specify 64-bit specifier??
+                    param = ".param .s64 param" + i;
+                    break;
+            }    
+            if (i != (paramCount -1)) {
+                param += ",";
+            }
             codeBuffer.emitString(param);
         }
 
@@ -100,7 +135,7 @@
 
         // XXX For now declare one predicate and all registers
         codeBuffer.emitString("  .reg .pred %p,%q;");
-        codeBuffer.emitString("  .reg .u32 %r<16>;");
+        codeBuffer.emitString("  .reg .s" + regSize +" %r<16>;");
 
         // Emit code for the LIR
         lirGen.lir.emitCode(tasm);
--- a/make/bsd/makefiles/buildtree.make	Thu Jul 25 18:02:29 2013 -0700
+++ b/make/bsd/makefiles/buildtree.make	Thu Jul 25 22:15:30 2013 -0400
@@ -239,12 +239,14 @@
 	echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
+	echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
-	echo "$(call gamma-path,altsrc,gpu/ptx) \\"; \
-	echo "$(call gamma-path,commonsrc,gpu/ptx)"; \
+	echo "$(call gamma-path,altsrc,gpu/ptx/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu/ptx/vm)"; \
 	echo; \
 	echo "Src_Dirs_I = \\"; \
 	echo "$(call gamma-path,altsrc,share/vm/prims) \\"; \
@@ -257,6 +259,8 @@
 	echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
+	echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
--- a/make/bsd/makefiles/vm.make	Thu Jul 25 18:02:29 2013 -0700
+++ b/make/bsd/makefiles/vm.make	Thu Jul 25 22:15:30 2013 -0400
@@ -164,7 +164,8 @@
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os/posix/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm
-SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx
+SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx/vm
+SOURCE_PATHS+=$(HS_COMMON_SRC)/os_gpu/bsd_ptx/vm
 
 CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path))
 CORE_PATHS+=$(GENERATED)/jvmtifiles $(GENERATED)/tracefiles
@@ -187,9 +188,9 @@
 SHARK_PATHS := $(GAMMADIR)/src/share/vm/shark
 
 GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/share/vm/graal)
-GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx)
+GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx/vm)
 GRAAL_PATHS += $(HS_COMMON_SRC)/share/vm/graal
-GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx
+GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx/vm
 
 # Include dirs per type.
 Src_Dirs/CORE      := $(CORE_PATHS)
--- a/make/linux/makefiles/buildtree.make	Thu Jul 25 18:02:29 2013 -0700
+++ b/make/linux/makefiles/buildtree.make	Thu Jul 25 22:15:30 2013 -0400
@@ -235,12 +235,14 @@
 	echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
+	echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
-	echo "$(call gamma-path,altsrc,gpu/ptx) \\"; \
-	echo "$(call gamma-path,commonsrc,gpu/ptx)"; \
+	echo "$(call gamma-path,altsrc,gpu/ptx/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu/ptx/vm)"; \
 	echo; \
 	echo "Src_Dirs_I = \\"; \
 	echo "$(call gamma-path,altsrc,share/vm/prims) \\"; \
@@ -253,6 +255,8 @@
 	echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
+	echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
--- a/make/linux/makefiles/vm.make	Thu Jul 25 18:02:29 2013 -0700
+++ b/make/linux/makefiles/vm.make	Thu Jul 25 22:15:30 2013 -0400
@@ -151,7 +151,8 @@
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os/posix/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm
-SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx
+SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx/vm
+SOURCE_PATHS+=$(HS_COMMON_SRC)/os_gpu/linux_ptx/vm
 
 CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path))
 CORE_PATHS+=$(GENERATED)/jvmtifiles $(GENERATED)/tracefiles
@@ -174,9 +175,9 @@
 SHARK_PATHS := $(GAMMADIR)/src/share/vm/shark
 
 GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/share/vm/graal)
-GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx)
+GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx/vm)
 GRAAL_PATHS += $(HS_COMMON_SRC)/share/vm/graal
-GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx
+GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx/vm
 
 # Include dirs per type.
 Src_Dirs/CORE      := $(CORE_PATHS)
--- a/src/gpu/ptx/gpu_ptx.cpp	Thu Jul 25 18:02:29 2013 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "runtime/gpu.hpp"
-#include "utilities/globalDefinitions.hpp"
-#include "utilities/ostream.hpp"
-
-void * gpu::Ptx::_device_context;
-
-gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init;
-gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create;
-gpu::Ptx::cuda_cu_ctx_detach_func_t gpu::Ptx::_cuda_cu_ctx_detach;
-gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize;
-gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count;
-gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name;
-gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get;
-gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability;
-gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel;
-gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function;
-gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex;
-
-void gpu::probe_linkage() {
-#ifdef __APPLE__
-  set_gpu_linkage(gpu::Ptx::probe_linkage_apple());
-#else
-  set_gpu_linkage(false);
-#endif
-}
-
-void gpu::initialize_gpu() {
-  if (gpu::has_gpu_linkage()) {
-    set_initialized(gpu::Ptx::initialize_gpu());
-  }
-}
-
-void * gpu::generate_kernel(unsigned char *code, int code_len, const char *name) {
-  if (gpu::has_gpu_linkage()) {
-    return (gpu::Ptx::generate_kernel(code, code_len, name));
-  } else {
-    return NULL;
-  }
-}
-
-bool gpu::execute_kernel(address kernel) {
-  if (gpu::has_gpu_linkage()) {
-    return (gpu::Ptx::execute_kernel(kernel));
-  } else {
-    return false;
-  }
-}
-
-#define __CUDA_API_VERSION 5000
-
-bool gpu::Ptx::initialize_gpu() {
-  int status = _cuda_cu_init(0, __CUDA_API_VERSION);
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_ptx::_cuda_cu_init: %d", status);
-  }
-
-  int device_count = 0;
-  status = _cuda_cu_device_get_count(&device_count);
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_ptx::_cuda_cu_device_get_count(%d): %d", device_count, status);
-  }
-
-  int device_id = 0, cu_device = 0;
-  status = _cuda_cu_device_get(&cu_device, device_id);
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_ptx::_cuda_cu_device_get(%d): %d", cu_device, status);
-  }
-
-  int major, minor;
-  status = _cuda_cu_device_compute_capability(&major, &minor, cu_device);
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_ptx::_cuda_cu_device_compute_capability(major %d, minor %d): %d",
-                  major, minor, status);
-  }
-
-  char device_name[256];
-  status = _cuda_cu_device_get_name(device_name, 256, cu_device);
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_ptx::_cuda_cu_device_get_name(%s): %d", device_name, status);
-  }
-
-  status = _cuda_cu_ctx_create(&_device_context, 0, cu_device);
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_ptx::_cuda_cu_ctx_create(%x): %d", _device_context, status);
-  }
-
-  return status == 0;  // CUDA_SUCCESS
-}
-
-void *gpu::Ptx::generate_kernel(unsigned char *code, int code_len, const char *name) {
-
-  void *cu_module;
-  const unsigned int jit_num_options = 3;
-  int *jit_options = new int[jit_num_options];
-  void **jit_option_values = new void *[jit_num_options];
-
-  jit_options[0] = 4; // CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
-  int jit_log_buffer_size = 1024;
-  jit_option_values[0] = (void *)(size_t)jit_log_buffer_size;
-
-  jit_options[1] = 3; // CU_JIT_INFO_LOG_BUFFER
-  char *jit_log_buffer = new char[jit_log_buffer_size];
-  jit_option_values[1] = jit_log_buffer;
-
-  jit_options[2] = 0; // CU_JIT_MAX_REGISTERS
-  int jit_register_count = 32;
-  jit_option_values[2] = (void *)(size_t)jit_register_count;
-  
-  int status = _cuda_cu_module_load_data_ex(&cu_module, code,
-                                            jit_num_options, jit_options, (void **)jit_option_values);
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_ptx::_cuda_cu_module_load_data_ex(%x): %d", cu_module, status);
-    tty->print_cr("gpu_ptx::jit_log_buffer\n%s", jit_log_buffer);
-  }
-
-  void *cu_function;
-
-  status = _cuda_cu_module_get_function(&cu_function, cu_module, name);
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_ptx::_cuda_cu_module_get_function(%s):%x %d", name, cu_function, status);
-  }
-  return cu_function;
-}
-
-bool gpu::Ptx::execute_kernel(address kernel) {
-  // grid dimensionality
-  unsigned int gridX = 1;
-  unsigned int gridY = 1;
-  unsigned int gridZ = 1;
-
-  // thread dimensionality
-  unsigned int blockX = 1;
-  unsigned int blockY = 1;
-  unsigned int blockZ = 1;
-  
-  int *cu_function = (int *)kernel;
-
-  int status = _cuda_cu_launch_kernel(cu_function,
-                                      gridX, gridY, gridZ,
-                                      blockX, blockY, blockZ,
-                                      0, NULL, NULL, NULL);
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_ptx::_cuda_cu_launch_kernel(%x): %d", kernel, status);
-  }
-  return status == 0;  // CUDA_SUCCESS
-}
-
-#ifdef __APPLE__
-bool gpu::Ptx::probe_linkage_apple() {
-  void *handle = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_LAZY);
-  if (handle != NULL) {
-    _cuda_cu_init =
-        CAST_TO_FN_PTR(cuda_cu_init_func_t, dlsym(handle, "cuInit"));
-    _cuda_cu_ctx_create =
-        CAST_TO_FN_PTR(cuda_cu_ctx_create_func_t, dlsym(handle, "cuCtxCreate"));
-    _cuda_cu_ctx_detach =
-        CAST_TO_FN_PTR(cuda_cu_ctx_detach_func_t, dlsym(handle, "cuCtxDetach"));
-    _cuda_cu_ctx_synchronize =
-        CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, dlsym(handle, "cuCtxSynchronize"));
-    _cuda_cu_device_get_count =
-        CAST_TO_FN_PTR(cuda_cu_device_get_count_func_t, dlsym(handle, "cuDeviceGetCount"));
-    _cuda_cu_device_get_name =
-        CAST_TO_FN_PTR(cuda_cu_device_get_name_func_t, dlsym(handle, "cuDeviceGetName"));
-    _cuda_cu_device_get =
-        CAST_TO_FN_PTR(cuda_cu_device_get_func_t, dlsym(handle, "cuDeviceGet"));
-    _cuda_cu_device_compute_capability =
-        CAST_TO_FN_PTR(cuda_cu_device_compute_capability_func_t, dlsym(handle, "cuDeviceComputeCapability"));
-    _cuda_cu_module_get_function =
-        CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction"));
-    _cuda_cu_module_load_data_ex =
-        CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx"));
-    _cuda_cu_launch_kernel =
-        CAST_TO_FN_PTR(cuda_cu_launch_kernel_func_t, dlsym(handle, "cuLaunchKernel"));
-    return true;
-  }
-  return false;
-}
-#endif
\ No newline at end of file
--- a/src/gpu/ptx/gpu_ptx.hpp	Thu Jul 25 18:02:29 2013 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef GPU_PTX_HPP
-#define GPU_PTX_HPP
-
-class Ptx {
-  friend class gpu;
-
- protected:
-  static void probe_linkage();
-#ifdef __APPLE__
-  static bool probe_linkage_apple();
-#endif
-  static bool initialize_gpu();
-  static void * generate_kernel(unsigned char *code, int code_len, const char *name);
-  static bool execute_kernel(address kernel);
-  
-private:
-  typedef int (*cuda_cu_init_func_t)(unsigned int, int);
-  typedef int (*cuda_cu_ctx_create_func_t)(void *, int, int);
-  typedef int (*cuda_cu_ctx_detach_func_t)(int *);
-  typedef int (*cuda_cu_ctx_synchronize_func_t)(int *);
-  typedef int (*cuda_cu_device_get_count_func_t)(int *);
-  typedef int (*cuda_cu_device_get_name_func_t)(char *, int, int);
-  typedef int (*cuda_cu_device_get_func_t)(int *, int);
-  typedef int (*cuda_cu_device_compute_capability_func_t)(int *, int *, int);
-  typedef int (*cuda_cu_launch_kernel_func_t)(void *,
-                                              unsigned int, unsigned int, unsigned int,
-                                              unsigned int, unsigned int, unsigned int,
-                                              unsigned int, void *, void **, void **);
-  typedef int (*cuda_cu_module_get_function_func_t)(void *, void *, const char *);
-  typedef int (*cuda_cu_module_load_data_ex_func_t)(void *, void *, unsigned int, int *, void **);
-
-  static cuda_cu_init_func_t                      _cuda_cu_init;
-  static cuda_cu_ctx_create_func_t                _cuda_cu_ctx_create;
-  static cuda_cu_ctx_detach_func_t                _cuda_cu_ctx_detach;
-  static cuda_cu_ctx_synchronize_func_t           _cuda_cu_ctx_synchronize;
-  static cuda_cu_device_get_count_func_t          _cuda_cu_device_get_count;
-  static cuda_cu_device_get_name_func_t           _cuda_cu_device_get_name;
-  static cuda_cu_device_get_func_t                _cuda_cu_device_get;
-  static cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability;
-  static cuda_cu_launch_kernel_func_t             _cuda_cu_launch_kernel;
-  static cuda_cu_module_get_function_func_t       _cuda_cu_module_get_function;
-  static cuda_cu_module_load_data_ex_func_t       _cuda_cu_module_load_data_ex;
-
-protected:
-  static void * _device_context;
-};
-
-#endif // GPU_PTX_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Thu Jul 25 22:15:30 2013 -0400
@@ -0,0 +1,316 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/gpu.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/ostream.hpp"
+
+void * gpu::Ptx::_device_context;  // written by the cuCtxCreate call in gpu::Ptx::initialize_gpu()
+// CUDA driver entry points; each one is assigned from a dlsym() lookup in gpu::Ptx::probe_linkage().
+gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init;
+gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create;
+gpu::Ptx::cuda_cu_ctx_detach_func_t gpu::Ptx::_cuda_cu_ctx_detach;
+gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize;
+gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count;
+gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name;
+gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get;
+gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability;
+gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute;
+gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel;
+gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function;
+gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex;
+
+void gpu::probe_linkage() {
+  bool linked = false;  // stays false on platforms other than Mac OS X and Linux
+#if defined(__APPLE__) || defined(LINUX)
+  linked = gpu::Ptx::probe_linkage();
+#endif
+  set_gpu_linkage(linked);
+}
+
+// Initializes the PTX device, but only when gpu::has_gpu_linkage() is true.
+void gpu::initialize_gpu() {
+  if (!gpu::has_gpu_linkage()) return;
+  set_initialized(gpu::Ptx::initialize_gpu());
+}
+
+// Forwards to gpu::Ptx::generate_kernel(); yields NULL without GPU linkage.
+void * gpu::generate_kernel(unsigned char *code, int code_len, const char *name) {
+  if (!gpu::has_gpu_linkage()) {
+    return NULL;
+  }
+  return gpu::Ptx::generate_kernel(code, code_len, name);
+}
+
+// Forwards to gpu::Ptx::execute_kernel(); reports false without GPU linkage.
+bool gpu::execute_kernel(address kernel) {
+  if (!gpu::has_gpu_linkage()) {
+    return false;
+  }
+  return gpu::Ptx::execute_kernel(kernel);
+}
+
+bool gpu::Ptx::initialize_gpu() {
+  // Runs the CUDA driver start-up calls in order. The first call that does
+  // not return GRAAL_CUDA_SUCCESS is reported on tty and ends the sequence
+  // with false. A true result means a context was created on the device at
+  // ordinal 0 and stored in _device_context.
+
+  // Step 1: initialize the driver API.
+  int rc = _cuda_cu_init(0);
+  if (rc != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("Failed to initialize CUDA device");
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("CUDA driver initialization: Success");
+  }
+
+  // Step 2: count the compute-capable devices; at least one is required.
+  int num_devices = 0;
+  rc = _cuda_cu_device_get_count(&num_devices);
+  if (rc != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to get compute-capable device count");
+    return false;
+  }
+
+  if (num_devices == 0) {
+    tty->print_cr("[CUDA] Found no device supporting CUDA");
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Number of compute-capable devices found: %d", num_devices);
+  }
+
+  // Step 3: fetch the handle of the device at ordinal 0.
+  const int ordinal = 0;
+  int device = 0;
+  rc = _cuda_cu_device_get(&device, ordinal);
+  if (rc != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to get handle of first compute-capable device i.e., the one at ordinal: %d", ordinal);
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Got the handle of first compute-device");
+  }
+
+  // Step 4: query the compute capability, minor part first and then major.
+  int cc_minor;
+  int cc_major;
+  rc = _cuda_cu_device_get_attribute(&cc_minor, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device);
+  if (rc != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to get minor attribute of device: %d", device);
+    return false;
+  }
+
+  rc = _cuda_cu_device_get_attribute(&cc_major, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device);
+  if (rc != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to get major attribute of device: %d", device);
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", device, cc_major, cc_minor);
+  }
+
+  // Step 5: read the device name; it is only printed when tracing.
+  char name_buf[256];
+  rc = _cuda_cu_device_get_name(name_buf, 256, device);
+  if (rc != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to get name of device: %d", device);
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Using %s", name_buf);
+  }
+
+  // Step 6: create a context on that device; its handle is written to
+  // _device_context.
+  rc = _cuda_cu_ctx_create(&_device_context, 0, device);
+  if (rc != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to create CUDA context for device: %d", device);
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Success: Created context for device: %d", device);
+  }
+
+  return true;
+}
+
+void *gpu::Ptx::generate_kernel(unsigned char *code, int code_len, const char *name) {
+  // JIT-compiles the PTX text in 'code' with cuModuleLoadDataEx and looks up
+  // the entry point 'name' in the resulting module. Returns the function
+  // handle, or NULL if either driver call fails. 'code_len' is not used.
+  struct CUmod_st * cu_module;
+
+  // The option arrays and the log buffer are only consumed by the load call
+  // below, so they live on the stack and cannot leak on the early returns.
+  const unsigned int jit_num_options = 3;
+  int jit_options[jit_num_options];
+  void *jit_option_values[jit_num_options];
+
+  // 1. size of the compilation log buffer
+  const int jit_log_buffer_size = 1024;
+  jit_options[0] = GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
+  jit_option_values[0] = (void *)(size_t)jit_log_buffer_size;
+
+  // 2. pointer to the compilation log buffer
+  char jit_log_buffer[jit_log_buffer_size];
+  jit_options[1] = GRAAL_CU_JIT_INFO_LOG_BUFFER;
+  jit_option_values[1] = jit_log_buffer;
+
+  // 3. maximum number of registers (32) the kernel may use
+  int jit_register_count = 32;
+  jit_options[2] = GRAAL_CU_JIT_MAX_REGISTERS;
+  jit_option_values[2] = (void *)(size_t)jit_register_count;
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] PTX Kernel\n%s", code);
+    tty->print_cr("[CUDA] Function name : %s", name);
+  }
+
+  /* Load module's data with compiler options */
+  int status = _cuda_cu_module_load_data_ex(&cu_module, code, jit_num_options,
+                                            jit_options, jit_option_values);
+  if (status != GRAAL_CUDA_SUCCESS) {
+    if (status == GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU) {
+      tty->print_cr("[CUDA] Check for malformed PTX kernel or incorrect PTX compilation options");
+    }
+    tty->print_cr("[CUDA] *** Error (%d) Failed to load module data with online compiler options for method %s",
+                  status, name);
+    return NULL;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Loaded data for PTX Kernel");
+  }
+
+  struct CUfunc_st * cu_function;
+  status = _cuda_cu_module_get_function(&cu_function, cu_module, name);
+  if (status != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to get function %s", name);
+    return NULL;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Got function handle for %s", name);
+  }
+  return cu_function;
+}
+
+bool gpu::Ptx::execute_kernel(address kernel) {
+  // Launches 'kernel' on a 1x1x1 grid of 1x1x1 blocks, passing 0/NULL for the
+  // remaining launch arguments. A NULL kernel or a failed launch gives false.
+  if (kernel == NULL) {
+    return false;
+  }
+  int *function_handle = (int *)kernel;
+  // grid dimensions
+  const unsigned int grid_x = 1;
+  const unsigned int grid_y = 1;
+  const unsigned int grid_z = 1;
+
+  // thread (block) dimensions
+  const unsigned int block_x = 1;
+  const unsigned int block_y = 1;
+  const unsigned int block_z = 1;
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] launching kernel");
+  }
+  const int launch_status = _cuda_cu_launch_kernel(function_handle,
+                                                   grid_x, grid_y, grid_z,
+                                                   block_x, block_y, block_z,
+                                                   0, NULL, NULL, NULL);
+  if (launch_status != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to launch kernel");
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Success: Kernel Launch");
+  }
+  return true;
+}
+
+#if defined(LINUX)
+static const char cuda_library_name[] = "libcuda.so";  // bare name, resolved by dlopen()
+#elif defined(__APPLE__)
+static const char cuda_library_name[] = "/usr/local/cuda/lib/libcuda.dylib";
+#else
+static const char cuda_library_name[] = "";  // no CUDA library name for this platform
+#endif
+
+bool gpu::Ptx::probe_linkage() {
+  // Opens the CUDA driver library and binds the driver entry points used by
+  // this class. Returns true only when the library loads and every symbol
+  // resolves, so a reported linkage never leaves an entry point NULL.
+  if (cuda_library_name[0] == '\0') {
+    // cuda_library_name is an array (never NULL); empty means unsupported.
+    tty->print_cr("Unsupported CUDA platform");
+    return false;
+  }
+
+  void *handle = dlopen(cuda_library_name, RTLD_LAZY);
+  if (handle == NULL) {
+    // Unable to dlopen libcuda
+    tty->print_cr("Use LD_LIBRARY_PATH (or other means) to specify installed location of CUDA library");
+    return false;
+  }
+
+  _cuda_cu_init = CAST_TO_FN_PTR(cuda_cu_init_func_t, dlsym(handle, "cuInit"));
+  _cuda_cu_ctx_create = CAST_TO_FN_PTR(cuda_cu_ctx_create_func_t, dlsym(handle, "cuCtxCreate"));
+  _cuda_cu_ctx_detach = CAST_TO_FN_PTR(cuda_cu_ctx_detach_func_t, dlsym(handle, "cuCtxDetach"));
+  _cuda_cu_ctx_synchronize = CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, dlsym(handle, "cuCtxSynchronize"));
+  _cuda_cu_device_get_count = CAST_TO_FN_PTR(cuda_cu_device_get_count_func_t, dlsym(handle, "cuDeviceGetCount"));
+  _cuda_cu_device_get_name = CAST_TO_FN_PTR(cuda_cu_device_get_name_func_t, dlsym(handle, "cuDeviceGetName"));
+  _cuda_cu_device_get = CAST_TO_FN_PTR(cuda_cu_device_get_func_t, dlsym(handle, "cuDeviceGet"));
+  _cuda_cu_device_compute_capability = CAST_TO_FN_PTR(cuda_cu_device_compute_capability_func_t, dlsym(handle, "cuDeviceComputeCapability"));
+  _cuda_cu_device_get_attribute = CAST_TO_FN_PTR(cuda_cu_device_get_attribute_func_t, dlsym(handle, "cuDeviceGetAttribute"));
+  _cuda_cu_module_get_function = CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction"));
+  _cuda_cu_module_load_data_ex = CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx"));
+  _cuda_cu_launch_kernel = CAST_TO_FN_PTR(cuda_cu_launch_kernel_func_t, dlsym(handle, "cuLaunchKernel"));
+  // Calling through an unresolved entry point would crash the VM later on.
+  if (_cuda_cu_init == NULL || _cuda_cu_ctx_create == NULL ||
+      _cuda_cu_ctx_detach == NULL || _cuda_cu_ctx_synchronize == NULL ||
+      _cuda_cu_device_get_count == NULL || _cuda_cu_device_get_name == NULL ||
+      _cuda_cu_device_get == NULL || _cuda_cu_device_compute_capability == NULL ||
+      _cuda_cu_device_get_attribute == NULL || _cuda_cu_module_get_function == NULL ||
+      _cuda_cu_module_load_data_ex == NULL || _cuda_cu_launch_kernel == NULL) {
+    tty->print_cr("Failed to find CUDA linkage");
+    return false;
+  }
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Success: library linkage");
+  }
+  return true;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Thu Jul 25 22:15:30 2013 -0400
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef GPU_PTX_HPP
+#define GPU_PTX_HPP
+
+/* 
+ * Some useful macro definitions from publicly available cuda.h.
+ * These definitions are for convenience.
+ */
+#define GRAAL_CUDA_SUCCESS                                   0
+#define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR  75
+#define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR  76
+#define GRAAL_CU_JIT_MAX_REGISTERS                           0
+#define GRAAL_CU_JIT_THREADS_PER_BLOCK                       1
+#define GRAAL_CU_JIT_INFO_LOG_BUFFER                         3
+#define GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES              4
+#define GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU                 209
+
+class Ptx {  // PTX/CUDA backend hooks; runtime/gpu.hpp includes this header inside the body of class gpu.
+  friend class gpu;
+
+ protected:
+  static bool probe_linkage();  // NOTE(review): presumably the routine that dlsym()s the pointers below - confirm in gpu_ptx.cpp
+  static bool initialize_gpu();
+  static void * generate_kernel(unsigned char *code, int code_len, const char *name);
+  static bool execute_kernel(address kernel);
+  
+private:  // NOTE(review): signatures use plain int/void* instead of cuda.h types - confirm they match the driver ABI
+  typedef int (*cuda_cu_init_func_t)(unsigned int);
+  typedef int (*cuda_cu_ctx_create_func_t)(void *, int, int);
+  typedef int (*cuda_cu_ctx_detach_func_t)(int *);
+  typedef int (*cuda_cu_ctx_synchronize_func_t)(int *);
+  typedef int (*cuda_cu_device_get_count_func_t)(int *);
+  typedef int (*cuda_cu_device_get_name_func_t)(char *, int, int);
+  typedef int (*cuda_cu_device_get_func_t)(int *, int);
+  typedef int (*cuda_cu_device_compute_capability_func_t)(int *, int *, int);
+  typedef int (*cuda_cu_device_get_attribute_func_t)(int *, int, int);
+  typedef int (*cuda_cu_launch_kernel_func_t)(void *,
+                                              unsigned int, unsigned int, unsigned int,
+                                              unsigned int, unsigned int, unsigned int,
+                                              unsigned int, void *, void **, void **);
+  typedef int (*cuda_cu_module_get_function_func_t)(void *, void *, const char *);
+  typedef int (*cuda_cu_module_load_data_ex_func_t)(void *, void *, unsigned int, void *, void **);
+
+  static cuda_cu_init_func_t                      _cuda_cu_init;                 // looked up as "cuInit"
+  static cuda_cu_ctx_create_func_t                _cuda_cu_ctx_create;           // looked up as "cuCtxCreate"
+  static cuda_cu_ctx_detach_func_t                _cuda_cu_ctx_detach;           // looked up as "cuCtxDetach"
+  static cuda_cu_ctx_synchronize_func_t           _cuda_cu_ctx_synchronize;      // looked up as "cuCtxSynchronize"
+  static cuda_cu_device_get_count_func_t          _cuda_cu_device_get_count;     // looked up as "cuDeviceGetCount"
+  static cuda_cu_device_get_name_func_t           _cuda_cu_device_get_name;      // looked up as "cuDeviceGetName"
+  static cuda_cu_device_get_func_t                _cuda_cu_device_get;           // looked up as "cuDeviceGet"
+  static cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability; /* Deprecated as of CUDA 5.0 */
+  static cuda_cu_device_get_attribute_func_t      _cuda_cu_device_get_attribute; // looked up as "cuDeviceGetAttribute"
+  static cuda_cu_launch_kernel_func_t             _cuda_cu_launch_kernel;        // looked up as "cuLaunchKernel"
+  static cuda_cu_module_get_function_func_t       _cuda_cu_module_get_function;  // looked up as "cuModuleGetFunction"
+  static cuda_cu_module_load_data_ex_func_t       _cuda_cu_module_load_data_ex;  // looked up as "cuModuleLoadDataEx"
+
+protected:
+  static void * _device_context;  // NOTE(review): presumably the context handle obtained through _cuda_cu_ctx_create - confirm
+};
+#endif // GPU_PTX_HPP
--- a/src/os/bsd/vm/gpu_bsd.cpp	Thu Jul 25 18:02:29 2013 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "runtime/gpu.hpp"
-#include "utilities/ostream.hpp"
-
-#ifdef __APPLE__
-#include <ApplicationServices/ApplicationServices.h>
-#include <IOKit/IOKitLib.h>
-#endif
-
-void gpu::probe_gpu() {
-#ifdef __APPLE__
-  set_available(gpu::Bsd::probe_gpu_apple());
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_bsd::probe_gpu(APPLE): %d", gpu::is_available());
-  }
-#else
-  if (TraceWarpLoading) {
-    tty->print_cr("gpu_bsd::probe_gpu(not APPLE)");
-  }
-  set_available(false);
-#endif
-}
-
-#ifdef __APPLE__
-/*
- * This is rudimentary at best, but until we decide on a CUDA Compiler Compatibility
- * level, this will have to suffice.
- */
-bool gpu::Bsd::probe_gpu_apple() {
-  CGError             err = CGDisplayNoErr;
-  CGDisplayCount      displayCount = 0;
-  CFDataRef           vendorID, deviceID, model;
-  CGDirectDisplayID   *displays;
-  IOOptionBits        options = kIORegistryIterateRecursively | kIORegistryIterateParents;
-  io_registry_entry_t displayPort;
-
-  err = CGGetActiveDisplayList(0, NULL, &displayCount);
-  displays = (CGDirectDisplayID *)calloc((size_t)displayCount, sizeof(CGDirectDisplayID));
-  err = CGGetActiveDisplayList(displayCount, displays, &displayCount);
-
-  for (CGDisplayCount i = 0; i < displayCount; i++) {
-	displayPort = CGDisplayIOServicePort(displays[i]);
-	vendorID = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("vendor-id"),
-                                               kCFAllocatorDefault, options);
-	deviceID = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("device-id"),
-                                               kCFAllocatorDefault, options);
-	model = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("model"),
-                                            kCFAllocatorDefault, options);
-    if (TraceWarpLoading) {
-      tty->print_cr("vendor: 0x%08X", *((UInt32*)CFDataGetBytePtr(vendorID)));
-      tty->print_cr("device: 0x%08X", *((UInt32*)CFDataGetBytePtr(deviceID)));
-      tty->print_cr("model: %s", CFDataGetBytePtr(model));
-    }
-    UInt32 vendor = *((UInt32*)CFDataGetBytePtr(vendorID));
-    if (vendor != 0x10DE) {
-      return false;
-    } else {
-      /*
-       * see https://developer.nvidia.com/cuda-gpus
-       * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs
-       * see http://www.pcidatabase.com/reports.php?type=csv
-       *
-       * Only supporting GK104, GK106, GK107 and GK110 GPUs for now,
-       * which is CUDA Computer Capability 3.0 and greater.
-       */
-      switch (*((UInt32*)CFDataGetBytePtr(deviceID))) {
-        case 0x11C0:
-          return true;  // NVIDIA GeForce GTX 660
-        default:
-          return false;
-      }
-    }
-  }
-  return false;
-}
-#endif
--- a/src/os/bsd/vm/gpu_bsd.hpp	Thu Jul 25 18:02:29 2013 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef OS_BSD_VM_GPU_BSD_HPP
-#define OS_BSD_VM_GPU_BSD_HPP
-
-
-class Bsd {
-  friend class gpu;
-
- protected:
-  static bool probe_gpu();
-#ifdef __APPLE__
-  static bool probe_gpu_apple();
-#endif
-};
-
-#endif // OS_BSD_VM_GPU_BSD_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_gpu/bsd_ptx/vm/gpu_bsd.cpp	Thu Jul 25 22:15:30 2013 -0400
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "runtime/gpu.hpp"
+#include "utilities/ostream.hpp"
+
+#ifdef __APPLE__
+#include <ApplicationServices/ApplicationServices.h>
+#include <IOKit/IOKitLib.h>
+#endif
+
+void gpu::probe_gpu() {
+  // Hardware detection is only implemented for Apple platforms;
+  // every other BSD build reports that no GPU is available.
+#ifdef __APPLE__
+  const bool found = gpu::Bsd::probe_gpu_apple();
+  set_available(found);
+  if (TraceGPUInteraction) tty->print_cr("gpu_bsd::probe_gpu(APPLE): %d", gpu::is_available());
+#else
+  // Trace first, then publish the negative result.
+  if (TraceGPUInteraction) tty->print_cr("gpu_bsd::probe_gpu(not APPLE)");
+  set_available(false);
+#endif
+}
+
+#ifdef __APPLE__
+/*
+ * Returns true when an active display sits on a supported NVIDIA GPU. Rudimentary
+ * at best, but until we decide on a CUDA Compiler Compatibility level it will suffice.
+ */
+bool gpu::Bsd::probe_gpu_apple() {
+  CGDisplayCount      displayCount = 0;
+  CFDataRef           vendorID, deviceID, model;
+  IOOptionBits        options = kIORegistryIterateRecursively | kIORegistryIterateParents;
+  bool                supported = false;
+
+  // The first call only reports how many displays are active.
+  if (CGGetActiveDisplayList(0, NULL, &displayCount) != CGDisplayNoErr || displayCount == 0) {
+    return false;
+  }
+  CGDirectDisplayID *displays = (CGDirectDisplayID *)calloc((size_t)displayCount, sizeof(CGDirectDisplayID));
+  if (displays == NULL || CGGetActiveDisplayList(displayCount, displays, &displayCount) != CGDisplayNoErr) {
+    free(displays);  // free(NULL) is a no-op
+    return false;
+  }
+  // Keep scanning until a supported GPU turns up; one unsupported display must not end the search.
+  for (CGDisplayCount i = 0; i < displayCount && !supported; i++) {
+    io_registry_entry_t displayPort = CGDisplayIOServicePort(displays[i]);
+    vendorID = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("vendor-id"),
+                                               kCFAllocatorDefault, options);
+    deviceID = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("device-id"),
+                                               kCFAllocatorDefault, options);
+    model = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("model"),
+                                            kCFAllocatorDefault, options);
+    if (vendorID != NULL && deviceID != NULL) {  // either property may be missing from the registry
+      UInt32 vendor = *((UInt32*)CFDataGetBytePtr(vendorID));
+      UInt32 device = *((UInt32*)CFDataGetBytePtr(deviceID));
+      if (TraceGPUInteraction) {
+        tty->print_cr("vendor: 0x%08X", vendor);
+        tty->print_cr("device: 0x%08X", device);
+        if (model != NULL) tty->print_cr("model: %s", CFDataGetBytePtr(model));
+      }
+      /*
+       * see https://developer.nvidia.com/cuda-gpus
+       * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs
+       * see http://www.pcidatabase.com/reports.php?type=csv
+       * Only supporting GK104, GK106, GK107 and GK110 GPUs (CUDA Compute Capability 3.0+) for now.
+       */
+      supported = (vendor == 0x10DE) && (device == 0x11C0);  // NVIDIA GeForce GTX 660
+    }
+    if (vendorID != NULL) CFRelease(vendorID);  // search results are owned by the caller
+    if (deviceID != NULL) CFRelease(deviceID);
+    if (model != NULL) CFRelease(model);
+  }
+  free(displays);
+  return supported;
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_gpu/bsd_ptx/vm/gpu_bsd.hpp	Thu Jul 25 22:15:30 2013 -0400
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_BSD_VM_GPU_BSD_HPP
+#define OS_BSD_VM_GPU_BSD_HPP
+
+
+class Bsd {  // BSD-specific GPU probing; runtime/gpu.hpp includes this header inside class gpu, hence gpu::Bsd.
+  friend class gpu;
+
+ protected:
+  static bool probe_gpu();  // NOTE(review): declared here, but gpu_bsd.cpp defines no Bsd::probe_gpu() - confirm it is needed
+#ifdef __APPLE__
+  static bool probe_gpu_apple();  // called by gpu::probe_gpu() on Apple builds; true when a supported GPU is found
+#endif
+};
+
+#endif // OS_BSD_VM_GPU_BSD_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_gpu/linux_ptx/vm/gpu_linux.cpp	Thu Jul 25 22:15:30 2013 -0400
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "runtime/gpu.hpp"
+#include "utilities/ostream.hpp"
+
+void gpu::probe_gpu() {
+  // Record the outcome of the PCI scan, then trace the recorded value on request.
+  const bool found = gpu::Linux::probe_gpu();
+  set_available(found);
+  if (TraceGPUInteraction) tty->print_cr("gpu_linux::probe_gpu(): %d", gpu::is_available());
+}
+
+/*
+ * Probe for CUDA device on PCI bus using /proc/bus/pci/devices. Do
+ * not rely on CUDA tool kit being installed. We will check if CUDA
+ * library is installed later.
+ */
+
+static unsigned int nvidia_vendor_id = 0x10de;
+static unsigned int nvidia_gk110_dev_id = 0x1005;
+
+bool gpu::Linux::probe_gpu() {
+  /*
+     Scan /proc/bus/pci/devices and return true as soon as an entry
+     with the NVIDIA vendor id and the GK110 device id is found,
+     false otherwise. Only the first match is used; revisit this to
+     support execution on multiple CUDA devices if they exist.
+  */
+  FILE *pci_devices = fopen("/proc/bus/pci/devices", "r");
+  char contents[4096];
+  unsigned int bus_num_devfn_ign;
+  unsigned int vendor;
+  unsigned int device;
+  bool cuda_device_exists = false;
+  if (pci_devices == NULL) {
+    tty->print_cr("*** Failed to open /proc/bus/pci/devices");
+    return cuda_device_exists;
+  }
+
+  while (fgets(contents, sizeof(contents)-1, pci_devices)) {
+    /* Ignore lines that do not yield all three fields: vendor/device would be stale or unset. */
+    if (sscanf(contents, "%04x%04x%04x", &bus_num_devfn_ign, &vendor, &device) != 3) continue;
+    /* Break after finding the first CUDA device. */
+    if ((vendor == nvidia_vendor_id) && (device == nvidia_gk110_dev_id)) {
+      cuda_device_exists = true;
+      if (TraceGPUInteraction) {
+        tty->print_cr("Found supported nVidia CUDA device vendor : 0x%04x device 0x%04x", vendor, device);
+      }
+      break;
+    }
+  }
+
+  fclose(pci_devices);
+
+  return cuda_device_exists;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_gpu/linux_ptx/vm/gpu_linux.hpp	Thu Jul 25 22:15:30 2013 -0400
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_GPU_LINUX_PTX_VM_GPU_LINUX_HPP
+#define OS_GPU_LINUX_PTX_VM_GPU_LINUX_HPP
+// Linux-specific GPU probing. runtime/gpu.hpp includes this header inside the
+// body of class gpu, so the class below is addressed as gpu::Linux.
+class Linux {
+  friend class gpu;
+
+ protected:
+  static bool probe_gpu();  // true when the PCI device list contains a supported CUDA device
+};
+
+#endif // OS_GPU_LINUX_PTX_VM_GPU_LINUX_HPP
--- a/src/share/vm/code/nmethod.cpp	Thu Jul 25 18:02:29 2013 -0700
+++ b/src/share/vm/code/nmethod.cpp	Thu Jul 25 22:15:30 2013 -0400
@@ -950,7 +950,7 @@
 
     // we use the information of entry points to find out if a method is
     // static or non static
-    assert(compiler->is_c2() ||
+    assert(compiler->is_c2() || compiler->is_graal() ||
            _method->is_static() == (entry_point() == _verified_entry_point),
            " entry points must be same for static methods and vice versa");
   }
--- a/src/share/vm/graal/graalCompilerToGPU.cpp	Thu Jul 25 18:02:29 2013 -0700
+++ b/src/share/vm/graal/graalCompilerToGPU.cpp	Thu Jul 25 22:15:30 2013 -0400
@@ -47,7 +47,7 @@
 #define C2V_END }
 
 
-C2V_VMENTRY(jlong, generateKernel, (JNIEnv *env, jobject, jbyteArray code, jstring name))
+C2V_ENTRY(jlong, generateKernel, (JNIEnv *env, jobject, jbyteArray code, jstring name))
   if (gpu::is_available() == false || gpu::has_gpu_linkage() == false && gpu::is_initialized()) {
     tty->print_cr("generateKernel - not available / no linkage / not initialized");
     return 0;
@@ -57,7 +57,12 @@
   jint len = env->GetArrayLength(code);
   const char *namestr = env->GetStringUTFChars(name, &is_copy);
   void *kernel = gpu::generate_kernel((unsigned char *)bytes, len, namestr);
-  tty->print_cr("generateKernel: %x", kernel);
+  if (kernel == NULL) {
+    tty->print_cr("[CUDA] *** Error: Failed to compile kernel");
+  }
+  else if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Generated kernel");
+  }
   env->ReleaseByteArrayElements(code, bytes, 0);
   env->ReleaseStringUTFChars(name, namestr);
 
--- a/src/share/vm/graal/graalCompilerToVM.cpp	Thu Jul 25 18:02:29 2013 -0700
+++ b/src/share/vm/graal/graalCompilerToVM.cpp	Thu Jul 25 22:15:30 2013 -0400
@@ -946,7 +946,7 @@
       HotSpotInstalledCode::set_codeBlob(installed_code_handle, (jlong) cb);
       oop comp_result = HotSpotCompiledCode::comp(compiled_code_handle);
       if (comp_result->is_a(ExternalCompilationResult::klass())) {
-        if (TraceWarpLoading) {
+        if (TraceGPUInteraction) {
           tty->print_cr("installCode0: ExternalCompilationResult");
         }
         HotSpotInstalledCode::set_start(installed_code_handle, ExternalCompilationResult::entryPoint(comp_result));
--- a/src/share/vm/runtime/globals.hpp	Thu Jul 25 18:02:29 2013 -0700
+++ b/src/share/vm/runtime/globals.hpp	Thu Jul 25 22:15:30 2013 -0400
@@ -3709,7 +3709,7 @@
   product(bool , AllowNonVirtualCalls, false,                               \
           "Obey the ACC_SUPER flag and allow invokenonvirtual calls")       \
                                                                             \
-  product(bool, TraceWarpLoading, false,                                    \
+  product(bool, TraceGPUInteraction, false,                                    \
           "Trace external GPU warp loading")                                \
                                                                             \
   diagnostic(ccstr, SharedArchiveFile, NULL,                                \
--- a/src/share/vm/runtime/gpu.cpp	Thu Jul 25 18:02:29 2013 -0700
+++ b/src/share/vm/runtime/gpu.cpp	Thu Jul 25 22:15:30 2013 -0400
@@ -24,17 +24,15 @@
 
 #include "precompiled.hpp"
 #include "runtime/gpu.hpp"
-#include "ptx/gpu_ptx.hpp"
 
 bool gpu::_available = false;   // does the hardware exist?
 bool gpu::_gpu_linkage = false; // is the driver library to access the GPU installed
-bool gpu::_initialized = false; // is the GPU defvice initialized
+bool gpu::_initialized = false; // is the GPU device initialized
 
 void gpu::init() {
-#ifdef TARGET_OS_FAMILY_bsd
+#if defined(TARGET_OS_FAMILY_bsd) || defined(TARGET_OS_FAMILY_linux)
   gpu::probe_gpu();
 #endif
   // need multi-gpu TARGET ifdef
   gpu::probe_linkage();
 }
-
--- a/src/share/vm/runtime/gpu.hpp	Thu Jul 25 18:02:29 2013 -0700
+++ b/src/share/vm/runtime/gpu.hpp	Thu Jul 25 22:15:30 2013 -0400
@@ -70,6 +70,7 @@
 
   // Platform dependent stuff
 #ifdef TARGET_OS_FAMILY_linux
+# include "gpu_linux.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 #endif
@@ -79,7 +80,7 @@
 # include "gpu_bsd.hpp"
 #endif
 
-# include "ptx/gpu_ptx.hpp"
+# include "ptx/vm/gpu_ptx.hpp"
 
 };
 
--- a/src/share/vm/runtime/thread.cpp	Thu Jul 25 18:02:29 2013 -0700
+++ b/src/share/vm/runtime/thread.cpp	Thu Jul 25 22:15:30 2013 -0400
@@ -3312,7 +3312,8 @@
   // Initialize the os module before using TLS
   os::init();
 
-  // probe for warp capability
+  // Probe for existence of supported GPU and initialize it if one
+  // exists.
   gpu::init();
 
   // Initialize system properties.