Mercurial > hg > truffle

--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java	Thu Feb 06 00:21:10 2014 -0800
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java	Thu Feb 06 11:14:19 2014 +0100
@@ -141,7 +141,7 @@
     }

     /**
-     * Gets the address of {@code gpu::Ptx::execute_kernel_from_vm()}.
+     * Gets the address of {@code Ptx::execute_kernel_from_vm()}.
      */
     private static native long getLaunchKernelAddress();
--- a/src/gpu/hsail/vm/gpu_hsail.cpp	Thu Feb 06 00:21:10 2014 -0800
+++ b/src/gpu/hsail/vm/gpu_hsail.cpp	Thu Feb 06 11:14:19 2014 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/gpu.hpp"
+#include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ostream.hpp"
 #include "memory/allocation.hpp"
@@ -55,29 +56,29 @@

 //  public native void executeKernel(HotSpotNmethod kernel, int jobSize, int i, int j, Object[] args) throws InvalidInstalledCodeException;

-JNINativeMethod gpu::Hsail::HSAIL_methods[] = {
-  {CC"initialize",       CC"()Z",                               FN_PTR(gpu::Hsail::initialize)},
-  {CC"generateKernel",   CC"([B" STRING ")J",                   FN_PTR(gpu::Hsail::generate_kernel)},
-  {CC"executeKernel0",   CC"("HS_INSTALLED_CODE"I["OBJECT")Z",  FN_PTR(gpu::Hsail::execute_kernel_void_1d)},
+JNINativeMethod Hsail::HSAIL_methods[] = {
+  {CC"initialize",       CC"()Z",                               FN_PTR(Hsail::initialize)},
+  {CC"generateKernel",   CC"([B" STRING ")J",                   FN_PTR(Hsail::generate_kernel)},
+  {CC"executeKernel0",   CC"("HS_INSTALLED_CODE"I["OBJECT")Z",  FN_PTR(Hsail::execute_kernel_void_1d)},
 };

-void * gpu::Hsail::_device_context = NULL;
+void * Hsail::_device_context = NULL;

-gpu::Hsail::okra_create_context_func_t  gpu::Hsail::_okra_create_context;
-gpu::Hsail::okra_create_kernel_func_t   gpu::Hsail::_okra_create_kernel;
-gpu::Hsail::okra_push_object_func_t     gpu::Hsail::_okra_push_object;
-gpu::Hsail::okra_push_boolean_func_t    gpu::Hsail::_okra_push_boolean;
-gpu::Hsail::okra_push_byte_func_t       gpu::Hsail::_okra_push_byte;
-gpu::Hsail::okra_push_double_func_t     gpu::Hsail::_okra_push_double;
-gpu::Hsail::okra_push_float_func_t      gpu::Hsail::_okra_push_float;
-gpu::Hsail::okra_push_int_func_t        gpu::Hsail::_okra_push_int;
-gpu::Hsail::okra_push_long_func_t       gpu::Hsail::_okra_push_long;
-gpu::Hsail::okra_execute_with_range_func_t    gpu::Hsail::_okra_execute_with_range;
-gpu::Hsail::okra_clearargs_func_t       gpu::Hsail::_okra_clearargs;
-gpu::Hsail::okra_register_heap_func_t   gpu::Hsail::_okra_register_heap;
+Hsail::okra_create_context_func_t  Hsail::_okra_create_context;
+Hsail::okra_create_kernel_func_t   Hsail::_okra_create_kernel;
+Hsail::okra_push_object_func_t     Hsail::_okra_push_object;
+Hsail::okra_push_boolean_func_t    Hsail::_okra_push_boolean;
+Hsail::okra_push_byte_func_t       Hsail::_okra_push_byte;
+Hsail::okra_push_double_func_t     Hsail::_okra_push_double;
+Hsail::okra_push_float_func_t      Hsail::_okra_push_float;
+Hsail::okra_push_int_func_t        Hsail::_okra_push_int;
+Hsail::okra_push_long_func_t       Hsail::_okra_push_long;
+Hsail::okra_execute_with_range_func_t    Hsail::_okra_execute_with_range;
+Hsail::okra_clearargs_func_t       Hsail::_okra_clearargs;
+Hsail::okra_register_heap_func_t   Hsail::_okra_register_heap;


-void gpu::Hsail::register_heap() {
+void Hsail::register_heap() {
   // After the okra functions are set up and the heap is initialized, register the java heap with HSA
   guarantee(Universe::heap() != NULL, "heap should be there by now.");
   if (TraceGPUInteraction) {
@@ -87,7 +88,7 @@
   _okra_register_heap(Universe::heap()->base(), Universe::heap()->capacity());
 }

-GPU_VMENTRY(jboolean, gpu::Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args_handle))
+GPU_VMENTRY(jboolean, Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args_handle))

   ResourceMark rm;
   jlong nmethodValue = HotSpotInstalledCode::codeBlob(kernel_handle);
@@ -115,7 +116,7 @@
   return _okra_execute_with_range(kernel, dimX);
 GPU_END

-GPU_ENTRY(jlong, gpu::Hsail::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle))
+GPU_ENTRY(jlong, Hsail::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle))
   guarantee(_okra_create_kernel != NULL, "[HSAIL] Okra not linked");
   ResourceMark rm;
   jsize name_len = env->GetStringLength(name_handle);
@@ -158,7 +159,7 @@
         return false; \
   } \

-GPU_ENTRY(jboolean, gpu::Hsail::initialize, (JNIEnv *env, jclass))
+GPU_ENTRY(jboolean, Hsail::initialize, (JNIEnv *env, jclass))
   if (okra_library_name == NULL) {
     if (TraceGPUInteraction) {
       tty->print_cr("Unsupported HSAIL platform");
@@ -211,7 +212,7 @@
   return true;
 GPU_END

-bool gpu::Hsail::register_natives(JNIEnv* env) {
+bool Hsail::register_natives(JNIEnv* env) {
   jclass klass = env->FindClass("com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend");
   if (klass == NULL) {
     if (TraceGPUInteraction) {
--- a/src/gpu/hsail/vm/hsailKernelArguments.cpp	Thu Feb 06 00:21:10 2014 -0800
+++ b/src/gpu/hsail/vm/hsailKernelArguments.cpp	Thu Feb 06 11:14:19 2014 +0100
@@ -46,7 +46,7 @@
   jvalue jValue;
   java_lang_boxing_object::get_value(arg, &jValue);

-  bool pushed = gpu::Hsail::_okra_push_boolean(_kernel, jValue.z);
+  bool pushed = Hsail::_okra_push_boolean(_kernel, jValue.z);
   assert(pushed == true, "arg push failed");
 }

@@ -58,7 +58,7 @@
   jvalue jValue;
   java_lang_boxing_object::get_value(arg, &jValue);

-  bool pushed = gpu::Hsail::_okra_push_byte(_kernel, jValue.b);
+  bool pushed = Hsail::_okra_push_byte(_kernel, jValue.b);
   assert(pushed == true, "arg push failed");
 }

@@ -72,7 +72,7 @@
   if (TraceGPUInteraction) {
     tty->print_cr("[HSAIL] HSAILKernelArguments::double value = %e", jValue.d);
   }
-  bool pushed = gpu::Hsail::_okra_push_double(_kernel, jValue.d);
+  bool pushed = Hsail::_okra_push_double(_kernel, jValue.d);
   assert(pushed == true, "arg push failed");
 }

@@ -86,7 +86,7 @@
   if (TraceGPUInteraction) {
     tty->print_cr("[HSAIL] HSAILKernelArguments::float value = %f", jValue.f);
   }
-  bool pushed = gpu::Hsail::_okra_push_float(_kernel, jValue.f);
+  bool pushed = Hsail::_okra_push_float(_kernel, jValue.f);
   assert(pushed == true, "float push failed");
 }

@@ -107,7 +107,7 @@
   jvalue jValue;
   java_lang_boxing_object::get_value(arg, &jValue);

-  bool pushed = gpu::Hsail::_okra_push_int(_kernel, jValue.i);
+  bool pushed = Hsail::_okra_push_int(_kernel, jValue.i);
   assert(pushed == true, "arg push failed");
 }

@@ -119,7 +119,7 @@
   jvalue jValue;
   java_lang_boxing_object::get_value(arg, &jValue);

-  bool pushed = gpu::Hsail::_okra_push_long(_kernel, jValue.j);
+  bool pushed = Hsail::_okra_push_long(_kernel, jValue.j);
   assert(pushed == true, "arg push failed");
 }

@@ -130,7 +130,7 @@
     tty->print_cr("[HSAIL] HSAILKernelArguments::do_array 0x%08x, is a %s", (address) arg, arg->klass()->external_name());
   }

-  bool pushed = gpu::Hsail::_okra_push_object(_kernel, arg);
+  bool pushed = Hsail::_okra_push_object(_kernel, arg);
   assert(pushed == true, "arg push failed");
 }

@@ -153,7 +153,7 @@
     tty->print_cr("[HSAIL] HSAILKernelArguments::do_object, 0x%08x is a %s", (address) arg, arg->klass()->external_name());
   }

-  bool pushed = gpu::Hsail::_okra_push_object(_kernel, arg);
+  bool pushed = Hsail::_okra_push_object(_kernel, arg);
   assert(pushed == true, "arg push failed");
 }
--- a/src/gpu/hsail/vm/hsailKernelArguments.hpp	Thu Feb 06 00:21:10 2014 -0800
+++ b/src/gpu/hsail/vm/hsailKernelArguments.hpp	Thu Feb 06 11:14:19 2014 +0100
@@ -26,10 +26,11 @@
 #define KERNEL_ARGUMENTS_HSAIL_HPP

 #include "runtime/gpu.hpp"
+#include "hsail/vm/gpu_hsail.hpp"
 #include "runtime/signature.hpp"

 class HSAILKernelArguments : public SignatureIterator {
-  friend class gpu::Hsail;
+  friend class Hsail;

 public:

@@ -71,7 +72,7 @@
       if (TraceGPUInteraction) {
         tty->print_cr("[HSAIL] instance method, this 0x%08x, is a %s", (address) arg, arg->klass()->external_name());
       }
-      bool pushed = gpu::Hsail::_okra_push_object(kernel, arg);
+      bool pushed = Hsail::_okra_push_object(kernel, arg);
       assert(pushed == true, "'this' push failed");
     } else {
       if (TraceGPUInteraction) {
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Thu Feb 06 00:21:10 2014 -0800
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Thu Feb 06 11:14:19 2014 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/gpu.hpp"
+#include "ptx/vm/gpu_ptx.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ostream.hpp"
 #include "memory/allocation.hpp"
@@ -47,12 +48,14 @@
 // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'.
 #define GPU_VMENTRY(result_type, name, signature) \
   JNIEXPORT result_type JNICALL name signature { \
+  if (TraceGPUInteraction) tty->print_cr("[CUDA] " #name); /* #name is already qualified, e.g. "Ptx::initialize" */ \
   GRAAL_VM_ENTRY_MARK; \

 // Entry to GPU native method implementation that calls a JNI function
 // and hence cannot transition current thread to '_thread_in_vm'.
 #define GPU_ENTRY(result_type, name, signature) \
   JNIEXPORT result_type JNICALL name signature { \
+  if (TraceGPUInteraction) tty->print_cr("[CUDA] " #name); /* #name is already qualified, e.g. "Ptx::initialize" */ \

 #define GPU_END }

@@ -61,37 +64,37 @@

 #define STRING                "Ljava/lang/String;"

-JNINativeMethod gpu::Ptx::PTX_methods[] = {
-  {CC"initialize",              CC"()Z",               FN_PTR(gpu::Ptx::initialize)},
-  {CC"generateKernel",          CC"([B" STRING ")J",   FN_PTR(gpu::Ptx::generate_kernel)},
-  {CC"getLaunchKernelAddress",  CC"()J",               FN_PTR(gpu::Ptx::get_execute_kernel_from_vm_address)},
-  {CC"getAvailableProcessors0", CC"()I",               FN_PTR(gpu::Ptx::get_total_cores)},
+JNINativeMethod Ptx::PTX_methods[] = {
+  {CC"initialize",              CC"()Z",               FN_PTR(Ptx::initialize)},
+  {CC"generateKernel",          CC"([B" STRING ")J",   FN_PTR(Ptx::generate_kernel)},
+  {CC"getLaunchKernelAddress",  CC"()J",               FN_PTR(Ptx::get_execute_kernel_from_vm_address)},
+  {CC"getAvailableProcessors0", CC"()I",               FN_PTR(Ptx::get_total_cores)},
 };

-void * gpu::Ptx::_device_context;
-int    gpu::Ptx::_cu_device = 0;
+void * Ptx::_device_context;
+int    Ptx::_cu_device = 0;

-gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init;
-gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create;
-gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy;
-gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize;
-gpu::Ptx::cuda_cu_ctx_get_current_func_t gpu::Ptx::_cuda_cu_ctx_get_current;
-gpu::Ptx::cuda_cu_ctx_set_current_func_t gpu::Ptx::_cuda_cu_ctx_set_current;
-gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count;
-gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name;
-gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get;
-gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability;
-gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute;
-gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel;
-gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function;
-gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex;
-gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod;
-gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh;
-gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc;
-gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree;
-gpu::Ptx::cuda_cu_mem_host_register_func_t gpu::Ptx::_cuda_cu_mem_host_register;
-gpu::Ptx::cuda_cu_mem_host_get_device_pointer_func_t gpu::Ptx::_cuda_cu_mem_host_get_device_pointer;
-gpu::Ptx::cuda_cu_mem_host_unregister_func_t gpu::Ptx::_cuda_cu_mem_host_unregister;
+Ptx::cuda_cu_init_func_t Ptx::_cuda_cu_init;
+Ptx::cuda_cu_ctx_create_func_t Ptx::_cuda_cu_ctx_create;
+Ptx::cuda_cu_ctx_destroy_func_t Ptx::_cuda_cu_ctx_destroy;
+Ptx::cuda_cu_ctx_synchronize_func_t Ptx::_cuda_cu_ctx_synchronize;
+Ptx::cuda_cu_ctx_get_current_func_t Ptx::_cuda_cu_ctx_get_current;
+Ptx::cuda_cu_ctx_set_current_func_t Ptx::_cuda_cu_ctx_set_current;
+Ptx::cuda_cu_device_get_count_func_t Ptx::_cuda_cu_device_get_count;
+Ptx::cuda_cu_device_get_name_func_t Ptx::_cuda_cu_device_get_name;
+Ptx::cuda_cu_device_get_func_t Ptx::_cuda_cu_device_get;
+Ptx::cuda_cu_device_compute_capability_func_t Ptx::_cuda_cu_device_compute_capability;
+Ptx::cuda_cu_device_get_attribute_func_t Ptx::_cuda_cu_device_get_attribute;
+Ptx::cuda_cu_launch_kernel_func_t Ptx::_cuda_cu_launch_kernel;
+Ptx::cuda_cu_module_get_function_func_t Ptx::_cuda_cu_module_get_function;
+Ptx::cuda_cu_module_load_data_ex_func_t Ptx::_cuda_cu_module_load_data_ex;
+Ptx::cuda_cu_memcpy_htod_func_t Ptx::_cuda_cu_memcpy_htod;
+Ptx::cuda_cu_memcpy_dtoh_func_t Ptx::_cuda_cu_memcpy_dtoh;
+Ptx::cuda_cu_memalloc_func_t Ptx::_cuda_cu_memalloc;
+Ptx::cuda_cu_memfree_func_t Ptx::_cuda_cu_memfree;
+Ptx::cuda_cu_mem_host_register_func_t Ptx::_cuda_cu_mem_host_register;
+Ptx::cuda_cu_mem_host_get_device_pointer_func_t Ptx::_cuda_cu_mem_host_get_device_pointer;
+Ptx::cuda_cu_mem_host_unregister_func_t Ptx::_cuda_cu_mem_host_unregister;

 #define STRINGIFY(x)     #x

@@ -108,7 +111,7 @@
 /*
  * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs
  */
-int gpu::Ptx::ncores(int major, int minor) {
+int Ptx::ncores(int major, int minor) {
     int device_type = (major << 4) + minor;

     switch (device_type) {
@@ -126,7 +129,7 @@
     }
 }

-bool gpu::Ptx::register_natives(JNIEnv* env) {
+bool Ptx::register_natives(JNIEnv* env) {
   jclass klass = env->FindClass("com/oracle/graal/hotspot/ptx/PTXHotSpotBackend");
   if (klass == NULL) {
     if (TraceGPUInteraction) {
@@ -136,7 +139,7 @@
   }
   jint status = env->RegisterNatives(klass, PTX_methods, sizeof(PTX_methods) / sizeof(JNINativeMethod));
   if (status != JNI_OK) {
-    if (TraceGPUInteraction) {
+    if (true || TraceGPUInteraction) {
       tty->print_cr("Error registering natives for PTXHotSpotBackend: %d", status);
     }
     return false;
@@ -144,7 +147,7 @@
   return true;
 }

-GPU_ENTRY(jboolean, gpu::Ptx::initialize, (JNIEnv *env, jclass))
+GPU_ENTRY(jboolean, Ptx::initialize, (JNIEnv *env, jclass))

   if (!link()) {
     return false;
@@ -255,7 +258,7 @@
   return true;
 GPU_END

-GPU_ENTRY(jint, gpu::Ptx::get_total_cores, (JNIEnv *env, jobject))
+GPU_ENTRY(jint, Ptx::get_total_cores, (JNIEnv *env, jobject))

     int minor, major, nmp;
     int status = _cuda_cu_device_get_attribute(&minor,
@@ -342,7 +345,7 @@
     return total;
 GPU_END

-GPU_ENTRY(jlong, gpu::Ptx::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle))
+GPU_ENTRY(jlong, Ptx::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle))
   ResourceMark rm;
   jsize name_len = env->GetStringLength(name_handle);
   jsize code_len = env->GetArrayLength(code_handle);
@@ -440,7 +443,7 @@
   int          _buffer_size;   // size (in bytes) of _buffer
   oop*         _pinned;        // objects that have been pinned with cuMemHostRegister
   int          _pinned_length; // length of _pinned
-  gpu::Ptx::CUdeviceptr  _ret_value;     // pointer to slot in GPU memory holding the return value
+  Ptx::CUdeviceptr  _ret_value;     // pointer to slot in GPU memory holding the return value
   int          _ret_type_size; // size of the return type value
   bool         _ret_is_object; // specifies if the return type is Object
   bool         _gc_locked;     // denotes when execution has locked GC
@@ -474,8 +477,8 @@

   void alloc_return_value() {
     if (_ret_type_size != 0) {
-      if (check(gpu::Ptx::_cuda_cu_memalloc(&_ret_value, _ret_type_size), "Allocate device memory for return value")) {
-        gpu::Ptx::CUdeviceptr* retValuePtr = (gpu::Ptx::CUdeviceptr*) ((_buffer + _buffer_size) - sizeof(_ret_value));
+      if (check(Ptx::_cuda_cu_memalloc(&_ret_value, _ret_type_size), "Allocate device memory for return value")) {
+        Ptx::CUdeviceptr* retValuePtr = (Ptx::CUdeviceptr*) ((_buffer + _buffer_size) - sizeof(_ret_value));
         *retValuePtr = _ret_value;
       }
     }
@@ -503,7 +506,7 @@
         // Size (in bytes) of object
         int objSize = obj->size() * HeapWordSize;
         //tty->print_cr("Pinning object %d at offset %d: %p", i, offset, obj);
-        if (!check(gpu::Ptx::_cuda_cu_mem_host_register(obj, objSize, GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP), "Pin object")) {
+        if (!check(Ptx::_cuda_cu_mem_host_register(obj, objSize, GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP), "Pin object")) {
           return;
         }

@@ -512,7 +515,7 @@

         // Replace host pointer to object with device pointer
         // to object in kernel parameters buffer
-        if (!check(gpu::Ptx::_cuda_cu_mem_host_get_device_pointer((gpu::Ptx::CUdeviceptr*) argPtr, obj, 0), "Get device pointer for pinned object")) {
+        if (!check(Ptx::_cuda_cu_mem_host_get_device_pointer((Ptx::CUdeviceptr*) argPtr, obj, 0), "Get device pointer for pinned object")) {
           return;
         }
       }
@@ -529,7 +532,7 @@
       GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &_buffer_size,
       GRAAL_CU_LAUNCH_PARAM_END
     };
-    if (check(gpu::Ptx::_cuda_cu_launch_kernel((struct CUfunc_st*) (address) kernel,
+    if (check(Ptx::_cuda_cu_launch_kernel((struct CUfunc_st*) (address) kernel,
                                       gridX, gridY, gridZ,
                                       dimX, dimY, dimZ,
                                       0, NULL, NULL, (void**) &config), "Launch kernel")) {
@@ -537,7 +540,7 @@
   }

   void synchronize() {
-    check(gpu::Ptx::_cuda_cu_ctx_synchronize(), "Synchronize kernel");
+    check(Ptx::_cuda_cu_ctx_synchronize(), "Synchronize kernel");
   }

   void unpin_objects() {
@@ -545,7 +548,7 @@
       oop obj = _pinned[--_pinned_length];
       assert(obj != NULL, "npe");
       //tty->print_cr("Unpinning object %d: %p", _pinned_length, obj);
-      if (!check(gpu::Ptx::_cuda_cu_mem_host_unregister(obj), "Unpin object")) {
+      if (!check(Ptx::_cuda_cu_mem_host_unregister(obj), "Unpin object")) {
         return;
       }
     }
@@ -553,27 +556,27 @@

   oop get_object_return_value() {
     oop return_val;
-    check(gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, T_OBJECT_BYTE_SIZE), "Copy return value from device");
+    check(Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, T_OBJECT_BYTE_SIZE), "Copy return value from device");
     return return_val;
   }

   jlong get_primitive_return_value() {
     jlong return_val;
-    check(gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, _ret_type_size), "Copy return value from device");
+    check(Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, _ret_type_size), "Copy return value from device");
     return return_val;
   }

   void free_return_value() {
     if (_ret_value != 0) {
-      check(gpu::Ptx::_cuda_cu_memfree(_ret_value), "Free device memory");
+      check(Ptx::_cuda_cu_memfree(_ret_value), "Free device memory");
       _ret_value = 0;
     }
   }

   void destroy_context() {
-    if (gpu::Ptx::_device_context != NULL) {
-      check(gpu::Ptx::_cuda_cu_ctx_destroy(gpu::Ptx::_device_context), "Destroy context");
-      gpu::Ptx::_device_context = NULL;
+    if (Ptx::_device_context != NULL) {
+      check(Ptx::_cuda_cu_ctx_destroy(Ptx::_device_context), "Destroy context");
+      Ptx::_device_context = NULL;
     }
   }

@@ -666,11 +669,11 @@
   }
 }

-GPU_VMENTRY(jlong, gpu::Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass))
-  return (jlong) gpu::Ptx::execute_kernel_from_vm;
+GPU_VMENTRY(jlong, Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass))
+  return (jlong) Ptx::execute_kernel_from_vm;
 GPU_END

-JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ,
+JRT_ENTRY(jlong, Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ,
                                                   jlong buffer,
                                                   jint bufferSize,
                                                   jint objectParametersCount,
@@ -724,7 +727,7 @@
 static char const cuda_library_name[] = "";
 #endif

-bool gpu::Ptx::link() {
+bool Ptx::link() {
   if (cuda_library_name == NULL) {
     if (TraceGPUInteraction) {
       tty->print_cr("Failed to find CUDA linkage");
--- a/src/gpu/ptx/vm/gpu_ptx.hpp	Thu Feb 06 00:21:10 2014 -0800
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Thu Feb 06 11:14:19 2014 +0100
@@ -155,12 +155,12 @@
                                               unsigned int, void*, void**, void**);
   typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*);
   typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**);
-  typedef int (*cuda_cu_memalloc_func_t)(gpu::Ptx::CUdeviceptr*, size_t);
-  typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr);
-  typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int);
-  typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr,  unsigned int);
+  typedef int (*cuda_cu_memalloc_func_t)(Ptx::CUdeviceptr*, size_t);
+  typedef int (*cuda_cu_memfree_func_t)(Ptx::CUdeviceptr);
+  typedef int (*cuda_cu_memcpy_htod_func_t)(Ptx::CUdeviceptr, const void*, unsigned int);
+  typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, Ptx::CUdeviceptr,  unsigned int);
   typedef int (*cuda_cu_mem_host_register_func_t)(void*, size_t, unsigned int);
-  typedef int (*cuda_cu_mem_host_get_device_pointer_func_t)(gpu::Ptx::CUdeviceptr*, void*, unsigned int);
+  typedef int (*cuda_cu_mem_host_get_device_pointer_func_t)(Ptx::CUdeviceptr*, void*, unsigned int);
   typedef int (*cuda_cu_mem_host_unregister_func_t)(void*);

 public:
--- a/src/os/bsd/vm/gpu_bsd.cpp	Thu Feb 06 00:21:10 2014 -0800
+++ b/src/os/bsd/vm/gpu_bsd.cpp	Thu Feb 06 11:14:19 2014 +0100
@@ -23,6 +23,8 @@
  */

 #include "runtime/gpu.hpp"
+#include "ptx/vm/gpu_ptx.hpp"
+#include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/ostream.hpp"

 jobject gpu::probe_gpus(JNIEnv* env) {
@@ -31,7 +33,7 @@
    * Let the CUDA driver initialization be the gate to GPU for now, pending
    * a better detection solution for NVIDA PTX and AMD HSAIL.
    */
-  if (gpu::Ptx::register_natives(env)) {
+  if (Ptx::register_natives(env)) {
     if (TraceGPUInteraction) {
       tty->print_cr("Assuming NVidia/PTX support (APPLE)");
     }
--- a/src/os/linux/vm/gpu_linux.cpp	Thu Feb 06 00:21:10 2014 -0800
+++ b/src/os/linux/vm/gpu_linux.cpp	Thu Feb 06 11:14:19 2014 +0100
@@ -23,6 +23,8 @@
  */

 #include "runtime/gpu.hpp"
+#include "ptx/vm/gpu_ptx.hpp"
+#include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/ostream.hpp"

 /*
@@ -40,7 +42,7 @@
   bool hsail = false;
   bool ptx = false;

-  if (UseHSAILSimulator && gpu::Hsail::register_natives(env)) {
+  if (UseHSAILSimulator && Hsail::register_natives(env)) {
     hsail = true;
   }

@@ -71,7 +73,7 @@
         if (TraceGPUInteraction) {
           tty->print_cr("Found supported nVidia device [vendor=0x%04x, device=0x%04x]", vendor, device);
         }
-        if (!ptx && gpu::Ptx::register_natives(env)) {
+        if (!ptx && Ptx::register_natives(env)) {
           ptx = true;
         }
       }
--- a/src/os/windows/vm/gpu_windows.cpp	Thu Feb 06 00:21:10 2014 -0800
+++ b/src/os/windows/vm/gpu_windows.cpp	Thu Feb 06 11:14:19 2014 +0100
@@ -24,11 +24,12 @@

 #include "precompiled.hpp"
 #include "runtime/gpu.hpp"
+#include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/ostream.hpp"

 jobject gpu::probe_gpus(JNIEnv* env) {
   // TODO: add detection of PTX/NVidia
-  if (UseHSAILSimulator && gpu::Hsail::register_natives(env)) {
+  if (UseHSAILSimulator && Hsail::register_natives(env)) {
     return env->NewStringUTF("HSAIL");
   }
   return env->NewStringUTF("");
--- a/src/share/vm/runtime/gpu.hpp	Thu Feb 06 00:21:10 2014 -0800
+++ b/src/share/vm/runtime/gpu.hpp	Thu Feb 06 11:14:19 2014 +0100
@@ -32,21 +32,18 @@
 // Defines the interface to the graphics processor(s).
 class gpu : AllStatic {
  private:
-  static int _initialized_gpus;
-
-  // Notifies that a GPU device has been initialized.
-  static void initialized_gpu(const char* name);
+  static int _initialized_gpus;  // number of initialized GPU devices

  public:

+  // Notification of a GPU device that has been initialized.
+  static void initialized_gpu(const char* name);
+
   // Gets a comma separated list of supported GPU architecture names.
   static jobject probe_gpus(JNIEnv* env);

+  // Gets the number of GPU devices that have been initialized.
   static int initialized_gpus() { return _initialized_gpus; }
-
-# include "ptx/vm/gpu_ptx.hpp"
-# include "hsail/vm/gpu_hsail.hpp"
-
 };

 #endif // SHARE_VM_RUNTIME_GPU_HPP