Mercurial > hg > truffle

--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Fri Jan 31 16:04:33 2014 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Fri Jan 31 16:05:37 2014 +0100
@@ -31,9 +31,18 @@
 #include "memory/allocation.inline.hpp"
 #include "memory/gcLocker.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
+#include "runtime/vframe.hpp"
 #include "graal/graalEnv.hpp"
 #include "graal/graalCompiler.hpp"
-#include "ptxKernelArguments.hpp"
+
+#define T_BYTE_SIZE        1   // NOTE(review): presumably sizes in bytes of the Java basic types - confirm at the use sites
+#define T_BOOLEAN_SIZE     4   // NOTE(review): 4 here although T_BYTE_SIZE is 1 - confirm this width is intended
+#define T_INT_BYTE_SIZE    4
+#define T_FLOAT_BYTE_SIZE  4
+#define T_DOUBLE_BYTE_SIZE 8
+#define T_LONG_BYTE_SIZE   8
+#define T_OBJECT_BYTE_SIZE sizeof(intptr_t)   // width of intptr_t on the host
+#define T_ARRAY_BYTE_SIZE  sizeof(intptr_t)   // width of intptr_t on the host

 // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'.
 #define GPU_VMENTRY(result_type, name, signature) \
@@ -76,7 +85,9 @@
 gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel;
 gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function;
 gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex;
+gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod;
 gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh;
+gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc;
 gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree;
 gpu::Ptx::cuda_cu_mem_host_register_func_t gpu::Ptx::_cuda_cu_mem_host_register;
 gpu::Ptx::cuda_cu_mem_host_get_device_pointer_func_t gpu::Ptx::_cuda_cu_mem_host_get_device_pointer;
@@ -432,7 +443,7 @@
   gpu::Ptx::CUdeviceptr  _ret_value;     // pointer to slot in GPU memory holding the return value
   int          _ret_type_size; // size of the return type value
   bool         _ret_is_object; // specifies if the return type is Object
-  bool         _gc_locked;
+  bool         _gc_locked;     // denotes when execution has locked GC

   bool check(int status, const char *action) {
     if (status != GRAAL_CUDA_SUCCESS) {
@@ -575,10 +586,86 @@
       if (TraceGPUInteraction) {
         tty->print_cr("[CUDA] Unlocked GC");
       }
+      _gc_locked = false;
     }
   }
 };

+// Prints the values held in a kernel arguments buffer to an outputStream, decoding them
+// with the signature of 'method'. All of the printing is done by the constructor.
+class KernelArgumentsPrinter: public SignatureIterator {
+  Method*       _method;       // method whose arguments are printed
+  address       _buffer;       // start of the kernel arguments buffer
+  size_t        _bufferOffset; // offset in _buffer of the next value to read
+  outputStream* _st;           // stream the decoded values are printed to
+
+ public:
+  // Prints the receiver first (non-static methods only), then iterates over the signature.
+  KernelArgumentsPrinter(Method* method, address buffer, outputStream* st) : SignatureIterator(method->signature()),
+    _method(method), _buffer(buffer), _bufferOffset(0), _st(st) {
+    if (!method->is_static()) {
+      print_oop();
+    }
+    iterate();
+  }
+
+  // Returns the address of the next value of size dataSz; prints ", " before every value but the first.
+  address next(size_t dataSz) {
+    if (is_return_type()) {
+      return _buffer;
+    }
+    if (_bufferOffset != 0) {
+      _st->print(", ");
+    }
+    _bufferOffset = align_size_up_(_bufferOffset, dataSz);
+    address result = _buffer + _bufferOffset;
+    _bufferOffset += dataSz;
+    return result;
+  }
+
+  // Reads the next oop-sized slot and prints its address and class name, or "oop null".
+  void print_oop() {
+    oop obj = *((oop*) next(sizeof(oop)));
+    if (obj != NULL) {
+      char type[256];
+      obj->klass()->name()->as_C_string(type, sizeof(type));
+      _st->print("oop " PTR_FORMAT " (%s)", (void*) obj, type);
+    } else {
+      _st->print("oop null");
+    }
+  }
+
+  // True when the iterator is at the return type, for which nothing is printed.
+  bool skip() {
+    return is_return_type();
+  }
+
+  void do_bool  ()                     { if (!skip()) _st->print("bool %d",    *((jboolean*) next(sizeof(jboolean)))); }
+  void do_char  ()                     { if (!skip()) _st->print("char %c",    *((jchar*)    next(sizeof(jchar))));    }
+  void do_float ()                     { if (!skip()) _st->print("float %g",   *((jfloat*)   next(sizeof(jfloat))));   }
+  void do_double()                     { if (!skip()) _st->print("double %g",  *((jdouble*)  next(sizeof(jdouble))));  }
+  void do_byte  ()                     { if (!skip()) _st->print("byte %d",    *((jbyte*)    next(sizeof(jbyte))));    }
+  void do_short ()                     { if (!skip()) _st->print("short %d",   *((jshort*)   next(sizeof(jshort))));   }
+  void do_int   ()                     { if (!skip()) _st->print("int %d",     *((jint*)     next(sizeof(jint))));     }
+  void do_long  ()                     { if (!skip()) _st->print("long " JLONG_FORMAT,  *((jlong*)    next(sizeof(jlong))));    }
+  void do_void  ()                     { }
+  void do_object(int begin, int end)   { if (!skip()) print_oop();      }
+  void do_array (int begin, int end)   { if (!skip()) print_oop();      }
+};
+
+static void printKernelArguments(JavaThread* thread, address buffer) {
+  // Walk the stack of 'thread'; trace the first frame that has a method, then stop.
+  for (vframeStream frames(thread); !frames.at_end(); frames.next()) {
+    Method* callee = frames.method();
+    if (callee == NULL) continue;
+    stringStream out(O_BUFLEN);
+    out.print("[CUDA] Call: %s.%s(", callee->method_holder()->name()->as_C_string(), callee->name()->as_C_string());
+    KernelArgumentsPrinter printer(callee, buffer, &out);  // its constructor prints the arguments into 'out'
+    tty->print_cr("%s)", out.as_string());
+    return;
+  }
+}
+
 GPU_VMENTRY(jlong, gpu::Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass))
   return (jlong) gpu::Ptx::execute_kernel_from_vm;
 GPU_END
@@ -595,6 +682,10 @@
     return 0L;
   }

+  if (TraceGPUInteraction) {
+    printKernelArguments(thread, (address) buffer);
+  }
+
   PtxCall call(thread, (address) buffer, bufferSize, (oop*) (address) pinnedObjects, encodedReturnTypeSize);

 #define TRY(action) do { \
--- a/src/gpu/ptx/vm/ptxKernelArguments.cpp	Fri Jan 31 16:04:33 2014 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,332 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "ptxKernelArguments.hpp"
-#include "runtime/javaCalls.hpp"
-
-gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc;
-gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod;
-
-// Get next java argument
-oop PTXKernelArguments::next_arg(BasicType expectedType) {
-  assert(_index < _args->length(), "out of bounds");
-  oop arg = ((objArrayOop) (_args))->obj_at(_index++);
-  assert(expectedType == T_OBJECT ||
-         java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
-  return arg;
-}
-
-/*
- * Pad kernel argument buffer to naturally align for given size.
- */
-void PTXKernelArguments::pad_kernel_argument_buffer(size_t dataSz) {
-  while ((_bufferOffset % dataSz) != 0) {
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (char) 0;
-    _bufferOffset += sizeof(char);
-  }
-  return;
-}
-void PTXKernelArguments::do_int() {
-  // If the parameter is a return value,
-  if (is_return_type()) {
-    // Allocate device memory for T_INT return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_INT_BYTE_SIZE);
-    if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
-    }
-
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(_dev_return_value));
-    // Push _dev_return_value to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-    _bufferOffset += sizeof(_dev_return_value);
-  } else {
-    // Get the next java argument and its value which should be a T_INT
-    oop arg = next_arg(T_INT);
-    // Copy the java argument value to kernelArgBuffer
-    jvalue intval;
-    if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
-      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-      _success = false;
-      return;
-    }
-
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(intval.i));
-    // Push _dev_return_value to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i;
-
-    // Advance _bufferOffset
-    _bufferOffset += sizeof(intval.i);
-  }
-  return;
-}
-
-void PTXKernelArguments::do_float() {
-  // If the parameter is a return value,
-  if (is_return_type()) {
-    // Allocate device memory for T_FLOAT return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_FLOAT_BYTE_SIZE);
-    if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
-    }
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(_dev_return_value));
-    // Push _dev_return_value to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-    // Advance _bufferOffset
-    _bufferOffset += sizeof(_dev_return_value);
-  } else {
-    // Get the next java argument and its value which should be a T_FLOAT
-    oop arg = next_arg(T_FLOAT);
-    // Copy the java argument value to kernelArgBuffer
-    jvalue floatval;
-    if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) {
-      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_FLOAT");
-      _success = false;
-      return;
-    }
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(floatval.f));
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f;
-
-    // Advance _bufferOffset
-    _bufferOffset += sizeof(floatval.f);
-  }
-  return;
-}
-
-void PTXKernelArguments::do_double() {
-  // If the parameter is a return value,
-  jvalue doubleval;
-  if (is_return_type()) {
-    // Allocate device memory for T_DOUBLE return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_DOUBLE_BYTE_SIZE);
-    if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
-    }
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(_dev_return_value));
-    // Push _dev_return_value to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-    // Advance _bufferOffset.
-    _bufferOffset += sizeof(doubleval.d);
-  } else {
-    // Get the next java argument and its value which should be a T_INT
-    oop arg = next_arg(T_FLOAT);
-    // Copy the java argument value to kernelArgBuffer
-    if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) {
-      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-      _success = false;
-      return;
-    }
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(doubleval.d));
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d;
-
-    // Advance _bufferOffset
-    _bufferOffset += sizeof(doubleval.d);
-    // For a 64-bit host, since size of double is 8, there is no need
-    // to pad the kernel argument buffer to ensure 8-byte alignment of
-    // the next potential argument to be pushed.
-  }
-  return;
-}
-
-void PTXKernelArguments::do_long() {
-  // If the parameter is a return value,
-  if (is_return_type()) {
-    // Allocate device memory for T_LONG return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_LONG_BYTE_SIZE);
-    if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
-    }
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(_dev_return_value));
-    // Push _dev_return_value to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-    // Advance _bufferOffset
-    _bufferOffset += sizeof(_dev_return_value);
-  } else {
-    // Get the next java argument and its value which should be a T_LONG
-    oop arg = next_arg(T_LONG);
-    // Copy the java argument value to kernelArgBuffer
-    jvalue val;
-    if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) {
-      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_LONG");
-      _success = false;
-      return;
-    }
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(val.j));
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j;
-
-    // Advance _bufferOffset
-    _bufferOffset += sizeof(val.j);
-    // For a 64-bit host, since size of long is 8, there is no need
-    // to pad the kernel argument buffer to ensure 8-byte alignment of
-    // the next potential argument to be pushed.
-  }
-  return;
-}
-
-void PTXKernelArguments::do_byte() {
-  // If the parameter is a return value,
-  if (is_return_type()) {
-    // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BYTE_SIZE);
-    if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
-    }
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(_dev_return_value));
-    // Push _dev_return_value to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-
-    // Advance _bufferOffset
-    _bufferOffset += sizeof(_dev_return_value);
-  } else {
-    // Get the next java argument and its value which should be a T_BYTE
-    oop arg = next_arg(T_BYTE);
-    // Copy the java argument value to kernelArgBuffer
-    jvalue val;
-    if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
-      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
-      _success = false;
-      return;
-    }
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(val.b));
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
-
-    // Advance _bufferOffset
-    _bufferOffset += sizeof(val.b);
-    // For a 64-bit host, since size of T_BYTE is 8, there is no need
-    // to pad the kernel argument buffer to ensure 8-byte alignment of
-    // the next potential argument to be pushed.
-  }
-  return;
-}
-
-void PTXKernelArguments::do_bool() {
-  // If the parameter is a return value,
-  if (is_return_type()) {
-    // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BOOLEAN_SIZE);
-    if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
-    }
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(_dev_return_value));
-    // Push _dev_return_value to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
-    _bufferOffset += sizeof(_dev_return_value);
-  } else {
-    // Get the next java argument and its value which should be a T_BOOLEAN
-    oop arg = next_arg(T_BOOLEAN);
-    // Copy the java argument value to kernelArgBuffer
-    jvalue val;
-    if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) {
-      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BOOLEAN");
-      _success = false;
-      return;
-    }
-    // Kernel arguments are expected to be naturally aligned.
-    // Insert padding into kernel argument buffer, if needed.
-    pad_kernel_argument_buffer(sizeof(val.z));
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z;
-
-    // Advance _bufferOffset
-    _bufferOffset += sizeof(val.z);
-  }
-  return;
-}
-
-void PTXKernelArguments::do_array(int begin, int end) {
-  // Get the next java argument and its value which should be a T_ARRAY
-  oop arg = next_arg(T_OBJECT);
-  assert(arg->is_array(), "argument value not an array");
-  // Size of array argument
-  int argSize = arg->size() * HeapWordSize;
-  // Device pointer to array argument.
-  gpu::Ptx::CUdeviceptr arrayArgOnDev;
-  int status;
-
-  // Register host memory for use by the device. Size in bytes
-  status = gpu::Ptx::_cuda_cu_mem_host_register(arg, argSize, GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP);
-  if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] *** Error (%d) Failed to register host memory for array argument on device",
-                  status);
-    _success = false;
-    return;
-  }
-  // Get device pointer
-  status = gpu::Ptx::_cuda_cu_mem_host_get_device_pointer(&arrayArgOnDev, arg, 0);
-  if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] *** Error (%d) Failed to get device pointer of mapped pinned memory of array argument.",
-                  status);
-    _success = false;
-    return;
-  }
-
-  // Kernel arguments are expected to be naturally aligned.
-  // Insert padding into kernel argument buffer, if needed.
-  pad_kernel_argument_buffer(sizeof(arrayArgOnDev));
-  // Push device array argument to _kernelBuffer
-  *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = arrayArgOnDev;
-
-  // Advance _bufferOffset
-  _bufferOffset += sizeof(arrayArgOnDev);
-  return;
-}
-
-void PTXKernelArguments::do_void() {
-  return;
-}
-
-// TODO implement other do_*
--- a/src/gpu/ptx/vm/ptxKernelArguments.hpp	Fri Jan 31 16:04:33 2014 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef KERNEL_ARGUMENTS_PTX_HPP
-#define KERNEL_ARGUMENTS_PTX_HPP
-
-#include "runtime/gpu.hpp"
-#include "runtime/signature.hpp"
-
-#define T_BYTE_SIZE        1
-#define T_BOOLEAN_SIZE     4
-#define T_INT_BYTE_SIZE    4
-#define T_FLOAT_BYTE_SIZE  4
-#define T_DOUBLE_BYTE_SIZE 8
-#define T_LONG_BYTE_SIZE   8
-#define T_OBJECT_BYTE_SIZE 8
-#define T_ARRAY_BYTE_SIZE  8
-
-class PTXKernelArguments : public SignatureIterator {
-public:
-  // Buffer holding CUdeviceptr values that represent the kernel arguments
-  char _kernelArgBuffer[1024];
-  // Current offset into _kernelArgBuffer
-  size_t _bufferOffset;
-  // Device pointer holding return value
-  gpu::Ptx::CUdeviceptr _dev_return_value;
-
-private:
-  // Array of java argument oops
-  arrayOop _args;
-  // Current index into _args
-  int _index;
-  // Flag to indicate successful creation of kernel argument buffer
-  bool _success;
-
-  // Get next java argument
-  oop next_arg(BasicType expectedType);
-
- public:
-  PTXKernelArguments(Symbol* signature, arrayOop args, bool is_static) : SignatureIterator(signature) {
-    this->_return_type = T_ILLEGAL;
-    _index = 0;
-    _args = args;
-    _success = true;
-    _bufferOffset = 0;
-    _dev_return_value = 0;
-    if (!is_static) {
-      // TODO : Create a device argument for receiver object and add it to _kernelBuffer
-      tty->print_cr("{CUDA] ****** TODO: Support for execution of non-static java methods not implemented yet.");
-    }
-    // Iterate over the entire signature
-    iterate();
-    assert((_success && (_index == args->length())), "arg count mismatch with signature");
-  }
-
-  inline char* device_argument_buffer() {
-    return _kernelArgBuffer;
-  }
-
-  inline size_t device_argument_buffer_size() {
-    return _bufferOffset;
-  }
-
-  // Get the return oop value
-  oop get_return_oop();
-
-  // get device return value ptr
-  gpu::Ptx::CUdeviceptr get_dev_return_value() {
-      return _dev_return_value;
-  }
-
-  /*
-   * Pad kernel argument buffer to naturally align for given size.
-   */
-  void pad_kernel_argument_buffer(size_t);
-
-  void do_byte();
-  void do_bool();
-  void do_int();
-  void do_float();
-  void do_double();
-  void do_long();
-  void do_array(int begin, int end);
-  void do_void();
-
-  inline void do_char()   {
-    /* TODO : To be implemented */
-    guarantee(false, "do_char:NYI");
-  }
-  inline void do_short()  {
-    /* TODO : To be implemented */
-    guarantee(false, "do_short:NYI");
-  }
-  inline void do_object() {
-    /* TODO : To be implemented */
-    guarantee(false, "do_object:NYI");
-  }
-
-  inline void do_object(int begin, int end) {
-    /* TODO : To be implemented */
-    guarantee(false, "do_object(II):NYI");
-  }
-
-};
-
-#endif  // KERNEL_ARGUMENTS_HPP
--- a/src/share/vm/runtime/gpu.hpp	Fri Jan 31 16:04:33 2014 +0200
+++ b/src/share/vm/runtime/gpu.hpp	Fri Jan 31 16:05:37 2014 +0100
@@ -29,8 +29,6 @@
 #include "oops/symbol.hpp"
 #include "utilities/array.hpp"

-class PTXKernelArguments;
-
 // Defines the interface to the graphics processor(s).
 class gpu : AllStatic {
  private: