diff src/gpu/ptx/vm/ptxKernelArguments.cpp @ 11821:d8659ad83fcc

PTX single-threaded array store, Warp annotation
author Morris Meyer <morris.meyer@oracle.com>
date Sat, 28 Sep 2013 21:06:12 -0400
parents 91e5f927af63
children c7abc8411011
line wrap: on
line diff
--- a/src/gpu/ptx/vm/ptxKernelArguments.cpp	Fri Sep 27 19:51:01 2013 +0200
+++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp	Sat Sep 28 21:06:12 2013 -0400
@@ -32,12 +32,18 @@
 // Get next java argument
 oop PTXKernelArguments::next_arg(BasicType expectedType) {
   assert(_index < _args->length(), "out of bounds");
-  oop arg=((objArrayOop) (_args))->obj_at(_index++);
-  assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
+
+  oop arg = ((objArrayOop) (_args))->obj_at(_index++);  // read element _index of _args, then advance _index
+  assert(expectedType == T_OBJECT ||  // T_OBJECT bypasses the boxing-type check below
+         java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
+
   return arg;
 }
 
-void PTXKernelArguments::do_int()    {
+void PTXKernelArguments::do_int() {
+  if (is_after_invocation()) {
+    return;
+  }
   // If the parameter is a return value,
   if (is_return_type()) {
     // Allocate device memory for T_INT return value pointer on device. Size in bytes
@@ -50,8 +56,7 @@
     // Push _return_value_ptr to _kernelBuffer
     *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
     _bufferOffset += sizeof(_return_value_ptr);
-  }
-  else {
+  } else {
     // Get the next java argument and its value which should be a T_INT
     oop arg = next_arg(T_INT);
     // Copy the java argument value to kernelArgBuffer
@@ -67,7 +72,10 @@
   return;
 }
 
-void PTXKernelArguments::do_long()    {
+void PTXKernelArguments::do_long() {
+  if (is_after_invocation()) {
+    return;
+  }
   // If the parameter is a return value,
   if (is_return_type()) {
     // Allocate device memory for T_LONG return value pointer on device. Size in bytes
@@ -80,8 +88,7 @@
     // Push _return_value_ptr to _kernelBuffer
     *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
     _bufferOffset += sizeof(_return_value_ptr);
-  }
-  else {
+  } else {
     // Get the next java argument and its value which should be a T_LONG
     oop arg = next_arg(T_LONG);
     // Copy the java argument value to kernelArgBuffer
@@ -97,34 +104,81 @@
   return;
 }
 
-void PTXKernelArguments::do_byte()    {
-  // If the parameter is a return value,
-  if (is_return_type()) {
-    // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
+void PTXKernelArguments::do_byte() {  // Handles one T_BYTE slot: a return-value pointer or a boxed byte argument.
+    if (is_after_invocation()) {  // nothing is done for T_BYTE when is_after_invocation() holds
+        return;
+    }
+    // Return-value case: the kernel receives a device pointer rather than a byte value.
+    if (is_return_type()) {
+        // Allocate T_BYTE_SIZE bytes on the device; the resulting handle is stored in _return_value_ptr.
+        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
+        if (status != GRAAL_CUDA_SUCCESS) {
+            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+            _success = false;  // record the failure in _success and stop processing this slot
+            return;
+        }
+        // Write _return_value_ptr into _kernelArgBuffer at _bufferOffset, then advance past it.
+        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+        _bufferOffset += sizeof(_return_value_ptr);
+    } else {
+        // Argument case: fetch the next Java argument; next_arg asserts it is a boxed T_BYTE.
+        oop arg = next_arg(T_BYTE);
+        // Unbox into val; get_value reporting anything other than T_BYTE is treated as an error.
+        jvalue val;
+        if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
+            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
+            _success = false;
+            return;
+        }
+        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;  // NOTE(review): store goes through a CUdeviceptr* but the offset advances by sizeof(val.b) only; confirm the width mismatch is intended
+        _bufferOffset += sizeof(val.b);
+    }
+    return;
+}
+
+void PTXKernelArguments::do_array(int begin, int end) {  // Copies an array argument to the device, or back to the host when is_after_invocation() is true. NOTE(review): begin/end are unused in this body.
+    gpu::Ptx::CUdeviceptr _array_ptr;
+    int status;
+
+    // Fetch the next Java argument as a generic object; passing T_OBJECT skips next_arg's boxing-type check.
+    oop arg = next_arg(T_OBJECT);
+    int array_size = arg->size() * HeapWordSize;  // byte count used for the allocation and both copies; presumably the whole object, header included (TODO confirm)
+
+    if (is_after_invocation()) {  // copy-back path: device memory -> arg
+        _array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);  // NOTE(review): _bufferOffset is read but never advanced on this path; confirm the caller positions it per argument
+        status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size);
+        if (status != GRAAL_CUDA_SUCCESS) {
+            tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
+            _success = false;
+            return;
+        } else {
+            // tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size);
+        }
+        return;
+    }
+    // Otherwise allocate array_size bytes on the device for the array; the handle is kept in _return_value_ptr.
+    status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size);
     if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
+        return;
+    }
+    status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size);  // upload array_size bytes starting at arg into the new device allocation
+    if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status);
+        _success = false;
+        return;
+    } else {
+        // tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size);
     }
     // Push _return_value_ptr to _kernelBuffer
     *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
     _bufferOffset += sizeof(_return_value_ptr);
-  }
-  else {
-    // Get the next java argument and its value which should be a T_BYTE
-    oop arg = next_arg(T_BYTE);
-    // Copy the java argument value to kernelArgBuffer
-    jvalue val;
-    if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
-      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
-      _success = false;
-      return;
-    }
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
-    _bufferOffset += sizeof(val.b);
-  }
-  return;
+    return;
+}
+
+void PTXKernelArguments::do_void() {
+    return;  // no-op: touches neither the Java argument list nor the kernel argument buffer
 }
 
 // TODO implement other do_*