diff src/gpu/ptx/vm/ptxKernelArguments.cpp @ 12566:c17bfad2fa98

Merge.
author Christian Humer <christian.humer@gmail.com>
date Thu, 24 Oct 2013 15:56:08 +0200
parents 11b086b1bae4
children 1a7e7011a341
line wrap: on
line diff
--- a/src/gpu/ptx/vm/ptxKernelArguments.cpp	Mon Oct 21 11:07:47 2013 +0200
+++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp	Thu Oct 24 15:56:08 2013 +0200
@@ -32,127 +32,132 @@
-// Get next java argument
+// Fetch the next Java argument from _args and advance _index; asserts it boxes expectedType (any object passes for T_OBJECT)
 oop PTXKernelArguments::next_arg(BasicType expectedType) {
   assert(_index < _args->length(), "out of bounds");
-
   oop arg = ((objArrayOop) (_args))->obj_at(_index++);
   assert(expectedType == T_OBJECT ||
          java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
-
   return arg;
 }
 
 void PTXKernelArguments::do_int() {
-    if (is_after_invocation()) {
+  // Marshal a T_INT parameter. A return value is passed as a device pointer slot rather than by value.
+  if (is_return_type()) {
+    if (is_kernel_arg_setup()) {
+      // Setup pass only: allocate T_INT_BYTE_SIZE bytes of device memory for the return value
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_INT_BYTE_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
         return;
+      }
+      // Push _dev_return_value to _kernelBuffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // If the parameter is a return value,
-    if (is_return_type()) {
-        // Allocate device memory for T_INT return value pointer on device. Size in bytes
-        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-            _success = false;
-            return;
-        }
-        // Push _return_value_ptr to _kernelBuffer
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-        _bufferOffset += sizeof(_return_value_ptr);
-    } else {
-        // Get the next java argument and its value which should be a T_INT
-        oop arg = next_arg(T_INT);
-        // Copy the java argument value to kernelArgBuffer
-        jvalue intval;
-        if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
-            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-            _success = false;
-            return;
-        }
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i;
-        _bufferOffset += sizeof(intval.i);
+    _bufferOffset += sizeof(_dev_return_value);
+  } else {
+    // Get the next java argument and its value which should be a T_INT
+    oop arg = next_arg(T_INT);
+    // Unbox the argument; it is written to the buffer only during the setup pass
+    jvalue intval;
+    if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
+      _success = false;
+      return;
     }
-    return;
+    if (is_kernel_arg_setup()) {
+      *((jint*) &_kernelArgBuffer[_bufferOffset]) = intval.i;  // write exactly sizeof(jint) bytes, matching the offset step
+    }
+    // Advance _bufferOffset whether or not this is the setup pass
+    _bufferOffset += sizeof(intval.i);
+  }
+  return;
 }
 
 void PTXKernelArguments::do_float() {
-    if (is_after_invocation()) {
+  // Marshal a T_FLOAT parameter. A return value is passed as a device pointer slot rather than by value.
+  if (is_return_type()) {
+    if (is_kernel_arg_setup()) {
+      // Setup pass only: allocate T_FLOAT_BYTE_SIZE bytes of device memory for the return value
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_FLOAT_BYTE_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
         return;
+      }
+      // Push _dev_return_value to _kernelBuffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // If the parameter is a return value,
-    if (is_return_type()) {
-        // Allocate device memory for T_INT return value pointer on device. Size in bytes
-        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_FLOAT_BYTE_SIZE);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-            _success = false;
-            return;
-        }
-        // Push _return_value_ptr to _kernelBuffer
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-        _bufferOffset += sizeof(_return_value_ptr);
-    } else {
-        // Get the next java argument and its value which should be a T_INT
-        oop arg = next_arg(T_FLOAT);
-        // Copy the java argument value to kernelArgBuffer
-        jvalue floatval;
-        if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) {
-            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-            _success = false;
-            return;
-        }
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f;
-        _bufferOffset += sizeof(floatval.f);
+    // Advance _bufferOffset whether or not this is the setup pass
+    _bufferOffset += sizeof(_dev_return_value);
+  } else {
+    // Get the next java argument and its value which should be a T_FLOAT
+    oop arg = next_arg(T_FLOAT);
+    // Unbox the argument; it is written to the buffer only during the setup pass
+    jvalue floatval;
+    if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_FLOAT");
+      _success = false;
+      return;
     }
-    return;
+    if (is_kernel_arg_setup()) {
+      *((jfloat*) &_kernelArgBuffer[_bufferOffset]) = floatval.f;  // store the float's bits; an integer cast would convert its value
+    }
+    // Advance _bufferOffset whether or not this is the setup pass
+    _bufferOffset += sizeof(floatval.f);
+  }
+  return;
 }
 
 void PTXKernelArguments::do_double() {
-    if (is_after_invocation()) {
+  // Marshal a T_DOUBLE parameter. A return value is passed as a device pointer slot rather than by value.
+  jvalue doubleval;
+  if (is_return_type()) {
+    if (is_kernel_arg_setup()) {
+      // Setup pass only: allocate T_DOUBLE_BYTE_SIZE bytes of device memory for the return value
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_DOUBLE_BYTE_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
         return;
+      }
+      // Push _dev_return_value to _kernelBuffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // If the parameter is a return value,
-    jvalue doubleval;
-    if (is_return_type()) {
-        // Allocate device memory for T_INT return value pointer on device. Size in bytes
-        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_DOUBLE_BYTE_SIZE);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-            _success = false;
-            return;
-        }
-        // Push _return_value_ptr to _kernelBuffer
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-        // _bufferOffset += sizeof(_return_value_ptr);
-        _bufferOffset += sizeof(doubleval.d);
-    } else {
-        // Get the next java argument and its value which should be a T_INT
-        oop arg = next_arg(T_FLOAT);
-        // Copy the java argument value to kernelArgBuffer
-        if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) {
-            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-            _success = false;
-            return;
-        }
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d;
-        _bufferOffset += sizeof(doubleval.d);
+    // Advance by sizeof(jdouble). NOTE(review): the other handlers step by sizeof(_dev_return_value) here -- confirm intended
+    _bufferOffset += sizeof(doubleval.d);
+  } else {
+    // Get the next java argument and its value which should be a T_DOUBLE
+    oop arg = next_arg(T_DOUBLE);
+    // Unbox the argument; it is written to the buffer only during the setup pass
+    if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_DOUBLE");
+      _success = false;
+      return;
     }
-    return;
+    if (is_kernel_arg_setup()) {
+      *((jdouble*) &_kernelArgBuffer[_bufferOffset]) = doubleval.d;  // store the double's bits; an integer cast would convert its value
+    }
+    // Advance _bufferOffset whether or not this is the setup pass
+    _bufferOffset += sizeof(doubleval.d);
+  }
+  return;
 }
 
 void PTXKernelArguments::do_long() {
-  if (is_after_invocation()) {
-    return;
-  }
   // If the parameter is a return value,
   if (is_return_type()) {
-    // Allocate device memory for T_LONG return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE);
-    if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
+    if (is_kernel_arg_setup()) {
+      // Allocate device memory for T_LONG return value pointer on device. Size in bytes
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_LONG_BYTE_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
+        return;
+      }
+      // Push _dev_return_value to _kernelBuffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // Push _return_value_ptr to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-    _bufferOffset += sizeof(_return_value_ptr);
+    // Advance _bufferOffset
+    _bufferOffset += sizeof(_dev_return_value);
   } else {
     // Get the next java argument and its value which should be a T_LONG
     oop arg = next_arg(T_LONG);
@@ -163,119 +168,132 @@
       _success = false;
       return;
     }
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j;
+    if (is_kernel_arg_setup()) {
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j;
+    }
+    // Advance _bufferOffset
     _bufferOffset += sizeof(val.j);
   }
   return;
 }
 
 void PTXKernelArguments::do_byte() {
-    if (is_after_invocation()) {
+  // Marshal a T_BYTE parameter. A return value is passed as a device pointer slot rather than by value.
+  if (is_return_type()) {
+    if (is_kernel_arg_setup()) {
+      // Setup pass only: allocate T_BYTE_SIZE bytes of device memory for the return value
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BYTE_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
         return;
+      }
+      // Push _dev_return_value to _kernelBuffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // If the parameter is a return value,
-    if (is_return_type()) {
-        // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
-        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-            _success = false;
-            return;
-        }
-        // Push _return_value_ptr to _kernelBuffer
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-        _bufferOffset += sizeof(_return_value_ptr);
-    } else {
-        // Get the next java argument and its value which should be a T_BYTE
-        oop arg = next_arg(T_BYTE);
-        // Copy the java argument value to kernelArgBuffer
-        jvalue val;
-        if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
-            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
-            _success = false;
-            return;
-        }
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
-        _bufferOffset += sizeof(val.b);
+    // Advance _bufferOffset whether or not this is the setup pass
+    _bufferOffset += sizeof(_dev_return_value);
+  } else {
+    // Get the next java argument and its value which should be a T_BYTE
+    oop arg = next_arg(T_BYTE);
+    // Unbox the argument; it is written to the buffer only during the setup pass
+    jvalue val;
+    if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
+      _success = false;
+      return;
     }
-    return;
+    if (is_kernel_arg_setup()) {
+      *((jbyte*) &_kernelArgBuffer[_bufferOffset]) = val.b;  // write exactly one byte, matching the offset step
+    }
+    // Advance _bufferOffset whether or not this is the setup pass
+    _bufferOffset += sizeof(val.b);
+  }
+  return;
 }
 
 void PTXKernelArguments::do_bool() {
-    if (is_after_invocation()) {
+  // Marshal a T_BOOLEAN parameter. A return value is passed as a device pointer slot rather than by value.
+  if (is_return_type()) {
+    if (is_kernel_arg_setup()) {
+      // Setup pass only: allocate T_BOOLEAN_SIZE bytes of device memory for the return value
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BOOLEAN_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
         return;
+      }
+      // Push _dev_return_value to _kernelBuffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // If the parameter is a return value,
-    if (is_return_type()) {
-        // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
-        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BOOLEAN_SIZE);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-            _success = false;
-            return;
-        }
-        // Push _return_value_ptr to _kernelBuffer
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-        _bufferOffset += sizeof(_return_value_ptr);
-    } else {
-        // Get the next java argument and its value which should be a T_BYTE
-        oop arg = next_arg(T_BYTE);
-        // Copy the java argument value to kernelArgBuffer
-        jvalue val;
-        if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) {
-            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
-            _success = false;
-            return;
-        }
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z;
-        _bufferOffset += sizeof(val.z);
+    // Advance _bufferOffset whether or not this is the setup pass
+    _bufferOffset += sizeof(_dev_return_value);
+  } else {
+    // Get the next java argument and its value which should be a T_BOOLEAN
+    oop arg = next_arg(T_BOOLEAN);
+    // Unbox the argument; it is written to the buffer only during the setup pass
+    jvalue val;
+    if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BOOLEAN");
+      _success = false;
+      return;
     }
-    return;
+    if (is_kernel_arg_setup()) {
+      *((jboolean*) &_kernelArgBuffer[_bufferOffset]) = val.z;  // write exactly sizeof(jboolean) bytes, matching the offset step
+    }
+    // Advance _bufferOffset whether or not this is the setup pass
+    _bufferOffset += sizeof(val.z);
+  }
+  return;
 }
 
 void PTXKernelArguments::do_array(int begin, int end) {
-    gpu::Ptx::CUdeviceptr _array_ptr;
-    int status;
-
-    // Get the next java argument and its value which should be a T_ARRAY
-    oop arg = next_arg(T_OBJECT);
-    int array_size = arg->size() * HeapWordSize;
+  // Marshal an array parameter (begin/end are not used in this body): fetch the next Java argument as T_OBJECT
+  oop arg = next_arg(T_OBJECT);
+  assert(arg->is_array(), "argument value not an array");
+  // Byte count for both copies: arg->size() * HeapWordSize (presumably the whole object, size() in heap words -- TODO confirm)
+  int argSize = arg->size() * HeapWordSize;
+  // Device address of the array copy: set by the allocation in the setup pass, re-read from the buffer otherwise
+  gpu::Ptx::CUdeviceptr arrayArgOnDev;
+  int status;
 
-    if (is_after_invocation()) {
-        _array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);
-        status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
-            _success = false;
-            return;
-        } else {
-            // tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size);
-        }
-        return;
+  if (is_kernel_arg_setup()) {
+    // Allocate argSize bytes of device memory for the array argument
+    status = gpu::Ptx::_cuda_cu_memalloc(&arrayArgOnDev, argSize);
+    if (status != GRAAL_CUDA_SUCCESS) {
+      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for array argument on device",
+                    status);
+      _success = false;
+      return;
     }
-    // Allocate device memory for T_ARRAY return value pointer on device. Size in bytes
-    status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size);
-    if (status != GRAAL_CUDA_SUCCESS) {
-        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-        _success = false;
-        return;
-    }
-    status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size);
+    // Copy array argument to device. NOTE(review): arrayArgOnDev is not released in this function if the copy fails
+    status = gpu::Ptx::_cuda_cu_memcpy_htod(arrayArgOnDev, arg, argSize);
     if (status != GRAAL_CUDA_SUCCESS) {
-        tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status);
-        _success = false;
-        return;
-    } else {
-        // tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size);
+      tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument content to device memory",
+                    status);
+      _success = false;
+      return;
     }
-    // Push _return_value_ptr to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-    _bufferOffset += sizeof(_return_value_ptr);
-    return;
+
+    // Push the device address of the array copy into the kernel argument buffer
+    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = arrayArgOnDev;
+  } else {  // Not the setup pass: copy the device array stored at this slot back into arg
+    arrayArgOnDev = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);
+    status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, arrayArgOnDev, argSize);
+    if (status != GRAAL_CUDA_SUCCESS) {
+      tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
+      _success = false;
+      return;
+    }
+  }
+
+  // Advance _bufferOffset in both passes (the error paths above return before reaching this)
+  _bufferOffset += sizeof(arrayArgOnDev);
+  return;
 }
 
 void PTXKernelArguments::do_void() {
-    return;
+  return;  // No-op: writes nothing to the argument buffer and leaves _bufferOffset unchanged
 }
 
 // TODO implement other do_*