changeset 11894:c7abc8411011

Fixed BasicPTXTest and IntegerPTXTest
author Morris Meyer <morris.meyer@oracle.com>
date Sat, 05 Oct 2013 10:31:18 -0400
parents ec267141f753
children 8e15a8b570e1
files graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java src/gpu/ptx/vm/gpu_ptx.cpp src/gpu/ptx/vm/gpu_ptx.hpp src/gpu/ptx/vm/ptxKernelArguments.cpp src/gpu/ptx/vm/ptxKernelArguments.hpp
diffstat 8 files changed, 174 insertions(+), 48 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Fri Oct 04 11:56:18 2013 +0200
+++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Sat Oct 05 10:31:18 2013 -0400
@@ -185,7 +185,7 @@
             assert v != null;
 
             if (isConstant(v)) {
-                return (emitConstant(v));
+                return (emitConstant(v, comma));
             } else {
                 return (emitRegister((Variable) v, comma));
             }
@@ -195,21 +195,31 @@
             return (" %r" + v.index + (comma ? "," : ""));
         }
 
-        public String emitConstant(Value v) {
+        public String emitConstant(Value v, boolean comma) { // Renders constant v as literal text; appends "," when comma is true.
             Constant constant = (Constant) v;
+            String str = null; // assigned by every case below that does not throw
 
             switch (v.getKind().getTypeChar()) {
                 case 'i':
-                    return (String.valueOf((int) constant.asLong()));
+                    str = String.valueOf((int) constant.asLong()); // 'i': the long value is narrowed to int before formatting
+                    break;
                 case 'f':
-                    return (String.valueOf(constant.asFloat()));
+                    str = String.valueOf(constant.asFloat()); // NOTE(review): Java float text; confirm PTX accepts this literal form
+                    break;
                 case 'j':
-                    return (String.valueOf(constant.asLong()));
+                    str = String.valueOf(constant.asLong()); // 'j': formatted as a full long
+                    break;
                 case 'd':
-                    return (String.valueOf(constant.asDouble()));
+                    str = String.valueOf(constant.asDouble()); // NOTE(review): Java double text; confirm PTX accepts this literal form
+                    break;
                 default:
                     throw GraalInternalError.shouldNotReachHere();
             }
+            if (comma) { // same trailing-separator convention as emitRegister
+                return (str + ",");
+            } else {
+                return str;
+            }
         }
     }
 
@@ -581,8 +591,33 @@
         }
 
         public void emit(PTXAssembler asm) {
-            asm.emitString(".param ." + typeForKind(dest.getKind()) + emitParameter(dest)  + (lastParameter ? "" : ","));
+            asm.emitString(".param ." + paramForKind(dest.getKind()) + emitParameter(dest)  + (lastParameter ? "" : ",")); // "," separates parameters; omitted after the last one
         }
+
+        public String paramForKind(Kind k) { // Maps a Kind's type char to the type name emitted after ".param ." in emit().
+            switch (k.getTypeChar()) {
+                case 'z':
+                case 'f': // NOTE(review): 'z' and 'f' both yield "s32"; presumably float is passed as raw 32 bits - confirm intended
+                    return "s32";
+                case 'b':
+                    return "s8";
+                case 's':
+                    return "s16";
+                case 'c':
+                    return "u16";
+                case 'i':
+                    return "s32";
+                case 'j':
+                    return "s64";
+                case 'd':
+                    return "f64";
+                case 'a':
+                    return "u64";
+                default: // any other type char is reported as an internal error
+                    throw GraalInternalError.shouldNotReachHere();
+            }
+        }
+
     }
 
     public final void popc_b32(Register d, Register a) {
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Fri Oct 04 11:56:18 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Sat Oct 05 10:31:18 2013 -0400
@@ -30,7 +30,6 @@
 /**
  * Test class for small Java methods compiled to PTX kernels.
  */
-@Ignore
 public class BasicPTXTest extends PTXTestBase {
 
     @Test
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java	Fri Oct 04 11:56:18 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java	Sat Oct 05 10:31:18 2013 -0400
@@ -29,7 +29,6 @@
 import com.oracle.graal.api.code.CompilationResult;
 
 /* PTX ISA 3.1 - 8.7.3 Floating-Point Instructions */
-@Ignore
 public class FloatPTXTest extends PTXTestBase {
 
     @Ignore
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java	Fri Oct 04 11:56:18 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java	Sat Oct 05 10:31:18 2013 -0400
@@ -30,8 +30,8 @@
 
     @Test
     public void testAdd() {
-        /*
-        Integer r4 = (Integer) invoke(compile("testAdd2B"), (byte) 6, (byte) 4);
+
+        /* Integer r4 = (Integer) invoke(compile("testAdd2B"), (byte) 6, (byte) 4);
         if (r4 == null) {
             printReport("testAdd2B FAILED");
         } else if (r4.intValue() == testAdd2B((byte) 6, (byte) 4)) {
@@ -49,14 +49,14 @@
             printReport("testAdd2I FAILED");
         }
 
-        /* Long r2 = (Long) invoke(compile("testAdd2L"), (long) 12, (long) 6);
+        Long r2 = (Long) invoke(compile("testAdd2L"), (long) 12, (long) 6);
         if (r2 == null) {
             printReport("testAdd2L FAILED");
         } else if (r2.longValue() == testAdd2L(12, 6)) {
             printReport("testAdd2L PASSED");
         } else {
             printReport("testAdd2L FAILED");
-        } 
+        }
 
         r4 = (Integer) invoke(compile("testAddIConst"), 5);
         if (r4 == null) {
@@ -74,7 +74,7 @@
             printReport("testAddConstI PASSED");
         } else {
             printReport("testAddConstI FAILED");
-        } */
+        }
     }
 
     public static int testAdd2I(int a, int b) {
@@ -97,7 +97,6 @@
         return 32 + a;
     }
 
-    @Ignore
     @Test
     public void testSub() {
 
@@ -155,7 +154,6 @@
         return 32 - a;
     }
 
-    @Ignore
     @Test
     public void testMul() {
 
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Fri Oct 04 11:56:18 2013 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Sat Oct 05 10:31:18 2013 -0400
@@ -180,9 +180,58 @@
 
     int total = nmp * ncores(major, minor);
 
+    int max_threads_per_block, warp_size, async_engines, can_map_host_memory, concurrent_kernels;
+
+    status = _cuda_cu_device_get_attribute(&max_threads_per_block,
+                                           GRAAL_CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
+                                           _cu_device);
+
+    if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: %d", _cu_device);
+        return 0;
+    }
+
+    status = _cuda_cu_device_get_attribute(&warp_size,
+                                           GRAAL_CU_DEVICE_ATTRIBUTE_WARP_SIZE,
+                                           _cu_device);
+
+    if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_WARP_SIZE: %d", _cu_device);
+        return 0;
+    }
+    
+    status = _cuda_cu_device_get_attribute(&async_engines,
+                                           GRAAL_CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT,
+                                           _cu_device);  // result is only reported under TraceGPUInteraction
+
+    if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT: %d", _cu_device);
+        return 0;
+    }
+
+    status = _cuda_cu_device_get_attribute(&can_map_host_memory,
+                                           GRAAL_CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,
+                                           _cu_device);
+
+    if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: %d", _cu_device);
+        return 0;
+    }
+
+    status = _cuda_cu_device_get_attribute(&concurrent_kernels,
+                                           GRAAL_CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS,
+                                           _cu_device);
+
+    if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: %d", _cu_device);
+        return 0;
+    }
+
     if (TraceGPUInteraction) {
         tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", _cu_device, major, minor);
-        tty->print_cr("[CUDA] Number of cores: %d", total);
+        tty->print_cr("[CUDA] Number of cores: %d async engines: %d can map host mem: %d concurrent kernels: %d",
+                      total, async_engines, can_map_host_memory, concurrent_kernels);
+        tty->print_cr("[CUDA] Max threads per block: %d warp size: %d", max_threads_per_block, warp_size);
     }
     return (total);
     
@@ -344,6 +393,17 @@
          ret.set_jint(return_val);
        }
        break;
+     case T_FLOAT:
+       {
+         float return_val;
+         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_FLOAT_BYTE_SIZE);
+         if (status != GRAAL_CUDA_SUCCESS) {
+           tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
+           return false;
+         }
+         ret.set_jfloat(return_val);
+       }
+       break;
      case T_LONG:
        {
          long return_val;
--- a/src/gpu/ptx/vm/gpu_ptx.hpp	Fri Oct 04 11:56:18 2013 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Sat Oct 05 10:31:18 2013 -0400
@@ -31,10 +31,15 @@
  */
 #define GRAAL_CUDA_SUCCESS                                   0
 /**< Device shares a unified address space with the host */
+#define GRAAL_CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK     1
 #define GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING        41
 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR  75
 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR  76
 #define GRAAL_CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT      16
+#define GRAAL_CU_DEVICE_ATTRIBUTE_WARP_SIZE                 10
+#define GRAAL_CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY       19
+#define GRAAL_CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS        31
+#define GRAAL_CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT        40
 #define GRAAL_CU_JIT_MAX_REGISTERS                           0
 #define GRAAL_CU_JIT_THREADS_PER_BLOCK                       1
 #define GRAAL_CU_JIT_INFO_LOG_BUFFER                         3
--- a/src/gpu/ptx/vm/ptxKernelArguments.cpp	Fri Oct 04 11:56:18 2013 +0200
+++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp	Sat Oct 05 10:31:18 2013 -0400
@@ -41,35 +41,67 @@
 }
 
 void PTXKernelArguments::do_int() {
-  if (is_after_invocation()) {
-    return;
-  }
-  // If the parameter is a return value,
-  if (is_return_type()) {
-    // Allocate device memory for T_INT return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE);
-    if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
+    if (is_after_invocation()) {  // nothing is pushed when this reports true
+        return;
+    }
+    // If the parameter is a return value, push a pointer to newly allocated device memory
+    if (is_return_type()) {
+        // Allocate device memory for T_INT return value pointer on device. Size in bytes
+        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE);
+        if (status != GRAAL_CUDA_SUCCESS) {
+            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+            _success = false;  // failure is signalled through _success; this method returns void
+            return;
+        }
+        // Push _return_value_ptr to _kernelArgBuffer at the current offset
+        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+        _bufferOffset += sizeof(_return_value_ptr);
+    } else {
+        // Get the next java argument and its value which should be a T_INT
+        oop arg = next_arg(T_INT);
+        // Unbox the java argument and copy its value to kernelArgBuffer
+        jvalue intval;
+        if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
+            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
+            _success = false;
+            return;
+        }
+        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i;  // NOTE(review): store is CUdeviceptr-wide but the offset advances by sizeof(intval.i) - confirm no overrun
+        _bufferOffset += sizeof(intval.i);
     }
-    // Push _return_value_ptr to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-    _bufferOffset += sizeof(_return_value_ptr);
-  } else {
-    // Get the next java argument and its value which should be a T_INT
-    oop arg = next_arg(T_INT);
-    // Copy the java argument value to kernelArgBuffer
-    jvalue intval;
-    if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
-      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-      _success = false;
-      return;
+    return;
+}
+
+void PTXKernelArguments::do_float() {  // Pushes either the T_FLOAT return-value pointer or the next Java float argument
+    if (is_after_invocation()) {
+        return;
     }
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i;
-    _bufferOffset += sizeof(intval.i);
-  }
-  return;
+    // If the parameter is a return value, push a pointer to newly allocated device memory
+    if (is_return_type()) {
+        // Allocate device memory for T_FLOAT return value pointer on device. Size in bytes
+        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_FLOAT_BYTE_SIZE);
+        if (status != GRAAL_CUDA_SUCCESS) {
+            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+            _success = false;  // failure is signalled through _success; this method returns void
+            return;
+        }
+        // Push _return_value_ptr to _kernelArgBuffer at the current offset
+        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+        _bufferOffset += sizeof(_return_value_ptr);
+    } else {
+        // Get the next java argument and its value which should be a T_FLOAT
+        oop arg = next_arg(T_FLOAT);
+        // Copy the raw float bits to kernelArgBuffer (an integer-typed store would convert and truncate the value)
+        jvalue floatval;
+        if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) {
+            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_FLOAT");
+            _success = false;
+            return;
+        }
+        *((jfloat*) &_kernelArgBuffer[_bufferOffset]) = floatval.f;
+        _bufferOffset += sizeof(floatval.f);
+    }
+    return;
 }
 
 void PTXKernelArguments::do_long() {
--- a/src/gpu/ptx/vm/ptxKernelArguments.hpp	Fri Oct 04 11:56:18 2013 +0200
+++ b/src/gpu/ptx/vm/ptxKernelArguments.hpp	Sat Oct 05 10:31:18 2013 -0400
@@ -30,6 +30,7 @@
 
 #define T_BYTE_SIZE       1
 #define T_INT_BYTE_SIZE   4
+#define T_FLOAT_BYTE_SIZE 4
 #define T_LONG_BYTE_SIZE  8
 #define T_ARRAY_BYTE_SIZE 8
 
@@ -99,6 +100,7 @@
 
   void do_byte();
   void do_int();
+  void do_float();
   void do_long();
   void do_array(int begin, int end);
   void do_void();
@@ -115,10 +117,6 @@
     /* TODO : To be implemented */
     guarantee(false, "do_short:NYI");
   }
-  inline void do_float()  {
-    /* TODO : To be implemented */
-    guarantee(false, "do_float:NYI");
-  }
   inline void do_double() {
     /* TODO : To be implemented */
     guarantee(false, "do_double:NYI");