# HG changeset patch # User Morris Meyer # Date 1376159910 14400 # Node ID 1cd1f8ff70a151875a13592c36019d2ba251347f # Parent 12661a4492266e8855a6fd43294a5fe9666c6e9e CR-20: PTX kernel invocation with arguments - from Bharadwaj diff -r 12661a449226 -r 1cd1f8ff70a1 graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java Sat Aug 10 10:08:56 2013 +0200 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java Sat Aug 10 14:38:30 2013 -0400 @@ -31,11 +31,11 @@ @Test public void testAdd() { - compile("testAdd2I"); - compile("testAdd2L"); - compile("testAdd2B"); - compile("testAddIConst"); - compile("testAddConstI"); + invoke(compile("testAdd2I"), 8, 4); + invoke(compile("testAdd2L"), 12, 6); + invoke(compile("testAdd2B"), 6, 4); + invoke(compile("testAddIConst"), 5); + invoke(compile("testAddConstI"), 7); } public static int testAdd2I(int a, int b) { @@ -60,10 +60,10 @@ @Test public void testSub() { - compile("testSub2I"); - compile("testSub2L"); - compile("testSubIConst"); - compile("testSubConstI"); + invoke(compile("testSub2I"), 8, 4); + invoke(compile("testSub2L"), 12, 6); + invoke(compile("testSubIConst"), 35); + invoke(compile("testSubConstI"), 12); } public static int testSub2I(int a, int b) { @@ -84,10 +84,10 @@ @Test public void testMul() { - compile("testMul2I"); - compile("testMul2L"); - compile("testMulIConst"); - compile("testMulConstI"); + invoke(compile("testMul2I"), 8, 4); + invoke(compile("testMul2L"), 12, 6); + invoke(compile("testMulIConst"), 4); + invoke(compile("testMulConstI"), 5); } public static int testMul2I(int a, int b) { @@ -108,10 +108,10 @@ @Test public void testDiv() { - compile("testDiv2I"); - compile("testDiv2L"); - compile("testDivIConst"); - compile("testDivConstI"); + invoke(compile("testDiv2I"), 8, 4); + invoke(compile("testDiv2L"), 12, 6); 
+ invoke(compile("testDivIConst"), 64); + invoke(compile("testDivConstI"), 8); } public static int testDiv2I(int a, int b) { @@ -132,8 +132,8 @@ @Test public void testRem() { - compile("testRem2I"); - compile("testRem2L"); + invoke(compile("testRem2I"), 8, 4); + invoke(compile("testRem2L"), 12, 6); } public static int testRem2I(int a, int b) { @@ -146,12 +146,12 @@ @Test public void testIntConversion() { - compile("testI2L"); - compile("testL2I"); - compile("testI2C"); - compile("testI2B"); - compile("testI2F"); - compile("testI2D"); + invoke(compile("testI2L"), 8); + invoke(compile("testL2I"), 12); + invoke(compile("testI2C"), 65); + invoke(compile("testI2B"), 9); + invoke(compile("testI2F"), 17); + invoke(compile("testI2D"), 22); } public static long testI2L(int a) { diff -r 12661a449226 -r 1cd1f8ff70a1 graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java Sat Aug 10 10:08:56 2013 +0200 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java Sat Aug 10 14:38:30 2013 -0400 @@ -33,6 +33,7 @@ import com.oracle.graal.compiler.test.GraalCompilerTest; import com.oracle.graal.debug.Debug; import com.oracle.graal.hotspot.meta.HotSpotRuntime; +import com.oracle.graal.hotspot.meta.HotSpotResolvedJavaMethod; import com.oracle.graal.java.GraphBuilderConfiguration; import com.oracle.graal.java.GraphBuilderPhase; import com.oracle.graal.nodes.StructuredGraph; @@ -42,6 +43,7 @@ import com.oracle.graal.phases.PhasePlan.PhasePosition; import com.oracle.graal.phases.tiers.*; import com.oracle.graal.ptx.PTX; +import java.lang.reflect.Modifier; public abstract class PTXTestBase extends GraalCompilerTest { @@ -69,7 +71,8 @@ * of ECC failure on kernel invocation. 
*/ CompilationResult result = GraalCompiler.compileGraph(graph, cc, graph.method(), runtime, - graalRuntime().getReplacements(), ptxBackend, target, null, phasePlan, + graalRuntime().getReplacements(), ptxBackend, target, + null, phasePlan, OptimisticOptimizations.NONE, new SpeculationLog(), Suites.createDefaultSuites(), new ExternalCompilationResult()); return result; @@ -81,7 +84,15 @@ protected void invoke(CompilationResult result, Object... args) { try { - Object[] executeArgs = argsWithReceiver(this, args); + if (((ExternalCompilationResult) result).getEntryPoint() == 0) { + Debug.dump(result, "[CUDA] *** Null entry point - Not launching kernel"); + return; + } + + /* Check if the method compiled is static */ + HotSpotResolvedJavaMethod compiledMethod = (HotSpotResolvedJavaMethod) sg.method(); + boolean isStatic = Modifier.isStatic(compiledMethod.getModifiers()); + Object[] executeArgs = argsWithReceiver((isStatic ? null : this), args); HotSpotRuntime hsr = (HotSpotRuntime) runtime; InstalledCode installedCode = hsr.addExternalMethod(sg.method(), result, sg); installedCode.executeVarargs(executeArgs); diff -r 12661a449226 -r 1cd1f8ff70a1 src/gpu/ptx/vm/gpu_ptx.cpp --- a/src/gpu/ptx/vm/gpu_ptx.cpp Sat Aug 10 10:08:56 2013 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Sat Aug 10 14:38:30 2013 -0400 @@ -66,9 +66,9 @@ } } -bool gpu::execute_kernel(address kernel) { +bool gpu::execute_kernel(address kernel, JavaCallArguments * jca) { if (gpu::has_gpu_linkage()) { - return (gpu::Ptx::execute_kernel(kernel)); + return (gpu::Ptx::execute_kernel(kernel, jca)); } else { return false; } @@ -120,7 +120,7 @@ } /* Get device attributes */ - int minor, major; + int minor, major, unified_addressing; status = _cuda_cu_device_get_attribute(&minor, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cu_device); if (status != GRAAL_CUDA_SUCCESS) { @@ -139,6 +139,18 @@ tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", cu_device, major, minor); } + status = 
_cuda_cu_device_get_attribute(&unified_addressing, GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to query unified addressing mode of device: %d", cu_device); + return false; + } + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Unified addressing support on device %d: %d", cu_device, unified_addressing); + } + + /* Get device name */ char device_name[256]; status = _cuda_cu_device_get_name(device_name, 256, cu_device); @@ -218,7 +230,7 @@ status = _cuda_cu_module_get_function(&cu_function, cu_module, name); if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] Failed to get function %s", name); + tty->print_cr("[CUDA] *** Error: Failed to get function %s", name); return NULL; } @@ -228,7 +240,7 @@ return cu_function; } -bool gpu::Ptx::execute_kernel(address kernel) { +bool gpu::Ptx::execute_kernel(address kernel, JavaCallArguments * jca) { // grid dimensionality unsigned int gridX = 1; unsigned int gridY = 1; @@ -241,6 +253,15 @@ int *cu_function = (int *)kernel; + char * paramBuffer = (char *) jca->parameters(); + size_t paramBufferSz = (size_t) jca->size_of_parameters(); + + void * config[] = { + GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, paramBuffer, + GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &paramBufferSz, + GRAAL_CU_LAUNCH_PARAM_END + }; + if (kernel == NULL) { return false; } @@ -251,7 +272,7 @@ int status = _cuda_cu_launch_kernel(cu_function, gridX, gridY, gridZ, blockX, blockY, blockZ, - 0, NULL, NULL, NULL); + 0, NULL, NULL, config); if (status != GRAAL_CUDA_SUCCESS) { tty->print_cr("[CUDA] Failed to launch kernel"); return false; diff -r 12661a449226 -r 1cd1f8ff70a1 src/gpu/ptx/vm/gpu_ptx.hpp --- a/src/gpu/ptx/vm/gpu_ptx.hpp Sat Aug 10 10:08:56 2013 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.hpp Sat Aug 10 14:38:30 2013 -0400 @@ -30,6 +30,8 @@ * These definitions are for convenience. 
*/ #define GRAAL_CUDA_SUCCESS 0 +/**< Device shares a unified address space with the host */ +#define GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING 41 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76 #define GRAAL_CU_JIT_MAX_REGISTERS 0 @@ -38,6 +40,33 @@ #define GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES 4 #define GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU 209 +/** + * End of array terminator for the extra parameter to + * ::cuLaunchKernel + */ +#define GRAAL_CU_LAUNCH_PARAM_END ((void*)0x00) + +/** + * Indicator that the next value in the extra parameter to + * ::cuLaunchKernel will be a pointer to a buffer containing all kernel + * parameters used for launching kernel f. This buffer needs to + * honor all alignment/padding requirements of the individual parameters. + * If ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the + * extra array, then ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER will have no + * effect. + */ +#define GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) + +/** + * Indicator that the next value in the extra parameter to + * ::cuLaunchKernel will be a pointer to a size_t which contains the + * size of the buffer specified with ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER. + * It is required that ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER also be specified + * in the extra array if the value associated with + * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. 
+ */ +#define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) + class Ptx { friend class gpu; @@ -45,7 +74,7 @@ static bool probe_linkage(); static bool initialize_gpu(); static void * generate_kernel(unsigned char *code, int code_len, const char *name); - static bool execute_kernel(address kernel); + static bool execute_kernel(address kernel, JavaCallArguments *); private: typedef int (*cuda_cu_init_func_t)(unsigned int); diff -r 12661a449226 -r 1cd1f8ff70a1 src/share/vm/graal/graalCompilerToGPU.cpp --- a/src/share/vm/graal/graalCompilerToGPU.cpp Sat Aug 10 10:08:56 2013 +0200 +++ b/src/share/vm/graal/graalCompilerToGPU.cpp Sat Aug 10 14:38:30 2013 -0400 @@ -91,8 +91,11 @@ jlong startValue = HotSpotInstalledCode::start(hotspotInstalledCode); // JavaCalls::call(&result, mh, &jca, CHECK_NULL); - tty->print_cr("executeExternalMethodVarargs: start: %x", (address)startValue); - gpu::execute_kernel((address)startValue); + // tty->print_cr("executeExternalMethodVarargs: start: %x", (address)startValue); + + if (!gpu::execute_kernel((address)startValue, &jca)) { + return NULL; + } if (jap.get_ret_type() == T_VOID) { return NULL; diff -r 12661a449226 -r 1cd1f8ff70a1 src/share/vm/runtime/gpu.hpp --- a/src/share/vm/runtime/gpu.hpp Sat Aug 10 10:08:56 2013 +0200 +++ b/src/share/vm/runtime/gpu.hpp Sat Aug 10 14:38:30 2013 -0400 @@ -43,7 +43,7 @@ static void * generate_kernel(unsigned char *code, int code_len, const char *name); - static bool execute_kernel(address kernel); + static bool execute_kernel(address kernel, JavaCallArguments * jca); static void set_available(bool value) { _available = value;