# HG changeset patch # User Morris Meyer # Date 1380560627 14400 # Node ID 8d8f63069f58fa8f342cb105c486a3612436442d # Parent 6157a71e0a36f1003fa9f2f9562c310c1c9ad74d PTX warp limiter to available GPU processors diff -r 6157a71e0a36 -r 8d8f63069f58 graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java Mon Sep 30 17:03:14 2013 +0200 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java Mon Sep 30 13:03:47 2013 -0400 @@ -30,6 +30,7 @@ import com.oracle.graal.api.runtime.Graal; import com.oracle.graal.compiler.GraalCompiler; import com.oracle.graal.compiler.ptx.PTXBackend; +import com.oracle.graal.compiler.ptx.PTXTargetMethodAssembler; import com.oracle.graal.compiler.test.GraalCompilerTest; import com.oracle.graal.debug.Debug; import com.oracle.graal.hotspot.meta.HotSpotNmethod; @@ -138,6 +139,11 @@ } Object r; if (dimensionX != 1 || dimensionY != 1 || dimensionZ != 1) { + /* + * for now assert that the warp array block is no larger than the number of physical gpu cores. 
+ */ + assert dimensionX * dimensionY * dimensionZ <= PTXTargetMethodAssembler.getAvailableProcessors(); + r = ((HotSpotNmethod) installedCode).executeParallel(dimensionX, dimensionY, dimensionZ, executeArgs); } else { r = installedCode.executeVarargs(executeArgs); diff -r 6157a71e0a36 -r 8d8f63069f58 graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java --- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java Mon Sep 30 17:03:14 2013 +0200 +++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java Mon Sep 30 13:03:47 2013 -0400 @@ -36,6 +36,12 @@ private static CompilerToGPU toGPU = HotSpotGraalRuntime.graalRuntime().getCompilerToGPU(); private static boolean validDevice = toGPU.deviceInit(); + private static final int totalProcessors = (validDevice ? toGPU.availableProcessors() : 0); + + public static int getAvailableProcessors() { + return totalProcessors; + } + // detach ?? public PTXTargetMethodAssembler(TargetDescription target, CodeCacheProvider runtime, FrameMap frameMap, diff -r 6157a71e0a36 -r 8d8f63069f58 graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java --- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java Mon Sep 30 17:03:14 2013 +0200 +++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java Mon Sep 30 13:03:47 2013 -0400 @@ -45,6 +45,8 @@ */ boolean deviceDetach(); + int availableProcessors(); + /** * Attempts to generate and return a bound function to the * loaded method kernel on the GPU. 
@@ -56,6 +58,7 @@ Object executeExternalMethodVarargs(Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException; + Object executeParallelMethodVarargs(int dimX, int dimY, int dimZ, Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException; } diff -r 6157a71e0a36 -r 8d8f63069f58 graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java --- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java Mon Sep 30 17:03:14 2013 +0200 +++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java Mon Sep 30 13:03:47 2013 -0400 @@ -38,8 +38,12 @@ public native boolean deviceDetach(); - public native Object executeExternalMethodVarargs(Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException; + public native int availableProcessors(); + + public native Object executeExternalMethodVarargs(Object[] args, + HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException; public native Object executeParallelMethodVarargs(int dimX, int dimY, int dimZ, - Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException; + Object[] args, + HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException; } diff -r 6157a71e0a36 -r 8d8f63069f58 src/gpu/ptx/vm/gpu_ptx.cpp --- a/src/gpu/ptx/vm/gpu_ptx.cpp Mon Sep 30 17:03:14 2013 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Mon Sep 30 13:03:47 2013 -0400 @@ -50,6 +50,28 @@ gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh; gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree; + +/* + * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs + */ +int ncores(int major, int minor) { + int device_type = (major << 4) + minor; /* pack as 0xMm (3.5 -> 0x35); '+' binds tighter than '<<', so parenthesize */ + + switch (device_type) { + case 0x10: return 8; + case 0x11: return 8; + case 0x12: return 8; + case 0x13: return 8; + case 0x20: 
return 32; + case 0x21: return 48; + case 0x30: return 192; + case 0x35: return 192; + default: + tty->print_cr("[CUDA] Warning: Unhandled device %x", device_type); + return 0; + } +} + bool gpu::Ptx::initialize_gpu() { /* Initialize CUDA driver API */ @@ -95,24 +117,7 @@ } /* Get device attributes */ - int minor, major, unified_addressing; - status = _cuda_cu_device_get_attribute(&minor, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, _cu_device); - - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] Failed to get minor attribute of device: %d", _cu_device); - return false; - } - - status = _cuda_cu_device_get_attribute(&major, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, _cu_device); - - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] Failed to get major attribute of device: %d", _cu_device); - return false; - } - - if (TraceGPUInteraction) { - tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", _cu_device, major, minor); - } + int unified_addressing; status = _cuda_cu_device_get_attribute(&unified_addressing, GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, _cu_device); @@ -139,9 +144,50 @@ tty->print_cr("[CUDA] Using %s", device_name); } + return true; } +unsigned int gpu::Ptx::total_cores() { + + int minor, major, nmp; + int status = _cuda_cu_device_get_attribute(&minor, + GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, + _cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get minor attribute of device: %d", _cu_device); + return 0; + } + + status = _cuda_cu_device_get_attribute(&major, + GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, + _cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get major attribute of device: %d", _cu_device); + return 0; + } + + status = _cuda_cu_device_get_attribute(&nmp, + GRAAL_CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + _cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get 
number of MPs on device: %d", _cu_device); + return 0; + } + + int total = nmp * ncores(major, minor); + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", _cu_device, major, minor); + tty->print_cr("[CUDA] Number of cores: %d", total); + } + return (total); + +} + void *gpu::Ptx::generate_kernel(unsigned char *code, int code_len, const char *name) { struct CUmod_st * cu_module; diff -r 6157a71e0a36 -r 8d8f63069f58 src/gpu/ptx/vm/gpu_ptx.hpp --- a/src/gpu/ptx/vm/gpu_ptx.hpp Mon Sep 30 17:03:14 2013 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.hpp Mon Sep 30 13:03:47 2013 -0400 @@ -34,6 +34,7 @@ #define GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING 41 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76 +#define GRAAL_CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT 16 #define GRAAL_CU_JIT_MAX_REGISTERS 0 #define GRAAL_CU_JIT_THREADS_PER_BLOCK 1 #define GRAAL_CU_JIT_INFO_LOG_BUFFER 3 @@ -73,6 +74,7 @@ protected: static bool probe_linkage(); static bool initialize_gpu(); + static unsigned int total_cores(); static void * generate_kernel(unsigned char *code, int code_len, const char *name); static bool execute_warp(int dimX, int dimY, int dimZ, address kernel, PTXKernelArguments & ka, JavaValue &ret); static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret); diff -r 6157a71e0a36 -r 8d8f63069f58 src/share/vm/graal/graalCompilerToGPU.cpp --- a/src/share/vm/graal/graalCompilerToGPU.cpp Mon Sep 30 17:03:14 2013 +0200 +++ b/src/share/vm/graal/graalCompilerToGPU.cpp Mon Sep 30 13:03:47 2013 -0400 @@ -111,7 +111,7 @@ HandleMark hm; if (gpu::is_available() == false || gpu::has_gpu_linkage() == false && gpu::is_initialized()) { - tty->print_cr("executeExternalMethodVarargs - not available / no linkage / not initialized"); + tty->print_cr("executeParallelMethodVarargs - not available / no linkage / not initialized"); return NULL; } jlong nmethodValue = 
HotSpotInstalledCode::codeBlob(hotspotInstalledCode); @@ -155,6 +155,14 @@ return gpu::is_initialized(); C2V_END +C2V_VMENTRY(jint, availableProcessors, (JNIEnv *env, jobject)) + if (gpu::is_available() == false || gpu::has_gpu_linkage() == false) { + tty->print_cr("availableProcessors - not available / no linkage"); + return 0; /* jint result: report zero processors when the GPU cannot be used */ + } + return gpu::available_processors(); +C2V_END + C2V_VMENTRY(jboolean, deviceDetach, (JNIEnv *env, jobject)) return true; C2V_END @@ -199,6 +207,7 @@ {CC"generateKernel", CC"([B" STRING ")"GPUSPACE_METHOD, FN_PTR(generateKernel)}, {CC"deviceInit", CC"()Z", FN_PTR(deviceInit)}, {CC"deviceDetach", CC"()Z", FN_PTR(deviceDetach)}, + {CC"availableProcessors", CC"()I", FN_PTR(availableProcessors)}, {CC"executeExternalMethodVarargs", CC"(["OBJECT HS_INSTALLED_CODE")"OBJECT, FN_PTR(executeExternalMethodVarargs)}, {CC"executeParallelMethodVarargs", CC"(III["OBJECT HS_INSTALLED_CODE")"OBJECT, FN_PTR(executeParallelMethodVarargs)}, }; diff -r 6157a71e0a36 -r 8d8f63069f58 src/share/vm/runtime/gpu.cpp --- a/src/share/vm/runtime/gpu.cpp Mon Sep 30 17:03:14 2013 +0200 +++ b/src/share/vm/runtime/gpu.cpp Mon Sep 30 13:03:47 2013 -0400 @@ -81,3 +81,13 @@ return false; } +int gpu::available_processors() { + if (gpu::has_gpu_linkage()) { + if (gpu::get_target_il_type() == gpu::PTX) { + return (gpu::Ptx::total_cores()); + } + // Add processor-count queries for other GPU targets here + } + return 0; +} + diff -r 6157a71e0a36 -r 8d8f63069f58 src/share/vm/runtime/gpu.hpp --- a/src/share/vm/runtime/gpu.hpp Mon Sep 30 17:03:14 2013 +0200 +++ b/src/share/vm/runtime/gpu.hpp Mon Sep 30 13:03:47 2013 -0400 @@ -43,6 +43,8 @@ static void probe_gpu(); static void initialize_gpu(); + + static int available_processors(); static void * generate_kernel(unsigned char *code, int code_len, const char *name);