# HG changeset patch # User S.Bharadwaj Yadavalli # Date 1392061122 18000 # Node ID 0995dcbd6dd8d62f8fc77fc19c5526c445420692 # Parent eb48fac53e6f7e6c1855775aa92d6005203e6b93 Change CUDA context management to support multiple executions of a kernel. Exclude GPU offloading of lambdas from java.* library code. diff -r eb48fac53e6f -r 0995dcbd6dd8 graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java --- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java Mon Feb 10 16:13:21 2014 +0100 +++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java Mon Feb 10 14:38:42 2014 -0500 @@ -137,9 +137,18 @@ long launchKernel = getLaunchKernelAddress(); hostForeignCalls.linkForeignCall(hostProviders, CALL_KERNEL, launchKernel, false, NOT_LEAF, NOT_REEXECUTABLE, ANY_LOCATION); } + /* Add a shutdown hook to destroy CUDA context(s) */ + Runtime.getRuntime().addShutdownHook(new Thread("PTXShutdown") { + @Override + public void run() { + destroyContext(); + } + }); super.completeInitialization(); } + private static native void destroyContext(); + /** * Gets the address of {@code Ptx::execute_kernel_from_vm()}. 
*/ @@ -365,7 +374,7 @@ @Override public LIRGenerator newLIRGenerator(StructuredGraph graph, FrameMap frameMap, CallingConvention cc, LIR lir) { - return new PTXLIRGenerator(graph, getProviders(), frameMap, cc, lir); + return new PTXHotSpotLIRGenerator(graph, getProviders(), getRuntime().getConfig(), frameMap, cc, lir); } private static void emitKernelEntry(CompilationResultBuilder crb, LIRGenerator lirGen, ResolvedJavaMethod codeCacheOwner) { diff -r eb48fac53e6f -r 0995dcbd6dd8 graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotLIRGenerator.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotLIRGenerator.java Mon Feb 10 14:38:42 2014 -0500 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package com.oracle.graal.hotspot.ptx; + +import com.oracle.graal.api.code.CallingConvention; +import com.oracle.graal.api.code.StackSlot; +import com.oracle.graal.api.meta.DeoptimizationAction; +import com.oracle.graal.api.meta.DeoptimizationReason; +import com.oracle.graal.api.meta.Value; +import com.oracle.graal.compiler.ptx.PTXLIRGenerator; +import com.oracle.graal.graph.GraalInternalError; +import com.oracle.graal.hotspot.HotSpotLIRGenerator; +import com.oracle.graal.hotspot.HotSpotVMConfig; +import com.oracle.graal.hotspot.meta.HotSpotProviders; +import com.oracle.graal.hotspot.nodes.DirectCompareAndSwapNode; +import com.oracle.graal.lir.FrameMap; +import com.oracle.graal.lir.LIR; +import com.oracle.graal.nodes.StructuredGraph; +import com.oracle.graal.nodes.ValueNode; + +/** + * LIR generator specialized for PTX HotSpot. + */ +public class PTXHotSpotLIRGenerator extends PTXLIRGenerator implements HotSpotLIRGenerator { + + private final HotSpotVMConfig config; + + protected PTXHotSpotLIRGenerator(StructuredGraph graph, HotSpotProviders providers, HotSpotVMConfig config, FrameMap frameMap, CallingConvention cc, LIR lir) { + super(graph, providers, frameMap, cc, lir); + assert config.basicLockSize == 8; + this.config = config; + } + + public void emitPrefetchAllocate(ValueNode address, ValueNode distance) { + // nop + } + + public void emitTailcall(Value[] args, Value address) { + throw GraalInternalError.unimplemented(); + } + + public void emitDeoptimizeCaller(DeoptimizationAction action, DeoptimizationReason reason) { + throw GraalInternalError.unimplemented(); + } + + public void emitPatchReturnAddress(ValueNode address) { + throw GraalInternalError.unimplemented(); + } + + public void emitJumpToExceptionHandlerInCaller(ValueNode handlerInCallerPc, ValueNode exception, ValueNode exceptionPc) { + throw GraalInternalError.unimplemented(); + } + + public void visitDirectCompareAndSwap(DirectCompareAndSwapNode x) { + throw 
GraalInternalError.unimplemented(); + } + + public StackSlot getLockSlot(int lockDepth) { + throw GraalInternalError.unimplemented(); + } + + public HotSpotProviders getProviders() { + throw GraalInternalError.unimplemented(); + } +} diff -r eb48fac53e6f -r 0995dcbd6dd8 src/gpu/ptx/vm/gpu_ptx.cpp --- a/src/gpu/ptx/vm/gpu_ptx.cpp Mon Feb 10 16:13:21 2014 +0100 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Mon Feb 10 14:38:42 2014 -0500 @@ -48,7 +48,7 @@ // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'. #define GPU_VMENTRY(result_type, name, signature) \ JNIEXPORT result_type JNICALL name signature { \ - if (TraceGPUInteraction) tty->print_cr("[CUDA] Ptx::" #name); \ + if (TraceGPUInteraction) tty->print_cr("[CUDA] " #name); \ GRAAL_VM_ENTRY_MARK; \ // Entry to GPU native method implementation that calls a JNI function @@ -69,9 +69,10 @@ {CC"generateKernel", CC"([B" STRING ")J", FN_PTR(Ptx::generate_kernel)}, {CC"getLaunchKernelAddress", CC"()J", FN_PTR(Ptx::get_execute_kernel_from_vm_address)}, {CC"getAvailableProcessors0", CC"()I", FN_PTR(Ptx::get_total_cores)}, + {CC"destroyContext", CC"()V", FN_PTR(Ptx::destroy_ptx_context)}, }; -void * Ptx::_device_context; +void * Ptx::_device_context = 0; int Ptx::_cu_device = 0; Ptx::cuda_cu_init_func_t Ptx::_cuda_cu_init; @@ -218,8 +219,8 @@ version = (float) major + ((float) minor)/10; if (version < GRAAL_SUPPORTED_COMPUTE_CAPABILITY_VERSION) { - tty->print_cr("[CUDA] Only cuda compute capability 3.0 and later supported. Device %d supports %.1f", - _cu_device, version); + tty->print_cr("[CUDA] Only cuda compute capability %.1f and later supported. 
Device %d supports %.1f", + (float) GRAAL_SUPPORTED_COMPUTE_CAPABILITY_VERSION, _cu_device, version); return false; } @@ -253,6 +254,18 @@ tty->print_cr("[CUDA] Using %s", device_name); } + // Create CUDA context to compile and execute the kernel + + status = _cuda_cu_ctx_create(&_device_context, GRAAL_CU_CTX_MAP_HOST, _cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to create CUDA context for device(%d): %d", _cu_device, status); + return false; + } + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device); + } + gpu::initialized_gpu(device_name); return true; @@ -381,23 +394,20 @@ jit_options[2] = GRAAL_CU_JIT_MAX_REGISTERS; jit_option_values[2] = (void *)(size_t)jit_register_count; - // Create CUDA context to compile and execute the kernel - int status = _cuda_cu_ctx_create(&_device_context, GRAAL_CU_CTX_MAP_HOST, _cu_device); + // Set CUDA context to compile and execute the kernel - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] Failed to create CUDA context for device(%d): %d", _cu_device, status); - return 0L; - } - if (TraceGPUInteraction) { - tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device); + if (_device_context == NULL) { + tty->print_cr("[CUDA] Encountered uninitialized CUDA context for device(%d)", _cu_device); + return 0L; } - status = _cuda_cu_ctx_set_current(_device_context); + int status = _cuda_cu_ctx_set_current(_device_context); if (status != GRAAL_CUDA_SUCCESS) { tty->print_cr("[CUDA] Failed to set current context for device: %d", _cu_device); return 0L; } + if (TraceGPUInteraction) { tty->print_cr("[CUDA] Success: Set current context for device: %d", _cu_device); tty->print_cr("[CUDA] PTX Kernel\n%s", code); @@ -573,17 +583,9 @@ } } - void destroy_context() { - if (Ptx::_device_context != NULL) { - check(Ptx::_cuda_cu_ctx_destroy(Ptx::_device_context), "Destroy context"); - Ptx::_device_context = NULL; - } - } - ~PtxCall() 
{ unpin_objects(); free_return_value(); - destroy_context(); if (_gc_locked) { GC_locker::unlock_critical(_thread); if (TraceGPUInteraction) { @@ -669,6 +671,23 @@ } } +GPU_VMENTRY(void, Ptx::destroy_ptx_context, (void)) /* Destroys the CUDA context held in _device_context, if one exists. */ + if (_device_context != NULL) { + int status = _cuda_cu_ctx_destroy(_device_context); + if (status != GRAAL_CUDA_SUCCESS) { + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Error(%d) : Failed to destroy context", status); + } + } else { + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Destroyed context"); + } + _device_context = NULL; /* handle is stale once destroyed; clear it so later NULL checks see no context */ + } + } + +GPU_END + GPU_VMENTRY(jlong, Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass)) return (jlong) Ptx::execute_kernel_from_vm; GPU_END @@ -720,7 +739,7 @@ JRT_END #if defined(LINUX) -static const char cuda_library_name[] = "libcuda.so"; +static const char cuda_library_name[] = "/usr/lib/libcuda.so"; #elif defined(__APPLE__) static char const cuda_library_name[] = "/usr/local/cuda/lib/libcuda.dylib"; #else diff -r eb48fac53e6f -r 0995dcbd6dd8 src/gpu/ptx/vm/gpu_ptx.hpp --- a/src/gpu/ptx/vm/gpu_ptx.hpp Mon Feb 10 16:13:21 2014 +0100 +++ b/src/gpu/ptx/vm/gpu_ptx.hpp Mon Feb 10 14:38:42 2014 -0500 @@ -112,6 +112,8 @@ // static native int getAvailableProcessors0(); JNIEXPORT static jint get_total_cores(JNIEnv *env, jobject); + JNIEXPORT static void destroy_ptx_context(); + // Links the CUDA driver library functions static bool link(); diff -r eb48fac53e6f -r 0995dcbd6dd8 src/share/vm/runtime/compilationPolicy.cpp --- a/src/share/vm/runtime/compilationPolicy.cpp Mon Feb 10 16:13:21 2014 +0100 +++ b/src/share/vm/runtime/compilationPolicy.cpp Mon Feb 10 14:38:42 2014 -0500 @@ -172,16 +172,20 @@ { ResourceMark rm; if (klass_name != NULL) { - if (klass_name != NULL && method_name != NULL) { - const char* lambdaPrefix = "lambda$"; - char* methodPrefix = strstr(method_name->as_C_string(), lambdaPrefix); - if (methodPrefix != 0) { - if ((strncmp(lambdaPrefix, methodPrefix, 
strlen(lambdaPrefix)) == 0)) { - if (TraceGPUInteraction) { - char buf[O_BUFLEN]; - tty->print_cr("Selected lambda method %s for GPU offload", m->name_and_sig_as_C_string(buf, O_BUFLEN)); + const char* javaClass = "java/"; + // Exclude java library classes - for now + if (strncmp(klass_name->as_C_string(), javaClass, strlen(javaClass))) { + if (method_name != NULL) { + const char* lambdaPrefix = "lambda$"; + char* methodPrefix = strstr(method_name->as_C_string(), lambdaPrefix); + if (methodPrefix != 0) { + if ((strncmp(lambdaPrefix, methodPrefix, strlen(lambdaPrefix)) == 0)) { + if (TraceGPUInteraction) { + char buf[O_BUFLEN]; + tty->print_cr("Selected lambda method %s for GPU offload", m->name_and_sig_as_C_string(buf, O_BUFLEN)); + } + return true; } - return true; } } }