# HG changeset patch # User Morris Meyer # Date 1374804930 14400 # Node ID d55f24eac4b15d2139651c3921599d95e15eb547 # Parent d9fcc82766da4a8e3688e4d357b214fdf9139635 PTX support for Linux diff -r d9fcc82766da -r d55f24eac4b1 graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java Thu Jul 25 18:02:29 2013 -0700 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java Thu Jul 25 22:15:30 2013 -0400 @@ -24,7 +24,6 @@ import java.lang.reflect.Method; -import org.junit.Ignore; import org.junit.Test; /** @@ -34,10 +33,10 @@ @Test public void testAdd() { - compile("testConstI"); + invoke(compile("testConstI")); } - @Ignore + @Test public void testInvoke() { invoke(compile("testConstI")); } diff -r d9fcc82766da -r d55f24eac4b1 graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java --- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java Thu Jul 25 18:02:29 2013 -0700 +++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java Thu Jul 25 22:15:30 2013 -0400 @@ -90,8 +90,43 @@ codeBuffer.emitString(""); Signature signature = codeCacheOwner.getSignature(); - for (int i = 0; i < signature.getParameterCount(false); i++) { - String param = ".param .u32 param" + i; + int paramCount = signature.getParameterCount(false); + // TODO - Revisit this. + // Bit-size of registers to be declared and used by the kernel. + int regSize = 32; + for (int i = 0; i < paramCount; i++) { + String param; + // No unsigned types in Java. 
So using .s specifier + switch (signature.getParameterKind(i)) { + case Boolean: + case Byte: + param = ".param .s8 param" + i; + regSize = 8; + break; + case Char: + case Short: + param = ".param .s16 param" + i; + regSize = 16; + break; + case Int: + param = ".param .s32 param" + i; + regSize = 32; + break; + case Long: + case Float: + case Double: + case Void: + param = ".param .s64 param" + i; + regSize = 32; + break; + default: + // Not sure but specify 64-bit specifier?? + param = ".param .s64 param" + i; + break; + } + if (i != (paramCount -1)) { + param += ","; + } codeBuffer.emitString(param); } @@ -100,7 +135,7 @@ // XXX For now declare one predicate and all registers codeBuffer.emitString(" .reg .pred %p,%q;"); - codeBuffer.emitString(" .reg .u32 %r<16>;"); + codeBuffer.emitString(" .reg .s" + regSize +" %r<16>;"); // Emit code for the LIR lirGen.lir.emitCode(tasm); diff -r d9fcc82766da -r d55f24eac4b1 make/bsd/makefiles/buildtree.make --- a/make/bsd/makefiles/buildtree.make Thu Jul 25 18:02:29 2013 -0700 +++ b/make/bsd/makefiles/buildtree.make Thu Jul 25 22:15:30 2013 -0400 @@ -239,12 +239,14 @@ echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \ echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \ + echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \ + echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \ echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \ echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \ echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \ - echo "$(call gamma-path,altsrc,gpu/ptx) \\"; \ - echo "$(call gamma-path,commonsrc,gpu/ptx)"; \ + echo "$(call gamma-path,altsrc,gpu/ptx/vm) \\"; \ + echo "$(call gamma-path,commonsrc,gpu/ptx/vm)"; \ echo; \ echo "Src_Dirs_I = \\"; \ echo "$(call gamma-path,altsrc,share/vm/prims) \\"; \ @@ -257,6 
+259,8 @@ echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \ echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \ + echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \ + echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \ echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \ echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \ diff -r d9fcc82766da -r d55f24eac4b1 make/bsd/makefiles/vm.make --- a/make/bsd/makefiles/vm.make Thu Jul 25 18:02:29 2013 -0700 +++ b/make/bsd/makefiles/vm.make Thu Jul 25 22:15:30 2013 -0400 @@ -164,7 +164,8 @@ SOURCE_PATHS+=$(HS_COMMON_SRC)/os/posix/vm SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm -SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx +SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx/vm +SOURCE_PATHS+=$(HS_COMMON_SRC)/os_gpu/bsd_ptx/vm CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path)) CORE_PATHS+=$(GENERATED)/jvmtifiles $(GENERATED)/tracefiles @@ -187,9 +188,9 @@ SHARK_PATHS := $(GAMMADIR)/src/share/vm/shark GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/share/vm/graal) -GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx) +GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx/vm) GRAAL_PATHS += $(HS_COMMON_SRC)/share/vm/graal -GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx +GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx/vm # Include dirs per type. 
Src_Dirs/CORE := $(CORE_PATHS) diff -r d9fcc82766da -r d55f24eac4b1 make/linux/makefiles/buildtree.make --- a/make/linux/makefiles/buildtree.make Thu Jul 25 18:02:29 2013 -0700 +++ b/make/linux/makefiles/buildtree.make Thu Jul 25 22:15:30 2013 -0400 @@ -235,12 +235,14 @@ echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \ echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \ + echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \ + echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \ echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \ echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \ echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \ - echo "$(call gamma-path,altsrc,gpu/ptx) \\"; \ - echo "$(call gamma-path,commonsrc,gpu/ptx)"; \ + echo "$(call gamma-path,altsrc,gpu/ptx/vm) \\"; \ + echo "$(call gamma-path,commonsrc,gpu/ptx/vm)"; \ echo; \ echo "Src_Dirs_I = \\"; \ echo "$(call gamma-path,altsrc,share/vm/prims) \\"; \ @@ -253,6 +255,8 @@ echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \ echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \ + echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \ + echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \ echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \ diff -r d9fcc82766da -r d55f24eac4b1 make/linux/makefiles/vm.make --- a/make/linux/makefiles/vm.make Thu Jul 25 18:02:29 2013 -0700 +++ b/make/linux/makefiles/vm.make Thu Jul 25 22:15:30 2013 -0400 @@ -151,7 +151,8 @@ SOURCE_PATHS+=$(HS_COMMON_SRC)/os/posix/vm SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm 
SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm -SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx +SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx/vm +SOURCE_PATHS+=$(HS_COMMON_SRC)/os_gpu/linux_ptx/vm CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path)) CORE_PATHS+=$(GENERATED)/jvmtifiles $(GENERATED)/tracefiles @@ -174,9 +175,9 @@ SHARK_PATHS := $(GAMMADIR)/src/share/vm/shark GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/share/vm/graal) -GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx) +GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx/vm) GRAAL_PATHS += $(HS_COMMON_SRC)/share/vm/graal -GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx +GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx/vm # Include dirs per type. Src_Dirs/CORE := $(CORE_PATHS) diff -r d9fcc82766da -r d55f24eac4b1 src/gpu/ptx/gpu_ptx.cpp --- a/src/gpu/ptx/gpu_ptx.cpp Thu Jul 25 18:02:29 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "runtime/gpu.hpp" -#include "utilities/globalDefinitions.hpp" -#include "utilities/ostream.hpp" - -void * gpu::Ptx::_device_context; - -gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init; -gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create; -gpu::Ptx::cuda_cu_ctx_detach_func_t gpu::Ptx::_cuda_cu_ctx_detach; -gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize; -gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count; -gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name; -gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get; -gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability; -gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel; -gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function; -gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex; - -void gpu::probe_linkage() { -#ifdef __APPLE__ - set_gpu_linkage(gpu::Ptx::probe_linkage_apple()); -#else - set_gpu_linkage(false); -#endif -} - -void gpu::initialize_gpu() { - if (gpu::has_gpu_linkage()) { - set_initialized(gpu::Ptx::initialize_gpu()); - } -} - -void * gpu::generate_kernel(unsigned char *code, int code_len, const char *name) { - if (gpu::has_gpu_linkage()) { - return (gpu::Ptx::generate_kernel(code, code_len, name)); - } else { - return NULL; - } -} - -bool gpu::execute_kernel(address kernel) { - if (gpu::has_gpu_linkage()) { - return (gpu::Ptx::execute_kernel(kernel)); - } else { - return false; - } -} - -#define __CUDA_API_VERSION 5000 - -bool gpu::Ptx::initialize_gpu() { - int status = _cuda_cu_init(0, __CUDA_API_VERSION); - if (TraceWarpLoading) { - 
tty->print_cr("gpu_ptx::_cuda_cu_init: %d", status); - } - - int device_count = 0; - status = _cuda_cu_device_get_count(&device_count); - if (TraceWarpLoading) { - tty->print_cr("gpu_ptx::_cuda_cu_device_get_count(%d): %d", device_count, status); - } - - int device_id = 0, cu_device = 0; - status = _cuda_cu_device_get(&cu_device, device_id); - if (TraceWarpLoading) { - tty->print_cr("gpu_ptx::_cuda_cu_device_get(%d): %d", cu_device, status); - } - - int major, minor; - status = _cuda_cu_device_compute_capability(&major, &minor, cu_device); - if (TraceWarpLoading) { - tty->print_cr("gpu_ptx::_cuda_cu_device_compute_capability(major %d, minor %d): %d", - major, minor, status); - } - - char device_name[256]; - status = _cuda_cu_device_get_name(device_name, 256, cu_device); - if (TraceWarpLoading) { - tty->print_cr("gpu_ptx::_cuda_cu_device_get_name(%s): %d", device_name, status); - } - - status = _cuda_cu_ctx_create(&_device_context, 0, cu_device); - if (TraceWarpLoading) { - tty->print_cr("gpu_ptx::_cuda_cu_ctx_create(%x): %d", _device_context, status); - } - - return status == 0; // CUDA_SUCCESS -} - -void *gpu::Ptx::generate_kernel(unsigned char *code, int code_len, const char *name) { - - void *cu_module; - const unsigned int jit_num_options = 3; - int *jit_options = new int[jit_num_options]; - void **jit_option_values = new void *[jit_num_options]; - - jit_options[0] = 4; // CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES - int jit_log_buffer_size = 1024; - jit_option_values[0] = (void *)(size_t)jit_log_buffer_size; - - jit_options[1] = 3; // CU_JIT_INFO_LOG_BUFFER - char *jit_log_buffer = new char[jit_log_buffer_size]; - jit_option_values[1] = jit_log_buffer; - - jit_options[2] = 0; // CU_JIT_MAX_REGISTERS - int jit_register_count = 32; - jit_option_values[2] = (void *)(size_t)jit_register_count; - - int status = _cuda_cu_module_load_data_ex(&cu_module, code, - jit_num_options, jit_options, (void **)jit_option_values); - if (TraceWarpLoading) { - 
tty->print_cr("gpu_ptx::_cuda_cu_module_load_data_ex(%x): %d", cu_module, status); - tty->print_cr("gpu_ptx::jit_log_buffer\n%s", jit_log_buffer); - } - - void *cu_function; - - status = _cuda_cu_module_get_function(&cu_function, cu_module, name); - if (TraceWarpLoading) { - tty->print_cr("gpu_ptx::_cuda_cu_module_get_function(%s):%x %d", name, cu_function, status); - } - return cu_function; -} - -bool gpu::Ptx::execute_kernel(address kernel) { - // grid dimensionality - unsigned int gridX = 1; - unsigned int gridY = 1; - unsigned int gridZ = 1; - - // thread dimensionality - unsigned int blockX = 1; - unsigned int blockY = 1; - unsigned int blockZ = 1; - - int *cu_function = (int *)kernel; - - int status = _cuda_cu_launch_kernel(cu_function, - gridX, gridY, gridZ, - blockX, blockY, blockZ, - 0, NULL, NULL, NULL); - if (TraceWarpLoading) { - tty->print_cr("gpu_ptx::_cuda_cu_launch_kernel(%x): %d", kernel, status); - } - return status == 0; // CUDA_SUCCESS -} - -#ifdef __APPLE__ -bool gpu::Ptx::probe_linkage_apple() { - void *handle = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_LAZY); - if (handle != NULL) { - _cuda_cu_init = - CAST_TO_FN_PTR(cuda_cu_init_func_t, dlsym(handle, "cuInit")); - _cuda_cu_ctx_create = - CAST_TO_FN_PTR(cuda_cu_ctx_create_func_t, dlsym(handle, "cuCtxCreate")); - _cuda_cu_ctx_detach = - CAST_TO_FN_PTR(cuda_cu_ctx_detach_func_t, dlsym(handle, "cuCtxDetach")); - _cuda_cu_ctx_synchronize = - CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, dlsym(handle, "cuCtxSynchronize")); - _cuda_cu_device_get_count = - CAST_TO_FN_PTR(cuda_cu_device_get_count_func_t, dlsym(handle, "cuDeviceGetCount")); - _cuda_cu_device_get_name = - CAST_TO_FN_PTR(cuda_cu_device_get_name_func_t, dlsym(handle, "cuDeviceGetName")); - _cuda_cu_device_get = - CAST_TO_FN_PTR(cuda_cu_device_get_func_t, dlsym(handle, "cuDeviceGet")); - _cuda_cu_device_compute_capability = - CAST_TO_FN_PTR(cuda_cu_device_compute_capability_func_t, dlsym(handle, "cuDeviceComputeCapability")); - 
_cuda_cu_module_get_function = - CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction")); - _cuda_cu_module_load_data_ex = - CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx")); - _cuda_cu_launch_kernel = - CAST_TO_FN_PTR(cuda_cu_launch_kernel_func_t, dlsym(handle, "cuLaunchKernel")); - return true; - } - return false; -} -#endif \ No newline at end of file diff -r d9fcc82766da -r d55f24eac4b1 src/gpu/ptx/gpu_ptx.hpp --- a/src/gpu/ptx/gpu_ptx.hpp Thu Jul 25 18:02:29 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#ifndef GPU_PTX_HPP -#define GPU_PTX_HPP - -class Ptx { - friend class gpu; - - protected: - static void probe_linkage(); -#ifdef __APPLE__ - static bool probe_linkage_apple(); -#endif - static bool initialize_gpu(); - static void * generate_kernel(unsigned char *code, int code_len, const char *name); - static bool execute_kernel(address kernel); - -private: - typedef int (*cuda_cu_init_func_t)(unsigned int, int); - typedef int (*cuda_cu_ctx_create_func_t)(void *, int, int); - typedef int (*cuda_cu_ctx_detach_func_t)(int *); - typedef int (*cuda_cu_ctx_synchronize_func_t)(int *); - typedef int (*cuda_cu_device_get_count_func_t)(int *); - typedef int (*cuda_cu_device_get_name_func_t)(char *, int, int); - typedef int (*cuda_cu_device_get_func_t)(int *, int); - typedef int (*cuda_cu_device_compute_capability_func_t)(int *, int *, int); - typedef int (*cuda_cu_launch_kernel_func_t)(void *, - unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, - unsigned int, void *, void **, void **); - typedef int (*cuda_cu_module_get_function_func_t)(void *, void *, const char *); - typedef int (*cuda_cu_module_load_data_ex_func_t)(void *, void *, unsigned int, int *, void **); - - static cuda_cu_init_func_t _cuda_cu_init; - static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create; - static cuda_cu_ctx_detach_func_t _cuda_cu_ctx_detach; - static cuda_cu_ctx_synchronize_func_t _cuda_cu_ctx_synchronize; - static cuda_cu_device_get_count_func_t _cuda_cu_device_get_count; - static cuda_cu_device_get_name_func_t _cuda_cu_device_get_name; - static cuda_cu_device_get_func_t _cuda_cu_device_get; - static cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability; - static cuda_cu_launch_kernel_func_t _cuda_cu_launch_kernel; - static cuda_cu_module_get_function_func_t _cuda_cu_module_get_function; - static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex; - -protected: - static void * _device_context; -}; - -#endif // 
GPU_PTX_HPP diff -r d9fcc82766da -r d55f24eac4b1 src/gpu/ptx/vm/gpu_ptx.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Thu Jul 25 22:15:30 2013 -0400 @@ -0,0 +1,316 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "runtime/gpu.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/ostream.hpp" + +void * gpu::Ptx::_device_context; + +gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init; +gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create; +gpu::Ptx::cuda_cu_ctx_detach_func_t gpu::Ptx::_cuda_cu_ctx_detach; +gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize; +gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count; +gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name; +gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get; +gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability; +gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute; +gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel; +gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function; +gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex; + +void gpu::probe_linkage() { +#if defined(__APPLE__) || defined(LINUX) + set_gpu_linkage(gpu::Ptx::probe_linkage()); +#else + set_gpu_linkage(false); +#endif +} + +void gpu::initialize_gpu() { + if (gpu::has_gpu_linkage()) { + set_initialized(gpu::Ptx::initialize_gpu()); + } +} + +void * gpu::generate_kernel(unsigned char *code, int code_len, const char *name) { + if (gpu::has_gpu_linkage()) { + return (gpu::Ptx::generate_kernel(code, code_len, name)); + } else { + return NULL; + } +} + +bool gpu::execute_kernel(address kernel) { + if (gpu::has_gpu_linkage()) { + return (gpu::Ptx::execute_kernel(kernel)); + } else { + return false; + } +} + +bool gpu::Ptx::initialize_gpu() { + + /* Initialize CUDA driver API */ + int status = _cuda_cu_init(0); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("Failed to initialize CUDA device"); + return false; + } + + if 
(TraceGPUInteraction) { + tty->print_cr("CUDA driver initialization: Success"); + } + + /* Get the number of compute-capable device count */ + int device_count = 0; + status = _cuda_cu_device_get_count(&device_count); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get compute-capable device count"); + return false; + } + + if (device_count == 0) { + tty->print_cr("[CUDA] Found no device supporting CUDA"); + return false; + } + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Number of compute-capable devices found: %d", device_count); + } + + /* Get the handle to the first compute device */ + int device_id = 0; + /* Compute-capable device handle */ + int cu_device = 0; + status = _cuda_cu_device_get(&cu_device, device_id); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get handle of first compute-capable device i.e., the one at ordinal: %d", device_id); + return false; + } + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Got the handle of first compute-device"); + } + + /* Get device attributes */ + int minor, major; + status = _cuda_cu_device_get_attribute(&minor, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get minor attribute of device: %d", cu_device); + return false; + } + + status = _cuda_cu_device_get_attribute(&major, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get major attribute of device: %d", cu_device); + return false; + } + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", cu_device, major, minor); + } + + /* Get device name */ + char device_name[256]; + status = _cuda_cu_device_get_name(device_name, 256, cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get name of device: %d", cu_device); + return false; + } + + if 
(TraceGPUInteraction) { + tty->print_cr("[CUDA] Using %s", device_name); + } + + /* Create CUDA context */ + status = _cuda_cu_ctx_create(&_device_context, 0, cu_device); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to create CUDA context for device: %d", cu_device); + return false; + } + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Success: Created context for device: %d", cu_device); + } + + return true; +} + +void *gpu::Ptx::generate_kernel(unsigned char *code, int code_len, const char *name) { + /* Loads the PTX text in 'code' as a CUDA module and returns the handle of the function called 'name', or NULL when the load or the lookup fails; code_len is not used. */ + struct CUmod_st * cu_module; + // Use three JIT compiler options; the option arrays are plain locals because they are only handed to the module-load call below, so no return path has to free them + const unsigned int jit_num_options = 3; + int jit_options[jit_num_options]; + void *jit_option_values[jit_num_options]; + + // Set up PTX JIT compiler options + // 1. set size of compilation log buffer + const int jit_log_buffer_size = 1024; + jit_options[0] = GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + jit_option_values[0] = (void *)(size_t)jit_log_buffer_size; + + // 2. set pointer to compilation log buffer + char jit_log_buffer[jit_log_buffer_size]; + jit_options[1] = GRAAL_CU_JIT_INFO_LOG_BUFFER; + jit_option_values[1] = jit_log_buffer; + + // 3. 
set pointer to set the Maximum # of registers (32) for the kernel + int jit_register_count = 32; + jit_options[2] = GRAAL_CU_JIT_MAX_REGISTERS; + jit_option_values[2] = (void *)(size_t)jit_register_count; + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] PTX Kernel\n%s", code); + tty->print_cr("[CUDA] Function name : %s", name); + + } + + /* Load module's data with compiler options */ + int status = _cuda_cu_module_load_data_ex(&cu_module, code, jit_num_options, + jit_options, (void **)jit_option_values); + if (status != GRAAL_CUDA_SUCCESS) { + if (status == GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU) { + tty->print_cr("[CUDA] Check for malformed PTX kernel or incorrect PTX compilation options"); + } + tty->print_cr("[CUDA] *** Error (%d) Failed to load module data with online compiler options for method %s", + status, name); + return NULL; + } + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Loaded data for PTX Kernel"); + } + + struct CUfunc_st * cu_function; + + status = _cuda_cu_module_get_function(&cu_function, cu_module, name); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to get function %s", name); + return NULL; + } + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Got function handle for %s", name); + } + return cu_function; +} + +bool gpu::Ptx::execute_kernel(address kernel) { + // grid dimensionality + unsigned int gridX = 1; + unsigned int gridY = 1; + unsigned int gridZ = 1; + + // thread dimensionality + unsigned int blockX = 1; + unsigned int blockY = 1; + unsigned int blockZ = 1; + + int *cu_function = (int *)kernel; + + if (kernel == NULL) { + return false; + } + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] launching kernel"); + } + int status = _cuda_cu_launch_kernel(cu_function, + gridX, gridY, gridZ, + blockX, blockY, blockZ, + 0, NULL, NULL, NULL); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to launch kernel"); + return false; + } + + if (TraceGPUInteraction) { + 
tty->print_cr("[CUDA] Success: Kernel Launch"); + } + return status == 0; // GRAAL_CUDA_SUCCESS +} + +#if defined(LINUX) +static const char cuda_library_name[] = "libcuda.so"; +#elif defined(__APPLE__) +static char const cuda_library_name[] = "/usr/local/cuda/lib/libcuda.dylib"; +#else +static char const cuda_library_name[] = ""; +#endif +/* Opens the CUDA driver library named by cuda_library_name and stores the dlsym() result for each driver entry point in the matching _cuda_cu_* member; returns true once the library has been opened, false otherwise. */ +bool gpu::Ptx::probe_linkage() { + if (cuda_library_name[0] != '\0') { /* an array is never NULL; the empty name marks an unsupported platform */ + void *handle = dlopen(cuda_library_name, RTLD_LAZY); + if (handle != NULL) { /* NOTE(review): the dlsym() results below are stored without a NULL check */ + _cuda_cu_init = + CAST_TO_FN_PTR(cuda_cu_init_func_t, dlsym(handle, "cuInit")); + _cuda_cu_ctx_create = + CAST_TO_FN_PTR(cuda_cu_ctx_create_func_t, dlsym(handle, "cuCtxCreate")); + _cuda_cu_ctx_detach = + CAST_TO_FN_PTR(cuda_cu_ctx_detach_func_t, dlsym(handle, "cuCtxDetach")); + _cuda_cu_ctx_synchronize = + CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, dlsym(handle, "cuCtxSynchronize")); + _cuda_cu_device_get_count = + CAST_TO_FN_PTR(cuda_cu_device_get_count_func_t, dlsym(handle, "cuDeviceGetCount")); + _cuda_cu_device_get_name = + CAST_TO_FN_PTR(cuda_cu_device_get_name_func_t, dlsym(handle, "cuDeviceGetName")); + _cuda_cu_device_get = + CAST_TO_FN_PTR(cuda_cu_device_get_func_t, dlsym(handle, "cuDeviceGet")); + _cuda_cu_device_compute_capability = + CAST_TO_FN_PTR(cuda_cu_device_compute_capability_func_t, dlsym(handle, "cuDeviceComputeCapability")); + _cuda_cu_device_get_attribute = + CAST_TO_FN_PTR(cuda_cu_device_get_attribute_func_t, dlsym(handle, "cuDeviceGetAttribute")); + _cuda_cu_module_get_function = + CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction")); + _cuda_cu_module_load_data_ex = + CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx")); + _cuda_cu_launch_kernel = + CAST_TO_FN_PTR(cuda_cu_launch_kernel_func_t, dlsym(handle, "cuLaunchKernel")); + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Success: library linkage"); + } + return true; + } else { + // Unable to dlopen libcuda + 
tty->print_cr("Use LD_LIBRARY_PATH (or other means) to specify installed location of CUDA library"); + return false; + } + } else { + tty->print_cr("Unsupported CUDA platform"); + return false; + } + tty->print_cr("Failed to find CUDA linkage"); + return false; +} + diff -r d9fcc82766da -r d55f24eac4b1 src/gpu/ptx/vm/gpu_ptx.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/gpu/ptx/vm/gpu_ptx.hpp Thu Jul 25 22:15:30 2013 -0400 @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef GPU_PTX_HPP +#define GPU_PTX_HPP + +/* + * Some useful macro definitions from publicly available cuda.h. + * These definitions are for convenience. 
+ */ +#define GRAAL_CUDA_SUCCESS 0 +#define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75 +#define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76 +#define GRAAL_CU_JIT_MAX_REGISTERS 0 +#define GRAAL_CU_JIT_THREADS_PER_BLOCK 1 +#define GRAAL_CU_JIT_INFO_LOG_BUFFER 3 +#define GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES 4 +#define GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU 209 + +class Ptx { + friend class gpu; + + protected: + static bool probe_linkage(); + static bool initialize_gpu(); + static void * generate_kernel(unsigned char *code, int code_len, const char *name); + static bool execute_kernel(address kernel); + +private: + typedef int (*cuda_cu_init_func_t)(unsigned int); + typedef int (*cuda_cu_ctx_create_func_t)(void *, int, int); + typedef int (*cuda_cu_ctx_detach_func_t)(int *); + typedef int (*cuda_cu_ctx_synchronize_func_t)(int *); + typedef int (*cuda_cu_device_get_count_func_t)(int *); + typedef int (*cuda_cu_device_get_name_func_t)(char *, int, int); + typedef int (*cuda_cu_device_get_func_t)(int *, int); + typedef int (*cuda_cu_device_compute_capability_func_t)(int *, int *, int); + typedef int (*cuda_cu_device_get_attribute_func_t)(int *, int, int); + typedef int (*cuda_cu_launch_kernel_func_t)(void *, + unsigned int, unsigned int, unsigned int, + unsigned int, unsigned int, unsigned int, + unsigned int, void *, void **, void **); + typedef int (*cuda_cu_module_get_function_func_t)(void *, void *, const char *); + typedef int (*cuda_cu_module_load_data_ex_func_t)(void *, void *, unsigned int, void *, void **); + + static cuda_cu_init_func_t _cuda_cu_init; + static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create; + static cuda_cu_ctx_detach_func_t _cuda_cu_ctx_detach; + static cuda_cu_ctx_synchronize_func_t _cuda_cu_ctx_synchronize; + static cuda_cu_device_get_count_func_t _cuda_cu_device_get_count; + static cuda_cu_device_get_name_func_t _cuda_cu_device_get_name; + static cuda_cu_device_get_func_t _cuda_cu_device_get; + static 
cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability; /* Deprecated as of CUDA 5.0 */ + static cuda_cu_device_get_attribute_func_t _cuda_cu_device_get_attribute; + static cuda_cu_launch_kernel_func_t _cuda_cu_launch_kernel; + static cuda_cu_module_get_function_func_t _cuda_cu_module_get_function; + static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex; + +protected: + static void * _device_context; +}; +#endif // GPU_PTX_HPP diff -r d9fcc82766da -r d55f24eac4b1 src/os/bsd/vm/gpu_bsd.cpp --- a/src/os/bsd/vm/gpu_bsd.cpp Thu Jul 25 18:02:29 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#include "runtime/gpu.hpp" -#include "utilities/ostream.hpp" - -#ifdef __APPLE__ -#include -#include -#endif - -void gpu::probe_gpu() { -#ifdef __APPLE__ - set_available(gpu::Bsd::probe_gpu_apple()); - if (TraceWarpLoading) { - tty->print_cr("gpu_bsd::probe_gpu(APPLE): %d", gpu::is_available()); - } -#else - if (TraceWarpLoading) { - tty->print_cr("gpu_bsd::probe_gpu(not APPLE)"); - } - set_available(false); -#endif -} - -#ifdef __APPLE__ -/* - * This is rudimentary at best, but until we decide on a CUDA Compiler Compatibility - * level, this will have to suffice. - */ -bool gpu::Bsd::probe_gpu_apple() { - CGError err = CGDisplayNoErr; - CGDisplayCount displayCount = 0; - CFDataRef vendorID, deviceID, model; - CGDirectDisplayID *displays; - IOOptionBits options = kIORegistryIterateRecursively | kIORegistryIterateParents; - io_registry_entry_t displayPort; - - err = CGGetActiveDisplayList(0, NULL, &displayCount); - displays = (CGDirectDisplayID *)calloc((size_t)displayCount, sizeof(CGDirectDisplayID)); - err = CGGetActiveDisplayList(displayCount, displays, &displayCount); - - for (CGDisplayCount i = 0; i < displayCount; i++) { - displayPort = CGDisplayIOServicePort(displays[i]); - vendorID = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("vendor-id"), - kCFAllocatorDefault, options); - deviceID = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("device-id"), - kCFAllocatorDefault, options); - model = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("model"), - kCFAllocatorDefault, options); - if (TraceWarpLoading) { - tty->print_cr("vendor: 0x%08X", *((UInt32*)CFDataGetBytePtr(vendorID))); - tty->print_cr("device: 0x%08X", *((UInt32*)CFDataGetBytePtr(deviceID))); - tty->print_cr("model: %s", CFDataGetBytePtr(model)); - } - UInt32 vendor = *((UInt32*)CFDataGetBytePtr(vendorID)); - if (vendor != 0x10DE) { - return false; - } else { - /* - * see 
https://developer.nvidia.com/cuda-gpus - * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs - * see http://www.pcidatabase.com/reports.php?type=csv - * - * Only supporting GK104, GK106, GK107 and GK110 GPUs for now, - * which is CUDA Computer Capability 3.0 and greater. - */ - switch (*((UInt32*)CFDataGetBytePtr(deviceID))) { - case 0x11C0: - return true; // NVIDIA GeForce GTX 660 - default: - return false; - } - } - } - return false; -} -#endif diff -r d9fcc82766da -r d55f24eac4b1 src/os/bsd/vm/gpu_bsd.hpp --- a/src/os/bsd/vm/gpu_bsd.hpp Thu Jul 25 18:02:29 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#ifndef OS_BSD_VM_GPU_BSD_HPP -#define OS_BSD_VM_GPU_BSD_HPP - - -class Bsd { - friend class gpu; - - protected: - static bool probe_gpu(); -#ifdef __APPLE__ - static bool probe_gpu_apple(); -#endif -}; - -#endif // OS_BSD_VM_GPU_BSD_HPP diff -r d9fcc82766da -r d55f24eac4b1 src/os_gpu/bsd_ptx/vm/gpu_bsd.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/os_gpu/bsd_ptx/vm/gpu_bsd.cpp Thu Jul 25 22:15:30 2013 -0400 @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "runtime/gpu.hpp" +#include "utilities/ostream.hpp" + +#ifdef __APPLE__ +#include +#include +#endif + +void gpu::probe_gpu() { +#ifdef __APPLE__ + set_available(gpu::Bsd::probe_gpu_apple()); + if (TraceGPUInteraction) { + tty->print_cr("gpu_bsd::probe_gpu(APPLE): %d", gpu::is_available()); + } +#else + if (TraceGPUInteraction) { + tty->print_cr("gpu_bsd::probe_gpu(not APPLE)"); + } + set_available(false); +#endif +} + +#ifdef __APPLE__ +/* + * This is rudimentary at best, but until we decide on a CUDA Compiler Compatibility + * level, this will have to suffice. NOTE(review): every path through the display loop returns, so only the first active display is inspected, and the calloc'd 'displays' array is not freed before returning. + */ +bool gpu::Bsd::probe_gpu_apple() { + CGError err = CGDisplayNoErr; + CGDisplayCount displayCount = 0; + CFDataRef vendorID, deviceID, model; + CGDirectDisplayID *displays; + IOOptionBits options = kIORegistryIterateRecursively | kIORegistryIterateParents; + io_registry_entry_t displayPort; + + err = CGGetActiveDisplayList(0, NULL, &displayCount); + displays = (CGDirectDisplayID *)calloc((size_t)displayCount, sizeof(CGDirectDisplayID)); + err = CGGetActiveDisplayList(displayCount, displays, &displayCount); + + for (CGDisplayCount i = 0; i < displayCount; i++) { + displayPort = CGDisplayIOServicePort(displays[i]); + vendorID = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("vendor-id"), + kCFAllocatorDefault, options); + deviceID = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("device-id"), + kCFAllocatorDefault, options); + model = (CFDataRef)IORegistryEntrySearchCFProperty(displayPort, kIOServicePlane, CFSTR("model"), + kCFAllocatorDefault, options); + if (TraceGPUInteraction) { + tty->print_cr("vendor: 0x%08X", *((UInt32*)CFDataGetBytePtr(vendorID))); + tty->print_cr("device: 0x%08X", *((UInt32*)CFDataGetBytePtr(deviceID))); + tty->print_cr("model: %s", CFDataGetBytePtr(model)); + } + UInt32 vendor = *((UInt32*)CFDataGetBytePtr(vendorID)); + if (vendor != 0x10DE) { + return false; + } else { + /* + * see
https://developer.nvidia.com/cuda-gpus + * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs + * see http://www.pcidatabase.com/reports.php?type=csv + * + * Only supporting GK104, GK106, GK107 and GK110 GPUs for now, + * which is CUDA Compute Capability 3.0 and greater. + */ + switch (*((UInt32*)CFDataGetBytePtr(deviceID))) { + case 0x11C0: + return true; // NVIDIA GeForce GTX 660 + default: + return false; + } + } + } + return false; +} +#endif diff -r d9fcc82766da -r d55f24eac4b1 src/os_gpu/bsd_ptx/vm/gpu_bsd.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/os_gpu/bsd_ptx/vm/gpu_bsd.hpp Thu Jul 25 22:15:30 2013 -0400 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#ifndef OS_BSD_VM_GPU_BSD_HPP +#define OS_BSD_VM_GPU_BSD_HPP + + +class Bsd { + friend class gpu; + + protected: + static bool probe_gpu(); +#ifdef __APPLE__ + static bool probe_gpu_apple(); +#endif +}; + +#endif // OS_BSD_VM_GPU_BSD_HPP diff -r d9fcc82766da -r d55f24eac4b1 src/os_gpu/linux_ptx/vm/gpu_linux.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/os_gpu/linux_ptx/vm/gpu_linux.cpp Thu Jul 25 22:15:30 2013 -0400 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "runtime/gpu.hpp" +#include "utilities/ostream.hpp" + +void gpu::probe_gpu() { + set_available(gpu::Linux::probe_gpu()); + if (TraceGPUInteraction) { + tty->print_cr("gpu_linux::probe_gpu(): %d", gpu::is_available()); + } +} + +/* + * Probe for CUDA device on PCI bus using /proc/bus/pci/devices. Do + * not rely on CUDA tool kit being installed. We will check if CUDA + * library is installed later. 
+ */ + +static unsigned int nvidia_vendor_id = 0x10de; /* PCI vendor id to match */ +static unsigned int nvidia_gk110_dev_id = 0x1005; /* the only PCI device id accepted for now */ + +bool gpu::Linux::probe_gpu() { + /* + Scan /proc/bus/pci/devices for the first supported CUDA device + (vendor and device id must both match the constants above). Returns true + once one is found, false if none is found or the file cannot be opened. + Will need to revisit this to support execution on multiple CUDA devices. + */ + FILE *pci_devices = fopen("/proc/bus/pci/devices", "r"); + char contents[4096]; + unsigned int bus_num_devfn_ign; + unsigned int vendor; + unsigned int device; + bool cuda_device_exists = false; + if (pci_devices == NULL) { + tty->print_cr("*** Failed to open /proc/bus/pci/devices"); + return cuda_device_exists; + } + + while (fgets(contents, sizeof(contents)-1, pci_devices)) { + if (sscanf(contents, "%04x%04x%04x", &bus_num_devfn_ign, &vendor, &device) != 3) { continue; } /* skip lines that do not parse */ + /* Break after finding the first CUDA device. */ + if ((vendor == nvidia_vendor_id) && (device == nvidia_gk110_dev_id)) { + cuda_device_exists = true; + if (TraceGPUInteraction) { + tty->print_cr("Found supported nVidia CUDA device vendor : 0x%04x device 0x%04x", vendor, device); + } + break; + } + } + + // Close file pointer. + fclose(pci_devices); + + return cuda_device_exists; +} diff -r d9fcc82766da -r d55f24eac4b1 src/os_gpu/linux_ptx/vm/gpu_linux.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/os_gpu/linux_ptx/vm/gpu_linux.hpp Thu Jul 25 22:15:30 2013 -0400 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_LINUX_VM_GPU_LINUX_HPP +#define OS_LINUX_VM_GPU_LINUX_HPP + + +class Linux { /* Linux-specific GPU probing; referenced by callers as gpu::Linux */ + friend class gpu; + + protected: + static bool probe_gpu(); /* true if a supported CUDA device is listed in /proc/bus/pci/devices */ +}; + +#endif // OS_LINUX_VM_GPU_LINUX_HPP diff -r d9fcc82766da -r d55f24eac4b1 src/share/vm/code/nmethod.cpp --- a/src/share/vm/code/nmethod.cpp Thu Jul 25 18:02:29 2013 -0700 +++ b/src/share/vm/code/nmethod.cpp Thu Jul 25 22:15:30 2013 -0400 @@ -950,7 +950,7 @@ // we use the information of entry points to find out if a method is // static or non static - assert(compiler->is_c2() || + assert(compiler->is_c2() || compiler->is_graal() || _method->is_static() == (entry_point() == _verified_entry_point), " entry points must be same for static methods and vice versa"); } diff -r d9fcc82766da -r d55f24eac4b1 src/share/vm/graal/graalCompilerToGPU.cpp --- a/src/share/vm/graal/graalCompilerToGPU.cpp Thu Jul 25 18:02:29 2013 -0700 +++ b/src/share/vm/graal/graalCompilerToGPU.cpp Thu Jul 25 22:15:30 2013 -0400 @@ -47,7 +47,7 @@ #define C2V_END } -C2V_VMENTRY(jlong, generateKernel, (JNIEnv *env, jobject, jbyteArray code, jstring name)) +C2V_ENTRY(jlong, generateKernel, (JNIEnv *env, jobject, jbyteArray code, jstring name)) if (gpu::is_available() == false || gpu::has_gpu_linkage() == false && gpu::is_initialized()) { tty->print_cr("generateKernel - not available / no linkage / not initialized"); return 0; @@ -57,7 +57,12 @@ jint len = env->GetArrayLength(code); const char *namestr =
env->GetStringUTFChars(name, &is_copy); void *kernel = gpu::generate_kernel((unsigned char *)bytes, len, namestr); - tty->print_cr("generateKernel: %x", kernel); + if (kernel == NULL) { + tty->print_cr("[CUDA] *** Error: Failed to compile kernel"); + } + else if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Generated kernel"); + } env->ReleaseByteArrayElements(code, bytes, 0); env->ReleaseStringUTFChars(name, namestr); diff -r d9fcc82766da -r d55f24eac4b1 src/share/vm/graal/graalCompilerToVM.cpp --- a/src/share/vm/graal/graalCompilerToVM.cpp Thu Jul 25 18:02:29 2013 -0700 +++ b/src/share/vm/graal/graalCompilerToVM.cpp Thu Jul 25 22:15:30 2013 -0400 @@ -946,7 +946,7 @@ HotSpotInstalledCode::set_codeBlob(installed_code_handle, (jlong) cb); oop comp_result = HotSpotCompiledCode::comp(compiled_code_handle); if (comp_result->is_a(ExternalCompilationResult::klass())) { - if (TraceWarpLoading) { + if (TraceGPUInteraction) { tty->print_cr("installCode0: ExternalCompilationResult"); } HotSpotInstalledCode::set_start(installed_code_handle, ExternalCompilationResult::entryPoint(comp_result)); diff -r d9fcc82766da -r d55f24eac4b1 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Thu Jul 25 18:02:29 2013 -0700 +++ b/src/share/vm/runtime/globals.hpp Thu Jul 25 22:15:30 2013 -0400 @@ -3709,7 +3709,7 @@ product(bool , AllowNonVirtualCalls, false, \ "Obey the ACC_SUPER flag and allow invokenonvirtual calls") \ \ - product(bool, TraceWarpLoading, false, \ + product(bool, TraceGPUInteraction, false, \ "Trace external GPU warp loading") \ \ diagnostic(ccstr, SharedArchiveFile, NULL, \ diff -r d9fcc82766da -r d55f24eac4b1 src/share/vm/runtime/gpu.cpp --- a/src/share/vm/runtime/gpu.cpp Thu Jul 25 18:02:29 2013 -0700 +++ b/src/share/vm/runtime/gpu.cpp Thu Jul 25 22:15:30 2013 -0400 @@ -24,17 +24,15 @@ #include "precompiled.hpp" #include "runtime/gpu.hpp" -#include "ptx/gpu_ptx.hpp" bool gpu::_available = false; // does the hardware exist? 
bool gpu::_gpu_linkage = false; // is the driver library to access the GPU installed -bool gpu::_initialized = false; // is the GPU defvice initialized +bool gpu::_initialized = false; // is the GPU device initialized void gpu::init() { -#ifdef TARGET_OS_FAMILY_bsd +#if defined(TARGET_OS_FAMILY_bsd) || defined(TARGET_OS_FAMILY_linux) gpu::probe_gpu(); #endif // need multi-gpu TARGET ifdef gpu::probe_linkage(); } - diff -r d9fcc82766da -r d55f24eac4b1 src/share/vm/runtime/gpu.hpp --- a/src/share/vm/runtime/gpu.hpp Thu Jul 25 18:02:29 2013 -0700 +++ b/src/share/vm/runtime/gpu.hpp Thu Jul 25 22:15:30 2013 -0400 @@ -70,6 +70,7 @@ // Platform dependent stuff #ifdef TARGET_OS_FAMILY_linux +# include "gpu_linux.hpp" #endif #ifdef TARGET_OS_FAMILY_solaris #endif @@ -79,7 +80,7 @@ # include "gpu_bsd.hpp" #endif -# include "ptx/gpu_ptx.hpp" +# include "ptx/vm/gpu_ptx.hpp" }; diff -r d9fcc82766da -r d55f24eac4b1 src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Thu Jul 25 18:02:29 2013 -0700 +++ b/src/share/vm/runtime/thread.cpp Thu Jul 25 22:15:30 2013 -0400 @@ -3312,7 +3312,8 @@ // Initialize the os module before using TLS os::init(); - // probe for warp capability + // Probe for existence of supported GPU and initialize it if one + // exists. gpu::init(); // Initialize system properties.