annotate src/gpu/ptx/vm/gpu_ptx.cpp @ 13212:eb03a7335eb0

Use fixed instead of virtual register for target in far foreign call, since the register allocator does not support virtual registers to be used at call sites.
author Christian Wimmer <christian.wimmer@oracle.com>
date Mon, 02 Dec 2013 14:20:32 -0800
parents 1a7e7011a341
children 220ed109bf77
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
1 /*
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
4 *
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
5 * This code is free software; you can redistribute it and/or modify it
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
6 * under the terms of the GNU General Public License version 2 only, as
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
7 * published by the Free Software Foundation.
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
8 *
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
9 * This code is distributed in the hope that it will be useful, but WITHOUT
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
12 * version 2 for more details (a copy is included in the LICENSE file that
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
13 * accompanied this code).
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
14 *
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
15 * You should have received a copy of the GNU General Public License version
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
16 * 2 along with this work; if not, write to the Free Software Foundation,
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
18 *
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
20 * or visit www.oracle.com if you need additional information or have any
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
21 * questions.
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
22 *
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
23 */
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
24
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
25 #include "precompiled.hpp"
11284
d876002b98e6 ptx: fix include for non-precompiledheader build in gpu_ptx.cpp
Bernhard Urban <bernhard.urban@jku.at>
parents: 11283
diff changeset
26 #include "runtime/javaCalls.hpp"
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
27 #include "runtime/gpu.hpp"
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
28 #include "utilities/globalDefinitions.hpp"
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
29 #include "utilities/ostream.hpp"
10883
5fcb30bcb90a Fix compilation without precompiled headers.
Roland Schatz <roland.schatz@oracle.com>
parents: 10882
diff changeset
30 #include "memory/allocation.hpp"
5fcb30bcb90a Fix compilation without precompiled headers.
Roland Schatz <roland.schatz@oracle.com>
parents: 10882
diff changeset
31 #include "memory/allocation.inline.hpp"
11596
91e5f927af63 Initial implementation of PTXRuntime (RegisterConfig, PTX description etc); guarded with new flag UseGPU. Specify -XX:+UseGPU to exercise this new implementation.
bharadwaj
parents: 11527
diff changeset
32 #include "ptxKernelArguments.hpp"
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
33
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
34 void * gpu::Ptx::_device_context;
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
35 int gpu::Ptx::_cu_device = 0;
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
36
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
37 gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
38 gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create;
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
39 gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy;
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
40 gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize;
11527
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
41 gpu::Ptx::cuda_cu_ctx_set_current_func_t gpu::Ptx::_cuda_cu_ctx_set_current;
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
42 gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
43 gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
44 gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
45 gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
46 gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
47 gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
48 gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
49 gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex;
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
50 gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh;
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
51 gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree;
12653
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
52 gpu::Ptx::cuda_cu_mem_host_register_func_t gpu::Ptx::_cuda_cu_mem_host_register;
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
53 gpu::Ptx::cuda_cu_mem_host_get_device_pointer_func_t gpu::Ptx::_cuda_cu_mem_host_get_device_pointer;
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
54 gpu::Ptx::cuda_cu_mem_host_unregister_func_t gpu::Ptx::_cuda_cu_mem_host_unregister;
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
55
12653
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
/* Turns the macro argument into a string literal. */
#define STRINGIFY(x) #x

/*
 * Looks up `symbol` with os::dll_lookup() on the variable `handle` that must
 * be in scope at the expansion site, casts the result to <member>_func_t and
 * stores it in _<member>. If the lookup yields NULL, an error line is printed
 * and the expanding function returns 0.
 *
 * Expands to bare statements (an assignment followed by an `if`), so it must
 * not be used as the unbraced body of an if/else/loop.
 */
#define LOOKUP_CUDA_FUNCTION(symbol, member)                                      \
  _##member = CAST_TO_FN_PTR(member##_func_t,                                     \
                             os::dll_lookup(handle, STRINGIFY(symbol)));          \
  if (_##member == NULL) {                                                        \
    tty->print_cr("[CUDA] ***** Error: Failed to lookup %s", STRINGIFY(symbol));  \
    return 0;                                                                     \
  }

/* Same as LOOKUP_CUDA_FUNCTION, but resolves the "<symbol>_v2" entry point. */
#define LOOKUP_CUDA_V2_FUNCTION(symbol, member) \
  LOOKUP_CUDA_FUNCTION(symbol##_v2, member)
11842
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
67
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
68 /*
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
69 * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
70 */
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
71 int ncores(int major, int minor) {
11843
372bacc13022 gtx_ptx.cpp: fix switch default label and operator precedence in ncores().
Andreas Woess <andreas.woess@jku.at>
parents: 11842
diff changeset
72 int device_type = (major << 4) + minor;
11842
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
73
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
74 switch (device_type) {
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
75 case 0x10: return 8;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
76 case 0x11: return 8;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
77 case 0x12: return 8;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
78 case 0x13: return 8;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
79 case 0x20: return 32;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
80 case 0x21: return 48;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
81 case 0x30: return 192;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
82 case 0x35: return 192;
11843
372bacc13022 gtx_ptx.cpp: fix switch default label and operator precedence in ncores().
Andreas Woess <andreas.woess@jku.at>
parents: 11842
diff changeset
83 default:
11842
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
84 tty->print_cr("[CUDA] Warning: Unhandled device %x", device_type);
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
85 return 0;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
86 }
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
87 }
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
88
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
89 bool gpu::Ptx::initialize_gpu() {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
90
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
91 /* Initialize CUDA driver API */
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
92 int status = _cuda_cu_init(0);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
93 if (status != GRAAL_CUDA_SUCCESS) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
94 tty->print_cr("Failed to initialize CUDA device");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
95 return false;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
96 }
11527
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
97
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
98 if (TraceGPUInteraction) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
99 tty->print_cr("CUDA driver initialization: Success");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
100 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
101
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
102 /* Get the number of compute-capable device count */
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
103 int device_count = 0;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
104 status = _cuda_cu_device_get_count(&device_count);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
105 if (status != GRAAL_CUDA_SUCCESS) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
106 tty->print_cr("[CUDA] Failed to get compute-capable device count");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
107 return false;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
108 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
109
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
110 if (device_count == 0) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
111 tty->print_cr("[CUDA] Found no device supporting CUDA");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
112 return false;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
113 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
114
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
115 if (TraceGPUInteraction) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
116 tty->print_cr("[CUDA] Number of compute-capable devices found: %d", device_count);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
117 }
11527
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
118
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
119 /* Get the handle to the first compute device */
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
120 int device_id = 0;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
121 /* Compute-capable device handle */
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
122 status = _cuda_cu_device_get(&_cu_device, device_id);
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
123
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
124 if (status != GRAAL_CUDA_SUCCESS) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
125 tty->print_cr("[CUDA] Failed to get handle of first compute-capable device i.e., the one at ordinal: %d", device_id);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
126 return false;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
127 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
128
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
129 if (TraceGPUInteraction) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
130 tty->print_cr("[CUDA] Got the handle of first compute-device");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
131 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
132
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
133 /* Get device attributes */
11842
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
134 int unified_addressing;
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
135
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
136 status = _cuda_cu_device_get_attribute(&unified_addressing, GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, _cu_device);
11283
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
137
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
138 if (status != GRAAL_CUDA_SUCCESS) {
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
139 tty->print_cr("[CUDA] Failed to query unified addressing mode of device: %d", _cu_device);
11283
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
140 return false;
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
141 }
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
142
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
143 if (TraceGPUInteraction) {
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
144 tty->print_cr("[CUDA] Unified addressing support on device %d: %d", _cu_device, unified_addressing);
11283
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
145 }
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
146
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
147
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
148 /* Get device name */
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
149 char device_name[256];
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
150 status = _cuda_cu_device_get_name(device_name, 256, _cu_device);
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
151
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
152 if (status != GRAAL_CUDA_SUCCESS) {
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
153 tty->print_cr("[CUDA] Failed to get name of device: %d", _cu_device);
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
154 return false;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
155 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
156
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
157 if (TraceGPUInteraction) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
158 tty->print_cr("[CUDA] Using %s", device_name);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
159 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
160
11842
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
161
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
162 return true;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
163 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
164
11842
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
165 unsigned int gpu::Ptx::total_cores() {
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
166
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
167 int minor, major, nmp;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
168 int status = _cuda_cu_device_get_attribute(&minor,
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
169 GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
170 _cu_device);
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
171
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
172 if (status != GRAAL_CUDA_SUCCESS) {
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
173 tty->print_cr("[CUDA] Failed to get minor attribute of device: %d", _cu_device);
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
174 return 0;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
175 }
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
176
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
177 status = _cuda_cu_device_get_attribute(&major,
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
178 GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
179 _cu_device);
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
180
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
181 if (status != GRAAL_CUDA_SUCCESS) {
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
182 tty->print_cr("[CUDA] Failed to get major attribute of device: %d", _cu_device);
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
183 return 0;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
184 }
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
185
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
186 status = _cuda_cu_device_get_attribute(&nmp,
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
187 GRAAL_CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
188 _cu_device);
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
189
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
190 if (status != GRAAL_CUDA_SUCCESS) {
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
191 tty->print_cr("[CUDA] Failed to get numberof MPs on device: %d", _cu_device);
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
192 return 0;
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
193 }
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
194
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
195 int total = nmp * ncores(major, minor);
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
196
11894
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
197 int max_threads_per_block, warp_size, async_engines, can_map_host_memory, concurrent_kernels;
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
198
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
199 status = _cuda_cu_device_get_attribute(&max_threads_per_block,
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
200 GRAAL_CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
201 _cu_device);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
202
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
203 if (status != GRAAL_CUDA_SUCCESS) {
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
204 tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: %d", _cu_device);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
205 return 0;
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
206 }
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
207
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
208 status = _cuda_cu_device_get_attribute(&warp_size,
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
209 GRAAL_CU_DEVICE_ATTRIBUTE_WARP_SIZE,
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
210 _cu_device);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
211
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
212 if (status != GRAAL_CUDA_SUCCESS) {
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
213 tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_WARP_SIZE: %d", _cu_device);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
214 return 0;
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
215 }
12653
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
216
11894
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
217 status = _cuda_cu_device_get_attribute(&async_engines,
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
218 GRAAL_CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT,
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
219 _cu_device);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
220
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
221 if (status != GRAAL_CUDA_SUCCESS) {
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
222 tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_WARP_SIZE: %d", _cu_device);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
223 return 0;
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
224 }
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
225
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
226 status = _cuda_cu_device_get_attribute(&can_map_host_memory,
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
227 GRAAL_CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
228 _cu_device);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
229
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
230 if (status != GRAAL_CUDA_SUCCESS) {
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
231 tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: %d", _cu_device);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
232 return 0;
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
233 }
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
234
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
235 status = _cuda_cu_device_get_attribute(&concurrent_kernels,
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
236 GRAAL_CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS,
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
237 _cu_device);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
238
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
239 if (status != GRAAL_CUDA_SUCCESS) {
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
240 tty->print_cr("[CUDA] Failed to get GRAAL_CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: %d", _cu_device);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
241 return 0;
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
242 }
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
243
11842
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
244 if (TraceGPUInteraction) {
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
245 tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", _cu_device, major, minor);
11894
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
246 tty->print_cr("[CUDA] Number of cores: %d async engines: %d can map host mem: %d concurrent kernels: %d",
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
247 total, async_engines, can_map_host_memory, concurrent_kernels);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
248 tty->print_cr("[CUDA] Max threads per block: %d warp size: %d", max_threads_per_block, warp_size);
11842
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
249 }
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
250 return (total);
12653
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
251
11842
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
252 }
8d8f63069f58 PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents: 11822
diff changeset
253
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
254 void *gpu::Ptx::generate_kernel(unsigned char *code, int code_len, const char *name) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
255
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
256 struct CUmod_st * cu_module;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
257 // Use three JIT compiler options
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
258 const unsigned int jit_num_options = 3;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
259 int *jit_options = NEW_C_HEAP_ARRAY(int, jit_num_options, mtCompiler);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
260 void **jit_option_values = NEW_C_HEAP_ARRAY(void *, jit_num_options, mtCompiler);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
261
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
262 // Set up PTX JIT compiler options
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
263 // 1. set size of compilation log buffer
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
264 int jit_log_buffer_size = 1024;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
265 jit_options[0] = GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
266 jit_option_values[0] = (void *)(size_t)jit_log_buffer_size;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
267
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
268 // 2. set pointer to compilation log buffer
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
269 char *jit_log_buffer = NEW_C_HEAP_ARRAY(char, jit_log_buffer_size, mtCompiler);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
270 jit_options[1] = GRAAL_CU_JIT_INFO_LOG_BUFFER;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
271 jit_option_values[1] = jit_log_buffer;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
272
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
273 // 3. set pointer to set the Maximum # of registers (32) for the kernel
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
274 int jit_register_count = 32;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
275 jit_options[2] = GRAAL_CU_JIT_MAX_REGISTERS;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
276 jit_option_values[2] = (void *)(size_t)jit_register_count;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
277
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
278 /* Create CUDA context to compile and execute the kernel */
12653
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
279 int status = _cuda_cu_ctx_create(&_device_context, GRAAL_CU_CTX_MAP_HOST, _cu_device);
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
280
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
281 if (status != GRAAL_CUDA_SUCCESS) {
11634
bff2b88444f5 Start of PTX array passing
Morris Meyer <morris.meyer@oracle.com>
parents: 11606
diff changeset
282 tty->print_cr("[CUDA] Failed to create CUDA context for device(%d): %d", _cu_device, status);
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
283 return NULL;
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
284 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
285
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
286 if (TraceGPUInteraction) {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
287 tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device);
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
288 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
289
11527
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
290 status = _cuda_cu_ctx_set_current(_device_context);
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
291
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
292 if (status != GRAAL_CUDA_SUCCESS) {
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
293 tty->print_cr("[CUDA] Failed to set current context for device: %d", _cu_device);
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
294 return NULL;
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
295 }
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
296
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
297 if (TraceGPUInteraction) {
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
298 tty->print_cr("[CUDA] Success: Set current context for device: %d", _cu_device);
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
299 }
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
300
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
301 if (TraceGPUInteraction) {
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
302 tty->print_cr("[CUDA] PTX Kernel\n%s", code);
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
303 tty->print_cr("[CUDA] Function name : %s", name);
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
304
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
305 }
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
306
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
307 /* Load module's data with compiler options */
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
308 status = _cuda_cu_module_load_data_ex(&cu_module, (void*) code, jit_num_options,
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
309 jit_options, (void **)jit_option_values);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
310 if (status != GRAAL_CUDA_SUCCESS) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
311 if (status == GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
312 tty->print_cr("[CUDA] Check for malformed PTX kernel or incorrect PTX compilation options");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
313 }
11527
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
314 tty->print_cr("[CUDA] *** Error (%d) Failed to load module data with online compiler options for method %s",
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
315 status, name);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
316 return NULL;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
317 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
318
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
319 if (TraceGPUInteraction) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
320 tty->print_cr("[CUDA] Loaded data for PTX Kernel");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
321 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
322
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
323 struct CUfunc_st * cu_function;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
324
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
325 status = _cuda_cu_module_get_function(&cu_function, cu_module, name);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
326
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
327 if (status != GRAAL_CUDA_SUCCESS) {
11283
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
328 tty->print_cr("[CUDA] *** Error: Failed to get function %s", name);
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
329 return NULL;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
330 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
331
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
332 if (TraceGPUInteraction) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
333 tty->print_cr("[CUDA] Got function handle for %s", name);
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
334 }
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
335
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
336 return cu_function;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
337 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
338
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
339 bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
11822
365d8f385fb5 PTX: one-dimensional parallel warp invocation, ParallelOver annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11821
diff changeset
340 return gpu::Ptx::execute_warp(1, 1, 1, kernel, ptxka, ret);
365d8f385fb5 PTX: one-dimensional parallel warp invocation, ParallelOver annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11821
diff changeset
341 }
365d8f385fb5 PTX: one-dimensional parallel warp invocation, ParallelOver annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11821
diff changeset
342
365d8f385fb5 PTX: one-dimensional parallel warp invocation, ParallelOver annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11821
diff changeset
343 bool gpu::Ptx::execute_warp(int dimX, int dimY, int dimZ,
365d8f385fb5 PTX: one-dimensional parallel warp invocation, ParallelOver annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11821
diff changeset
344 address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
345 // grid dimensionality
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
346 unsigned int gridX = 1;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
347 unsigned int gridY = 1;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
348 unsigned int gridZ = 1;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
349
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
350 // thread dimensionality
11822
365d8f385fb5 PTX: one-dimensional parallel warp invocation, ParallelOver annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11821
diff changeset
351 unsigned int blockX = dimX;
365d8f385fb5 PTX: one-dimensional parallel warp invocation, ParallelOver annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11821
diff changeset
352 unsigned int blockY = dimY;
365d8f385fb5 PTX: one-dimensional parallel warp invocation, ParallelOver annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11821
diff changeset
353 unsigned int blockZ = dimZ;
11527
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
354
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
355 struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel;
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
356
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
357 void * config[5] = {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
358 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, ptxka._kernelArgBuffer,
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
359 GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &(ptxka._bufferOffset),
11283
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
360 GRAAL_CU_LAUNCH_PARAM_END
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
361 };
1cd1f8ff70a1 CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents: 10884
diff changeset
362
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
363 if (kernel == NULL) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
364 return false;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
365 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
366
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
367 if (TraceGPUInteraction) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
368 tty->print_cr("[CUDA] launching kernel");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
369 }
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
370
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
371 int status = _cuda_cu_launch_kernel(cu_function,
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
372 gridX, gridY, gridZ,
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
373 blockX, blockY, blockZ,
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
374 0, NULL, NULL, (void **) &config);
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
375 if (status != GRAAL_CUDA_SUCCESS) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
376 tty->print_cr("[CUDA] Failed to launch kernel");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
377 return false;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
378 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
379
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
380 if (TraceGPUInteraction) {
11822
365d8f385fb5 PTX: one-dimensional parallel warp invocation, ParallelOver annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11821
diff changeset
381 tty->print_cr("[CUDA] Success: Kernel Launch: X: %d Y: %d Z: %d", blockX, blockY, blockZ);
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
382 }
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
383
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
384 status = _cuda_cu_ctx_synchronize();
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
385
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
386 if (status != GRAAL_CUDA_SUCCESS) {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
387 tty->print_cr("[CUDA] Failed to synchronize launched kernel (%d)", status);
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
388 return false;
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
389 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
390
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
391 if (TraceGPUInteraction) {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
392 tty->print_cr("[CUDA] Success: Synchronized launch kernel");
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
393 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
394
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
395
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
396 // Get the result. TODO: Move this code to get_return_oop()
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
397 BasicType return_type = ptxka.get_ret_type();
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
398 switch (return_type) {
11821
d8659ad83fcc PTX single-threaded array store, Warp annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11634
diff changeset
399 case T_INT:
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
400 {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
401 int return_val;
12519
f020e149c1b6 PTX codegen enhancements; fixes to PTX test regressions.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 11902
diff changeset
402 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_INT_BYTE_SIZE);
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
403 if (status != GRAAL_CUDA_SUCCESS) {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
404 tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
405 return false;
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
406 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
407 ret.set_jint(return_val);
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
408 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
409 break;
11901
61767ccd4600 PTX boolean return value, emitIntegerTestMove, warnings
Morris Meyer <morris.meyer@oracle.com>
parents: 11894
diff changeset
410 case T_BOOLEAN:
61767ccd4600 PTX boolean return value, emitIntegerTestMove, warnings
Morris Meyer <morris.meyer@oracle.com>
parents: 11894
diff changeset
411 {
61767ccd4600 PTX boolean return value, emitIntegerTestMove, warnings
Morris Meyer <morris.meyer@oracle.com>
parents: 11894
diff changeset
412 int return_val;
12519
f020e149c1b6 PTX codegen enhancements; fixes to PTX test regressions.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 11902
diff changeset
413 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_INT_BYTE_SIZE);
11901
61767ccd4600 PTX boolean return value, emitIntegerTestMove, warnings
Morris Meyer <morris.meyer@oracle.com>
parents: 11894
diff changeset
414 if (status != GRAAL_CUDA_SUCCESS) {
61767ccd4600 PTX boolean return value, emitIntegerTestMove, warnings
Morris Meyer <morris.meyer@oracle.com>
parents: 11894
diff changeset
415 tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
61767ccd4600 PTX boolean return value, emitIntegerTestMove, warnings
Morris Meyer <morris.meyer@oracle.com>
parents: 11894
diff changeset
416 return false;
61767ccd4600 PTX boolean return value, emitIntegerTestMove, warnings
Morris Meyer <morris.meyer@oracle.com>
parents: 11894
diff changeset
417 }
61767ccd4600 PTX boolean return value, emitIntegerTestMove, warnings
Morris Meyer <morris.meyer@oracle.com>
parents: 11894
diff changeset
418 ret.set_jint(return_val);
61767ccd4600 PTX boolean return value, emitIntegerTestMove, warnings
Morris Meyer <morris.meyer@oracle.com>
parents: 11894
diff changeset
419 }
61767ccd4600 PTX boolean return value, emitIntegerTestMove, warnings
Morris Meyer <morris.meyer@oracle.com>
parents: 11894
diff changeset
420 break;
11894
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
421 case T_FLOAT:
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
422 {
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
423 float return_val;
12519
f020e149c1b6 PTX codegen enhancements; fixes to PTX test regressions.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 11902
diff changeset
424 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_FLOAT_BYTE_SIZE);
11894
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
425 if (status != GRAAL_CUDA_SUCCESS) {
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
426 tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
427 return false;
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
428 }
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
429 ret.set_jfloat(return_val);
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
430 }
c7abc8411011 Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents: 11843
diff changeset
431 break;
11902
67a1e27a8dbb PTX initial float and double
Morris Meyer <morris.meyer@oracle.com>
parents: 11901
diff changeset
432 case T_DOUBLE:
67a1e27a8dbb PTX initial float and double
Morris Meyer <morris.meyer@oracle.com>
parents: 11901
diff changeset
433 {
67a1e27a8dbb PTX initial float and double
Morris Meyer <morris.meyer@oracle.com>
parents: 11901
diff changeset
434 double return_val;
12519
f020e149c1b6 PTX codegen enhancements; fixes to PTX test regressions.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 11902
diff changeset
435 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_DOUBLE_BYTE_SIZE);
11902
67a1e27a8dbb PTX initial float and double
Morris Meyer <morris.meyer@oracle.com>
parents: 11901
diff changeset
436 if (status != GRAAL_CUDA_SUCCESS) {
67a1e27a8dbb PTX initial float and double
Morris Meyer <morris.meyer@oracle.com>
parents: 11901
diff changeset
437 tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
67a1e27a8dbb PTX initial float and double
Morris Meyer <morris.meyer@oracle.com>
parents: 11901
diff changeset
438 return false;
67a1e27a8dbb PTX initial float and double
Morris Meyer <morris.meyer@oracle.com>
parents: 11901
diff changeset
439 }
67a1e27a8dbb PTX initial float and double
Morris Meyer <morris.meyer@oracle.com>
parents: 11901
diff changeset
440 ret.set_jdouble(return_val);
67a1e27a8dbb PTX initial float and double
Morris Meyer <morris.meyer@oracle.com>
parents: 11901
diff changeset
441 }
67a1e27a8dbb PTX initial float and double
Morris Meyer <morris.meyer@oracle.com>
parents: 11901
diff changeset
442 break;
11821
d8659ad83fcc PTX single-threaded array store, Warp annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11634
diff changeset
443 case T_LONG:
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
444 {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
445 long return_val;
12519
f020e149c1b6 PTX codegen enhancements; fixes to PTX test regressions.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 11902
diff changeset
446 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_LONG_BYTE_SIZE);
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
447 if (status != GRAAL_CUDA_SUCCESS) {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
448 tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
449 return false;
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
450 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
451 ret.set_jlong(return_val);
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
452 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
453 break;
11821
d8659ad83fcc PTX single-threaded array store, Warp annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11634
diff changeset
454 case T_VOID:
d8659ad83fcc PTX single-threaded array store, Warp annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11634
diff changeset
455 break;
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
456 default:
11821
d8659ad83fcc PTX single-threaded array store, Warp annotation
Morris Meyer <morris.meyer@oracle.com>
parents: 11634
diff changeset
457 tty->print_cr("[CUDA] TODO *** Unhandled return type: %d", return_type);
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
458 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
459
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
460 // Free device memory allocated for result
12519
f020e149c1b6 PTX codegen enhancements; fixes to PTX test regressions.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 11902
diff changeset
461 status = gpu::Ptx::_cuda_cu_memfree(ptxka._dev_return_value);
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
462 if (status != GRAAL_CUDA_SUCCESS) {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
463 tty->print_cr("[CUDA] *** Error (%d) Failed to free device memory of return value", status);
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
464 return false;
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
465 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
466
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
467 if (TraceGPUInteraction) {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
468 tty->print_cr("[CUDA] Success: Freed device memory of return value");
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
469 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
470
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
471 // Destroy context
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
472 status = gpu::Ptx::_cuda_cu_ctx_destroy(_device_context);
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
473 if (status != GRAAL_CUDA_SUCCESS) {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
474 tty->print_cr("[CUDA] *** Error (%d) Failed to destroy context", status);
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
475 return false;
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
476 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
477
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
478 if (TraceGPUInteraction) {
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
479 tty->print_cr("[CUDA] Success: Destroy context");
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
480 }
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
481
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
482 return (status == GRAAL_CUDA_SUCCESS);
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
483 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
484
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
// File name of the CUDA driver library that gpu::Ptx::probe_linkage() hands
// to os::dll_load(). Platforms without a known library get an empty name.
#ifdef LINUX
static const char cuda_library_name[] = "libcuda.so";
#elif defined(__APPLE__)
static const char cuda_library_name[] = "/usr/local/cuda/lib/libcuda.dylib";
#else
static const char cuda_library_name[] = "";
#endif

// Size, in bytes, of the buffer passed to os::dll_load() alongside the
// library name.
#define STD_BUFFER_SIZE 1024
6a2d65cb5d7d fix: gpu_ptx.cpp did not compile on windows
Christian Wirth <christian.wirth@oracle.com>
parents: 10879
diff changeset
494
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
495 bool gpu::Ptx::probe_linkage() {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
496 if (cuda_library_name != NULL) {
10882
6a2d65cb5d7d fix: gpu_ptx.cpp did not compile on windows
Christian Wirth <christian.wirth@oracle.com>
parents: 10879
diff changeset
497 char *buffer = (char*)malloc(STD_BUFFER_SIZE);
6a2d65cb5d7d fix: gpu_ptx.cpp did not compile on windows
Christian Wirth <christian.wirth@oracle.com>
parents: 10879
diff changeset
498 void *handle = os::dll_load(cuda_library_name, buffer, STD_BUFFER_SIZE);
11527
c99e65785936 Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents: 11485
diff changeset
499 free(buffer);
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
500 if (handle != NULL) {
12653
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
501 LOOKUP_CUDA_FUNCTION(cuInit, cuda_cu_init);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
502 LOOKUP_CUDA_FUNCTION(cuCtxSynchronize, cuda_cu_ctx_synchronize);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
503 LOOKUP_CUDA_FUNCTION(cuCtxSetCurrent, cuda_cu_ctx_set_current);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
504 LOOKUP_CUDA_FUNCTION(cuDeviceGetCount, cuda_cu_device_get_count);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
505 LOOKUP_CUDA_FUNCTION(cuDeviceGetName, cuda_cu_device_get_name);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
506 LOOKUP_CUDA_FUNCTION(cuDeviceGet, cuda_cu_device_get);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
507 LOOKUP_CUDA_FUNCTION(cuDeviceComputeCapability, cuda_cu_device_compute_capability);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
508 LOOKUP_CUDA_FUNCTION(cuDeviceGetAttribute, cuda_cu_device_get_attribute);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
509 LOOKUP_CUDA_FUNCTION(cuModuleGetFunction, cuda_cu_module_get_function);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
510 LOOKUP_CUDA_FUNCTION(cuModuleLoadDataEx, cuda_cu_module_load_data_ex);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
511 LOOKUP_CUDA_FUNCTION(cuLaunchKernel, cuda_cu_launch_kernel);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
512 LOOKUP_CUDA_FUNCTION(cuMemHostRegister, cuda_cu_mem_host_register);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
513 LOOKUP_CUDA_FUNCTION(cuMemHostUnregister, cuda_cu_mem_host_unregister);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
514 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
515 LOOKUP_CUDA_V2_FUNCTION(cuCtxCreate, cuda_cu_ctx_create);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
516 LOOKUP_CUDA_V2_FUNCTION(cuCtxDestroy, cuda_cu_ctx_destroy);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
517 LOOKUP_CUDA_V2_FUNCTION(cuMemAlloc, cuda_cu_memalloc);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
518 LOOKUP_CUDA_V2_FUNCTION(cuMemFree, cuda_cu_memfree);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
519 LOOKUP_CUDA_V2_FUNCTION(cuMemcpyHtoD, cuda_cu_memcpy_htod);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
520 LOOKUP_CUDA_V2_FUNCTION(cuMemcpyDtoH, cuda_cu_memcpy_dtoh);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
521 LOOKUP_CUDA_V2_FUNCTION(cuMemHostGetDevicePointer, cuda_cu_mem_host_get_device_pointer);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
522 #else
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
523 LOOKUP_CUDA_FUNCTION(cuCtxCreate, cuda_cu_ctx_create);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
524 LOOKUP_CUDA_FUNCTION(cuCtxDestroy, cuda_cu_ctx_destroy);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
525 LOOKUP_CUDA_FUNCTION(cuMemAlloc, cuda_cu_memalloc);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
526 LOOKUP_CUDA_FUNCTION(cuMemFree, cuda_cu_memfree);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
527 LOOKUP_CUDA_FUNCTION(cuMemcpyHtoD, cuda_cu_memcpy_htod);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
528 LOOKUP_CUDA_FUNCTION(cuMemcpyDtoH, cuda_cu_memcpy_dtoh);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
529 LOOKUP_CUDA_FUNCTION(cuMemHostGetDevicePointer, cuda_cu_mem_host_get_device_pointer);
1a7e7011a341 * PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents: 12519
diff changeset
530 #endif
11485
49bb1bc983c6 Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents: 11284
diff changeset
531
10879
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
532 if (TraceGPUInteraction) {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
533 tty->print_cr("[CUDA] Success: library linkage");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
534 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
535 return true;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
536 } else {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
537 // Unable to dlopen libcuda
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
538 return false;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
539 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
540 } else {
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
541 tty->print_cr("Unsupported CUDA platform");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
542 return false;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
543 }
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
544 tty->print_cr("Failed to find CUDA linkage");
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
545 return false;
d55f24eac4b1 PTX support for Linux
Morris Meyer <morris.meyer@oracle.com>
parents:
diff changeset
546 }