Mercurial > hg > truffle
annotate src/gpu/ptx/vm/gpu_ptx.hpp @ 16734:7d7fae9e9d17
merge
author | Michael Haupt <michael.haupt@oracle.com> |
---|---|
date | Sat, 09 Aug 2014 08:51:01 -0700 |
parents | 66e3af78ea96 |
children | e86659e1f813 |
rev | line source |
---|---|
10879 | 1 /* |
2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. | |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
20 * or visit www.oracle.com if you need additional information or have any | |
21 * questions. | |
22 * | |
23 */ | |
24 | |
25 #ifndef GPU_PTX_HPP | |
26 #define GPU_PTX_HPP | |
27 | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
28 /* |
10879 | 29 * Some useful macro definitions from publicly available cuda.h. |
30 * These definitions are for convenience. | |
31 */ | |
32 #define GRAAL_CUDA_SUCCESS 0 | |
11283
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
33 /* GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING (defined below): device shares a unified address space with the host */ |
11894
c7abc8411011
Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents:
11842
diff
changeset
|
34 #define GRAAL_CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK 1 |
11283
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
35 #define GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING 41 |
10879 | 36 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75 |
37 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76 | |
11842
8d8f63069f58
PTX warp limiter to available GPU processors
Morris Meyer <morris.meyer@oracle.com>
parents:
11822
diff
changeset
|
38 #define GRAAL_CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT 16 |
11894
c7abc8411011
Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents:
11842
diff
changeset
|
39 #define GRAAL_CU_DEVICE_ATTRIBUTE_WARP_SIZE 10 |
c7abc8411011
Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents:
11842
diff
changeset
|
40 #define GRAAL_CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY 19 |
c7abc8411011
Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents:
11842
diff
changeset
|
41 #define GRAAL_CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS 31 |
c7abc8411011
Fixed BasicPTXTest and IntegerPTXTest
Morris Meyer <morris.meyer@oracle.com>
parents:
11842
diff
changeset
|
42 #define GRAAL_CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT 40 |
10879 | 43 #define GRAAL_CU_JIT_MAX_REGISTERS 0 |
44 #define GRAAL_CU_JIT_THREADS_PER_BLOCK 1 | |
45 #define GRAAL_CU_JIT_INFO_LOG_BUFFER 3 | |
46 #define GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES 4 | |
47 #define GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU 209 | |
48 | |
12653
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
49 /* |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
50 * Flags for cuMemHostRegister |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
51 */ |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
52 |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
53 #define GRAAL_CU_MEMHOSTREGISTER_PORTABLE 1 |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
54 #define GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP 2 |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
55 |
11283
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
56 /** |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
57 * End of array terminator for the extra parameter to |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
58 * ::cuLaunchKernel |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
59 */ |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
60 #define GRAAL_CU_LAUNCH_PARAM_END ((void*) 0x00) |
11283
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
61 |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
62 /** |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
63 * Indicator that the next value in the extra parameter to |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
64 * ::cuLaunchKernel will be a pointer to a buffer containing all kernel |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
65 * parameters used for launching kernel f (the function handle passed to ::cuLaunchKernel). This buffer needs to |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
66 * honor all alignment/padding requirements of the individual parameters. |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
67 * If ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
68 * extra array, then ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER will have no |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
69 * effect. |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
70 */ |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
71 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER ((void*) 0x01) |
11283
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
72 |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
73 /** |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
74 * Indicator that the next value in the extra parameter to |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
75 * ::cuLaunchKernel will be a pointer to a size_t which contains the |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
76 * size of the buffer specified with ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER. |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
77 * It is required that ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER also be specified |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
78 * in the extra array if the value associated with |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
79 * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. |
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
80 */ |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
81 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE ((void*) 0x02) |
11283
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
82 |
12653
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
83 /* |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
84 * Context creation flags |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
85 */ |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
86 |
13753
80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
Doug Simon <doug.simon@oracle.com>
parents:
13648
diff
changeset
|
87 #define GRAAL_CU_CTX_MAP_HOST 0x08 |
80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
Doug Simon <doug.simon@oracle.com>
parents:
13648
diff
changeset
|
88 #define GRAAL_CU_CTX_SCHED_BLOCKING_SYNC 0x04 |
12653
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
89 |
13828
5c8a3c09397b
Add additional device checks and compute capability checks for CUDA devices found on Linux.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
13827
diff
changeset
|
90 /** |
5c8a3c09397b
Add additional device checks and compute capability checks for CUDA devices found on Linux.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
13827
diff
changeset
|
91 * Support compute capability 3.0 and later |
5c8a3c09397b
Add additional device checks and compute capability checks for CUDA devices found on Linux.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
13827
diff
changeset
|
92 */ |
5c8a3c09397b
Add additional device checks and compute capability checks for CUDA devices found on Linux.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
13827
diff
changeset
|
93 |
5c8a3c09397b
Add additional device checks and compute capability checks for CUDA devices found on Linux.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
13827
diff
changeset
|
94 #define GRAAL_SUPPORTED_COMPUTE_CAPABILITY_VERSION 3.0 |
5c8a3c09397b
Add additional device checks and compute capability checks for CUDA devices found on Linux.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
13827
diff
changeset
|
95 |
15177
66e3af78ea96
HSAIL: added safepoint support
Doug Simon <doug.simon@oracle.com>
parents:
13922
diff
changeset
|
96 class Ptx : public Gpu { |
13753
80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
Doug Simon <doug.simon@oracle.com>
parents:
13648
diff
changeset
|
97 friend class PtxCall; |
10879 | 98 |
13819
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
99 private: |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
100 |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
101 static JNINativeMethod PTX_methods[]; |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
102 |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
103 // static native boolean initialize(); |
13827
8053c3ede984
fixed Windows build issues
Doug Simon <doug.simon@oracle.com>
parents:
13819
diff
changeset
|
104 JNIEXPORT static jboolean initialize(JNIEnv* env, jclass); |
13819
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
105 |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
106 // static native long generateKernel(byte[] targetCode, String name); |
13827
8053c3ede984
fixed Windows build issues
Doug Simon <doug.simon@oracle.com>
parents:
13819
diff
changeset
|
107 JNIEXPORT static jlong generate_kernel(JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle); |
13819
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
108 |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
109 // static native long getLaunchKernelAddress(); |
13827
8053c3ede984
fixed Windows build issues
Doug Simon <doug.simon@oracle.com>
parents:
13819
diff
changeset
|
110 JNIEXPORT static jlong get_execute_kernel_from_vm_address(JNIEnv *env, jclass); |
13819
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
111 |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
112 // static native int getAvailableProcessors0(); |
13827
8053c3ede984
fixed Windows build issues
Doug Simon <doug.simon@oracle.com>
parents:
13819
diff
changeset
|
113 JNIEXPORT static jint get_total_cores(JNIEnv *env, jobject); |
13819
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
114 |
13922
0995dcbd6dd8
Change CUDA context management to support multiple executions of a kernel. Exclude GPU offloading of lambdas from java.* library code.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
13888
diff
changeset
|
115 JNIEXPORT static void destroy_ptx_context(); |
0995dcbd6dd8
Change CUDA context management to support multiple executions of a kernel. Exclude GPU offloading of lambdas from java.* library code.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
13888
diff
changeset
|
116 |
13819
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
117 // Links the CUDA driver library functions |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
118 static bool link(); |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
119 |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
120 static int ncores(int major, int minor); |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
121 |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
122 public: |
15177
66e3af78ea96
HSAIL: added safepoint support
Doug Simon <doug.simon@oracle.com>
parents:
13922
diff
changeset
|
123 |
66e3af78ea96
HSAIL: added safepoint support
Doug Simon <doug.simon@oracle.com>
parents:
13922
diff
changeset
|
124 virtual const char* name() { return "PTX"; } |
66e3af78ea96
HSAIL: added safepoint support
Doug Simon <doug.simon@oracle.com>
parents:
13922
diff
changeset
|
125 |
13819
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
126 // Registers the implementations for the native methods in PTXHotSpotBackend |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
127 static bool register_natives(JNIEnv* env); |
49db2c1e3bee
added support for co-existing GPU backends (JBS:GRAAL-1)
Doug Simon <doug.simon@oracle.com>
parents:
13753
diff
changeset
|
128 |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
129 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
130 typedef unsigned long long CUdeviceptr; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
131 #else |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
132 typedef unsigned int CUdeviceptr; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
133 #endif |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
134 |
13624
220ed109bf77
initial code for calling PTX kernel code from Java with parameter marshaling and return value unmarshaling performed by a wrapper specified via manual graph construction
Doug Simon <doug.simon@oracle.com>
parents:
12653
diff
changeset
|
135 typedef int CUdevice; /* CUDA device */ |
220ed109bf77
initial code for calling PTX kernel code from Java with parameter marshaling and return value unmarshaling performed by a wrapper specified via manual graph construction
Doug Simon <doug.simon@oracle.com>
parents:
12653
diff
changeset
|
136 |
13648
27acedac70b7
added support for @ParallelOver annotation in PTX kernel call wrapper
Doug Simon <doug.simon@oracle.com>
parents:
13624
diff
changeset
|
137 static jlong execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ, |
13753
80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
Doug Simon <doug.simon@oracle.com>
parents:
13648
diff
changeset
|
138 jlong buffer, |
80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
Doug Simon <doug.simon@oracle.com>
parents:
13648
diff
changeset
|
139 jint bufferSize, |
80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
Doug Simon <doug.simon@oracle.com>
parents:
13648
diff
changeset
|
140 jint objectParametersCount, |
80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
Doug Simon <doug.simon@oracle.com>
parents:
13648
diff
changeset
|
141 jlong objectParametersOffsets, |
80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
Doug Simon <doug.simon@oracle.com>
parents:
13648
diff
changeset
|
142 jlong pinnedObjects, |
13648
27acedac70b7
added support for @ParallelOver annotation in PTX kernel call wrapper
Doug Simon <doug.simon@oracle.com>
parents:
13624
diff
changeset
|
143 int encodedReturnTypeSize); |
12653
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
144 |
10879 | 145 private: |
146 typedef int (*cuda_cu_init_func_t)(unsigned int); | |
12653
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
147 typedef int (*cuda_cu_ctx_create_func_t)(void*, unsigned int, CUdevice); |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
148 typedef int (*cuda_cu_ctx_destroy_func_t)(void*); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
149 typedef int (*cuda_cu_ctx_synchronize_func_t)(void); |
13753
80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
Doug Simon <doug.simon@oracle.com>
parents:
13648
diff
changeset
|
150 typedef int (*cuda_cu_ctx_get_current_func_t)(void*); |
11527
c99e65785936
Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents:
11485
diff
changeset
|
151 typedef int (*cuda_cu_ctx_set_current_func_t)(void*); |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
152 typedef int (*cuda_cu_device_get_count_func_t)(int*); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
153 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
154 typedef int (*cuda_cu_device_get_func_t)(int*, int); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
155 typedef int (*cuda_cu_device_compute_capability_func_t)(int*, int*, int); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
156 typedef int (*cuda_cu_device_get_attribute_func_t)(int*, int, int); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
157 typedef int (*cuda_cu_launch_kernel_func_t)(struct CUfunc_st*, |
10879 | 158 unsigned int, unsigned int, unsigned int, |
159 unsigned int, unsigned int, unsigned int, | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
160 unsigned int, void*, void**, void**); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
161 typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
162 typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**); |
13888
51584f76462d
pulled Ptx and Hsail classes out of gpu class namespace
Doug Simon <doug.simon@oracle.com>
parents:
13828
diff
changeset
|
163 typedef int (*cuda_cu_memalloc_func_t)(Ptx::CUdeviceptr*, size_t); |
51584f76462d
pulled Ptx and Hsail classes out of gpu class namespace
Doug Simon <doug.simon@oracle.com>
parents:
13828
diff
changeset
|
164 typedef int (*cuda_cu_memfree_func_t)(Ptx::CUdeviceptr); |
51584f76462d
pulled Ptx and Hsail classes out of gpu class namespace
Doug Simon <doug.simon@oracle.com>
parents:
13828
diff
changeset
|
165 typedef int (*cuda_cu_memcpy_htod_func_t)(Ptx::CUdeviceptr, const void*, unsigned int); |
51584f76462d
pulled Ptx and Hsail classes out of gpu class namespace
Doug Simon <doug.simon@oracle.com>
parents:
13828
diff
changeset
|
166 typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, Ptx::CUdeviceptr, unsigned int); |
12653
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
167 typedef int (*cuda_cu_mem_host_register_func_t)(void*, size_t, unsigned int); |
13888
51584f76462d
pulled Ptx and Hsail classes out of gpu class namespace
Doug Simon <doug.simon@oracle.com>
parents:
13828
diff
changeset
|
168 typedef int (*cuda_cu_mem_host_get_device_pointer_func_t)(Ptx::CUdeviceptr*, void*, unsigned int); |
12653
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
169 typedef int (*cuda_cu_mem_host_unregister_func_t)(void*); |
10879 | 170 |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
171 public: |
10879 | 172 static cuda_cu_init_func_t _cuda_cu_init; |
173 static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create; | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
174 static cuda_cu_ctx_destroy_func_t _cuda_cu_ctx_destroy; |
10879 | 175 static cuda_cu_ctx_synchronize_func_t _cuda_cu_ctx_synchronize; |
176 static cuda_cu_device_get_count_func_t _cuda_cu_device_get_count; | |
177 static cuda_cu_device_get_name_func_t _cuda_cu_device_get_name; | |
178 static cuda_cu_device_get_func_t _cuda_cu_device_get; | |
179 static cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability; /* Deprecated as of CUDA 5.0 */ | |
180 static cuda_cu_device_get_attribute_func_t _cuda_cu_device_get_attribute; | |
181 static cuda_cu_launch_kernel_func_t _cuda_cu_launch_kernel; | |
182 static cuda_cu_module_get_function_func_t _cuda_cu_module_get_function; | |
183 static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex; | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
184 static cuda_cu_memalloc_func_t _cuda_cu_memalloc; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
185 static cuda_cu_memfree_func_t _cuda_cu_memfree; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
186 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
187 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh; |
13753
80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
Doug Simon <doug.simon@oracle.com>
parents:
13648
diff
changeset
|
188 static cuda_cu_ctx_get_current_func_t _cuda_cu_ctx_get_current; |
11527
c99e65785936
Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents:
11485
diff
changeset
|
189 static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current; |
12653
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
190 static cuda_cu_mem_host_register_func_t _cuda_cu_mem_host_register; |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
191 static cuda_cu_mem_host_get_device_pointer_func_t _cuda_cu_mem_host_get_device_pointer; |
1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
parents:
11894
diff
changeset
|
192 static cuda_cu_mem_host_unregister_func_t _cuda_cu_mem_host_unregister; |
10879 | 193 |
194 protected: | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
195 static void* _device_context; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
196 static int _cu_device; |
10879 | 197 }; |
198 #endif // GPU_PTX_HPP |