Mercurial > hg > truffle
annotate src/gpu/ptx/vm/gpu_ptx.hpp @ 11625:94a28e1b2c86
Add a test to ConditionalEliminationTest
author | Gilles Duboscq <duboscq@ssw.jku.at> |
---|---|
date | Wed, 11 Sep 2013 15:19:58 +0200 |
parents | c99e65785936 |
children | 365d8f385fb5 |
rev | line source |
---|---|
10879 | 1 /* |
2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. | |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
20 * or visit www.oracle.com if you need additional information or have any | |
21 * questions. | |
22 * | |
23 */ | |
24 | |
25 #ifndef GPU_PTX_HPP | |
26 #define GPU_PTX_HPP | |
27 | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
/*
 * Some useful macro definitions from publicly available cuda.h.
 * These definitions are for convenience: they mirror the numeric values
 * used by cuda.h under GRAAL_-prefixed names.
 */

/* Status code reported on success. */
#define GRAAL_CUDA_SUCCESS                                   0

/* Device attribute: device shares a unified address space with the host. */
#define GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING         41
/* Device attributes: major / minor compute capability version number. */
#define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR   75
#define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR   76

/* JIT option identifiers. */
#define GRAAL_CU_JIT_MAX_REGISTERS                           0
#define GRAAL_CU_JIT_THREADS_PER_BLOCK                       1
#define GRAAL_CU_JIT_INFO_LOG_BUFFER                         3
#define GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES              4

/* Error code: no binary for GPU. */
#define GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU                   209

/**
 * End of array terminator for the extra parameter to
 * ::cuLaunchKernel
 */
#define GRAAL_CU_LAUNCH_PARAM_END            ((void*) 0x00)

/**
 * Indicator that the next value in the extra parameter to
 * ::cuLaunchKernel will be a pointer to a buffer containing all kernel
 * parameters used for launching kernel f.  This buffer needs to
 * honor all alignment/padding requirements of the individual parameters.
 * If ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the
 * extra array, then ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER will have no
 * effect.
 */
#define GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER ((void*) 0x01)

/**
 * Indicator that the next value in the extra parameter to
 * ::cuLaunchKernel will be a pointer to a size_t which contains the
 * size of the buffer specified with ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER.
 * It is required that ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER also be specified
 * in the extra array if the value associated with
 * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero.
 */
#define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE    ((void*) 0x02)
11283
1cd1f8ff70a1
CR-20: PTX kernel invocation with arguments - from Bharadwaj
Morris Meyer <morris.meyer@oracle.com>
parents:
10879
diff
changeset
|
69 |
10879 | 70 class Ptx { |
71 friend class gpu; | |
72 | |
73 protected: | |
74 static bool probe_linkage(); | |
75 static bool initialize_gpu(); | |
76 static void * generate_kernel(unsigned char *code, int code_len, const char *name); | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
77 static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
78 public: |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
79 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
80 typedef unsigned long long CUdeviceptr; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
81 #else |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
82 typedef unsigned int CUdeviceptr; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
83 #endif |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
84 |
10879 | 85 private: |
86 typedef int (*cuda_cu_init_func_t)(unsigned int); | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
87 typedef int (*cuda_cu_ctx_create_func_t)(void*, int, int); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
88 typedef int (*cuda_cu_ctx_destroy_func_t)(void*); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
89 typedef int (*cuda_cu_ctx_synchronize_func_t)(void); |
11527
c99e65785936
Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents:
11485
diff
changeset
|
90 typedef int (*cuda_cu_ctx_set_current_func_t)(void*); |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
91 typedef int (*cuda_cu_device_get_count_func_t)(int*); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
92 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
93 typedef int (*cuda_cu_device_get_func_t)(int*, int); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
94 typedef int (*cuda_cu_device_compute_capability_func_t)(int*, int*, int); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
95 typedef int (*cuda_cu_device_get_attribute_func_t)(int*, int, int); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
96 typedef int (*cuda_cu_launch_kernel_func_t)(struct CUfunc_st*, |
10879 | 97 unsigned int, unsigned int, unsigned int, |
98 unsigned int, unsigned int, unsigned int, | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
99 unsigned int, void*, void**, void**); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
100 typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
101 typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**); |
11527
c99e65785936
Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents:
11485
diff
changeset
|
102 typedef int (*cuda_cu_memalloc_func_t)(void*, size_t); |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
103 typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
104 typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int); |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
105 typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr, unsigned int); |
10879 | 106 |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
107 public: |
10879 | 108 static cuda_cu_init_func_t _cuda_cu_init; |
109 static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create; | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
110 static cuda_cu_ctx_destroy_func_t _cuda_cu_ctx_destroy; |
10879 | 111 static cuda_cu_ctx_synchronize_func_t _cuda_cu_ctx_synchronize; |
112 static cuda_cu_device_get_count_func_t _cuda_cu_device_get_count; | |
113 static cuda_cu_device_get_name_func_t _cuda_cu_device_get_name; | |
114 static cuda_cu_device_get_func_t _cuda_cu_device_get; | |
115 static cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability; /* Deprecated as of CUDA 5.0 */ | |
116 static cuda_cu_device_get_attribute_func_t _cuda_cu_device_get_attribute; | |
117 static cuda_cu_launch_kernel_func_t _cuda_cu_launch_kernel; | |
118 static cuda_cu_module_get_function_func_t _cuda_cu_module_get_function; | |
119 static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex; | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
120 static cuda_cu_memalloc_func_t _cuda_cu_memalloc; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
121 static cuda_cu_memfree_func_t _cuda_cu_memfree; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
122 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
123 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh; |
11527
c99e65785936
Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
bharadwaj
parents:
11485
diff
changeset
|
124 static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current; |
10879 | 125 |
126 protected: | |
11485
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
127 static void* _device_context; |
49bb1bc983c6
Implement several missing PTX codegen features; return value capture and method args passing of java method executed on GPU.
bharadwaj
parents:
11291
diff
changeset
|
128 static int _cu_device; |
10879 | 129 }; |
130 #endif // GPU_PTX_HPP |