Mercurial > hg > graal-jvmci-8
comparison src/gpu/ptx/vm/gpu_ptx.hpp @ 11485:49bb1bc983c6
Implement several missing PTX codegen features; add return-value capture and method-argument passing for Java methods executed on the GPU.
author | bharadwaj |
---|---|
date | Fri, 30 Aug 2013 16:39:05 -0400 |
parents | 4a6b03a16808 |
children | c99e65785936 |
comparison
equal
deleted
inserted
replaced
11484:2aac62d79af4 | 11485:49bb1bc983c6 |
---|---|
23 */ | 23 */ |
24 | 24 |
25 #ifndef GPU_PTX_HPP | 25 #ifndef GPU_PTX_HPP |
26 #define GPU_PTX_HPP | 26 #define GPU_PTX_HPP |
27 | 27 |
28 /* | 28 /* |
29 * Some useful macro definitions from publicly available cuda.h. | 29 * Some useful macro definitions from publicly available cuda.h. |
30 * These definitions are for convenience. | 30 * These definitions are for convenience. |
31 */ | 31 */ |
32 #define GRAAL_CUDA_SUCCESS 0 | 32 #define GRAAL_CUDA_SUCCESS 0 |
33 /**< Device shares a unified address space with the host */ | 33 /**< Device shares a unified address space with the host */ |
42 | 42 |
43 /** | 43 /** |
44 * End of array terminator for the extra parameter to | 44 * End of array terminator for the extra parameter to |
45 * ::cuLaunchKernel | 45 * ::cuLaunchKernel |
46 */ | 46 */ |
47 #define GRAAL_CU_LAUNCH_PARAM_END ((void *) 0x00) | 47 #define GRAAL_CU_LAUNCH_PARAM_END ((void*) 0x00) |
48 | 48 |
49 /** | 49 /** |
50 * Indicator that the next value in the extra parameter to | 50 * Indicator that the next value in the extra parameter to |
51 * ::cuLaunchKernel will be a pointer to a buffer containing all kernel | 51 * ::cuLaunchKernel will be a pointer to a buffer containing all kernel |
52 * parameters used for launching kernel f. This buffer needs to | 52 * parameters used for launching kernel f. This buffer needs to |
53 * honor all alignment/padding requirements of the individual parameters. | 53 * honor all alignment/padding requirements of the individual parameters. |
54 * If ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the | 54 * If ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the |
55 * extra array, then ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER will have no | 55 * extra array, then ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER will have no |
56 * effect. | 56 * effect. |
57 */ | 57 */ |
58 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER ((void *) 0x01) | 58 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER ((void*) 0x01) |
59 | 59 |
60 /** | 60 /** |
61 * Indicator that the next value in the extra parameter to | 61 * Indicator that the next value in the extra parameter to |
62 * ::cuLaunchKernel will be a pointer to a size_t which contains the | 62 * ::cuLaunchKernel will be a pointer to a size_t which contains the |
63 * size of the buffer specified with ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER. | 63 * size of the buffer specified with ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER. |
64 * It is required that ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER also be specified | 64 * It is required that ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER also be specified |
65 * in the extra array if the value associated with | 65 * in the extra array if the value associated with |
66 * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. | 66 * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. |
67 */ | 67 */ |
68 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE ((void *) 0x02) | 68 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE ((void*) 0x02) |
69 | 69 |
70 class Ptx { | 70 class Ptx { |
71 friend class gpu; | 71 friend class gpu; |
72 | 72 |
73 protected: | 73 protected: |
74 static bool probe_linkage(); | 74 static bool probe_linkage(); |
75 static bool initialize_gpu(); | 75 static bool initialize_gpu(); |
76 static void * generate_kernel(unsigned char *code, int code_len, const char *name); | 76 static void * generate_kernel(unsigned char *code, int code_len, const char *name); |
77 static bool execute_kernel(address kernel, JavaCallArguments *); | 77 static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret); |
78 | 78 public: |
79 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) | |
80 typedef unsigned long long CUdeviceptr; | |
81 #else | |
82 typedef unsigned int CUdeviceptr; | |
83 #endif | |
84 | |
79 private: | 85 private: |
80 typedef int (*cuda_cu_init_func_t)(unsigned int); | 86 typedef int (*cuda_cu_init_func_t)(unsigned int); |
81 typedef int (*cuda_cu_ctx_create_func_t)(void *, int, int); | 87 typedef int (*cuda_cu_ctx_create_func_t)(void*, int, int); |
82 typedef int (*cuda_cu_ctx_detach_func_t)(int *); | 88 typedef int (*cuda_cu_ctx_destroy_func_t)(void*); |
83 typedef int (*cuda_cu_ctx_synchronize_func_t)(int *); | 89 typedef int (*cuda_cu_ctx_synchronize_func_t)(void); |
84 typedef int (*cuda_cu_device_get_count_func_t)(int *); | 90 typedef int (*cuda_cu_device_get_count_func_t)(int*); |
85 typedef int (*cuda_cu_device_get_name_func_t)(char *, int, int); | 91 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int); |
86 typedef int (*cuda_cu_device_get_func_t)(int *, int); | 92 typedef int (*cuda_cu_device_get_func_t)(int*, int); |
87 typedef int (*cuda_cu_device_compute_capability_func_t)(int *, int *, int); | 93 typedef int (*cuda_cu_device_compute_capability_func_t)(int*, int*, int); |
88 typedef int (*cuda_cu_device_get_attribute_func_t)(int *, int, int); | 94 typedef int (*cuda_cu_device_get_attribute_func_t)(int*, int, int); |
89 typedef int (*cuda_cu_launch_kernel_func_t)(void *, | 95 typedef int (*cuda_cu_launch_kernel_func_t)(struct CUfunc_st*, |
90 unsigned int, unsigned int, unsigned int, | 96 unsigned int, unsigned int, unsigned int, |
91 unsigned int, unsigned int, unsigned int, | 97 unsigned int, unsigned int, unsigned int, |
92 unsigned int, void *, void **, void **); | 98 unsigned int, void*, void**, void**); |
93 typedef int (*cuda_cu_module_get_function_func_t)(void *, void *, const char *); | 99 typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*); |
94 typedef int (*cuda_cu_module_load_data_ex_func_t)(void *, void *, unsigned int, void *, void **); | 100 typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**); |
101 typedef int (*cuda_cu_memalloc_func_t)(void*, unsigned int); | |
102 typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr); | |
103 typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int); | |
104 typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr, unsigned int); | |
95 | 105 |
106 public: | |
96 static cuda_cu_init_func_t _cuda_cu_init; | 107 static cuda_cu_init_func_t _cuda_cu_init; |
97 static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create; | 108 static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create; |
98 static cuda_cu_ctx_detach_func_t _cuda_cu_ctx_detach; | 109 static cuda_cu_ctx_destroy_func_t _cuda_cu_ctx_destroy; |
99 static cuda_cu_ctx_synchronize_func_t _cuda_cu_ctx_synchronize; | 110 static cuda_cu_ctx_synchronize_func_t _cuda_cu_ctx_synchronize; |
100 static cuda_cu_device_get_count_func_t _cuda_cu_device_get_count; | 111 static cuda_cu_device_get_count_func_t _cuda_cu_device_get_count; |
101 static cuda_cu_device_get_name_func_t _cuda_cu_device_get_name; | 112 static cuda_cu_device_get_name_func_t _cuda_cu_device_get_name; |
102 static cuda_cu_device_get_func_t _cuda_cu_device_get; | 113 static cuda_cu_device_get_func_t _cuda_cu_device_get; |
103 static cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability; /* Deprecated as of CUDA 5.0 */ | 114 static cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability; /* Deprecated as of CUDA 5.0 */ |
104 static cuda_cu_device_get_attribute_func_t _cuda_cu_device_get_attribute; | 115 static cuda_cu_device_get_attribute_func_t _cuda_cu_device_get_attribute; |
105 static cuda_cu_launch_kernel_func_t _cuda_cu_launch_kernel; | 116 static cuda_cu_launch_kernel_func_t _cuda_cu_launch_kernel; |
106 static cuda_cu_module_get_function_func_t _cuda_cu_module_get_function; | 117 static cuda_cu_module_get_function_func_t _cuda_cu_module_get_function; |
107 static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex; | 118 static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex; |
119 static cuda_cu_memalloc_func_t _cuda_cu_memalloc; | |
120 static cuda_cu_memfree_func_t _cuda_cu_memfree; | |
121 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod; | |
122 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh; | |
108 | 123 |
109 protected: | 124 protected: |
110 static void * _device_context; | 125 static void* _device_context; |
126 static int _cu_device; | |
111 }; | 127 }; |
112 #endif // GPU_PTX_HPP | 128 #endif // GPU_PTX_HPP |