comparison src/gpu/ptx/vm/gpu_ptx.hpp @ 11485:49bb1bc983c6

Implement several missing PTX codegen features; return value capture and method argument passing for Java methods executed on the GPU.
author bharadwaj
date Fri, 30 Aug 2013 16:39:05 -0400
parents 4a6b03a16808
children c99e65785936
comparison
equal deleted inserted replaced
11484:2aac62d79af4 11485:49bb1bc983c6
23 */ 23 */
24 24
25 #ifndef GPU_PTX_HPP 25 #ifndef GPU_PTX_HPP
26 #define GPU_PTX_HPP 26 #define GPU_PTX_HPP
27 27
28 /* 28 /*
29 * Some useful macro definitions from publicly available cuda.h. 29 * Some useful macro definitions from publicly available cuda.h.
30 * These definitions are for convenience. 30 * These definitions are for convenience.
31 */ 31 */
32 #define GRAAL_CUDA_SUCCESS 0 32 #define GRAAL_CUDA_SUCCESS 0
33 /**< Device shares a unified address space with the host */ 33 /**< Device shares a unified address space with the host */
42 42
43 /** 43 /**
44 * End of array terminator for the extra parameter to 44 * End of array terminator for the extra parameter to
45 * ::cuLaunchKernel 45 * ::cuLaunchKernel
46 */ 46 */
47 #define GRAAL_CU_LAUNCH_PARAM_END ((void *) 0x00) 47 #define GRAAL_CU_LAUNCH_PARAM_END ((void*) 0x00)
48 48
49 /** 49 /**
50 * Indicator that the next value in the extra parameter to 50 * Indicator that the next value in the extra parameter to
51 * ::cuLaunchKernel will be a pointer to a buffer containing all kernel 51 * ::cuLaunchKernel will be a pointer to a buffer containing all kernel
52 * parameters used for launching kernel f. This buffer needs to 52 * parameters used for launching kernel f. This buffer needs to
53 * honor all alignment/padding requirements of the individual parameters. 53 * honor all alignment/padding requirements of the individual parameters.
54 * If ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the 54 * If ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the
55 * extra array, then ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER will have no 55 * extra array, then ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER will have no
56 * effect. 56 * effect.
57 */ 57 */
58 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER ((void *) 0x01) 58 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER ((void*) 0x01)
59 59
60 /** 60 /**
61 * Indicator that the next value in the extra parameter to 61 * Indicator that the next value in the extra parameter to
62 * ::cuLaunchKernel will be a pointer to a size_t which contains the 62 * ::cuLaunchKernel will be a pointer to a size_t which contains the
63 * size of the buffer specified with ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER. 63 * size of the buffer specified with ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER.
64 * It is required that ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER also be specified 64 * It is required that ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER also be specified
65 * in the extra array if the value associated with 65 * in the extra array if the value associated with
66 * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. 66 * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero.
67 */ 67 */
68 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE ((void *) 0x02) 68 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE ((void*) 0x02)
69 69
70 class Ptx { 70 class Ptx {
71 friend class gpu; 71 friend class gpu;
72 72
73 protected: 73 protected:
74 static bool probe_linkage(); 74 static bool probe_linkage();
75 static bool initialize_gpu(); 75 static bool initialize_gpu();
76 static void * generate_kernel(unsigned char *code, int code_len, const char *name); 76 static void * generate_kernel(unsigned char *code, int code_len, const char *name);
77 static bool execute_kernel(address kernel, JavaCallArguments *); 77 static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret);
78 78 public:
79 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
80 typedef unsigned long long CUdeviceptr;
81 #else
82 typedef unsigned int CUdeviceptr;
83 #endif
84
79 private: 85 private:
80 typedef int (*cuda_cu_init_func_t)(unsigned int); 86 typedef int (*cuda_cu_init_func_t)(unsigned int);
81 typedef int (*cuda_cu_ctx_create_func_t)(void *, int, int); 87 typedef int (*cuda_cu_ctx_create_func_t)(void*, int, int);
82 typedef int (*cuda_cu_ctx_detach_func_t)(int *); 88 typedef int (*cuda_cu_ctx_destroy_func_t)(void*);
83 typedef int (*cuda_cu_ctx_synchronize_func_t)(int *); 89 typedef int (*cuda_cu_ctx_synchronize_func_t)(void);
84 typedef int (*cuda_cu_device_get_count_func_t)(int *); 90 typedef int (*cuda_cu_device_get_count_func_t)(int*);
85 typedef int (*cuda_cu_device_get_name_func_t)(char *, int, int); 91 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int);
86 typedef int (*cuda_cu_device_get_func_t)(int *, int); 92 typedef int (*cuda_cu_device_get_func_t)(int*, int);
87 typedef int (*cuda_cu_device_compute_capability_func_t)(int *, int *, int); 93 typedef int (*cuda_cu_device_compute_capability_func_t)(int*, int*, int);
88 typedef int (*cuda_cu_device_get_attribute_func_t)(int *, int, int); 94 typedef int (*cuda_cu_device_get_attribute_func_t)(int*, int, int);
89 typedef int (*cuda_cu_launch_kernel_func_t)(void *, 95 typedef int (*cuda_cu_launch_kernel_func_t)(struct CUfunc_st*,
90 unsigned int, unsigned int, unsigned int, 96 unsigned int, unsigned int, unsigned int,
91 unsigned int, unsigned int, unsigned int, 97 unsigned int, unsigned int, unsigned int,
92 unsigned int, void *, void **, void **); 98 unsigned int, void*, void**, void**);
93 typedef int (*cuda_cu_module_get_function_func_t)(void *, void *, const char *); 99 typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*);
94 typedef int (*cuda_cu_module_load_data_ex_func_t)(void *, void *, unsigned int, void *, void **); 100 typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**);
101 typedef int (*cuda_cu_memalloc_func_t)(void*, unsigned int);
102 typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr);
103 typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int);
104 typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr, unsigned int);
95 105
106 public:
96 static cuda_cu_init_func_t _cuda_cu_init; 107 static cuda_cu_init_func_t _cuda_cu_init;
97 static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create; 108 static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create;
98 static cuda_cu_ctx_detach_func_t _cuda_cu_ctx_detach; 109 static cuda_cu_ctx_destroy_func_t _cuda_cu_ctx_destroy;
99 static cuda_cu_ctx_synchronize_func_t _cuda_cu_ctx_synchronize; 110 static cuda_cu_ctx_synchronize_func_t _cuda_cu_ctx_synchronize;
100 static cuda_cu_device_get_count_func_t _cuda_cu_device_get_count; 111 static cuda_cu_device_get_count_func_t _cuda_cu_device_get_count;
101 static cuda_cu_device_get_name_func_t _cuda_cu_device_get_name; 112 static cuda_cu_device_get_name_func_t _cuda_cu_device_get_name;
102 static cuda_cu_device_get_func_t _cuda_cu_device_get; 113 static cuda_cu_device_get_func_t _cuda_cu_device_get;
103 static cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability; /* Deprecated as of CUDA 5.0 */ 114 static cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability; /* Deprecated as of CUDA 5.0 */
104 static cuda_cu_device_get_attribute_func_t _cuda_cu_device_get_attribute; 115 static cuda_cu_device_get_attribute_func_t _cuda_cu_device_get_attribute;
105 static cuda_cu_launch_kernel_func_t _cuda_cu_launch_kernel; 116 static cuda_cu_launch_kernel_func_t _cuda_cu_launch_kernel;
106 static cuda_cu_module_get_function_func_t _cuda_cu_module_get_function; 117 static cuda_cu_module_get_function_func_t _cuda_cu_module_get_function;
107 static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex; 118 static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex;
119 static cuda_cu_memalloc_func_t _cuda_cu_memalloc;
120 static cuda_cu_memfree_func_t _cuda_cu_memfree;
121 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod;
122 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh;
108 123
109 protected: 124 protected:
110 static void * _device_context; 125 static void* _device_context;
126 static int _cu_device;
111 }; 127 };
112 #endif // GPU_PTX_HPP 128 #endif // GPU_PTX_HPP