comparison src/gpu/ptx/vm/ptxKernelArguments.cpp @ 12566:c17bfad2fa98

Merge.
author Christian Humer <christian.humer@gmail.com>
date Thu, 24 Oct 2013 15:56:08 +0200
parents 11b086b1bae4
children 1a7e7011a341
comparison
equal deleted inserted replaced
12565:e47f373499ec 12566:c17bfad2fa98
30 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; 30 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod;
31 31
32 // Get next java argument 32 // Get next java argument
33 oop PTXKernelArguments::next_arg(BasicType expectedType) { 33 oop PTXKernelArguments::next_arg(BasicType expectedType) {
34 assert(_index < _args->length(), "out of bounds"); 34 assert(_index < _args->length(), "out of bounds");
35
36 oop arg = ((objArrayOop) (_args))->obj_at(_index++); 35 oop arg = ((objArrayOop) (_args))->obj_at(_index++);
37 assert(expectedType == T_OBJECT || 36 assert(expectedType == T_OBJECT ||
38 java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch"); 37 java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
39
40 return arg; 38 return arg;
41 } 39 }
42 40
43 void PTXKernelArguments::do_int() { 41 void PTXKernelArguments::do_int() {
44 if (is_after_invocation()) { 42 // If the parameter is a return value,
45 return; 43 if (is_return_type()) {
46 } 44 if (is_kernel_arg_setup()) {
47 // If the parameter is a return value, 45 // Allocate device memory for T_INT return value pointer on device. Size in bytes
48 if (is_return_type()) { 46 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_INT_BYTE_SIZE);
49 // Allocate device memory for T_INT return value pointer on device. Size in bytes 47 if (status != GRAAL_CUDA_SUCCESS) {
50 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); 48 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
51 if (status != GRAAL_CUDA_SUCCESS) { 49 _success = false;
52 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); 50 return;
53 _success = false; 51 }
54 return; 52 // Push _dev_return_value to _kernelBuffer
55 } 53 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
56 // Push _return_value_ptr to _kernelBuffer 54 }
57 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; 55 _bufferOffset += sizeof(_dev_return_value);
58 _bufferOffset += sizeof(_return_value_ptr); 56 } else {
59 } else { 57 // Get the next java argument and its value which should be a T_INT
60 // Get the next java argument and its value which should be a T_INT 58 oop arg = next_arg(T_INT);
61 oop arg = next_arg(T_INT); 59 // Copy the java argument value to kernelArgBuffer
62 // Copy the java argument value to kernelArgBuffer 60 jvalue intval;
63 jvalue intval; 61 if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
64 if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { 62 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
65 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); 63 _success = false;
66 _success = false; 64 return;
67 return; 65 }
68 } 66 if (is_kernel_arg_setup()) {
69 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; 67 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i;
70 _bufferOffset += sizeof(intval.i); 68 }
71 } 69 // Advance _bufferOffset
72 return; 70 _bufferOffset += sizeof(intval.i);
71 }
72 return;
73 } 73 }
74 74
75 void PTXKernelArguments::do_float() { 75 void PTXKernelArguments::do_float() {
76 if (is_after_invocation()) { 76 // If the parameter is a return value,
77 return; 77 if (is_return_type()) {
78 } 78 if (is_kernel_arg_setup()) {
79 // If the parameter is a return value, 79 // Allocate device memory for T_INT return value pointer on device. Size in bytes
80 if (is_return_type()) { 80 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_FLOAT_BYTE_SIZE);
81 // Allocate device memory for T_INT return value pointer on device. Size in bytes 81 if (status != GRAAL_CUDA_SUCCESS) {
82 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_FLOAT_BYTE_SIZE); 82 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
83 if (status != GRAAL_CUDA_SUCCESS) { 83 _success = false;
84 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); 84 return;
85 _success = false; 85 }
86 return; 86 // Push _dev_return_value to _kernelBuffer
87 } 87 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
88 // Push _return_value_ptr to _kernelBuffer 88 }
89 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; 89 // Advance _bufferOffset
90 _bufferOffset += sizeof(_return_value_ptr); 90 _bufferOffset += sizeof(_dev_return_value);
91 } else { 91 } else {
92 // Get the next java argument and its value which should be a T_INT 92 // Get the next java argument and its value which should be a T_FLOAT
93 oop arg = next_arg(T_FLOAT); 93 oop arg = next_arg(T_FLOAT);
94 // Copy the java argument value to kernelArgBuffer 94 // Copy the java argument value to kernelArgBuffer
95 jvalue floatval; 95 jvalue floatval;
96 if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) { 96 if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) {
97 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); 97 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_FLOAT");
98 _success = false; 98 _success = false;
99 return; 99 return;
100 } 100 }
101 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f; 101 if (is_kernel_arg_setup()) {
102 _bufferOffset += sizeof(floatval.f); 102 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f;
103 } 103 }
104 return; 104 // Advance _bufferOffset
105 _bufferOffset += sizeof(floatval.f);
106 }
107 return;
105 } 108 }
106 109
107 void PTXKernelArguments::do_double() { 110 void PTXKernelArguments::do_double() {
108 if (is_after_invocation()) { 111 // If the parameter is a return value,
109 return; 112 jvalue doubleval;
110 } 113 if (is_return_type()) {
111 // If the parameter is a return value, 114 if (is_kernel_arg_setup()) {
112 jvalue doubleval; 115 // Allocate device memory for T_INT return value pointer on device. Size in bytes
113 if (is_return_type()) { 116 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_DOUBLE_BYTE_SIZE);
114 // Allocate device memory for T_INT return value pointer on device. Size in bytes 117 if (status != GRAAL_CUDA_SUCCESS) {
115 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_DOUBLE_BYTE_SIZE); 118 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
116 if (status != GRAAL_CUDA_SUCCESS) { 119 _success = false;
117 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); 120 return;
118 _success = false; 121 }
119 return; 122 // Push _dev_return_value to _kernelBuffer
120 } 123 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
121 // Push _return_value_ptr to _kernelBuffer 124 }
122 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; 125 // Advance _bufferOffset
123 // _bufferOffset += sizeof(_return_value_ptr); 126 _bufferOffset += sizeof(doubleval.d);
124 _bufferOffset += sizeof(doubleval.d); 127 } else {
125 } else { 128 // Get the next java argument and its value which should be a T_INT
126 // Get the next java argument and its value which should be a T_INT 129 oop arg = next_arg(T_FLOAT);
127 oop arg = next_arg(T_FLOAT); 130 // Copy the java argument value to kernelArgBuffer
128 // Copy the java argument value to kernelArgBuffer 131 if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) {
129 if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) { 132 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
130 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); 133 _success = false;
131 _success = false; 134 return;
132 return; 135 }
133 } 136 if (is_kernel_arg_setup()) {
134 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d; 137 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d;
135 _bufferOffset += sizeof(doubleval.d); 138 }
136 } 139 // Advance _bufferOffset
137 return; 140 _bufferOffset += sizeof(doubleval.d);
141 }
142 return;
138 } 143 }
139 144
140 void PTXKernelArguments::do_long() { 145 void PTXKernelArguments::do_long() {
141 if (is_after_invocation()) { 146 // If the parameter is a return value,
142 return; 147 if (is_return_type()) {
143 } 148 if (is_kernel_arg_setup()) {
144 // If the parameter is a return value, 149 // Allocate device memory for T_LONG return value pointer on device. Size in bytes
145 if (is_return_type()) { 150 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_LONG_BYTE_SIZE);
146 // Allocate device memory for T_LONG return value pointer on device. Size in bytes 151 if (status != GRAAL_CUDA_SUCCESS) {
147 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE); 152 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
148 if (status != GRAAL_CUDA_SUCCESS) { 153 _success = false;
149 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); 154 return;
150 _success = false; 155 }
151 return; 156 // Push _dev_return_value to _kernelBuffer
152 } 157 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
153 // Push _return_value_ptr to _kernelBuffer 158 }
154 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; 159 // Advance _bufferOffset
155 _bufferOffset += sizeof(_return_value_ptr); 160 _bufferOffset += sizeof(_dev_return_value);
156 } else { 161 } else {
157 // Get the next java argument and its value which should be a T_LONG 162 // Get the next java argument and its value which should be a T_LONG
158 oop arg = next_arg(T_LONG); 163 oop arg = next_arg(T_LONG);
159 // Copy the java argument value to kernelArgBuffer 164 // Copy the java argument value to kernelArgBuffer
160 jvalue val; 165 jvalue val;
161 if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) { 166 if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) {
162 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_LONG"); 167 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_LONG");
163 _success = false; 168 _success = false;
164 return; 169 return;
165 } 170 }
166 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j; 171 if (is_kernel_arg_setup()) {
172 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j;
173 }
174 // Advance _bufferOffset
167 _bufferOffset += sizeof(val.j); 175 _bufferOffset += sizeof(val.j);
168 } 176 }
169 return; 177 return;
170 } 178 }
171 179
172 void PTXKernelArguments::do_byte() { 180 void PTXKernelArguments::do_byte() {
173 if (is_after_invocation()) { 181 // If the parameter is a return value,
174 return; 182 if (is_return_type()) {
175 } 183 if (is_kernel_arg_setup()) {
176 // If the parameter is a return value, 184 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
177 if (is_return_type()) { 185 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BYTE_SIZE);
178 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes 186 if (status != GRAAL_CUDA_SUCCESS) {
179 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE); 187 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
180 if (status != GRAAL_CUDA_SUCCESS) { 188 _success = false;
181 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); 189 return;
182 _success = false; 190 }
183 return; 191 // Push _dev_return_value to _kernelBuffer
184 } 192 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
185 // Push _return_value_ptr to _kernelBuffer 193 }
186 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; 194 // Advance _bufferOffset
187 _bufferOffset += sizeof(_return_value_ptr); 195 _bufferOffset += sizeof(_dev_return_value);
188 } else { 196 } else {
189 // Get the next java argument and its value which should be a T_BYTE 197 // Get the next java argument and its value which should be a T_BYTE
190 oop arg = next_arg(T_BYTE); 198 oop arg = next_arg(T_BYTE);
191 // Copy the java argument value to kernelArgBuffer 199 // Copy the java argument value to kernelArgBuffer
192 jvalue val; 200 jvalue val;
193 if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { 201 if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
194 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); 202 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
195 _success = false; 203 _success = false;
196 return; 204 return;
197 } 205 }
198 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; 206 if (is_kernel_arg_setup()) {
199 _bufferOffset += sizeof(val.b); 207 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
200 } 208 }
201 return; 209 // Advance _bufferOffset
210 _bufferOffset += sizeof(val.b);
211 }
212 return;
202 } 213 }
203 214
204 void PTXKernelArguments::do_bool() { 215 void PTXKernelArguments::do_bool() {
205 if (is_after_invocation()) { 216 // If the parameter is a return value,
206 return; 217 if (is_return_type()) {
207 } 218 if (is_kernel_arg_setup()) {
208 // If the parameter is a return value, 219 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
209 if (is_return_type()) { 220 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BOOLEAN_SIZE);
210 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes 221 if (status != GRAAL_CUDA_SUCCESS) {
211 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BOOLEAN_SIZE); 222 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
212 if (status != GRAAL_CUDA_SUCCESS) { 223 _success = false;
213 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); 224 return;
214 _success = false; 225 }
215 return; 226 // Push _dev_return_value to _kernelBuffer
216 } 227 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
217 // Push _return_value_ptr to _kernelBuffer 228 }
218 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; 229 // Advance _bufferOffset
219 _bufferOffset += sizeof(_return_value_ptr); 230 _bufferOffset += sizeof(_dev_return_value);
220 } else { 231 } else {
221 // Get the next java argument and its value which should be a T_BYTE 232 // Get the next java argument and its value which should be a T_BYTE
222 oop arg = next_arg(T_BYTE); 233 oop arg = next_arg(T_BYTE);
223 // Copy the java argument value to kernelArgBuffer 234 // Copy the java argument value to kernelArgBuffer
224 jvalue val; 235 jvalue val;
225 if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) { 236 if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) {
226 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); 237 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
227 _success = false; 238 _success = false;
228 return; 239 return;
229 } 240 }
230 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z; 241 if (is_kernel_arg_setup()) {
231 _bufferOffset += sizeof(val.z); 242 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z;
232 } 243 }
233 return; 244 // Advance _bufferOffset
245 _bufferOffset += sizeof(val.z);
246 }
247 return;
234 } 248 }
235 249
236 void PTXKernelArguments::do_array(int begin, int end) { 250 void PTXKernelArguments::do_array(int begin, int end) {
237 gpu::Ptx::CUdeviceptr _array_ptr; 251 // Get the next java argument and its value which should be a T_ARRAY
238 int status; 252 oop arg = next_arg(T_OBJECT);
239 253 assert(arg->is_array(), "argument value not an array");
240 // Get the next java argument and its value which should be a T_ARRAY 254 // Size of array argument
241 oop arg = next_arg(T_OBJECT); 255 int argSize = arg->size() * HeapWordSize;
242 int array_size = arg->size() * HeapWordSize; 256 // Device pointer to array argument.
243 257 gpu::Ptx::CUdeviceptr arrayArgOnDev;
244 if (is_after_invocation()) { 258 int status;
245 _array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]); 259
246 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size); 260 if (is_kernel_arg_setup()) {
247 if (status != GRAAL_CUDA_SUCCESS) { 261 // Allocate device memory for array argument on device. Size in bytes
248 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status); 262 status = gpu::Ptx::_cuda_cu_memalloc(&arrayArgOnDev, argSize);
249 _success = false;
250 return;
251 } else {
252 // tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size);
253 }
254 return;
255 }
256 // Allocate device memory for T_ARRAY return value pointer on device. Size in bytes
257 status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size);
258 if (status != GRAAL_CUDA_SUCCESS) { 263 if (status != GRAAL_CUDA_SUCCESS) {
259 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); 264 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for array argument on device",
260 _success = false; 265 status);
261 return; 266 _success = false;
262 } 267 return;
263 status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size); 268 }
269 // Copy array argument to device
270 status = gpu::Ptx::_cuda_cu_memcpy_htod(arrayArgOnDev, arg, argSize);
264 if (status != GRAAL_CUDA_SUCCESS) { 271 if (status != GRAAL_CUDA_SUCCESS) {
265 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status); 272 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument content to device memory",
266 _success = false; 273 status);
267 return; 274 _success = false;
268 } else { 275 return;
269 // tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size); 276 }
270 } 277
271 // Push _return_value_ptr to _kernelBuffer 278 // Push device array argument to _kernelBuffer
272 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; 279 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = arrayArgOnDev;
273 _bufferOffset += sizeof(_return_value_ptr); 280 } else {
274 return; 281 arrayArgOnDev = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);
282 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, arrayArgOnDev, argSize);
283 if (status != GRAAL_CUDA_SUCCESS) {
284 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
285 _success = false;
286 return;
287 }
288 }
289
290 // Advance _bufferOffset
291 _bufferOffset += sizeof(arrayArgOnDev);
292 return;
275 } 293 }
276 294
277 void PTXKernelArguments::do_void() { 295 void PTXKernelArguments::do_void() {
278 return; 296 return;
279 } 297 }
280 298
281 // TODO implement other do_* 299 // TODO implement other do_*