Mercurial > hg > truffle
comparison src/gpu/ptx/vm/ptxKernelArguments.cpp @ 12566:c17bfad2fa98
Merge.
author | Christian Humer <christian.humer@gmail.com> |
---|---|
date | Thu, 24 Oct 2013 15:56:08 +0200 |
parents | 11b086b1bae4 |
children | 1a7e7011a341 |
comparison
equal
deleted
inserted
replaced
12565:e47f373499ec | 12566:c17bfad2fa98 |
---|---|
30 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; | 30 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; |
31 | 31 |
32 // Get next java argument | 32 // Get next java argument |
33 oop PTXKernelArguments::next_arg(BasicType expectedType) { | 33 oop PTXKernelArguments::next_arg(BasicType expectedType) { |
34 assert(_index < _args->length(), "out of bounds"); | 34 assert(_index < _args->length(), "out of bounds"); |
35 | |
36 oop arg = ((objArrayOop) (_args))->obj_at(_index++); | 35 oop arg = ((objArrayOop) (_args))->obj_at(_index++); |
37 assert(expectedType == T_OBJECT || | 36 assert(expectedType == T_OBJECT || |
38 java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch"); | 37 java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch"); |
39 | |
40 return arg; | 38 return arg; |
41 } | 39 } |
42 | 40 |
43 void PTXKernelArguments::do_int() { | 41 void PTXKernelArguments::do_int() { |
44 if (is_after_invocation()) { | 42 // If the parameter is a return value, |
45 return; | 43 if (is_return_type()) { |
46 } | 44 if (is_kernel_arg_setup()) { |
47 // If the parameter is a return value, | 45 // Allocate device memory for T_INT return value pointer on device. Size in bytes |
48 if (is_return_type()) { | 46 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_INT_BYTE_SIZE); |
49 // Allocate device memory for T_INT return value pointer on device. Size in bytes | 47 if (status != GRAAL_CUDA_SUCCESS) { |
50 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); | 48 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); |
51 if (status != GRAAL_CUDA_SUCCESS) { | 49 _success = false; |
52 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | 50 return; |
53 _success = false; | 51 } |
54 return; | 52 // Push _dev_return_value to _kernelBuffer |
55 } | 53 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; |
56 // Push _return_value_ptr to _kernelBuffer | 54 } |
57 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | 55 _bufferOffset += sizeof(_dev_return_value); |
58 _bufferOffset += sizeof(_return_value_ptr); | 56 } else { |
59 } else { | 57 // Get the next java argument and its value which should be a T_INT |
60 // Get the next java argument and its value which should be a T_INT | 58 oop arg = next_arg(T_INT); |
61 oop arg = next_arg(T_INT); | 59 // Copy the java argument value to kernelArgBuffer |
62 // Copy the java argument value to kernelArgBuffer | 60 jvalue intval; |
63 jvalue intval; | 61 if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { |
64 if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { | 62 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); |
65 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); | 63 _success = false; |
66 _success = false; | 64 return; |
67 return; | 65 } |
68 } | 66 if (is_kernel_arg_setup()) { |
69 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; | 67 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; |
70 _bufferOffset += sizeof(intval.i); | 68 } |
71 } | 69 // Advance _bufferOffset |
72 return; | 70 _bufferOffset += sizeof(intval.i); |
71 } | |
72 return; | |
73 } | 73 } |
74 | 74 |
75 void PTXKernelArguments::do_float() { | 75 void PTXKernelArguments::do_float() { |
76 if (is_after_invocation()) { | 76 // If the parameter is a return value, |
77 return; | 77 if (is_return_type()) { |
78 } | 78 if (is_kernel_arg_setup()) { |
79 // If the parameter is a return value, | 79 // Allocate device memory for T_INT return value pointer on device. Size in bytes |
80 if (is_return_type()) { | 80 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_FLOAT_BYTE_SIZE); |
81 // Allocate device memory for T_INT return value pointer on device. Size in bytes | 81 if (status != GRAAL_CUDA_SUCCESS) { |
82 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_FLOAT_BYTE_SIZE); | 82 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); |
83 if (status != GRAAL_CUDA_SUCCESS) { | 83 _success = false; |
84 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | 84 return; |
85 _success = false; | 85 } |
86 return; | 86 // Push _dev_return_value to _kernelBuffer |
87 } | 87 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; |
88 // Push _return_value_ptr to _kernelBuffer | 88 } |
89 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | 89 // Advance _bufferOffset |
90 _bufferOffset += sizeof(_return_value_ptr); | 90 _bufferOffset += sizeof(_dev_return_value); |
91 } else { | 91 } else { |
92 // Get the next java argument and its value which should be a T_INT | 92 // Get the next java argument and its value which should be a T_FLOAT |
93 oop arg = next_arg(T_FLOAT); | 93 oop arg = next_arg(T_FLOAT); |
94 // Copy the java argument value to kernelArgBuffer | 94 // Copy the java argument value to kernelArgBuffer |
95 jvalue floatval; | 95 jvalue floatval; |
96 if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) { | 96 if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) { |
97 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); | 97 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_FLOAT"); |
98 _success = false; | 98 _success = false; |
99 return; | 99 return; |
100 } | 100 } |
101 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f; | 101 if (is_kernel_arg_setup()) { |
102 _bufferOffset += sizeof(floatval.f); | 102 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f; |
103 } | 103 } |
104 return; | 104 // Advance _bufferOffset |
105 _bufferOffset += sizeof(floatval.f); | |
106 } | |
107 return; | |
105 } | 108 } |
106 | 109 |
107 void PTXKernelArguments::do_double() { | 110 void PTXKernelArguments::do_double() { |
108 if (is_after_invocation()) { | 111 // If the parameter is a return value, |
109 return; | 112 jvalue doubleval; |
110 } | 113 if (is_return_type()) { |
111 // If the parameter is a return value, | 114 if (is_kernel_arg_setup()) { |
112 jvalue doubleval; | 115 // Allocate device memory for T_INT return value pointer on device. Size in bytes |
113 if (is_return_type()) { | 116 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_DOUBLE_BYTE_SIZE); |
114 // Allocate device memory for T_INT return value pointer on device. Size in bytes | 117 if (status != GRAAL_CUDA_SUCCESS) { |
115 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_DOUBLE_BYTE_SIZE); | 118 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); |
116 if (status != GRAAL_CUDA_SUCCESS) { | 119 _success = false; |
117 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | 120 return; |
118 _success = false; | 121 } |
119 return; | 122 // Push _dev_return_value to _kernelBuffer |
120 } | 123 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; |
121 // Push _return_value_ptr to _kernelBuffer | 124 } |
122 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | 125 // Advance _bufferOffset |
123 // _bufferOffset += sizeof(_return_value_ptr); | 126 _bufferOffset += sizeof(doubleval.d); |
124 _bufferOffset += sizeof(doubleval.d); | 127 } else { |
125 } else { | 128 // Get the next java argument and its value which should be a T_INT |
126 // Get the next java argument and its value which should be a T_INT | 129 oop arg = next_arg(T_FLOAT); |
127 oop arg = next_arg(T_FLOAT); | 130 // Copy the java argument value to kernelArgBuffer |
128 // Copy the java argument value to kernelArgBuffer | 131 if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) { |
129 if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) { | 132 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); |
130 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); | 133 _success = false; |
131 _success = false; | 134 return; |
132 return; | 135 } |
133 } | 136 if (is_kernel_arg_setup()) { |
134 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d; | 137 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d; |
135 _bufferOffset += sizeof(doubleval.d); | 138 } |
136 } | 139 // Advance _bufferOffset |
137 return; | 140 _bufferOffset += sizeof(doubleval.d); |
141 } | |
142 return; | |
138 } | 143 } |
139 | 144 |
140 void PTXKernelArguments::do_long() { | 145 void PTXKernelArguments::do_long() { |
141 if (is_after_invocation()) { | 146 // If the parameter is a return value, |
142 return; | 147 if (is_return_type()) { |
143 } | 148 if (is_kernel_arg_setup()) { |
144 // If the parameter is a return value, | 149 // Allocate device memory for T_LONG return value pointer on device. Size in bytes |
145 if (is_return_type()) { | 150 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_LONG_BYTE_SIZE); |
146 // Allocate device memory for T_LONG return value pointer on device. Size in bytes | 151 if (status != GRAAL_CUDA_SUCCESS) { |
147 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE); | 152 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); |
148 if (status != GRAAL_CUDA_SUCCESS) { | 153 _success = false; |
149 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | 154 return; |
150 _success = false; | 155 } |
151 return; | 156 // Push _dev_return_value to _kernelBuffer |
152 } | 157 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; |
153 // Push _return_value_ptr to _kernelBuffer | 158 } |
154 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | 159 // Advance _bufferOffset |
155 _bufferOffset += sizeof(_return_value_ptr); | 160 _bufferOffset += sizeof(_dev_return_value); |
156 } else { | 161 } else { |
157 // Get the next java argument and its value which should be a T_LONG | 162 // Get the next java argument and its value which should be a T_LONG |
158 oop arg = next_arg(T_LONG); | 163 oop arg = next_arg(T_LONG); |
159 // Copy the java argument value to kernelArgBuffer | 164 // Copy the java argument value to kernelArgBuffer |
160 jvalue val; | 165 jvalue val; |
161 if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) { | 166 if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) { |
162 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_LONG"); | 167 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_LONG"); |
163 _success = false; | 168 _success = false; |
164 return; | 169 return; |
165 } | 170 } |
166 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j; | 171 if (is_kernel_arg_setup()) { |
172 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j; | |
173 } | |
174 // Advance _bufferOffset | |
167 _bufferOffset += sizeof(val.j); | 175 _bufferOffset += sizeof(val.j); |
168 } | 176 } |
169 return; | 177 return; |
170 } | 178 } |
171 | 179 |
172 void PTXKernelArguments::do_byte() { | 180 void PTXKernelArguments::do_byte() { |
173 if (is_after_invocation()) { | 181 // If the parameter is a return value, |
174 return; | 182 if (is_return_type()) { |
175 } | 183 if (is_kernel_arg_setup()) { |
176 // If the parameter is a return value, | 184 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes |
177 if (is_return_type()) { | 185 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BYTE_SIZE); |
178 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes | 186 if (status != GRAAL_CUDA_SUCCESS) { |
179 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE); | 187 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); |
180 if (status != GRAAL_CUDA_SUCCESS) { | 188 _success = false; |
181 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | 189 return; |
182 _success = false; | 190 } |
183 return; | 191 // Push _dev_return_value to _kernelBuffer |
184 } | 192 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; |
185 // Push _return_value_ptr to _kernelBuffer | 193 } |
186 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | 194 // Advance _bufferOffset |
187 _bufferOffset += sizeof(_return_value_ptr); | 195 _bufferOffset += sizeof(_dev_return_value); |
188 } else { | 196 } else { |
189 // Get the next java argument and its value which should be a T_BYTE | 197 // Get the next java argument and its value which should be a T_BYTE |
190 oop arg = next_arg(T_BYTE); | 198 oop arg = next_arg(T_BYTE); |
191 // Copy the java argument value to kernelArgBuffer | 199 // Copy the java argument value to kernelArgBuffer |
192 jvalue val; | 200 jvalue val; |
193 if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { | 201 if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { |
194 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); | 202 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); |
195 _success = false; | 203 _success = false; |
196 return; | 204 return; |
197 } | 205 } |
198 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; | 206 if (is_kernel_arg_setup()) { |
199 _bufferOffset += sizeof(val.b); | 207 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; |
200 } | 208 } |
201 return; | 209 // Advance _bufferOffset |
210 _bufferOffset += sizeof(val.b); | |
211 } | |
212 return; | |
202 } | 213 } |
203 | 214 |
204 void PTXKernelArguments::do_bool() { | 215 void PTXKernelArguments::do_bool() { |
205 if (is_after_invocation()) { | 216 // If the parameter is a return value, |
206 return; | 217 if (is_return_type()) { |
207 } | 218 if (is_kernel_arg_setup()) { |
208 // If the parameter is a return value, | 219 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes |
209 if (is_return_type()) { | 220 int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BOOLEAN_SIZE); |
210 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes | 221 if (status != GRAAL_CUDA_SUCCESS) { |
211 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BOOLEAN_SIZE); | 222 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); |
212 if (status != GRAAL_CUDA_SUCCESS) { | 223 _success = false; |
213 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | 224 return; |
214 _success = false; | 225 } |
215 return; | 226 // Push _dev_return_value to _kernelBuffer |
216 } | 227 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value; |
217 // Push _return_value_ptr to _kernelBuffer | 228 } |
218 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | 229 // Advance _bufferOffset |
219 _bufferOffset += sizeof(_return_value_ptr); | 230 _bufferOffset += sizeof(_dev_return_value); |
220 } else { | 231 } else { |
221 // Get the next java argument and its value which should be a T_BYTE | 232 // Get the next java argument and its value which should be a T_BYTE |
222 oop arg = next_arg(T_BYTE); | 233 oop arg = next_arg(T_BYTE); |
223 // Copy the java argument value to kernelArgBuffer | 234 // Copy the java argument value to kernelArgBuffer |
224 jvalue val; | 235 jvalue val; |
225 if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) { | 236 if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) { |
226 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); | 237 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); |
227 _success = false; | 238 _success = false; |
228 return; | 239 return; |
229 } | 240 } |
230 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z; | 241 if (is_kernel_arg_setup()) { |
231 _bufferOffset += sizeof(val.z); | 242 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z; |
232 } | 243 } |
233 return; | 244 // Advance _bufferOffset |
245 _bufferOffset += sizeof(val.z); | |
246 } | |
247 return; | |
234 } | 248 } |
235 | 249 |
236 void PTXKernelArguments::do_array(int begin, int end) { | 250 void PTXKernelArguments::do_array(int begin, int end) { |
237 gpu::Ptx::CUdeviceptr _array_ptr; | 251 // Get the next java argument and its value which should be a T_ARRAY |
238 int status; | 252 oop arg = next_arg(T_OBJECT); |
239 | 253 assert(arg->is_array(), "argument value not an array"); |
240 // Get the next java argument and its value which should be a T_ARRAY | 254 // Size of array argument |
241 oop arg = next_arg(T_OBJECT); | 255 int argSize = arg->size() * HeapWordSize; |
242 int array_size = arg->size() * HeapWordSize; | 256 // Device pointer to array argument. |
243 | 257 gpu::Ptx::CUdeviceptr arrayArgOnDev; |
244 if (is_after_invocation()) { | 258 int status; |
245 _array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]); | 259 |
246 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size); | 260 if (is_kernel_arg_setup()) { |
247 if (status != GRAAL_CUDA_SUCCESS) { | 261 // Allocate device memory for array argument on device. Size in bytes |
248 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status); | 262 status = gpu::Ptx::_cuda_cu_memalloc(&arrayArgOnDev, argSize); |
249 _success = false; | |
250 return; | |
251 } else { | |
252 // tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size); | |
253 } | |
254 return; | |
255 } | |
256 // Allocate device memory for T_ARRAY return value pointer on device. Size in bytes | |
257 status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size); | |
258 if (status != GRAAL_CUDA_SUCCESS) { | 263 if (status != GRAAL_CUDA_SUCCESS) { |
259 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | 264 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for array argument on device", |
260 _success = false; | 265 status); |
261 return; | 266 _success = false; |
262 } | 267 return; |
263 status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size); | 268 } |
269 // Copy array argument to device | |
270 status = gpu::Ptx::_cuda_cu_memcpy_htod(arrayArgOnDev, arg, argSize); | |
264 if (status != GRAAL_CUDA_SUCCESS) { | 271 if (status != GRAAL_CUDA_SUCCESS) { |
265 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status); | 272 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument content to device memory", |
266 _success = false; | 273 status); |
267 return; | 274 _success = false; |
268 } else { | 275 return; |
269 // tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size); | 276 } |
270 } | 277 |
271 // Push _return_value_ptr to _kernelBuffer | 278 // Push device array argument to _kernelBuffer |
272 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | 279 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = arrayArgOnDev; |
273 _bufferOffset += sizeof(_return_value_ptr); | 280 } else { |
274 return; | 281 arrayArgOnDev = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]); |
282 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, arrayArgOnDev, argSize); | |
283 if (status != GRAAL_CUDA_SUCCESS) { | |
284 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status); | |
285 _success = false; | |
286 return; | |
287 } | |
288 } | |
289 | |
290 // Advance _bufferOffset | |
291 _bufferOffset += sizeof(arrayArgOnDev); | |
292 return; | |
275 } | 293 } |
276 | 294 |
277 void PTXKernelArguments::do_void() { | 295 void PTXKernelArguments::do_void() { |
278 return; | 296 return; |
279 } | 297 } |
280 | 298 |
281 // TODO implement other do_* | 299 // TODO implement other do_* |