comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 11822:365d8f385fb5

PTX: one-dimensional parallel warp invocation, ParallelOver annotation
author Morris Meyer <morris.meyer@oracle.com>
date Sun, 29 Sep 2013 14:47:12 -0400
parents d8659ad83fcc
children 8d8f63069f58
comparison
equal deleted inserted replaced
11821:d8659ad83fcc 11822:365d8f385fb5
226 226
227 return cu_function; 227 return cu_function;
228 } 228 }
229 229
230 bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) { 230 bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
231 return gpu::Ptx::execute_warp(1, 1, 1, kernel, ptxka, ret);
232 }
233
234 bool gpu::Ptx::execute_warp(int dimX, int dimY, int dimZ,
235 address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
231 // grid dimensionality 236 // grid dimensionality
232 unsigned int gridX = 1; 237 unsigned int gridX = 1;
233 unsigned int gridY = 1; 238 unsigned int gridY = 1;
234 unsigned int gridZ = 1; 239 unsigned int gridZ = 1;
235 240
236 // thread dimensionality 241 // thread dimensionality
237 unsigned int blockX = 1; 242 unsigned int blockX = dimX;
238 unsigned int blockY = 1; 243 unsigned int blockY = dimY;
239 unsigned int blockZ = 1; 244 unsigned int blockZ = dimZ;
240 245
241 struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel; 246 struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel;
242 247
243 void * config[5] = { 248 void * config[5] = {
244 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, ptxka._kernelArgBuffer, 249 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, ptxka._kernelArgBuffer,
262 tty->print_cr("[CUDA] Failed to launch kernel"); 267 tty->print_cr("[CUDA] Failed to launch kernel");
263 return false; 268 return false;
264 } 269 }
265 270
266 if (TraceGPUInteraction) { 271 if (TraceGPUInteraction) {
267 tty->print_cr("[CUDA] Success: Kernel Launch"); 272 tty->print_cr("[CUDA] Success: Kernel Launch: X: %d Y: %d Z: %d", blockX, blockY, blockZ);
268 } 273 }
269 274
270 status = _cuda_cu_ctx_synchronize(); 275 status = _cuda_cu_ctx_synchronize();
271 276
272 if (status != GRAAL_CUDA_SUCCESS) { 277 if (status != GRAAL_CUDA_SUCCESS) {