comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 13648:27acedac70b7

Added support for the @ParallelOver annotation in the PTX kernel call wrapper.
author Doug Simon <doug.simon@oracle.com>
date Wed, 15 Jan 2014 20:24:13 +0100
parents 220ed109bf77
children fff17b5dff8a
comparison
equal deleted inserted replaced
13647:8edc6b0779f0 13648:27acedac70b7
335 } 335 }
336 336
337 return cu_function; 337 return cu_function;
338 } 338 }
339 339
340 JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jlong parametersAndReturnValueBuffer, jint parametersAndReturnValueBufferSize, int encodedReturnTypeSize)) 340 JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ,
341 tty->print_cr("*** gpu::Ptx::execute_kernel_from_vm(kernel=%p, parametersAndReturnValueBuffer=%p, parametersAndReturnValueBufferSize=%d, encodedReturnTypeSize=%d)", 341 jlong parametersAndReturnValueBuffer,
342 kernel, parametersAndReturnValueBuffer, parametersAndReturnValueBufferSize, encodedReturnTypeSize); 342 jint parametersAndReturnValueBufferSize,
343 tty->print(" buffer as bytes: "); 343 int encodedReturnTypeSize))
344 for (int i = 0; i < parametersAndReturnValueBufferSize; i++) {
345 tty->print(" 0x%02x", ((jbyte*) (address) parametersAndReturnValueBuffer)[i] & 0xFF);
346 }
347 tty->cr();
348 tty->print(" buffer as ints: ");
349 for (int i = 0; i < (parametersAndReturnValueBufferSize / 4); i++) {
350 tty->print(" %d", ((jint*) (address) parametersAndReturnValueBuffer)[i]);
351 }
352 tty->cr();
353 tty->print(" buffer as words: ");
354 for (unsigned i = 0; i < (parametersAndReturnValueBufferSize / sizeof(void*)); i++) {
355 tty->print(" "INTPTR_FORMAT, ((void**) (address) parametersAndReturnValueBuffer)[i]);
356 }
357 tty->cr();
358 if (kernel == 0L) { 344 if (kernel == 0L) {
359 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL); 345 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL);
360 return 0L; 346 return 0L;
361 } 347 }
362
363 348
364 // grid dimensionality 349 // grid dimensionality
365 unsigned int gridX = 1; 350 unsigned int gridX = 1;
366 unsigned int gridY = 1; 351 unsigned int gridY = 1;
367 unsigned int gridZ = 1; 352 unsigned int gridZ = 1;
368
369 // thread dimensionality
370 unsigned int blockX = 1;
371 unsigned int blockY = 1;
372 unsigned int blockZ = 1;
373 353
374 struct CUfunc_st* cu_function = (struct CUfunc_st*) (address) kernel; 354 struct CUfunc_st* cu_function = (struct CUfunc_st*) (address) kernel;
375 355
376 void * config[5] = { 356 void * config[5] = {
377 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, (char*) (address) parametersAndReturnValueBuffer, 357 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, (char*) (address) parametersAndReturnValueBuffer,
389 int status; 369 int status;
390 if (returnTypeSize != 0) { 370 if (returnTypeSize != 0) {
391 status = _cuda_cu_memalloc(&device_return_value, returnTypeSize); 371 status = _cuda_cu_memalloc(&device_return_value, returnTypeSize);
392 if (status != GRAAL_CUDA_SUCCESS) { 372 if (status != GRAAL_CUDA_SUCCESS) {
393 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); 373 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
394 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to allocate memory for return value pointer on device"); 374 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_RuntimeException(), "[CUDA] Failed to allocate memory for return value pointer on device");
395 return 0L; 375 return 0L;
396 } 376 }
397 // Push device_return_value to kernelParams 377 // Push device_return_value to kernelParams
398 gpu::Ptx::CUdeviceptr* returnValuePtr = (gpu::Ptx::CUdeviceptr*) (address) parametersAndReturnValueBuffer + parametersAndReturnValueBufferSize - sizeof(device_return_value); 378 gpu::Ptx::CUdeviceptr* returnValuePtr = (gpu::Ptx::CUdeviceptr*) (address) parametersAndReturnValueBuffer + parametersAndReturnValueBufferSize - sizeof(device_return_value);
399 *returnValuePtr = device_return_value; 379 *returnValuePtr = device_return_value;
400 } 380 }
401 381
402 status = _cuda_cu_launch_kernel(cu_function, 382 status = _cuda_cu_launch_kernel(cu_function,
403 gridX, gridY, gridZ, 383 gridX, gridY, gridZ,
404 blockX, blockY, blockZ, 384 dimX, dimY, dimZ,
405 0, NULL, NULL, (void **) &config); 385 0, NULL, NULL, (void **) &config);
406 386
407 if (status != GRAAL_CUDA_SUCCESS) { 387 if (status != GRAAL_CUDA_SUCCESS) {
408 tty->print_cr("[CUDA] Failed to launch kernel"); 388 tty->print_cr("[CUDA] Failed to launch kernel");
409 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to launch kernel"); 389 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_RuntimeException(), "[CUDA] Failed to launch kernel");
410 return 0L; 390 return 0L;
411 } 391 }
412 392
413 if (TraceGPUInteraction) { 393 if (TraceGPUInteraction) {
414 tty->print_cr("[CUDA] Success: Kernel Launch: X: %d Y: %d Z: %d", blockX, blockY, blockZ); 394 tty->print_cr("[CUDA] Success: Kernel Launch: X: %d Y: %d Z: %d", dimX, dimY, dimZ);
415 } 395 }
416 396
417 status = _cuda_cu_ctx_synchronize(); 397 status = _cuda_cu_ctx_synchronize();
418 398
419 if (status != GRAAL_CUDA_SUCCESS) { 399 if (status != GRAAL_CUDA_SUCCESS) {
420 tty->print_cr("[CUDA] Failed to synchronize launched kernel (%d)", status); 400 tty->print_cr("[CUDA] Failed to synchronize launched kernel (%d)", status);
421 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to synchronize launched kernel"); 401 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_RuntimeException(), "[CUDA] Failed to synchronize launched kernel");
422 return 0L; 402 return 0L;
423 } 403 }
424 404
425 if (TraceGPUInteraction) { 405 if (TraceGPUInteraction) {
426 tty->print_cr("[CUDA] Success: Synchronized launch kernel"); 406 tty->print_cr("[CUDA] Success: Synchronized launch kernel");