Mercurial > hg > graal-compiler
comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 13648:27acedac70b7
added support for @ParallelOver annotation in PTX kernel call wrapper
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Wed, 15 Jan 2014 20:24:13 +0100 |
parents | 220ed109bf77 |
children | fff17b5dff8a |
comparison legend: equal | deleted | inserted | replaced
13647:8edc6b0779f0 | 13648:27acedac70b7 |
---|---|
335 } | 335 } |
336 | 336 |
337 return cu_function; | 337 return cu_function; |
338 } | 338 } |
339 | 339 |
340 JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jlong parametersAndReturnValueBuffer, jint parametersAndReturnValueBufferSize, int encodedReturnTypeSize)) | 340 JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ, |
341 tty->print_cr("*** gpu::Ptx::execute_kernel_from_vm(kernel=%p, parametersAndReturnValueBuffer=%p, parametersAndReturnValueBufferSize=%d, encodedReturnTypeSize=%d)", | 341 jlong parametersAndReturnValueBuffer, |
342 kernel, parametersAndReturnValueBuffer, parametersAndReturnValueBufferSize, encodedReturnTypeSize); | 342 jint parametersAndReturnValueBufferSize, |
343 tty->print(" buffer as bytes: "); | 343 int encodedReturnTypeSize)) |
344 for (int i = 0; i < parametersAndReturnValueBufferSize; i++) { | |
345 tty->print(" 0x%02x", ((jbyte*) (address) parametersAndReturnValueBuffer)[i] & 0xFF); | |
346 } | |
347 tty->cr(); | |
348 tty->print(" buffer as ints: "); | |
349 for (int i = 0; i < (parametersAndReturnValueBufferSize / 4); i++) { | |
350 tty->print(" %d", ((jint*) (address) parametersAndReturnValueBuffer)[i]); | |
351 } | |
352 tty->cr(); | |
353 tty->print(" buffer as words: "); | |
354 for (unsigned i = 0; i < (parametersAndReturnValueBufferSize / sizeof(void*)); i++) { | |
355 tty->print(" "INTPTR_FORMAT, ((void**) (address) parametersAndReturnValueBuffer)[i]); | |
356 } | |
357 tty->cr(); | |
358 if (kernel == 0L) { | 344 if (kernel == 0L) { |
359 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL); | 345 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL); |
360 return 0L; | 346 return 0L; |
361 } | 347 } |
362 | |
363 | 348 |
364 // grid dimensionality | 349 // grid dimensionality |
365 unsigned int gridX = 1; | 350 unsigned int gridX = 1; |
366 unsigned int gridY = 1; | 351 unsigned int gridY = 1; |
367 unsigned int gridZ = 1; | 352 unsigned int gridZ = 1; |
368 | |
369 // thread dimensionality | |
370 unsigned int blockX = 1; | |
371 unsigned int blockY = 1; | |
372 unsigned int blockZ = 1; | |
373 | 353 |
374 struct CUfunc_st* cu_function = (struct CUfunc_st*) (address) kernel; | 354 struct CUfunc_st* cu_function = (struct CUfunc_st*) (address) kernel; |
375 | 355 |
376 void * config[5] = { | 356 void * config[5] = { |
377 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, (char*) (address) parametersAndReturnValueBuffer, | 357 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, (char*) (address) parametersAndReturnValueBuffer, |
389 int status; | 369 int status; |
390 if (returnTypeSize != 0) { | 370 if (returnTypeSize != 0) { |
391 status = _cuda_cu_memalloc(&device_return_value, returnTypeSize); | 371 status = _cuda_cu_memalloc(&device_return_value, returnTypeSize); |
392 if (status != GRAAL_CUDA_SUCCESS) { | 372 if (status != GRAAL_CUDA_SUCCESS) { |
393 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | 373 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); |
394 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to allocate memory for return value pointer on device"); | 374 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_RuntimeException(), "[CUDA] Failed to allocate memory for return value pointer on device"); |
395 return 0L; | 375 return 0L; |
396 } | 376 } |
397 // Push device_return_value to kernelParams | 377 // Push device_return_value to kernelParams |
398 gpu::Ptx::CUdeviceptr* returnValuePtr = (gpu::Ptx::CUdeviceptr*) (address) parametersAndReturnValueBuffer + parametersAndReturnValueBufferSize - sizeof(device_return_value); | 378 gpu::Ptx::CUdeviceptr* returnValuePtr = (gpu::Ptx::CUdeviceptr*) (address) parametersAndReturnValueBuffer + parametersAndReturnValueBufferSize - sizeof(device_return_value); |
399 *returnValuePtr = device_return_value; | 379 *returnValuePtr = device_return_value; |
400 } | 380 } |
401 | 381 |
402 status = _cuda_cu_launch_kernel(cu_function, | 382 status = _cuda_cu_launch_kernel(cu_function, |
403 gridX, gridY, gridZ, | 383 gridX, gridY, gridZ, |
404 blockX, blockY, blockZ, | 384 dimX, dimY, dimZ, |
405 0, NULL, NULL, (void **) &config); | 385 0, NULL, NULL, (void **) &config); |
406 | 386 |
407 if (status != GRAAL_CUDA_SUCCESS) { | 387 if (status != GRAAL_CUDA_SUCCESS) { |
408 tty->print_cr("[CUDA] Failed to launch kernel"); | 388 tty->print_cr("[CUDA] Failed to launch kernel"); |
409 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to launch kernel"); | 389 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_RuntimeException(), "[CUDA] Failed to launch kernel"); |
410 return 0L; | 390 return 0L; |
411 } | 391 } |
412 | 392 |
413 if (TraceGPUInteraction) { | 393 if (TraceGPUInteraction) { |
414 tty->print_cr("[CUDA] Success: Kernel Launch: X: %d Y: %d Z: %d", blockX, blockY, blockZ); | 394 tty->print_cr("[CUDA] Success: Kernel Launch: X: %d Y: %d Z: %d", dimX, dimY, dimZ); |
415 } | 395 } |
416 | 396 |
417 status = _cuda_cu_ctx_synchronize(); | 397 status = _cuda_cu_ctx_synchronize(); |
418 | 398 |
419 if (status != GRAAL_CUDA_SUCCESS) { | 399 if (status != GRAAL_CUDA_SUCCESS) { |
420 tty->print_cr("[CUDA] Failed to synchronize launched kernel (%d)", status); | 400 tty->print_cr("[CUDA] Failed to synchronize launched kernel (%d)", status); |
421 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to synchronize launched kernel"); | 401 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_RuntimeException(), "[CUDA] Failed to synchronize launched kernel"); |
422 return 0L; | 402 return 0L; |
423 } | 403 } |
424 | 404 |
425 if (TraceGPUInteraction) { | 405 if (TraceGPUInteraction) { |
426 tty->print_cr("[CUDA] Success: Synchronized launch kernel"); | 406 tty->print_cr("[CUDA] Success: Synchronized launch kernel"); |