Mercurial > hg > truffle
comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 13624:220ed109bf77
initial code for calling PTX kernel code from Java with parameter marshaling and return value unmarshaling performed by a wrapper specified via manual graph construction
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Mon, 13 Jan 2014 22:32:27 +0100 |
parents | 1a7e7011a341 |
children | 27acedac70b7 |
comparison
equal
deleted
inserted
replaced
13623:c70dddf5ce4a | 13624:220ed109bf77 |
---|---|
27 #include "runtime/gpu.hpp" | 27 #include "runtime/gpu.hpp" |
28 #include "utilities/globalDefinitions.hpp" | 28 #include "utilities/globalDefinitions.hpp" |
29 #include "utilities/ostream.hpp" | 29 #include "utilities/ostream.hpp" |
30 #include "memory/allocation.hpp" | 30 #include "memory/allocation.hpp" |
31 #include "memory/allocation.inline.hpp" | 31 #include "memory/allocation.inline.hpp" |
32 #include "runtime/interfaceSupport.hpp" | |
32 #include "ptxKernelArguments.hpp" | 33 #include "ptxKernelArguments.hpp" |
33 | 34 |
34 void * gpu::Ptx::_device_context; | 35 void * gpu::Ptx::_device_context; |
35 int gpu::Ptx::_cu_device = 0; | 36 int gpu::Ptx::_cu_device = 0; |
36 | 37 |
333 tty->print_cr("[CUDA] Got function handle for %s", name); | 334 tty->print_cr("[CUDA] Got function handle for %s", name); |
334 } | 335 } |
335 | 336 |
336 return cu_function; | 337 return cu_function; |
337 } | 338 } |
339 | |
340 JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jlong parametersAndReturnValueBuffer, jint parametersAndReturnValueBufferSize, int encodedReturnTypeSize)) | |
341 tty->print_cr("*** gpu::Ptx::execute_kernel_from_vm(kernel=%p, parametersAndReturnValueBuffer=%p, parametersAndReturnValueBufferSize=%d, encodedReturnTypeSize=%d)", | |
342 kernel, parametersAndReturnValueBuffer, parametersAndReturnValueBufferSize, encodedReturnTypeSize); | |
343 tty->print(" buffer as bytes: "); | |
344 for (int i = 0; i < parametersAndReturnValueBufferSize; i++) { | |
345 tty->print(" 0x%02x", ((jbyte*) (address) parametersAndReturnValueBuffer)[i] & 0xFF); | |
346 } | |
347 tty->cr(); | |
348 tty->print(" buffer as ints: "); | |
349 for (int i = 0; i < (parametersAndReturnValueBufferSize / 4); i++) { | |
350 tty->print(" %d", ((jint*) (address) parametersAndReturnValueBuffer)[i]); | |
351 } | |
352 tty->cr(); | |
353 tty->print(" buffer as words: "); | |
354 for (unsigned i = 0; i < (parametersAndReturnValueBufferSize / sizeof(void*)); i++) { | |
355 tty->print(" "INTPTR_FORMAT, ((void**) (address) parametersAndReturnValueBuffer)[i]); | |
356 } | |
357 tty->cr(); | |
358 if (kernel == 0L) { | |
359 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL); | |
360 return 0L; | |
361 } | |
362 | |
363 | |
364 // grid dimensionality | |
365 unsigned int gridX = 1; | |
366 unsigned int gridY = 1; | |
367 unsigned int gridZ = 1; | |
368 | |
369 // thread dimensionality | |
370 unsigned int blockX = 1; | |
371 unsigned int blockY = 1; | |
372 unsigned int blockZ = 1; | |
373 | |
374 struct CUfunc_st* cu_function = (struct CUfunc_st*) (address) kernel; | |
375 | |
376 void * config[5] = { | |
377 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, (char*) (address) parametersAndReturnValueBuffer, | |
378 GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &parametersAndReturnValueBufferSize, | |
379 GRAAL_CU_LAUNCH_PARAM_END | |
380 }; | |
381 | |
382 if (TraceGPUInteraction) { | |
383 tty->print_cr("[CUDA] launching kernel"); | |
384 } | |
385 | |
386 bool isObjectReturn = encodedReturnTypeSize < 0; | |
387 int returnTypeSize = encodedReturnTypeSize < 0 ? -encodedReturnTypeSize : encodedReturnTypeSize; | |
388 gpu::Ptx::CUdeviceptr device_return_value; | |
389 int status; | |
390 if (returnTypeSize != 0) { | |
391 status = _cuda_cu_memalloc(&device_return_value, returnTypeSize); | |
392 if (status != GRAAL_CUDA_SUCCESS) { | |
393 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | |
394 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to allocate memory for return value pointer on device"); | |
395 return 0L; | |
396 } | |
397 // Push device_return_value to kernelParams | |
398 gpu::Ptx::CUdeviceptr* returnValuePtr = (gpu::Ptx::CUdeviceptr*) (address) parametersAndReturnValueBuffer + parametersAndReturnValueBufferSize - sizeof(device_return_value); | |
399 *returnValuePtr = device_return_value; | |
400 } | |
401 | |
402 status = _cuda_cu_launch_kernel(cu_function, | |
403 gridX, gridY, gridZ, | |
404 blockX, blockY, blockZ, | |
405 0, NULL, NULL, (void **) &config); | |
406 | |
407 if (status != GRAAL_CUDA_SUCCESS) { | |
408 tty->print_cr("[CUDA] Failed to launch kernel"); | |
409 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to launch kernel"); | |
410 return 0L; | |
411 } | |
412 | |
413 if (TraceGPUInteraction) { | |
414 tty->print_cr("[CUDA] Success: Kernel Launch: X: %d Y: %d Z: %d", blockX, blockY, blockZ); | |
415 } | |
416 | |
417 status = _cuda_cu_ctx_synchronize(); | |
418 | |
419 if (status != GRAAL_CUDA_SUCCESS) { | |
420 tty->print_cr("[CUDA] Failed to synchronize launched kernel (%d)", status); | |
421 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to synchronize launched kernel"); | |
422 return 0L; | |
423 } | |
424 | |
425 if (TraceGPUInteraction) { | |
426 tty->print_cr("[CUDA] Success: Synchronized launch kernel"); | |
427 } | |
428 | |
429 jlong primitiveReturnValue = 0L; | |
430 if (isObjectReturn) { | |
431 oop return_val; | |
432 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, device_return_value, T_OBJECT_BYTE_SIZE); | |
433 if (status != GRAAL_CUDA_SUCCESS) { | |
434 tty->print_cr("[CUDA] *** Error (%d) Failed to copy value from device argument", status); | |
435 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to copy value from device argument"); | |
436 return 0L; | |
437 } | |
438 thread->set_vm_result(return_val); | |
439 } else if (returnTypeSize > 0) { | |
440 jlong result; | |
441 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&primitiveReturnValue, device_return_value, T_LONG_BYTE_SIZE); | |
442 if (status != GRAAL_CUDA_SUCCESS) { | |
443 tty->print_cr("[CUDA] *** Error (%d) Failed to copy value from device argument", status); | |
444 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to copy value from device argument"); | |
445 return 0L; | |
446 } | |
447 } | |
448 | |
449 // Free device memory allocated for result | |
450 if (returnTypeSize != 0) { | |
451 status = gpu::Ptx::_cuda_cu_memfree(device_return_value); | |
452 if (status != GRAAL_CUDA_SUCCESS) { | |
453 tty->print_cr("[CUDA] *** Error (%d) Failed to free device memory of return value", status); | |
454 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to free device memory of return value"); | |
455 return 0L; | |
456 } | |
457 } | |
458 | |
459 if (TraceGPUInteraction) { | |
460 tty->print_cr("[CUDA] Success: Freed device memory of return value"); | |
461 } | |
462 | |
463 // Destroy context | |
464 status = gpu::Ptx::_cuda_cu_ctx_destroy(_device_context); | |
465 if (status != GRAAL_CUDA_SUCCESS) { | |
466 tty->print_cr("[CUDA] *** Error (%d) Failed to destroy context", status); | |
467 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to destroy context"); | |
468 return 0L; | |
469 } | |
470 | |
471 if (TraceGPUInteraction) { | |
472 tty->print_cr("[CUDA] Success: Destroy context"); | |
473 } | |
474 | |
475 return primitiveReturnValue; | |
476 JRT_END | |
338 | 477 |
339 bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) { | 478 bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) { |
340 return gpu::Ptx::execute_warp(1, 1, 1, kernel, ptxka, ret); | 479 return gpu::Ptx::execute_warp(1, 1, 1, kernel, ptxka, ret); |
341 } | 480 } |
342 | 481 |