comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 13624:220ed109bf77

initial code for calling PTX kernel code from Java with parameter marshaling and return value unmarshaling performed by a wrapper specified via manual graph construction
author Doug Simon <doug.simon@oracle.com>
date Mon, 13 Jan 2014 22:32:27 +0100
parents 1a7e7011a341
children 27acedac70b7
comparison
equal deleted inserted replaced
13623:c70dddf5ce4a 13624:220ed109bf77
27 #include "runtime/gpu.hpp" 27 #include "runtime/gpu.hpp"
28 #include "utilities/globalDefinitions.hpp" 28 #include "utilities/globalDefinitions.hpp"
29 #include "utilities/ostream.hpp" 29 #include "utilities/ostream.hpp"
30 #include "memory/allocation.hpp" 30 #include "memory/allocation.hpp"
31 #include "memory/allocation.inline.hpp" 31 #include "memory/allocation.inline.hpp"
32 #include "runtime/interfaceSupport.hpp"
32 #include "ptxKernelArguments.hpp" 33 #include "ptxKernelArguments.hpp"
33 34
// CUDA driver context handle; destroyed via _cuda_cu_ctx_destroy after kernel
// execution (see execute_kernel_from_vm). Presumably created during device
// initialization elsewhere in this file — confirm lifetime against gpu::Ptx::initialize.
34 void * gpu::Ptx::_device_context; 35 void * gpu::Ptx::_device_context;
// Ordinal of the CUDA device in use; 0 selects the first device.
35 int gpu::Ptx::_cu_device = 0; 36 int gpu::Ptx::_cu_device = 0;
36 37
333 tty->print_cr("[CUDA] Got function handle for %s", name); 334 tty->print_cr("[CUDA] Got function handle for %s", name);
334 } 335 }
335 336
336 return cu_function; 337 return cu_function;
337 } 338 }
339
340 JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jlong parametersAndReturnValueBuffer, jint parametersAndReturnValueBufferSize, int encodedReturnTypeSize))
341 tty->print_cr("*** gpu::Ptx::execute_kernel_from_vm(kernel=%p, parametersAndReturnValueBuffer=%p, parametersAndReturnValueBufferSize=%d, encodedReturnTypeSize=%d)",
342 kernel, parametersAndReturnValueBuffer, parametersAndReturnValueBufferSize, encodedReturnTypeSize);
343 tty->print(" buffer as bytes: ");
344 for (int i = 0; i < parametersAndReturnValueBufferSize; i++) {
345 tty->print(" 0x%02x", ((jbyte*) (address) parametersAndReturnValueBuffer)[i] & 0xFF);
346 }
347 tty->cr();
348 tty->print(" buffer as ints: ");
349 for (int i = 0; i < (parametersAndReturnValueBufferSize / 4); i++) {
350 tty->print(" %d", ((jint*) (address) parametersAndReturnValueBuffer)[i]);
351 }
352 tty->cr();
353 tty->print(" buffer as words: ");
354 for (unsigned i = 0; i < (parametersAndReturnValueBufferSize / sizeof(void*)); i++) {
355 tty->print(" "INTPTR_FORMAT, ((void**) (address) parametersAndReturnValueBuffer)[i]);
356 }
357 tty->cr();
358 if (kernel == 0L) {
359 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL);
360 return 0L;
361 }
362
363
364 // grid dimensionality
365 unsigned int gridX = 1;
366 unsigned int gridY = 1;
367 unsigned int gridZ = 1;
368
369 // thread dimensionality
370 unsigned int blockX = 1;
371 unsigned int blockY = 1;
372 unsigned int blockZ = 1;
373
374 struct CUfunc_st* cu_function = (struct CUfunc_st*) (address) kernel;
375
376 void * config[5] = {
377 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, (char*) (address) parametersAndReturnValueBuffer,
378 GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &parametersAndReturnValueBufferSize,
379 GRAAL_CU_LAUNCH_PARAM_END
380 };
381
382 if (TraceGPUInteraction) {
383 tty->print_cr("[CUDA] launching kernel");
384 }
385
386 bool isObjectReturn = encodedReturnTypeSize < 0;
387 int returnTypeSize = encodedReturnTypeSize < 0 ? -encodedReturnTypeSize : encodedReturnTypeSize;
388 gpu::Ptx::CUdeviceptr device_return_value;
389 int status;
390 if (returnTypeSize != 0) {
391 status = _cuda_cu_memalloc(&device_return_value, returnTypeSize);
392 if (status != GRAAL_CUDA_SUCCESS) {
393 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
394 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to allocate memory for return value pointer on device");
395 return 0L;
396 }
397 // Push device_return_value to kernelParams
398 gpu::Ptx::CUdeviceptr* returnValuePtr = (gpu::Ptx::CUdeviceptr*) (address) parametersAndReturnValueBuffer + parametersAndReturnValueBufferSize - sizeof(device_return_value);
399 *returnValuePtr = device_return_value;
400 }
401
402 status = _cuda_cu_launch_kernel(cu_function,
403 gridX, gridY, gridZ,
404 blockX, blockY, blockZ,
405 0, NULL, NULL, (void **) &config);
406
407 if (status != GRAAL_CUDA_SUCCESS) {
408 tty->print_cr("[CUDA] Failed to launch kernel");
409 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to launch kernel");
410 return 0L;
411 }
412
413 if (TraceGPUInteraction) {
414 tty->print_cr("[CUDA] Success: Kernel Launch: X: %d Y: %d Z: %d", blockX, blockY, blockZ);
415 }
416
417 status = _cuda_cu_ctx_synchronize();
418
419 if (status != GRAAL_CUDA_SUCCESS) {
420 tty->print_cr("[CUDA] Failed to synchronize launched kernel (%d)", status);
421 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to synchronize launched kernel");
422 return 0L;
423 }
424
425 if (TraceGPUInteraction) {
426 tty->print_cr("[CUDA] Success: Synchronized launch kernel");
427 }
428
429 jlong primitiveReturnValue = 0L;
430 if (isObjectReturn) {
431 oop return_val;
432 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, device_return_value, T_OBJECT_BYTE_SIZE);
433 if (status != GRAAL_CUDA_SUCCESS) {
434 tty->print_cr("[CUDA] *** Error (%d) Failed to copy value from device argument", status);
435 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to copy value from device argument");
436 return 0L;
437 }
438 thread->set_vm_result(return_val);
439 } else if (returnTypeSize > 0) {
440 jlong result;
441 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&primitiveReturnValue, device_return_value, T_LONG_BYTE_SIZE);
442 if (status != GRAAL_CUDA_SUCCESS) {
443 tty->print_cr("[CUDA] *** Error (%d) Failed to copy value from device argument", status);
444 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to copy value from device argument");
445 return 0L;
446 }
447 }
448
449 // Free device memory allocated for result
450 if (returnTypeSize != 0) {
451 status = gpu::Ptx::_cuda_cu_memfree(device_return_value);
452 if (status != GRAAL_CUDA_SUCCESS) {
453 tty->print_cr("[CUDA] *** Error (%d) Failed to free device memory of return value", status);
454 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to free device memory of return value");
455 return 0L;
456 }
457 }
458
459 if (TraceGPUInteraction) {
460 tty->print_cr("[CUDA] Success: Freed device memory of return value");
461 }
462
463 // Destroy context
464 status = gpu::Ptx::_cuda_cu_ctx_destroy(_device_context);
465 if (status != GRAAL_CUDA_SUCCESS) {
466 tty->print_cr("[CUDA] *** Error (%d) Failed to destroy context", status);
467 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_Exception(), "[CUDA] Failed to destroy context");
468 return 0L;
469 }
470
471 if (TraceGPUInteraction) {
472 tty->print_cr("[CUDA] Success: Destroy context");
473 }
474
475 return primitiveReturnValue;
476 JRT_END
338 477
// Executes a PTX kernel with the legacy PTXKernelArguments marshaling, using a
// single 1x1x1 warp configuration; delegates to execute_warp.
339 bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) { 478 bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
340 return gpu::Ptx::execute_warp(1, 1, 1, kernel, ptxka, ret); 479 return gpu::Ptx::execute_warp(1, 1, 1, kernel, ptxka, ret);
341 } 480 }
342 481