comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 13848:b97e2515d392

removed PTXKernelArguments; added mechanism for printing incoming kernel arguments
author Doug Simon <doug.simon@oracle.com>
date Fri, 31 Jan 2014 16:05:37 +0100
parents 4ca607fc94e0
children b3e69d1f5da0
comparison
equal deleted inserted replaced
13846:1e72cd05b77e 13848:b97e2515d392
29 #include "utilities/ostream.hpp" 29 #include "utilities/ostream.hpp"
30 #include "memory/allocation.hpp" 30 #include "memory/allocation.hpp"
31 #include "memory/allocation.inline.hpp" 31 #include "memory/allocation.inline.hpp"
32 #include "memory/gcLocker.inline.hpp" 32 #include "memory/gcLocker.inline.hpp"
33 #include "runtime/interfaceSupport.hpp" 33 #include "runtime/interfaceSupport.hpp"
34 #include "runtime/vframe.hpp"
34 #include "graal/graalEnv.hpp" 35 #include "graal/graalEnv.hpp"
35 #include "graal/graalCompiler.hpp" 36 #include "graal/graalCompiler.hpp"
36 #include "ptxKernelArguments.hpp" 37
// Per-type size constants.
// NOTE(review): presumably sizes in bytes keyed by the matching BasicType name;
// the code that uses them is not visible in this excerpt - confirm. Note that
// T_BYTE_SIZE and T_BOOLEAN_SIZE do not follow the *_BYTE_SIZE naming of the rest.
38 #define T_BYTE_SIZE 1
39 #define T_BOOLEAN_SIZE 4
40 #define T_INT_BYTE_SIZE 4
41 #define T_FLOAT_BYTE_SIZE 4
42 #define T_DOUBLE_BYTE_SIZE 8
43 #define T_LONG_BYTE_SIZE 8
// The object and array sizes follow the width of intptr_t on the build platform.
44 #define T_OBJECT_BYTE_SIZE sizeof(intptr_t)
45 #define T_ARRAY_BYTE_SIZE sizeof(intptr_t)
37 46
38 // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'. 47 // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'.
39 #define GPU_VMENTRY(result_type, name, signature) \ 48 #define GPU_VMENTRY(result_type, name, signature) \
40 JNIEXPORT result_type JNICALL name signature { \ 49 JNIEXPORT result_type JNICALL name signature { \
41 GRAAL_VM_ENTRY_MARK; \ 50 GRAAL_VM_ENTRY_MARK; \
74 gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability; 83 gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability;
75 gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute; 84 gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute;
76 gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel; 85 gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel;
77 gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function; 86 gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function;
78 gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex; 87 gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex;
88 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod;
79 gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh; 89 gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh;
90 gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc;
80 gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree; 91 gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree;
81 gpu::Ptx::cuda_cu_mem_host_register_func_t gpu::Ptx::_cuda_cu_mem_host_register; 92 gpu::Ptx::cuda_cu_mem_host_register_func_t gpu::Ptx::_cuda_cu_mem_host_register;
82 gpu::Ptx::cuda_cu_mem_host_get_device_pointer_func_t gpu::Ptx::_cuda_cu_mem_host_get_device_pointer; 93 gpu::Ptx::cuda_cu_mem_host_get_device_pointer_func_t gpu::Ptx::_cuda_cu_mem_host_get_device_pointer;
83 gpu::Ptx::cuda_cu_mem_host_unregister_func_t gpu::Ptx::_cuda_cu_mem_host_unregister; 94 gpu::Ptx::cuda_cu_mem_host_unregister_func_t gpu::Ptx::_cuda_cu_mem_host_unregister;
84 95
430 oop* _pinned; // objects that have been pinned with cuMemHostRegister 441 oop* _pinned; // objects that have been pinned with cuMemHostRegister
431 int _pinned_length; // length of _pinned 442 int _pinned_length; // length of _pinned
432 gpu::Ptx::CUdeviceptr _ret_value; // pointer to slot in GPU memory holding the return value 443 gpu::Ptx::CUdeviceptr _ret_value; // pointer to slot in GPU memory holding the return value
433 int _ret_type_size; // size of the return type value 444 int _ret_type_size; // size of the return type value
434 bool _ret_is_object; // specifies if the return type is Object 445 bool _ret_is_object; // specifies if the return type is Object
435 bool _gc_locked; 446 bool _gc_locked; // denotes when execution has locked GC
436 447
437 bool check(int status, const char *action) { 448 bool check(int status, const char *action) {
438 if (status != GRAAL_CUDA_SUCCESS) { 449 if (status != GRAAL_CUDA_SUCCESS) {
439 Thread* THREAD = _thread; 450 Thread* THREAD = _thread;
440 char* message = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, O_BUFLEN + 1); 451 char* message = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, O_BUFLEN + 1);
573 if (_gc_locked) { 584 if (_gc_locked) {
574 GC_locker::unlock_critical(_thread); 585 GC_locker::unlock_critical(_thread);
575 if (TraceGPUInteraction) { 586 if (TraceGPUInteraction) {
576 tty->print_cr("[CUDA] Unlocked GC"); 587 tty->print_cr("[CUDA] Unlocked GC");
577 } 588 }
589 _gc_locked = false;
578 } 590 }
579 } 591 }
580 }; 592 };
593
594 // Prints values in the kernel arguments buffer
595 class KernelArgumentsPrinter: public SignatureIterator {
596 Method* _method;
597 address _buffer;
598 size_t _bufferOffset;
599 outputStream* _st;
600
601 private:
602
603 // Get next java argument
604 oop next_arg(BasicType expectedType);
605
606 public:
607 KernelArgumentsPrinter(Method* method, address buffer, outputStream* st) : SignatureIterator(method->signature()),
608 _method(method), _buffer(buffer), _bufferOffset(0), _st(st) {
609 if (!method->is_static()) {
610 print_oop();
611 }
612 iterate();
613 }
614
615 address next(size_t dataSz) {
616 if (is_return_type()) {
617 return _buffer;
618 }
619 if (_bufferOffset != 0) {
620 _st->print(", ");
621 }
622 _bufferOffset = align_size_up_(_bufferOffset, dataSz);
623 address result = _buffer + _bufferOffset;
624 _bufferOffset += dataSz;
625 return result;
626 }
627
628 void print_oop() {
629 oop obj = *((oop*) next(sizeof(oop)));
630 if (obj != NULL) {
631 char type[256];
632 obj->klass()->name()->as_C_string(type, 256);
633 _st->print("oop "PTR_FORMAT" (%s)", obj, type);
634 } else {
635 _st->print("oop null");
636 }
637 }
638
639 bool skip() {
640 return is_return_type();
641 }
642
643 void do_bool () { if (!skip()) _st->print("bool %d", *((jboolean*) next(sizeof(jboolean)))); }
644 void do_char () { if (!skip()) _st->print("char %c", *((jchar*) next(sizeof(jchar)))); }
645 void do_float () { if (!skip()) _st->print("float %g", *((jfloat*) next(sizeof(jfloat)))); }
646 void do_double() { if (!skip()) _st->print("double %g", *((jdouble*) next(sizeof(jdouble)))); }
647 void do_byte () { if (!skip()) _st->print("byte %d", *((jbyte*) next(sizeof(jbyte)))); }
648 void do_short () { if (!skip()) _st->print("short %d", *((jshort*) next(sizeof(jshort)))); }
649 void do_int () { if (!skip()) _st->print("int %d", *((jint*) next(sizeof(jint)))); }
650 void do_long () { if (!skip()) _st->print("long "JLONG_FORMAT, *((jlong*) next(sizeof(jlong)))); }
651 void do_void () { }
652 void do_object(int begin, int end) { if (!skip()) print_oop(); }
653 void do_array (int begin, int end) { if (!skip()) print_oop(); }
654 };
655
656 static void printKernelArguments(JavaThread* thread, address buffer) {
657 for (vframeStream vfst(thread); !vfst.at_end(); vfst.next()) {
658 Method* m = vfst.method();
659 if (m != NULL) {
660 stringStream st(O_BUFLEN);
661 st.print("[CUDA] Call: %s.%s(", m->method_holder()->name()->as_C_string(), m->name()->as_C_string());
662 KernelArgumentsPrinter kap(m, buffer, &st);
663 tty->print_cr("%s)", st.as_string());
664 return;
665 }
666 }
667 }
581 668
582 GPU_VMENTRY(jlong, gpu::Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass)) 669 GPU_VMENTRY(jlong, gpu::Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass))
583 return (jlong) gpu::Ptx::execute_kernel_from_vm; 670 return (jlong) gpu::Ptx::execute_kernel_from_vm;
584 GPU_END 671 GPU_END
585 672
591 jlong pinnedObjects, 678 jlong pinnedObjects,
592 int encodedReturnTypeSize)) 679 int encodedReturnTypeSize))
593 if (kernel == 0L) { 680 if (kernel == 0L) {
594 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL); 681 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL);
595 return 0L; 682 return 0L;
683 }
684
685 if (TraceGPUInteraction) {
686 printKernelArguments(thread, (address) buffer);
596 } 687 }
597 688
598 PtxCall call(thread, (address) buffer, bufferSize, (oop*) (address) pinnedObjects, encodedReturnTypeSize); 689 PtxCall call(thread, (address) buffer, bufferSize, (oop*) (address) pinnedObjects, encodedReturnTypeSize);
599 690
600 #define TRY(action) do { \ 691 #define TRY(action) do { \