Mercurial > hg > truffle
comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 13848:b97e2515d392
removed PTXKernelArguments; added mechanism for printing incoming kernel arguments
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Fri, 31 Jan 2014 16:05:37 +0100 |
parents | 4ca607fc94e0 |
children | b3e69d1f5da0 |
comparison
equal
deleted
inserted
replaced
13846:1e72cd05b77e | 13848:b97e2515d392 |
---|---|
29 #include "utilities/ostream.hpp" | 29 #include "utilities/ostream.hpp" |
30 #include "memory/allocation.hpp" | 30 #include "memory/allocation.hpp" |
31 #include "memory/allocation.inline.hpp" | 31 #include "memory/allocation.inline.hpp" |
32 #include "memory/gcLocker.inline.hpp" | 32 #include "memory/gcLocker.inline.hpp" |
33 #include "runtime/interfaceSupport.hpp" | 33 #include "runtime/interfaceSupport.hpp" |
34 #include "runtime/vframe.hpp" | |
34 #include "graal/graalEnv.hpp" | 35 #include "graal/graalEnv.hpp" |
35 #include "graal/graalCompiler.hpp" | 36 #include "graal/graalCompiler.hpp" |
36 #include "ptxKernelArguments.hpp" | 37 |
38 #define T_BYTE_SIZE 1 | |
39 #define T_BOOLEAN_SIZE 4 | |
40 #define T_INT_BYTE_SIZE 4 | |
41 #define T_FLOAT_BYTE_SIZE 4 | |
42 #define T_DOUBLE_BYTE_SIZE 8 | |
43 #define T_LONG_BYTE_SIZE 8 | |
44 #define T_OBJECT_BYTE_SIZE sizeof(intptr_t) | |
45 #define T_ARRAY_BYTE_SIZE sizeof(intptr_t) | |
37 | 46 |
38 // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'. | 47 // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'. |
39 #define GPU_VMENTRY(result_type, name, signature) \ | 48 #define GPU_VMENTRY(result_type, name, signature) \ |
40 JNIEXPORT result_type JNICALL name signature { \ | 49 JNIEXPORT result_type JNICALL name signature { \ |
41 GRAAL_VM_ENTRY_MARK; \ | 50 GRAAL_VM_ENTRY_MARK; \ |
74 gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability; | 83 gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability; |
75 gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute; | 84 gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute; |
76 gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel; | 85 gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel; |
77 gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function; | 86 gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function; |
78 gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex; | 87 gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex; |
88 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; | |
79 gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh; | 89 gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh; |
90 gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc; | |
80 gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree; | 91 gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree; |
81 gpu::Ptx::cuda_cu_mem_host_register_func_t gpu::Ptx::_cuda_cu_mem_host_register; | 92 gpu::Ptx::cuda_cu_mem_host_register_func_t gpu::Ptx::_cuda_cu_mem_host_register; |
82 gpu::Ptx::cuda_cu_mem_host_get_device_pointer_func_t gpu::Ptx::_cuda_cu_mem_host_get_device_pointer; | 93 gpu::Ptx::cuda_cu_mem_host_get_device_pointer_func_t gpu::Ptx::_cuda_cu_mem_host_get_device_pointer; |
83 gpu::Ptx::cuda_cu_mem_host_unregister_func_t gpu::Ptx::_cuda_cu_mem_host_unregister; | 94 gpu::Ptx::cuda_cu_mem_host_unregister_func_t gpu::Ptx::_cuda_cu_mem_host_unregister; |
84 | 95 |
430 oop* _pinned; // objects that have been pinned with cuMemHostRegister | 441 oop* _pinned; // objects that have been pinned with cuMemHostRegister |
431 int _pinned_length; // length of _pinned | 442 int _pinned_length; // length of _pinned |
432 gpu::Ptx::CUdeviceptr _ret_value; // pointer to slot in GPU memory holding the return value | 443 gpu::Ptx::CUdeviceptr _ret_value; // pointer to slot in GPU memory holding the return value |
433 int _ret_type_size; // size of the return type value | 444 int _ret_type_size; // size of the return type value |
434 bool _ret_is_object; // specifies if the return type is Object | 445 bool _ret_is_object; // specifies if the return type is Object |
435 bool _gc_locked; | 446 bool _gc_locked; // denotes when execution has locked GC |
436 | 447 |
437 bool check(int status, const char *action) { | 448 bool check(int status, const char *action) { |
438 if (status != GRAAL_CUDA_SUCCESS) { | 449 if (status != GRAAL_CUDA_SUCCESS) { |
439 Thread* THREAD = _thread; | 450 Thread* THREAD = _thread; |
440 char* message = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, O_BUFLEN + 1); | 451 char* message = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, O_BUFLEN + 1); |
573 if (_gc_locked) { | 584 if (_gc_locked) { |
574 GC_locker::unlock_critical(_thread); | 585 GC_locker::unlock_critical(_thread); |
575 if (TraceGPUInteraction) { | 586 if (TraceGPUInteraction) { |
576 tty->print_cr("[CUDA] Unlocked GC"); | 587 tty->print_cr("[CUDA] Unlocked GC"); |
577 } | 588 } |
589 _gc_locked = false; | |
578 } | 590 } |
579 } | 591 } |
580 }; | 592 }; |
593 | |
594 // Prints values in the kernel arguments buffer | |
595 class KernelArgumentsPrinter: public SignatureIterator { | |
596 Method* _method; | |
597 address _buffer; | |
598 size_t _bufferOffset; | |
599 outputStream* _st; | |
600 | |
601 private: | |
602 | |
603 // Get next java argument | |
604 oop next_arg(BasicType expectedType); | |
605 | |
606 public: | |
607 KernelArgumentsPrinter(Method* method, address buffer, outputStream* st) : SignatureIterator(method->signature()), | |
608 _method(method), _buffer(buffer), _bufferOffset(0), _st(st) { | |
609 if (!method->is_static()) { | |
610 print_oop(); | |
611 } | |
612 iterate(); | |
613 } | |
614 | |
615 address next(size_t dataSz) { | |
616 if (is_return_type()) { | |
617 return _buffer; | |
618 } | |
619 if (_bufferOffset != 0) { | |
620 _st->print(", "); | |
621 } | |
622 _bufferOffset = align_size_up_(_bufferOffset, dataSz); | |
623 address result = _buffer + _bufferOffset; | |
624 _bufferOffset += dataSz; | |
625 return result; | |
626 } | |
627 | |
628 void print_oop() { | |
629 oop obj = *((oop*) next(sizeof(oop))); | |
630 if (obj != NULL) { | |
631 char type[256]; | |
632 obj->klass()->name()->as_C_string(type, 256); | |
633 _st->print("oop "PTR_FORMAT" (%s)", obj, type); | |
634 } else { | |
635 _st->print("oop null"); | |
636 } | |
637 } | |
638 | |
639 bool skip() { | |
640 return is_return_type(); | |
641 } | |
642 | |
643 void do_bool () { if (!skip()) _st->print("bool %d", *((jboolean*) next(sizeof(jboolean)))); } | |
644 void do_char () { if (!skip()) _st->print("char %c", *((jchar*) next(sizeof(jchar)))); } | |
645 void do_float () { if (!skip()) _st->print("float %g", *((jfloat*) next(sizeof(jfloat)))); } | |
646 void do_double() { if (!skip()) _st->print("double %g", *((jdouble*) next(sizeof(jdouble)))); } | |
647 void do_byte () { if (!skip()) _st->print("byte %d", *((jbyte*) next(sizeof(jbyte)))); } | |
648 void do_short () { if (!skip()) _st->print("short %d", *((jshort*) next(sizeof(jshort)))); } | |
649 void do_int () { if (!skip()) _st->print("int %d", *((jint*) next(sizeof(jint)))); } | |
650 void do_long () { if (!skip()) _st->print("long "JLONG_FORMAT, *((jlong*) next(sizeof(jlong)))); } | |
651 void do_void () { } | |
652 void do_object(int begin, int end) { if (!skip()) print_oop(); } | |
653 void do_array (int begin, int end) { if (!skip()) print_oop(); } | |
654 }; | |
655 | |
656 static void printKernelArguments(JavaThread* thread, address buffer) { | |
657 for (vframeStream vfst(thread); !vfst.at_end(); vfst.next()) { | |
658 Method* m = vfst.method(); | |
659 if (m != NULL) { | |
660 stringStream st(O_BUFLEN); | |
661 st.print("[CUDA] Call: %s.%s(", m->method_holder()->name()->as_C_string(), m->name()->as_C_string()); | |
662 KernelArgumentsPrinter kap(m, buffer, &st); | |
663 tty->print_cr("%s)", st.as_string()); | |
664 return; | |
665 } | |
666 } | |
667 } | |
581 | 668 |
582 GPU_VMENTRY(jlong, gpu::Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass)) | 669 GPU_VMENTRY(jlong, gpu::Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass)) |
583 return (jlong) gpu::Ptx::execute_kernel_from_vm; | 670 return (jlong) gpu::Ptx::execute_kernel_from_vm; |
584 GPU_END | 671 GPU_END |
585 | 672 |
591 jlong pinnedObjects, | 678 jlong pinnedObjects, |
592 int encodedReturnTypeSize)) | 679 int encodedReturnTypeSize)) |
593 if (kernel == 0L) { | 680 if (kernel == 0L) { |
594 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL); | 681 SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL); |
595 return 0L; | 682 return 0L; |
683 } | |
684 | |
685 if (TraceGPUInteraction) { | |
686 printKernelArguments(thread, (address) buffer); | |
596 } | 687 } |
597 | 688 |
598 PtxCall call(thread, (address) buffer, bufferSize, (oop*) (address) pinnedObjects, encodedReturnTypeSize); | 689 PtxCall call(thread, (address) buffer, bufferSize, (oop*) (address) pinnedObjects, encodedReturnTypeSize); |
599 | 690 |
600 #define TRY(action) do { \ | 691 #define TRY(action) do { \ |