comparison src/gpu/ptx/vm/ptxKernelArguments.cpp @ 11821:d8659ad83fcc

PTX single-threaded array store, Warp annotation
author Morris Meyer <morris.meyer@oracle.com>
date Sat, 28 Sep 2013 21:06:12 -0400
parents 91e5f927af63
children c7abc8411011
comparison
equal deleted inserted replaced
11820:aeeab846e98c 11821:d8659ad83fcc
30 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; 30 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod;
31 31
32 // Get next java argument 32 // Get next java argument
33 oop PTXKernelArguments::next_arg(BasicType expectedType) { 33 oop PTXKernelArguments::next_arg(BasicType expectedType) {
34 assert(_index < _args->length(), "out of bounds"); 34 assert(_index < _args->length(), "out of bounds");
35 oop arg=((objArrayOop) (_args))->obj_at(_index++); 35
36 assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch"); 36 oop arg = ((objArrayOop) (_args))->obj_at(_index++);
37 assert(expectedType == T_OBJECT ||
38 java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
39
37 return arg; 40 return arg;
38 } 41 }
39 42
40 void PTXKernelArguments::do_int() { 43 void PTXKernelArguments::do_int() {
44 if (is_after_invocation()) {
45 return;
46 }
41 // If the parameter is a return value, 47 // If the parameter is a return value,
42 if (is_return_type()) { 48 if (is_return_type()) {
43 // Allocate device memory for T_INT return value pointer on device. Size in bytes 49 // Allocate device memory for T_INT return value pointer on device. Size in bytes
44 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); 50 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE);
45 if (status != GRAAL_CUDA_SUCCESS) { 51 if (status != GRAAL_CUDA_SUCCESS) {
48 return; 54 return;
49 } 55 }
50 // Push _return_value_ptr to _kernelBuffer 56 // Push _return_value_ptr to _kernelBuffer
51 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; 57 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
52 _bufferOffset += sizeof(_return_value_ptr); 58 _bufferOffset += sizeof(_return_value_ptr);
53 } 59 } else {
54 else {
55 // Get the next java argument and its value which should be a T_INT 60 // Get the next java argument and its value which should be a T_INT
56 oop arg = next_arg(T_INT); 61 oop arg = next_arg(T_INT);
57 // Copy the java argument value to kernelArgBuffer 62 // Copy the java argument value to kernelArgBuffer
58 jvalue intval; 63 jvalue intval;
59 if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { 64 if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
65 _bufferOffset += sizeof(intval.i); 70 _bufferOffset += sizeof(intval.i);
66 } 71 }
67 return; 72 return;
68 } 73 }
69 74
70 void PTXKernelArguments::do_long() { 75 void PTXKernelArguments::do_long() {
76 if (is_after_invocation()) {
77 return;
78 }
71 // If the parameter is a return value, 79 // If the parameter is a return value,
72 if (is_return_type()) { 80 if (is_return_type()) {
73 // Allocate device memory for T_LONG return value pointer on device. Size in bytes 81 // Allocate device memory for T_LONG return value pointer on device. Size in bytes
74 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE); 82 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE);
75 if (status != GRAAL_CUDA_SUCCESS) { 83 if (status != GRAAL_CUDA_SUCCESS) {
78 return; 86 return;
79 } 87 }
80 // Push _return_value_ptr to _kernelBuffer 88 // Push _return_value_ptr to _kernelBuffer
81 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; 89 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
82 _bufferOffset += sizeof(_return_value_ptr); 90 _bufferOffset += sizeof(_return_value_ptr);
83 } 91 } else {
84 else {
85 // Get the next java argument and its value which should be a T_LONG 92 // Get the next java argument and its value which should be a T_LONG
86 oop arg = next_arg(T_LONG); 93 oop arg = next_arg(T_LONG);
87 // Copy the java argument value to kernelArgBuffer 94 // Copy the java argument value to kernelArgBuffer
88 jvalue val; 95 jvalue val;
89 if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) { 96 if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) {
95 _bufferOffset += sizeof(val.j); 102 _bufferOffset += sizeof(val.j);
96 } 103 }
97 return; 104 return;
98 } 105 }
99 106
100 void PTXKernelArguments::do_byte() { 107 void PTXKernelArguments::do_byte() {
101 // If the parameter is a return value, 108 if (is_after_invocation()) {
102 if (is_return_type()) { 109 return;
103 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes 110 }
104 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE); 111 // If the parameter is a return value,
112 if (is_return_type()) {
113 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
114 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
115 if (status != GRAAL_CUDA_SUCCESS) {
116 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
117 _success = false;
118 return;
119 }
120 // Push _return_value_ptr to _kernelBuffer
121 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
122 _bufferOffset += sizeof(_return_value_ptr);
123 } else {
124 // Get the next java argument and its value which should be a T_BYTE
125 oop arg = next_arg(T_BYTE);
126 // Copy the java argument value to kernelArgBuffer
127 jvalue val;
128 if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
129 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
130 _success = false;
131 return;
132 }
133 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
134 _bufferOffset += sizeof(val.b);
135 }
136 return;
137 }
138
139 void PTXKernelArguments::do_array(int begin, int end) {
140 gpu::Ptx::CUdeviceptr _array_ptr;
141 int status;
142
143 // Get the next java argument and its value which should be a T_ARRAY
144 oop arg = next_arg(T_OBJECT);
145 int array_size = arg->size() * HeapWordSize;
146
147 if (is_after_invocation()) {
148 _array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);
149 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size);
150 if (status != GRAAL_CUDA_SUCCESS) {
151 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
152 _success = false;
153 return;
154 } else {
155 // tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size);
156 }
157 return;
158 }
159 // Allocate device memory for T_ARRAY return value pointer on device. Size in bytes
160 status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size);
105 if (status != GRAAL_CUDA_SUCCESS) { 161 if (status != GRAAL_CUDA_SUCCESS) {
106 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); 162 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
107 _success = false; 163 _success = false;
108 return; 164 return;
165 }
166 status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size);
167 if (status != GRAAL_CUDA_SUCCESS) {
168 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status);
169 _success = false;
170 return;
171 } else {
172 // tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size);
109 } 173 }
110 // Push _return_value_ptr to _kernelBuffer 174 // Push _return_value_ptr to _kernelBuffer
111 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; 175 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
112 _bufferOffset += sizeof(_return_value_ptr); 176 _bufferOffset += sizeof(_return_value_ptr);
113 } 177 return;
114 else { 178 }
115 // Get the next java argument and its value which should be a T_BYTE 179
116 oop arg = next_arg(T_BYTE); 180 void PTXKernelArguments::do_void() {
117 // Copy the java argument value to kernelArgBuffer 181 return;
118 jvalue val;
119 if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
120 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
121 _success = false;
122 return;
123 }
124 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
125 _bufferOffset += sizeof(val.b);
126 }
127 return;
128 } 182 }
129 183
130 // TODO implement other do_* 184 // TODO implement other do_*