Mercurial > hg > graal-jvmci-8
comparison src/gpu/ptx/vm/ptxKernelArguments.cpp @ 11821:d8659ad83fcc
PTX single-threaded array store, Warp annotation
author | Morris Meyer <morris.meyer@oracle.com> |
---|---|
date | Sat, 28 Sep 2013 21:06:12 -0400 |
parents | 91e5f927af63 |
children | c7abc8411011 |
comparison
equal
deleted
inserted
replaced
11820:aeeab846e98c | 11821:d8659ad83fcc |
---|---|
30 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; | 30 gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; |
31 | 31 |
32 // Get next java argument | 32 // Get next java argument |
33 oop PTXKernelArguments::next_arg(BasicType expectedType) { | 33 oop PTXKernelArguments::next_arg(BasicType expectedType) { |
34 assert(_index < _args->length(), "out of bounds"); | 34 assert(_index < _args->length(), "out of bounds"); |
35 oop arg=((objArrayOop) (_args))->obj_at(_index++); | 35 |
36 assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch"); | 36 oop arg = ((objArrayOop) (_args))->obj_at(_index++); |
37 assert(expectedType == T_OBJECT || | |
38 java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch"); | |
39 | |
37 return arg; | 40 return arg; |
38 } | 41 } |
39 | 42 |
40 void PTXKernelArguments::do_int() { | 43 void PTXKernelArguments::do_int() { |
44 if (is_after_invocation()) { | |
45 return; | |
46 } | |
41 // If the parameter is a return value, | 47 // If the parameter is a return value, |
42 if (is_return_type()) { | 48 if (is_return_type()) { |
43 // Allocate device memory for T_INT return value pointer on device. Size in bytes | 49 // Allocate device memory for T_INT return value pointer on device. Size in bytes |
44 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); | 50 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); |
45 if (status != GRAAL_CUDA_SUCCESS) { | 51 if (status != GRAAL_CUDA_SUCCESS) { |
48 return; | 54 return; |
49 } | 55 } |
50 // Push _return_value_ptr to _kernelBuffer | 56 // Push _return_value_ptr to _kernelBuffer |
51 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | 57 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; |
52 _bufferOffset += sizeof(_return_value_ptr); | 58 _bufferOffset += sizeof(_return_value_ptr); |
53 } | 59 } else { |
54 else { | |
55 // Get the next java argument and its value which should be a T_INT | 60 // Get the next java argument and its value which should be a T_INT |
56 oop arg = next_arg(T_INT); | 61 oop arg = next_arg(T_INT); |
57 // Copy the java argument value to kernelArgBuffer | 62 // Copy the java argument value to kernelArgBuffer |
58 jvalue intval; | 63 jvalue intval; |
59 if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { | 64 if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { |
65 _bufferOffset += sizeof(intval.i); | 70 _bufferOffset += sizeof(intval.i); |
66 } | 71 } |
67 return; | 72 return; |
68 } | 73 } |
69 | 74 |
70 void PTXKernelArguments::do_long() { | 75 void PTXKernelArguments::do_long() { |
76 if (is_after_invocation()) { | |
77 return; | |
78 } | |
71 // If the parameter is a return value, | 79 // If the parameter is a return value, |
72 if (is_return_type()) { | 80 if (is_return_type()) { |
73 // Allocate device memory for T_LONG return value pointer on device. Size in bytes | 81 // Allocate device memory for T_LONG return value pointer on device. Size in bytes |
74 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE); | 82 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE); |
75 if (status != GRAAL_CUDA_SUCCESS) { | 83 if (status != GRAAL_CUDA_SUCCESS) { |
78 return; | 86 return; |
79 } | 87 } |
80 // Push _return_value_ptr to _kernelBuffer | 88 // Push _return_value_ptr to _kernelBuffer |
81 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | 89 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; |
82 _bufferOffset += sizeof(_return_value_ptr); | 90 _bufferOffset += sizeof(_return_value_ptr); |
83 } | 91 } else { |
84 else { | |
85 // Get the next java argument and its value which should be a T_LONG | 92 // Get the next java argument and its value which should be a T_LONG |
86 oop arg = next_arg(T_LONG); | 93 oop arg = next_arg(T_LONG); |
87 // Copy the java argument value to kernelArgBuffer | 94 // Copy the java argument value to kernelArgBuffer |
88 jvalue val; | 95 jvalue val; |
89 if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) { | 96 if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) { |
95 _bufferOffset += sizeof(val.j); | 102 _bufferOffset += sizeof(val.j); |
96 } | 103 } |
97 return; | 104 return; |
98 } | 105 } |
99 | 106 |
100 void PTXKernelArguments::do_byte() { | 107 void PTXKernelArguments::do_byte() { |
101 // If the parameter is a return value, | 108 if (is_after_invocation()) { |
102 if (is_return_type()) { | 109 return; |
103 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes | 110 } |
104 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE); | 111 // If the parameter is a return value, |
112 if (is_return_type()) { | |
113 // Allocate device memory for T_BYTE return value pointer on device. Size in bytes | |
114 int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE); | |
115 if (status != GRAAL_CUDA_SUCCESS) { | |
116 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | |
117 _success = false; | |
118 return; | |
119 } | |
120 // Push _return_value_ptr to _kernelBuffer | |
121 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | |
122 _bufferOffset += sizeof(_return_value_ptr); | |
123 } else { | |
124 // Get the next java argument and its value which should be a T_BYTE | |
125 oop arg = next_arg(T_BYTE); | |
126 // Copy the java argument value to kernelArgBuffer | |
127 jvalue val; | |
128 if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { | |
129 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); | |
130 _success = false; | |
131 return; | |
132 } | |
133 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; | |
134 _bufferOffset += sizeof(val.b); | |
135 } | |
136 return; | |
137 } | |
138 | |
139 void PTXKernelArguments::do_array(int begin, int end) { | |
140 gpu::Ptx::CUdeviceptr _array_ptr; | |
141 int status; | |
142 | |
143 // Get the next java argument and its value which should be a T_ARRAY | |
144 oop arg = next_arg(T_OBJECT); | |
145 int array_size = arg->size() * HeapWordSize; | |
146 | |
147 if (is_after_invocation()) { | |
148 _array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]); | |
149 status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size); | |
150 if (status != GRAAL_CUDA_SUCCESS) { | |
151 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status); | |
152 _success = false; | |
153 return; | |
154 } else { | |
155 // tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size); | |
156 } | |
157 return; | |
158 } | |
159 // Allocate device memory for T_ARRAY return value pointer on device. Size in bytes | |
160 status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size); | |
105 if (status != GRAAL_CUDA_SUCCESS) { | 161 if (status != GRAAL_CUDA_SUCCESS) { |
106 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); | 162 tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); |
107 _success = false; | 163 _success = false; |
108 return; | 164 return; |
165 } | |
166 status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size); | |
167 if (status != GRAAL_CUDA_SUCCESS) { | |
168 tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status); | |
169 _success = false; | |
170 return; | |
171 } else { | |
172 // tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size); | |
109 } | 173 } |
110 // Push _return_value_ptr to _kernelBuffer | 174 // Push _return_value_ptr to _kernelBuffer |
111 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; | 175 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; |
112 _bufferOffset += sizeof(_return_value_ptr); | 176 _bufferOffset += sizeof(_return_value_ptr); |
113 } | 177 return; |
114 else { | 178 } |
115 // Get the next java argument and its value which should be a T_BYTE | 179 |
116 oop arg = next_arg(T_BYTE); | 180 void PTXKernelArguments::do_void() { |
117 // Copy the java argument value to kernelArgBuffer | 181 return; |
118 jvalue val; | |
119 if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { | |
120 tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); | |
121 _success = false; | |
122 return; | |
123 } | |
124 *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; | |
125 _bufferOffset += sizeof(val.b); | |
126 } | |
127 return; | |
128 } | 182 } |
129 | 183 |
130 // TODO implement other do_* | 184 // TODO implement other do_* |