Mercurial > hg > graal-jvmci-8
comparison src/gpu/hsail/vm/gpu_hsail.cpp @ 14969:a6c144380ce7
HSAIL: added UseHSAILDeoptimization VM option for disabling HSAIL deopt support
Contributed-by: Eric Caspole <eric.caspole@amd.com>
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Fri, 04 Apr 2014 12:22:49 +0200 |
parents | 3e9a960f0da1 |
children | 2cae21d9f122 |
comparison legend: equal | deleted | inserted | replaced
14968:169caf662ac7 | 14969:a6c144380ce7 |
---|---|
131 if (lookFor == false) { | 131 if (lookFor == false) { |
132 tty->print_cr("-%d", len-1); | 132 tty->print_cr("-%d", len-1); |
133 } | 133 } |
134 } | 134 } |
135 | 135 |
136 | |
137 // for experimentation | |
138 static bool useDeoptInfo = true; | |
139 | |
140 jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args_handle, methodHandle& mh, nmethod *nm, jobject oops_save_handle, TRAPS) { | 136 jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args_handle, methodHandle& mh, nmethod *nm, jobject oops_save_handle, TRAPS) { |
141 | 137 |
142 ResourceMark rm(THREAD); | 138 ResourceMark rm(THREAD); |
143 objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args_handle); | 139 objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args_handle); |
144 | 140 |
145 // Reset the kernel arguments | 141 // Reset the kernel arguments |
146 _okra_clearargs(kernel); | 142 _okra_clearargs(kernel); |
147 | 143 |
148 | |
149 HSAILDeoptimizationInfo* e; | 144 HSAILDeoptimizationInfo* e; |
150 if (useDeoptInfo) { | 145 if (UseHSAILDeoptimization) { |
151 e = new (ResourceObj::C_HEAP, mtInternal) HSAILDeoptimizationInfo(); | 146 e = new (ResourceObj::C_HEAP, mtInternal) HSAILDeoptimizationInfo(); |
152 e->set_never_ran_array(NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal)); | 147 e->set_never_ran_array(NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal)); |
153 memset(e->never_ran_array(), 0, dimX * sizeof(jboolean)); | 148 memset(e->never_ran_array(), 0, dimX * sizeof(jboolean)); |
154 } | 149 } |
155 | 150 |
166 thread->set_gpu_exception_method(mh()); | 161 thread->set_gpu_exception_method(mh()); |
167 THROW_MSG_0(vmSymbols::java_lang_NullPointerException(), buf); | 162 THROW_MSG_0(vmSymbols::java_lang_NullPointerException(), buf); |
168 } | 163 } |
169 | 164 |
170 // Run the kernel | 165 // Run the kernel |
171 bool success = _okra_execute_with_range(kernel, dimX); | 166 bool success = false; |
172 // check if any workitem requested a deopt | 167 { |
173 // currently we only support at most one such workitem | 168 TraceTime t1("execute kernel", TraceGPUInteraction); |
174 | 169 success = _okra_execute_with_range(kernel, dimX); |
175 | 170 } |
176 int deoptcode = e->deopt_occurred(); | 171 |
177 if (useDeoptInfo && deoptcode != 0) { | 172 if (UseHSAILDeoptimization) { |
178 if (deoptcode != 1) { | 173 // check if any workitem requested a deopt |
179 // error condition detected in deopt code | 174 // currently we only support at most one such workitem |
180 char msg[200]; | 175 int deoptcode = e->deopt_occurred(); |
181 sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1*(deoptcode + 1)); | 176 if (deoptcode != 0) { |
182 guarantee(deoptcode == 1, msg); | 177 if (deoptcode != 1) { |
183 } | 178 // error condition detected in deopt code |
184 if (TraceGPUInteraction) { | 179 char msg[200]; |
185 tty->print_cr("deopt happened."); | 180 sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1 * (deoptcode + 1)); |
186 HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[0]; | 181 guarantee(deoptcode == 1, msg); |
187 tty->print_cr("first deopter was workitem %d", pdeopt->workitem()); | 182 } |
188 } | 183 |
189 | 184 { |
190 // Before handling any deopting workitems, save the pointers from | 185 TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction); |
191 // the hsail frames in oops_save so they get adjusted by any | 186 |
192 // GC. Need to do this before leaving thread_in_vm mode. | 187 if (TraceGPUInteraction) { |
193 // resolve handle only needed once here (not exiting vm mode) | 188 tty->print_cr("deopt happened."); |
194 objArrayOop oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); | 189 HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[0]; |
195 | 190 tty->print_cr("first deopter was workitem %d", pdeopt->workitem()); |
196 // since slots are allocated from the beginning, we know how far to look | |
197 assert(e->num_deopts() < MAX_DEOPT_SAVE_STATES_SIZE, "deopt save state overflow"); | |
198 for (int k = 0; k < e->num_deopts(); k++) { | |
199 HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; | |
200 jint workitem = pdeopt->workitem(); | |
201 if (workitem != -1) { | |
202 // this is a workitem that deopted | |
203 HSAILFrame *hsailFrame = pdeopt->first_frame(); | |
204 int dregOopMap = hsailFrame->dreg_oops_map(); | |
205 for (int bit = 0; bit < 16; bit++) { | |
206 if ((dregOopMap & (1 << bit)) != 0) { | |
207 // the dregister at this bit is an oop, save it in the array | |
208 int index = k * 16 + bit; | |
209 void* saved_oop = (void*) hsailFrame->get_d_reg(bit); | |
210 oopsSaveArray->obj_at_put(index, (oop) saved_oop); | |
211 } | |
212 } | 191 } |
213 } | 192 |
214 } | 193 // Before handling any deopting workitems, save the pointers from |
215 | 194 // the hsail frames in oops_save so they get adjusted by any |
216 // Handle any deopting workitems. | 195 // GC. Need to do this before leaving thread_in_vm mode. |
217 int count_deoptimized = 0; | 196 // resolve handle only needed once here (not exiting vm mode) |
218 for (int k = 0; k < e->num_deopts(); k++) { | 197 objArrayOop oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); |
219 HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; | 198 |
220 | 199 // since slots are allocated from the beginning, we know how far to look |
221 jint workitem = pdeopt->workitem(); | 200 assert(e->num_deopts() < MAX_DEOPT_SAVE_STATES_SIZE, "deopt save state overflow"); |
222 if (workitem != -1) { | 201 for (int k = 0; k < e->num_deopts(); k++) { |
223 int deoptId = pdeopt->pc_offset(); | 202 HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; |
224 HSAILFrame *hsailFrame = pdeopt->first_frame(); | 203 jint workitem = pdeopt->workitem(); |
225 | 204 if (workitem != -1) { |
226 // update the hsailFrame from the oopsSaveArray | 205 // this is a workitem that deopted |
227 // re-resolve the handle | 206 HSAILFrame *hsailFrame = pdeopt->first_frame(); |
228 oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); | 207 int dregOopMap = hsailFrame->dreg_oops_map(); |
229 | 208 for (int bit = 0; bit < 16; bit++) { |
230 int dregOopMap = hsailFrame->dreg_oops_map(); | 209 if ((dregOopMap & (1 << bit)) != 0) { |
231 for (int bit = 0; bit < 16; bit++) { | 210 // the dregister at this bit is an oop, save it in the array |
232 if ((dregOopMap & (1 << bit)) != 0) { | 211 int index = k * 16 + bit; |
233 // the dregister at this bit is an oop, retrieve it from array and put back in frame | 212 void* saved_oop = (void*) hsailFrame->get_d_reg(bit); |
234 int index = k * 16 + bit; | 213 oopsSaveArray->obj_at_put(index, (oop) saved_oop); |
235 void * dregValue = (void *) oopsSaveArray->obj_at(index); | |
236 void * oldDregValue = (void *) hsailFrame->get_d_reg(bit); | |
237 assert((oldDregValue != 0 ? dregValue != 0 : dregValue == 0) , "bad dregValue retrieved"); | |
238 if (TraceGPUInteraction) { | |
239 if (dregValue != oldDregValue) { | |
240 tty->print_cr("oop moved for $d%d, workitem %d, slot %d, old=%p, new=%p", bit, workitem, k, oldDregValue, dregValue); | |
241 } | 214 } |
242 } | |
243 hsailFrame->put_d_reg(bit, (jlong) dregValue); | |
244 } | |
245 } | |
246 | |
247 JavaValue result(T_VOID); | |
248 JavaCallArguments javaArgs; | |
249 javaArgs.set_alternative_target(nm); | |
250 javaArgs.push_int(deoptId); | |
251 javaArgs.push_long((jlong) hsailFrame); | |
252 | |
253 // override the deoptimization action with Action_none until we decide | |
254 // how to handle the other actions. | |
255 int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none); | |
256 javaArgs.push_int(myActionReason); | |
257 javaArgs.push_oop((oop)NULL); | |
258 if (TraceGPUInteraction) { | |
259 int dregOopMap = hsailFrame->dreg_oops_map(); | |
260 tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d, dregOopMap=%04x", workitem, k, deoptId, hsailFrame, myActionReason, dregOopMap); | |
261 // show the registers containing references | |
262 for (int bit = 0; bit < 16; bit++) { | |
263 if ((dregOopMap & (1 << bit)) != 0) { | |
264 tty->print_cr(" oop $d%d = %p", bit, hsailFrame->get_d_reg(bit)); | |
265 } | 215 } |
266 } | 216 } |
267 } | 217 } |
268 JavaCalls::call(&result, mh, &javaArgs, THREAD); | 218 |
269 count_deoptimized++; | 219 // Handle any deopting workitems. |
270 } | 220 int count_deoptimized = 0; |
271 } | 221 for (int k = 0; k < e->num_deopts(); k++) { |
272 if (TraceGPUInteraction) { | 222 HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k]; |
273 tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized); | 223 |
274 } | 224 jint workitem = pdeopt->workitem(); |
275 | 225 if (workitem != -1) { |
276 // Handle any never_ran workitems if there were any | 226 int deoptId = pdeopt->pc_offset(); |
277 int count_never_ran = 0; | 227 HSAILFrame *hsailFrame = pdeopt->first_frame(); |
278 bool handleNeverRansHere = true; | 228 |
279 // turn off verbose trace stuff for javacall arg setup | 229 // update the hsailFrame from the oopsSaveArray |
280 bool savedTraceGPUInteraction = TraceGPUInteraction; | 230 // re-resolve the handle |
281 TraceGPUInteraction = false; | 231 oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle); |
282 jboolean *never_ran_array = e->never_ran_array(); | 232 |
283 if (handleNeverRansHere) { | 233 int dregOopMap = hsailFrame->dreg_oops_map(); |
284 for (int k = 0; k < dimX; k++) { | 234 for (int bit = 0; bit < 16; bit++) { |
285 if (never_ran_array[k]) { | 235 if ((dregOopMap & (1 << bit)) != 0) { |
286 // run it as a javaCall | 236 // the dregister at this bit is an oop, retrieve it from array and put back in frame |
287 KlassHandle methKlass = mh->method_holder(); | 237 int index = k * 16 + bit; |
288 Thread* THREAD = Thread::current(); | 238 void * dregValue = (void *) oopsSaveArray->obj_at(index); |
289 JavaValue result(T_VOID); | 239 void * oldDregValue = (void *) hsailFrame->get_d_reg(bit); |
290 JavaCallArguments javaArgs; | 240 assert((oldDregValue != 0 ? dregValue != 0 : dregValue == 0), "bad dregValue retrieved"); |
291 // re-resolve the args_handle here | 241 if (TraceGPUInteraction) { |
292 objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args_handle); | 242 if (dregValue != oldDregValue) { |
293 // This object sets up the javaCall arguments | 243 tty->print_cr("oop moved for $d%d, workitem %d, slot %d, old=%p, new=%p", bit, workitem, k, oldDregValue, dregValue); |
294 // the way argsArray is set up, this should work for instance methods as well | 244 } |
295 // (the receiver will be the first oop pushed) | 245 } |
296 HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static()); | 246 hsailFrame->put_d_reg(bit, (jlong) dregValue); |
297 if (mh->is_static()) { | 247 } |
298 JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); | 248 } |
299 } else { | 249 |
300 JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); | 250 JavaValue result(T_VOID); |
251 JavaCallArguments javaArgs; | |
252 javaArgs.set_alternative_target(nm); | |
253 javaArgs.push_int(deoptId); | |
254 javaArgs.push_long((jlong) hsailFrame); | |
255 | |
256 // override the deoptimization action with Action_none until we decide | |
257 // how to handle the other actions. | |
258 int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none); | |
259 javaArgs.push_int(myActionReason); | |
260 javaArgs.push_oop((oop) NULL); | |
261 if (TraceGPUInteraction) { | |
262 int dregOopMap = hsailFrame->dreg_oops_map(); | |
263 tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d, dregOopMap=%04x", workitem, k, deoptId, hsailFrame, myActionReason, dregOopMap); | |
264 // show the registers containing references | |
265 for (int bit = 0; bit < 16; bit++) { | |
266 if ((dregOopMap & (1 << bit)) != 0) { | |
267 tty->print_cr(" oop $d%d = %p", bit, hsailFrame->get_d_reg(bit)); | |
268 } | |
269 } | |
270 } | |
271 JavaCalls::call(&result, mh, &javaArgs, THREAD); | |
272 count_deoptimized++; | |
301 } | 273 } |
302 count_never_ran++; | 274 } |
275 if (TraceGPUInteraction) { | |
276 tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized); | |
303 } | 277 } |
304 } | 278 } |
305 TraceGPUInteraction = savedTraceGPUInteraction; | 279 |
306 if (TraceGPUInteraction) { | 280 { |
307 tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran); | 281 TraceTime t3("handle never-rans", TraceGPUInteraction); |
308 showRanges(never_ran_array, dimX); | 282 |
283 // Handle any never_ran workitems if there were any | |
284 int count_never_ran = 0; | |
285 bool handleNeverRansHere = true; | |
286 // turn off verbose trace stuff for javacall arg setup | |
287 bool savedTraceGPUInteraction = TraceGPUInteraction; | |
288 TraceGPUInteraction = false; | |
289 jboolean *never_ran_array = e->never_ran_array(); | |
290 if (handleNeverRansHere) { | |
291 for (int k = 0; k < dimX; k++) { | |
292 if (never_ran_array[k]) { | |
293 // run it as a javaCall | |
294 KlassHandle methKlass = mh->method_holder(); | |
295 Thread* THREAD = Thread::current(); | |
296 JavaValue result(T_VOID); | |
297 JavaCallArguments javaArgs; | |
298 // re-resolve the args_handle here | |
299 objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args_handle); | |
300 // This object sets up the javaCall arguments | |
301 // the way argsArray is set up, this should work for instance methods as well | |
302 // (the receiver will be the first oop pushed) | |
303 HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static()); | |
304 if (mh->is_static()) { | |
305 JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); | |
306 } else { | |
307 JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); | |
308 } | |
309 count_never_ran++; | |
310 } | |
311 } | |
312 TraceGPUInteraction = savedTraceGPUInteraction; | |
313 if (TraceGPUInteraction) { | |
314 tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran); | |
315 showRanges(never_ran_array, dimX); | |
316 } | |
317 } // end of never-ran handling | |
309 } | 318 } |
310 } // end of never-ran handling | 319 } |
311 | 320 |
312 } | |
313 | |
314 if (useDeoptInfo) { | |
315 FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal); | 321 FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal); |
316 delete e; | 322 delete e; |
317 } | 323 } |
318 return success; | 324 return success; |
319 } | 325 } |