diff src/gpu/hsail/vm/gpu_hsail.cpp @ 14969:a6c144380ce7

HSAIL: added UseHSAILDeoptimization VM option for disabling HSAIL deopt support. Contributed-by: Eric Caspole <eric.caspole@amd.com>
author Doug Simon <doug.simon@oracle.com>
date Fri, 04 Apr 2014 12:22:49 +0200
parents 3e9a960f0da1
children 2cae21d9f122
line wrap: on
line diff
--- a/src/gpu/hsail/vm/gpu_hsail.cpp	Fri Apr 04 12:05:41 2014 +0200
+++ b/src/gpu/hsail/vm/gpu_hsail.cpp	Fri Apr 04 12:22:49 2014 +0200
@@ -133,10 +133,6 @@
   }
 }
 
-
-// for experimentation
-static bool useDeoptInfo = true;
-
 jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args_handle, methodHandle& mh, nmethod *nm, jobject oops_save_handle, TRAPS) {
 
   ResourceMark rm(THREAD);
@@ -145,9 +141,8 @@
   // Reset the kernel arguments
   _okra_clearargs(kernel);
 
-  
   HSAILDeoptimizationInfo* e;
-  if (useDeoptInfo) {
+  if (UseHSAILDeoptimization) {
     e = new (ResourceObj::C_HEAP, mtInternal) HSAILDeoptimizationInfo();
     e->set_never_ran_array(NEW_C_HEAP_ARRAY(jboolean, dimX, mtInternal));
     memset(e->never_ran_array(), 0, dimX * sizeof(jboolean));
@@ -168,150 +163,161 @@
   }
 
   // Run the kernel
-  bool success = _okra_execute_with_range(kernel, dimX);
-  // check if any workitem requested a deopt
-  // currently we only support at most one such workitem
-
-
-  int deoptcode = e->deopt_occurred();
-  if (useDeoptInfo &&  deoptcode != 0) {
-    if (deoptcode != 1) {
-      // error condition detected in deopt code
-      char msg[200];
-      sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1*(deoptcode + 1));
-      guarantee(deoptcode == 1, msg);
-    }
-    if (TraceGPUInteraction) {
-      tty->print_cr("deopt happened.");
-      HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[0];
-      tty->print_cr("first deopter was workitem %d", pdeopt->workitem());
-    }
+  bool success = false;
+  {
+    TraceTime t1("execute kernel", TraceGPUInteraction);
+    success = _okra_execute_with_range(kernel, dimX);
+  }
 
-    // Before handling any deopting workitems, save the pointers from
-    // the hsail frames in oops_save so they get adjusted by any
-    // GC. Need to do this before leaving thread_in_vm mode.
-    // resolve handle only needed once here (not exiting vm mode)
-    objArrayOop oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle);
+  if (UseHSAILDeoptimization) {
+    // check if any workitem requested a deopt
+    // currently we only support at most one such workitem
+    int deoptcode = e->deopt_occurred();
+    if (deoptcode != 0) {
+      if (deoptcode != 1) {
+        // error condition detected in deopt code
+        char msg[200];
+        sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1 * (deoptcode + 1));
+        guarantee(deoptcode == 1, msg);
+      }
 
-    // since slots are allocated from the beginning, we know how far to look
-    assert(e->num_deopts() < MAX_DEOPT_SAVE_STATES_SIZE, "deopt save state overflow");
-    for (int k = 0; k < e->num_deopts(); k++) {
-      HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k];
-      jint workitem = pdeopt->workitem();
-      if (workitem != -1) {      
-        // this is a workitem that deopted
-        HSAILFrame *hsailFrame = pdeopt->first_frame();
-        int dregOopMap = hsailFrame->dreg_oops_map();
-        for (int bit = 0; bit < 16; bit++) {
-          if ((dregOopMap & (1 << bit)) != 0) {
-            // the dregister at this bit is an oop, save it in the array
-            int index = k * 16 + bit;
-            void* saved_oop = (void*) hsailFrame->get_d_reg(bit);
-            oopsSaveArray->obj_at_put(index, (oop) saved_oop);
-          }
-        }
-      }
-    }   
+      {
+        TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction);
 
-    // Handle any deopting workitems. 
-    int count_deoptimized = 0;
-    for (int k = 0; k < e->num_deopts(); k++) {
-      HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k];
-    
-      jint workitem = pdeopt->workitem();
-      if (workitem != -1) {      
-        int deoptId = pdeopt->pc_offset();
-        HSAILFrame *hsailFrame = pdeopt->first_frame();
+        if (TraceGPUInteraction) {
+          tty->print_cr("deopt happened.");
+          HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[0];
+          tty->print_cr("first deopter was workitem %d", pdeopt->workitem());
+        }
 
-        // update the hsailFrame from the oopsSaveArray
-        // re-resolve the handle
-        oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle);
+        // Before handling any deopting workitems, save the pointers from
+        // the hsail frames in oops_save so they get adjusted by any
+        // GC. Need to do this before leaving thread_in_vm mode.
+        // resolve handle only needed once here (not exiting vm mode)
+        objArrayOop oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle);
 
-        int dregOopMap = hsailFrame->dreg_oops_map();
-        for (int bit = 0; bit < 16; bit++) {
-          if ((dregOopMap & (1 << bit)) != 0) {
-            // the dregister at this bit is an oop, retrieve it from array and put back in frame
-            int index = k * 16 + bit;
-            void * dregValue = (void *) oopsSaveArray->obj_at(index);
-            void * oldDregValue = (void *) hsailFrame->get_d_reg(bit);
-            assert((oldDregValue != 0 ? dregValue != 0 : dregValue == 0) , "bad dregValue retrieved");
-            if (TraceGPUInteraction) {
-              if (dregValue != oldDregValue) {
-                tty->print_cr("oop moved for $d%d, workitem %d, slot %d, old=%p, new=%p", bit, workitem, k, oldDregValue, dregValue);
+        // since slots are allocated from the beginning, we know how far to look
+        assert(e->num_deopts() < MAX_DEOPT_SAVE_STATES_SIZE, "deopt save state overflow");
+        for (int k = 0; k < e->num_deopts(); k++) {
+          HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k];
+          jint workitem = pdeopt->workitem();
+          if (workitem != -1) {
+            // this is a workitem that deopted
+            HSAILFrame *hsailFrame = pdeopt->first_frame();
+            int dregOopMap = hsailFrame->dreg_oops_map();
+            for (int bit = 0; bit < 16; bit++) {
+              if ((dregOopMap & (1 << bit)) != 0) {
+                // the dregister at this bit is an oop, save it in the array
+                int index = k * 16 + bit;
+                void* saved_oop = (void*) hsailFrame->get_d_reg(bit);
+                oopsSaveArray->obj_at_put(index, (oop) saved_oop);
               }
             }
-            hsailFrame->put_d_reg(bit, (jlong) dregValue);
-          }
-        }
-       
-        JavaValue result(T_VOID);
-        JavaCallArguments javaArgs;
-        javaArgs.set_alternative_target(nm);
-        javaArgs.push_int(deoptId);
-        javaArgs.push_long((jlong) hsailFrame);
-
-        // override the deoptimization action with Action_none until we decide
-        // how to handle the other actions.
-        int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none);
-        javaArgs.push_int(myActionReason);
-        javaArgs.push_oop((oop)NULL);
-        if (TraceGPUInteraction) {
-          int dregOopMap = hsailFrame->dreg_oops_map();
-          tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d, dregOopMap=%04x", workitem, k, deoptId, hsailFrame, myActionReason, dregOopMap);
-          // show the registers containing references
-          for (int bit = 0; bit < 16; bit++) {
-            if ((dregOopMap & (1 << bit)) != 0) {
-              tty->print_cr("  oop $d%d = %p", bit, hsailFrame->get_d_reg(bit));
-            }
           }
         }
-        JavaCalls::call(&result, mh, &javaArgs, THREAD);
-        count_deoptimized++;
+
+        // Handle any deopting workitems.
+        int count_deoptimized = 0;
+        for (int k = 0; k < e->num_deopts(); k++) {
+          HSAILKernelDeoptimization * pdeopt = &e->_deopt_save_states[k];
+
+          jint workitem = pdeopt->workitem();
+          if (workitem != -1) {
+            int deoptId = pdeopt->pc_offset();
+            HSAILFrame *hsailFrame = pdeopt->first_frame();
+
+            // update the hsailFrame from the oopsSaveArray
+            // re-resolve the handle
+            oopsSaveArray = (objArrayOop) JNIHandles::resolve(oops_save_handle);
+
+            int dregOopMap = hsailFrame->dreg_oops_map();
+            for (int bit = 0; bit < 16; bit++) {
+              if ((dregOopMap & (1 << bit)) != 0) {
+                // the dregister at this bit is an oop, retrieve it from array and put back in frame
+                int index = k * 16 + bit;
+                void * dregValue = (void *) oopsSaveArray->obj_at(index);
+                void * oldDregValue = (void *) hsailFrame->get_d_reg(bit);
+                assert((oldDregValue != 0 ? dregValue != 0 : dregValue == 0), "bad dregValue retrieved");
+                if (TraceGPUInteraction) {
+                  if (dregValue != oldDregValue) {
+                    tty->print_cr("oop moved for $d%d, workitem %d, slot %d, old=%p, new=%p", bit, workitem, k, oldDregValue, dregValue);
+                  }
+                }
+                hsailFrame->put_d_reg(bit, (jlong) dregValue);
+              }
+            }
+
+            JavaValue result(T_VOID);
+            JavaCallArguments javaArgs;
+            javaArgs.set_alternative_target(nm);
+            javaArgs.push_int(deoptId);
+            javaArgs.push_long((jlong) hsailFrame);
+
+            // override the deoptimization action with Action_none until we decide
+            // how to handle the other actions.
+            int myActionReason = Deoptimization::make_trap_request(Deoptimization::trap_request_reason(pdeopt->reason()), Deoptimization::Action_none);
+            javaArgs.push_int(myActionReason);
+            javaArgs.push_oop((oop) NULL);
+            if (TraceGPUInteraction) {
+              int dregOopMap = hsailFrame->dreg_oops_map();
+              tty->print_cr("[HSAIL] Deoptimizing to host for workitem=%d (slot=%d) with deoptId=%d, frame=" INTPTR_FORMAT ", actionAndReason=%d, dregOopMap=%04x", workitem, k, deoptId, hsailFrame, myActionReason, dregOopMap);
+              // show the registers containing references
+              for (int bit = 0; bit < 16; bit++) {
+                if ((dregOopMap & (1 << bit)) != 0) {
+                  tty->print_cr("  oop $d%d = %p", bit, hsailFrame->get_d_reg(bit));
+                }
+              }
+            }
+            JavaCalls::call(&result, mh, &javaArgs, THREAD);
+            count_deoptimized++;
+          }
+        }
+        if (TraceGPUInteraction) {
+          tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized);
+        }
       }
-    }    
-    if (TraceGPUInteraction) {
-      tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized);
+
+      {
+        TraceTime t3("handle never-rans", TraceGPUInteraction);
+
+        // Handle any never_ran workitems if there were any
+        int count_never_ran = 0;
+        bool handleNeverRansHere = true;
+        // turn off verbose trace stuff for javacall arg setup
+        bool savedTraceGPUInteraction = TraceGPUInteraction;
+        TraceGPUInteraction = false;
+        jboolean *never_ran_array = e->never_ran_array();
+        if (handleNeverRansHere) {
+          for (int k = 0; k < dimX; k++) {
+            if (never_ran_array[k]) {
+              // run it as a javaCall
+              KlassHandle methKlass = mh->method_holder();
+              Thread* THREAD = Thread::current();
+              JavaValue result(T_VOID);
+              JavaCallArguments javaArgs;
+              // re-resolve the args_handle here
+              objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args_handle);
+              // This object sets up the javaCall arguments
+              // the way argsArray is set up, this should work for instance methods as well
+              // (the receiver will be the first oop pushed)
+              HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static());
+              if (mh->is_static()) {
+                JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
+              } else {
+                JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
+              }
+              count_never_ran++;
+            }
+          }
+          TraceGPUInteraction = savedTraceGPUInteraction;
+          if (TraceGPUInteraction) {
+            tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran);
+            showRanges(never_ran_array, dimX);
+          }
+        } // end of never-ran handling
+      }
     }
 
-    // Handle any never_ran workitems if there were any
-    int count_never_ran = 0;
-    bool handleNeverRansHere = true;
-    // turn off verbose trace stuff for javacall arg setup
-    bool savedTraceGPUInteraction = TraceGPUInteraction;
-    TraceGPUInteraction = false;
-    jboolean *never_ran_array = e->never_ran_array();
-    if (handleNeverRansHere) {
-      for (int k = 0; k < dimX; k++) {
-        if (never_ran_array[k]) {
-          // run it as a javaCall
-          KlassHandle methKlass = mh->method_holder();
-          Thread* THREAD = Thread::current();
-          JavaValue result(T_VOID);
-          JavaCallArguments javaArgs;
-          // re-resolve the args_handle here
-          objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args_handle);
-          // This object sets up the javaCall arguments
-          // the way argsArray is set up, this should work for instance methods as well
-          // (the receiver will be the first oop pushed)
-          HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static());
-          if (mh->is_static()) {
-            JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
-          } else {
-            JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
-          }
-          count_never_ran++;
-        }
-      }
-      TraceGPUInteraction = savedTraceGPUInteraction;
-      if (TraceGPUInteraction) {
-        tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran);
-        showRanges(never_ran_array, dimX);
-      }
-    } // end of never-ran handling
-
-  }
-
-  if (useDeoptInfo) {
     FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal);
     delete e;
   }