changeset 8691:571076d3c79d

8009120: Fuzz instruction scheduling in HotSpot compilers Reviewed-by: kvn, vlivanov
author shade
date Tue, 05 Mar 2013 04:24:50 -0800
parents bf06968a8a00
children 4f553e24b3b5
files src/share/vm/opto/c2_globals.hpp src/share/vm/opto/compile.cpp src/share/vm/opto/compile.hpp src/share/vm/opto/gcm.cpp src/share/vm/opto/lcm.cpp
diffstat 5 files changed, 61 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/opto/c2_globals.hpp	Mon Mar 04 13:15:01 2013 -0800
+++ b/src/share/vm/opto/c2_globals.hpp	Tue Mar 05 04:24:50 2013 -0800
@@ -54,6 +54,12 @@
 
 #define C2_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct) \
                                                                             \
+  develop(bool, StressLCM, false,                                           \
+          "Randomize instruction scheduling in LCM")                        \
+                                                                            \
+  develop(bool, StressGCM, false,                                           \
+          "Randomize instruction scheduling in GCM")                        \
+                                                                            \
   notproduct(intx, CompileZapFirst, 0,                                      \
           "If +ZapDeadCompiledLocals, "                                     \
           "skip this many before compiling in zap calls")                   \
--- a/src/share/vm/opto/compile.cpp	Mon Mar 04 13:15:01 2013 -0800
+++ b/src/share/vm/opto/compile.cpp	Tue Mar 05 04:24:50 2013 -0800
@@ -3669,3 +3669,38 @@
     n->set_req(0, NULL);
   }
 }
+
+// Auxiliary method to support randomized stressing/fuzzing.
+//
+// This method can be called the arbitrary number of times, with current count
+// as the argument. The logic allows selecting a single candidate from the
+// running list of candidates as follows:
+//    int count = 0;
+//    Cand* selected = null;
+//    while(cand = cand->next()) {
+//      if (randomized_select(++count)) {
+//        selected = cand;
+//      }
+//    }
+//
+// Including count equalizes the chances any candidate is "selected".
+// This is useful when we don't have the complete list of candidates to choose
+// from uniformly. In this case, we need to adjust the randomicity of the
+// selection, or else we will end up biasing the selection towards the latter
+// candidates.
+//
+// Quick back-envelope calculation shows that for the list of n candidates
+// the equal probability for the candidate to persist as "best" can be
+// achieved by replacing it with "next" k-th candidate with the probability
+// of 1/k. It can be easily shown that by the end of the run, the
+// probability for any candidate is converged to 1/n, thus giving the
+// uniform distribution among all the candidates.
+//
+// We don't care about the domain size as long as (RANDOMIZED_DOMAIN / count) is large.
+#define RANDOMIZED_DOMAIN_POW 29
+#define RANDOMIZED_DOMAIN (1 << RANDOMIZED_DOMAIN_POW)
+#define RANDOMIZED_DOMAIN_MASK ((1 << (RANDOMIZED_DOMAIN_POW + 1)) - 1)
+bool Compile::randomized_select(int count) {
+  assert(count > 0, "only positive");
+  return (os::random() & RANDOMIZED_DOMAIN_MASK) < (RANDOMIZED_DOMAIN / count);
+}
--- a/src/share/vm/opto/compile.hpp	Mon Mar 04 13:15:01 2013 -0800
+++ b/src/share/vm/opto/compile.hpp	Tue Mar 05 04:24:50 2013 -0800
@@ -1086,6 +1086,9 @@
 
   // Definitions of pd methods
   static void pd_compiler2_init();
+
+  // Auxiliary method for randomized fuzzing/stressing
+  static bool randomized_select(int count);
 };
 
 #endif // SHARE_VM_OPTO_COMPILE_HPP
--- a/src/share/vm/opto/gcm.cpp	Mon Mar 04 13:15:01 2013 -0800
+++ b/src/share/vm/opto/gcm.cpp	Tue Mar 05 04:24:50 2013 -0800
@@ -1046,6 +1046,8 @@
   }
 #endif
 
+  int cand_cnt = 0;  // number of candidates tried
+
   // Walk up the dominator tree from LCA (Lowest common ancestor) to
   // the earliest legal location.  Capture the least execution frequency.
   while (LCA != early) {
@@ -1071,8 +1073,11 @@
         LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq);
     }
 #endif
+    cand_cnt++;
     if (LCA_freq < least_freq              || // Better Frequency
-        ( !in_latency                   &&    // No block containing latency
+        (StressGCM && Compile::randomized_select(cand_cnt)) || // Should be randomly accepted in stress mode
+         (!StressGCM                    &&    // Otherwise, choose with latency
+          !in_latency                   &&    // No block containing latency
           LCA_freq < least_freq * delta &&    // No worse frequency
           target >= end_lat             &&    // within latency range
           !self->is_iteratively_computed() )  // But don't hoist IV increments
@@ -1210,7 +1215,8 @@
     }
 
     // If there is no opportunity to hoist, then we're done.
-    bool try_to_hoist = (LCA != early);
+    // In stress mode, try to hoist even the single operations.
+    bool try_to_hoist = StressGCM || (LCA != early);
 
     // Must clone guys stay next to use; no hoisting allowed.
     // Also cannot hoist guys that alter memory or are otherwise not
--- a/src/share/vm/opto/lcm.cpp	Mon Mar 04 13:15:01 2013 -0800
+++ b/src/share/vm/opto/lcm.cpp	Tue Mar 05 04:24:50 2013 -0800
@@ -421,6 +421,7 @@
   uint latency = 0; // Bigger is scheduled first
   uint score   = 0; // Bigger is better
   int idx = -1;     // Index in worklist
+  int cand_cnt = 0; // Candidate count
 
   for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
     // Order in worklist is used to break ties.
@@ -503,11 +504,14 @@
     uint n_score   = n->req();   // Many inputs get high score to break ties
 
     // Keep best latency found
-    if( choice < n_choice ||
-        ( choice == n_choice &&
-          ( latency < n_latency ||
-            ( latency == n_latency &&
-              ( score < n_score ))))) {
+    cand_cnt++;
+    if (choice < n_choice ||
+        (choice == n_choice &&
+         ((StressLCM && Compile::randomized_select(cand_cnt)) ||
+          (!StressLCM &&
+           (latency < n_latency ||
+            (latency == n_latency &&
+             (score < n_score))))))) {
       choice  = n_choice;
       latency = n_latency;
       score   = n_score;