Mercurial > hg > truffle

--- a/src/share/vm/ci/ciObjectFactory.cpp	Fri Aug 14 15:53:46 2009 -0700
+++ b/src/share/vm/ci/ciObjectFactory.cpp	Fri Aug 14 15:55:26 2009 -0700
@@ -219,24 +219,27 @@
   ASSERT_IN_VM;

 #ifdef ASSERT
-  oop last = NULL;
-  for (int j = 0; j< _ci_objects->length(); j++) {
-    oop o = _ci_objects->at(j)->get_oop();
-    assert(last < o, "out of order");
-    last = o;
+  if (CIObjectFactoryVerify) {
+    oop last = NULL;
+    for (int j = 0; j< _ci_objects->length(); j++) {
+      oop o = _ci_objects->at(j)->get_oop();
+      assert(last < o, "out of order");
+      last = o;
+    }
   }
 #endif // ASSERT
   int len = _ci_objects->length();
   int index = find(key, _ci_objects);
 #ifdef ASSERT
-  for (int i=0; i<_ci_objects->length(); i++) {
-    if (_ci_objects->at(i)->get_oop() == key) {
-      assert(index == i, " bad lookup");
+  if (CIObjectFactoryVerify) {
+    for (int i=0; i<_ci_objects->length(); i++) {
+      if (_ci_objects->at(i)->get_oop() == key) {
+        assert(index == i, " bad lookup");
+      }
     }
   }
 #endif
   if (!is_found_at(index, key, _ci_objects)) {
-
     // Check in the non-perm area before putting it in the list.
     NonPermObject* &bucket = find_non_perm(key);
     if (bucket != NULL) {
@@ -539,11 +542,13 @@
     objects->at_put(index, obj);
   }
 #ifdef ASSERT
-  oop last = NULL;
-  for (int j = 0; j< objects->length(); j++) {
-    oop o = objects->at(j)->get_oop();
-    assert(last < o, "out of order");
-    last = o;
+  if (CIObjectFactoryVerify) {
+    oop last = NULL;
+    for (int j = 0; j< objects->length(); j++) {
+      oop o = objects->at(j)->get_oop();
+      assert(last < o, "out of order");
+      last = o;
+    }
   }
 #endif // ASSERT
 }
--- a/src/share/vm/opto/compile.cpp	Fri Aug 14 15:53:46 2009 -0700
+++ b/src/share/vm/opto/compile.cpp	Fri Aug 14 15:55:26 2009 -0700
@@ -1545,7 +1545,7 @@
   if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
     {
       TracePhase t2("idealLoop", &_t_idealLoop, true);
-      PhaseIdealLoop ideal_loop( igvn, NULL, true );
+      PhaseIdealLoop ideal_loop( igvn, true );
       loop_opts_cnt--;
       if (major_progress()) print_method("PhaseIdealLoop 1", 2);
       if (failing())  return;
@@ -1553,7 +1553,7 @@
     // Loop opts pass if partial peeling occurred in previous pass
     if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
       TracePhase t3("idealLoop", &_t_idealLoop, true);
-      PhaseIdealLoop ideal_loop( igvn, NULL, false );
+      PhaseIdealLoop ideal_loop( igvn, false );
       loop_opts_cnt--;
       if (major_progress()) print_method("PhaseIdealLoop 2", 2);
       if (failing())  return;
@@ -1561,10 +1561,15 @@
     // Loop opts pass for loop-unrolling before CCP
     if(major_progress() && (loop_opts_cnt > 0)) {
       TracePhase t4("idealLoop", &_t_idealLoop, true);
-      PhaseIdealLoop ideal_loop( igvn, NULL, false );
+      PhaseIdealLoop ideal_loop( igvn, false );
       loop_opts_cnt--;
       if (major_progress()) print_method("PhaseIdealLoop 3", 2);
     }
+    if (!failing()) {
+      // Verify that last round of loop opts produced a valid graph
+      NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
+      PhaseIdealLoop::verify(igvn);
+    }
   }
   if (failing())  return;

@@ -1597,12 +1602,20 @@
     while(major_progress() && (loop_opts_cnt > 0)) {
       TracePhase t2("idealLoop", &_t_idealLoop, true);
       assert( cnt++ < 40, "infinite cycle in loop optimization" );
-      PhaseIdealLoop ideal_loop( igvn, NULL, true );
+      PhaseIdealLoop ideal_loop( igvn, true );
       loop_opts_cnt--;
       if (major_progress()) print_method("PhaseIdealLoop iterations", 2);
       if (failing())  return;
     }
   }
+
+  {
+    // Verify that all previous optimizations produced a valid graph
+    // at least to this point, even if no loop optimizations were done.
+    NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
+    PhaseIdealLoop::verify(igvn);
+  }
+
   {
     NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
     PhaseMacroExpand  mex(igvn);
--- a/src/share/vm/opto/domgraph.cpp	Fri Aug 14 15:53:46 2009 -0700
+++ b/src/share/vm/opto/domgraph.cpp	Fri Aug 14 15:55:26 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -396,7 +396,7 @@
 // nodes (using the is_CFG() call) and places them in a dominator tree.  Thus,
 // it needs a count of the CFG nodes for the mapping table. This is the
 // Lengauer & Tarjan O(E-alpha(E,V)) algorithm.
-void PhaseIdealLoop::Dominators( ) {
+void PhaseIdealLoop::Dominators() {
   ResourceMark rm;
   // Setup mappings from my Graph to Tarjan's stuff and back
   // Note: Tarjan uses 1-based arrays
@@ -454,7 +454,7 @@
     // flow into the main graph (and hence into ROOT) but are not reachable
     // from above.  Such code is dead, but requires a global pass to detect
     // it; this global pass was the 'build_loop_tree' pass run just prior.
-    if( whead->is_Region() ) {
+    if( !_verify_only && whead->is_Region() ) {
       for( uint i = 1; i < whead->req(); i++ ) {
         if (!has_node(whead->in(i))) {
           // Kill dead input path
--- a/src/share/vm/opto/loopnode.cpp	Fri Aug 14 15:53:46 2009 -0700
+++ b/src/share/vm/opto/loopnode.cpp	Fri Aug 14 15:55:26 2009 -0700
@@ -1420,13 +1420,12 @@
 }

 //=============================================================================
-//------------------------------PhaseIdealLoop---------------------------------
+//----------------------------build_and_optimize-------------------------------
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
 // its corresponding LoopNode.  If 'optimize' is true, do some loop cleanups.
-PhaseIdealLoop::PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me, bool do_split_ifs )
-  : PhaseTransform(Ideal_Loop),
-    _igvn(igvn),
-    _dom_lca_tags(C->comp_arena()) {
+void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) {
+  int old_progress = C->major_progress();
+
   // Reset major-progress flag for the driver's heuristics
   C->clear_major_progress();

@@ -1465,18 +1464,20 @@
   }

   // No loops after all
-  if( !_ltree_root->_child ) C->set_has_loops(false);
+  if( !_ltree_root->_child && !_verify_only ) C->set_has_loops(false);

   // There should always be an outer loop containing the Root and Return nodes.
   // If not, we have a degenerate empty program.  Bail out in this case.
   if (!has_node(C->root())) {
-    C->clear_major_progress();
-    C->record_method_not_compilable("empty program detected during loop optimization");
+    if (!_verify_only) {
+      C->clear_major_progress();
+      C->record_method_not_compilable("empty program detected during loop optimization");
+    }
     return;
   }

   // Nothing to do, so get out
-  if( !C->has_loops() && !do_split_ifs && !verify_me) {
+  if( !C->has_loops() && !do_split_ifs && !_verify_me && !_verify_only ) {
     _igvn.optimize();           // Cleanup NeverBranches
     return;
   }
@@ -1486,7 +1487,7 @@

   // Split shared headers and insert loop landing pads.
   // Do not bother doing this on the Root loop of course.
-  if( !verify_me && _ltree_root->_child ) {
+  if( !_verify_me && !_verify_only && _ltree_root->_child ) {
     if( _ltree_root->_child->beautify_loops( this ) ) {
       // Re-build loop tree!
       _ltree_root->_child = NULL;
@@ -1515,24 +1516,26 @@

   Dominators();

-  // As a side effect, Dominators removed any unreachable CFG paths
-  // into RegionNodes.  It doesn't do this test against Root, so
-  // we do it here.
-  for( uint i = 1; i < C->root()->req(); i++ ) {
-    if( !_nodes[C->root()->in(i)->_idx] ) {    // Dead path into Root?
-      _igvn.hash_delete(C->root());
-      C->root()->del_req(i);
-      _igvn._worklist.push(C->root());
-      i--;                      // Rerun same iteration on compressed edges
+  if (!_verify_only) {
+    // As a side effect, Dominators removed any unreachable CFG paths
+    // into RegionNodes.  It doesn't do this test against Root, so
+    // we do it here.
+    for( uint i = 1; i < C->root()->req(); i++ ) {
+      if( !_nodes[C->root()->in(i)->_idx] ) {    // Dead path into Root?
+        _igvn.hash_delete(C->root());
+        C->root()->del_req(i);
+        _igvn._worklist.push(C->root());
+        i--;                      // Rerun same iteration on compressed edges
+      }
     }
+
+    // Given dominators, try to find inner loops with calls that must
+    // always be executed (call dominates loop tail).  These loops do
+    // not need a separate safepoint.
+    Node_List cisstack(a);
+    _ltree_root->check_safepts(visited, cisstack);
   }

-  // Given dominators, try to find inner loops with calls that must
-  // always be executed (call dominates loop tail).  These loops do
-  // not need a separate safepoint.
-  Node_List cisstack(a);
-  _ltree_root->check_safepts(visited, cisstack);
-
   // Walk the DATA nodes and place into loops.  Find earliest control
   // node.  For CFG nodes, the _nodes array starts out and remains
   // holding the associated IdealLoopTree pointer.  For DATA nodes, the
@@ -1548,11 +1551,11 @@
   // it will be processed among C->top() inputs
   worklist.push( C->top() );
   visited.set( C->top()->_idx ); // Set C->top() as visited now
-  build_loop_early( visited, worklist, nstack, verify_me );
+  build_loop_early( visited, worklist, nstack );

   // Given early legal placement, try finding counted loops.  This placement
   // is good enough to discover most loop invariants.
-  if( !verify_me )
+  if( !_verify_me && !_verify_only )
     _ltree_root->counted_loop( this );

   // Find latest loop placement.  Find ideal loop placement.
@@ -1562,16 +1565,25 @@
   worklist.push( C->root() );
   NOT_PRODUCT( C->verify_graph_edges(); )
   worklist.push( C->top() );
-  build_loop_late( visited, worklist, nstack, verify_me );
+  build_loop_late( visited, worklist, nstack );
+
+  if (_verify_only) {
+    // restore major progress flag
+    for (int i = 0; i < old_progress; i++)
+      C->set_major_progress();
+    assert(C->unique() == unique, "verification mode made Nodes? ? ?");
+    assert(_igvn._worklist.size() == 0, "shouldn't push anything");
+    return;
+  }

   // clear out the dead code
   while(_deadlist.size()) {
-    igvn.remove_globally_dead_node(_deadlist.pop());
+    _igvn.remove_globally_dead_node(_deadlist.pop());
   }

 #ifndef PRODUCT
   C->verify_graph_edges();
-  if( verify_me ) {             // Nested verify pass?
+  if( _verify_me ) {             // Nested verify pass?
     // Check to see if the verify mode is broken
     assert(C->unique() == unique, "non-optimize mode made Nodes? ? ?");
     return;
@@ -1678,7 +1690,7 @@
 void PhaseIdealLoop::verify() const {
   int old_progress = C->major_progress();
   ResourceMark rm;
-  PhaseIdealLoop loop_verify( _igvn, this, false );
+  PhaseIdealLoop loop_verify( _igvn, this );
   VectorSet visited(Thread::current()->resource_area());

   fail = 0;
@@ -2138,54 +2150,58 @@
         // optimizing an infinite loop?
         l = _ltree_root;        // Oops, found infinite loop

-        // Insert the NeverBranch between 'm' and it's control user.
-        NeverBranchNode *iff = new (C, 1) NeverBranchNode( m );
-        _igvn.register_new_node_with_optimizer(iff);
-        set_loop(iff, l);
-        Node *if_t = new (C, 1) CProjNode( iff, 0 );
-        _igvn.register_new_node_with_optimizer(if_t);
-        set_loop(if_t, l);
+        if (!_verify_only) {
+          // Insert the NeverBranch between 'm' and it's control user.
+          NeverBranchNode *iff = new (C, 1) NeverBranchNode( m );
+          _igvn.register_new_node_with_optimizer(iff);
+          set_loop(iff, l);
+          Node *if_t = new (C, 1) CProjNode( iff, 0 );
+          _igvn.register_new_node_with_optimizer(if_t);
+          set_loop(if_t, l);

-        Node* cfg = NULL;       // Find the One True Control User of m
-        for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
-          Node* x = m->fast_out(j);
-          if (x->is_CFG() && x != m && x != iff)
-            { cfg = x; break; }
+          Node* cfg = NULL;       // Find the One True Control User of m
+          for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
+            Node* x = m->fast_out(j);
+            if (x->is_CFG() && x != m && x != iff)
+              { cfg = x; break; }
+          }
+          assert(cfg != NULL, "must find the control user of m");
+          uint k = 0;             // Probably cfg->in(0)
+          while( cfg->in(k) != m ) k++; // But check incase cfg is a Region
+          cfg->set_req( k, if_t ); // Now point to NeverBranch
+
+          // Now create the never-taken loop exit
+          Node *if_f = new (C, 1) CProjNode( iff, 1 );
+          _igvn.register_new_node_with_optimizer(if_f);
+          set_loop(if_f, l);
+          // Find frame ptr for Halt.  Relies on the optimizer
+          // V-N'ing.  Easier and quicker than searching through
+          // the program structure.
+          Node *frame = new (C, 1) ParmNode( C->start(), TypeFunc::FramePtr );
+          _igvn.register_new_node_with_optimizer(frame);
+          // Halt & Catch Fire
+          Node *halt = new (C, TypeFunc::Parms) HaltNode( if_f, frame );
+          _igvn.register_new_node_with_optimizer(halt);
+          set_loop(halt, l);
+          C->root()->add_req(halt);
         }
-        assert(cfg != NULL, "must find the control user of m");
-        uint k = 0;             // Probably cfg->in(0)
-        while( cfg->in(k) != m ) k++; // But check incase cfg is a Region
-        cfg->set_req( k, if_t ); // Now point to NeverBranch
-
-        // Now create the never-taken loop exit
-        Node *if_f = new (C, 1) CProjNode( iff, 1 );
-        _igvn.register_new_node_with_optimizer(if_f);
-        set_loop(if_f, l);
-        // Find frame ptr for Halt.  Relies on the optimizer
-        // V-N'ing.  Easier and quicker than searching through
-        // the program structure.
-        Node *frame = new (C, 1) ParmNode( C->start(), TypeFunc::FramePtr );
-        _igvn.register_new_node_with_optimizer(frame);
-        // Halt & Catch Fire
-        Node *halt = new (C, TypeFunc::Parms) HaltNode( if_f, frame );
-        _igvn.register_new_node_with_optimizer(halt);
-        set_loop(halt, l);
-        C->root()->add_req(halt);
         set_loop(C->root(), _ltree_root);
       }
     }
     // Weeny check for irreducible.  This child was already visited (this
     // IS the post-work phase).  Is this child's loop header post-visited
     // as well?  If so, then I found another entry into the loop.
-    while( is_postvisited(l->_head) ) {
-      // found irreducible
-      l->_irreducible = 1; // = true
-      l = l->_parent;
-      _has_irreducible_loops = true;
-      // Check for bad CFG here to prevent crash, and bailout of compile
-      if (l == NULL) {
-        C->record_method_not_compilable("unhandled CFG detected during loop optimization");
-        return pre_order;
+    if (!_verify_only) {
+      while( is_postvisited(l->_head) ) {
+        // found irreducible
+        l->_irreducible = 1; // = true
+        l = l->_parent;
+        _has_irreducible_loops = true;
+        // Check for bad CFG here to prevent crash, and bailout of compile
+        if (l == NULL) {
+          C->record_method_not_compilable("unhandled CFG detected during loop optimization");
+          return pre_order;
+        }
       }
     }

@@ -2253,7 +2269,7 @@
 // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
 // First pass computes the earliest controlling node possible.  This is the
 // controlling input with the deepest dominating depth.
-void PhaseIdealLoop::build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ) {
+void PhaseIdealLoop::build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ) {
   while (worklist.size() != 0) {
     // Use local variables nstack_top_n & nstack_top_i to cache values
     // on nstack's top.
@@ -2285,7 +2301,7 @@
           // (the old code here would yank a 2nd safepoint after seeing a
           // first one, even though the 1st did not dominate in the loop body
           // and thus could be avoided indefinitely)
-          if( !verify_me && ilt->_has_sfpt && n->Opcode() == Op_SafePoint &&
+          if( !_verify_only && !_verify_me && ilt->_has_sfpt && n->Opcode() == Op_SafePoint &&
               is_deleteable_safept(n)) {
             Node *in = n->in(TypeFunc::Control);
             lazy_replace(n,in);       // Pull safepoint now
@@ -2408,12 +2424,31 @@
   return LCA;
 }

-//------------------------------get_late_ctrl----------------------------------
-// Compute latest legal control.
-Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
-  assert(early != NULL, "early control should not be NULL");
+bool PhaseIdealLoop::verify_dominance(Node* n, Node* use, Node* LCA, Node* early) {
+  bool had_error = false;
+#ifdef ASSERT
+  if (early != C->root()) {
+    // Make sure that there's a dominance path from use to LCA
+    Node* d = use;
+    while (d != LCA) {
+      d = idom(d);
+      if (d == C->root()) {
+        tty->print_cr("*** Use %d isn't dominated by def %s", use->_idx, n->_idx);
+        n->dump();
+        use->dump();
+        had_error = true;
+        break;
+      }
+    }
+  }
+#endif
+  return had_error;
+}

+
+Node* PhaseIdealLoop::compute_lca_of_uses(Node* n, Node* early, bool verify) {
   // Compute LCA over list of uses
+  bool had_error = false;
   Node *LCA = NULL;
   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && LCA != early; i++) {
     Node* c = n->fast_out(i);
@@ -2423,15 +2458,34 @@
       for( uint j=1; j<c->req(); j++ ) {// For all inputs
         if( c->in(j) == n ) {   // Found matching input?
           Node *use = c->in(0)->in(j);
+          if (_verify_only && use->is_top()) continue;
           LCA = dom_lca_for_get_late_ctrl( LCA, use, n );
+          if (verify) had_error = verify_dominance(n, use, LCA, early) || had_error;
         }
       }
     } else {
       // For CFG data-users, use is in the block just prior
       Node *use = has_ctrl(c) ? get_ctrl(c) : c->in(0);
       LCA = dom_lca_for_get_late_ctrl( LCA, use, n );
+      if (verify) had_error = verify_dominance(n, use, LCA, early) || had_error;
     }
   }
+  assert(!had_error, "bad dominance");
+  return LCA;
+}
+
+//------------------------------get_late_ctrl----------------------------------
+// Compute latest legal control.
+Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
+  assert(early != NULL, "early control should not be NULL");
+
+  Node* LCA = compute_lca_of_uses(n, early);
+#ifdef ASSERT
+  if (LCA == C->root() && LCA != early) {
+    // def doesn't dominate uses so print some useful debugging output
+    compute_lca_of_uses(n, early, true);
+  }
+#endif

   // if this is a load, check for anti-dependent stores
   // We use a conservative algorithm to identify potential interfering
@@ -2576,7 +2630,7 @@
 //------------------------------build_loop_late--------------------------------
 // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
 // Second pass finds latest legal placement, and ideal loop placement.
-void PhaseIdealLoop::build_loop_late( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ) {
+void PhaseIdealLoop::build_loop_late( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ) {
   while (worklist.size() != 0) {
     Node *n = worklist.pop();
     // Only visit once
@@ -2612,7 +2666,7 @@
         }
       } else {
         // All of n's children have been processed, complete post-processing.
-        build_loop_late_post(n, verify_me);
+        build_loop_late_post(n);
         if (nstack.is_empty()) {
           // Finished all nodes on stack.
           // Process next node on the worklist.
@@ -2631,9 +2685,9 @@
 //------------------------------build_loop_late_post---------------------------
 // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
 // Second pass finds latest legal placement, and ideal loop placement.
-void PhaseIdealLoop::build_loop_late_post( Node *n, const PhaseIdealLoop *verify_me ) {
+void PhaseIdealLoop::build_loop_late_post( Node *n ) {

-  if (n->req() == 2 && n->Opcode() == Op_ConvI2L && !C->major_progress()) {
+  if (n->req() == 2 && n->Opcode() == Op_ConvI2L && !C->major_progress() && !_verify_only) {
     _igvn._worklist.push(n);  // Maybe we'll normalize it, if no more loops.
   }

@@ -2714,6 +2768,7 @@
     if( get_loop(legal)->_nest < get_loop(least)->_nest )
       least = legal;
   }
+  assert(early == legal || legal != C->root(), "bad dominance of inputs");

   // Try not to place code on a loop entry projection
   // which can inhibit range check elimination.
@@ -2731,8 +2786,8 @@
 #ifdef ASSERT
   // If verifying, verify that 'verify_me' has a legal location
   // and choose it as our location.
-  if( verify_me ) {
-    Node *v_ctrl = verify_me->get_ctrl_no_update(n);
+  if( _verify_me ) {
+    Node *v_ctrl = _verify_me->get_ctrl_no_update(n);
     Node *legal = LCA;
     while( early != legal ) {   // While not at earliest legal
       if( legal == v_ctrl ) break;  // Check for prior good location
--- a/src/share/vm/opto/loopnode.hpp	Fri Aug 14 15:53:46 2009 -0700
+++ b/src/share/vm/opto/loopnode.hpp	Fri Aug 14 15:55:26 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1998-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1998-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -442,6 +442,9 @@
   uint *_preorders;
   uint _max_preorder;

+  const PhaseIdealLoop* _verify_me;
+  bool _verify_only;
+
   // Allocate _preorders[] array
   void allocate_preorders() {
     _max_preorder = C->unique()+8;
@@ -497,6 +500,12 @@
   Node_Array _dom_lca_tags;
   void   init_dom_lca_tags();
   void   clear_dom_lca_tags();
+
+  // Helper for debugging bad dominance relationships
+  bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early);
+
+  Node* compute_lca_of_uses(Node* n, Node* early, bool verify = false);
+
   // Inline wrapper for frequent cases:
   // 1) only one use
   // 2) a use is the same as the current LCA passed as 'n1'
@@ -511,6 +520,7 @@
     return find_non_split_ctrl(n);
   }
   Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag );
+
   // true if CFG node d dominates CFG node n
   bool is_dominator(Node *d, Node *n);

@@ -621,9 +631,9 @@
   IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost );

   // Place Data nodes in some loop nest
-  void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me );
-  void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me );
-  void build_loop_late_post ( Node* n, const PhaseIdealLoop *verify_me );
+  void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
+  void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
+  void build_loop_late_post ( Node* n );

   // Array of immediate dominance info for each CFG node indexed by node idx
 private:
@@ -662,6 +672,19 @@
   // Is safept not required by an outer loop?
   bool is_deleteable_safept(Node* sfpt);

+  // Perform verification that the graph is valid.
+  PhaseIdealLoop( PhaseIterGVN &igvn) :
+    PhaseTransform(Ideal_Loop),
+    _igvn(igvn),
+    _dom_lca_tags(C->comp_arena()),
+    _verify_me(NULL),
+    _verify_only(true) {
+    build_and_optimize(false);
+  }
+
+  // build the loop tree and perform any requested optimizations
+  void build_and_optimize(bool do_split_if);
+
 public:
   // Dominators for the sea of nodes
   void Dominators();
@@ -671,7 +694,32 @@
   Node *dom_lca_internal( Node *n1, Node *n2 ) const;

   // Compute the Ideal Node to Loop mapping
-  PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me, bool do_split_ifs );
+  PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) :
+    PhaseTransform(Ideal_Loop),
+    _igvn(igvn),
+    _dom_lca_tags(C->comp_arena()),
+    _verify_me(NULL),
+    _verify_only(false) {
+    build_and_optimize(do_split_ifs);
+  }
+
+  // Verify that verify_me made the same decisions as a fresh run.
+  PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) :
+    PhaseTransform(Ideal_Loop),
+    _igvn(igvn),
+    _dom_lca_tags(C->comp_arena()),
+    _verify_me(verify_me),
+    _verify_only(false) {
+    build_and_optimize(false);
+  }
+
+  // Build and verify the loop tree without modifying the graph.  This
+  // is useful to verify that all inputs properly dominate their uses.
+  static void verify(PhaseIterGVN& igvn) {
+#ifdef ASSERT
+    PhaseIdealLoop v(igvn);
+#endif
+  }

   // True if the method has at least 1 irreducible loop
   bool _has_irreducible_loops;
--- a/src/share/vm/opto/phase.cpp	Fri Aug 14 15:53:46 2009 -0700
+++ b/src/share/vm/opto/phase.cpp	Fri Aug 14 15:55:26 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -53,6 +53,7 @@
 elapsedTimer Phase::_t_registerMethod;
 elapsedTimer Phase::_t_temporaryTimer1;
 elapsedTimer Phase::_t_temporaryTimer2;
+elapsedTimer Phase::_t_idealLoopVerify;

 // Subtimers for _t_optimizer
 elapsedTimer   Phase::_t_iterGVN;
@@ -88,51 +89,52 @@
   tty->print_cr ("Accumulated compiler times:");
   tty->print_cr ("---------------------------");
   tty->print_cr ("  Total compilation: %3.3f sec.", Phase::_t_totalCompilation.seconds());
-  tty->print    ("    method compilation : %3.3f sec", Phase::_t_methodCompilation.seconds());
+  tty->print    ("    method compilation   : %3.3f sec", Phase::_t_methodCompilation.seconds());
   tty->print    ("/%d bytes",_total_bytes_compiled);
   tty->print_cr (" (%3.0f bytes per sec) ", Phase::_total_bytes_compiled / Phase::_t_methodCompilation.seconds());
-  tty->print_cr ("    stub compilation   : %3.3f sec.", Phase::_t_stubCompilation.seconds());
+  tty->print_cr ("    stub compilation     : %3.3f sec.", Phase::_t_stubCompilation.seconds());
   tty->print_cr ("  Phases:");
-  tty->print_cr ("    parse        : %3.3f sec", Phase::_t_parser.seconds());
+  tty->print_cr ("    parse          : %3.3f sec", Phase::_t_parser.seconds());
   if (DoEscapeAnalysis) {
-    tty->print_cr ("    escape analysis : %3.3f sec", Phase::_t_escapeAnalysis.seconds());
+    tty->print_cr ("    escape analysis   : %3.3f sec", Phase::_t_escapeAnalysis.seconds());
   }
-  tty->print_cr ("    optimizer    : %3.3f sec", Phase::_t_optimizer.seconds());
+  tty->print_cr ("    optimizer      : %3.3f sec", Phase::_t_optimizer.seconds());
   if( Verbose || WizardMode ) {
-    tty->print_cr ("      iterGVN      : %3.3f sec", Phase::_t_iterGVN.seconds());
-    tty->print_cr ("      idealLoop    : %3.3f sec", Phase::_t_idealLoop.seconds());
-    tty->print_cr ("      ccp          : %3.3f sec", Phase::_t_ccp.seconds());
-    tty->print_cr ("      iterGVN2     : %3.3f sec", Phase::_t_iterGVN2.seconds());
-    tty->print_cr ("      graphReshape : %3.3f sec", Phase::_t_graphReshaping.seconds());
+    tty->print_cr ("      iterGVN        : %3.3f sec", Phase::_t_iterGVN.seconds());
+    tty->print_cr ("      idealLoop      : %3.3f sec", Phase::_t_idealLoop.seconds());
+    tty->print_cr ("      idealLoopVerify: %3.3f sec", Phase::_t_idealLoopVerify.seconds());
+    tty->print_cr ("      ccp            : %3.3f sec", Phase::_t_ccp.seconds());
+    tty->print_cr ("      iterGVN2       : %3.3f sec", Phase::_t_iterGVN2.seconds());
+    tty->print_cr ("      graphReshape   : %3.3f sec", Phase::_t_graphReshaping.seconds());
     double optimizer_subtotal = Phase::_t_iterGVN.seconds() +
       Phase::_t_idealLoop.seconds() + Phase::_t_ccp.seconds() +
       Phase::_t_graphReshaping.seconds();
     double percent_of_optimizer = ((optimizer_subtotal == 0.0) ? 0.0 : (optimizer_subtotal / Phase::_t_optimizer.seconds() * 100.0));
-    tty->print_cr ("      subtotal     : %3.3f sec,  %3.2f %%", optimizer_subtotal, percent_of_optimizer);
+    tty->print_cr ("      subtotal       : %3.3f sec,  %3.2f %%", optimizer_subtotal, percent_of_optimizer);
   }
-  tty->print_cr ("    matcher      : %3.3f sec", Phase::_t_matcher.seconds());
-  tty->print_cr ("    scheduler    : %3.3f sec", Phase::_t_scheduler.seconds());
-  tty->print_cr ("    regalloc     : %3.3f sec", Phase::_t_registerAllocation.seconds());
+  tty->print_cr ("    matcher        : %3.3f sec", Phase::_t_matcher.seconds());
+  tty->print_cr ("    scheduler      : %3.3f sec", Phase::_t_scheduler.seconds());
+  tty->print_cr ("    regalloc       : %3.3f sec", Phase::_t_registerAllocation.seconds());
   if( Verbose || WizardMode ) {
-    tty->print_cr ("      ctorChaitin  : %3.3f sec", Phase::_t_ctorChaitin.seconds());
-    tty->print_cr ("      buildIFG     : %3.3f sec", Phase::_t_buildIFGphysical.seconds());
-    tty->print_cr ("      computeLive  : %3.3f sec", Phase::_t_computeLive.seconds());
-    tty->print_cr ("      regAllocSplit: %3.3f sec", Phase::_t_regAllocSplit.seconds());
+    tty->print_cr ("      ctorChaitin    : %3.3f sec", Phase::_t_ctorChaitin.seconds());
+    tty->print_cr ("      buildIFG       : %3.3f sec", Phase::_t_buildIFGphysical.seconds());
+    tty->print_cr ("      computeLive    : %3.3f sec", Phase::_t_computeLive.seconds());
+    tty->print_cr ("      regAllocSplit  : %3.3f sec", Phase::_t_regAllocSplit.seconds());
     tty->print_cr ("      postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds());
-    tty->print_cr ("      fixupSpills  : %3.3f sec", Phase::_t_fixupSpills.seconds());
+    tty->print_cr ("      fixupSpills    : %3.3f sec", Phase::_t_fixupSpills.seconds());
     double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() +
       Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() +
       Phase::_t_regAllocSplit.seconds()    + Phase::_t_fixupSpills.seconds() +
       Phase::_t_postAllocCopyRemoval.seconds();
     double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0));
-    tty->print_cr ("      subtotal     : %3.3f sec,  %3.2f %%", regalloc_subtotal, percent_of_regalloc);
+    tty->print_cr ("      subtotal       : %3.3f sec,  %3.2f %%", regalloc_subtotal, percent_of_regalloc);
   }
-  tty->print_cr ("    macroExpand  : %3.3f sec", Phase::_t_macroExpand.seconds());
-  tty->print_cr ("    blockOrdering: %3.3f sec", Phase::_t_blockOrdering.seconds());
-  tty->print_cr ("    peephole     : %3.3f sec", Phase::_t_peephole.seconds());
-  tty->print_cr ("    codeGen      : %3.3f sec", Phase::_t_codeGeneration.seconds());
-  tty->print_cr ("    install_code : %3.3f sec", Phase::_t_registerMethod.seconds());
-  tty->print_cr ("    ------------ : ----------");
+  tty->print_cr ("    macroExpand    : %3.3f sec", Phase::_t_macroExpand.seconds());
+  tty->print_cr ("    blockOrdering  : %3.3f sec", Phase::_t_blockOrdering.seconds());
+  tty->print_cr ("    peephole       : %3.3f sec", Phase::_t_peephole.seconds());
+  tty->print_cr ("    codeGen        : %3.3f sec", Phase::_t_codeGeneration.seconds());
+  tty->print_cr ("    install_code   : %3.3f sec", Phase::_t_registerMethod.seconds());
+  tty->print_cr ("    -------------- : ----------");
   double phase_subtotal = Phase::_t_parser.seconds() +
     (DoEscapeAnalysis ? Phase::_t_escapeAnalysis.seconds() : 0.0) +
     Phase::_t_optimizer.seconds() + Phase::_t_graphReshaping.seconds() +
@@ -143,7 +145,7 @@
   double percent_of_method_compile = ((phase_subtotal == 0.0) ? 0.0 : phase_subtotal / Phase::_t_methodCompilation.seconds()) * 100.0;
   // counters inside Compile::CodeGen include time for adapters and stubs
   // so phase-total can be greater than 100%
-  tty->print_cr ("    total        : %3.3f sec,  %3.2f %%", phase_subtotal, percent_of_method_compile);
+  tty->print_cr ("    total          : %3.3f sec,  %3.2f %%", phase_subtotal, percent_of_method_compile);

   assert( percent_of_method_compile > expected_method_compile_coverage ||
           phase_subtotal < minimum_meaningful_method_compile,
@@ -157,8 +159,8 @@
     tty->cr();
     tty->print_cr ("    temporaryTimer2: %3.3f sec", Phase::_t_temporaryTimer2.seconds());
   }
-  tty->print_cr ("    output    : %3.3f sec", Phase::_t_output.seconds());
-  tty->print_cr ("      isched    : %3.3f sec", Phase::_t_instrSched.seconds());
-  tty->print_cr ("      bldOopMaps: %3.3f sec", Phase::_t_buildOopMaps.seconds());
+  tty->print_cr ("    output         : %3.3f sec", Phase::_t_output.seconds());
+  tty->print_cr ("      isched         : %3.3f sec", Phase::_t_instrSched.seconds());
+  tty->print_cr ("      bldOopMaps     : %3.3f sec", Phase::_t_buildOopMaps.seconds());
 }
 #endif
--- a/src/share/vm/opto/phase.hpp	Fri Aug 14 15:53:46 2009 -0700
+++ b/src/share/vm/opto/phase.hpp	Fri Aug 14 15:55:26 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -83,6 +83,7 @@
   static elapsedTimer _t_registerMethod;
   static elapsedTimer _t_temporaryTimer1;
   static elapsedTimer _t_temporaryTimer2;
+  static elapsedTimer _t_idealLoopVerify;

 // Subtimers for _t_optimizer
   static elapsedTimer   _t_iterGVN;
--- a/src/share/vm/runtime/globals.hpp	Fri Aug 14 15:53:46 2009 -0700
+++ b/src/share/vm/runtime/globals.hpp	Fri Aug 14 15:55:26 2009 -0700
@@ -3034,6 +3034,9 @@
           "Wait for this many CI accesses to occur in all compiles before " \
           "beginning to throw OutOfMemoryErrors in each compile")           \
                                                                             \
+  notproduct(bool, CIObjectFactoryVerify, false,                            \
+          "enable potentially expensive verification in ciObjectFactory")   \
+                                                                            \
   /* Priorities */                                                          \
   product_pd(bool, UseThreadPriorities,  "Use native thread priorities")    \
                                                                             \
@@ -3284,7 +3287,7 @@
   product(uintx, SharedReadWriteSize,  12*M,                                \
           "Size of read-write space in permanent generation (in bytes)")    \
                                                                             \
-  product(uintx, SharedReadOnlySize,    8*M,                                \
+  product(uintx, SharedReadOnlySize,   10*M,                                \
           "Size of read-only space in permanent generation (in bytes)")     \
                                                                             \
   product(uintx, SharedMiscDataSize,    4*M,                                \
@@ -3309,7 +3312,7 @@
   product(bool, AnonymousClasses, false,                                    \
           "support sun.misc.Unsafe.defineAnonymousClass")                   \
                                                                             \
-  product(bool, EnableMethodHandles, false,                                 \
+  experimental(bool, EnableMethodHandles, false,                            \
           "support method handles (true by default under JSR 292)")         \
                                                                             \
   diagnostic(intx, MethodHandlePushLimit, 3,                                \
@@ -3324,7 +3327,7 @@
   diagnostic(bool, OptimizeMethodHandles, true,                             \
           "when constructing method handles, try to improve them")          \
                                                                             \
-  product(bool, EnableInvokeDynamic, false,                                 \
+  experimental(bool, EnableInvokeDynamic, false,                            \
           "recognize the invokedynamic instruction")                        \
                                                                             \
   develop(bool, TraceInvokeDynamic, false,                                  \
--- a/src/share/vm/utilities/taskqueue.hpp	Fri Aug 14 15:53:46 2009 -0700
+++ b/src/share/vm/utilities/taskqueue.hpp	Fri Aug 14 15:55:26 2009 -0700
@@ -22,94 +22,90 @@
  *
  */

-#ifdef LP64
-typedef juint TAG_TYPE;
-// for a taskqueue size of 4M
-#define LOG_TASKQ_SIZE 22
-#else
-typedef jushort TAG_TYPE;
-// for a taskqueue size of 16K
-#define LOG_TASKQ_SIZE 14
-#endif
-
 class TaskQueueSuper: public CHeapObj {
 protected:
-  // The first free element after the last one pushed (mod _n).
+  // Internal type for indexing the queue; also used for the tag.
+  typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t;
+
+  // The first free element after the last one pushed (mod N).
   volatile uint _bottom;

-  // log2 of the size of the queue.
-  enum SomeProtectedConstants {
-    Log_n = LOG_TASKQ_SIZE
+  enum {
+    N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K
+    MOD_N_MASK = N - 1                   // To compute x mod N efficiently.
   };
-#undef LOG_TASKQ_SIZE
+
+  class Age {
+  public:
+    Age(size_t data = 0)         { _data = data; }
+    Age(const Age& age)          { _data = age._data; }
+    Age(idx_t top, idx_t tag)    { _fields._top = top; _fields._tag = tag; }

-  // Size of the queue.
-  uint n() { return (1 << Log_n); }
-  // For computing "x mod n" efficiently.
-  uint n_mod_mask() { return n() - 1; }
+    Age   get()        const volatile { return _data; }
+    void  set(Age age) volatile       { _data = age._data; }
+
+    idx_t top()        const volatile { return _fields._top; }
+    idx_t tag()        const volatile { return _fields._tag; }

-  struct Age {
-    TAG_TYPE _top;
-    TAG_TYPE _tag;
+    // Increment top; if it wraps, increment tag also.
+    void increment() {
+      _fields._top = increment_index(_fields._top);
+      if (_fields._top == 0) ++_fields._tag;
+    }

-    TAG_TYPE tag() const { return _tag; }
-    TAG_TYPE top() const { return _top; }
+    Age cmpxchg(const Age new_age, const Age old_age) volatile {
+      return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data,
+                                          (volatile intptr_t *)&_data,
+                                          (intptr_t)old_age._data);
+    }
+
+    bool operator ==(const Age& other) const { return _data == other._data; }

-    Age() { _tag = 0; _top = 0; }
-
-    friend bool operator ==(const Age& a1, const Age& a2) {
-      return a1.tag() == a2.tag() && a1.top() == a2.top();
-    }
+  private:
+    struct fields {
+      idx_t _top;
+      idx_t _tag;
+    };
+    union {
+      size_t _data;
+      fields _fields;
+    };
   };
-  Age _age;
-  // These make sure we do single atomic reads and writes.
-  Age get_age() {
-    uint res = *(volatile uint*)(&_age);
-    return *(Age*)(&res);
+
+  volatile Age _age;
+
+  // These both operate mod N.
+  static uint increment_index(uint ind) {
+    return (ind + 1) & MOD_N_MASK;
   }
-  void set_age(Age a) {
-    *(volatile uint*)(&_age) = *(uint*)(&a);
+  static uint decrement_index(uint ind) {
+    return (ind - 1) & MOD_N_MASK;
   }

-  TAG_TYPE get_top() {
-    return get_age().top();
-  }
-
-  // These both operate mod _n.
-  uint increment_index(uint ind) {
-    return (ind + 1) & n_mod_mask();
-  }
-  uint decrement_index(uint ind) {
-    return (ind - 1) & n_mod_mask();
-  }
-
-  // Returns a number in the range [0.._n).  If the result is "n-1", it
-  // should be interpreted as 0.
+  // Returns a number in the range [0..N).  If the result is "N-1", it should be
+  // interpreted as 0.
   uint dirty_size(uint bot, uint top) {
-    return ((int)bot - (int)top) & n_mod_mask();
+    return (bot - top) & MOD_N_MASK;
   }

   // Returns the size corresponding to the given "bot" and "top".
   uint size(uint bot, uint top) {
     uint sz = dirty_size(bot, top);
-    // Has the queue "wrapped", so that bottom is less than top?
-    // There's a complicated special case here.  A pair of threads could
-    // perform pop_local and pop_global operations concurrently, starting
-    // from a state in which _bottom == _top+1.  The pop_local could
-    // succeed in decrementing _bottom, and the pop_global in incrementing
-    // _top (in which case the pop_global will be awarded the contested
-    // queue element.)  The resulting state must be interpreted as an empty
-    // queue.  (We only need to worry about one such event: only the queue
-    // owner performs pop_local's, and several concurrent threads
-    // attempting to perform the pop_global will all perform the same CAS,
-    // and only one can succeed.  Any stealing thread that reads after
-    // either the increment or decrement will see an empty queue, and will
-    // not join the competitors.  The "sz == -1 || sz == _n-1" state will
-    // not be modified  by concurrent queues, so the owner thread can reset
-    // the state to _bottom == top so subsequent pushes will be performed
-    // normally.
-    if (sz == (n()-1)) return 0;
-    else return sz;
+    // Has the queue "wrapped", so that bottom is less than top?  There's a
+    // complicated special case here.  A pair of threads could perform pop_local
+    // and pop_global operations concurrently, starting from a state in which
+    // _bottom == _top+1.  The pop_local could succeed in decrementing _bottom,
+    // and the pop_global in incrementing _top (in which case the pop_global
+    // will be awarded the contested queue element.)  The resulting state must
+    // be interpreted as an empty queue.  (We only need to worry about one such
+    // event: only the queue owner performs pop_local's, and several concurrent
+    // threads attempting to perform the pop_global will all perform the same
+    // CAS, and only one can succeed.)  Any stealing thread that reads after
+    // either the increment or decrement will see an empty queue, and will not
+    // join the competitors.  The "sz == -1 || sz == N-1" state will not be
+    // modified by concurrent queues, so the owner thread can reset the state to
+    // _bottom == top so subsequent pushes will be performed normally.
+    return (sz == N - 1) ? 0 : sz;
   }

 public:
@@ -122,22 +118,21 @@
   // The "careful" version admits the possibility of pop_local/pop_global
   // races.
   uint size() {
-    return size(_bottom, get_top());
+    return size(_bottom, _age.top());
   }

   uint dirty_size() {
-    return dirty_size(_bottom, get_top());
+    return dirty_size(_bottom, _age.top());
   }

   void set_empty() {
     _bottom = 0;
-    _age = Age();
+    _age.set(0);
   }

   // Maximum number of elements allowed in the queue.  This is two less
   // than the actual queue size, for somewhat complicated reasons.
-  uint max_elems() { return n() - 2; }
-
+  uint max_elems() { return N - 2; }
 };

 template<class E> class GenericTaskQueue: public TaskQueueSuper {
@@ -179,12 +174,12 @@

 template<class E>
 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() {
-  assert(sizeof(Age) == sizeof(int), "Depends on this.");
+  assert(sizeof(Age) == sizeof(size_t), "Depends on this.");
 }

 template<class E>
 void GenericTaskQueue<E>::initialize() {
-  _elems = NEW_C_HEAP_ARRAY(E, n());
+  _elems = NEW_C_HEAP_ARRAY(E, N);
   guarantee(_elems != NULL, "Allocation failed.");
 }

@@ -208,14 +203,14 @@

 template<class E>
 bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) {
-  if (dirty_n_elems == n() - 1) {
+  if (dirty_n_elems == N - 1) {
     // Actually means 0, so do the push.
     uint localBot = _bottom;
     _elems[localBot] = t;
     _bottom = increment_index(localBot);
     return true;
-  } else
-    return false;
+  }
+  return false;
 }

 template<class E>
@@ -230,53 +225,45 @@
   // then have the owner thread do a pop followed by another push.  Without
   // the incrementing of "tag", the pop_global's CAS could succeed,
   // allowing it to believe it has claimed the stale element.)
-  Age newAge;
-  newAge._top = localBot;
-  newAge._tag = oldAge.tag() + 1;
+  Age newAge((idx_t)localBot, oldAge.tag() + 1);
   // Perhaps a competing pop_global has already incremented "top", in which
   // case it wins the element.
   if (localBot == oldAge.top()) {
-    Age tempAge;
     // No competing pop_global has yet incremented "top"; we'll try to
     // install new_age, thus claiming the element.
-    assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit.");
-    *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
+    Age tempAge = _age.cmpxchg(newAge, oldAge);
     if (tempAge == oldAge) {
       // We win.
-      assert(dirty_size(localBot, get_top()) != n() - 1,
-             "Shouldn't be possible...");
+      assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
       return true;
     }
   }
-  // We fail; a completing pop_global gets the element.  But the queue is
-  // empty (and top is greater than bottom.)  Fix this representation of
-  // the empty queue to become the canonical one.
-  set_age(newAge);
-  assert(dirty_size(localBot, get_top()) != n() - 1,
-         "Shouldn't be possible...");
+  // We lose; a completing pop_global gets the element.  But the queue is empty
+  // and top is greater than bottom.  Fix this representation of the empty queue
+  // to become the canonical one.
+  _age.set(newAge);
+  assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
   return false;
 }

 template<class E>
 bool GenericTaskQueue<E>::pop_global(E& t) {
-  Age newAge;
-  Age oldAge = get_age();
+  Age oldAge = _age.get();
   uint localBot = _bottom;
   uint n_elems = size(localBot, oldAge.top());
   if (n_elems == 0) {
     return false;
   }
+
   t = _elems[oldAge.top()];
-  newAge = oldAge;
-  newAge._top = increment_index(newAge.top());
-  if ( newAge._top == 0 ) newAge._tag++;
-  Age resAge;
-  *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
+  Age newAge(oldAge);
+  newAge.increment();
+  Age resAge = _age.cmpxchg(newAge, oldAge);
+
   // Note that using "_bottom" here might fail, since a pop_local might
   // have decremented it.
-  assert(dirty_size(localBot, newAge._top) != n() - 1,
-         "Shouldn't be possible...");
-  return (resAge == oldAge);
+  assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
+  return resAge == oldAge;
 }

 template<class E>
@@ -459,7 +446,7 @@
     return offer_termination(NULL);
   }

-  // As above, but it also terminates of the should_exit_termination()
+  // As above, but it also terminates if the should_exit_termination()
   // method of the terminator parameter returns true. If terminator is
   // NULL, then it is ignored.
   bool offer_termination(TerminatorTerminator* terminator);
@@ -492,11 +479,10 @@
   }
 #else
   uint localBot = _bottom;
-  assert((localBot >= 0) && (localBot < n()), "_bottom out of range.");
-  TAG_TYPE top = get_top();
+  assert((localBot >= 0) && (localBot < N), "_bottom out of range.");
+  idx_t top = _age.top();
   uint dirty_n_elems = dirty_size(localBot, top);
-  assert((dirty_n_elems >= 0) && (dirty_n_elems < n()),
-         "n_elems out of range.");
+  assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range.");
   if (dirty_n_elems < max_elems()) {
     _elems[localBot] = t;
     _bottom = increment_index(localBot);
@@ -517,12 +503,12 @@
   return true;
 #else
   uint localBot = _bottom;
-  // This value cannot be n-1.  That can only occur as a result of
+  // This value cannot be N-1.  That can only occur as a result of
   // the assignment to bottom in this method.  If it does, this method
   // resets the size( to 0 before the next call (which is sequential,
   // since this is pop_local.)
-  uint dirty_n_elems = dirty_size(localBot, get_top());
-  assert(dirty_n_elems != n() - 1, "Shouldn't be possible...");
+  uint dirty_n_elems = dirty_size(localBot, _age.top());
+  assert(dirty_n_elems != N - 1, "Shouldn't be possible...");
   if (dirty_n_elems == 0) return false;
   localBot = decrement_index(localBot);
   _bottom = localBot;
@@ -534,15 +520,14 @@
   // If there's still at least one element in the queue, based on the
   // "_bottom" and "age" we've read, then there can be no interference with
   // a "pop_global" operation, and we're done.
-  TAG_TYPE tp = get_top();    // XXX
+  idx_t tp = _age.top();    // XXX
   if (size(localBot, tp) > 0) {
-    assert(dirty_size(localBot, tp) != n() - 1,
-           "Shouldn't be possible...");
+    assert(dirty_size(localBot, tp) != N - 1, "sanity");
     return true;
   } else {
     // Otherwise, the queue contained exactly one element; we take the slow
     // path.
-    return pop_local_slow(localBot, get_age());
+    return pop_local_slow(localBot, _age.get());
   }
 #endif
 }