diff src/share/vm/opto/divnode.cpp @ 567:bbef4344adb2

6800154: Add comments to long_by_long_mulhi() for better understandability Summary: This patch adds a comment pointing to the Hacker's Delight version of the algorithm plus a verbatim copy of it. Furthermore it adds inline comments. Reviewed-by: kvn, jrose
author twisti
date Fri, 13 Feb 2009 09:09:35 -0800
parents 78c058bc5cdc
children 30663ca5e8f4
line wrap: on
line diff
--- a/src/share/vm/opto/divnode.cpp	Fri Feb 06 13:31:03 2009 -0800
+++ b/src/share/vm/opto/divnode.cpp	Fri Feb 13 09:09:35 2009 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -244,42 +244,73 @@
 
 //---------------------long_by_long_mulhi--------------------------------------
 // Generate ideal node graph for upper half of a 64 bit x 64 bit multiplication
-static Node *long_by_long_mulhi( PhaseGVN *phase, Node *dividend, jlong magic_const) {
+static Node* long_by_long_mulhi(PhaseGVN* phase, Node* dividend, jlong magic_const) {
   // If the architecture supports a 64x64 mulhi, there is
   // no need to synthesize it in ideal nodes.
   if (Matcher::has_match_rule(Op_MulHiL)) {
-    Node *v = phase->longcon(magic_const);
+    Node* v = phase->longcon(magic_const);
     return new (phase->C, 3) MulHiLNode(dividend, v);
   }
 
+  // Taken from Hacker's Delight, Fig. 8-2. Multiply high signed.
+  // (http://www.hackersdelight.org/HDcode/mulhs.c)
+  //
+  // int mulhs(int u, int v) {
+  //    unsigned u0, v0, w0;
+  //    int u1, v1, w1, w2, t;
+  //
+  //    u0 = u & 0xFFFF;  u1 = u >> 16;
+  //    v0 = v & 0xFFFF;  v1 = v >> 16;
+  //    w0 = u0*v0;
+  //    t  = u1*v0 + (w0 >> 16);
+  //    w1 = t & 0xFFFF;
+  //    w2 = t >> 16;
+  //    w1 = u0*v1 + w1;
+  //    return u1*v1 + w2 + (w1 >> 16);
+  // }
+  //
+  // Note: The version above is for 32x32 multiplications, while the
+  // following inline comments are adapted to 64x64.
+
   const int N = 64;
 
-  Node *u_hi = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N / 2)));
-  Node *u_lo = phase->transform(new (phase->C, 3) AndLNode(dividend, phase->longcon(0xFFFFFFFF)));
+  // u0 = u & 0xFFFFFFFF;  u1 = u >> 32;
+  Node* u0 = phase->transform(new (phase->C, 3) AndLNode(dividend, phase->longcon(0xFFFFFFFF)));
+  Node* u1 = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N / 2)));
+
+  // v0 = v & 0xFFFFFFFF;  v1 = v >> 32;
+  Node* v0 = phase->longcon(magic_const & 0xFFFFFFFF);
+  Node* v1 = phase->longcon(magic_const >> (N / 2));
 
-  Node *v_hi = phase->longcon(magic_const >> N/2);
-  Node *v_lo = phase->longcon(magic_const & 0XFFFFFFFF);
+  // w0 = u0*v0;
+  Node* w0 = phase->transform(new (phase->C, 3) MulLNode(u0, v0));
 
-  Node *hihi_product = phase->transform(new (phase->C, 3) MulLNode(u_hi, v_hi));
-  Node *hilo_product = phase->transform(new (phase->C, 3) MulLNode(u_hi, v_lo));
-  Node *lohi_product = phase->transform(new (phase->C, 3) MulLNode(u_lo, v_hi));
-  Node *lolo_product = phase->transform(new (phase->C, 3) MulLNode(u_lo, v_lo));
+  // t = u1*v0 + (w0 >> 32);
+  Node* u1v0 = phase->transform(new (phase->C, 3) MulLNode(u1, v0));
+  Node* temp = phase->transform(new (phase->C, 3) URShiftLNode(w0, phase->intcon(N / 2)));
+  Node* t    = phase->transform(new (phase->C, 3) AddLNode(u1v0, temp));
+
+  // w1 = t & 0xFFFFFFFF;
+  Node* w1 = new (phase->C, 3) AndLNode(t, phase->longcon(0xFFFFFFFF));
 
-  Node *t1 = phase->transform(new (phase->C, 3) URShiftLNode(lolo_product, phase->intcon(N / 2)));
-  Node *t2 = phase->transform(new (phase->C, 3) AddLNode(hilo_product, t1));
+  // w2 = t >> 32;
+  Node* w2 = new (phase->C, 3) RShiftLNode(t, phase->intcon(N / 2));
+
+  // 6732154: Construct both w1 and w2 before transforming, so t
+  // doesn't go dead prematurely.
+  w1 = phase->transform(w1);
+  w2 = phase->transform(w2);
 
-  // Construct both t3 and t4 before transforming so t2 doesn't go dead
-  // prematurely.
-  Node *t3 = new (phase->C, 3) RShiftLNode(t2, phase->intcon(N / 2));
-  Node *t4 = new (phase->C, 3) AndLNode(t2, phase->longcon(0xFFFFFFFF));
-  t3 = phase->transform(t3);
-  t4 = phase->transform(t4);
+  // w1 = u0*v1 + w1;
+  Node* u0v1 = phase->transform(new (phase->C, 3) MulLNode(u0, v1));
+  w1         = phase->transform(new (phase->C, 3) AddLNode(u0v1, w1));
 
-  Node *t5 = phase->transform(new (phase->C, 3) AddLNode(t4, lohi_product));
-  Node *t6 = phase->transform(new (phase->C, 3) RShiftLNode(t5, phase->intcon(N / 2)));
-  Node *t7 = phase->transform(new (phase->C, 3) AddLNode(t3, hihi_product));
+  // return u1*v1 + w2 + (w1 >> 32);
+  Node* u1v1  = phase->transform(new (phase->C, 3) MulLNode(u1, v1));
+  Node* temp1 = phase->transform(new (phase->C, 3) AddLNode(u1v1, w2));
+  Node* temp2 = phase->transform(new (phase->C, 3) RShiftLNode(w1, phase->intcon(N / 2)));
 
-  return new (phase->C, 3) AddLNode(t7, t6);
+  return new (phase->C, 3) AddLNode(temp1, temp2);
 }