x86: Fix the multiplication microops.

author Gabe Black <gabeblack@google.com>

Tue, 16 May 2017 02:39:51 +0000 (19:39 -0700)

committer Anthony Gutierrez <anthony.gutierrez@amd.com>

Tue, 16 May 2017 20:02:03 +0000 (20:02 +0000)
author Gabe Black <gabeblack@google.com>
Tue, 16 May 2017 02:39:51 +0000 (19:39 -0700)
committer Anthony Gutierrez <anthony.gutierrez@amd.com>
Tue, 16 May 2017 20:02:03 +0000 (20:02 +0000)
diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa

index ef0c4cb18320ead7db5c7b91e87792f90bb443f5..dc5f0affe8ea9bf926f264f9182af34a3480d5fb 100644 (file)
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@@ -546,23 +546,42 @@ let {{
      class Mul1s(WrRegOp):
          op_class = 'IntMultOp'
  
+        # Multiply two values Aa and Bb where Aa = (A << p) + a, then correct
+        # for negative operands.
+        #   Aa * Bb
+        # = ((A << p) + a) * ((B << p) + b)
+        # = ((A * B) << 2p) + ((A * b + a * B) << p) + a * b
          code = '''
              ProdLow = psrc1 * op2;
-            int halfSize = (dataSize * 8) / 2;
-            uint64_t shifter = (ULL(1) << halfSize);
-            uint64_t hiResult;
-            uint64_t psrc1_h = psrc1 / shifter;
-            uint64_t psrc1_l = psrc1 & mask(halfSize);
-            uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
-            uint64_t psrc2_l = op2 & mask(halfSize);
-            hiResult = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
-                        ((psrc1_l * psrc2_l) / shifter)) /shifter) +
-                       psrc1_h * psrc2_h;
+
+            int p = (dataSize * 8) / 2;
+            uint64_t A = bits(psrc1, 2 * p - 1, p);
+            uint64_t a = bits(psrc1, p - 1, 0);
+            uint64_t B = bits<uint64_t>(op2, 2 * p - 1, p);
+            uint64_t b = bits<uint64_t>(op2, p - 1, 0);
+
+            uint64_t c1, c2; // Carry between place values.
+            uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B;
+
+            c1 = ab >> p;
+
+            // Be careful to avoid overflow if p is large.
+            if (p == 32) {
+                c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1);
+                c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1;
+                c2 >>= (p - 1);
+            } else {
+                c2 = (c1 + Ab + aB) >> p;
+            }
+
+            uint64_t hi = AB + c2;
+
              if (bits(psrc1, dataSize * 8 - 1))
-                hiResult -= op2;
+                hi -= op2;
              if (bits(op2, dataSize * 8 - 1))
-                hiResult -= psrc1;
-            ProdHi = hiResult;
+                hi -= psrc1;
+
+            ProdHi = hi;
              '''
          flag_code = '''
              if ((-ProdHi & mask(dataSize * 8)) !=
@@ -578,17 +597,34 @@ let {{
      class Mul1u(WrRegOp):
          op_class = 'IntMultOp'
  
+        # Multiply two values Aa and Bb where Aa = (A << p) + a.
+        #   Aa * Bb
+        # = ((A << p) + a) * ((B << p) + b)
+        # = ((A * B) << 2p) + ((A * b + a * B) << p) + a * b
          code = '''
              ProdLow = psrc1 * op2;
-            int halfSize = (dataSize * 8) / 2;
-            uint64_t shifter = (ULL(1) << halfSize);
-            uint64_t psrc1_h = psrc1 / shifter;
-            uint64_t psrc1_l = psrc1 & mask(halfSize);
-            uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
-            uint64_t psrc2_l = op2 & mask(halfSize);
-            ProdHi = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
-                      ((psrc1_l * psrc2_l) / shifter)) / shifter) +
-                     psrc1_h * psrc2_h;
+
+            int p = (dataSize * 8) / 2;
+            uint64_t A = bits(psrc1, 2 * p - 1, p);
+            uint64_t a = bits(psrc1, p - 1, 0);
+            uint64_t B = bits<uint64_t>(op2, 2 * p - 1, p);
+            uint64_t b = bits<uint64_t>(op2, p - 1, 0);
+
+            uint64_t c1, c2; // Carry between place values.
+            uint64_t ab = a * b, Ab = A * b, aB = a * B, AB = A * B;
+
+            c1 = ab >> p;
+
+            // Be careful to avoid overflow if p is large.
+            if (p == 32) {
+                c2 = (c1 >> 1) + (Ab >> 1) + (aB >> 1);
+                c2 += ((c1 & 0x1) + (Ab & 0x1) + (aB & 0x1)) >> 1;
+                c2 >>= (p - 1);
+            } else {
+                c2 = (c1 + Ab + aB) >> p;
+            }
+
+            ProdHi = AB + c2;
              '''
          flag_code = '''
              if (ProdHi) {
author	Gabe Black <gabeblack@google.com>
	Tue, 16 May 2017 02:39:51 +0000 (19:39 -0700)
committer	Anthony Gutierrez <anthony.gutierrez@amd.com>
	Tue, 16 May 2017 20:02:03 +0000 (20:02 +0000)