From ff9f7758944dbd157f32ca78df61fd6902b1ce4a Mon Sep 17 00:00:00 2001
From: lkcl <lkcl@web>
Date: Fri, 22 Apr 2022 10:32:26 +0100
Subject: [PATCH]

---
 openpower/sv/biginteger/analysis.mdwn | 49 +++++++++++++++------------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/openpower/sv/biginteger/analysis.mdwn b/openpower/sv/biginteger/analysis.mdwn
index 6c1322bdd..b8233655f 100644
--- a/openpower/sv/biginteger/analysis.mdwn
+++ b/openpower/sv/biginteger/analysis.mdwn
@@ -274,31 +274,36 @@ shows that it can be split into two in exactly the same way as Algorithm M,
 this time using subtract instead of add.
 
 ```
-      // this becomes the basis for sv.msubed in RS=RC Mode,
-      // where k is RC
-      k = 0;
-      for (i = 0; i < m; i++) {
-         unsigned product = k - u[i]*v[j];
-         k = product>>16;
-         plo[i] = product; // & 0xffff
-      }
-      // this is simply sv.subfe where k is XER.CA
-      k = 1; // borrow not carry
-      for (i = 0; i < m; i++) {
-         t = w[i + j] + k - plo[i];
-         w[i + j] = t;          // (I.e., t & 0xFFFF).
-         k = t >> 16; // borrow: should only be 1 bit
-      }
+        uint32_t carry = 0;
+        uint32_t product[n + 1];
+        // this becomes the basis for sv.madded in RS=RC Mode,
+        // where carry is RC
+        // VL = n + 1
+        // sv.madded product.v, vn.v, qhat.s, carry.s
+        for (int i = 0; i <= n; i++)
+        {
+            uint32_t vn_v = i < n ? vn[i] : 0;
+            uint64_t value = (uint64_t)vn_v * (uint64_t)qhat + carry;
+            carry = (uint32_t)(value >> 32);
+            product[i] = (uint32_t)value;
+        }
+        bool ca = true;
+        uint32_t *un_j = &un[j];
+        // this is simply sv.subfe where ca is XER.CA
+        // sv.subfe un_j.v, product.v, un_j.v
+        for (int i = 0; i <= n; i++)
+        {
+            uint64_t value = (uint64_t)~product[i] + (uint64_t)un_j[i] + ca;
+            ca = value >> 32 != 0;
+            un_j[i] = value;
+        }
+        bool need_fixup = !ca;
 ```
 
 In essence then the primary focus of Vectorised Big-Int divide is in
-fact big-integer multiply (more specifically, mul-and-subtract).
-
-    product = RC - (RA) * (RB)
-    RT = lowerhalf(product)
-    RS = upperhalf(product)
+fact big-integer multiply.
 
 Detection of the fixup (phase 3) is determined by the Carry (borrow)
 bit at the end. Logically: if borrow was required then the qhat estimate
-was too large and the correction is required, which is nothing more than
-a Vectorised big-integer add (one instruction).
+was too large and the correction is required, which is, again,
+nothing more than a Vectorised big-integer add (one instruction).
-- 
2.30.2