From e8e0d803a3ac8acc85b9c0c49b864888ce0a34f8 Mon Sep 17 00:00:00 2001
From: lkcl <lkcl@web>
Date: Mon, 18 Apr 2022 22:12:22 +0100
Subject: [PATCH] bitmanip appendix: replace bigint division inner-loop example, simplify weirdaddx

---
 openpower/sv/bitmanip/appendix.mdwn | 61 ++++++++---------------------
 1 file changed, 16 insertions(+), 45 deletions(-)

diff --git a/openpower/sv/bitmanip/appendix.mdwn b/openpower/sv/bitmanip/appendix.mdwn
index 643d4e94f..e87166043 100644
--- a/openpower/sv/bitmanip/appendix.mdwn
+++ b/openpower/sv/bitmanip/appendix.mdwn
@@ -97,52 +97,24 @@ so the inner loop in the bigint division algorithm would end up being
 ```
 
 This algorithm may be morphed into a pair of Vector operations by temporary
-storage of the products. Assume a maddxd (produces HI-LO)
+storage of each 64-bit per-digit result as separate high and low 32-bit halves.
 
 ```
-        std::int64_t carry{0};     // signed; carry > 0, borrow < 0
-        std::int64_t widedigit;    // signed
-        for (int i = 0; i < n; ++i) {
-            std::uint64_t product = static_cast<std::uint32_t>(qhat) 
-                            * static_cast<std::uint64_t>(divisor.digits_[i]);
-            
-            widedigit = (dividend.digits_[k + i] + carry) 
-                        - (product & 0xffffffffLL);
-            
-            dividend.digits_[k + i] = widedigit; // assigns 2^32-complement
-                                                 // if widedigit < 0
-            
-            carry = (widedigit >> 32) - (product >> 32);
-        }
-
-```
-
-may be morphed to:
-
-
+      uint32_t borrow = 0;
+      for(int i = 0; i <= n; i++) {
+         uint32_t vn_i = i < n ? vn[i] : 0;
+         uint64_t value = un[i + j] - (uint64_t)qhat * vn_i;
+         plo[i] = value & 0xffffffffLL;
+         phi[i] = value >> 32;
+      }
+      for(int i = 0; i <= n; i++) {
+         uint64_t value = (((uint64_t)phi[i]<<32) | plo[i]) - borrow;
+         borrow = ~(value >> 32)+1; // -(uint32_t)(value >> 32);
+         un[i + j] = (uint32_t)value;
+      }
+      bool need_fixup = borrow != 0;
 ```
-        std::int64_t carry{0};     // signed; carry > 0, borrow < 0
-        std::int64_t widedigit;    // signed
-        std::uint32_t prodhi[];
-        std::uint32_t prodlo[];
-
-        // a maddxd on dividend[k+1] - qhat * divisor[i]
-        for (int i = 0; i < n; ++i) {
-            std::uint64_t product = static_cast<std::uint32_t>(qhat) 
-                            * static_cast<std::uint64_t>(divisor.digits_[i]);
-            widedigit = (dividend.digits_[k + i]) - (product & 0xffffffffLL);
-            prodlo[i] = widedigit&0xffffffff;
-            prodhi[i] = product>>32;
-        }
 
-        // um?
-        for (int i = 0; i < n; ++i) {
-            widedigit = prodlo[i] + carry
-            dividend.digits_[k + i] = widedigit&0xffffffff
-            carry = (widedigit >> 32) - prodhi[i];
-        }
-
-```
 
 Transformation of 4-in, 2-out into a pair of operations:
 
@@ -170,10 +142,9 @@ in order to carry on the algorithm.
 **weirdaddx RT, RA, RB** (RS=RB+VL for SVP64, RS=RB+1 for scalar)
 
     cat[0:127] = (RS) || (RB)
-    sum[0:127] = cat + EXTZ(RA) - 1
+    sum[0:127] = cat - EXTZ(RA)
     rhi[0:63] = sum[0:63]
-    if (RA) <= 1 then rhi = rhi + 1
-    RA = rhi
+    RA = ~rhi + 1
     RT = sum[64:127]
 
 These two combine as, simply:
-- 
2.30.2