```
This algorithm may be morphed into a pair of Vector operations by temporary
-storage of the products. Assume a maddxd (produces HI-LO)
+storage of the products.
```
- std::int64_t carry{0}; // signed; carry > 0, borrow < 0
- std::int64_t widedigit; // signed
- for (int i = 0; i < n; ++i) {
- std::uint64_t product = static_cast<std::uint32_t>(qhat)
- * static_cast<std::uint64_t>(divisor.digits_[i]);
-
- widedigit = (dividend.digits_[k + i] + carry)
- - (product & 0xffffffffLL);
-
- dividend.digits_[k + i] = widedigit; // assigns 2^32-complement
- // if widedigit < 0
-
- carry = (widedigit >> 32) - (product >> 32);
- }
-
-```
-
-may be morphed to:
-
-
+ uint32_t borrow = 0;
+ for(int i = 0; i <= n; i++) {
+ uint32_t vn_i = i < n ? vn[i] : 0;
+ uint64_t value = un[i + j] - (uint64_t)qhat * vn_i;
+ plo[i] = value & 0xffffffffLL;
+ phi[i] = value >> 32;
+ }
+ for(int i = 0; i <= n; i++) {
+ uint64_t value = (((uint64_t)phi[i]<<32) | plo[i]) - borrow;
+ borrow = ~(value >> 32)+1; // -(uint32_t)(value >> 32);
+ un[i + j] = (uint32_t)value;
+ }
+ bool need_fixup = borrow != 0;
```
- std::int64_t carry{0}; // signed; carry > 0, borrow < 0
- std::int64_t widedigit; // signed
- std::uint32_t prodhi[];
- std::uint32_t prodlo[];
-
- // a maddxd on dividend[k+1] - qhat * divisor[i]
- for (int i = 0; i < n; ++i) {
- std::uint64_t product = static_cast<std::uint32_t>(qhat)
- * static_cast<std::uint64_t>(divisor.digits_[i]);
- widedigit = (dividend.digits_[k + i]) - (product & 0xffffffffLL);
- prodlo[i] = widedigit&0xffffffff;
- prodhi[i] = product>>32;
- }
- // um?
- for (int i = 0; i < n; ++i) {
- widedigit = prodlo[i] + carry
- dividend.digits_[k + i] = widedigit&0xffffffff
- carry = (widedigit >> 32) - prodhi[i];
- }
-
-```
Transformation of the 4-in, 2-out operation into a pair of operations:
**weirdaddx RT, RA, RB** (RS=RB+VL for SVP64, RS=RB+1 for scalar)
cat[0:127] = (RS) || (RB)
- sum[0:127] = cat + EXTZ(RA) - 1
+ sum[0:127] = cat - EXTZ(RA)
rhi[0:63] = sum[0:63]
- if (RA) <= 1 then rhi = rhi + 1
- RA = rhi
+ RA = ~rhi + 1
RT = sum[64:127]
Combined, these two operations are simply: