}
```
-The key loop may be implemented with a 4-in, 2-out mul-twin-add:
+The key loop may be implemented with a 4-in, 2-out mul-twin-add
+(which is too much):
```
On Sat, Apr 16, 2022, 22:06 Jacob Lifshay <programmerjake@gmail.com> wrote:
setvl loop_count
sv.mrsubcarry rn.v, rd.v, rq.s, rn.v
```
+
+This algorithm may be morphed into a pair of Vector operations by temporarily
+storing the products. Assume a maddxd (which produces HI-LO).
+
+```
+ std::int64_t carry{0}; // signed; carry > 0, borrow < 0
+ std::int64_t widedigit; // signed
+ for (int i = 0; i < n; ++i) {
+ std::uint64_t product = static_cast<std::uint32_t>(qhat)
+ * static_cast<std::uint64_t>(divisor.digits_[i]);
+
+ widedigit = (dividend.digits_[k + i] + carry)
+ - (product & 0xffffffffLL);
+
+ dividend.digits_[k + i] = widedigit; // assigns 2^32-complement
+ // if widedigit < 0
+
+ carry = (widedigit >> 32) - (product >> 32);
+ }
+
+```
+
+may be morphed to:
+
+
+```
+ std::int64_t carry{0}; // signed; carry > 0, borrow < 0
+ std::int64_t widedigit; // signed
+ std::uint32_t prodhi[];
+ std::uint32_t prodlo[];
+
+ // a maddxd on dividend[k+i] - qhat * divisor[i]
+ for (int i = 0; i < n; ++i) {
+ std::uint64_t product = static_cast<std::uint32_t>(qhat)
+ * static_cast<std::uint64_t>(divisor.digits_[i]);
+ widedigit = (dividend.digits_[k + i]) - (product & 0xffffffffLL);
+ prodlo[i] = widedigit&0xffffffff;
+ prodhi[i] = product>>32;
+ }
+
+ // um?
+ for (int i = 0; i < n; ++i) {
+ widedigit = prodlo[i] + carry;
+ dividend.digits_[k + i] = widedigit&0xffffffff;
+ carry = (widedigit >> 32) - prodhi[i];
+ }
+
+```