From e8e0d803a3ac8acc85b9c0c49b864888ce0a34f8 Mon Sep 17 00:00:00 2001 From: lkcl Date: Mon, 18 Apr 2022 22:12:22 +0100 Subject: [PATCH] --- openpower/sv/bitmanip/appendix.mdwn | 61 ++++++++--------------------- 1 file changed, 16 insertions(+), 45 deletions(-) diff --git a/openpower/sv/bitmanip/appendix.mdwn b/openpower/sv/bitmanip/appendix.mdwn index 643d4e94f..e87166043 100644 --- a/openpower/sv/bitmanip/appendix.mdwn +++ b/openpower/sv/bitmanip/appendix.mdwn @@ -97,52 +97,24 @@ so the inner loop in the bigint division algorithm would end up being ``` This algorithm may be morphed into a pair of Vector operations by temporary -storage of the products. Assume a maddxd (produces HI-LO) +storage of the products. ``` - std::int64_t carry{0}; // signed; carry > 0, borrow < 0 - std::int64_t widedigit; // signed - for (int i = 0; i < n; ++i) { - std::uint64_t product = static_cast(qhat) - * static_cast(divisor.digits_[i]); - - widedigit = (dividend.digits_[k + i] + carry) - - (product & 0xffffffffLL); - - dividend.digits_[k + i] = widedigit; // assigns 2^32-complement - // if widedigit < 0 - - carry = (widedigit >> 32) - (product >> 32); - } - -``` - -may be morphed to: - - + uint32_t borrow = 0; + for(int i = 0; i <= n; i++) { + uint32_t vn_i = i < n ? vn[i] : 0; + uint64_t value = un[i + j] - (uint64_t)qhat * vn_i; + plo[i] = value & 0xffffffffLL; + phi[i] = value >> 32; + } + for(int i = 0; i <= n; i++) { + uint64_t value = (((uint64_t)phi[i]<<32) | plo[i]) - borrow; + borrow = ~(value >> 32)+1; // -(uint32_t)(value >> 32); + un[i + j] = (uint32_t)value; + } + bool need_fixup = borrow != 0; ``` - std::int64_t carry{0}; // signed; carry > 0, borrow < 0 - std::int64_t widedigit; // signed - std::uint32_t prodhi[]; - std::uint32_t prodlo[]; - - // a maddxd on dividend[k+1] - qhat * divisor[i] - for (int i = 0; i < n; ++i) { - std::uint64_t product = static_cast(qhat) - * static_cast(divisor.digits_[i]); - widedigit = (dividend.digits_[k + i]) - (product & 0xffffffffLL); - prodlo[i] = widedigit&0xffffffff; - prodhi[i] = product>>32; - } - // um? - for (int i = 0; i < n; ++i) { - widedigit = prodlo[i] + carry - dividend.digits_[k + i] = widedigit&0xffffffff - carry = (widedigit >> 32) - prodhi[i]; - } - -``` Transformation of 4-in, 2-out into a pair of operations: @@ -170,10 +142,9 @@ in order to carry on the algorithm. **weirdaddx RT, RA, RB** (RS=RB+VL for SVP64, RS=RB+1 for scalar) cat[0:127] = (RS) || (RB) - sum[0:127] = cat + EXTZ(RA) - 1 + sum[0:127] = cat - EXTZ(RA) rhi[0:63] = sum[0:63] - if (RA) <= 1 then rhi = rhi + 1 - RA = rhi + RA = ~rhi + 1 RT = sum[64:127] These two combine as, simply: -- 2.30.2