From: lkcl Date: Sun, 17 Apr 2022 12:55:54 +0000 (+0100) Subject: (no commit message) X-Git-Tag: opf_rfc_ls005_v1~2756 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e1a696551305fc4d24191fbf23f5e1396ed1761b;p=libreriscv.git --- diff --git a/openpower/sv/bitmanip/appendix.mdwn b/openpower/sv/bitmanip/appendix.mdwn index 49d7f048a..cdd3d7dba 100644 --- a/openpower/sv/bitmanip/appendix.mdwn +++ b/openpower/sv/bitmanip/appendix.mdwn @@ -36,7 +36,8 @@ void div(uint32_t *n, uint32_t *d, uint32_t* q, int n_bytes, int d_bytes) { } ``` -The key loop may be implemented with a 4-in, 2-out mul-twin-add: +The key loop may be implemented with a 4-in, 2-out mul-twin-add +(which is too much): ``` On Sat, Apr 16, 2022, 22:06 Jacob Lifshay wrote: @@ -64,3 +65,51 @@ mtspr CARRY, r3 # init carry spr setvl loop_count sv.mrsubcarry rn.v, rd.v, rq.s, rn.v ``` + +This algorithm may be morphed into a pair of Vector operations by temporary +storage of the products. Assume a maddxd (produces HI-LO) + +``` + std::int64_t carry{0}; // signed; carry > 0, borrow < 0 + std::int64_t widedigit; // signed + for (int i = 0; i < n; ++i) { + std::uint64_t product = static_cast<std::uint64_t>(qhat) + * static_cast<std::uint64_t>(divisor.digits_[i]); + + widedigit = (dividend.digits_[k + i] + carry) + - (product & 0xffffffffLL); + + dividend.digits_[k + i] = widedigit; // assigns 2^32-complement + // if widedigit < 0 + + carry = (widedigit >> 32) - (product >> 32); + } + +``` + +may be morphed to: + + +``` + std::int64_t carry{0}; // signed; carry > 0, borrow < 0 + std::int64_t widedigit; // signed + std::uint32_t prodhi[]; + std::uint32_t prodlo[]; + + // a maddxd on dividend[k+i] - qhat * divisor[i] + for (int i = 0; i < n; ++i) { + std::uint64_t product = static_cast<std::uint64_t>(qhat) + * static_cast<std::uint64_t>(divisor.digits_[i]); + widedigit = (dividend.digits_[k + i]) - (product & 0xffffffffLL); + prodlo[i] = widedigit&0xffffffff; + prodhi[i] = product>>32; + } + + // um? 
+ for (int i = 0; i < n; ++i) { + widedigit = prodlo[i] + carry; + dividend.digits_[k + i] = widedigit&0xffffffff; + carry = (widedigit >> 32) - prodhi[i]; + } + +```