From 4aa38a9fbc432405e27da10d18ae1a5e9c78f76a Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Thu, 21 Apr 2022 19:27:04 -0700 Subject: [PATCH] add sv.madded sv.subfe to divmnu64.c --- openpower/sv/biginteger/divmnu64.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/openpower/sv/biginteger/divmnu64.c b/openpower/sv/biginteger/divmnu64.c index 93f106b30..eb157139d 100644 --- a/openpower/sv/biginteger/divmnu64.c +++ b/openpower/sv/biginteger/divmnu64.c @@ -135,7 +135,8 @@ int divmnu(unsigned q[], unsigned r[], const unsigned u[], const unsigned v[], if (rhat < b) goto again; } -#define MUL_RSUB_CARRY_2_STAGE2 +// don't define here, so that all options can easily be tested by passing -D... +// #define MUL_RSUB_CARRY_2_STAGE2 #ifdef ORIGINAL // Multiply and subtract. k = 0; @@ -288,6 +289,32 @@ int divmnu(unsigned q[], unsigned r[], const unsigned u[], const unsigned v[], un[i + j] = (uint32_t)result; } bool need_fixup = carry != 0; +#elif defined(MADDED_SUBFE) + (void)p; // shut up unused variable warning + + // Multiply and subtract. + uint32_t carry = 0; + uint32_t product[n + 1]; + // VL = n + 1 + // sv.madded product.v, vn.v, qhat.s, carry.s + for (int i = 0; i <= n; i++) + { + uint32_t vn_v = i < n ? vn[i] : 0; + uint64_t value = (uint64_t)vn_v * (uint64_t)qhat + carry; + carry = (uint32_t)(value >> 32); + product[i] = (uint32_t)value; + } + bool ca = true; + uint32_t *un_j = &un[j]; + // VL = n + 1 + // sv.subfe un_j.v, product.v, un_j.v + for (int i = 0; i <= n; i++) + { + uint64_t value = (uint64_t)~product[i] + (uint64_t)un_j[i] + ca; + ca = value >> 32 != 0; + un_j[i] = value; + } + bool need_fixup = !ca; #else #error need to choose one of the algorithm options; e.g. -DORIGINAL #endif -- 2.30.2