rhat = rhat + vn[n-1];
if (rhat < b) goto again;
}
-
+#define MUL_RSUB_CARRY_2_STAGE2
#ifdef ORIGINAL
// Multiply and subtract.
k = 0;
un[i + j] = (uint32_t)result;
}
bool need_fixup = carry != 1;
+#elif defined(MUL_RSUB_CARRY_2_STAGE2)
+ (void)p; // shut up unused variable warning
+
+ // Multiply and subtract.
+ uint32_t carry = 0;
+ uint32_t phi[2000]; // plenty space
+ uint32_t plo[2000]; // plenty space
+ // same mul-and-sub as SUB_MUL_BORROW but not the same
+ // mul-and-sub-minus-one as MUL_RSUB_CARRY
+ for(int i = 0; i <= n; i++) {
+ uint32_t vn_i = i < n ? vn[i] : 0;
+ uint64_t value = un[i + j] - ((uint64_t)qhat * vn_i);
+ plo[i] = value & 0xffffffffLL;
+ phi[i] = value >> 32;
+ }
+ // NOW it starts to make sense. when there is no carry this time, the
+ // next carry is taken as-is. else the next carry is reduced by one
+ // (presumably a nonzero carry is treated here as having ~(0) above it).
+ for(int i = 0; i <= n; i++) {
+ uint64_t result = (((uint64_t)phi[i]<<32) | plo[i]) + carry;
+ uint32_t result_high = result >> 32;
+ if(carry == 0)
+ carry = result_high;
+ else
+ carry = result_high-1;
+ un[i + j] = (uint32_t)result;
+ }
+ bool need_fixup = carry != 0;
#else
#error need to choose one of the algorithm options; e.g. -DORIGINAL
#endif