if (rhat < b) goto again;
}
+#define SUB_MUL_BORROW
#ifdef ORIGINAL
// Multiply and subtract.
k = 0;
// Multiply and subtract.
uint32_t borrow = 0;
+ uint32_t phi[2000]; // plenty space
+ uint32_t plo[2000]; // plenty space
+ // first, compute un[i+j] - qhat*vn[i] per digit and store the 64-bit
+ // result split into hi (phi) and lo (plo) halves. this models the vectorised
+ // sv.msubx, which stores a 128-bit result in two 64-bit registers
for(int i = 0; i <= n; i++) {
uint32_t vn_i = i < n ? vn[i] : 0;
- uint64_t value = un[i + j] - (uint64_t)qhat * vn_i - borrow;
+ uint64_t value = un[i + j] - (uint64_t)qhat * vn_i;
+ plo[i] = value & 0xffffffffLL;
+ phi[i] = value >> 32;
+ }
+ // second, reconstruct each 64-bit result and subtract the borrow, then
+ // store the negated top half as the new borrow and the low half as the
+ // answer digit. this is the new (odd) instruction
+ for(int i = 0; i <= n; i++) {
+ uint64_t value = (((uint64_t)phi[i]<<32) | plo[i]) - borrow;
borrow = -(uint32_t)(value >> 32);
un[i + j] = (uint32_t)value;
}