openpower/sv/bitmanip/divmnu64.c

   1 /* original source code from Hackers-Delight
   2    https://github.com/hcs0/Hackers-Delight
   3 */
/* This divides an m-word dividend by an n-word divisor, giving an
m-n+1-word quotient and n-word remainder. The bignums are in arrays of
words. Here a "word" is 32 bits. This routine is designed for a 64-bit
machine which has a 64/64 division instruction. */
   8
   9 #include <stdio.h>
  10 #include <stdlib.h>     //To define "exit", req'd by XLC.
  11 #include <stdbool.h>
  12 #include <stdint.h>
  13
  14 #define max(x, y) ((x) > (y) ? (x) : (y))
  15
  16 int nlz(unsigned x) {
  17    int n;
  18
  19    if (x == 0) return(32);
  20    n = 0;
  21    if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
  22    if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
  23    if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
  24    if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
  25    if (x <= 0x7FFFFFFF) {n = n + 1;}
  26    return n;
  27 }
  28
  29 void dumpit(char *msg, int n, unsigned v[]) {
  30    int i;
  31    printf("%s", msg);
  32    for (i = n-1; i >= 0; i--) printf(" %08x", v[i]);
  33    printf("\n");
  34 }
  35
  36 /* q[0], r[0], u[0], and v[0] contain the LEAST significant words.
  37 (The sequence is in little-endian order).
  38
  39 This is a fairly precise implementation of Knuth's Algorithm D, for a
  40 binary computer with base b = 2**32. The caller supplies:
  41    1. Space q for the quotient, m - n + 1 words (at least one).
  42    2. Space r for the remainder (optional), n words.
  43    3. The dividend u, m words, m >= 1.
  44    4. The divisor v, n words, n >= 2.
  45 The most significant digit of the divisor, v[n-1], must be nonzero.  The
  46 dividend u may have leading zeros; this just makes the algorithm take
  47 longer and makes the quotient contain more leading zeros.  A value of
  48 NULL may be given for the address of the remainder to signify that the
  49 caller does not want the remainder.
  50    The program does not alter the input parameters u and v.
  51    The quotient and remainder returned may have leading zeros.  The
  52 function itself returns a value of 0 for success and 1 for invalid
  53 parameters (e.g., division by 0).
  54    For now, we must have m >= n.  Knuth's Algorithm D also requires
  55 that the dividend be at least as long as the divisor.  (In his terms,
  56 m >= 0 (unstated).  Therefore m+n >= n.) */
  57
  58 int divmnu(unsigned q[], unsigned r[],
  59      const unsigned u[], const unsigned v[],
  60      int m, int n) {
  61
  62    const unsigned long long b = 4294967296LL; // Number base (2**32).
  63    unsigned *un, *vn;                         // Normalized form of u, v.
  64    unsigned long long qhat;                   // Estimated quotient digit.
  65    unsigned long long rhat;                   // A remainder.
  66    unsigned long long p;                      // Product of two digits.
  67    long long t, k;
  68    int s, i, j;
  69
  70    if (m < n || n <= 0 || v[n-1] == 0)
  71       return 1;                         // Return if invalid param.
  72
  73    if (n == 1) {                        // Take care of
  74       k = 0;                            // the case of a
  75       for (j = m - 1; j >= 0; j--) {    // single-digit
  76          q[j] = (k*b + u[j])/v[0];      // divisor here.
  77          k = (k*b + u[j]) - q[j]*v[0];
  78       }
  79       if (r != NULL) r[0] = k;
  80       return 0;
  81    }
  82
  83    /* Normalize by shifting v left just enough so that its high-order
  84    bit is on, and shift u left the same amount. We may have to append a
  85    high-order digit on the dividend; we do that unconditionally. */
  86
  87    s = nlz(v[n-1]);             // 0 <= s <= 31.
  88    vn = (unsigned *)alloca(4*n);
  89    for (i = n - 1; i > 0; i--)
  90       vn[i] = (v[i] << s) | ((unsigned long long)v[i-1] >> (32-s));
  91    vn[0] = v[0] << s;
  92
  93    un = (unsigned *)alloca(4*(m + 1));
  94    un[m] = (unsigned long long)u[m-1] >> (32-s);
  95    for (i = m - 1; i > 0; i--)
  96       un[i] = (u[i] << s) | ((unsigned long long)u[i-1] >> (32-s));
  97    un[0] = u[0] << s;
  98
  99    for (j = m - n; j >= 0; j--) {       // Main loop.
 100       // Compute estimate qhat of q[j].
 101       qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
 102       rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];
 103 again:
 104       if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
 105       { qhat = qhat - 1;
 106         rhat = rhat + vn[n-1];
 107         if (rhat < b) goto again;
 108       }
 109
 110 #ifdef ORIGINAL
 111       // Multiply and subtract.
 112       k = 0;
 113       for (i = 0; i < n; i++) {
 114          p = qhat*vn[i];
 115          t = un[i+j] - k - (p & 0xFFFFFFFFLL);
 116          un[i+j] = t;
 117          k = (p >> 32) - (t >> 32);
 118       }
 119       t = un[j+n] - k;
 120       un[j+n] = t;
 121       bool need_fixup = t < 0;
 122 #elif defined(SUB_MUL_BORROW)
 123       (void)p; // shut up unused variable warning
 124
 125       // Multiply and subtract.
 126       uint32_t borrow = 0;
 127       for(int i = 0; i <= n; i++) {
 128          uint32_t vn_i = i < n ? vn[i] : 0;
 129          uint64_t value = un[i + j] - (uint64_t)qhat * vn_i - borrow;
 130          borrow = -(uint32_t)(value >> 32);
 131          un[i + j] = (uint32_t)value;
 132       }
 133       bool need_fixup = borrow != 0;
 134 #elif defined(MUL_RSUB_CARRY)
 135       (void)p; // shut up unused variable warning
 136
 137       // Multiply and subtract.
 138       uint32_t carry = 1;
 139       for(int i = 0; i <= n; i++) {
 140          uint32_t vn_i = i < n ? vn[i] : 0;
 141          uint64_t result = un[i + j] + ~((uint64_t)qhat * vn_i) + carry;
 142          uint32_t result_high = result >> 32;
 143          if(carry <= 1)
 144             result_high++;
 145          carry = result_high;
 146          un[i + j] = (uint32_t)result;
 147       }
 148       bool need_fixup = carry != 1;
 149 #elif defined(SUB_MUL_BORROW_2_STAGE)
 150       (void)p; // shut up unused variable warning
 151
 152       // Multiply and subtract.
 153       uint32_t borrow = 0;
 154       uint32_t phi[2000]; // plenty space
 155       uint32_t plo[2000]; // plenty space
 156       // first, perform mul-and-sub and store in split hi-lo
 157       // this shows the vectorised sv.msubx which stores 128-bit in
 158       // two 64-bit registers
 159       for(int i = 0; i <= n; i++) {
 160          uint32_t vn_i = i < n ? vn[i] : 0;
 161          uint64_t value = un[i + j] - (uint64_t)qhat * vn_i;
 162          plo[i] = value & 0xffffffffLL;
 163          phi[i] = value >> 32;
 164       }
 165       // second, reconstruct the 64-bit result, subtract borrow,
 166       // store top-half (-ve) in new borrow and store low-half as answer
 167       // this is the new (odd) instruction
 168       for(int i = 0; i <= n; i++) {
 169          uint64_t value = (((uint64_t)phi[i]<<32) | plo[i]) - borrow;
 170          borrow = ~(value >> 32)+1; // -(uint32_t)(value >> 32);
 171          un[i + j] = (uint32_t)value;
 172       }
 173       bool need_fixup = borrow != 0;
 174 #elif defined(MUL_RSUB_CARRY_2_STAGE)
 175       (void)p; // shut up unused variable warning
 176
 177       // Multiply and subtract.
 178       uint32_t carry = 1;
 179       uint32_t phi[2000]; // plenty space
 180       uint32_t plo[2000]; // plenty space
 181       for(int i = 0; i <= n; i++) {
 182          uint32_t vn_i = i < n ? vn[i] : 0;
 183          uint64_t value = un[i + j] + ~((uint64_t)qhat * vn_i);
 184          plo[i] = value & 0xffffffffLL;
 185          phi[i] = value >> 32;
 186       }
 187       for(int i = 0; i <= n; i++) {
 188          uint64_t result = (((uint64_t)phi[i]<<32) | plo[i]) + carry;
 189          uint32_t result_high = result >> 32;
 190          if(carry <= 1)
 191             result_high++;
 192          carry = result_high;
 193          un[i + j] = (uint32_t)result;
 194       }
 195       bool need_fixup = carry != 1;
 196 #elif defined(MUL_RSUB_CARRY_2_STAGE1)
 197       (void)p; // shut up unused variable warning
 198
 199       // Multiply and subtract.
 200       uint32_t carry = 1;
 201       uint32_t phi[2000]; // plenty space
 202       uint32_t plo[2000]; // plenty space
 203       // same mul-and-sub as SUB_MUL_BORROW but not the same
 204       // mul-and-sub-minus-one as MUL_RSUB_CARRY
 205       for(int i = 0; i <= n; i++) {
 206          uint32_t vn_i = i < n ? vn[i] : 0;
 207          uint64_t value = un[i + j] - ((uint64_t)qhat * vn_i);
 208          plo[i] = value & 0xffffffffLL;
 209          phi[i] = value >> 32;
 210       }
 211       // compensate for the +1 that was added by mul-and-sub by subtracting
 212       // it here (as ~(0))
 213       for(int i = 0; i <= n; i++) {
 214          uint64_t result = (((uint64_t)phi[i]<<32) | plo[i]) + carry+
 215                            ~(0); // a way to express "-1"
 216          uint32_t result_high = result >> 32;
 217          if(carry <= 1)
 218             result_high++;
 219          carry = result_high;
 220          un[i + j] = (uint32_t)result;
 221       }
 222       bool need_fixup = carry != 1;
 223 #else
 224 #error need to choose one of the algorithm options; e.g. -DORIGINAL
 225 #endif
 226
 227       q[j] = qhat;              // Store quotient digit.
 228       if (need_fixup) {         // If we subtracted too
 229          q[j] = q[j] - 1;       // much, add back.
 230          k = 0;
 231          for (i = 0; i < n; i++) {
 232             t = (unsigned long long)un[i+j] + vn[i] + k;
 233             un[i+j] = t;
 234             k = t >> 32;
 235          }
 236          un[j+n] = un[j+n] + k;
 237       }
 238    } // End j.
 239    // If the caller wants the remainder, unnormalize
 240    // it and pass it back.
 241    if (r != NULL) {
 242       for (i = 0; i < n-1; i++)
 243          r[i] = (un[i] >> s) | ((unsigned long long)un[i+1] << (32-s));
 244       r[n-1] = un[n-1] >> s;
 245    }
 246    return 0;
 247 }
 248
 249 int errors;
 250
 251 void check(unsigned q[], unsigned r[],
 252            unsigned u[], unsigned v[],
 253            int m, int n,
 254            unsigned cq[], unsigned cr[]) {
 255    int i, szq;
 256
 257    szq = max(m - n + 1, 1);
 258    for (i = 0; i < szq; i++) {
 259       if (q[i] != cq[i]) {
 260          errors = errors + 1;
 261          dumpit("Error, dividend u =", m, u);
 262          dumpit("       divisor  v =", n, v);
 263          dumpit("For quotient,  got:", m-n+1, q);
 264          dumpit("        Should get:", m-n+1, cq);
 265          return;
 266       }
 267    }
 268    for (i = 0; i < n; i++) {
 269       if (r[i] != cr[i]) {
 270          errors = errors + 1;
 271          dumpit("Error, dividend u =", m, u);
 272          dumpit("       divisor  v =", n, v);
 273          dumpit("For remainder, got:", n, r);
 274          dumpit("        Should get:", n, cr);
 275          return;
 276       }
 277    }
 278    return;
 279 }
 280
 281 int main() {
 282    static unsigned test[] = {
 283    // m, n, u...,          v...,          cq...,  cr....
 284       1, 1, 3,             0,             1,      1,            // Error, divide by 0.
 285       1, 2, 7,             1,3,           0,      7,0,          // Error, n > m.
 286       2, 2, 0,0,           1,0,           0,      0,0,          // Error, incorrect remainder cr.
 287       1, 1, 3,             2,             1,      1,
 288       1, 1, 3,             3,             1,      0,
 289       1, 1, 3,             4,             0,      3,
 290       1, 1, 0,             0xffffffff,    0,      0,
 291       1, 1, 0xffffffff,    1,             0xffffffff, 0,
 292       1, 1, 0xffffffff,    0xffffffff,    1,      0,
 293       1, 1, 0xffffffff,    3,             0x55555555, 0,
 294       2, 1, 0xffffffff,0xffffffff, 1,     0xffffffff,0xffffffff, 0,
 295       2, 1, 0xffffffff,0xffffffff, 0xffffffff,        1,1,    0,
 296       2, 1, 0xffffffff,0xfffffffe, 0xffffffff,        0xffffffff,0, 0xfffffffe,
 297       2, 1, 0x00005678,0x00001234, 0x00009abc,        0x1e1dba76,0, 0x6bd0,
 298       2, 2, 0,0,           0,1,           0,      0,0,
 299       2, 2, 0,7,           0,3,           2,      0,1,
 300       2, 2, 5,7,           0,3,           2,      5,1,
 301       2, 2, 0,6,           0,2,           3,      0,0,
 302       1, 1, 0x80000000,  0x40000001, 0x00000001, 0x3fffffff,
 303       2, 1, 0x00000000,0x80000000, 0x40000001, 0xfffffff8,0x00000001, 0x00000008,
 304       2, 2, 0x00000000,0x80000000, 0x00000001,0x40000000, 0x00000001, 0xffffffff,0x3fffffff,
 305       2, 2, 0x0000789a,0x0000bcde, 0x0000789a,0x0000bcde,          1,          0,0,
 306       2, 2, 0x0000789b,0x0000bcde, 0x0000789a,0x0000bcde,          1,          1,0,
 307       2, 2, 0x00007899,0x0000bcde, 0x0000789a,0x0000bcde,          0, 0x00007899,0x0000bcde,
 308       2, 2, 0x0000ffff,0x0000ffff, 0x0000ffff,0x0000ffff,          1,          0,0,
 309       2, 2, 0x0000ffff,0x0000ffff, 0x00000000,0x00000001, 0x0000ffff, 0x0000ffff,0,
 310       3, 2, 0x000089ab,0x00004567,0x00000123, 0x00000000,0x00000001,   0x00004567,0x00000123, 0x000089ab,0,
 311       3, 2, 0x00000000,0x0000fffe,0x00008000, 0x0000ffff,0x00008000,   0xffffffff,0x00000000, 0x0000ffff,0x00007fff, // Shows that first qhat can = b + 1.
 312       3, 3, 0x00000003,0x00000000,0x80000000, 0x00000001,0x00000000,0x20000000,   0x00000003, 0,0,0x20000000, // Adding back step req'd.
 313       3, 3, 0x00000003,0x00000000,0x00008000, 0x00000001,0x00000000,0x00002000,   0x00000003, 0,0,0x00002000, // Adding back step req'd.
 314       4, 3, 0,0,0x00008000,0x00007fff, 1,0,0x00008000,   0xfffe0000,0, 0x00020000,0xffffffff,0x00007fff,  // Add back req'd.
 315       4, 3, 0,0x0000fffe,0,0x00008000, 0x0000ffff,0,0x00008000, 0xffffffff,0, 0x0000ffff,0xffffffff,0x00007fff,  // Shows that mult-sub quantity cannot be treated as signed.
 316       4, 3, 0,0xfffffffe,0,0x80000000, 0x0000ffff,0,0x80000000, 0x00000000,1, 0x00000000,0xfffeffff,0x00000000,  // Shows that mult-sub quantity cannot be treated as signed.
 317       4, 3, 0,0xfffffffe,0,0x80000000, 0xffffffff,0,0x80000000, 0xffffffff,0, 0xffffffff,0xffffffff,0x7fffffff,  // Shows that mult-sub quantity cannot be treated as signed.
 318    };
 319    int i, n, m, ncases, f;
 320    unsigned q[10], r[10];
 321    unsigned *u, *v, *cq, *cr;
 322
 323    printf("divmnu:\n");
 324    i = 0;
 325    ncases = 0;
 326    while (i < sizeof(test)/4) {
 327       m = test[i];
 328       n = test[i+1];
 329       u = &test[i+2];
 330       v = &test[i+2+m];
 331       cq = &test[i+2+m+n];
 332       cr = &test[i+2+m+n+max(m-n+1, 1)];
 333
 334       f = divmnu(q, r, u, v, m, n);
 335       if (f) {
 336          dumpit("Error return code for dividend u =", m, u);
 337          dumpit("                      divisor  v =", n, v);
 338          errors = errors + 1;
 339       }
 340       else
 341          check(q, r, u, v, m, n, cq, cr);
 342       i = i + 2 + m + n + max(m-n+1, 1) + n;
 343       ncases = ncases + 1;
 344    }
 345
 346    printf("%d errors out of %d cases; there should be 3.\n", errors, ncases);
 347    return 0;
 348 }