openpower/sv/bitmanip/divmnu64.c

   1 /* original source code from Hackers-Delight
   2    https://github.com/hcs0/Hackers-Delight
   3 */
   4 /* This divides an n-word dividend by an m-word divisor, giving an
   5 n-m+1-word quotient and m-word remainder. The bignums are in arrays of
   6 words. Here a "word" is 32 bits. This routine is designed for a 64-bit
   7 machine which has a 64/64 division instruction. */
   8
   9 #include <stdio.h>
  10 #include <stdlib.h>     //To define "exit", req'd by XLC.
  11 #include <stdbool.h>
  12 #include <stdint.h>
  13
  14 #define max(x, y) ((x) > (y) ? (x) : (y))
  15
  16 int nlz(unsigned x) {
  17    int n;
  18
  19    if (x == 0) return(32);
  20    n = 0;
  21    if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
  22    if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
  23    if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
  24    if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
  25    if (x <= 0x7FFFFFFF) {n = n + 1;}
  26    return n;
  27 }
  28
  29 void dumpit(char *msg, int n, unsigned v[]) {
  30    int i;
  31    printf("%s", msg);
  32    for (i = n-1; i >= 0; i--) printf(" %08x", v[i]);
  33    printf("\n");
  34 }
  35
  36 /* q[0], r[0], u[0], and v[0] contain the LEAST significant words.
  37 (The sequence is in little-endian order).
  38
  39 This is a fairly precise implementation of Knuth's Algorithm D, for a
  40 binary computer with base b = 2**32. The caller supplies:
  41    1. Space q for the quotient, m - n + 1 words (at least one).
  42    2. Space r for the remainder (optional), n words.
  43    3. The dividend u, m words, m >= 1.
  44    4. The divisor v, n words, n >= 2.
  45 The most significant digit of the divisor, v[n-1], must be nonzero.  The
  46 dividend u may have leading zeros; this just makes the algorithm take
  47 longer and makes the quotient contain more leading zeros.  A value of
  48 NULL may be given for the address of the remainder to signify that the
  49 caller does not want the remainder.
  50    The program does not alter the input parameters u and v.
  51    The quotient and remainder returned may have leading zeros.  The
  52 function itself returns a value of 0 for success and 1 for invalid
  53 parameters (e.g., division by 0).
  54    For now, we must have m >= n.  Knuth's Algorithm D also requires
  55 that the dividend be at least as long as the divisor.  (In his terms,
  56 m >= 0 (unstated).  Therefore m+n >= n.) */
  57
  58 int divmnu(unsigned q[], unsigned r[],
  59      const unsigned u[], const unsigned v[],
  60      int m, int n) {
  61
  62    const unsigned long long b = 4294967296LL; // Number base (2**32).
  63    unsigned *un, *vn;                         // Normalized form of u, v.
  64    unsigned long long qhat;                   // Estimated quotient digit.
  65    unsigned long long rhat;                   // A remainder.
  66    unsigned long long p;                      // Product of two digits.
  67    long long t, k;
  68    int s, i, j;
  69
  70    if (m < n || n <= 0 || v[n-1] == 0)
  71       return 1;                         // Return if invalid param.
  72
  73    if (n == 1) {                        // Take care of
  74       k = 0;                            // the case of a
  75       for (j = m - 1; j >= 0; j--) {    // single-digit
  76          q[j] = (k*b + u[j])/v[0];      // divisor here.
  77          k = (k*b + u[j]) - q[j]*v[0];
  78       }
  79       if (r != NULL) r[0] = k;
  80       return 0;
  81    }
  82
  83    /* Normalize by shifting v left just enough so that its high-order
  84    bit is on, and shift u left the same amount. We may have to append a
  85    high-order digit on the dividend; we do that unconditionally. */
  86
  87    s = nlz(v[n-1]);             // 0 <= s <= 31.
  88    vn = (unsigned *)alloca(4*n);
  89    for (i = n - 1; i > 0; i--)
  90       vn[i] = (v[i] << s) | ((unsigned long long)v[i-1] >> (32-s));
  91    vn[0] = v[0] << s;
  92
  93    un = (unsigned *)alloca(4*(m + 1));
  94    un[m] = (unsigned long long)u[m-1] >> (32-s);
  95    for (i = m - 1; i > 0; i--)
  96       un[i] = (u[i] << s) | ((unsigned long long)u[i-1] >> (32-s));
  97    un[0] = u[0] << s;
  98
  99    for (j = m - n; j >= 0; j--) {       // Main loop.
 100       // Compute estimate qhat of q[j].
 101       qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
 102       rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];
 103 again:
 104       if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
 105       { qhat = qhat - 1;
 106         rhat = rhat + vn[n-1];
 107         if (rhat < b) goto again;
 108       }
 109
 110 #ifdef ORIGINAL
 111       // Multiply and subtract.
 112       k = 0;
 113       for (i = 0; i < n; i++) {
 114          p = qhat*vn[i];
 115          t = un[i+j] - k - (p & 0xFFFFFFFFLL);
 116          un[i+j] = t;
 117          k = (p >> 32) - (t >> 32);
 118       }
 119       t = un[j+n] - k;
 120       un[j+n] = t;
 121       bool need_fixup = t < 0;
 122 #elif defined(SUB_MUL_BORROW)
 123       (void)p; // shut up unused variable warning
 124
 125       // Multiply and subtract.
 126       uint32_t borrow = 0;
 127       for(int i = 0; i <= n; i++) {
 128          uint32_t vn_i = i < n ? vn[i] : 0;
 129          uint64_t value = un[i + j] - (uint64_t)qhat * vn_i - borrow;
 130          borrow = -(uint32_t)(value >> 32);
 131          un[i + j] = (uint32_t)value;
 132       }
 133       bool need_fixup = borrow != 0;
 134 #elif defined(MUL_RSUB_CARRY)
 135       (void)p; // shut up unused variable warning
 136
 137       // Multiply and subtract.
 138       uint32_t carry = 1;
 139       for(int i = 0; i <= n; i++) {
 140          uint32_t vn_i = i < n ? vn[i] : 0;
 141          uint64_t result = un[i + j] + ~((uint64_t)qhat * vn_i) + carry;
 142          uint32_t result_high = result >> 32;
 143          if(carry <= 1)
 144             result_high++;
 145          carry = result_high;
 146          un[i + j] = (uint32_t)result;
 147       }
 148       bool need_fixup = carry != 1;
 149 #else
 150 #error need to define one of ORIGINAL, SUB_MUL_BORROW, or MUL_RSUB_CARRY
 151 #endif
 152
 153       q[j] = qhat;              // Store quotient digit.
 154       if (need_fixup) {         // If we subtracted too
 155          q[j] = q[j] - 1;       // much, add back.
 156          k = 0;
 157          for (i = 0; i < n; i++) {
 158             t = (unsigned long long)un[i+j] + vn[i] + k;
 159             un[i+j] = t;
 160             k = t >> 32;
 161          }
 162          un[j+n] = un[j+n] + k;
 163       }
 164    } // End j.
 165    // If the caller wants the remainder, unnormalize
 166    // it and pass it back.
 167    if (r != NULL) {
 168       for (i = 0; i < n-1; i++)
 169          r[i] = (un[i] >> s) | ((unsigned long long)un[i+1] << (32-s));
 170       r[n-1] = un[n-1] >> s;
 171    }
 172    return 0;
 173 }
 174
 175 int errors;
 176
 177 void check(unsigned q[], unsigned r[],
 178            unsigned u[], unsigned v[],
 179            int m, int n,
 180            unsigned cq[], unsigned cr[]) {
 181    int i, szq;
 182
 183    szq = max(m - n + 1, 1);
 184    for (i = 0; i < szq; i++) {
 185       if (q[i] != cq[i]) {
 186          errors = errors + 1;
 187          dumpit("Error, dividend u =", m, u);
 188          dumpit("       divisor  v =", n, v);
 189          dumpit("For quotient,  got:", m-n+1, q);
 190          dumpit("        Should get:", m-n+1, cq);
 191          return;
 192       }
 193    }
 194    for (i = 0; i < n; i++) {
 195       if (r[i] != cr[i]) {
 196          errors = errors + 1;
 197          dumpit("Error, dividend u =", m, u);
 198          dumpit("       divisor  v =", n, v);
 199          dumpit("For remainder, got:", n, r);
 200          dumpit("        Should get:", n, cr);
 201          return;
 202       }
 203    }
 204    return;
 205 }
 206
 207 int main() {
 208    static unsigned test[] = {
 209    // m, n, u...,          v...,          cq...,  cr....
 210       1, 1, 3,             0,             1,      1,            // Error, divide by 0.
 211       1, 2, 7,             1,3,           0,      7,0,          // Error, n > m.
 212       2, 2, 0,0,           1,0,           0,      0,0,          // Error, incorrect remainder cr.
 213       1, 1, 3,             2,             1,      1,
 214       1, 1, 3,             3,             1,      0,
 215       1, 1, 3,             4,             0,      3,
 216       1, 1, 0,             0xffffffff,    0,      0,
 217       1, 1, 0xffffffff,    1,             0xffffffff, 0,
 218       1, 1, 0xffffffff,    0xffffffff,    1,      0,
 219       1, 1, 0xffffffff,    3,             0x55555555, 0,
 220       2, 1, 0xffffffff,0xffffffff, 1,     0xffffffff,0xffffffff, 0,
 221       2, 1, 0xffffffff,0xffffffff, 0xffffffff,        1,1,    0,
 222       2, 1, 0xffffffff,0xfffffffe, 0xffffffff,        0xffffffff,0, 0xfffffffe,
 223       2, 1, 0x00005678,0x00001234, 0x00009abc,        0x1e1dba76,0, 0x6bd0,
 224       2, 2, 0,0,           0,1,           0,      0,0,
 225       2, 2, 0,7,           0,3,           2,      0,1,
 226       2, 2, 5,7,           0,3,           2,      5,1,
 227       2, 2, 0,6,           0,2,           3,      0,0,
 228       1, 1, 0x80000000,  0x40000001, 0x00000001, 0x3fffffff,
 229       2, 1, 0x00000000,0x80000000, 0x40000001, 0xfffffff8,0x00000001, 0x00000008,
 230       2, 2, 0x00000000,0x80000000, 0x00000001,0x40000000, 0x00000001, 0xffffffff,0x3fffffff,
 231       2, 2, 0x0000789a,0x0000bcde, 0x0000789a,0x0000bcde,          1,          0,0,
 232       2, 2, 0x0000789b,0x0000bcde, 0x0000789a,0x0000bcde,          1,          1,0,
 233       2, 2, 0x00007899,0x0000bcde, 0x0000789a,0x0000bcde,          0, 0x00007899,0x0000bcde,
 234       2, 2, 0x0000ffff,0x0000ffff, 0x0000ffff,0x0000ffff,          1,          0,0,
 235       2, 2, 0x0000ffff,0x0000ffff, 0x00000000,0x00000001, 0x0000ffff, 0x0000ffff,0,
 236       3, 2, 0x000089ab,0x00004567,0x00000123, 0x00000000,0x00000001,   0x00004567,0x00000123, 0x000089ab,0,
 237       3, 2, 0x00000000,0x0000fffe,0x00008000, 0x0000ffff,0x00008000,   0xffffffff,0x00000000, 0x0000ffff,0x00007fff, // Shows that first qhat can = b + 1.
 238       3, 3, 0x00000003,0x00000000,0x80000000, 0x00000001,0x00000000,0x20000000,   0x00000003, 0,0,0x20000000, // Adding back step req'd.
 239       3, 3, 0x00000003,0x00000000,0x00008000, 0x00000001,0x00000000,0x00002000,   0x00000003, 0,0,0x00002000, // Adding back step req'd.
 240       4, 3, 0,0,0x00008000,0x00007fff, 1,0,0x00008000,   0xfffe0000,0, 0x00020000,0xffffffff,0x00007fff,  // Add back req'd.
 241       4, 3, 0,0x0000fffe,0,0x00008000, 0x0000ffff,0,0x00008000, 0xffffffff,0, 0x0000ffff,0xffffffff,0x00007fff,  // Shows that mult-sub quantity cannot be treated as signed.
 242       4, 3, 0,0xfffffffe,0,0x80000000, 0x0000ffff,0,0x80000000, 0x00000000,1, 0x00000000,0xfffeffff,0x00000000,  // Shows that mult-sub quantity cannot be treated as signed.
 243       4, 3, 0,0xfffffffe,0,0x80000000, 0xffffffff,0,0x80000000, 0xffffffff,0, 0xffffffff,0xffffffff,0x7fffffff,  // Shows that mult-sub quantity cannot be treated as signed.
 244    };
 245    int i, n, m, ncases, f;
 246    unsigned q[10], r[10];
 247    unsigned *u, *v, *cq, *cr;
 248
 249    printf("divmnu:\n");
 250    i = 0;
 251    ncases = 0;
 252    while (i < sizeof(test)/4) {
 253       m = test[i];
 254       n = test[i+1];
 255       u = &test[i+2];
 256       v = &test[i+2+m];
 257       cq = &test[i+2+m+n];
 258       cr = &test[i+2+m+n+max(m-n+1, 1)];
 259
 260       f = divmnu(q, r, u, v, m, n);
 261       if (f) {
 262          dumpit("Error return code for dividend u =", m, u);
 263          dumpit("                      divisor  v =", n, v);
 264          errors = errors + 1;
 265       }
 266       else
 267          check(q, r, u, v, m, n, cq, cr);
 268       i = i + 2 + m + n + max(m-n+1, 1) + n;
 269       ncases = ncases + 1;
 270    }
 271
 272    printf("%d errors out of %d cases; there should be 3.\n", errors, ncases);
 273    return 0;
 274 }