openpower/sv/bitmanip/divmnu64.c

   1 /* original source code from Hackers-Delight
   2    https://github.com/hcs0/Hackers-Delight
   3 */
   4 /* This divides an n-word dividend by an m-word divisor, giving an
   5 n-m+1-word quotient and m-word remainder. The bignums are in arrays of
   6 words. Here a "word" is 32 bits. This routine is designed for a 64-bit
   7 machine which has a 64/64 division instruction. */
   8
   9 #include <stdio.h>
  10 #include <stdlib.h>     //To define "exit", req'd by XLC.
  11
  12 #define max(x, y) ((x) > (y) ? (x) : (y))
  13
  14 int nlz(unsigned x) {
  15    int n;
  16
  17    if (x == 0) return(32);
  18    n = 0;
  19    if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
  20    if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
  21    if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
  22    if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
  23    if (x <= 0x7FFFFFFF) {n = n + 1;}
  24    return n;
  25 }
  26
  27 void dumpit(char *msg, int n, unsigned v[]) {
  28    int i;
  29    printf(msg);
  30    for (i = n-1; i >= 0; i--) printf(" %08x", v[i]);
  31    printf("\n");
  32 }
  33
  34 /* q[0], r[0], u[0], and v[0] contain the LEAST significant words.
  35 (The sequence is in little-endian order).
  36
  37 This is a fairly precise implementation of Knuth's Algorithm D, for a
  38 binary computer with base b = 2**32. The caller supplies:
  39    1. Space q for the quotient, m - n + 1 words (at least one).
  40    2. Space r for the remainder (optional), n words.
  41    3. The dividend u, m words, m >= 1.
  42    4. The divisor v, n words, n >= 2.
  43 The most significant digit of the divisor, v[n-1], must be nonzero.  The
  44 dividend u may have leading zeros; this just makes the algorithm take
  45 longer and makes the quotient contain more leading zeros.  A value of
  46 NULL may be given for the address of the remainder to signify that the
  47 caller does not want the remainder.
  48    The program does not alter the input parameters u and v.
  49    The quotient and remainder returned may have leading zeros.  The
  50 function itself returns a value of 0 for success and 1 for invalid
  51 parameters (e.g., division by 0).
  52    For now, we must have m >= n.  Knuth's Algorithm D also requires
  53 that the dividend be at least as long as the divisor.  (In his terms,
  54 m >= 0 (unstated).  Therefore m+n >= n.) */
  55
  56 int divmnu(unsigned q[], unsigned r[],
  57      const unsigned u[], const unsigned v[],
  58      int m, int n) {
  59
  60    const unsigned long long b = 4294967296LL; // Number base (2**32).
  61    unsigned *un, *vn;                         // Normalized form of u, v.
  62    unsigned long long qhat;                   // Estimated quotient digit.
  63    unsigned long long rhat;                   // A remainder.
  64    unsigned long long p;                      // Product of two digits.
  65    long long t, k;
  66    int s, i, j;
  67
  68    if (m < n || n <= 0 || v[n-1] == 0)
  69       return 1;                         // Return if invalid param.
  70
  71    if (n == 1) {                        // Take care of
  72       k = 0;                            // the case of a
  73       for (j = m - 1; j >= 0; j--) {    // single-digit
  74          q[j] = (k*b + u[j])/v[0];      // divisor here.
  75          k = (k*b + u[j]) - q[j]*v[0];
  76       }
  77       if (r != NULL) r[0] = k;
  78       return 0;
  79    }
  80
  81    /* Normalize by shifting v left just enough so that its high-order
  82    bit is on, and shift u left the same amount. We may have to append a
  83    high-order digit on the dividend; we do that unconditionally. */
  84
  85    s = nlz(v[n-1]);             // 0 <= s <= 31.
  86    vn = (unsigned *)alloca(4*n);
  87    for (i = n - 1; i > 0; i--)
  88       vn[i] = (v[i] << s) | ((unsigned long long)v[i-1] >> (32-s));
  89    vn[0] = v[0] << s;
  90
  91    un = (unsigned *)alloca(4*(m + 1));
  92    un[m] = (unsigned long long)u[m-1] >> (32-s);
  93    for (i = m - 1; i > 0; i--)
  94       un[i] = (u[i] << s) | ((unsigned long long)u[i-1] >> (32-s));
  95    un[0] = u[0] << s;
  96
  97    for (j = m - n; j >= 0; j--) {       // Main loop.
  98       // Compute estimate qhat of q[j].
  99       qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
 100       rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];
 101 again:
 102       if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
 103       { qhat = qhat - 1;
 104         rhat = rhat + vn[n-1];
 105         if (rhat < b) goto again;
 106       }
 107
 108       // Multiply and subtract.
 109       k = 0;
 110 #ifdef ORIGINAL
 111       for (i = 0; i < n; i++) {
 112          p = qhat*vn[i];
 113          t = un[i+j] - k - (p & 0xFFFFFFFFLL);
 114          un[i+j] = (t & 0xffffffffLL);
 115          k = (p >> 32) - (t >> 32);
 116       }
 117 #endif
 118 #ifdef EXPERIMENT1
 119       for (i = 0; i < n; i++) {
 120          unsigned rhi;
 121          unsigned long long sum;
 122          p = ((unsigned long long)un[i+j])+ ~(qhat*vn[i]) + k;
 123          rhi = (p >> 32);
 124          if (((unsigned long long)k) <= 1) rhi += 1;
 125          k = rhi;
 126          un[i+j] = sum & 0xffffffff;
 127       }
 128 #endif
 129 #define EXPERIMENT2
 130 #ifdef EXPERIMENT2
 131       unsigned long phi[200]; // yes, not malloced, we know
 132       unsigned long plo[200]; // yes, not malloced, we know
 133       // double-width multiply with strange subtract only on bottom half
 134       for (i = 0; i < n; i++) {
 135         unsigned long long p = qhat*vn[i];
 136         plo[i] = un[i+j] - (p&0xffffffffLL);
 137         phi[i] = p >> 32;
 138       }
 139       for (i = 0; i < n; i++) {
 140          t = plo[i] - k; // subtract previous carry
 141          un[i+j] = (t & 0xffffffffLL);
 142          k = phi[i] - (t >> 32); // take top-halves for new carry
 143       }
 144 #endif
 145       t = un[j+n] - k;
 146       un[j+n] = t;
 147
 148       q[j] = qhat;              // Store quotient digit.
 149       if (t < 0) {              // If we subtracted too
 150          q[j] = q[j] - 1;       // much, add back.
 151          k = 0;
 152          for (i = 0; i < n; i++) {
 153             t = (unsigned long long)un[i+j] + vn[i] + k;
 154             un[i+j] = t;
 155             k = t >> 32;
 156          }
 157          un[j+n] = un[j+n] + k;
 158       }
 159    } // End j.
 160    // If the caller wants the remainder, unnormalize
 161    // it and pass it back.
 162    if (r != NULL) {
 163       for (i = 0; i < n-1; i++)
 164          r[i] = (un[i] >> s) | ((unsigned long long)un[i+1] << (32-s));
 165       r[n-1] = un[n-1] >> s;
 166    }
 167    return 0;
 168 }
 169
 170 int errors;
 171
 172 void check(unsigned q[], unsigned r[],
 173            unsigned u[], unsigned v[],
 174            int m, int n,
 175            unsigned cq[], unsigned cr[]) {
 176    int i, szq;
 177
 178    szq = max(m - n + 1, 1);
 179    for (i = 0; i < szq; i++) {
 180       if (q[i] != cq[i]) {
 181          errors = errors + 1;
 182          dumpit("Error, dividend u =", m, u);
 183          dumpit("       divisor  v =", n, v);
 184          dumpit("For quotient,  got:", m-n+1, q);
 185          dumpit("        Should get:", m-n+1, cq);
 186          return;
 187       }
 188    }
 189    for (i = 0; i < n; i++) {
 190       if (r[i] != cr[i]) {
 191          errors = errors + 1;
 192          dumpit("Error, dividend u =", m, u);
 193          dumpit("       divisor  v =", n, v);
 194          dumpit("For remainder, got:", n, r);
 195          dumpit("        Should get:", n, cr);
 196          return;
 197       }
 198    }
 199    return;
 200 }
 201
 202 int main() {
 203    static unsigned test[] = {
 204    // m, n, u...,          v...,          cq...,  cr....
 205       1, 1, 3,             0,             1,      1,            // Error, divide by 0.
 206       1, 2, 7,             1,3,           0,      7,0,          // Error, n > m.
 207       2, 2, 0,0,           1,0,           0,      0,0,          // Error, incorrect remainder cr.
 208       1, 1, 3,             2,             1,      1,
 209       1, 1, 3,             3,             1,      0,
 210       1, 1, 3,             4,             0,      3,
 211       1, 1, 0,             0xffffffff,    0,      0,
 212       1, 1, 0xffffffff,    1,             0xffffffff, 0,
 213       1, 1, 0xffffffff,    0xffffffff,    1,      0,
 214       1, 1, 0xffffffff,    3,             0x55555555, 0,
 215       2, 1, 0xffffffff,0xffffffff, 1,     0xffffffff,0xffffffff, 0,
 216       2, 1, 0xffffffff,0xffffffff, 0xffffffff,        1,1,    0,
 217       2, 1, 0xffffffff,0xfffffffe, 0xffffffff,        0xffffffff,0, 0xfffffffe,
 218       2, 1, 0x00005678,0x00001234, 0x00009abc,        0x1e1dba76,0, 0x6bd0,
 219       2, 2, 0,0,           0,1,           0,      0,0,
 220       2, 2, 0,7,           0,3,           2,      0,1,
 221       2, 2, 5,7,           0,3,           2,      5,1,
 222       2, 2, 0,6,           0,2,           3,      0,0,
 223       1, 1, 0x80000000,  0x40000001, 0x00000001, 0x3fffffff,
 224       2, 1, 0x00000000,0x80000000, 0x40000001, 0xfffffff8,0x00000001, 0x00000008,
 225       2, 2, 0x00000000,0x80000000, 0x00000001,0x40000000, 0x00000001, 0xffffffff,0x3fffffff,
 226       2, 2, 0x0000789a,0x0000bcde, 0x0000789a,0x0000bcde,          1,          0,0,
 227       2, 2, 0x0000789b,0x0000bcde, 0x0000789a,0x0000bcde,          1,          1,0,
 228       2, 2, 0x00007899,0x0000bcde, 0x0000789a,0x0000bcde,          0, 0x00007899,0x0000bcde,
 229       2, 2, 0x0000ffff,0x0000ffff, 0x0000ffff,0x0000ffff,          1,          0,0,
 230       2, 2, 0x0000ffff,0x0000ffff, 0x00000000,0x00000001, 0x0000ffff, 0x0000ffff,0,
 231       3, 2, 0x000089ab,0x00004567,0x00000123, 0x00000000,0x00000001,   0x00004567,0x00000123, 0x000089ab,0,
 232       3, 2, 0x00000000,0x0000fffe,0x00008000, 0x0000ffff,0x00008000,   0xffffffff,0x00000000, 0x0000ffff,0x00007fff, // Shows that first qhat can = b + 1.
 233       3, 3, 0x00000003,0x00000000,0x80000000, 0x00000001,0x00000000,0x20000000,   0x00000003, 0,0,0x20000000, // Adding back step req'd.
 234       3, 3, 0x00000003,0x00000000,0x00008000, 0x00000001,0x00000000,0x00002000,   0x00000003, 0,0,0x00002000, // Adding back step req'd.
 235       4, 3, 0,0,0x00008000,0x00007fff, 1,0,0x00008000,   0xfffe0000,0, 0x00020000,0xffffffff,0x00007fff,  // Add back req'd.
 236       4, 3, 0,0x0000fffe,0,0x00008000, 0x0000ffff,0,0x00008000, 0xffffffff,0, 0x0000ffff,0xffffffff,0x00007fff,  // Shows that mult-sub quantity cannot be treated as signed.
 237       4, 3, 0,0xfffffffe,0,0x80000000, 0x0000ffff,0,0x80000000, 0x00000000,1, 0x00000000,0xfffeffff,0x00000000,  // Shows that mult-sub quantity cannot be treated as signed.
 238       4, 3, 0,0xfffffffe,0,0x80000000, 0xffffffff,0,0x80000000, 0xffffffff,0, 0xffffffff,0xffffffff,0x7fffffff,  // Shows that mult-sub quantity cannot be treated as signed.
 239    };
 240    int i, n, m, ncases, f;
 241    unsigned q[10], r[10];
 242    unsigned *u, *v, *cq, *cr;
 243
 244    printf("divmnu:\n");
 245    i = 0;
 246    ncases = 0;
 247    while (i < sizeof(test)/4) {
 248       m = test[i];
 249       n = test[i+1];
 250       u = &test[i+2];
 251       v = &test[i+2+m];
 252       cq = &test[i+2+m+n];
 253       cr = &test[i+2+m+n+max(m-n+1, 1)];
 254
 255       f = divmnu(q, r, u, v, m, n);
 256       if (f) {
 257          dumpit("Error return code for dividend u =", m, u);
 258          dumpit("                      divisor  v =", n, v);
 259          errors = errors + 1;
 260       }
 261       else
 262          check(q, r, u, v, m, n, cq, cr);
 263       i = i + 2 + m + n + max(m-n+1, 1) + n;
 264       ncases = ncases + 1;
 265    }
 266
 267    printf("%d errors out of %d cases; there should be 3.\n", errors, ncases);
 268    return 0;
 269 }