openpower/sv/bitmanip/divmnu64.c

   1 /* original source code from Hackers-Delight
   2    https://github.com/hcs0/Hackers-Delight
   3 */
   4 /* This divides an n-word dividend by an m-word divisor, giving an
   5 n-m+1-word quotient and m-word remainder. The bignums are in arrays of
   6 words. Here a "word" is 32 bits. This routine is designed for a 64-bit
   7 machine which has a 64/64 division instruction. */
   8
   9 #include <stdio.h>
  10 #include <stdlib.h>     //To define "exit", req'd by XLC.
  11
  12 #define max(x, y) ((x) > (y) ? (x) : (y))
  13
  14 int nlz(unsigned x) {
  15    int n;
  16
  17    if (x == 0) return(32);
  18    n = 0;
  19    if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
  20    if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
  21    if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
  22    if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
  23    if (x <= 0x7FFFFFFF) {n = n + 1;}
  24    return n;
  25 }
  26
  27 void dumpit(char *msg, int n, unsigned v[]) {
  28    int i;
  29    printf(msg);
  30    for (i = n-1; i >= 0; i--) printf(" %08x", v[i]);
  31    printf("\n");
  32 }
  33
  34 /* q[0], r[0], u[0], and v[0] contain the LEAST significant words.
  35 (The sequence is in little-endian order).
  36
  37 This is a fairly precise implementation of Knuth's Algorithm D, for a
  38 binary computer with base b = 2**32. The caller supplies:
  39    1. Space q for the quotient, m - n + 1 words (at least one).
  40    2. Space r for the remainder (optional), n words.
  41    3. The dividend u, m words, m >= 1.
  42    4. The divisor v, n words, n >= 2.
  43 The most significant digit of the divisor, v[n-1], must be nonzero.  The
  44 dividend u may have leading zeros; this just makes the algorithm take
  45 longer and makes the quotient contain more leading zeros.  A value of
  46 NULL may be given for the address of the remainder to signify that the
  47 caller does not want the remainder.
  48    The program does not alter the input parameters u and v.
  49    The quotient and remainder returned may have leading zeros.  The
  50 function itself returns a value of 0 for success and 1 for invalid
  51 parameters (e.g., division by 0).
  52    For now, we must have m >= n.  Knuth's Algorithm D also requires
  53 that the dividend be at least as long as the divisor.  (In his terms,
  54 m >= 0 (unstated).  Therefore m+n >= n.) */
  55
  56 int divmnu(unsigned q[], unsigned r[],
  57      const unsigned u[], const unsigned v[],
  58      int m, int n) {
  59
  60    const unsigned long long b = 4294967296LL; // Number base (2**32).
  61    unsigned *un, *vn;                         // Normalized form of u, v.
  62    unsigned long long qhat;                   // Estimated quotient digit.
  63    unsigned long long rhat;                   // A remainder.
  64    unsigned long long p;                      // Product of two digits.
  65    long long t, k;
  66    int s, i, j;
  67
  68    if (m < n || n <= 0 || v[n-1] == 0)
  69       return 1;                         // Return if invalid param.
  70
  71    if (n == 1) {                        // Take care of
  72       k = 0;                            // the case of a
  73       for (j = m - 1; j >= 0; j--) {    // single-digit
  74          q[j] = (k*b + u[j])/v[0];      // divisor here.
  75          k = (k*b + u[j]) - q[j]*v[0];
  76       }
  77       if (r != NULL) r[0] = k;
  78       return 0;
  79    }
  80
  81    /* Normalize by shifting v left just enough so that its high-order
  82    bit is on, and shift u left the same amount. We may have to append a
  83    high-order digit on the dividend; we do that unconditionally. */
  84
  85    s = nlz(v[n-1]);             // 0 <= s <= 31.
  86    vn = (unsigned *)alloca(4*n);
  87    for (i = n - 1; i > 0; i--)
  88       vn[i] = (v[i] << s) | ((unsigned long long)v[i-1] >> (32-s));
  89    vn[0] = v[0] << s;
  90
  91    un = (unsigned *)alloca(4*(m + 1));
  92    un[m] = (unsigned long long)u[m-1] >> (32-s);
  93    for (i = m - 1; i > 0; i--)
  94       un[i] = (u[i] << s) | ((unsigned long long)u[i-1] >> (32-s));
  95    un[0] = u[0] << s;
  96
  97    for (j = m - n; j >= 0; j--) {       // Main loop.
  98       // Compute estimate qhat of q[j].
  99       qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
 100       rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];
 101 again:
 102       if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
 103       { qhat = qhat - 1;
 104         rhat = rhat + vn[n-1];
 105         if (rhat < b) goto again;
 106       }
 107
 108       // Multiply and subtract.
 109       k = 0;
 110 #ifdef ORIGINAL
 111       for (i = 0; i < n; i++) {
 112          p = qhat*vn[i];
 113          t = un[i+j] - k - (p & 0xFFFFFFFFLL);
 114          un[i+j] = (t & 0xffffffffLL);
 115          k = (p >> 32) - (t >> 32);
 116       }
 117       t = un[j+n] - k;
 118       un[j+n] = t;
 119 #endif
 120 #define EXPERIMENT1
 121       k = 1;
 122 #ifdef EXPERIMENT1
 123       for (i = 0; i < n+1; i++) {
 124          unsigned rhi;
 125          unsigned long long sum;
 126          if (i != n) {
 127              p = qhat*vn[i];
 128              sum = ((unsigned long long)un[i+j]) + ~p + k;
 129          } else { // for last loop instead of separate cleanup do special sum
 130             sum = (~0)+k;
 131          }
 132          rhi = (sum >> 32);
 133          if (((unsigned long long)k) <= 1) rhi += 1;
 134          k = rhi;
 135          un[i+j] = sum & 0xffffffff;
 136       }
 137       // XXX t is not properly computed, it must contain -ve
 138       // if the subtract should not have occurred
 139 #endif
 140 //#define EXPERIMENT2
 141 #ifdef EXPERIMENT2
 142       k = 0;
 143       unsigned long phi[200]; // yes, not malloced, we know
 144       unsigned long plo[200]; // yes, not malloced, we know
 145       // double-width multiply with strange subtract only on bottom half
 146       for (i = 0; i < n; i++) {
 147         unsigned long long p = qhat*vn[i];
 148         plo[i] = un[i+j] - (p&0xffffffffLL);
 149         phi[i] = p >> 32;
 150       }
 151       for (i = 0; i < n; i++) {
 152          t = plo[i] - k; // subtract previous carry
 153          un[i+j] = (t & 0xffffffffLL);
 154          k = phi[i] - (t >> 32); // take top-halves for new carry
 155       }
 156       t = un[j+n] - k;
 157       un[j+n] = t;
 158 #endif
 159
 160       q[j] = qhat;              // Store quotient digit.
 161       if (t < 0) {              // If we subtracted too
 162          q[j] = q[j] - 1;       // much, add back.
 163          k = 0;
 164          for (i = 0; i < n; i++) {
 165             t = (unsigned long long)un[i+j] + vn[i] + k;
 166             un[i+j] = t;
 167             k = t >> 32;
 168          }
 169          un[j+n] = un[j+n] + k;
 170       }
 171    } // End j.
 172    // If the caller wants the remainder, unnormalize
 173    // it and pass it back.
 174    if (r != NULL) {
 175       for (i = 0; i < n-1; i++)
 176          r[i] = (un[i] >> s) | ((unsigned long long)un[i+1] << (32-s));
 177       r[n-1] = un[n-1] >> s;
 178    }
 179    return 0;
 180 }
 181
 182 int errors;
 183
 184 void check(unsigned q[], unsigned r[],
 185            unsigned u[], unsigned v[],
 186            int m, int n,
 187            unsigned cq[], unsigned cr[]) {
 188    int i, szq;
 189
 190    szq = max(m - n + 1, 1);
 191    for (i = 0; i < szq; i++) {
 192       if (q[i] != cq[i]) {
 193          errors = errors + 1;
 194          dumpit("Error, dividend u =", m, u);
 195          dumpit("       divisor  v =", n, v);
 196          dumpit("For quotient,  got:", m-n+1, q);
 197          dumpit("        Should get:", m-n+1, cq);
 198          return;
 199       }
 200    }
 201    for (i = 0; i < n; i++) {
 202       if (r[i] != cr[i]) {
 203          errors = errors + 1;
 204          dumpit("Error, dividend u =", m, u);
 205          dumpit("       divisor  v =", n, v);
 206          dumpit("For remainder, got:", n, r);
 207          dumpit("        Should get:", n, cr);
 208          return;
 209       }
 210    }
 211    return;
 212 }
 213
 214 int main() {
 215    static unsigned test[] = {
 216    // m, n, u...,          v...,          cq...,  cr....
 217       1, 1, 3,             0,             1,      1,            // Error, divide by 0.
 218       1, 2, 7,             1,3,           0,      7,0,          // Error, n > m.
 219       2, 2, 0,0,           1,0,           0,      0,0,          // Error, incorrect remainder cr.
 220       1, 1, 3,             2,             1,      1,
 221       1, 1, 3,             3,             1,      0,
 222       1, 1, 3,             4,             0,      3,
 223       1, 1, 0,             0xffffffff,    0,      0,
 224       1, 1, 0xffffffff,    1,             0xffffffff, 0,
 225       1, 1, 0xffffffff,    0xffffffff,    1,      0,
 226       1, 1, 0xffffffff,    3,             0x55555555, 0,
 227       2, 1, 0xffffffff,0xffffffff, 1,     0xffffffff,0xffffffff, 0,
 228       2, 1, 0xffffffff,0xffffffff, 0xffffffff,        1,1,    0,
 229       2, 1, 0xffffffff,0xfffffffe, 0xffffffff,        0xffffffff,0, 0xfffffffe,
 230       2, 1, 0x00005678,0x00001234, 0x00009abc,        0x1e1dba76,0, 0x6bd0,
 231       2, 2, 0,0,           0,1,           0,      0,0,
 232       2, 2, 0,7,           0,3,           2,      0,1,
 233       2, 2, 5,7,           0,3,           2,      5,1,
 234       2, 2, 0,6,           0,2,           3,      0,0,
 235       1, 1, 0x80000000,  0x40000001, 0x00000001, 0x3fffffff,
 236       2, 1, 0x00000000,0x80000000, 0x40000001, 0xfffffff8,0x00000001, 0x00000008,
 237       2, 2, 0x00000000,0x80000000, 0x00000001,0x40000000, 0x00000001, 0xffffffff,0x3fffffff,
 238       2, 2, 0x0000789a,0x0000bcde, 0x0000789a,0x0000bcde,          1,          0,0,
 239       2, 2, 0x0000789b,0x0000bcde, 0x0000789a,0x0000bcde,          1,          1,0,
 240       2, 2, 0x00007899,0x0000bcde, 0x0000789a,0x0000bcde,          0, 0x00007899,0x0000bcde,
 241       2, 2, 0x0000ffff,0x0000ffff, 0x0000ffff,0x0000ffff,          1,          0,0,
 242       2, 2, 0x0000ffff,0x0000ffff, 0x00000000,0x00000001, 0x0000ffff, 0x0000ffff,0,
 243       3, 2, 0x000089ab,0x00004567,0x00000123, 0x00000000,0x00000001,   0x00004567,0x00000123, 0x000089ab,0,
 244       3, 2, 0x00000000,0x0000fffe,0x00008000, 0x0000ffff,0x00008000,   0xffffffff,0x00000000, 0x0000ffff,0x00007fff, // Shows that first qhat can = b + 1.
 245       3, 3, 0x00000003,0x00000000,0x80000000, 0x00000001,0x00000000,0x20000000,   0x00000003, 0,0,0x20000000, // Adding back step req'd.
 246       3, 3, 0x00000003,0x00000000,0x00008000, 0x00000001,0x00000000,0x00002000,   0x00000003, 0,0,0x00002000, // Adding back step req'd.
 247       4, 3, 0,0,0x00008000,0x00007fff, 1,0,0x00008000,   0xfffe0000,0, 0x00020000,0xffffffff,0x00007fff,  // Add back req'd.
 248       4, 3, 0,0x0000fffe,0,0x00008000, 0x0000ffff,0,0x00008000, 0xffffffff,0, 0x0000ffff,0xffffffff,0x00007fff,  // Shows that mult-sub quantity cannot be treated as signed.
 249       4, 3, 0,0xfffffffe,0,0x80000000, 0x0000ffff,0,0x80000000, 0x00000000,1, 0x00000000,0xfffeffff,0x00000000,  // Shows that mult-sub quantity cannot be treated as signed.
 250       4, 3, 0,0xfffffffe,0,0x80000000, 0xffffffff,0,0x80000000, 0xffffffff,0, 0xffffffff,0xffffffff,0x7fffffff,  // Shows that mult-sub quantity cannot be treated as signed.
 251    };
 252    int i, n, m, ncases, f;
 253    unsigned q[10], r[10];
 254    unsigned *u, *v, *cq, *cr;
 255
 256    printf("divmnu:\n");
 257    i = 0;
 258    ncases = 0;
 259    while (i < sizeof(test)/4) {
 260       m = test[i];
 261       n = test[i+1];
 262       u = &test[i+2];
 263       v = &test[i+2+m];
 264       cq = &test[i+2+m+n];
 265       cr = &test[i+2+m+n+max(m-n+1, 1)];
 266
 267       f = divmnu(q, r, u, v, m, n);
 268       if (f) {
 269          dumpit("Error return code for dividend u =", m, u);
 270          dumpit("                      divisor  v =", n, v);
 271          errors = errors + 1;
 272       }
 273       else
 274          check(q, r, u, v, m, n, cq, cr);
 275       i = i + 2 + m + n + max(m-n+1, 1) + n;
 276       ncases = ncases + 1;
 277    }
 278
 279    printf("%d errors out of %d cases; there should be 3.\n", errors, ncases);
 280    return 0;
 281 }