openpower/sv/bitmanip/divmnu64.c

   1 /* original source code from Hackers-Delight
   2    https://github.com/hcs0/Hackers-Delight
   3 */
   4 /* This divides an n-word dividend by an m-word divisor, giving an
   5 n-m+1-word quotient and m-word remainder. The bignums are in arrays of
   6 words. Here a "word" is 32 bits. This routine is designed for a 64-bit
   7 machine which has a 64/64 division instruction. */
   8
   9 #include <stdio.h>
  10 #include <stdlib.h>     //To define "exit", req'd by XLC.
  11 #include <stdbool.h>
  12 #include <stdint.h>
  13
  14 #define max(x, y) ((x) > (y) ? (x) : (y))
  15
  16 int nlz(unsigned x) {
  17    int n;
  18
  19    if (x == 0) return(32);
  20    n = 0;
  21    if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
  22    if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
  23    if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
  24    if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
  25    if (x <= 0x7FFFFFFF) {n = n + 1;}
  26    return n;
  27 }
  28
  29 void dumpit(char *msg, int n, unsigned v[]) {
  30    int i;
  31    printf("%s", msg);
  32    for (i = n-1; i >= 0; i--) printf(" %08x", v[i]);
  33    printf("\n");
  34 }
  35
  36 /* q[0], r[0], u[0], and v[0] contain the LEAST significant words.
  37 (The sequence is in little-endian order).
  38
  39 This is a fairly precise implementation of Knuth's Algorithm D, for a
  40 binary computer with base b = 2**32. The caller supplies:
  41    1. Space q for the quotient, m - n + 1 words (at least one).
  42    2. Space r for the remainder (optional), n words.
  43    3. The dividend u, m words, m >= 1.
  44    4. The divisor v, n words, n >= 2.
  45 The most significant digit of the divisor, v[n-1], must be nonzero.  The
  46 dividend u may have leading zeros; this just makes the algorithm take
  47 longer and makes the quotient contain more leading zeros.  A value of
  48 NULL may be given for the address of the remainder to signify that the
  49 caller does not want the remainder.
  50    The program does not alter the input parameters u and v.
  51    The quotient and remainder returned may have leading zeros.  The
  52 function itself returns a value of 0 for success and 1 for invalid
  53 parameters (e.g., division by 0).
  54    For now, we must have m >= n.  Knuth's Algorithm D also requires
  55 that the dividend be at least as long as the divisor.  (In his terms,
  56 m >= 0 (unstated).  Therefore m+n >= n.) */
  57
  58 int divmnu(unsigned q[], unsigned r[],
  59      const unsigned u[], const unsigned v[],
  60      int m, int n) {
  61
  62    const unsigned long long b = 4294967296LL; // Number base (2**32).
  63    unsigned *un, *vn;                         // Normalized form of u, v.
  64    unsigned long long qhat;                   // Estimated quotient digit.
  65    unsigned long long rhat;                   // A remainder.
  66    unsigned long long p;                      // Product of two digits.
  67    long long t, k;
  68    int s, i, j;
  69
  70    if (m < n || n <= 0 || v[n-1] == 0)
  71       return 1;                         // Return if invalid param.
  72
  73    if (n == 1) {                        // Take care of
  74       k = 0;                            // the case of a
  75       for (j = m - 1; j >= 0; j--) {    // single-digit
  76          q[j] = (k*b + u[j])/v[0];      // divisor here.
  77          k = (k*b + u[j]) - q[j]*v[0];
  78       }
  79       if (r != NULL) r[0] = k;
  80       return 0;
  81    }
  82
  83    /* Normalize by shifting v left just enough so that its high-order
  84    bit is on, and shift u left the same amount. We may have to append a
  85    high-order digit on the dividend; we do that unconditionally. */
  86
  87    s = nlz(v[n-1]);             // 0 <= s <= 31.
  88    vn = (unsigned *)alloca(4*n);
  89    for (i = n - 1; i > 0; i--)
  90       vn[i] = (v[i] << s) | ((unsigned long long)v[i-1] >> (32-s));
  91    vn[0] = v[0] << s;
  92
  93    un = (unsigned *)alloca(4*(m + 1));
  94    un[m] = (unsigned long long)u[m-1] >> (32-s);
  95    for (i = m - 1; i > 0; i--)
  96       un[i] = (u[i] << s) | ((unsigned long long)u[i-1] >> (32-s));
  97    un[0] = u[0] << s;
  98
  99    for (j = m - n; j >= 0; j--) {       // Main loop.
 100       // Compute estimate qhat of q[j].
 101       qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
 102       rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];
 103 again:
 104       if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
 105       { qhat = qhat - 1;
 106         rhat = rhat + vn[n-1];
 107         if (rhat < b) goto again;
 108       }
 109
 110 #define MUL_RSUB_CARRY
 111 #ifdef ORIGINAL
 112       // Multiply and subtract.
 113       k = 0;
 114       for (i = 0; i < n; i++) {
 115          p = qhat*vn[i];
 116          t = un[i+j] - k - (p & 0xFFFFFFFFLL);
 117          un[i+j] = t;
 118          k = (p >> 32) - (t >> 32);
 119       }
 120       t = un[j+n] - k;
 121       un[j+n] = t;
 122       bool need_fixup = t < 0;
 123 #elif defined(SUB_MUL_BORROW)
 124       (void)p; // shut up unused variable warning
 125
 126       // Multiply and subtract.
 127       uint32_t borrow = 0;
 128       uint32_t phi[2000]; // plenty space
 129       uint32_t plo[2000]; // plenty space
 130       // first, perform mul-and-sub and store in split hi-lo
 131       // this shows the vectorised sv.msubx which stores 128-bit in
 132       // two 64-bit registers
 133       for(int i = 0; i <= n; i++) {
 134          uint32_t vn_i = i < n ? vn[i] : 0;
 135          uint64_t value = un[i + j] - (uint64_t)qhat * vn_i;
 136          plo[i] = value & 0xffffffffLL;
 137          phi[i] = value >> 32;
 138       }
 139       // second, reconstruct the 64-bit result, subtract borrow,
 140       // store top-half (-ve) in new borrow and store low-half as answer
 141       // this is the new (odd) instruction
 142       for(int i = 0; i <= n; i++) {
 143          uint64_t value = (((uint64_t)phi[i]<<32) | plo[i]) - borrow;
 144          borrow = -(uint32_t)(value >> 32);
 145          un[i + j] = (uint32_t)value;
 146       }
 147       bool need_fixup = borrow != 0;
 148 #elif defined(MUL_RSUB_CARRY)
 149       (void)p; // shut up unused variable warning
 150
 151       // Multiply and subtract.
 152       uint32_t carry = 1;
 153       uint32_t phi[2000]; // plenty space
 154       uint32_t plo[2000]; // plenty space
 155       for(int i = 0; i <= n; i++) {
 156          uint32_t vn_i = i < n ? vn[i] : 0;
 157          uint64_t value = un[i + j] + ~((uint64_t)qhat * vn_i);
 158          plo[i] = value & 0xffffffffLL;
 159          phi[i] = value >> 32;
 160       }
 161       for(int i = 0; i <= n; i++) {
 162          uint64_t result = (((uint64_t)phi[i]<<32) | plo[i]) + carry;
 163          uint32_t result_high = result >> 32;
 164          if(carry <= 1)
 165             result_high++;
 166          carry = result_high;
 167          un[i + j] = (uint32_t)result;
 168       }
 169       bool need_fixup = carry != 1;
 170 #else
 171 #error need to define one of ORIGINAL, SUB_MUL_BORROW, or MUL_RSUB_CARRY
 172 #endif
 173
 174       q[j] = qhat;              // Store quotient digit.
 175       if (need_fixup) {         // If we subtracted too
 176          q[j] = q[j] - 1;       // much, add back.
 177          k = 0;
 178          for (i = 0; i < n; i++) {
 179             t = (unsigned long long)un[i+j] + vn[i] + k;
 180             un[i+j] = t;
 181             k = t >> 32;
 182          }
 183          un[j+n] = un[j+n] + k;
 184       }
 185    } // End j.
 186    // If the caller wants the remainder, unnormalize
 187    // it and pass it back.
 188    if (r != NULL) {
 189       for (i = 0; i < n-1; i++)
 190          r[i] = (un[i] >> s) | ((unsigned long long)un[i+1] << (32-s));
 191       r[n-1] = un[n-1] >> s;
 192    }
 193    return 0;
 194 }
 195
 196 int errors;
 197
 198 void check(unsigned q[], unsigned r[],
 199            unsigned u[], unsigned v[],
 200            int m, int n,
 201            unsigned cq[], unsigned cr[]) {
 202    int i, szq;
 203
 204    szq = max(m - n + 1, 1);
 205    for (i = 0; i < szq; i++) {
 206       if (q[i] != cq[i]) {
 207          errors = errors + 1;
 208          dumpit("Error, dividend u =", m, u);
 209          dumpit("       divisor  v =", n, v);
 210          dumpit("For quotient,  got:", m-n+1, q);
 211          dumpit("        Should get:", m-n+1, cq);
 212          return;
 213       }
 214    }
 215    for (i = 0; i < n; i++) {
 216       if (r[i] != cr[i]) {
 217          errors = errors + 1;
 218          dumpit("Error, dividend u =", m, u);
 219          dumpit("       divisor  v =", n, v);
 220          dumpit("For remainder, got:", n, r);
 221          dumpit("        Should get:", n, cr);
 222          return;
 223       }
 224    }
 225    return;
 226 }
 227
 228 int main() {
 229    static unsigned test[] = {
 230    // m, n, u...,          v...,          cq...,  cr....
 231       1, 1, 3,             0,             1,      1,            // Error, divide by 0.
 232       1, 2, 7,             1,3,           0,      7,0,          // Error, n > m.
 233       2, 2, 0,0,           1,0,           0,      0,0,          // Error, incorrect remainder cr.
 234       1, 1, 3,             2,             1,      1,
 235       1, 1, 3,             3,             1,      0,
 236       1, 1, 3,             4,             0,      3,
 237       1, 1, 0,             0xffffffff,    0,      0,
 238       1, 1, 0xffffffff,    1,             0xffffffff, 0,
 239       1, 1, 0xffffffff,    0xffffffff,    1,      0,
 240       1, 1, 0xffffffff,    3,             0x55555555, 0,
 241       2, 1, 0xffffffff,0xffffffff, 1,     0xffffffff,0xffffffff, 0,
 242       2, 1, 0xffffffff,0xffffffff, 0xffffffff,        1,1,    0,
 243       2, 1, 0xffffffff,0xfffffffe, 0xffffffff,        0xffffffff,0, 0xfffffffe,
 244       2, 1, 0x00005678,0x00001234, 0x00009abc,        0x1e1dba76,0, 0x6bd0,
 245       2, 2, 0,0,           0,1,           0,      0,0,
 246       2, 2, 0,7,           0,3,           2,      0,1,
 247       2, 2, 5,7,           0,3,           2,      5,1,
 248       2, 2, 0,6,           0,2,           3,      0,0,
 249       1, 1, 0x80000000,  0x40000001, 0x00000001, 0x3fffffff,
 250       2, 1, 0x00000000,0x80000000, 0x40000001, 0xfffffff8,0x00000001, 0x00000008,
 251       2, 2, 0x00000000,0x80000000, 0x00000001,0x40000000, 0x00000001, 0xffffffff,0x3fffffff,
 252       2, 2, 0x0000789a,0x0000bcde, 0x0000789a,0x0000bcde,          1,          0,0,
 253       2, 2, 0x0000789b,0x0000bcde, 0x0000789a,0x0000bcde,          1,          1,0,
 254       2, 2, 0x00007899,0x0000bcde, 0x0000789a,0x0000bcde,          0, 0x00007899,0x0000bcde,
 255       2, 2, 0x0000ffff,0x0000ffff, 0x0000ffff,0x0000ffff,          1,          0,0,
 256       2, 2, 0x0000ffff,0x0000ffff, 0x00000000,0x00000001, 0x0000ffff, 0x0000ffff,0,
 257       3, 2, 0x000089ab,0x00004567,0x00000123, 0x00000000,0x00000001,   0x00004567,0x00000123, 0x000089ab,0,
 258       3, 2, 0x00000000,0x0000fffe,0x00008000, 0x0000ffff,0x00008000,   0xffffffff,0x00000000, 0x0000ffff,0x00007fff, // Shows that first qhat can = b + 1.
 259       3, 3, 0x00000003,0x00000000,0x80000000, 0x00000001,0x00000000,0x20000000,   0x00000003, 0,0,0x20000000, // Adding back step req'd.
 260       3, 3, 0x00000003,0x00000000,0x00008000, 0x00000001,0x00000000,0x00002000,   0x00000003, 0,0,0x00002000, // Adding back step req'd.
 261       4, 3, 0,0,0x00008000,0x00007fff, 1,0,0x00008000,   0xfffe0000,0, 0x00020000,0xffffffff,0x00007fff,  // Add back req'd.
 262       4, 3, 0,0x0000fffe,0,0x00008000, 0x0000ffff,0,0x00008000, 0xffffffff,0, 0x0000ffff,0xffffffff,0x00007fff,  // Shows that mult-sub quantity cannot be treated as signed.
 263       4, 3, 0,0xfffffffe,0,0x80000000, 0x0000ffff,0,0x80000000, 0x00000000,1, 0x00000000,0xfffeffff,0x00000000,  // Shows that mult-sub quantity cannot be treated as signed.
 264       4, 3, 0,0xfffffffe,0,0x80000000, 0xffffffff,0,0x80000000, 0xffffffff,0, 0xffffffff,0xffffffff,0x7fffffff,  // Shows that mult-sub quantity cannot be treated as signed.
 265    };
 266    int i, n, m, ncases, f;
 267    unsigned q[10], r[10];
 268    unsigned *u, *v, *cq, *cr;
 269
 270    printf("divmnu:\n");
 271    i = 0;
 272    ncases = 0;
 273    while (i < sizeof(test)/4) {
 274       m = test[i];
 275       n = test[i+1];
 276       u = &test[i+2];
 277       v = &test[i+2+m];
 278       cq = &test[i+2+m+n];
 279       cr = &test[i+2+m+n+max(m-n+1, 1)];
 280
 281       f = divmnu(q, r, u, v, m, n);
 282       if (f) {
 283          dumpit("Error return code for dividend u =", m, u);
 284          dumpit("                      divisor  v =", n, v);
 285          errors = errors + 1;
 286       }
 287       else
 288          check(q, r, u, v, m, n, cq, cr);
 289       i = i + 2 + m + n + max(m-n+1, 1) + n;
 290       ncases = ncases + 1;
 291    }
 292
 293    printf("%d errors out of %d cases; there should be 3.\n", errors, ncases);
 294    return 0;
 295 }