openpower/sv/biginteger/divgnu64.c

   1 /* original source code from Hackers-Delight
   2    https://github.com/hcs0/Hackers-Delight
   3 */
   4 /* This divides an n-word dividend by an m-word divisor, giving an
   5 n-m+1-word quotient and m-word remainder. The bignums are in arrays of
   6 words. Here a "word" is 32 bits. This routine is designed for a 64-bit
   7 machine which has a 64/64 division instruction. */
   8
   9 #include <stdbool.h>
  10 #include <stdint.h>
  11 #include <stdio.h>
  12 #include <stdlib.h> //To define "exit", req'd by XLC.
  13
  14 #define max(x, y) ((x) > (y) ? (x) : (y))
  15
  16 int nlz(unsigned x)
  17 {
  18     int n;
  19
  20     if (x == 0)
  21         return (32);
  22     n = 0;
  23     if (x <= 0x0000FFFF)
  24     {
  25         n = n + 16;
  26         x = x << 16;
  27     }
  28     if (x <= 0x00FFFFFF)
  29     {
  30         n = n + 8;
  31         x = x << 8;
  32     }
  33     if (x <= 0x0FFFFFFF)
  34     {
  35         n = n + 4;
  36         x = x << 4;
  37     }
  38     if (x <= 0x3FFFFFFF)
  39     {
  40         n = n + 2;
  41         x = x << 2;
  42     }
  43     if (x <= 0x7FFFFFFF)
  44     {
  45         n = n + 1;
  46     }
  47     return n;
  48 }
  49
  50 void dumpit(char *msg, int n, unsigned v[])
  51 {
  52     int i;
  53     printf("%s", msg);
  54     for (i = n - 1; i >= 0; i--)
  55         printf(" %08x", v[i]);
  56     printf("\n");
  57 }
  58
  59 void bigrsh(unsigned s, unsigned r[], unsigned un[], int n)
  60 {
  61     for (int i = 0; i < n - 1; i++)
  62         r[i] = (un[i] >> s) | ((unsigned long long)un[i + 1] << (32 - s));
  63     r[n - 1] = un[n - 1] >> s;
  64 }
  65
  66 void biglsh(unsigned s, unsigned vn[], unsigned const v[], int n)
  67 {
  68     for (int i = n - 1; i > 0; i--)
  69         vn[i] = (v[i] << s) | ((unsigned long long)v[i - 1] >> (32 - s));
  70     vn[0] = v[0] << s;
  71 }
  72
  73 unsigned long bigdiv(unsigned v, unsigned q[], unsigned const u[], int m)
  74 {
  75     unsigned long long k = 0; // the case of a
  76     for (int j = m - 1; j >= 0; j--)
  77     { // single-digit
  78         unsigned long d2 = (k << 32) | u[j];
  79         q[j] = d2 / v; // divisor here.
  80         k = d2 % v;
  81     }
  82     return k;
  83 }
  84
  85 bool bigmul(unsigned long long qhat, unsigned product[], unsigned vn[], int m,
  86             int n)
  87 {
  88     // Multiply and subtract.
  89     uint32_t carry = 0;
  90     // VL = n + 1
  91     // sv.maddedu product.v, vn.v, qhat.s, carry.s
  92     for (int i = 0; i <= n; i++)
  93     {
  94         uint32_t vn_v = i < n ? vn[i] : 0;
  95         uint64_t value = (uint64_t)vn_v * (uint64_t)qhat + carry;
  96         carry = (uint32_t)(value >> 32);
  97         product[i] = (uint32_t)value;
  98     }
  99     return carry != 0;
 100 }
 101
 102 bool bigadd(unsigned result[], unsigned vn[], unsigned un[], int m, int n,
 103             bool ca)
 104 {
 105     // VL = n + 1
 106     // sv.subfe un_j.v, product.v, un_j.v
 107     for (int i = 0; i <= n; i++)
 108     {
 109         uint64_t value = (uint64_t)vn[i] + (uint64_t)un[i] + ca;
 110         ca = value >> 32 != 0;
 111         result[i] = value;
 112     }
 113     return ca;
 114 }
 115
 116 bool bigsub(unsigned result[], unsigned vn[], unsigned un[], int m, int n,
 117             bool ca)
 118 {
 119     // VL = n + 1
 120     // sv.subfe un_j.v, product.v, un_j.v
 121     for (int i = 0; i <= n; i++)
 122     {
 123         uint64_t value = (uint64_t)~vn[i] + (uint64_t)un[i] + ca;
 124         ca = value >> 32 != 0;
 125         result[i] = value;
 126     }
 127     return ca;
 128 }
 129
 130 bool bigmulsub(unsigned long long qhat, unsigned vn[], unsigned un[], int j,
 131                int m, int n)
 132 {
 133     // Multiply and subtract.
 134     uint32_t product[n + 1];
 135     bigmul(qhat, product, vn, m, n);
 136     bool ca = bigsub(un, product, un, m, n, true);
 137     bool need_fixup = !ca;
 138     return need_fixup;
 139 }
 140
 141 /* q[0], r[0], u[0], and v[0] contain the LEAST significant words.
 142 (The sequence is in little-endian order).
 143
 144 This is a fairly precise implementation of Knuth's Algorithm D, for a
 145 binary computer with base b = 2**32. The caller supplies:
 146    1. Space q for the quotient, m - n + 1 words (at least one).
 147    2. Space r for the remainder (optional), n words.
 148    3. The dividend u, m words, m >= 1.
 149    4. The divisor v, n words, n >= 2.
 150 The most significant digit of the divisor, v[n-1], must be nonzero.  The
 151 dividend u may have leading zeros; this just makes the algorithm take
 152 longer and makes the quotient contain more leading zeros.  A value of
 153 NULL may be given for the address of the remainder to signify that the
 154 caller does not want the remainder.
 155    The program does not alter the input parameters u and v.
 156    The quotient and remainder returned may have leading zeros.  The
 157 function itself returns a value of 0 for success and 1 for invalid
 158 parameters (e.g., division by 0).
 159    For now, we must have m >= n.  Knuth's Algorithm D also requires
 160 that the dividend be at least as long as the divisor.  (In his terms,
 161 m >= 0 (unstated).  Therefore m+n >= n.) */
 162
 163 int divmnu(unsigned q[], unsigned r[], const unsigned u[], const unsigned v[],
 164            int m, int n)
 165 {
 166
 167     const unsigned long long b = 1LL << 32; // Number base (2**32).
 168     unsigned *un, *vn;                      // Normalized form of u, v.
 169     unsigned long long qhat;                // Estimated quotient digit.
 170     unsigned long long rhat;                // A remainder.
 171     int s, j;
 172
 173     if (m < n || n <= 0 || v[n - 1] == 0)
 174         return 1; // Return if invalid param.
 175
 176     if (n == 1)
 177     { // Take care of
 178         unsigned long k = bigdiv(v[0], q, u, m);
 179         if (r != NULL)
 180             r[0] = k;
 181         return 0;
 182     }
 183
 184     /* Normalize by shifting v left just enough so that its high-order
 185     bit is on, and shift u left the same amount. We may have to append a
 186     high-order digit on the dividend; we do that unconditionally. */
 187
 188     s = nlz(v[n - 1]); // 0 <= s <= 31.
 189
 190     vn = (unsigned *)alloca(4 * n);
 191     biglsh(s, vn, v, n);
 192
 193     un = (unsigned *)alloca(4 * (m + 1));
 194     un[m] = (unsigned long long)u[m - 1] >> (32 - s); // extra digit
 195     biglsh(s, un, u, m);
 196
 197     for (j = m - n; j >= 0; j--)
 198     { // Main loop.
 199         // Compute estimate qhat of q[j] from top 2 digits
 200         uint64_t dig2 = ((uint64_t)un[j + n] << 32) | un[j + n - 1];
 201         qhat = dig2 / vn[n - 1];
 202         rhat = dig2 % vn[n - 1];
 203     again:
 204         // use 3rd-from-top digit to obtain better accuracy
 205         if (qhat >= b || qhat * vn[n - 2] > b * rhat + un[j + n - 2])
 206         {
 207             qhat = qhat - 1;
 208             rhat = rhat + vn[n - 1];
 209             if (rhat < b)
 210                 goto again;
 211         }
 212
 213         uint32_t *un_j = &un[j];
 214         bool need_fixup = bigmulsub(qhat, vn, un_j, j, m, n);
 215
 216         q[j] = qhat; // Store quotient digit.
 217         if (need_fixup)
 218         {                    // If we subtracted too
 219             q[j] = q[j] - 1; // much, add back.
 220             bigadd(un_j, vn, un_j, m, n, 0);
 221         }
 222     } // End j.
 223     // If the caller wants the remainder, unnormalize
 224     // it and pass it back.
 225     if (r != NULL)
 226         bigrsh(s, r, un, n);
 227     return 0;
 228 }
 229
 230 int errors;
 231
 232 void check(unsigned q[], unsigned r[], unsigned u[], unsigned v[], int m,
 233            int n, unsigned cq[], unsigned cr[])
 234 {
 235     int i, szq;
 236
 237     szq = max(m - n + 1, 1);
 238     for (i = 0; i < szq; i++)
 239     {
 240         if (q[i] != cq[i])
 241         {
 242             errors = errors + 1;
 243             dumpit("Error, dividend u =", m, u);
 244             dumpit("       divisor  v =", n, v);
 245             dumpit("For quotient,  got:", m - n + 1, q);
 246             dumpit("        Should get:", m - n + 1, cq);
 247             return;
 248         }
 249     }
 250     for (i = 0; i < n; i++)
 251     {
 252         if (r[i] != cr[i])
 253         {
 254             errors = errors + 1;
 255             dumpit("Error, dividend u =", m, u);
 256             dumpit("       divisor  v =", n, v);
 257             dumpit("For remainder, got:", n, r);
 258             dumpit("        Should get:", n, cr);
 259             return;
 260         }
 261     }
 262     return;
 263 }
 264
 265 int main()
 266 {
 267     static struct
 268     {
 269         int m, n;
 270         uint32_t u[10], v[10], cq[10], cr[10];
 271         bool error;
 272     } test[] = {
 273         // clang-format off
 274         {.m=1, .n=1, .u={3}, .v={0}, .cq={1}, .cr={1}, .error=true}, // Error, divide by 0.
 275         {.m=1, .n=2, .u={7}, .v={1,3}, .cq={0}, .cr={7,0}, .error=true}, // Error, n > m.
 276         {.m=2, .n=2, .u={0,0}, .v={1,0}, .cq={0}, .cr={0,0}, .error=true}, // Error, incorrect remainder cr.
 277         {.m=1, .n=1, .u={3}, .v={2}, .cq={1}, .cr={1}},
 278         {.m=1, .n=1, .u={3}, .v={3}, .cq={1}, .cr={0}},
 279         {.m=1, .n=1, .u={3}, .v={4}, .cq={0}, .cr={3}},
 280         {.m=1, .n=1, .u={0}, .v={0xffffffff}, .cq={0}, .cr={0}},
 281         {.m=1, .n=1, .u={0xffffffff}, .v={1}, .cq={0xffffffff}, .cr={0}},
 282         {.m=1, .n=1, .u={0xffffffff}, .v={0xffffffff}, .cq={1}, .cr={0}},
 283         {.m=1, .n=1, .u={0xffffffff}, .v={3}, .cq={0x55555555}, .cr={0}},
 284         {.m=2, .n=1, .u={0xffffffff,0xffffffff}, .v={1}, .cq={0xffffffff,0xffffffff}, .cr={0}},
 285         {.m=2, .n=1, .u={0xffffffff,0xffffffff}, .v={0xffffffff}, .cq={1,1}, .cr={0}},
 286         {.m=2, .n=1, .u={0xffffffff,0xfffffffe}, .v={0xffffffff}, .cq={0xffffffff,0}, .cr={0xfffffffe}},
 287         {.m=2, .n=1, .u={0x00005678,0x00001234}, .v={0x00009abc}, .cq={0x1e1dba76,0}, .cr={0x6bd0}},
 288         {.m=2, .n=2, .u={0,0}, .v={0,1}, .cq={0}, .cr={0,0}},
 289         {.m=2, .n=2, .u={0,7}, .v={0,3}, .cq={2}, .cr={0,1}},
 290         {.m=2, .n=2, .u={5,7}, .v={0,3}, .cq={2}, .cr={5,1}},
 291         {.m=2, .n=2, .u={0,6}, .v={0,2}, .cq={3}, .cr={0,0}},
 292         {.m=1, .n=1, .u={0x80000000}, .v={0x40000001}, .cq={0x00000001}, .cr={0x3fffffff}},
 293         {.m=2, .n=1, .u={0x00000000,0x80000000}, .v={0x40000001}, .cq={0xfffffff8,0x00000001}, .cr={0x00000008}},
 294         {.m=2, .n=2, .u={0x00000000,0x80000000}, .v={0x00000001,0x40000000}, .cq={0x00000001}, .cr={0xffffffff,0x3fffffff}},
 295         {.m=2, .n=2, .u={0x0000789a,0x0000bcde}, .v={0x0000789a,0x0000bcde}, .cq={1}, .cr={0,0}},
 296         {.m=2, .n=2, .u={0x0000789b,0x0000bcde}, .v={0x0000789a,0x0000bcde}, .cq={1}, .cr={1,0}},
 297         {.m=2, .n=2, .u={0x00007899,0x0000bcde}, .v={0x0000789a,0x0000bcde}, .cq={0}, .cr={0x00007899,0x0000bcde}},
 298         {.m=2, .n=2, .u={0x0000ffff,0x0000ffff}, .v={0x0000ffff,0x0000ffff}, .cq={1}, .cr={0,0}},
 299         {.m=2, .n=2, .u={0x0000ffff,0x0000ffff}, .v={0x00000000,0x00000001}, .cq={0x0000ffff}, .cr={0x0000ffff,0}},
 300         {.m=3, .n=2, .u={0x000089ab,0x00004567,0x00000123}, .v={0x00000000,0x00000001}, .cq={0x00004567,0x00000123}, .cr={0x000089ab,0}},
 301         {.m=3, .n=2, .u={0x00000000,0x0000fffe,0x00008000}, .v={0x0000ffff,0x00008000}, .cq={0xffffffff,0x00000000}, .cr={0x0000ffff,0x00007fff}}, // Shows that first qhat can = b + 1.
 302         {.m=3, .n=3, .u={0x00000003,0x00000000,0x80000000}, .v={0x00000001,0x00000000,0x20000000}, .cq={0x00000003}, .cr={0,0,0x20000000}}, // Adding back step req'd.
 303         {.m=3, .n=3, .u={0x00000003,0x00000000,0x00008000}, .v={0x00000001,0x00000000,0x00002000}, .cq={0x00000003}, .cr={0,0,0x00002000}}, // Adding back step req'd.
 304         {.m=4, .n=3, .u={0,0,0x00008000,0x00007fff}, .v={1,0,0x00008000}, .cq={0xfffe0000,0}, .cr={0x00020000,0xffffffff,0x00007fff}}, // Add back req'd.
 305         {.m=4, .n=3, .u={0,0x0000fffe,0,0x00008000}, .v={0x0000ffff,0,0x00008000}, .cq={0xffffffff,0}, .cr={0x0000ffff,0xffffffff,0x00007fff}}, // Shows that mult-sub quantity cannot be treated as signed.
 306         {.m=4, .n=3, .u={0,0xfffffffe,0,0x80000000}, .v={0x0000ffff,0,0x80000000}, .cq={0x00000000,1}, .cr={0x00000000,0xfffeffff,0x00000000}}, // Shows that mult-sub quantity cannot be treated as signed.
 307         {.m=4, .n=3, .u={0,0xfffffffe,0,0x80000000}, .v={0xffffffff,0,0x80000000}, .cq={0xffffffff,0}, .cr={0xffffffff,0xffffffff,0x7fffffff}}, // Shows that mult-sub quantity cannot be treated as signed.
 308         // clang-format on
 309     };
 310     const int ncases = sizeof(test) / sizeof(test[0]);
 311     unsigned q[10], r[10];
 312
 313     printf("divmnu:\n");
 314     for (int i = 0; i < ncases; i++)
 315     {
 316         int m = test[i].m;
 317         int n = test[i].n;
 318         uint32_t *u = test[i].u;
 319         uint32_t *v = test[i].v;
 320         uint32_t *cq = test[i].cq;
 321         uint32_t *cr = test[i].cr;
 322
 323         int f = divmnu(q, r, u, v, m, n);
 324         if (f && !test[i].error)
 325         {
 326             dumpit("Unexpected error return code for dividend u =", m, u);
 327             dumpit("                                 divisor  v =", n, v);
 328             errors = errors + 1;
 329         }
 330         else if (!f && test[i].error)
 331         {
 332             dumpit("Unexpected success return code for dividend u =", m, u);
 333             dumpit("                                 divisor  v =", n, v);
 334             errors = errors + 1;
 335         }
 336
 337         if (!f)
 338             check(q, r, u, v, m, n, cq, cr);
 339     }
 340
 341     printf("%d test failures out of %d cases.\n", errors, ncases);
 342     return 0;
 343 }