src/compiler/glsl/int64.glsl

   1 /* Compile with:
   2  *
   3  * glsl_compiler --version 400 --dump-builder int64.glsl > builtin_int64.h
   4  *
   5  * Version 4.00+ is required for umulExtended.
   6  */
   7 #version 400
   8 #extension GL_ARB_gpu_shader_int64: require
   9 #extension GL_ARB_shading_language_420pack: require
  10
  11 uvec2
  12 umul64(uvec2 a, uvec2 b)
  13 {
  14    uvec2 result;
  15
  16    umulExtended(a.x, b.x, result.y, result.x);
  17    result.y += a.x * b.y + a.y * b.x;
  18
  19    return result;
  20 }
  21
  22 ivec2
  23 sign64(ivec2 a)
  24 {
  25    ivec2 result;
  26
  27    result.y = a.y >> 31;
  28    result.x = result.y | int((a.x | a.y) != 0);
  29
  30    return result;
  31 }
  32
  33 uvec4
  34 udivmod64(uvec2 n, uvec2 d)
  35 {
  36    uvec2 quot = uvec2(0U, 0U);
  37    int log2_denom = findMSB(d.y) + 32;
  38
  39    /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
  40     * greater than 32 bits to occur.  If the upper 32 bits of the numerator
  41     * are zero, it is impossible for (denom << [63, 32]) <= numer unless
  42     * denom == 0.
  43     */
  44    if (d.y == 0 && n.y >= d.x) {
  45       log2_denom = findMSB(d.x);
  46
  47       /* Since the upper 32 bits of denom are zero, log2_denom <= 31 and we
  48        * don't have to compare log2_denom inside the loop as is done in the
  49        * general case (below).
  50        */
  51       for (int i = 31; i >= 1; i--) {
  52          if (log2_denom <= 31 - i && (d.x << i) <= n.y) {
  53             n.y -= d.x << i;
  54             quot.y |= 1U << i;
  55          }
  56       }
  57
  58       /* log2_denom is always <= 31, so manually peel the last loop
  59        * iteration.
  60        */
  61       if (d.x <= n.y) {
  62          n.y -= d.x;
  63          quot.y |= 1U;
  64       }
  65    }
  66
  67    uint64_t d64 = packUint2x32(d);
  68    uint64_t n64 = packUint2x32(n);
  69    for (int i = 31; i >= 1; i--) {
  70       if (log2_denom <= 63 - i && (d64 << i) <= n64) {
  71          n64 -= d64 << i;
  72          quot.x |= 1U << i;
  73       }
  74    }
  75
  76    /* log2_denom is always <= 63, so manually peel the last loop
  77     * iteration.
  78     */
  79    if (d64 <= n64) {
  80       n64 -= d64;
  81       quot.x |= 1U;
  82    }
  83
  84    return uvec4(quot, unpackUint2x32(n64));
  85 }
  86
  87 uvec2
  88 udiv64(uvec2 n, uvec2 d)
  89 {
  90    return udivmod64(n, d).xy;
  91 }
  92
  93 ivec2
  94 idiv64(ivec2 _n, ivec2 _d)
  95 {
  96    const bool negate = (_n.y < 0) != (_d.y < 0);
  97    uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));
  98    uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));
  99
 100    uvec2 quot = udivmod64(n, d).xy;
 101
 102    return negate ? unpackInt2x32(-int64_t(packUint2x32(quot))) : ivec2(quot);
 103 }
 104
 105 uvec2
 106 umod64(uvec2 n, uvec2 d)
 107 {
 108    return udivmod64(n, d).zw;
 109 }
 110
 111 ivec2
 112 imod64(ivec2 _n, ivec2 _d)
 113 {
 114    const bool negate = (_n.y < 0) != (_d.y < 0);
 115    uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));
 116    uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));
 117
 118    uvec2 rem = udivmod64(n, d).zw;
 119
 120    return negate ? unpackInt2x32(-int64_t(packUint2x32(rem))) : ivec2(rem);
 121 }