src/gallium/auxiliary/util/u_half.py

   1 # Copyright 2010 Luca Barbieri
   2 #
   3 # Permission is hereby granted, free of charge, to any person obtaining
   4 # a copy of this software and associated documentation files (the
   5 # "Software"), to deal in the Software without restriction, including
   6 # without limitation the rights to use, copy, modify, merge, publish,
   7 # distribute, sublicense, and/or sell copies of the Software, and to
   8 # permit persons to whom the Software is furnished to do so, subject to
   9 # the following conditions:
  10 #
  11 # The above copyright notice and this permission notice (including the
  12 # next paragraph) shall be included in all copies or substantial
  13 # portions of the Software.
  14 #
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  18 # IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  19 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  20 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  21 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  22 #
  23 # *************************************************************************
  24
  25 # The code is a reimplementation of the algorithm in
  26 #  www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
  27 # "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
  28 #
  29 # The table contents have been slightly changed so that the exponent
  30 # bias is now in the exponent table instead of the mantissa table (mostly
  31 # for cosmetic reasons, and because it theoretically allows a variant
  32 # that flushes denormal to zero but uses a mantissa table with 24-bit
  33 # entries).
  34 #
  35 # The tables are also constructed slightly differently.
  36 #
  37
  38 # Note that using a 64K * 4 table is a terrible idea since it will not fit
  39 # in the L1 cache and will massively pollute the L2 cache as well
  40 #
  41 # These should instead fit in the L1 cache.
  42 #
  43 # TODO: we could use a denormal bias table instead of the mantissa/offset
  44 # tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
  45 # but would involve more computation
  46 #
  47 # Note however that if denormals are never encountered, the L1 cache usage
  48 # is only about 4608 bytes anyway.
  49
  50 table_index = None
  51 table_length = None
  52
  53 def begin(t, n, l):
  54         global table_length
  55         global table_index
  56         table_index = 0
  57         table_length = l
  58         print
  59         print "const " + t + " " + n + "[" + str(l) + "] = {"
  60
  61 def value(v):
  62         global table_index
  63         table_index += 1
  64         print "\t" + hex(v) + ","
  65
  66 def end():
  67         global table_length
  68         global table_index
  69         print "};"
  70         assert table_index == table_length
  71
  72 print "/* This file is autogenerated by u_half.py. Do not edit directly. */"
  73 print "#include \"util/u_half.h\""
  74
  75 begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
  76 # zero
  77 value(0)
  78
  79 # denormals
  80 for i in xrange(1, 1024):
  81         m = i << 13
  82         e = 0
  83
  84         # normalize number
  85         while (m & 0x00800000) == 0:
  86                 e -= 0x00800000;
  87                 m <<= 1;
  88
  89         m &= ~0x00800000;
  90         e += 0x38800000;
  91         value(m | e)
  92
  93 # normals
  94 for i in xrange(1024, 2048):
  95         value((i - 1024) << 13)
  96 end()
  97
  98 begin("uint32_t", "util_half_to_float_exponent_table", 64)
  99 # positive zero or denormals
 100 value(0)
 101
 102 # positive numbers
 103 for i in xrange(1, 31):
 104         value(0x38000000 + (i << 23))
 105
 106 # positive infinity/NaN
 107 value(0x7f800000)
 108
 109 # negative zero or denormals
 110 value(0x80000000)
 111
 112 # negative numbers
 113 for i in range(33, 63):
 114         value(0xb8000000 + ((i - 32) << 23))
 115
 116 # negative infinity/NaN
 117 value(0xff800000)
 118 end()
 119
 120 begin("uint32_t", "util_half_to_float_offset_table", 64)
 121 # positive zero or denormals
 122 value(0)
 123
 124 # positive normals
 125 for i in range(1, 32):
 126         value(1024)
 127
 128 # negative zero or denormals
 129 value(0)
 130
 131 # negative normals
 132 for i in xrange(33, 64):
 133         value(1024)
 134 end()
 135
 136 begin("uint16_t", "util_float_to_half_base_table", 512)
 137 for sign in (0, 0x8000):
 138         # very small numbers mapping to zero
 139         for i in xrange(-127, -24):
 140                 value(sign | 0)
 141
 142         # small numbers mapping to denormals
 143         for i in xrange(-24, -14):
 144                 value(sign | (0x400 >> (-14 -i)))
 145
 146         # normal numbers
 147         for i in xrange(-14, 16):
 148                 value(sign | ((i + 15) << 10))
 149
 150         # large numbers mapping to infinity
 151         for i in xrange(16, 128):
 152                 value(sign | 0x7c00)
 153
 154         # infinity and NaNs
 155         value(sign | 0x7c00)
 156 end()
 157
 158 begin("uint8_t", "util_float_to_half_shift_table", 512)
 159 for sign in (0, 0x8000):
 160         # very small numbers mapping to zero
 161         for i in xrange(-127, -24):
 162                 value(24)
 163
 164         # small numbers mapping to denormals
 165         for i in xrange(-24, -14):
 166                 value(-1 - i)
 167
 168         # normal numbers
 169         for i in xrange(-14, 16):
 170                 value(13)
 171
 172         # large numbers mapping to infinity
 173         for i in xrange(16, 128):
 174                 value(24)
 175
 176         # infinity and NaNs
 177         value(13)
 178 end()
 179