Merge branch '7.8'
[mesa.git] / src / gallium / auxiliary / util / u_half.py
1 # Copyright 2010 Luca Barbieri
2 #
3 # Permission is hereby granted, free of charge, to any person obtaining
4 # a copy of this software and associated documentation files (the
5 # "Software"), to deal in the Software without restriction, including
6 # without limitation the rights to use, copy, modify, merge, publish,
7 # distribute, sublicense, and/or sell copies of the Software, and to
8 # permit persons to whom the Software is furnished to do so, subject to
9 # the following conditions:
10 #
11 # The above copyright notice and this permission notice (including the
12 # next paragraph) shall be included in all copies or substantial
13 # portions of the Software.
14 #
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 # IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
19 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 #
23 # *************************************************************************
24
25 # The code is a reimplementation of the algorithm in
26 # www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
27 # "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
28 #
29 # The table contents have been slightly changed so that the exponent
30 # bias is now in the exponent table instead of the mantissa table (mostly
31 # for cosmetic reasons, and because it theoretically allows a variant
32 # that flushes denormals to zero but uses a mantissa table with 24-bit
33 # entries).
34 #
35 # The tables are also constructed slightly differently.
36 #
37
38 # Note that using a 64K * 4 table is a terrible idea since it will not fit
39 # in the L1 cache and will massively pollute the L2 cache as well
40 #
41 # These should instead fit in the L1 cache.
42 #
43 # TODO: we could use a denormal bias table instead of the mantissa/offset
44 # tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
45 # but would involve more computation
46 #
47 # Note however that if denormals are never encountered, the L1 cache usage
48 # is only about 4608 bytes anyway.
49
# Bookkeeping shared by begin(), value() and end(): the number of entries
# written for the table currently being emitted, and the length that table
# was declared with.  Both are unset until begin() is called.
table_index, table_length = None, None
52
def begin(t, n, l):
    """Start a C array definition on stdout.

    Resets the entry counter and records the declared length so that end()
    can check that exactly ``l`` entries were written with value().

    t -- C element type of the array, e.g. "uint32_t"
    n -- C identifier of the array
    l -- number of entries the array is declared with
    """
    global table_length
    global table_index
    table_index = 0
    table_length = l
    # The single-argument print(...) form behaves the same on Python 2 and
    # Python 3.  print("") emits the blank separator line; a bare print()
    # would output "()" on Python 2.
    print("")
    print("const " + t + " " + n + "[" + str(l) + "] = {")
60
def value(v):
    """Write one table entry to stdout as a tab-indented hexadecimal literal.

    The entry is also counted so that end() can verify the table length.

    v -- integer value of the entry
    """
    global table_index
    table_index += 1
    # On Python 2, hex() of a long ends in "L"; strip it so the generated
    # literal does not depend on the interpreter's native int width.
    print("\t" + hex(v).rstrip("L") + ",")
65
def end():
    """Close the current C array definition and check its entry count.

    Raises AssertionError if the number of value() calls made since begin()
    differs from the length that was passed to begin().
    """
    # The globals are only read here, so no ``global`` declaration is needed.
    print("};")
    assert table_index == table_length, (
        "table has %s entries, expected %s" % (table_index, table_length))
71
# Header of the generated output.  The parenthesized single-argument form of
# print behaves the same on Python 2 and Python 3.
print("/* This file is autogenerated by u_half.py. Do not edit directly. */")
print("#include \"util/u_half.h\"")
74
# half -> float mantissa table: 1 entry for zero, 1023 for the denormals and
# 1024 for the normals.
begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
# zero
value(0)

# denormals
for i in range(1, 1024):
    m = i << 13
    e = 0

    # normalize number: shift the mantissa up until its leading one reaches
    # bit 23, lowering the exponent field by one for every shift
    while (m & 0x00800000) == 0:
        e -= 0x00800000
        m <<= 1

    # drop the leading one (it becomes the implicit bit) and rebase the
    # exponent; 0x38800000 is the float32 bit pattern of 2**-14
    m &= ~0x00800000
    e += 0x38800000
    value(m | e)

# normals
for i in range(1024, 2048):
    value((i - 1024) << 13)
end()
97
# half -> float exponent table: 32 entries for positive values followed by
# 32 entries for negative values (sign bit 0x80000000 set).
begin("uint32_t", "util_half_to_float_exponent_table", 64)
# positive zero or denormals
value(0)

# positive numbers
for i in range(1, 31):
    value(0x38000000 + (i << 23))

# positive infinity/NaN
value(0x7f800000)

# negative zero or denormals
value(0x80000000)

# negative numbers
for i in range(33, 63):
    value(0xb8000000 + ((i - 32) << 23))

# negative infinity/NaN
value(0xff800000)
end()
119
# half -> float offset table: 0 for the zero/denormal entries, 1024 for
# every other entry, laid out as 32 positive then 32 negative entries.
begin("uint32_t", "util_half_to_float_offset_table", 64)
# positive zero or denormals
value(0)

# positive normals and infinity/NaN
for i in range(1, 32):
    value(1024)

# negative zero or denormals
value(0)

# negative normals and infinity/NaN
for i in range(33, 64):
    value(1024)
end()
135
# float -> half base table: 256 entries per sign; within each half, i runs
# over the values -127..127 followed by one final entry for infinity/NaN.
begin("uint16_t", "util_float_to_half_base_table", 512)
for sign in (0, 0x8000):
    # very small numbers mapping to zero
    for i in range(-127, -24):
        value(sign | 0)

    # small numbers mapping to denormals
    for i in range(-24, -14):
        value(sign | (0x400 >> (-14 - i)))

    # normal numbers
    for i in range(-14, 16):
        value(sign | ((i + 15) << 10))

    # large numbers mapping to infinity
    for i in range(16, 128):
        value(sign | 0x7c00)

    # infinity and NaNs
    value(sign | 0x7c00)
end()
157
# float -> half shift table: the same 256 entries are emitted twice, once
# per sign, so the loop variable ``sign`` is intentionally unused below.
begin("uint8_t", "util_float_to_half_shift_table", 512)
for sign in (0, 0x8000):
    # very small numbers mapping to zero
    for i in range(-127, -24):
        value(24)

    # small numbers mapping to denormals
    for i in range(-24, -14):
        value(-1 - i)

    # normal numbers
    for i in range(-14, 16):
        value(13)

    # large numbers mapping to infinity
    for i in range(16, 128):
        value(24)

    # infinity and NaNs
    value(13)
end()
179