st/egl: Add a missing break.
[mesa.git] / src / gallium / auxiliary / util / u_half.c
1
2 /*
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27 /* The code is a reimplementation of the algorithm in
28 * www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
29 * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
30 *
31 * The table contents have been slightly changed so that the exponent
32 * bias is now in the exponent table instead of the mantissa table (mostly
33 * for cosmetic reasons, and because it theoretically allows a variant
34 * that flushes denormal to zero but uses a mantissa table with 24-bit
35 * entries).
36 *
37 * The tables are also constructed slightly differently.
38 */
39
40 /* Note that using a 64K * 4 table is a terrible idea since it will not fit
41 * in the L1 cache and will massively pollute the L2 cache as well
42 *
43 * These should instead fit in the L1 cache.
44 *
45 * TODO: we could use a denormal bias table instead of the mantissa/offset
46 * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
47 * but would involve more computation
48 *
49 * Note however that if denormals are never encountered, the L1 cache usage
50 * is only about 4608 bytes anyway.
51 */
52
53 #include "util/u_half.h"
54 #include "util/u_init.h"
55
/* Half -> float mantissa table, filled by util_half_do_init():
 * entry 0 is zero, entries 1..1023 hold the denormal encodings and
 * entries 1024..2047 hold the normal mantissas shifted left by 13. */
56 uint32_t util_half_to_float_mantissa_table[2048];
/* Half -> float exponent table, filled by util_half_do_init():
 * entries 0..31 are the positive half, entries 32..63 the negative half
 * (same layout with bit 31 set).  Per the note at the top of this file,
 * the exponent bias lives in this table rather than in the mantissa table. */
57 uint32_t util_half_to_float_exponent_table[64];
/* Half -> float offset table, filled by util_half_do_init():
 * 0 for entries 0 and 32 (zero/denormals), 1024 for every other entry.
 * Presumably used as an index offset into the mantissa table by the
 * conversion helpers in util/u_half.h -- not visible here, confirm. */
58 uint32_t util_half_to_float_offset_table[64];
/* Float -> half base table, filled by util_half_do_init():
 * entries 0..255 are the positive values, entries 256..511 are the same
 * values with 0x8000 OR'ed in. */
59 uint16_t util_float_to_half_base_table[512];
/* Float -> half shift table, filled by util_half_do_init():
 * entries 256..511 are a copy of entries 0..255. */
60 uint8_t util_float_to_half_shift_table[512];
61
/* NOTE(review): nothing in this file writes this flag; presumably it is
 * managed by the init helpers pulled in through util/u_init.h -- confirm. */
62 boolean util_half_inited;
63
64 void
65 util_half_do_init(void)
66 {
67 int i;
68
69 /* zero */
70 util_half_to_float_mantissa_table[0] = 0;
71
72 /* denormals */
73 for(i = 1; i < 1024; ++i)
74 {
75 unsigned int m = i << 13;
76 unsigned int e = 0;
77
78 /* Normalize number */
79 while(!(m & 0x00800000))
80 {
81 e -= 0x00800000;
82 m <<= 1;
83 }
84 m &= ~0x00800000;
85 e += 0x38800000;
86 util_half_to_float_mantissa_table[i] = m | e;
87 }
88
89 /* normals */
90 for(i = 1024; i < 2048; ++i)
91 util_half_to_float_mantissa_table[i] = ((i - 1024) << 13);
92
93 /* positive zero or denormals */
94 util_half_to_float_exponent_table[0] = 0;
95
96 /* positive numbers */
97 for(i = 1; i <= 30; ++i)
98 util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23);
99
100 /* positive infinity/NaN */
101 util_half_to_float_exponent_table[31] = 0x7f800000;
102
103 /* negative zero or denormals */
104 util_half_to_float_exponent_table[32] = 0x80000000;
105
106 /* negative numbers */
107 for(i = 33; i <= 62; ++i)
108 util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
109
110 /* negative infinity/NaN */
111 util_half_to_float_exponent_table[63] = 0xff800000;
112
113 /* positive zero or denormals */
114 util_half_to_float_offset_table[0] = 0;
115
116 /* positive normals */
117 for(i = 1; i < 32; ++i)
118 util_half_to_float_offset_table[i] = 1024;
119
120 /* negative zero or denormals */
121 util_half_to_float_offset_table[32] = 0;
122
123 /* negative normals */
124 for(i = 33; i < 64; ++i)
125 util_half_to_float_offset_table[i] = 1024;
126
127 /* very small numbers mapping to zero */
128 for(i = -127; i < -24; ++i)
129 {
130 util_float_to_half_base_table[127 + i] = 0;
131 util_float_to_half_shift_table[127 + i] = 24;
132 }
133
134 /* small numbers mapping to denormals */
135 for(i = -24; i < -14; ++i)
136 {
137 util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i);
138 util_float_to_half_shift_table[127 + i] = -i - 1;
139 }
140
141 /* normal numbers */
142 for(i = -14; i < 16; ++i)
143 {
144 util_float_to_half_base_table[127 + i] = (i + 15) << 10;
145 util_float_to_half_shift_table[127 + i] = 13;
146 }
147
148 /* large numbers mapping to infinity */
149 for(i = 16; i < 128; ++i)
150 {
151 util_float_to_half_base_table[127 + i] = 0x7c00;
152 util_float_to_half_shift_table[127 + i] = 24;
153 }
154
155 /* infinity and NaNs */
156 util_float_to_half_base_table[255] = 0x7c00;
157 util_float_to_half_shift_table[255] = 13;
158
159 /* negative numbers */
160 for(i = 0; i < 256; ++i)
161 {
162 util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000;
163 util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i];
164 }
165 }