2 * Copyright 2010 Luca Barbieri
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial
14 * portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 **************************************************************************/
/* The code is a reimplementation of the algorithm in
 *  www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
 * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
 *
 * The table contents have been slightly changed so that the exponent
 * bias is now in the exponent table instead of the mantissa table (mostly
 * for cosmetic reasons, and because it theoretically allows a variant
 * that flushes denormals to zero but uses a mantissa table with 24-bit
 * entries).
 *
 * The tables are also constructed slightly differently.
 */
/* Note that using a 64K * 4 table is a terrible idea since it will not fit
 * in the L1 cache and will massively pollute the L2 cache as well.
 *
 * These tables should instead fit in the L1 cache.
 *
 * TODO: we could use a denormal bias table instead of the mantissa/offset
 * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
 * but would involve more computation.
 *
 * Note however that if denormals are never encountered, the L1 cache usage
 * is only about 4608 bytes anyway.
 */
52 #include "util/u_half.h"
/*
 * Conversion lookup tables.  They have static storage, so every entry reads
 * as zero until util_half_init_tables() has been called.
 * NOTE(review): the lookups that consume these tables are presumably the
 * helpers declared in util/u_half.h -- confirm the exact formulas there.
 */

/* Half -> float mantissa bits.
 *   [0]           zero
 *   [1..1023]     half denormals, renormalised into float bit patterns
 *   [1024..2047]  the 10-bit mantissa of normal halves, shifted left by 13
 */
uint32_t util_half_to_float_mantissa_table[2048];

/* Half -> float sign and exponent bits (exponent bias included).
 *   [0..31]   positive: zero/denormal, exponents 1..30, infinity/NaN
 *   [32..63]  the same with the sign bit set
 */
uint32_t util_half_to_float_exponent_table[64];

/* Half -> float: base index into the mantissa table; 0 selects the
 * zero/denormal half, 1024 selects the normal half.  Same indexing as the
 * exponent table.
 */
uint32_t util_half_to_float_offset_table[64];

/* Float -> half sign and exponent bits.  Index is (127 + exponent) for
 * positive values and 256 more than that for negative values.
 */
uint16_t util_float_to_half_base_table[512];

/* Float -> half: right-shift count for the float mantissa, indexed like
 * util_float_to_half_base_table.
 */
uint8_t util_float_to_half_shift_table[512];

/**
 * Populate all five conversion tables.
 *
 * Takes no arguments and returns nothing.  Every entry of every table is
 * assigned a value that depends only on its index, so a repeated call
 * recomputes identical contents.
 */
void util_half_init_tables(void)
{
   int i;

   /* zero */
   util_half_to_float_mantissa_table[0] = 0;

   /* denormals */
   for (i = 1; i < 1024; ++i) {
      unsigned int m = (unsigned int)i << 13;
      unsigned int e = 0;

      /* Normalize number: shift the mantissa up until the implicit leading
       * one reaches bit 23, lowering the exponent once per shift.  `e` is
       * unsigned, so the subtraction wraps and is undone by the bias below.
       */
      while (!(m & 0x00800000u)) {
         e -= 0x00800000u;
         m <<= 1;
      }

      /* Drop the now-implicit leading one and apply the exponent bias. */
      m &= ~0x00800000u;
      e += 0x38800000u;
      util_half_to_float_mantissa_table[i] = m | e;
   }

   /* normals */
   for (i = 1024; i < 2048; ++i)
      util_half_to_float_mantissa_table[i] = (uint32_t)(i - 1024) << 13;

   /* positive zero or denormals */
   util_half_to_float_exponent_table[0] = 0;

   /* positive numbers */
   for (i = 1; i <= 30; ++i)
      util_half_to_float_exponent_table[i] = 0x38000000u + ((uint32_t)i << 23);

   /* positive infinity/NaN */
   util_half_to_float_exponent_table[31] = 0x7f800000u;

   /* negative zero or denormals */
   util_half_to_float_exponent_table[32] = 0x80000000u;

   /* negative numbers */
   for (i = 33; i <= 62; ++i)
      util_half_to_float_exponent_table[i] =
         0xb8000000u + ((uint32_t)(i - 32) << 23);

   /* negative infinity/NaN */
   util_half_to_float_exponent_table[63] = 0xff800000u;

   /* positive zero or denormals */
   util_half_to_float_offset_table[0] = 0;

   /* positive normals */
   for (i = 1; i < 32; ++i)
      util_half_to_float_offset_table[i] = 1024;

   /* negative zero or denormals */
   util_half_to_float_offset_table[32] = 0;

   /* negative normals */
   for (i = 33; i < 64; ++i)
      util_half_to_float_offset_table[i] = 1024;

   /* In the loops below `i` is the unbiased float exponent. */

   /* very small numbers mapping to zero */
   for (i = -127; i < -24; ++i) {
      util_float_to_half_base_table[127 + i] = 0;
      util_float_to_half_shift_table[127 + i] = 24;
   }

   /* small numbers mapping to denormals */
   for (i = -24; i < -14; ++i) {
      util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i);
      util_float_to_half_shift_table[127 + i] = -i - 1;
   }

   /* normal numbers */
   for (i = -14; i < 16; ++i) {
      util_float_to_half_base_table[127 + i] = (i + 15) << 10;
      util_float_to_half_shift_table[127 + i] = 13;
   }

   /* large numbers mapping to infinity */
   for (i = 16; i < 128; ++i) {
      util_float_to_half_base_table[127 + i] = 0x7c00;
      util_float_to_half_shift_table[127 + i] = 24;
   }

   /* infinity and NaNs */
   util_float_to_half_base_table[255] = 0x7c00;
   util_float_to_half_shift_table[255] = 13;

   /* negative numbers: mirror the positive half with the sign bit set */
   for (i = 0; i < 256; ++i) {
      util_float_to_half_base_table[256 + i] =
         util_float_to_half_base_table[i] | 0x8000;
      util_float_to_half_shift_table[256 + i] =
         util_float_to_half_shift_table[i];
   }
}