1 #include "util/u_half.h"
/* See www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf:
 * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008.
 */
/* Note that using a 64K * 4 table is a terrible idea since it will not fit
 * in the L1 cache and will massively pollute the L2 cache as well.
 *
 * These should instead fit in the L1 cache.
 *
 * TODO: we could use a denormal bias table instead of the mantissa/offset
 * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
 * but would involve more computation.
 *
 * Note however that if denormals are never encountered, the L1 cache usage
 * is only about 4608 bytes anyway.
 */
/*
 * Lookup tables for half <-> float conversion.
 *
 * All five tables are zero-initialized at program start (static storage)
 * and are populated by util_half_init_tables().
 */

/*
 * 2048 entries. The initializer sets entry 0 to zero, builds entries
 * 1..1023 from (i << 13) using a normalization loop, and stores
 * (i - 1024) << 13 in entries 1024..2047.
 */
uint32_t util_half_to_float_mantissa_table[2048];

/*
 * 64 entries. As written by the initializer:
 *   0, 32        -> 0x00000000 / 0x80000000   (+/- zero or denormals)
 *   1..30        -> 0x38000000 + (i << 23)
 *   33..62       -> 0xb8000000 + ((i - 32) << 23)
 *   31, 63       -> 0x7f800000 / 0xff800000   (+/- infinity/NaN)
 * NOTE(review): presumably indexed by the sign and exponent bits of the
 * half value -- confirm against util/u_half.h.
 */
uint32_t util_half_to_float_exponent_table[64];

/*
 * 64 entries. The initializer stores 0 at indices 0 and 32 and 1024 at
 * every other index.
 * NOTE(review): presumably an offset into the mantissa table (0 selects
 * the lower 1024 entries, 1024 the upper) -- confirm against util/u_half.h.
 */
uint32_t util_half_to_float_offset_table[64];

/*
 * 512 entries. The initializer fills index (127 + i) for i in [-127, 128)
 * plus index 255, then sets entries 256..511 to the matching entry from
 * 0..255 with bit 0x8000 OR-ed in (the "negative numbers" half).
 */
uint16_t util_float_to_half_base_table[512];

/*
 * 512 entries, filled alongside the base table with the same indexing.
 * Stored values are 24, (-i - 1), or 13 depending on the range of i;
 * entries 256..511 are copies of entries 0..255.
 */
uint8_t util_float_to_half_shift_table[512];
25 void util_half_init_tables(void)
30 util_half_to_float_mantissa_table
[0] = 0;
33 for(i
= 1; i
< 1024; ++i
) {
34 unsigned int m
= i
<< 13;
37 /* Normalize number */
38 while(!(m
& 0x00800000)) {
44 util_half_to_float_mantissa_table
[i
] = m
| e
;
48 for(i
= 1024; i
< 2048; ++i
)
49 util_half_to_float_mantissa_table
[i
] = ((i
-1024)<<13);
51 /* positive zero or denormals */
52 util_half_to_float_exponent_table
[0] = 0;
54 /* positive numbers */
55 for(i
= 1; i
<= 30; ++i
)
56 util_half_to_float_exponent_table
[i
] = 0x38000000 + (i
<< 23);
58 /* positive infinity/NaN */
59 util_half_to_float_exponent_table
[31] = 0x7f800000;
61 /* negative zero or denormals */
62 util_half_to_float_exponent_table
[32] = 0x80000000;
64 /* negative numbers */
65 for(i
= 33; i
<= 62; ++i
)
66 util_half_to_float_exponent_table
[i
] = 0xb8000000 + ((i
- 32) << 23);
68 /* negative infinity/NaN */
69 util_half_to_float_exponent_table
[63] = 0xff800000;
71 /* positive zero or denormals */
72 util_half_to_float_offset_table
[0] = 0;
74 /* positive normals */
75 for(i
= 1; i
< 32; ++i
)
76 util_half_to_float_offset_table
[i
] = 1024;
78 /* negative zero or denormals */
79 util_half_to_float_offset_table
[32] = 0;
81 /* negative normals */
82 for(i
= 33; i
< 64; ++i
)
83 util_half_to_float_offset_table
[i
] = 1024;
87 /* very small numbers mapping to zero */
88 for(i
= -127; i
< -24; ++i
) {
89 util_float_to_half_base_table
[127 + i
] = 0;
90 util_float_to_half_shift_table
[127 + i
] = 24;
93 /* small numbers mapping to denormals */
94 for(i
= -24; i
< -14; ++i
) {
95 util_float_to_half_base_table
[127 + i
] = 0x0400 >> (-14 - i
);
96 util_float_to_half_shift_table
[127 + i
] = -i
- 1;
100 for(i
= -14; i
< 16; ++i
) {
101 util_float_to_half_base_table
[127 + i
] = (i
+ 15) << 10;
102 util_float_to_half_shift_table
[127 + i
] = 13;
105 /* large numbers mapping to infinity */
106 for(i
= 16; i
< 128; ++i
) {
107 util_float_to_half_base_table
[127 + i
] = 0x7c00;
108 util_float_to_half_shift_table
[127 + i
] = 24;
111 /* infinity and NaNs */
112 util_float_to_half_base_table
[255] = 0x7c00;
113 util_float_to_half_shift_table
[255] = 13;
115 /* negative numbers */
116 for(i
= 0; i
< 256; ++i
) {
117 util_float_to_half_base_table
[256 + i
] = util_float_to_half_base_table
[i
] | 0x8000;
118 util_float_to_half_shift_table
[256 + i
] = util_float_to_half_shift_table
[i
];