62d74d4c6f4faa8741a45b04680188e27a2b54c8
[mesa.git] / src / gallium / auxiliary / util / u_half.c
1 #include "util/u_half.h"
2
/* See www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf:
 * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008.
 */
6
/* Note that using a direct 64K * 4 byte table is a terrible idea, since it
 * will not fit in the L1 cache and will massively pollute the L2 cache as
 * well.
 *
 * The tables defined below should instead fit in the L1 cache.
 *
 * TODO: we could use a denormal bias table instead of the mantissa/offset
 * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes,
 * but would involve more computation.
 *
 * Note, however, that if denormals are never encountered, the L1 cache usage
 * is only about 4608 bytes anyway.
 */
/*
 * Lookup tables filled in by util_half_init_tables().
 *
 * Half -> float:
 *   - mantissa table: entry 0 is zero, entries 1..1023 hold complete float
 *     bit patterns for the half denormals, entries 1024..2047 hold the
 *     10-bit half mantissa moved up by 13 bits.
 *   - exponent table: one entry per 6-bit sign+exponent value of a half
 *     (entries 32..63 carry the float sign bit).
 *   - offset table: 0 for the zero/denormal entries, 1024 for all others.
 *
 * Float -> half:
 *   - base table and shift table: one entry per 9-bit sign+exponent value
 *     of a float (entries 256..511 carry the half sign bit).
 *
 * NOTE(review): the conversion routines that read these tables are not part
 * of this file (presumably util/u_half.h).  The table roles follow the paper
 * cited at the top of this file -- confirm against the actual readers.
 */
uint32_t util_half_to_float_mantissa_table[2048];
uint32_t util_half_to_float_exponent_table[64];
uint32_t util_half_to_float_offset_table[64];
uint16_t util_float_to_half_base_table[512];
uint8_t util_float_to_half_shift_table[512];

/* Fills util_half_to_float_mantissa_table (zero, denormals, normals). */
static void
half_init_mantissa_table(void)
{
   unsigned i;

   /* zero */
   util_half_to_float_mantissa_table[0] = 0;

   /* denormals: store the full bit pattern of the equivalent normalized float */
   for (i = 1; i < 1024; ++i) {
      uint32_t m = i << 13;
      uint32_t e = 0;

      /* Shift the mantissa up until the leading one reaches bit 23,
       * lowering the exponent field by one for every step (unsigned
       * wrap-around is intended; the bias added below brings it back).
       */
      while (!(m & 0x00800000)) {
         e -= 0x00800000;
         m <<= 1;
      }

      /* Drop the now-implicit leading one and add the exponent field
       * for 2^-14 (biased exponent 113).
       */
      m &= ~(uint32_t)0x00800000;
      e += 0x38800000;
      util_half_to_float_mantissa_table[i] = m | e;
   }

   /* normals: only the mantissa bits, the exponent table supplies the rest */
   for (i = 1024; i < 2048; ++i) {
      util_half_to_float_mantissa_table[i] = (i - 1024) << 13;
   }
}

/* Fills util_half_to_float_exponent_table and util_half_to_float_offset_table. */
static void
half_init_exponent_and_offset_tables(void)
{
   unsigned i;

   /* positive zero or denormals */
   util_half_to_float_exponent_table[0] = 0;

   /* positive numbers: rebias from 15 to 127, i.e. add 112 << 23 */
   for (i = 1; i <= 30; ++i) {
      util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23);
   }

   /* positive infinity/NaN */
   util_half_to_float_exponent_table[31] = 0x7f800000;

   /* negative entries are the positive ones with the float sign bit set */
   for (i = 0; i < 32; ++i) {
      util_half_to_float_exponent_table[32 + i] =
         util_half_to_float_exponent_table[i] | 0x80000000;
   }

   /* zero/denormal entries (0 and 32) use offset 0, everything else 1024 */
   for (i = 0; i < 64; ++i) {
      util_half_to_float_offset_table[i] = (i & 31) ? 1024 : 0;
   }
}

/* Fills util_float_to_half_base_table and util_float_to_half_shift_table. */
static void
half_init_float_to_half_tables(void)
{
   int e; /* unbiased float exponent; table index is 127 + e */

   for (e = -127; e <= 128; ++e) {
      uint16_t base;
      uint8_t shift;

      if (e < -24) {
         /* very small numbers mapping to zero */
         base = 0;
         shift = 24;
      } else if (e < -14) {
         /* small numbers mapping to denormals */
         base = (uint16_t)(0x0400 >> (-14 - e));
         shift = (uint8_t)(-e - 1);
      } else if (e < 16) {
         /* normal numbers */
         base = (uint16_t)((e + 15) << 10);
         shift = 13;
      } else if (e < 128) {
         /* large numbers mapping to infinity */
         base = 0x7c00;
         shift = 24;
      } else {
         /* infinity and NaNs */
         base = 0x7c00;
         shift = 13;
      }

      util_float_to_half_base_table[127 + e] = base;
      util_float_to_half_shift_table[127 + e] = shift;

      /* negative numbers: same entry with the half sign bit set */
      util_float_to_half_base_table[256 + 127 + e] = base | 0x8000;
      util_float_to_half_shift_table[256 + 127 + e] = shift;
   }
}

/**
 * Populates all half <-> float conversion lookup tables defined above.
 *
 * Takes no arguments and returns nothing; every entry of the five tables is
 * written, and each entry's value is the same on every call.
 */
void util_half_init_tables(void)
{
   half_init_mantissa_table();
   half_init_exponent_and_offset_tables();
   half_init_float_to_half_tables();
}