4df64c2ccf9383ebfff77506ab16d47d976f497b
[mesa.git] / src / util / half_float.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <math.h>
26 #include <assert.h>
27 #include "half_float.h"
28 #include "rounding.h"
29
/**
 * Overlay that lets the storage of one float be read or written as a signed
 * or unsigned 32-bit integer.
 *
 * The float member stays first so that a brace initializer such as {val}
 * initializes the union from a float value.
 */
typedef union {
   float f;
   int32_t i;
   uint32_t u;
} fi_type;
31
32 /**
33 * Convert a 4-byte float to a 2-byte half float.
34 *
35 * Not all float32 values can be represented exactly as a float16 value. We
36 * round such intermediate float32 values to the nearest float16. When the
37 * float32 lies exactly between to float16 values, we round to the one with
38 * an even mantissa.
39 *
40 * This rounding behavior has several benefits:
41 * - It has no sign bias.
42 *
43 * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
44 * GPU ISA.
45 *
46 * - By reproducing the behavior of the GPU (at least on Intel hardware),
47 * compile-time evaluation of constant packHalf2x16 GLSL expressions will
48 * result in the same value as if the expression were executed on the GPU.
49 */
50 uint16_t
51 _mesa_float_to_half(float val)
52 {
53 const fi_type fi = {val};
54 const int flt_m = fi.i & 0x7fffff;
55 const int flt_e = (fi.i >> 23) & 0xff;
56 const int flt_s = (fi.i >> 31) & 0x1;
57 int s, e, m = 0;
58 uint16_t result;
59
60 /* sign bit */
61 s = flt_s;
62
63 /* handle special cases */
64 if ((flt_e == 0) && (flt_m == 0)) {
65 /* zero */
66 /* m = 0; - already set */
67 e = 0;
68 }
69 else if ((flt_e == 0) && (flt_m != 0)) {
70 /* denorm -- denorm float maps to 0 half */
71 /* m = 0; - already set */
72 e = 0;
73 }
74 else if ((flt_e == 0xff) && (flt_m == 0)) {
75 /* infinity */
76 /* m = 0; - already set */
77 e = 31;
78 }
79 else if ((flt_e == 0xff) && (flt_m != 0)) {
80 /* NaN */
81 m = 1;
82 e = 31;
83 }
84 else {
85 /* regular number */
86 const int new_exp = flt_e - 127;
87 if (new_exp < -14) {
88 /* The float32 lies in the range (0.0, min_normal16) and is rounded
89 * to a nearby float16 value. The result will be either zero, subnormal,
90 * or normal.
91 */
92 e = 0;
93 m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
94 }
95 else if (new_exp > 15) {
96 /* map this value to infinity */
97 /* m = 0; - already set */
98 e = 31;
99 }
100 else {
101 /* The float32 lies in the range
102 * [min_normal16, max_normal16 + max_step16)
103 * and is rounded to a nearby float16 value. The result will be
104 * either normal or infinite.
105 */
106 e = new_exp + 15;
107 m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
108 }
109 }
110
111 assert(0 <= m && m <= 1024);
112 if (m == 1024) {
113 /* The float32 was rounded upwards into the range of the next exponent,
114 * so bump the exponent. This correctly handles the case where f32
115 * should be rounded up to float16 infinity.
116 */
117 ++e;
118 m = 0;
119 }
120
121 result = (s << 15) | (e << 10) | m;
122 return result;
123 }
124
125
126 /**
127 * Convert a 2-byte half float to a 4-byte float.
128 * Based on code from:
129 * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
130 */
131 float
132 _mesa_half_to_float(uint16_t val)
133 {
134 /* XXX could also use a 64K-entry lookup table */
135 const int m = val & 0x3ff;
136 const int e = (val >> 10) & 0x1f;
137 const int s = (val >> 15) & 0x1;
138 int flt_m, flt_e, flt_s;
139 fi_type fi;
140 float result;
141
142 /* sign bit */
143 flt_s = s;
144
145 /* handle special cases */
146 if ((e == 0) && (m == 0)) {
147 /* zero */
148 flt_m = 0;
149 flt_e = 0;
150 }
151 else if ((e == 0) && (m != 0)) {
152 /* denorm -- denorm half will fit in non-denorm single */
153 const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
154 float mantissa = ((float) (m)) / 1024.0f;
155 float sign = s ? -1.0f : 1.0f;
156 return sign * mantissa * half_denorm;
157 }
158 else if ((e == 31) && (m == 0)) {
159 /* infinity */
160 flt_e = 0xff;
161 flt_m = 0;
162 }
163 else if ((e == 31) && (m != 0)) {
164 /* NaN */
165 flt_e = 0xff;
166 flt_m = 1;
167 }
168 else {
169 /* regular */
170 flt_e = e + 112;
171 flt_m = m << 13;
172 }
173
174 fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
175 result = fi.f;
176 return result;
177 }