/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef NIR_FORMAT_CONVERT_H
#define NIR_FORMAT_CONVERT_H

#include "nir_builder.h"

#include "util/format_rgb9e5.h"

/* Shifts value left by left_shift bits if it is positive, logically right
 * by -left_shift bits if it is negative, and returns it unmodified when
 * left_shift is zero.
 */
static inline nir_ssa_def *
nir_shift(nir_builder *b, nir_ssa_def *value, int left_shift)
{
   if (left_shift > 0)
      return nir_ishl(b, value, nir_imm_int(b, left_shift));
   else if (left_shift < 0)
      return nir_ushr(b, value, nir_imm_int(b, -left_shift));
   else
      return value;
}

/* ANDs src with mask, then shifts the result according to nir_shift(). */
static inline nir_ssa_def *
nir_mask_shift(struct nir_builder *b, nir_ssa_def *src,
               uint32_t mask, int left_shift)
{
   return nir_shift(b, nir_iand(b, src, nir_imm_int(b, mask)), left_shift);
}

/* Masks and shifts src, then ORs the result into dst. */
static inline nir_ssa_def *
nir_mask_shift_or(struct nir_builder *b, nir_ssa_def *dst, nir_ssa_def *src,
                  uint32_t src_mask, int src_left_shift)
{
   return nir_ior(b, nir_mask_shift(b, src, src_mask, src_left_shift), dst);
}
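
/* For illustration (not part of the original header): pulling the 6-bit
 * green field of an RGB565 value v down to bit zero is
 * nir_mask_shift(b, v, 0x000007e0, -5), and nir_mask_shift_or() does the
 * same while ORing the result into a word being assembled.
 */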

/* Masks each component of src down to the bit width given in bits. */
static inline nir_ssa_def *
nir_format_mask_uvec(nir_builder *b, nir_ssa_def *src, const unsigned *bits)
{
   nir_const_value mask;
   for (unsigned i = 0; i < src->num_components; i++) {
      assert(bits[i] < 32);
      mask.u32[i] = (1u << bits[i]) - 1;
   }
   return nir_iand(b, src, nir_build_imm(b, src->num_components, 32, mask));
}

/* Sign-extends each component of src from bits[i] bits by shifting the
 * field up to the top of the register and arithmetic-shifting it back down.
 */
static inline nir_ssa_def *
nir_format_sign_extend_ivec(nir_builder *b, nir_ssa_def *src,
                            const unsigned *bits)
{
   assert(src->num_components <= 4);
   nir_ssa_def *comps[4];
   for (unsigned i = 0; i < src->num_components; i++) {
      nir_ssa_def *shift = nir_imm_int(b, src->bit_size - bits[i]);
      comps[i] = nir_ishr(b, nir_ishl(b, nir_channel(b, src, i), shift), shift);
   }
   return nir_vec(b, comps, src->num_components);
}
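
/* Worked example of the trick above: for a 4-bit field in a 32-bit value,
 * shift is 28, so 0x0000000e << 28 == 0xe0000000, and an arithmetic shift
 * right by 28 yields 0xfffffffe == -2, the signed value of the 4-bit
 * pattern 0xe.
 */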

/* Unpacks bit-packed fields from a scalar according to bits, treating each
 * field as signed (sign-extended) or unsigned as requested.
 */
static inline nir_ssa_def *
nir_format_unpack_int(nir_builder *b, nir_ssa_def *packed,
                      const unsigned *bits, unsigned num_components,
                      bool sign_extend)
{
   assert(num_components >= 1 && num_components <= 4);
   const unsigned bit_size = packed->bit_size;
   nir_ssa_def *comps[4];

   if (bits[0] >= bit_size) {
      assert(bits[0] == bit_size);
      assert(num_components == 1);
      return packed;
   }

   unsigned offset = 0;
   for (unsigned i = 0; i < num_components; i++) {
      assert(bits[i] < bit_size);
      assert(offset + bits[i] <= bit_size);
      nir_ssa_def *lshift = nir_imm_int(b, bit_size - (offset + bits[i]));
      nir_ssa_def *rshift = nir_imm_int(b, bit_size - bits[i]);
      if (sign_extend)
         comps[i] = nir_ishr(b, nir_ishl(b, packed, lshift), rshift);
      else
         comps[i] = nir_ushr(b, nir_ishl(b, packed, lshift), rshift);
      offset += bits[i];
   }
   assert(offset <= bit_size);

   return nir_vec(b, comps, num_components);
}

static inline nir_ssa_def *
nir_format_unpack_uint(nir_builder *b, nir_ssa_def *packed,
                       const unsigned *bits, unsigned num_components)
{
   return nir_format_unpack_int(b, packed, bits, num_components, false);
}

static inline nir_ssa_def *
nir_format_unpack_sint(nir_builder *b, nir_ssa_def *packed,
                       const unsigned *bits, unsigned num_components)
{
   return nir_format_unpack_int(b, packed, bits, num_components, true);
}
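
/* A usage sketch for clarity (hypothetical helper, not in the original
 * header): unpacking an R5G6B5 pixel held in the low 16 bits of a 32-bit
 * scalar into a uvec3 of raw channel values.
 */
static inline nir_ssa_def *
example_unpack_r5g6b5(nir_builder *b, nir_ssa_def *packed)
{
   static const unsigned bits[3] = { 5, 6, 5 };
   return nir_format_unpack_uint(b, packed, bits, 3);
}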

/* Packs the low bits[i] bits of each component of color into consecutive
 * bit fields of a scalar, least significant component first.  The caller
 * must ensure the high bits of each component are already zero (see
 * nir_format_pack_uint() below).
 */
static inline nir_ssa_def *
nir_format_pack_uint_unmasked(nir_builder *b, nir_ssa_def *color,
                              const unsigned *bits, unsigned num_components)
{
   assert(num_components >= 1 && num_components <= 4);
   nir_ssa_def *packed = nir_imm_int(b, 0);
   unsigned offset = 0;
   for (unsigned i = 0; i < num_components; i++) {
      packed = nir_ior(b, packed, nir_shift(b, nir_channel(b, color, i),
                                            offset));
      offset += bits[i];
   }
   assert(offset <= packed->bit_size);

   return packed;
}

static inline nir_ssa_def *
nir_format_pack_uint(nir_builder *b, nir_ssa_def *color,
                     const unsigned *bits, unsigned num_components)
{
   return nir_format_pack_uint_unmasked(b, nir_format_mask_uvec(b, color, bits),
                                        bits, num_components);
}
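
/* The packing counterpart of the sketch above (again hypothetical): packing
 * a uvec4 color into an RGB10A2 word.  nir_format_pack_uint() masks each
 * channel to its width before shifting it into place.
 */
static inline nir_ssa_def *
example_pack_r10g10b10a2(nir_builder *b, nir_ssa_def *color)
{
   static const unsigned bits[4] = { 10, 10, 10, 2 };
   return nir_format_pack_uint(b, color, bits, 4);
}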

/* Reinterprets a vector of src_bits-sized unsigned values as a vector of
 * dst_bits-sized values, as if the source components were stored
 * contiguously in memory.  "Unmasked" because, when widening, the high bits
 * of each source component are assumed to already be zero.
 */
static inline nir_ssa_def *
nir_format_bitcast_uvec_unmasked(nir_builder *b, nir_ssa_def *src,
                                 unsigned src_bits, unsigned dst_bits)
{
   assert(src->bit_size >= src_bits && src->bit_size >= dst_bits);
   assert(src_bits == 8 || src_bits == 16 || src_bits == 32);
   assert(dst_bits == 8 || dst_bits == 16 || dst_bits == 32);

   if (src_bits == dst_bits)
      return src;

   const unsigned dst_components =
      DIV_ROUND_UP(src->num_components * src_bits, dst_bits);
   assert(dst_components <= 4);

   nir_ssa_def *dst_chan[4] = {0};
   if (dst_bits > src_bits) {
      /* Widening: OR several source components into each destination. */
      unsigned shift = 0;
      unsigned dst_idx = 0;
      for (unsigned i = 0; i < src->num_components; i++) {
         nir_ssa_def *shifted = nir_ishl(b, nir_channel(b, src, i),
                                         nir_imm_int(b, shift));
         if (shift == 0) {
            dst_chan[dst_idx] = shifted;
         } else {
            dst_chan[dst_idx] = nir_ior(b, dst_chan[dst_idx], shifted);
         }

         shift += src_bits;
         if (shift >= dst_bits) {
            dst_idx++;
            shift = 0;
         }
      }
   } else {
      /* Narrowing: extract several destination components from each source. */
      nir_ssa_def *mask = nir_imm_int(b, ~0u >> (32 - dst_bits));

      unsigned src_idx = 0;
      unsigned shift = 0;
      for (unsigned i = 0; i < dst_components; i++) {
         dst_chan[i] = nir_iand(b, nir_ushr(b, nir_channel(b, src, src_idx),
                                            nir_imm_int(b, shift)),
                                mask);
         shift += dst_bits;
         if (shift >= src_bits) {
            src_idx++;
            shift = 0;
         }
      }
   }

   return nir_vec(b, dst_chan, dst_components);
}
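
/* For instance (illustrative), nir_format_bitcast_uvec_unmasked(b, v, 8, 32)
 * fuses a uvec4 of bytes into one 32-bit scalar, and the (b, v, 32, 8)
 * direction splits that scalar back into four byte-sized components.
 */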

/* Builds the per-component normalization factor for (s)norm conversion as a
 * vector of float immediates.
 */
static inline nir_ssa_def *
_nir_format_norm_factor(nir_builder *b, const unsigned *bits,
                        unsigned num_components,
                        bool is_signed)
{
   nir_const_value factor;
   for (unsigned i = 0; i < num_components; i++) {
      assert(bits[i] < 32);
      factor.f32[i] = (1ul << (bits[i] - is_signed)) - 1;
   }
   return nir_build_imm(b, num_components, 32, factor);
}
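
/* The factor is 2^n - 1 for an n-bit unorm and 2^(n-1) - 1 for an n-bit
 * snorm, so unorm8 decodes as u / 255.0 and snorm8 as i / 127.0 (with the
 * clamp to -1.0 applied below).
 */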

static inline nir_ssa_def *
nir_format_unorm_to_float(nir_builder *b, nir_ssa_def *u, const unsigned *bits)
{
   nir_ssa_def *factor =
      _nir_format_norm_factor(b, bits, u->num_components, false);

   return nir_fdiv(b, nir_u2f32(b, u), factor);
}

static inline nir_ssa_def *
nir_format_snorm_to_float(nir_builder *b, nir_ssa_def *s, const unsigned *bits)
{
   nir_ssa_def *factor =
      _nir_format_norm_factor(b, bits, s->num_components, true);

   return nir_fmax(b, nir_fdiv(b, nir_i2f32(b, s), factor),
                   nir_imm_float(b, -1.0f));
}
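
/* The nir_fmax() above is needed because the snorm range is asymmetric: the
 * snorm8 encoding -128 would otherwise decode to -128/127 ≈ -1.008, while
 * both GL and Vulkan require it to decode to exactly -1.0.
 */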

static inline nir_ssa_def *
nir_format_float_to_unorm(nir_builder *b, nir_ssa_def *f, const unsigned *bits)
{
   nir_ssa_def *factor =
      _nir_format_norm_factor(b, bits, f->num_components, false);

   /* Clamp to the range [0, 1] */
   f = nir_fsat(b, f);

   return nir_f2u32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
}

static inline nir_ssa_def *
nir_format_float_to_snorm(nir_builder *b, nir_ssa_def *f, const unsigned *bits)
{
   nir_ssa_def *factor =
      _nir_format_norm_factor(b, bits, f->num_components, true);

   /* Clamp to the range [-1, 1] */
   f = nir_fmin(b, nir_fmax(b, f, nir_imm_float(b, -1)), nir_imm_float(b, 1));

   return nir_f2i32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
}

static inline nir_ssa_def *
nir_format_linear_to_srgb(nir_builder *b, nir_ssa_def *c)
{
   nir_ssa_def *linear = nir_fmul(b, c, nir_imm_float(b, 12.92f));
   nir_ssa_def *curved =
      nir_fsub(b, nir_fmul(b, nir_imm_float(b, 1.055f),
                           nir_fpow(b, c, nir_imm_float(b, 1.0 / 2.4))),
               nir_imm_float(b, 0.055f));

   return nir_fsat(b, nir_bcsel(b, nir_flt(b, c, nir_imm_float(b, 0.0031308f)),
                                linear, curved));
}

static inline nir_ssa_def *
nir_format_srgb_to_linear(nir_builder *b, nir_ssa_def *c)
{
   nir_ssa_def *linear = nir_fdiv(b, c, nir_imm_float(b, 12.92f));
   nir_ssa_def *curved =
      nir_fpow(b, nir_fdiv(b, nir_fadd(b, c, nir_imm_float(b, 0.055f)),
                           nir_imm_float(b, 1.055f)),
               nir_imm_float(b, 2.4f));

   return nir_fsat(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 0.04045f), c),
                                linear, curved));
}
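
/* These implement the standard sRGB transfer functions:
 *
 *    srgb = 12.92 * linear                      linear <= 0.0031308
 *    srgb = 1.055 * linear^(1/2.4) - 0.055      otherwise
 *
 * with decoding as the inverse, split at srgb <= 0.04045.
 */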

static inline nir_ssa_def *
nir_format_unpack_11f11f10f(nir_builder *b, nir_ssa_def *packed)
{
   nir_ssa_def *chans[3];
   chans[0] = nir_mask_shift(b, packed, 0x000007ff, 4);
   chans[1] = nir_mask_shift(b, packed, 0x003ff800, -7);
   chans[2] = nir_mask_shift(b, packed, 0xffc00000, -17);

   for (unsigned i = 0; i < 3; i++)
      chans[i] = nir_unpack_half_2x16_split_x(b, chans[i]);

   return nir_vec(b, chans, 3);
}
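
/* Both directions use the standard R11F_G11F_B10F layout: the 11-bit red
 * float in bits [10:0], 11-bit green in bits [21:11] and 10-bit blue in
 * bits [31:22].  Each shift lines a field up with the exponent and mantissa
 * bits of a 16-bit float.
 */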

static inline nir_ssa_def *
nir_format_pack_11f11f10f(nir_builder *b, nir_ssa_def *color)
{
   /* 10 and 11-bit floats are unsigned.  Clamp to non-negative */
   nir_ssa_def *clamped = nir_fmax(b, color, nir_imm_float(b, 0));

   nir_ssa_def *undef = nir_ssa_undef(b, 1, color->bit_size);
   nir_ssa_def *p1 = nir_pack_half_2x16_split(b, nir_channel(b, clamped, 0),
                                              nir_channel(b, clamped, 1));
   nir_ssa_def *p2 = nir_pack_half_2x16_split(b, nir_channel(b, clamped, 2),
                                              undef);

   /* A 10 or 11-bit float has the same exponent as a 16-bit float but with
    * fewer mantissa bits and no sign bit.  All we have to do is throw away
    * the sign bit and the bottom mantissa bits and shift it into place.
    */
   nir_ssa_def *packed = nir_imm_int(b, 0);
   packed = nir_mask_shift_or(b, packed, p1, 0x00007ff0, -4);
   packed = nir_mask_shift_or(b, packed, p1, 0x7ff00000, -9);
   packed = nir_mask_shift_or(b, packed, p2, 0x00007fe0, 17);

   return packed;
}
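
/* Shared-exponent RGB9E5 stores a 9-bit mantissa per channel in bits [8:0],
 * [17:9] and [26:18], plus one common 5-bit exponent in bits [31:27].  The
 * code below mirrors float3_to_rgb9e5() from util/format_rgb9e5.h, operating
 * directly on the IEEE bit patterns.
 */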

static inline nir_ssa_def *
nir_format_pack_r9g9b9e5(nir_builder *b, nir_ssa_def *color)
{
   /* See also float3_to_rgb9e5 */

   /* First, we need to clamp it to range. */
   nir_ssa_def *clamped = nir_fmin(b, color, nir_imm_float(b, MAX_RGB9E5));

   /* Get rid of negatives and NaN */
   clamped = nir_bcsel(b, nir_ult(b, nir_imm_int(b, 0x7f800000), color),
                       nir_imm_float(b, 0), clamped);

   /* maxrgb.u = MAX3(rc.u, gc.u, bc.u); */
   nir_ssa_def *maxu = nir_umax(b, nir_channel(b, clamped, 0),
                                nir_umax(b, nir_channel(b, clamped, 1),
                                         nir_channel(b, clamped, 2)));

   /* maxrgb.u += maxrgb.u & (1 << (23-9)); */
   maxu = nir_iadd(b, maxu, nir_iand(b, maxu, nir_imm_int(b, 1 << 14)));

   /* exp_shared = MAX2((maxrgb.u >> 23), -RGB9E5_EXP_BIAS - 1 + 127) +
    *              1 + RGB9E5_EXP_BIAS - 127;
    */
   nir_ssa_def *exp_shared =
      nir_iadd(b, nir_umax(b, nir_ushr(b, maxu, nir_imm_int(b, 23)),
                           nir_imm_int(b, -RGB9E5_EXP_BIAS - 1 + 127)),
               nir_imm_int(b, 1 + RGB9E5_EXP_BIAS - 127));

   /* revdenom_biasedexp = 127 - (exp_shared - RGB9E5_EXP_BIAS -
    *                             RGB9E5_MANTISSA_BITS) + 1;
    */
   nir_ssa_def *revdenom_biasedexp =
      nir_isub(b, nir_imm_int(b, 127 + RGB9E5_EXP_BIAS +
                                 RGB9E5_MANTISSA_BITS + 1),
               exp_shared);

   /* revdenom.u = revdenom_biasedexp << 23; */
   nir_ssa_def *revdenom =
      nir_ishl(b, revdenom_biasedexp, nir_imm_int(b, 23));

   /* rm = (int) (rc.f * revdenom.f);
    * gm = (int) (gc.f * revdenom.f);
    * bm = (int) (bc.f * revdenom.f);
    */
   nir_ssa_def *mantissa =
      nir_f2i32(b, nir_fmul(b, clamped, revdenom));

   /* rm = (rm & 1) + (rm >> 1);
    * gm = (gm & 1) + (gm >> 1);
    * bm = (bm & 1) + (bm >> 1);
    */
   mantissa = nir_iadd(b, nir_iand(b, mantissa, nir_imm_int(b, 1)),
                       nir_ushr(b, mantissa, nir_imm_int(b, 1)));

   nir_ssa_def *packed = nir_channel(b, mantissa, 0);
   packed = nir_mask_shift_or(b, packed, nir_channel(b, mantissa, 1), ~0, 9);
   packed = nir_mask_shift_or(b, packed, nir_channel(b, mantissa, 2), ~0, 18);
   packed = nir_mask_shift_or(b, packed, exp_shared, ~0, 27);

   return packed;
}

#endif /* NIR_FORMAT_CONVERT_H */