/*
 * Copyright © 2018 Red Hat Inc.
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28 #include "nir_builtin_builder.h"
31 nir_cross3(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
33 unsigned yzx
[3] = { 1, 2, 0 };
34 unsigned zxy
[3] = { 2, 0, 1 };
36 return nir_fsub(b
, nir_fmul(b
, nir_swizzle(b
, x
, yzx
, 3),
37 nir_swizzle(b
, y
, zxy
, 3)),
38 nir_fmul(b
, nir_swizzle(b
, x
, zxy
, 3),
39 nir_swizzle(b
, y
, yzx
, 3)));
43 nir_cross4(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
45 nir_ssa_def
*cross
= nir_cross3(b
, x
, y
);
48 nir_channel(b
, cross
, 0),
49 nir_channel(b
, cross
, 1),
50 nir_channel(b
, cross
, 2),
51 nir_imm_intN_t(b
, 0, cross
->bit_size
));
55 nir_length(nir_builder
*b
, nir_ssa_def
*vec
)
57 nir_ssa_def
*finf
= nir_imm_floatN_t(b
, INFINITY
, vec
->bit_size
);
59 nir_ssa_def
*abs
= nir_fabs(b
, vec
);
60 if (vec
->num_components
== 1)
63 nir_ssa_def
*maxc
= nir_fmax_abs_vec_comp(b
, abs
);
64 abs
= nir_fdiv(b
, abs
, maxc
);
65 nir_ssa_def
*res
= nir_fmul(b
, nir_fsqrt(b
, nir_fdot(b
, abs
, abs
)), maxc
);
66 return nir_bcsel(b
, nir_feq(b
, maxc
, finf
), maxc
, res
);
70 nir_fast_length(nir_builder
*b
, nir_ssa_def
*vec
)
72 switch (vec
->num_components
) {
73 case 1: return nir_fsqrt(b
, nir_fmul(b
, vec
, vec
));
74 case 2: return nir_fsqrt(b
, nir_fdot2(b
, vec
, vec
));
75 case 3: return nir_fsqrt(b
, nir_fdot3(b
, vec
, vec
));
76 case 4: return nir_fsqrt(b
, nir_fdot4(b
, vec
, vec
));
78 unreachable("Invalid number of components");
83 nir_nextafter(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
85 nir_ssa_def
*zero
= nir_imm_intN_t(b
, 0, x
->bit_size
);
86 nir_ssa_def
*one
= nir_imm_intN_t(b
, 1, x
->bit_size
);
88 nir_ssa_def
*condeq
= nir_feq(b
, x
, y
);
89 nir_ssa_def
*conddir
= nir_flt(b
, x
, y
);
90 nir_ssa_def
*condzero
= nir_feq(b
, x
, zero
);
92 /* beware of: +/-0.0 - 1 == NaN */
96 nir_imm_intN_t(b
, (1 << (x
->bit_size
- 1)) + 1, x
->bit_size
),
99 /* beware of -0.0 + 1 == -0x1p-149 */
100 nir_ssa_def
*xp
= nir_bcsel(b
, condzero
, one
, nir_iadd(b
, x
, one
));
102 /* nextafter can be implemented by just +/- 1 on the int value */
104 nir_bcsel(b
, nir_ixor(b
, conddir
, nir_flt(b
, x
, zero
)), xp
, xn
);
106 return nir_nan_check2(b
, x
, y
, nir_bcsel(b
, condeq
, x
, res
));
110 nir_normalize(nir_builder
*b
, nir_ssa_def
*vec
)
112 if (vec
->num_components
== 1)
113 return nir_fsign(b
, vec
);
115 nir_ssa_def
*f0
= nir_imm_floatN_t(b
, 0.0, vec
->bit_size
);
116 nir_ssa_def
*f1
= nir_imm_floatN_t(b
, 1.0, vec
->bit_size
);
117 nir_ssa_def
*finf
= nir_imm_floatN_t(b
, INFINITY
, vec
->bit_size
);
119 /* scale the input to increase precision */
120 nir_ssa_def
*maxc
= nir_fmax_abs_vec_comp(b
, vec
);
121 nir_ssa_def
*svec
= nir_fdiv(b
, vec
, maxc
);
123 nir_ssa_def
*finfvec
= nir_copysign(b
, nir_bcsel(b
, nir_feq(b
, vec
, finf
), f1
, f0
), f1
);
125 nir_ssa_def
*temp
= nir_bcsel(b
, nir_feq(b
, maxc
, finf
), finfvec
, svec
);
126 nir_ssa_def
*res
= nir_fmul(b
, temp
, nir_frsq(b
, nir_fdot(b
, temp
, temp
)));
128 return nir_bcsel(b
, nir_feq(b
, maxc
, f0
), vec
, res
);
132 nir_rotate(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
134 nir_ssa_def
*shift_mask
= nir_imm_int(b
, x
->bit_size
- 1);
136 if (y
->bit_size
!= 32)
139 nir_ssa_def
*lshift
= nir_iand(b
, y
, shift_mask
);
140 nir_ssa_def
*rshift
= nir_isub(b
, nir_imm_int(b
, x
->bit_size
), lshift
);
142 nir_ssa_def
*hi
= nir_ishl(b
, x
, lshift
);
143 nir_ssa_def
*lo
= nir_ushr(b
, x
, rshift
);
145 return nir_ior(b
, hi
, lo
);
149 nir_smoothstep(nir_builder
*b
, nir_ssa_def
*edge0
, nir_ssa_def
*edge1
, nir_ssa_def
*x
)
151 nir_ssa_def
*f2
= nir_imm_floatN_t(b
, 2.0, x
->bit_size
);
152 nir_ssa_def
*f3
= nir_imm_floatN_t(b
, 3.0, x
->bit_size
);
154 /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
156 nir_fsat(b
, nir_fdiv(b
, nir_fsub(b
, x
, edge0
),
157 nir_fsub(b
, edge1
, edge0
)));
159 /* result = t * t * (3 - 2 * t) */
160 return nir_fmul(b
, t
, nir_fmul(b
, t
, nir_fsub(b
, f3
, nir_fmul(b
, f2
, t
))));
164 nir_upsample(nir_builder
*b
, nir_ssa_def
*hi
, nir_ssa_def
*lo
)
166 assert(lo
->num_components
== hi
->num_components
);
167 assert(lo
->bit_size
== hi
->bit_size
);
169 nir_ssa_def
*res
[NIR_MAX_VEC_COMPONENTS
];
170 for (unsigned i
= 0; i
< lo
->num_components
; ++i
) {
171 nir_ssa_def
*vec
= nir_vec2(b
, nir_channel(b
, lo
, i
), nir_channel(b
, hi
, i
));
172 res
[i
] = nir_pack_bits(b
, vec
, vec
->bit_size
* 2);
175 return nir_vec(b
, res
, lo
->num_components
);
179 * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
182 build_fsum(nir_builder
*b
, nir_ssa_def
**xs
, int terms
)
184 nir_ssa_def
*accum
= xs
[0];
186 for (int i
= 1; i
< terms
; i
++)
187 accum
= nir_fadd(b
, accum
, xs
[i
]);
193 nir_atan(nir_builder
*b
, nir_ssa_def
*y_over_x
)
195 const uint32_t bit_size
= y_over_x
->bit_size
;
197 nir_ssa_def
*abs_y_over_x
= nir_fabs(b
, y_over_x
);
198 nir_ssa_def
*one
= nir_imm_floatN_t(b
, 1.0f
, bit_size
);
201 * range-reduction, first step:
203 * / y_over_x if |y_over_x| <= 1.0;
205 * \ 1.0 / y_over_x otherwise
207 nir_ssa_def
*x
= nir_fdiv(b
, nir_fmin(b
, abs_y_over_x
, one
),
208 nir_fmax(b
, abs_y_over_x
, one
));
211 * approximate atan by evaluating polynomial:
213 * x * 0.9999793128310355 - x^3 * 0.3326756418091246 +
214 * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 +
215 * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
217 nir_ssa_def
*x_2
= nir_fmul(b
, x
, x
);
218 nir_ssa_def
*x_3
= nir_fmul(b
, x_2
, x
);
219 nir_ssa_def
*x_5
= nir_fmul(b
, x_3
, x_2
);
220 nir_ssa_def
*x_7
= nir_fmul(b
, x_5
, x_2
);
221 nir_ssa_def
*x_9
= nir_fmul(b
, x_7
, x_2
);
222 nir_ssa_def
*x_11
= nir_fmul(b
, x_9
, x_2
);
224 nir_ssa_def
*polynomial_terms
[] = {
225 nir_fmul_imm(b
, x
, 0.9999793128310355f
),
226 nir_fmul_imm(b
, x_3
, -0.3326756418091246f
),
227 nir_fmul_imm(b
, x_5
, 0.1938924977115610f
),
228 nir_fmul_imm(b
, x_7
, -0.1173503194786851f
),
229 nir_fmul_imm(b
, x_9
, 0.0536813784310406f
),
230 nir_fmul_imm(b
, x_11
, -0.0121323213173444f
),
234 build_fsum(b
, polynomial_terms
, ARRAY_SIZE(polynomial_terms
));
236 /* range-reduction fixup */
237 tmp
= nir_fadd(b
, tmp
,
238 nir_fmul(b
, nir_b2f(b
, nir_flt(b
, one
, abs_y_over_x
), bit_size
),
239 nir_fadd_imm(b
, nir_fmul_imm(b
, tmp
, -2.0f
), M_PI_2
)));
242 return nir_fmul(b
, tmp
, nir_fsign(b
, y_over_x
));
246 nir_atan2(nir_builder
*b
, nir_ssa_def
*y
, nir_ssa_def
*x
)
248 assert(y
->bit_size
== x
->bit_size
);
249 const uint32_t bit_size
= x
->bit_size
;
251 nir_ssa_def
*zero
= nir_imm_floatN_t(b
, 0, bit_size
);
252 nir_ssa_def
*one
= nir_imm_floatN_t(b
, 1, bit_size
);
254 /* If we're on the left half-plane rotate the coordinates π/2 clock-wise
255 * for the y=0 discontinuity to end up aligned with the vertical
256 * discontinuity of atan(s/t) along t=0. This also makes sure that we
257 * don't attempt to divide by zero along the vertical line, which may give
258 * unspecified results on non-GLSL 4.1-capable hardware.
260 nir_ssa_def
*flip
= nir_fge(b
, zero
, x
);
261 nir_ssa_def
*s
= nir_bcsel(b
, flip
, nir_fabs(b
, x
), y
);
262 nir_ssa_def
*t
= nir_bcsel(b
, flip
, y
, nir_fabs(b
, x
));
264 /* If the magnitude of the denominator exceeds some huge value, scale down
265 * the arguments in order to prevent the reciprocal operation from flushing
266 * its result to zero, which would cause precision problems, and for s
267 * infinite would cause us to return a NaN instead of the correct finite
270 * If fmin and fmax are respectively the smallest and largest positive
271 * normalized floating point values representable by the implementation,
272 * the constants below should be in agreement with:
275 * scale <= 1 / fmin / fmax (for |t| >= huge)
277 * In addition scale should be a negative power of two in order to avoid
278 * loss of precision. The values chosen below should work for most usual
279 * floating point representations with at least the dynamic range of ATI's
280 * 24-bit representation.
282 const double huge_val
= bit_size
>= 32 ? 1e18
: 16384;
283 nir_ssa_def
*huge
= nir_imm_floatN_t(b
, huge_val
, bit_size
);
284 nir_ssa_def
*scale
= nir_bcsel(b
, nir_fge(b
, nir_fabs(b
, t
), huge
),
285 nir_imm_floatN_t(b
, 0.25, bit_size
), one
);
286 nir_ssa_def
*rcp_scaled_t
= nir_frcp(b
, nir_fmul(b
, t
, scale
));
287 nir_ssa_def
*s_over_t
= nir_fmul(b
, nir_fmul(b
, s
, scale
), rcp_scaled_t
);
289 /* For |x| = |y| assume tan = 1 even if infinite (i.e. pretend momentarily
290 * that ∞/∞ = 1) in order to comply with the rather artificial rules
291 * inherited from IEEE 754-2008, namely:
293 * "atan2(±∞, −∞) is ±3π/4
294 * atan2(±∞, +∞) is ±π/4"
296 * Note that this is inconsistent with the rules for the neighborhood of
297 * zero that are based on iterated limits:
299 * "atan2(±0, −0) is ±π
300 * atan2(±0, +0) is ±0"
302 * but GLSL specifically allows implementations to deviate from IEEE rules
303 * at (0,0), so we take that license (i.e. pretend that 0/0 = 1 here as
306 nir_ssa_def
*tan
= nir_bcsel(b
, nir_feq(b
, nir_fabs(b
, x
), nir_fabs(b
, y
)),
307 one
, nir_fabs(b
, s_over_t
));
309 /* Calculate the arctangent and fix up the result if we had flipped the
313 nir_fadd(b
, nir_fmul_imm(b
, nir_b2f(b
, flip
, bit_size
), M_PI_2
),
316 /* Rather convoluted calculation of the sign of the result. When x < 0 we
317 * cannot use fsign because we need to be able to distinguish between
318 * negative and positive zero. We don't use bitwise arithmetic tricks for
319 * consistency with the GLSL front-end. When x >= 0 rcp_scaled_t will
320 * always be non-negative so this won't be able to distinguish between
321 * negative and positive zero, but we don't care because atan2 is
322 * continuous along the whole positive y = 0 half-line, so it won't affect
323 * the result significantly.
325 return nir_bcsel(b
, nir_flt(b
, nir_fmin(b
, y
, rcp_scaled_t
), zero
),
326 nir_fneg(b
, arc
), arc
);
330 nir_get_texture_size(nir_builder
*b
, nir_tex_instr
*tex
)
332 b
->cursor
= nir_before_instr(&tex
->instr
);
336 unsigned num_srcs
= 1; /* One for the LOD */
337 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
338 if (tex
->src
[i
].src_type
== nir_tex_src_texture_deref
||
339 tex
->src
[i
].src_type
== nir_tex_src_sampler_deref
||
340 tex
->src
[i
].src_type
== nir_tex_src_texture_offset
||
341 tex
->src
[i
].src_type
== nir_tex_src_sampler_offset
||
342 tex
->src
[i
].src_type
== nir_tex_src_texture_handle
||
343 tex
->src
[i
].src_type
== nir_tex_src_sampler_handle
)
347 txs
= nir_tex_instr_create(b
->shader
, num_srcs
);
348 txs
->op
= nir_texop_txs
;
349 txs
->sampler_dim
= tex
->sampler_dim
;
350 txs
->is_array
= tex
->is_array
;
351 txs
->is_shadow
= tex
->is_shadow
;
352 txs
->is_new_style_shadow
= tex
->is_new_style_shadow
;
353 txs
->texture_index
= tex
->texture_index
;
354 txs
->sampler_index
= tex
->sampler_index
;
355 txs
->dest_type
= nir_type_int
;
358 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
359 if (tex
->src
[i
].src_type
== nir_tex_src_texture_deref
||
360 tex
->src
[i
].src_type
== nir_tex_src_sampler_deref
||
361 tex
->src
[i
].src_type
== nir_tex_src_texture_offset
||
362 tex
->src
[i
].src_type
== nir_tex_src_sampler_offset
||
363 tex
->src
[i
].src_type
== nir_tex_src_texture_handle
||
364 tex
->src
[i
].src_type
== nir_tex_src_sampler_handle
) {
365 nir_src_copy(&txs
->src
[idx
].src
, &tex
->src
[i
].src
, txs
);
366 txs
->src
[idx
].src_type
= tex
->src
[i
].src_type
;
370 /* Add in an LOD because some back-ends require it */
371 txs
->src
[idx
].src
= nir_src_for_ssa(nir_imm_int(b
, 0));
372 txs
->src
[idx
].src_type
= nir_tex_src_lod
;
374 nir_ssa_dest_init(&txs
->instr
, &txs
->dest
,
375 nir_tex_instr_dest_size(txs
), 32, NULL
);
376 nir_builder_instr_insert(b
, &txs
->instr
);
378 return &txs
->dest
.ssa
;
382 nir_get_texture_lod(nir_builder
*b
, nir_tex_instr
*tex
)
384 b
->cursor
= nir_before_instr(&tex
->instr
);
388 unsigned num_srcs
= 0;
389 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
390 if (tex
->src
[i
].src_type
== nir_tex_src_coord
||
391 tex
->src
[i
].src_type
== nir_tex_src_texture_deref
||
392 tex
->src
[i
].src_type
== nir_tex_src_sampler_deref
||
393 tex
->src
[i
].src_type
== nir_tex_src_texture_offset
||
394 tex
->src
[i
].src_type
== nir_tex_src_sampler_offset
||
395 tex
->src
[i
].src_type
== nir_tex_src_texture_handle
||
396 tex
->src
[i
].src_type
== nir_tex_src_sampler_handle
)
400 tql
= nir_tex_instr_create(b
->shader
, num_srcs
);
401 tql
->op
= nir_texop_lod
;
402 tql
->coord_components
= tex
->coord_components
;
403 tql
->sampler_dim
= tex
->sampler_dim
;
404 tql
->is_array
= tex
->is_array
;
405 tql
->is_shadow
= tex
->is_shadow
;
406 tql
->is_new_style_shadow
= tex
->is_new_style_shadow
;
407 tql
->texture_index
= tex
->texture_index
;
408 tql
->sampler_index
= tex
->sampler_index
;
409 tql
->dest_type
= nir_type_float
;
412 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
413 if (tex
->src
[i
].src_type
== nir_tex_src_coord
||
414 tex
->src
[i
].src_type
== nir_tex_src_texture_deref
||
415 tex
->src
[i
].src_type
== nir_tex_src_sampler_deref
||
416 tex
->src
[i
].src_type
== nir_tex_src_texture_offset
||
417 tex
->src
[i
].src_type
== nir_tex_src_sampler_offset
||
418 tex
->src
[i
].src_type
== nir_tex_src_texture_handle
||
419 tex
->src
[i
].src_type
== nir_tex_src_sampler_handle
) {
420 nir_src_copy(&tql
->src
[idx
].src
, &tex
->src
[i
].src
, tql
);
421 tql
->src
[idx
].src_type
= tex
->src
[i
].src_type
;
426 nir_ssa_dest_init(&tql
->instr
, &tql
->dest
, 2, 32, NULL
);
427 nir_builder_instr_insert(b
, &tql
->instr
);
429 /* The LOD is the y component of the result */
430 return nir_channel(b
, &tql
->dest
.ssa
, 1);