/*
 * Copyright © 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
27 #include "nir_builtin_builder.h"
30 nir_cross3(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
32 unsigned yzx
[3] = { 1, 2, 0 };
33 unsigned zxy
[3] = { 2, 0, 1 };
35 return nir_fsub(b
, nir_fmul(b
, nir_swizzle(b
, x
, yzx
, 3, true),
36 nir_swizzle(b
, y
, zxy
, 3, true)),
37 nir_fmul(b
, nir_swizzle(b
, x
, zxy
, 3, true),
38 nir_swizzle(b
, y
, yzx
, 3, true)));
42 nir_cross4(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
44 nir_ssa_def
*cross
= nir_cross3(b
, x
, y
);
47 nir_channel(b
, cross
, 0),
48 nir_channel(b
, cross
, 1),
49 nir_channel(b
, cross
, 2),
50 nir_imm_intN_t(b
, 0, cross
->bit_size
));
54 nir_length(nir_builder
*b
, nir_ssa_def
*vec
)
56 nir_ssa_def
*finf
= nir_imm_floatN_t(b
, INFINITY
, vec
->bit_size
);
58 nir_ssa_def
*abs
= nir_fabs(b
, vec
);
59 if (vec
->num_components
== 1)
62 nir_ssa_def
*maxc
= nir_fmax_abs_vec_comp(b
, abs
);
63 abs
= nir_fdiv(b
, abs
, maxc
);
64 nir_ssa_def
*res
= nir_fmul(b
, nir_fsqrt(b
, nir_fdot(b
, abs
, abs
)), maxc
);
65 return nir_bcsel(b
, nir_feq(b
, maxc
, finf
), maxc
, res
);
69 nir_fast_length(nir_builder
*b
, nir_ssa_def
*vec
)
71 switch (vec
->num_components
) {
72 case 1: return nir_fsqrt(b
, nir_fmul(b
, vec
, vec
));
73 case 2: return nir_fsqrt(b
, nir_fdot2(b
, vec
, vec
));
74 case 3: return nir_fsqrt(b
, nir_fdot3(b
, vec
, vec
));
75 case 4: return nir_fsqrt(b
, nir_fdot4(b
, vec
, vec
));
77 unreachable("Invalid number of components");
82 nir_nextafter(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
84 nir_ssa_def
*zero
= nir_imm_intN_t(b
, 0, x
->bit_size
);
85 nir_ssa_def
*one
= nir_imm_intN_t(b
, 1, x
->bit_size
);
87 nir_ssa_def
*condeq
= nir_feq(b
, x
, y
);
88 nir_ssa_def
*conddir
= nir_flt(b
, x
, y
);
89 nir_ssa_def
*condzero
= nir_feq(b
, x
, zero
);
91 /* beware of: +/-0.0 - 1 == NaN */
95 nir_imm_intN_t(b
, (1 << (x
->bit_size
- 1)) + 1, x
->bit_size
),
98 /* beware of -0.0 + 1 == -0x1p-149 */
99 nir_ssa_def
*xp
= nir_bcsel(b
, condzero
, one
, nir_iadd(b
, x
, one
));
101 /* nextafter can be implemented by just +/- 1 on the int value */
103 nir_bcsel(b
, nir_ixor(b
, conddir
, nir_flt(b
, x
, zero
)), xp
, xn
);
105 return nir_nan_check2(b
, x
, y
, nir_bcsel(b
, condeq
, x
, res
));
109 nir_normalize(nir_builder
*b
, nir_ssa_def
*vec
)
111 if (vec
->num_components
== 1)
112 return nir_fsign(b
, vec
);
114 nir_ssa_def
*f0
= nir_imm_floatN_t(b
, 0.0, vec
->bit_size
);
115 nir_ssa_def
*f1
= nir_imm_floatN_t(b
, 1.0, vec
->bit_size
);
116 nir_ssa_def
*finf
= nir_imm_floatN_t(b
, INFINITY
, vec
->bit_size
);
118 /* scale the input to increase precision */
119 nir_ssa_def
*maxc
= nir_fmax_abs_vec_comp(b
, vec
);
120 nir_ssa_def
*svec
= nir_fdiv(b
, vec
, maxc
);
122 nir_ssa_def
*finfvec
= nir_copysign(b
, nir_bcsel(b
, nir_feq(b
, vec
, finf
), f1
, f0
), f1
);
124 nir_ssa_def
*temp
= nir_bcsel(b
, nir_feq(b
, maxc
, finf
), finfvec
, svec
);
125 nir_ssa_def
*res
= nir_fmul(b
, temp
, nir_frsq(b
, nir_fdot(b
, temp
, temp
)));
127 return nir_bcsel(b
, nir_feq(b
, maxc
, f0
), vec
, res
);
131 nir_rotate(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
133 nir_ssa_def
*shift_mask
= nir_imm_int(b
, x
->bit_size
- 1);
135 if (y
->bit_size
!= 32)
138 nir_ssa_def
*lshift
= nir_iand(b
, y
, shift_mask
);
139 nir_ssa_def
*rshift
= nir_isub(b
, nir_imm_int(b
, x
->bit_size
), lshift
);
141 nir_ssa_def
*hi
= nir_ishl(b
, x
, lshift
);
142 nir_ssa_def
*lo
= nir_ushr(b
, x
, rshift
);
144 return nir_ior(b
, hi
, lo
);
148 nir_smoothstep(nir_builder
*b
, nir_ssa_def
*edge0
, nir_ssa_def
*edge1
, nir_ssa_def
*x
)
150 nir_ssa_def
*f2
= nir_imm_floatN_t(b
, 2.0, x
->bit_size
);
151 nir_ssa_def
*f3
= nir_imm_floatN_t(b
, 3.0, x
->bit_size
);
153 /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
155 nir_fsat(b
, nir_fdiv(b
, nir_fsub(b
, x
, edge0
),
156 nir_fsub(b
, edge1
, edge0
)));
158 /* result = t * t * (3 - 2 * t) */
159 return nir_fmul(b
, t
, nir_fmul(b
, t
, nir_fsub(b
, f3
, nir_fmul(b
, f2
, t
))));
163 nir_upsample(nir_builder
*b
, nir_ssa_def
*hi
, nir_ssa_def
*lo
)
165 assert(lo
->num_components
== hi
->num_components
);
166 assert(lo
->bit_size
== hi
->bit_size
);
168 nir_ssa_def
*res
[NIR_MAX_VEC_COMPONENTS
];
169 for (unsigned i
= 0; i
< lo
->num_components
; ++i
) {
170 nir_ssa_def
*vec
= nir_vec2(b
, nir_channel(b
, lo
, i
), nir_channel(b
, hi
, i
));
171 res
[i
] = nir_pack_bits(b
, vec
, vec
->bit_size
* 2);
174 return nir_vec(b
, res
, lo
->num_components
);