2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Jason Ekstrand (jason@jlekstrand.net)
28 #include "vtn_private.h"
29 #include "GLSL.std.450.h"
31 #define M_PIf ((float) M_PI)
32 #define M_PI_2f ((float) M_PI_2)
33 #define M_PI_4f ((float) M_PI_4)
36 build_mat2_det(nir_builder
*b
, nir_ssa_def
*col
[2])
38 unsigned swiz
[4] = {1, 0, 0, 0};
39 nir_ssa_def
*p
= nir_fmul(b
, col
[0], nir_swizzle(b
, col
[1], swiz
, 2, true));
40 return nir_fsub(b
, nir_channel(b
, p
, 0), nir_channel(b
, p
, 1));
44 build_mat3_det(nir_builder
*b
, nir_ssa_def
*col
[3])
46 unsigned yzx
[4] = {1, 2, 0, 0};
47 unsigned zxy
[4] = {2, 0, 1, 0};
51 nir_fmul(b
, nir_swizzle(b
, col
[1], yzx
, 3, true),
52 nir_swizzle(b
, col
[2], zxy
, 3, true)));
55 nir_fmul(b
, nir_swizzle(b
, col
[1], zxy
, 3, true),
56 nir_swizzle(b
, col
[2], yzx
, 3, true)));
58 nir_ssa_def
*diff
= nir_fsub(b
, prod0
, prod1
);
60 return nir_fadd(b
, nir_channel(b
, diff
, 0),
61 nir_fadd(b
, nir_channel(b
, diff
, 1),
62 nir_channel(b
, diff
, 2)));
66 build_mat4_det(nir_builder
*b
, nir_ssa_def
**col
)
68 nir_ssa_def
*subdet
[4];
69 for (unsigned i
= 0; i
< 4; i
++) {
71 for (unsigned j
= 0; j
< 3; j
++)
72 swiz
[j
] = j
+ (j
>= i
);
74 nir_ssa_def
*subcol
[3];
75 subcol
[0] = nir_swizzle(b
, col
[1], swiz
, 3, true);
76 subcol
[1] = nir_swizzle(b
, col
[2], swiz
, 3, true);
77 subcol
[2] = nir_swizzle(b
, col
[3], swiz
, 3, true);
79 subdet
[i
] = build_mat3_det(b
, subcol
);
82 nir_ssa_def
*prod
= nir_fmul(b
, col
[0], nir_vec(b
, subdet
, 4));
84 return nir_fadd(b
, nir_fsub(b
, nir_channel(b
, prod
, 0),
85 nir_channel(b
, prod
, 1)),
86 nir_fsub(b
, nir_channel(b
, prod
, 2),
87 nir_channel(b
, prod
, 3)));
91 build_mat_det(struct vtn_builder
*b
, struct vtn_ssa_value
*src
)
93 unsigned size
= glsl_get_vector_elements(src
->type
);
96 for (unsigned i
= 0; i
< size
; i
++)
97 cols
[i
] = src
->elems
[i
]->def
;
100 case 2: return build_mat2_det(&b
->nb
, cols
);
101 case 3: return build_mat3_det(&b
->nb
, cols
);
102 case 4: return build_mat4_det(&b
->nb
, cols
);
104 unreachable("Invalid matrix size");
108 /* Computes the determinate of the submatrix given by taking src and
109 * removing the specified row and column.
112 build_mat_subdet(struct nir_builder
*b
, struct vtn_ssa_value
*src
,
113 unsigned size
, unsigned row
, unsigned col
)
115 assert(row
< size
&& col
< size
);
117 return nir_channel(b
, src
->elems
[1 - col
]->def
, 1 - row
);
119 /* Swizzle to get all but the specified row */
121 for (unsigned j
= 0; j
< 3; j
++)
122 swiz
[j
] = j
+ (j
>= row
);
124 /* Grab all but the specified column */
125 nir_ssa_def
*subcol
[3];
126 for (unsigned j
= 0; j
< size
; j
++) {
128 subcol
[j
- (j
> col
)] = nir_swizzle(b
, src
->elems
[j
]->def
,
129 swiz
, size
- 1, true);
134 return build_mat2_det(b
, subcol
);
137 return build_mat3_det(b
, subcol
);
142 static struct vtn_ssa_value
*
143 matrix_inverse(struct vtn_builder
*b
, struct vtn_ssa_value
*src
)
145 nir_ssa_def
*adj_col
[4];
146 unsigned size
= glsl_get_vector_elements(src
->type
);
148 /* Build up an adjugate matrix */
149 for (unsigned c
= 0; c
< size
; c
++) {
150 nir_ssa_def
*elem
[4];
151 for (unsigned r
= 0; r
< size
; r
++) {
152 elem
[r
] = build_mat_subdet(&b
->nb
, src
, size
, c
, r
);
155 elem
[r
] = nir_fneg(&b
->nb
, elem
[r
]);
158 adj_col
[c
] = nir_vec(&b
->nb
, elem
, size
);
161 nir_ssa_def
*det_inv
= nir_frcp(&b
->nb
, build_mat_det(b
, src
));
163 struct vtn_ssa_value
*val
= vtn_create_ssa_value(b
, src
->type
);
164 for (unsigned i
= 0; i
< size
; i
++)
165 val
->elems
[i
]->def
= nir_fmul(&b
->nb
, adj_col
[i
], det_inv
);
171 build_length(nir_builder
*b
, nir_ssa_def
*vec
)
173 switch (vec
->num_components
) {
174 case 1: return nir_fsqrt(b
, nir_fmul(b
, vec
, vec
));
175 case 2: return nir_fsqrt(b
, nir_fdot2(b
, vec
, vec
));
176 case 3: return nir_fsqrt(b
, nir_fdot3(b
, vec
, vec
));
177 case 4: return nir_fsqrt(b
, nir_fdot4(b
, vec
, vec
));
179 unreachable("Invalid number of components");
183 static inline nir_ssa_def
*
184 build_fclamp(nir_builder
*b
,
185 nir_ssa_def
*x
, nir_ssa_def
*min_val
, nir_ssa_def
*max_val
)
187 return nir_fmin(b
, nir_fmax(b
, x
, min_val
), max_val
);
194 build_exp(nir_builder
*b
, nir_ssa_def
*x
)
196 return nir_fexp2(b
, nir_fmul(b
, x
, nir_imm_float(b
, M_LOG2E
)));
200 * Return ln(x) - the natural logarithm of x.
203 build_log(nir_builder
*b
, nir_ssa_def
*x
)
205 return nir_fmul(b
, nir_flog2(b
, x
), nir_imm_float(b
, 1.0 / M_LOG2E
));
209 * Approximate asin(x) by the formula:
210 * asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1))))
212 * which is correct to first order at x=0 and x=±1 regardless of the p
213 * coefficients but can be made second-order correct at both ends by selecting
214 * the fit coefficients appropriately. Different p coefficients can be used
215 * in the asin and acos implementation to minimize some relative error metric
219 build_asin(nir_builder
*b
, nir_ssa_def
*x
, float p0
, float p1
)
221 nir_ssa_def
*abs_x
= nir_fabs(b
, x
);
222 return nir_fmul(b
, nir_fsign(b
, x
),
223 nir_fsub(b
, nir_imm_float(b
, M_PI_2f
),
224 nir_fmul(b
, nir_fsqrt(b
, nir_fsub(b
, nir_imm_float(b
, 1.0f
), abs_x
)),
225 nir_fadd(b
, nir_imm_float(b
, M_PI_2f
),
227 nir_fadd(b
, nir_imm_float(b
, M_PI_4f
- 1.0f
),
229 nir_fadd(b
, nir_imm_float(b
, p0
),
231 nir_imm_float(b
, p1
))))))))));
235 * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
238 build_fsum(nir_builder
*b
, nir_ssa_def
**xs
, int terms
)
240 nir_ssa_def
*accum
= xs
[0];
242 for (int i
= 1; i
< terms
; i
++)
243 accum
= nir_fadd(b
, accum
, xs
[i
]);
249 build_atan(nir_builder
*b
, nir_ssa_def
*y_over_x
)
251 nir_ssa_def
*abs_y_over_x
= nir_fabs(b
, y_over_x
);
252 nir_ssa_def
*one
= nir_imm_float(b
, 1.0f
);
255 * range-reduction, first step:
257 * / y_over_x if |y_over_x| <= 1.0;
259 * \ 1.0 / y_over_x otherwise
261 nir_ssa_def
*x
= nir_fdiv(b
, nir_fmin(b
, abs_y_over_x
, one
),
262 nir_fmax(b
, abs_y_over_x
, one
));
265 * approximate atan by evaluating polynomial:
267 * x * 0.9999793128310355 - x^3 * 0.3326756418091246 +
268 * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 +
269 * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
271 nir_ssa_def
*x_2
= nir_fmul(b
, x
, x
);
272 nir_ssa_def
*x_3
= nir_fmul(b
, x_2
, x
);
273 nir_ssa_def
*x_5
= nir_fmul(b
, x_3
, x_2
);
274 nir_ssa_def
*x_7
= nir_fmul(b
, x_5
, x_2
);
275 nir_ssa_def
*x_9
= nir_fmul(b
, x_7
, x_2
);
276 nir_ssa_def
*x_11
= nir_fmul(b
, x_9
, x_2
);
278 nir_ssa_def
*polynomial_terms
[] = {
279 nir_fmul(b
, x
, nir_imm_float(b
, 0.9999793128310355f
)),
280 nir_fmul(b
, x_3
, nir_imm_float(b
, -0.3326756418091246f
)),
281 nir_fmul(b
, x_5
, nir_imm_float(b
, 0.1938924977115610f
)),
282 nir_fmul(b
, x_7
, nir_imm_float(b
, -0.1173503194786851f
)),
283 nir_fmul(b
, x_9
, nir_imm_float(b
, 0.0536813784310406f
)),
284 nir_fmul(b
, x_11
, nir_imm_float(b
, -0.0121323213173444f
)),
288 build_fsum(b
, polynomial_terms
, ARRAY_SIZE(polynomial_terms
));
290 /* range-reduction fixup */
291 tmp
= nir_fadd(b
, tmp
,
293 nir_b2f(b
, nir_flt(b
, one
, abs_y_over_x
)),
294 nir_fadd(b
, nir_fmul(b
, tmp
,
295 nir_imm_float(b
, -2.0f
)),
296 nir_imm_float(b
, M_PI_2f
))));
299 return nir_fmul(b
, tmp
, nir_fsign(b
, y_over_x
));
303 build_atan2(nir_builder
*b
, nir_ssa_def
*y
, nir_ssa_def
*x
)
305 nir_ssa_def
*zero
= nir_imm_float(b
, 0.0f
);
307 /* If |x| >= 1.0e-8 * |y|: */
308 nir_ssa_def
*condition
=
309 nir_fge(b
, nir_fabs(b
, x
),
310 nir_fmul(b
, nir_imm_float(b
, 1.0e-8f
), nir_fabs(b
, y
)));
312 /* Then...call atan(y/x) and fix it up: */
313 nir_ssa_def
*atan1
= build_atan(b
, nir_fdiv(b
, y
, x
));
314 nir_ssa_def
*r_then
=
315 nir_bcsel(b
, nir_flt(b
, x
, zero
),
317 nir_bcsel(b
, nir_fge(b
, y
, zero
),
318 nir_imm_float(b
, M_PIf
),
319 nir_imm_float(b
, -M_PIf
))),
323 nir_ssa_def
*r_else
=
324 nir_fmul(b
, nir_fsign(b
, y
), nir_imm_float(b
, M_PI_2f
));
326 return nir_bcsel(b
, condition
, r_then
, r_else
);
330 build_frexp(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
**exponent
)
332 nir_ssa_def
*abs_x
= nir_fabs(b
, x
);
333 nir_ssa_def
*zero
= nir_imm_float(b
, 0.0f
);
335 /* Single-precision floating-point values are stored as
340 * An exponent shift of 23 will shift the mantissa out, leaving only the
341 * exponent and sign bit (which itself may be zero, if the absolute value
342 * was taken before the bitcast and shift.
344 nir_ssa_def
*exponent_shift
= nir_imm_int(b
, 23);
345 nir_ssa_def
*exponent_bias
= nir_imm_int(b
, -126);
347 nir_ssa_def
*sign_mantissa_mask
= nir_imm_int(b
, 0x807fffffu
);
349 /* Exponent of floating-point values in the range [0.5, 1.0). */
350 nir_ssa_def
*exponent_value
= nir_imm_int(b
, 0x3f000000u
);
352 nir_ssa_def
*is_not_zero
= nir_fne(b
, abs_x
, zero
);
355 nir_iadd(b
, nir_ushr(b
, abs_x
, exponent_shift
),
356 nir_bcsel(b
, is_not_zero
, exponent_bias
, zero
));
358 return nir_ior(b
, nir_iand(b
, x
, sign_mantissa_mask
),
359 nir_bcsel(b
, is_not_zero
, exponent_value
, zero
));
363 vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode
)
366 case GLSLstd450Round
: return nir_op_fround_even
;
367 case GLSLstd450RoundEven
: return nir_op_fround_even
;
368 case GLSLstd450Trunc
: return nir_op_ftrunc
;
369 case GLSLstd450FAbs
: return nir_op_fabs
;
370 case GLSLstd450SAbs
: return nir_op_iabs
;
371 case GLSLstd450FSign
: return nir_op_fsign
;
372 case GLSLstd450SSign
: return nir_op_isign
;
373 case GLSLstd450Floor
: return nir_op_ffloor
;
374 case GLSLstd450Ceil
: return nir_op_fceil
;
375 case GLSLstd450Fract
: return nir_op_ffract
;
376 case GLSLstd450Sin
: return nir_op_fsin
;
377 case GLSLstd450Cos
: return nir_op_fcos
;
378 case GLSLstd450Pow
: return nir_op_fpow
;
379 case GLSLstd450Exp2
: return nir_op_fexp2
;
380 case GLSLstd450Log2
: return nir_op_flog2
;
381 case GLSLstd450Sqrt
: return nir_op_fsqrt
;
382 case GLSLstd450InverseSqrt
: return nir_op_frsq
;
383 case GLSLstd450FMin
: return nir_op_fmin
;
384 case GLSLstd450UMin
: return nir_op_umin
;
385 case GLSLstd450SMin
: return nir_op_imin
;
386 case GLSLstd450FMax
: return nir_op_fmax
;
387 case GLSLstd450UMax
: return nir_op_umax
;
388 case GLSLstd450SMax
: return nir_op_imax
;
389 case GLSLstd450FMix
: return nir_op_flrp
;
390 case GLSLstd450Fma
: return nir_op_ffma
;
391 case GLSLstd450Ldexp
: return nir_op_ldexp
;
392 case GLSLstd450FindILsb
: return nir_op_find_lsb
;
393 case GLSLstd450FindSMsb
: return nir_op_ifind_msb
;
394 case GLSLstd450FindUMsb
: return nir_op_ufind_msb
;
396 /* Packing/Unpacking functions */
397 case GLSLstd450PackSnorm4x8
: return nir_op_pack_snorm_4x8
;
398 case GLSLstd450PackUnorm4x8
: return nir_op_pack_unorm_4x8
;
399 case GLSLstd450PackSnorm2x16
: return nir_op_pack_snorm_2x16
;
400 case GLSLstd450PackUnorm2x16
: return nir_op_pack_unorm_2x16
;
401 case GLSLstd450PackHalf2x16
: return nir_op_pack_half_2x16
;
402 case GLSLstd450UnpackSnorm4x8
: return nir_op_unpack_snorm_4x8
;
403 case GLSLstd450UnpackUnorm4x8
: return nir_op_unpack_unorm_4x8
;
404 case GLSLstd450UnpackSnorm2x16
: return nir_op_unpack_snorm_2x16
;
405 case GLSLstd450UnpackUnorm2x16
: return nir_op_unpack_unorm_2x16
;
406 case GLSLstd450UnpackHalf2x16
: return nir_op_unpack_half_2x16
;
409 unreachable("No NIR equivalent");
414 handle_glsl450_alu(struct vtn_builder
*b
, enum GLSLstd450 entrypoint
,
415 const uint32_t *w
, unsigned count
)
417 struct nir_builder
*nb
= &b
->nb
;
418 const struct glsl_type
*dest_type
=
419 vtn_value(b
, w
[1], vtn_value_type_type
)->type
->type
;
421 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
422 val
->ssa
= vtn_create_ssa_value(b
, dest_type
);
424 /* Collect the various SSA sources */
425 unsigned num_inputs
= count
- 5;
426 nir_ssa_def
*src
[3] = { NULL
, };
427 for (unsigned i
= 0; i
< num_inputs
; i
++)
428 src
[i
] = vtn_ssa_value(b
, w
[i
+ 5])->def
;
430 switch (entrypoint
) {
431 case GLSLstd450Radians
:
432 val
->ssa
->def
= nir_fmul(nb
, src
[0], nir_imm_float(nb
, 0.01745329251));
434 case GLSLstd450Degrees
:
435 val
->ssa
->def
= nir_fmul(nb
, src
[0], nir_imm_float(nb
, 57.2957795131));
438 val
->ssa
->def
= nir_fdiv(nb
, nir_fsin(nb
, src
[0]),
439 nir_fcos(nb
, src
[0]));
442 case GLSLstd450Modf
: {
443 nir_ssa_def
*sign
= nir_fsign(nb
, src
[0]);
444 nir_ssa_def
*abs
= nir_fabs(nb
, src
[0]);
445 val
->ssa
->def
= nir_fmul(nb
, sign
, nir_ffract(nb
, abs
));
446 nir_store_deref_var(nb
, vtn_nir_deref(b
, w
[6]),
447 nir_fmul(nb
, sign
, nir_ffloor(nb
, abs
)), 0xf);
451 case GLSLstd450ModfStruct
: {
452 nir_ssa_def
*sign
= nir_fsign(nb
, src
[0]);
453 nir_ssa_def
*abs
= nir_fabs(nb
, src
[0]);
454 assert(glsl_type_is_struct(val
->ssa
->type
));
455 val
->ssa
->elems
[0]->def
= nir_fmul(nb
, sign
, nir_ffract(nb
, abs
));
456 val
->ssa
->elems
[1]->def
= nir_fmul(nb
, sign
, nir_ffloor(nb
, abs
));
461 val
->ssa
->def
= nir_sge(nb
, src
[1], src
[0]);
464 case GLSLstd450Length
:
465 val
->ssa
->def
= build_length(nb
, src
[0]);
467 case GLSLstd450Distance
:
468 val
->ssa
->def
= build_length(nb
, nir_fsub(nb
, src
[0], src
[1]));
470 case GLSLstd450Normalize
:
471 val
->ssa
->def
= nir_fdiv(nb
, src
[0], build_length(nb
, src
[0]));
475 val
->ssa
->def
= build_exp(nb
, src
[0]);
479 val
->ssa
->def
= build_log(nb
, src
[0]);
482 case GLSLstd450FClamp
:
483 val
->ssa
->def
= build_fclamp(nb
, src
[0], src
[1], src
[2]);
485 case GLSLstd450UClamp
:
486 val
->ssa
->def
= nir_umin(nb
, nir_umax(nb
, src
[0], src
[1]), src
[2]);
488 case GLSLstd450SClamp
:
489 val
->ssa
->def
= nir_imin(nb
, nir_imax(nb
, src
[0], src
[1]), src
[2]);
492 case GLSLstd450Cross
: {
493 unsigned yzx
[4] = { 1, 2, 0, 0 };
494 unsigned zxy
[4] = { 2, 0, 1, 0 };
496 nir_fsub(nb
, nir_fmul(nb
, nir_swizzle(nb
, src
[0], yzx
, 3, true),
497 nir_swizzle(nb
, src
[1], zxy
, 3, true)),
498 nir_fmul(nb
, nir_swizzle(nb
, src
[0], zxy
, 3, true),
499 nir_swizzle(nb
, src
[1], yzx
, 3, true)));
503 case GLSLstd450SmoothStep
: {
504 /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
506 build_fclamp(nb
, nir_fdiv(nb
, nir_fsub(nb
, src
[2], src
[0]),
507 nir_fsub(nb
, src
[1], src
[0])),
508 nir_imm_float(nb
, 0.0), nir_imm_float(nb
, 1.0));
509 /* result = t * t * (3 - 2 * t) */
511 nir_fmul(nb
, t
, nir_fmul(nb
, t
,
512 nir_fsub(nb
, nir_imm_float(nb
, 3.0),
513 nir_fmul(nb
, nir_imm_float(nb
, 2.0), t
))));
517 case GLSLstd450FaceForward
:
519 nir_bcsel(nb
, nir_flt(nb
, nir_fdot(nb
, src
[2], src
[1]),
520 nir_imm_float(nb
, 0.0)),
521 src
[0], nir_fneg(nb
, src
[0]));
524 case GLSLstd450Reflect
:
525 /* I - 2 * dot(N, I) * N */
527 nir_fsub(nb
, src
[0], nir_fmul(nb
, nir_imm_float(nb
, 2.0),
528 nir_fmul(nb
, nir_fdot(nb
, src
[0], src
[1]),
532 case GLSLstd450Refract
: {
533 nir_ssa_def
*I
= src
[0];
534 nir_ssa_def
*N
= src
[1];
535 nir_ssa_def
*eta
= src
[2];
536 nir_ssa_def
*n_dot_i
= nir_fdot(nb
, N
, I
);
537 nir_ssa_def
*one
= nir_imm_float(nb
, 1.0);
538 nir_ssa_def
*zero
= nir_imm_float(nb
, 0.0);
539 /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
541 nir_fsub(nb
, one
, nir_fmul(nb
, eta
, nir_fmul(nb
, eta
,
542 nir_fsub(nb
, one
, nir_fmul(nb
, n_dot_i
, n_dot_i
)))));
543 nir_ssa_def
*result
=
544 nir_fsub(nb
, nir_fmul(nb
, eta
, I
),
545 nir_fmul(nb
, nir_fadd(nb
, nir_fmul(nb
, eta
, n_dot_i
),
546 nir_fsqrt(nb
, k
)), N
));
547 /* XXX: bcsel, or if statement? */
548 val
->ssa
->def
= nir_bcsel(nb
, nir_flt(nb
, k
, zero
), zero
, result
);
553 /* 0.5 * (e^x - e^(-x)) */
555 nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
556 nir_fsub(nb
, build_exp(nb
, src
[0]),
557 build_exp(nb
, nir_fneg(nb
, src
[0]))));
561 /* 0.5 * (e^x + e^(-x)) */
563 nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
564 nir_fadd(nb
, build_exp(nb
, src
[0]),
565 build_exp(nb
, nir_fneg(nb
, src
[0]))));
568 case GLSLstd450Tanh
: {
569 /* tanh(x) := (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x)))
571 * With a little algebra this reduces to (e^2x - 1) / (e^2x + 1)
573 * We clamp x to (-inf, +10] to avoid precision problems. When x > 10,
574 * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in the
575 * computation e^2x +/- 1 so it can be ignored.
577 nir_ssa_def
*x
= nir_fmin(nb
, src
[0], nir_imm_float(nb
, 10));
578 nir_ssa_def
*exp2x
= build_exp(nb
, nir_fmul(nb
, x
, nir_imm_float(nb
, 2)));
579 val
->ssa
->def
= nir_fdiv(nb
, nir_fsub(nb
, exp2x
, nir_imm_float(nb
, 1)),
580 nir_fadd(nb
, exp2x
, nir_imm_float(nb
, 1)));
584 case GLSLstd450Asinh
:
585 val
->ssa
->def
= nir_fmul(nb
, nir_fsign(nb
, src
[0]),
586 build_log(nb
, nir_fadd(nb
, nir_fabs(nb
, src
[0]),
587 nir_fsqrt(nb
, nir_fadd(nb
, nir_fmul(nb
, src
[0], src
[0]),
588 nir_imm_float(nb
, 1.0f
))))));
590 case GLSLstd450Acosh
:
591 val
->ssa
->def
= build_log(nb
, nir_fadd(nb
, src
[0],
592 nir_fsqrt(nb
, nir_fsub(nb
, nir_fmul(nb
, src
[0], src
[0]),
593 nir_imm_float(nb
, 1.0f
)))));
595 case GLSLstd450Atanh
: {
596 nir_ssa_def
*one
= nir_imm_float(nb
, 1.0);
597 val
->ssa
->def
= nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
598 build_log(nb
, nir_fdiv(nb
, nir_fadd(nb
, one
, src
[0]),
599 nir_fsub(nb
, one
, src
[0]))));
604 val
->ssa
->def
= build_asin(nb
, src
[0], 0.086566724, -0.03102955);
608 val
->ssa
->def
= nir_fsub(nb
, nir_imm_float(nb
, M_PI_2f
),
609 build_asin(nb
, src
[0], 0.08132463, -0.02363318));
613 val
->ssa
->def
= build_atan(nb
, src
[0]);
616 case GLSLstd450Atan2
:
617 val
->ssa
->def
= build_atan2(nb
, src
[0], src
[1]);
620 case GLSLstd450Frexp
: {
621 nir_ssa_def
*exponent
;
622 val
->ssa
->def
= build_frexp(nb
, src
[0], &exponent
);
623 nir_store_deref_var(nb
, vtn_nir_deref(b
, w
[6]), exponent
, 0xf);
627 case GLSLstd450FrexpStruct
: {
628 assert(glsl_type_is_struct(val
->ssa
->type
));
629 val
->ssa
->elems
[0]->def
= build_frexp(nb
, src
[0],
630 &val
->ssa
->elems
[1]->def
);
636 nir_build_alu(&b
->nb
, vtn_nir_alu_op_for_spirv_glsl_opcode(entrypoint
),
637 src
[0], src
[1], src
[2], NULL
);
643 handle_glsl450_interpolation(struct vtn_builder
*b
, enum GLSLstd450 opcode
,
644 const uint32_t *w
, unsigned count
)
646 const struct glsl_type
*dest_type
=
647 vtn_value(b
, w
[1], vtn_value_type_type
)->type
->type
;
649 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
650 val
->ssa
= vtn_create_ssa_value(b
, dest_type
);
654 case GLSLstd450InterpolateAtCentroid
:
655 op
= nir_intrinsic_interp_var_at_centroid
;
657 case GLSLstd450InterpolateAtSample
:
658 op
= nir_intrinsic_interp_var_at_sample
;
660 case GLSLstd450InterpolateAtOffset
:
661 op
= nir_intrinsic_interp_var_at_offset
;
664 unreachable("Invalid opcode");
667 nir_intrinsic_instr
*intrin
= nir_intrinsic_instr_create(b
->nb
.shader
, op
);
669 nir_deref_var
*deref
= vtn_nir_deref(b
, w
[5]);
670 intrin
->variables
[0] = nir_deref_var_clone(deref
, intrin
);
673 case GLSLstd450InterpolateAtCentroid
:
675 case GLSLstd450InterpolateAtSample
:
676 case GLSLstd450InterpolateAtOffset
:
677 intrin
->src
[0] = nir_src_for_ssa(vtn_ssa_value(b
, w
[6])->def
);
680 unreachable("Invalid opcode");
683 intrin
->num_components
= glsl_get_vector_elements(dest_type
);
684 nir_ssa_dest_init(&intrin
->instr
, &intrin
->dest
,
685 glsl_get_vector_elements(dest_type
),
686 glsl_get_bit_size(dest_type
), NULL
);
687 val
->ssa
->def
= &intrin
->dest
.ssa
;
689 nir_builder_instr_insert(&b
->nb
, &intrin
->instr
);
693 vtn_handle_glsl450_instruction(struct vtn_builder
*b
, uint32_t ext_opcode
,
694 const uint32_t *w
, unsigned count
)
696 switch ((enum GLSLstd450
)ext_opcode
) {
697 case GLSLstd450Determinant
: {
698 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
699 val
->ssa
= rzalloc(b
, struct vtn_ssa_value
);
700 val
->ssa
->type
= vtn_value(b
, w
[1], vtn_value_type_type
)->type
->type
;
701 val
->ssa
->def
= build_mat_det(b
, vtn_ssa_value(b
, w
[5]));
705 case GLSLstd450MatrixInverse
: {
706 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
707 val
->ssa
= matrix_inverse(b
, vtn_ssa_value(b
, w
[5]));
711 case GLSLstd450InterpolateAtCentroid
:
712 case GLSLstd450InterpolateAtSample
:
713 case GLSLstd450InterpolateAtOffset
:
714 handle_glsl450_interpolation(b
, ext_opcode
, w
, count
);
718 handle_glsl450_alu(b
, (enum GLSLstd450
)ext_opcode
, w
, count
);