/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */
28 #include "vtn_private.h"
29 #include "GLSL.std.450.h"
31 #define M_PIf ((float) M_PI)
32 #define M_PI_2f ((float) M_PI_2)
33 #define M_PI_4f ((float) M_PI_4)
36 build_mat2_det(nir_builder
*b
, nir_ssa_def
*col
[2])
38 unsigned swiz
[4] = {1, 0, 0, 0};
39 nir_ssa_def
*p
= nir_fmul(b
, col
[0], nir_swizzle(b
, col
[1], swiz
, 2, true));
40 return nir_fsub(b
, nir_channel(b
, p
, 0), nir_channel(b
, p
, 1));
44 build_mat3_det(nir_builder
*b
, nir_ssa_def
*col
[3])
46 unsigned yzx
[4] = {1, 2, 0, 0};
47 unsigned zxy
[4] = {2, 0, 1, 0};
51 nir_fmul(b
, nir_swizzle(b
, col
[1], yzx
, 3, true),
52 nir_swizzle(b
, col
[2], zxy
, 3, true)));
55 nir_fmul(b
, nir_swizzle(b
, col
[1], zxy
, 3, true),
56 nir_swizzle(b
, col
[2], yzx
, 3, true)));
58 nir_ssa_def
*diff
= nir_fsub(b
, prod0
, prod1
);
60 return nir_fadd(b
, nir_channel(b
, diff
, 0),
61 nir_fadd(b
, nir_channel(b
, diff
, 1),
62 nir_channel(b
, diff
, 2)));
66 build_mat4_det(nir_builder
*b
, nir_ssa_def
**col
)
68 nir_ssa_def
*subdet
[4];
69 for (unsigned i
= 0; i
< 4; i
++) {
71 for (unsigned j
= 0, k
= 0; j
< 3; j
++, k
++) {
73 k
++; /* skip column */
77 nir_ssa_def
*subcol
[3];
78 subcol
[0] = nir_swizzle(b
, col
[1], swiz
, 3, true);
79 subcol
[1] = nir_swizzle(b
, col
[2], swiz
, 3, true);
80 subcol
[2] = nir_swizzle(b
, col
[3], swiz
, 3, true);
82 subdet
[i
] = build_mat3_det(b
, subcol
);
85 nir_ssa_def
*prod
= nir_fmul(b
, col
[0], nir_vec(b
, subdet
, 4));
87 return nir_fadd(b
, nir_fsub(b
, nir_channel(b
, prod
, 0),
88 nir_channel(b
, prod
, 1)),
89 nir_fsub(b
, nir_channel(b
, prod
, 2),
90 nir_channel(b
, prod
, 3)));
94 build_mat_det(struct vtn_builder
*b
, struct vtn_ssa_value
*src
)
96 unsigned size
= glsl_get_vector_elements(src
->type
);
99 for (unsigned i
= 0; i
< size
; i
++)
100 cols
[i
] = src
->elems
[i
]->def
;
103 case 2: return build_mat2_det(&b
->nb
, cols
);
104 case 3: return build_mat3_det(&b
->nb
, cols
);
105 case 4: return build_mat4_det(&b
->nb
, cols
);
107 unreachable("Invalid matrix size");
111 /* Computes the determinate of the submatrix given by taking src and
112 * removing the specified row and column.
115 build_mat_subdet(struct nir_builder
*b
, struct vtn_ssa_value
*src
,
116 unsigned size
, unsigned row
, unsigned col
)
118 assert(row
< size
&& col
< size
);
120 return nir_channel(b
, src
->elems
[1 - col
]->def
, 1 - row
);
122 /* Swizzle to get all but the specified row */
124 for (unsigned j
= 0; j
< 4; j
++)
125 swiz
[j
- (j
> row
)] = j
;
127 /* Grab all but the specified column */
128 nir_ssa_def
*subcol
[3];
129 for (unsigned j
= 0; j
< size
; j
++) {
131 subcol
[j
- (j
> col
)] = nir_swizzle(b
, src
->elems
[j
]->def
,
132 swiz
, size
- 1, true);
137 return build_mat2_det(b
, subcol
);
140 return build_mat3_det(b
, subcol
);
145 static struct vtn_ssa_value
*
146 matrix_inverse(struct vtn_builder
*b
, struct vtn_ssa_value
*src
)
148 nir_ssa_def
*adj_col
[4];
149 unsigned size
= glsl_get_vector_elements(src
->type
);
151 /* Build up an adjugate matrix */
152 for (unsigned c
= 0; c
< size
; c
++) {
153 nir_ssa_def
*elem
[4];
154 for (unsigned r
= 0; r
< size
; r
++) {
155 elem
[r
] = build_mat_subdet(&b
->nb
, src
, size
, c
, r
);
158 elem
[r
] = nir_fneg(&b
->nb
, elem
[r
]);
161 adj_col
[c
] = nir_vec(&b
->nb
, elem
, size
);
164 nir_ssa_def
*det_inv
= nir_frcp(&b
->nb
, build_mat_det(b
, src
));
166 struct vtn_ssa_value
*val
= vtn_create_ssa_value(b
, src
->type
);
167 for (unsigned i
= 0; i
< size
; i
++)
168 val
->elems
[i
]->def
= nir_fmul(&b
->nb
, adj_col
[i
], det_inv
);
174 build_length(nir_builder
*b
, nir_ssa_def
*vec
)
176 switch (vec
->num_components
) {
177 case 1: return nir_fsqrt(b
, nir_fmul(b
, vec
, vec
));
178 case 2: return nir_fsqrt(b
, nir_fdot2(b
, vec
, vec
));
179 case 3: return nir_fsqrt(b
, nir_fdot3(b
, vec
, vec
));
180 case 4: return nir_fsqrt(b
, nir_fdot4(b
, vec
, vec
));
182 unreachable("Invalid number of components");
186 static inline nir_ssa_def
*
187 build_fclamp(nir_builder
*b
,
188 nir_ssa_def
*x
, nir_ssa_def
*min_val
, nir_ssa_def
*max_val
)
190 return nir_fmin(b
, nir_fmax(b
, x
, min_val
), max_val
);
197 build_exp(nir_builder
*b
, nir_ssa_def
*x
)
199 return nir_fexp2(b
, nir_fmul(b
, x
, nir_imm_float(b
, M_LOG2E
)));
203 * Return ln(x) - the natural logarithm of x.
206 build_log(nir_builder
*b
, nir_ssa_def
*x
)
208 return nir_fmul(b
, nir_flog2(b
, x
), nir_imm_float(b
, 1.0 / M_LOG2E
));
212 * Approximate asin(x) by the formula:
213 * asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1))))
215 * which is correct to first order at x=0 and x=±1 regardless of the p
216 * coefficients but can be made second-order correct at both ends by selecting
217 * the fit coefficients appropriately. Different p coefficients can be used
218 * in the asin and acos implementation to minimize some relative error metric
222 build_asin(nir_builder
*b
, nir_ssa_def
*x
, float p0
, float p1
)
224 nir_ssa_def
*abs_x
= nir_fabs(b
, x
);
225 return nir_fmul(b
, nir_fsign(b
, x
),
226 nir_fsub(b
, nir_imm_float(b
, M_PI_2f
),
227 nir_fmul(b
, nir_fsqrt(b
, nir_fsub(b
, nir_imm_float(b
, 1.0f
), abs_x
)),
228 nir_fadd(b
, nir_imm_float(b
, M_PI_2f
),
230 nir_fadd(b
, nir_imm_float(b
, M_PI_4f
- 1.0f
),
232 nir_fadd(b
, nir_imm_float(b
, p0
),
234 nir_imm_float(b
, p1
))))))))));
238 * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
241 build_fsum(nir_builder
*b
, nir_ssa_def
**xs
, int terms
)
243 nir_ssa_def
*accum
= xs
[0];
245 for (int i
= 1; i
< terms
; i
++)
246 accum
= nir_fadd(b
, accum
, xs
[i
]);
252 build_atan(nir_builder
*b
, nir_ssa_def
*y_over_x
)
254 nir_ssa_def
*abs_y_over_x
= nir_fabs(b
, y_over_x
);
255 nir_ssa_def
*one
= nir_imm_float(b
, 1.0f
);
258 * range-reduction, first step:
260 * / y_over_x if |y_over_x| <= 1.0;
262 * \ 1.0 / y_over_x otherwise
264 nir_ssa_def
*x
= nir_fdiv(b
, nir_fmin(b
, abs_y_over_x
, one
),
265 nir_fmax(b
, abs_y_over_x
, one
));
268 * approximate atan by evaluating polynomial:
270 * x * 0.9999793128310355 - x^3 * 0.3326756418091246 +
271 * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 +
272 * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
274 nir_ssa_def
*x_2
= nir_fmul(b
, x
, x
);
275 nir_ssa_def
*x_3
= nir_fmul(b
, x_2
, x
);
276 nir_ssa_def
*x_5
= nir_fmul(b
, x_3
, x_2
);
277 nir_ssa_def
*x_7
= nir_fmul(b
, x_5
, x_2
);
278 nir_ssa_def
*x_9
= nir_fmul(b
, x_7
, x_2
);
279 nir_ssa_def
*x_11
= nir_fmul(b
, x_9
, x_2
);
281 nir_ssa_def
*polynomial_terms
[] = {
282 nir_fmul(b
, x
, nir_imm_float(b
, 0.9999793128310355f
)),
283 nir_fmul(b
, x_3
, nir_imm_float(b
, -0.3326756418091246f
)),
284 nir_fmul(b
, x_5
, nir_imm_float(b
, 0.1938924977115610f
)),
285 nir_fmul(b
, x_7
, nir_imm_float(b
, -0.1173503194786851f
)),
286 nir_fmul(b
, x_9
, nir_imm_float(b
, 0.0536813784310406f
)),
287 nir_fmul(b
, x_11
, nir_imm_float(b
, -0.0121323213173444f
)),
291 build_fsum(b
, polynomial_terms
, ARRAY_SIZE(polynomial_terms
));
293 /* range-reduction fixup */
294 tmp
= nir_fadd(b
, tmp
,
296 nir_b2f(b
, nir_flt(b
, one
, abs_y_over_x
)),
297 nir_fadd(b
, nir_fmul(b
, tmp
,
298 nir_imm_float(b
, -2.0f
)),
299 nir_imm_float(b
, M_PI_2f
))));
302 return nir_fmul(b
, tmp
, nir_fsign(b
, y_over_x
));
306 build_atan2(nir_builder
*b
, nir_ssa_def
*y
, nir_ssa_def
*x
)
308 nir_ssa_def
*zero
= nir_imm_float(b
, 0.0f
);
310 /* If |x| >= 1.0e-8 * |y|: */
311 nir_ssa_def
*condition
=
312 nir_fge(b
, nir_fabs(b
, x
),
313 nir_fmul(b
, nir_imm_float(b
, 1.0e-8f
), nir_fabs(b
, y
)));
315 /* Then...call atan(y/x) and fix it up: */
316 nir_ssa_def
*atan1
= build_atan(b
, nir_fdiv(b
, y
, x
));
317 nir_ssa_def
*r_then
=
318 nir_bcsel(b
, nir_flt(b
, x
, zero
),
320 nir_bcsel(b
, nir_fge(b
, y
, zero
),
321 nir_imm_float(b
, M_PIf
),
322 nir_imm_float(b
, -M_PIf
))),
326 nir_ssa_def
*r_else
=
327 nir_fmul(b
, nir_fsign(b
, y
), nir_imm_float(b
, M_PI_2f
));
329 return nir_bcsel(b
, condition
, r_then
, r_else
);
333 build_frexp(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
**exponent
)
335 nir_ssa_def
*abs_x
= nir_fabs(b
, x
);
336 nir_ssa_def
*zero
= nir_imm_float(b
, 0.0f
);
338 /* Single-precision floating-point values are stored as
343 * An exponent shift of 23 will shift the mantissa out, leaving only the
344 * exponent and sign bit (which itself may be zero, if the absolute value
345 * was taken before the bitcast and shift.
347 nir_ssa_def
*exponent_shift
= nir_imm_int(b
, 23);
348 nir_ssa_def
*exponent_bias
= nir_imm_int(b
, -126);
350 nir_ssa_def
*sign_mantissa_mask
= nir_imm_int(b
, 0x807fffffu
);
352 /* Exponent of floating-point values in the range [0.5, 1.0). */
353 nir_ssa_def
*exponent_value
= nir_imm_int(b
, 0x3f000000u
);
355 nir_ssa_def
*is_not_zero
= nir_fne(b
, abs_x
, zero
);
358 nir_iadd(b
, nir_ushr(b
, abs_x
, exponent_shift
),
359 nir_bcsel(b
, is_not_zero
, exponent_bias
, zero
));
361 return nir_ior(b
, nir_iand(b
, x
, sign_mantissa_mask
),
362 nir_bcsel(b
, is_not_zero
, exponent_value
, zero
));
366 handle_glsl450_alu(struct vtn_builder
*b
, enum GLSLstd450 entrypoint
,
367 const uint32_t *w
, unsigned count
)
369 struct nir_builder
*nb
= &b
->nb
;
370 const struct glsl_type
*dest_type
=
371 vtn_value(b
, w
[1], vtn_value_type_type
)->type
->type
;
373 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
374 val
->ssa
= vtn_create_ssa_value(b
, dest_type
);
376 /* Collect the various SSA sources */
377 unsigned num_inputs
= count
- 5;
379 for (unsigned i
= 0; i
< num_inputs
; i
++)
380 src
[i
] = vtn_ssa_value(b
, w
[i
+ 5])->def
;
383 switch (entrypoint
) {
384 case GLSLstd450Round
: op
= nir_op_fround_even
; break; /* TODO */
385 case GLSLstd450RoundEven
: op
= nir_op_fround_even
; break;
386 case GLSLstd450Trunc
: op
= nir_op_ftrunc
; break;
387 case GLSLstd450FAbs
: op
= nir_op_fabs
; break;
388 case GLSLstd450SAbs
: op
= nir_op_iabs
; break;
389 case GLSLstd450FSign
: op
= nir_op_fsign
; break;
390 case GLSLstd450SSign
: op
= nir_op_isign
; break;
391 case GLSLstd450Floor
: op
= nir_op_ffloor
; break;
392 case GLSLstd450Ceil
: op
= nir_op_fceil
; break;
393 case GLSLstd450Fract
: op
= nir_op_ffract
; break;
394 case GLSLstd450Radians
:
395 val
->ssa
->def
= nir_fmul(nb
, src
[0], nir_imm_float(nb
, 0.01745329251));
397 case GLSLstd450Degrees
:
398 val
->ssa
->def
= nir_fmul(nb
, src
[0], nir_imm_float(nb
, 57.2957795131));
400 case GLSLstd450Sin
: op
= nir_op_fsin
; break;
401 case GLSLstd450Cos
: op
= nir_op_fcos
; break;
403 val
->ssa
->def
= nir_fdiv(nb
, nir_fsin(nb
, src
[0]),
404 nir_fcos(nb
, src
[0]));
406 case GLSLstd450Pow
: op
= nir_op_fpow
; break;
407 case GLSLstd450Exp2
: op
= nir_op_fexp2
; break;
408 case GLSLstd450Log2
: op
= nir_op_flog2
; break;
409 case GLSLstd450Sqrt
: op
= nir_op_fsqrt
; break;
410 case GLSLstd450InverseSqrt
: op
= nir_op_frsq
; break;
412 case GLSLstd450Modf
: {
413 nir_ssa_def
*sign
= nir_fsign(nb
, src
[0]);
414 nir_ssa_def
*abs
= nir_fabs(nb
, src
[0]);
415 val
->ssa
->def
= nir_fmul(nb
, sign
, nir_ffract(nb
, abs
));
416 nir_store_deref_var(nb
, vtn_nir_deref(b
, w
[6]),
417 nir_fmul(nb
, sign
, nir_ffloor(nb
, abs
)), 0xf);
421 case GLSLstd450ModfStruct
: {
422 nir_ssa_def
*sign
= nir_fsign(nb
, src
[0]);
423 nir_ssa_def
*abs
= nir_fabs(nb
, src
[0]);
424 assert(glsl_type_is_struct(val
->ssa
->type
));
425 val
->ssa
->elems
[0]->def
= nir_fmul(nb
, sign
, nir_ffract(nb
, abs
));
426 val
->ssa
->elems
[1]->def
= nir_fmul(nb
, sign
, nir_ffloor(nb
, abs
));
430 case GLSLstd450FMin
: op
= nir_op_fmin
; break;
431 case GLSLstd450UMin
: op
= nir_op_umin
; break;
432 case GLSLstd450SMin
: op
= nir_op_imin
; break;
433 case GLSLstd450FMax
: op
= nir_op_fmax
; break;
434 case GLSLstd450UMax
: op
= nir_op_umax
; break;
435 case GLSLstd450SMax
: op
= nir_op_imax
; break;
436 case GLSLstd450FMix
: op
= nir_op_flrp
; break;
438 val
->ssa
->def
= nir_sge(nb
, src
[1], src
[0]);
441 case GLSLstd450Fma
: op
= nir_op_ffma
; break;
442 case GLSLstd450Ldexp
: op
= nir_op_ldexp
; break;
444 /* Packing/Unpacking functions */
445 case GLSLstd450PackSnorm4x8
: op
= nir_op_pack_snorm_4x8
; break;
446 case GLSLstd450PackUnorm4x8
: op
= nir_op_pack_unorm_4x8
; break;
447 case GLSLstd450PackSnorm2x16
: op
= nir_op_pack_snorm_2x16
; break;
448 case GLSLstd450PackUnorm2x16
: op
= nir_op_pack_unorm_2x16
; break;
449 case GLSLstd450PackHalf2x16
: op
= nir_op_pack_half_2x16
; break;
450 case GLSLstd450UnpackSnorm4x8
: op
= nir_op_unpack_snorm_4x8
; break;
451 case GLSLstd450UnpackUnorm4x8
: op
= nir_op_unpack_unorm_4x8
; break;
452 case GLSLstd450UnpackSnorm2x16
: op
= nir_op_unpack_snorm_2x16
; break;
453 case GLSLstd450UnpackUnorm2x16
: op
= nir_op_unpack_unorm_2x16
; break;
454 case GLSLstd450UnpackHalf2x16
: op
= nir_op_unpack_half_2x16
; break;
456 case GLSLstd450Length
:
457 val
->ssa
->def
= build_length(nb
, src
[0]);
459 case GLSLstd450Distance
:
460 val
->ssa
->def
= build_length(nb
, nir_fsub(nb
, src
[0], src
[1]));
462 case GLSLstd450Normalize
:
463 val
->ssa
->def
= nir_fdiv(nb
, src
[0], build_length(nb
, src
[0]));
467 val
->ssa
->def
= build_exp(nb
, src
[0]);
471 val
->ssa
->def
= build_log(nb
, src
[0]);
474 case GLSLstd450FClamp
:
475 val
->ssa
->def
= build_fclamp(nb
, src
[0], src
[1], src
[2]);
477 case GLSLstd450UClamp
:
478 val
->ssa
->def
= nir_umin(nb
, nir_umax(nb
, src
[0], src
[1]), src
[2]);
480 case GLSLstd450SClamp
:
481 val
->ssa
->def
= nir_imin(nb
, nir_imax(nb
, src
[0], src
[1]), src
[2]);
484 case GLSLstd450Cross
: {
485 unsigned yzx
[4] = { 1, 2, 0, 0 };
486 unsigned zxy
[4] = { 2, 0, 1, 0 };
488 nir_fsub(nb
, nir_fmul(nb
, nir_swizzle(nb
, src
[0], yzx
, 3, true),
489 nir_swizzle(nb
, src
[1], zxy
, 3, true)),
490 nir_fmul(nb
, nir_swizzle(nb
, src
[0], zxy
, 3, true),
491 nir_swizzle(nb
, src
[1], yzx
, 3, true)));
495 case GLSLstd450SmoothStep
: {
496 /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
498 build_fclamp(nb
, nir_fdiv(nb
, nir_fsub(nb
, src
[2], src
[0]),
499 nir_fsub(nb
, src
[1], src
[0])),
500 nir_imm_float(nb
, 0.0), nir_imm_float(nb
, 1.0));
501 /* result = t * t * (3 - 2 * t) */
503 nir_fmul(nb
, t
, nir_fmul(nb
, t
,
504 nir_fsub(nb
, nir_imm_float(nb
, 3.0),
505 nir_fmul(nb
, nir_imm_float(nb
, 2.0), t
))));
509 case GLSLstd450FaceForward
:
511 nir_bcsel(nb
, nir_flt(nb
, nir_fdot(nb
, src
[2], src
[1]),
512 nir_imm_float(nb
, 0.0)),
513 src
[0], nir_fneg(nb
, src
[0]));
516 case GLSLstd450Reflect
:
517 /* I - 2 * dot(N, I) * N */
519 nir_fsub(nb
, src
[0], nir_fmul(nb
, nir_imm_float(nb
, 2.0),
520 nir_fmul(nb
, nir_fdot(nb
, src
[0], src
[1]),
524 case GLSLstd450Refract
: {
525 nir_ssa_def
*I
= src
[0];
526 nir_ssa_def
*N
= src
[1];
527 nir_ssa_def
*eta
= src
[2];
528 nir_ssa_def
*n_dot_i
= nir_fdot(nb
, N
, I
);
529 nir_ssa_def
*one
= nir_imm_float(nb
, 1.0);
530 nir_ssa_def
*zero
= nir_imm_float(nb
, 0.0);
531 /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
533 nir_fsub(nb
, one
, nir_fmul(nb
, eta
, nir_fmul(nb
, eta
,
534 nir_fsub(nb
, one
, nir_fmul(nb
, n_dot_i
, n_dot_i
)))));
535 nir_ssa_def
*result
=
536 nir_fsub(nb
, nir_fmul(nb
, eta
, I
),
537 nir_fmul(nb
, nir_fadd(nb
, nir_fmul(nb
, eta
, n_dot_i
),
538 nir_fsqrt(nb
, k
)), N
));
539 /* XXX: bcsel, or if statement? */
540 val
->ssa
->def
= nir_bcsel(nb
, nir_flt(nb
, k
, zero
), zero
, result
);
545 /* 0.5 * (e^x - e^(-x)) */
547 nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
548 nir_fsub(nb
, build_exp(nb
, src
[0]),
549 build_exp(nb
, nir_fneg(nb
, src
[0]))));
553 /* 0.5 * (e^x + e^(-x)) */
555 nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
556 nir_fadd(nb
, build_exp(nb
, src
[0]),
557 build_exp(nb
, nir_fneg(nb
, src
[0]))));
561 /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
563 nir_fdiv(nb
, nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
564 nir_fsub(nb
, build_exp(nb
, src
[0]),
565 build_exp(nb
, nir_fneg(nb
, src
[0])))),
566 nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
567 nir_fadd(nb
, build_exp(nb
, src
[0]),
568 build_exp(nb
, nir_fneg(nb
, src
[0])))));
571 case GLSLstd450Asinh
:
572 val
->ssa
->def
= nir_fmul(nb
, nir_fsign(nb
, src
[0]),
573 build_log(nb
, nir_fadd(nb
, nir_fabs(nb
, src
[0]),
574 nir_fsqrt(nb
, nir_fadd(nb
, nir_fmul(nb
, src
[0], src
[0]),
575 nir_imm_float(nb
, 1.0f
))))));
577 case GLSLstd450Acosh
:
578 val
->ssa
->def
= build_log(nb
, nir_fadd(nb
, src
[0],
579 nir_fsqrt(nb
, nir_fsub(nb
, nir_fmul(nb
, src
[0], src
[0]),
580 nir_imm_float(nb
, 1.0f
)))));
582 case GLSLstd450Atanh
: {
583 nir_ssa_def
*one
= nir_imm_float(nb
, 1.0);
584 val
->ssa
->def
= nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
585 build_log(nb
, nir_fdiv(nb
, nir_fadd(nb
, one
, src
[0]),
586 nir_fsub(nb
, one
, src
[0]))));
590 case GLSLstd450FindILsb
: op
= nir_op_find_lsb
; break;
591 case GLSLstd450FindSMsb
: op
= nir_op_ifind_msb
; break;
592 case GLSLstd450FindUMsb
: op
= nir_op_ufind_msb
; break;
595 val
->ssa
->def
= build_asin(nb
, src
[0], 0.086566724, -0.03102955);
599 val
->ssa
->def
= nir_fsub(nb
, nir_imm_float(nb
, M_PI_2f
),
600 build_asin(nb
, src
[0], 0.08132463, -0.02363318));
604 val
->ssa
->def
= build_atan(nb
, src
[0]);
607 case GLSLstd450Atan2
:
608 val
->ssa
->def
= build_atan2(nb
, src
[0], src
[1]);
611 case GLSLstd450Frexp
: {
612 nir_ssa_def
*exponent
;
613 val
->ssa
->def
= build_frexp(nb
, src
[0], &exponent
);
614 nir_store_deref_var(nb
, vtn_nir_deref(b
, w
[6]), exponent
, 0xf);
618 case GLSLstd450FrexpStruct
: {
619 assert(glsl_type_is_struct(val
->ssa
->type
));
620 val
->ssa
->elems
[0]->def
= build_frexp(nb
, src
[0],
621 &val
->ssa
->elems
[1]->def
);
625 case GLSLstd450PackDouble2x32
:
626 case GLSLstd450UnpackDouble2x32
:
628 unreachable("Unhandled opcode");
631 nir_alu_instr
*instr
= nir_alu_instr_create(b
->shader
, op
);
632 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
.dest
,
633 glsl_get_vector_elements(val
->ssa
->type
), val
->name
);
634 instr
->dest
.write_mask
= (1 << instr
->dest
.dest
.ssa
.num_components
) - 1;
635 val
->ssa
->def
= &instr
->dest
.dest
.ssa
;
637 for (unsigned i
= 0; i
< nir_op_infos
[op
].num_inputs
; i
++)
638 instr
->src
[i
].src
= nir_src_for_ssa(src
[i
]);
640 nir_builder_instr_insert(nb
, &instr
->instr
);
644 vtn_handle_glsl450_instruction(struct vtn_builder
*b
, uint32_t ext_opcode
,
645 const uint32_t *w
, unsigned count
)
647 switch ((enum GLSLstd450
)ext_opcode
) {
648 case GLSLstd450Determinant
: {
649 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
650 val
->ssa
= rzalloc(b
, struct vtn_ssa_value
);
651 val
->ssa
->type
= vtn_value(b
, w
[1], vtn_value_type_type
)->type
->type
;
652 val
->ssa
->def
= build_mat_det(b
, vtn_ssa_value(b
, w
[5]));
656 case GLSLstd450MatrixInverse
: {
657 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
658 val
->ssa
= matrix_inverse(b
, vtn_ssa_value(b
, w
[5]));
662 case GLSLstd450InterpolateAtCentroid
:
663 case GLSLstd450InterpolateAtSample
:
664 case GLSLstd450InterpolateAtOffset
:
665 unreachable("Unhandled opcode");
668 handle_glsl450_alu(b
, (enum GLSLstd450
)ext_opcode
, w
, count
);