/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */
28 #include "vtn_private.h"
29 #include "GLSL.std.450.h"
31 #define M_PIf ((float) M_PI)
32 #define M_PI_2f ((float) M_PI_2)
33 #define M_PI_4f ((float) M_PI_4)
36 build_mat2_det(nir_builder
*b
, nir_ssa_def
*col
[2])
38 unsigned swiz
[4] = {1, 0, 0, 0};
39 nir_ssa_def
*p
= nir_fmul(b
, col
[0], nir_swizzle(b
, col
[1], swiz
, 2, true));
40 return nir_fsub(b
, nir_channel(b
, p
, 0), nir_channel(b
, p
, 1));
44 build_mat3_det(nir_builder
*b
, nir_ssa_def
*col
[3])
46 unsigned yzx
[4] = {1, 2, 0, 0};
47 unsigned zxy
[4] = {2, 0, 1, 0};
51 nir_fmul(b
, nir_swizzle(b
, col
[1], yzx
, 3, true),
52 nir_swizzle(b
, col
[2], zxy
, 3, true)));
55 nir_fmul(b
, nir_swizzle(b
, col
[1], zxy
, 3, true),
56 nir_swizzle(b
, col
[2], yzx
, 3, true)));
58 nir_ssa_def
*diff
= nir_fsub(b
, prod0
, prod1
);
60 return nir_fadd(b
, nir_channel(b
, diff
, 0),
61 nir_fadd(b
, nir_channel(b
, diff
, 1),
62 nir_channel(b
, diff
, 2)));
66 build_mat4_det(nir_builder
*b
, nir_ssa_def
**col
)
68 nir_ssa_def
*subdet
[4];
69 for (unsigned i
= 0; i
< 4; i
++) {
71 for (unsigned j
= 0; j
< 4; j
++)
72 swiz
[j
- (j
> i
)] = j
;
74 nir_ssa_def
*subcol
[3];
75 subcol
[0] = nir_swizzle(b
, col
[1], swiz
, 3, true);
76 subcol
[1] = nir_swizzle(b
, col
[2], swiz
, 3, true);
77 subcol
[2] = nir_swizzle(b
, col
[3], swiz
, 3, true);
79 subdet
[i
] = build_mat3_det(b
, subcol
);
82 nir_ssa_def
*prod
= nir_fmul(b
, col
[0], nir_vec(b
, subdet
, 4));
84 return nir_fadd(b
, nir_fsub(b
, nir_channel(b
, prod
, 0),
85 nir_channel(b
, prod
, 1)),
86 nir_fsub(b
, nir_channel(b
, prod
, 2),
87 nir_channel(b
, prod
, 3)));
91 build_mat_det(struct vtn_builder
*b
, struct vtn_ssa_value
*src
)
93 unsigned size
= glsl_get_vector_elements(src
->type
);
96 for (unsigned i
= 0; i
< size
; i
++)
97 cols
[i
] = src
->elems
[i
]->def
;
100 case 2: return build_mat2_det(&b
->nb
, cols
);
101 case 3: return build_mat3_det(&b
->nb
, cols
);
102 case 4: return build_mat4_det(&b
->nb
, cols
);
104 unreachable("Invalid matrix size");
108 /* Computes the determinate of the submatrix given by taking src and
109 * removing the specified row and column.
112 build_mat_subdet(struct nir_builder
*b
, struct vtn_ssa_value
*src
,
113 unsigned size
, unsigned row
, unsigned col
)
115 assert(row
< size
&& col
< size
);
117 return nir_channel(b
, src
->elems
[1 - col
]->def
, 1 - row
);
119 /* Swizzle to get all but the specified row */
121 for (unsigned j
= 0; j
< 4; j
++)
122 swiz
[j
- (j
> row
)] = j
;
124 /* Grab all but the specified column */
125 nir_ssa_def
*subcol
[3];
126 for (unsigned j
= 0; j
< size
; j
++) {
128 subcol
[j
- (j
> col
)] = nir_swizzle(b
, src
->elems
[j
]->def
,
129 swiz
, size
- 1, true);
134 return build_mat2_det(b
, subcol
);
137 return build_mat3_det(b
, subcol
);
142 static struct vtn_ssa_value
*
143 matrix_inverse(struct vtn_builder
*b
, struct vtn_ssa_value
*src
)
145 nir_ssa_def
*adj_col
[4];
146 unsigned size
= glsl_get_vector_elements(src
->type
);
148 /* Build up an adjugate matrix */
149 for (unsigned c
= 0; c
< size
; c
++) {
150 nir_ssa_def
*elem
[4];
151 for (unsigned r
= 0; r
< size
; r
++) {
152 elem
[r
] = build_mat_subdet(&b
->nb
, src
, size
, c
, r
);
155 elem
[r
] = nir_fneg(&b
->nb
, elem
[r
]);
158 adj_col
[c
] = nir_vec(&b
->nb
, elem
, size
);
161 nir_ssa_def
*det_inv
= nir_frcp(&b
->nb
, build_mat_det(b
, src
));
163 struct vtn_ssa_value
*val
= vtn_create_ssa_value(b
, src
->type
);
164 for (unsigned i
= 0; i
< size
; i
++)
165 val
->elems
[i
]->def
= nir_fmul(&b
->nb
, adj_col
[i
], det_inv
);
171 build_length(nir_builder
*b
, nir_ssa_def
*vec
)
173 switch (vec
->num_components
) {
174 case 1: return nir_fsqrt(b
, nir_fmul(b
, vec
, vec
));
175 case 2: return nir_fsqrt(b
, nir_fdot2(b
, vec
, vec
));
176 case 3: return nir_fsqrt(b
, nir_fdot3(b
, vec
, vec
));
177 case 4: return nir_fsqrt(b
, nir_fdot4(b
, vec
, vec
));
179 unreachable("Invalid number of components");
183 static inline nir_ssa_def
*
184 build_fclamp(nir_builder
*b
,
185 nir_ssa_def
*x
, nir_ssa_def
*min_val
, nir_ssa_def
*max_val
)
187 return nir_fmin(b
, nir_fmax(b
, x
, min_val
), max_val
);
194 build_exp(nir_builder
*b
, nir_ssa_def
*x
)
196 return nir_fexp2(b
, nir_fmul(b
, x
, nir_imm_float(b
, M_LOG2E
)));
200 * Return ln(x) - the natural logarithm of x.
203 build_log(nir_builder
*b
, nir_ssa_def
*x
)
205 return nir_fmul(b
, nir_flog2(b
, x
), nir_imm_float(b
, 1.0 / M_LOG2E
));
209 build_asin(nir_builder
*b
, nir_ssa_def
*x
)
211 nir_ssa_def
*abs_x
= nir_fabs(b
, x
);
212 return nir_fmul(b
, nir_fsign(b
, x
),
213 nir_fsub(b
, nir_imm_float(b
, M_PI_2f
),
214 nir_fmul(b
, nir_fsqrt(b
, nir_fsub(b
, nir_imm_float(b
, 1.0f
), abs_x
)),
215 nir_fadd(b
, nir_imm_float(b
, M_PI_2f
),
217 nir_fadd(b
, nir_imm_float(b
, M_PI_4f
- 1.0f
),
219 nir_fadd(b
, nir_imm_float(b
, 0.086566724f
),
221 nir_imm_float(b
, -0.03102955f
))))))))));
225 * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
228 build_fsum(nir_builder
*b
, nir_ssa_def
**xs
, int terms
)
230 nir_ssa_def
*accum
= xs
[0];
232 for (int i
= 1; i
< terms
; i
++)
233 accum
= nir_fadd(b
, accum
, xs
[i
]);
239 build_atan(nir_builder
*b
, nir_ssa_def
*y_over_x
)
241 nir_ssa_def
*abs_y_over_x
= nir_fabs(b
, y_over_x
);
242 nir_ssa_def
*one
= nir_imm_float(b
, 1.0f
);
245 * range-reduction, first step:
247 * / y_over_x if |y_over_x| <= 1.0;
249 * \ 1.0 / y_over_x otherwise
251 nir_ssa_def
*x
= nir_fdiv(b
, nir_fmin(b
, abs_y_over_x
, one
),
252 nir_fmax(b
, abs_y_over_x
, one
));
255 * approximate atan by evaluating polynomial:
257 * x * 0.9999793128310355 - x^3 * 0.3326756418091246 +
258 * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 +
259 * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
261 nir_ssa_def
*x_2
= nir_fmul(b
, x
, x
);
262 nir_ssa_def
*x_3
= nir_fmul(b
, x_2
, x
);
263 nir_ssa_def
*x_5
= nir_fmul(b
, x_3
, x_2
);
264 nir_ssa_def
*x_7
= nir_fmul(b
, x_5
, x_2
);
265 nir_ssa_def
*x_9
= nir_fmul(b
, x_7
, x_2
);
266 nir_ssa_def
*x_11
= nir_fmul(b
, x_9
, x_2
);
268 nir_ssa_def
*polynomial_terms
[] = {
269 nir_fmul(b
, x
, nir_imm_float(b
, 0.9999793128310355f
)),
270 nir_fmul(b
, x_3
, nir_imm_float(b
, -0.3326756418091246f
)),
271 nir_fmul(b
, x_5
, nir_imm_float(b
, 0.1938924977115610f
)),
272 nir_fmul(b
, x_7
, nir_imm_float(b
, -0.1173503194786851f
)),
273 nir_fmul(b
, x_9
, nir_imm_float(b
, 0.0536813784310406f
)),
274 nir_fmul(b
, x_11
, nir_imm_float(b
, -0.0121323213173444f
)),
278 build_fsum(b
, polynomial_terms
, ARRAY_SIZE(polynomial_terms
));
280 /* range-reduction fixup */
281 tmp
= nir_fadd(b
, tmp
,
283 nir_b2f(b
, nir_flt(b
, one
, abs_y_over_x
)),
284 nir_fadd(b
, nir_fmul(b
, tmp
,
285 nir_imm_float(b
, -2.0f
)),
286 nir_imm_float(b
, M_PI_2f
))));
289 return nir_fmul(b
, tmp
, nir_fsign(b
, y_over_x
));
293 build_atan2(nir_builder
*b
, nir_ssa_def
*y
, nir_ssa_def
*x
)
295 nir_ssa_def
*zero
= nir_imm_float(b
, 0.0f
);
297 /* If |x| >= 1.0e-8 * |y|: */
298 nir_if
*if_stmt
= nir_if_create(b
->shader
);
299 if_stmt
->condition
= nir_src_for_ssa(
300 nir_fge(b
, nir_fabs(b
, x
),
301 nir_fmul(b
, nir_imm_float(b
, 1.0e-8f
), nir_fabs(b
, y
))));
302 nir_builder_cf_insert(b
, &if_stmt
->cf_node
);
304 /* Then...call atan(y/x) and fix it up: */
305 b
->cursor
= nir_after_cf_list(&if_stmt
->then_list
);
306 nir_ssa_def
*atan1
= build_atan(b
, nir_fdiv(b
, y
, x
));
307 nir_ssa_def
*r_then
=
308 nir_bcsel(b
, nir_flt(b
, x
, zero
),
310 nir_bcsel(b
, nir_fge(b
, y
, zero
),
311 nir_imm_float(b
, M_PIf
),
312 nir_imm_float(b
, -M_PIf
))),
316 b
->cursor
= nir_after_cf_list(&if_stmt
->else_list
);
317 nir_ssa_def
*r_else
=
318 nir_fmul(b
, nir_fsign(b
, y
), nir_imm_float(b
, M_PI_2f
));
320 b
->cursor
= nir_after_cf_node(&if_stmt
->cf_node
);
322 nir_phi_instr
*phi
= nir_phi_instr_create(b
->shader
);
323 nir_ssa_dest_init(&phi
->instr
, &phi
->dest
, r_then
->num_components
, NULL
);
325 nir_phi_src
*phi_src0
= ralloc(phi
, nir_phi_src
);
326 nir_phi_src
*phi_src1
= ralloc(phi
, nir_phi_src
);
328 phi_src0
->pred
= nir_cf_node_as_block((nir_cf_node
*) exec_list_get_head(&if_stmt
->then_list
));
329 phi_src0
->src
= nir_src_for_ssa(r_then
);
330 exec_list_push_tail(&phi
->srcs
, &phi_src0
->node
);
331 phi_src1
->pred
= nir_cf_node_as_block((nir_cf_node
*) exec_list_get_head(&if_stmt
->else_list
));
332 phi_src1
->src
= nir_src_for_ssa(r_else
);
333 exec_list_push_tail(&phi
->srcs
, &phi_src1
->node
);
335 nir_builder_instr_insert(b
, &phi
->instr
);
337 return &phi
->dest
.ssa
;
341 build_frexp(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
**exponent
)
343 nir_ssa_def
*abs_x
= nir_fabs(b
, x
);
344 nir_ssa_def
*zero
= nir_imm_float(b
, 0.0f
);
346 /* Single-precision floating-point values are stored as
351 * An exponent shift of 23 will shift the mantissa out, leaving only the
352 * exponent and sign bit (which itself may be zero, if the absolute value
353 * was taken before the bitcast and shift.
355 nir_ssa_def
*exponent_shift
= nir_imm_int(b
, 23);
356 nir_ssa_def
*exponent_bias
= nir_imm_int(b
, -126);
358 nir_ssa_def
*sign_mantissa_mask
= nir_imm_int(b
, 0x807fffffu
);
360 /* Exponent of floating-point values in the range [0.5, 1.0). */
361 nir_ssa_def
*exponent_value
= nir_imm_int(b
, 0x3f000000u
);
363 nir_ssa_def
*is_not_zero
= nir_fne(b
, abs_x
, zero
);
366 nir_iadd(b
, nir_ushr(b
, abs_x
, exponent_shift
),
367 nir_bcsel(b
, is_not_zero
, exponent_bias
, zero
));
369 return nir_ior(b
, nir_iand(b
, x
, sign_mantissa_mask
),
370 nir_bcsel(b
, is_not_zero
, exponent_value
, zero
));
374 handle_glsl450_alu(struct vtn_builder
*b
, enum GLSLstd450 entrypoint
,
375 const uint32_t *w
, unsigned count
)
377 struct nir_builder
*nb
= &b
->nb
;
378 const struct glsl_type
*dest_type
=
379 vtn_value(b
, w
[1], vtn_value_type_type
)->type
->type
;
381 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
382 val
->ssa
= vtn_create_ssa_value(b
, dest_type
);
384 /* Collect the various SSA sources */
385 unsigned num_inputs
= count
- 5;
387 for (unsigned i
= 0; i
< num_inputs
; i
++)
388 src
[i
] = vtn_ssa_value(b
, w
[i
+ 5])->def
;
391 switch (entrypoint
) {
392 case GLSLstd450Round
: op
= nir_op_fround_even
; break; /* TODO */
393 case GLSLstd450RoundEven
: op
= nir_op_fround_even
; break;
394 case GLSLstd450Trunc
: op
= nir_op_ftrunc
; break;
395 case GLSLstd450FAbs
: op
= nir_op_fabs
; break;
396 case GLSLstd450SAbs
: op
= nir_op_iabs
; break;
397 case GLSLstd450FSign
: op
= nir_op_fsign
; break;
398 case GLSLstd450SSign
: op
= nir_op_isign
; break;
399 case GLSLstd450Floor
: op
= nir_op_ffloor
; break;
400 case GLSLstd450Ceil
: op
= nir_op_fceil
; break;
401 case GLSLstd450Fract
: op
= nir_op_ffract
; break;
402 case GLSLstd450Radians
:
403 val
->ssa
->def
= nir_fmul(nb
, src
[0], nir_imm_float(nb
, 0.01745329251));
405 case GLSLstd450Degrees
:
406 val
->ssa
->def
= nir_fmul(nb
, src
[0], nir_imm_float(nb
, 57.2957795131));
408 case GLSLstd450Sin
: op
= nir_op_fsin
; break;
409 case GLSLstd450Cos
: op
= nir_op_fcos
; break;
411 val
->ssa
->def
= nir_fdiv(nb
, nir_fsin(nb
, src
[0]),
412 nir_fcos(nb
, src
[0]));
414 case GLSLstd450Pow
: op
= nir_op_fpow
; break;
415 case GLSLstd450Exp2
: op
= nir_op_fexp2
; break;
416 case GLSLstd450Log2
: op
= nir_op_flog2
; break;
417 case GLSLstd450Sqrt
: op
= nir_op_fsqrt
; break;
418 case GLSLstd450InverseSqrt
: op
= nir_op_frsq
; break;
420 case GLSLstd450Modf
: {
421 val
->ssa
->def
= nir_ffract(nb
, src
[0]);
422 nir_deref_var
*out
= vtn_value(b
, w
[6], vtn_value_type_deref
)->deref
;
423 nir_store_deref_var(nb
, out
, nir_ffloor(nb
, src
[0]), 0xf);
427 case GLSLstd450FMin
: op
= nir_op_fmin
; break;
428 case GLSLstd450UMin
: op
= nir_op_umin
; break;
429 case GLSLstd450SMin
: op
= nir_op_imin
; break;
430 case GLSLstd450FMax
: op
= nir_op_fmax
; break;
431 case GLSLstd450UMax
: op
= nir_op_umax
; break;
432 case GLSLstd450SMax
: op
= nir_op_imax
; break;
433 case GLSLstd450FMix
: op
= nir_op_flrp
; break;
435 val
->ssa
->def
= nir_sge(nb
, src
[1], src
[0]);
438 case GLSLstd450Fma
: op
= nir_op_ffma
; break;
439 case GLSLstd450Ldexp
: op
= nir_op_ldexp
; break;
441 /* Packing/Unpacking functions */
442 case GLSLstd450PackSnorm4x8
: op
= nir_op_pack_snorm_4x8
; break;
443 case GLSLstd450PackUnorm4x8
: op
= nir_op_pack_unorm_4x8
; break;
444 case GLSLstd450PackSnorm2x16
: op
= nir_op_pack_snorm_2x16
; break;
445 case GLSLstd450PackUnorm2x16
: op
= nir_op_pack_unorm_2x16
; break;
446 case GLSLstd450PackHalf2x16
: op
= nir_op_pack_half_2x16
; break;
447 case GLSLstd450UnpackSnorm4x8
: op
= nir_op_unpack_snorm_4x8
; break;
448 case GLSLstd450UnpackUnorm4x8
: op
= nir_op_unpack_unorm_4x8
; break;
449 case GLSLstd450UnpackSnorm2x16
: op
= nir_op_unpack_snorm_2x16
; break;
450 case GLSLstd450UnpackUnorm2x16
: op
= nir_op_unpack_unorm_2x16
; break;
451 case GLSLstd450UnpackHalf2x16
: op
= nir_op_unpack_half_2x16
; break;
453 case GLSLstd450Length
:
454 val
->ssa
->def
= build_length(nb
, src
[0]);
456 case GLSLstd450Distance
:
457 val
->ssa
->def
= build_length(nb
, nir_fsub(nb
, src
[0], src
[1]));
459 case GLSLstd450Normalize
:
460 val
->ssa
->def
= nir_fdiv(nb
, src
[0], build_length(nb
, src
[0]));
464 val
->ssa
->def
= build_exp(nb
, src
[0]);
468 val
->ssa
->def
= build_log(nb
, src
[0]);
471 case GLSLstd450FClamp
:
472 val
->ssa
->def
= build_fclamp(nb
, src
[0], src
[1], src
[2]);
474 case GLSLstd450UClamp
:
475 val
->ssa
->def
= nir_umin(nb
, nir_umax(nb
, src
[0], src
[1]), src
[2]);
477 case GLSLstd450SClamp
:
478 val
->ssa
->def
= nir_imin(nb
, nir_imax(nb
, src
[0], src
[1]), src
[2]);
481 case GLSLstd450Cross
: {
482 unsigned yzx
[4] = { 1, 2, 0, 0 };
483 unsigned zxy
[4] = { 2, 0, 1, 0 };
485 nir_fsub(nb
, nir_fmul(nb
, nir_swizzle(nb
, src
[0], yzx
, 3, true),
486 nir_swizzle(nb
, src
[1], zxy
, 3, true)),
487 nir_fmul(nb
, nir_swizzle(nb
, src
[0], zxy
, 3, true),
488 nir_swizzle(nb
, src
[1], yzx
, 3, true)));
492 case GLSLstd450SmoothStep
: {
493 /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
495 build_fclamp(nb
, nir_fdiv(nb
, nir_fsub(nb
, src
[2], src
[0]),
496 nir_fsub(nb
, src
[1], src
[0])),
497 nir_imm_float(nb
, 0.0), nir_imm_float(nb
, 1.0));
498 /* result = t * t * (3 - 2 * t) */
500 nir_fmul(nb
, t
, nir_fmul(nb
, t
,
501 nir_fsub(nb
, nir_imm_float(nb
, 3.0),
502 nir_fmul(nb
, nir_imm_float(nb
, 2.0), t
))));
506 case GLSLstd450FaceForward
:
508 nir_bcsel(nb
, nir_flt(nb
, nir_fdot(nb
, src
[2], src
[1]),
509 nir_imm_float(nb
, 0.0)),
510 src
[0], nir_fneg(nb
, src
[0]));
513 case GLSLstd450Reflect
:
514 /* I - 2 * dot(N, I) * N */
516 nir_fsub(nb
, src
[0], nir_fmul(nb
, nir_imm_float(nb
, 2.0),
517 nir_fmul(nb
, nir_fdot(nb
, src
[0], src
[1]),
521 case GLSLstd450Refract
: {
522 nir_ssa_def
*I
= src
[0];
523 nir_ssa_def
*N
= src
[1];
524 nir_ssa_def
*eta
= src
[2];
525 nir_ssa_def
*n_dot_i
= nir_fdot(nb
, N
, I
);
526 nir_ssa_def
*one
= nir_imm_float(nb
, 1.0);
527 nir_ssa_def
*zero
= nir_imm_float(nb
, 0.0);
528 /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
530 nir_fsub(nb
, one
, nir_fmul(nb
, eta
, nir_fmul(nb
, eta
,
531 nir_fsub(nb
, one
, nir_fmul(nb
, n_dot_i
, n_dot_i
)))));
532 nir_ssa_def
*result
=
533 nir_fsub(nb
, nir_fmul(nb
, eta
, I
),
534 nir_fmul(nb
, nir_fadd(nb
, nir_fmul(nb
, eta
, n_dot_i
),
535 nir_fsqrt(nb
, k
)), N
));
536 /* XXX: bcsel, or if statement? */
537 val
->ssa
->def
= nir_bcsel(nb
, nir_flt(nb
, k
, zero
), zero
, result
);
542 /* 0.5 * (e^x - e^(-x)) */
544 nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
545 nir_fsub(nb
, build_exp(nb
, src
[0]),
546 build_exp(nb
, nir_fneg(nb
, src
[0]))));
550 /* 0.5 * (e^x + e^(-x)) */
552 nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
553 nir_fadd(nb
, build_exp(nb
, src
[0]),
554 build_exp(nb
, nir_fneg(nb
, src
[0]))));
558 /* (e^x - e^(-x)) / (e^x + e^(-x)) */
560 nir_fdiv(nb
, nir_fsub(nb
, build_exp(nb
, src
[0]),
561 build_exp(nb
, nir_fneg(nb
, src
[0]))),
562 nir_fadd(nb
, build_exp(nb
, src
[0]),
563 build_exp(nb
, nir_fneg(nb
, src
[0]))));
566 case GLSLstd450Asinh
:
567 val
->ssa
->def
= nir_fmul(nb
, nir_fsign(nb
, src
[0]),
568 build_log(nb
, nir_fadd(nb
, nir_fabs(nb
, src
[0]),
569 nir_fsqrt(nb
, nir_fadd(nb
, nir_fmul(nb
, src
[0], src
[0]),
570 nir_imm_float(nb
, 1.0f
))))));
572 case GLSLstd450Acosh
:
573 val
->ssa
->def
= build_log(nb
, nir_fadd(nb
, src
[0],
574 nir_fsqrt(nb
, nir_fsub(nb
, nir_fmul(nb
, src
[0], src
[0]),
575 nir_imm_float(nb
, 1.0f
)))));
577 case GLSLstd450Atanh
: {
578 nir_ssa_def
*one
= nir_imm_float(nb
, 1.0);
579 val
->ssa
->def
= nir_fmul(nb
, nir_imm_float(nb
, 0.5f
),
580 build_log(nb
, nir_fdiv(nb
, nir_fadd(nb
, one
, src
[0]),
581 nir_fsub(nb
, one
, src
[0]))));
585 case GLSLstd450FindILsb
: op
= nir_op_find_lsb
; break;
586 case GLSLstd450FindSMsb
: op
= nir_op_ifind_msb
; break;
587 case GLSLstd450FindUMsb
: op
= nir_op_ufind_msb
; break;
590 val
->ssa
->def
= build_asin(nb
, src
[0]);
594 val
->ssa
->def
= nir_fsub(nb
, nir_imm_float(nb
, M_PI_2f
),
595 build_asin(nb
, src
[0]));
599 val
->ssa
->def
= build_atan(nb
, src
[0]);
602 case GLSLstd450Atan2
:
603 val
->ssa
->def
= build_atan2(nb
, src
[0], src
[1]);
606 case GLSLstd450Frexp
: {
607 nir_ssa_def
*exponent
;
608 val
->ssa
->def
= build_frexp(nb
, src
[0], &exponent
);
609 nir_deref_var
*out
= vtn_value(b
, w
[6], vtn_value_type_deref
)->deref
;
610 nir_store_deref_var(nb
, out
, exponent
, 0xf);
614 case GLSLstd450FrexpStruct
: {
615 assert(glsl_type_is_struct(val
->ssa
->type
));
616 val
->ssa
->elems
[0]->def
= build_frexp(nb
, src
[0],
617 &val
->ssa
->elems
[1]->def
);
621 case GLSLstd450ModfStruct
:
622 case GLSLstd450PackDouble2x32
:
623 case GLSLstd450UnpackDouble2x32
:
626 unreachable("Unhandled opcode");
629 nir_alu_instr
*instr
= nir_alu_instr_create(b
->shader
, op
);
630 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
.dest
,
631 glsl_get_vector_elements(val
->ssa
->type
), val
->name
);
632 instr
->dest
.write_mask
= (1 << instr
->dest
.dest
.ssa
.num_components
) - 1;
633 val
->ssa
->def
= &instr
->dest
.dest
.ssa
;
635 for (unsigned i
= 0; i
< nir_op_infos
[op
].num_inputs
; i
++)
636 instr
->src
[i
].src
= nir_src_for_ssa(src
[i
]);
638 nir_builder_instr_insert(nb
, &instr
->instr
);
642 vtn_handle_glsl450_instruction(struct vtn_builder
*b
, uint32_t ext_opcode
,
643 const uint32_t *w
, unsigned count
)
645 switch ((enum GLSLstd450
)ext_opcode
) {
646 case GLSLstd450Determinant
: {
647 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
648 val
->ssa
= rzalloc(b
, struct vtn_ssa_value
);
649 val
->ssa
->type
= vtn_value(b
, w
[1], vtn_value_type_type
)->type
->type
;
650 val
->ssa
->def
= build_mat_det(b
, vtn_ssa_value(b
, w
[5]));
654 case GLSLstd450MatrixInverse
: {
655 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
656 val
->ssa
= matrix_inverse(b
, vtn_ssa_value(b
, w
[5]));
660 case GLSLstd450InterpolateAtCentroid
:
661 case GLSLstd450InterpolateAtSample
:
662 case GLSLstd450InterpolateAtOffset
:
663 unreachable("Unhandled opcode");
666 handle_glsl450_alu(b
, (enum GLSLstd450
)ext_opcode
, w
, count
);