2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
24 * Jason Ekstrand (jason@jlekstrand.net)
30 #include "nir/nir_builtin_builder.h"
32 #include "vtn_private.h"
33 #include "GLSL.std.450.h"
35 #define M_PIf ((float) M_PI)
36 #define M_PI_2f ((float) M_PI_2)
37 #define M_PI_4f ((float) M_PI_4)
40 build_mat2_det(nir_builder
*b
, nir_ssa_def
*col
[2])
42 unsigned swiz
[2] = {1, 0 };
43 nir_ssa_def
*p
= nir_fmul(b
, col
[0], nir_swizzle(b
, col
[1], swiz
, 2));
44 return nir_fsub(b
, nir_channel(b
, p
, 0), nir_channel(b
, p
, 1));
48 build_mat3_det(nir_builder
*b
, nir_ssa_def
*col
[3])
50 unsigned yzx
[3] = {1, 2, 0 };
51 unsigned zxy
[3] = {2, 0, 1 };
55 nir_fmul(b
, nir_swizzle(b
, col
[1], yzx
, 3),
56 nir_swizzle(b
, col
[2], zxy
, 3)));
59 nir_fmul(b
, nir_swizzle(b
, col
[1], zxy
, 3),
60 nir_swizzle(b
, col
[2], yzx
, 3)));
62 nir_ssa_def
*diff
= nir_fsub(b
, prod0
, prod1
);
64 return nir_fadd(b
, nir_channel(b
, diff
, 0),
65 nir_fadd(b
, nir_channel(b
, diff
, 1),
66 nir_channel(b
, diff
, 2)));
70 build_mat4_det(nir_builder
*b
, nir_ssa_def
**col
)
72 nir_ssa_def
*subdet
[4];
73 for (unsigned i
= 0; i
< 4; i
++) {
75 for (unsigned j
= 0; j
< 3; j
++)
76 swiz
[j
] = j
+ (j
>= i
);
78 nir_ssa_def
*subcol
[3];
79 subcol
[0] = nir_swizzle(b
, col
[1], swiz
, 3);
80 subcol
[1] = nir_swizzle(b
, col
[2], swiz
, 3);
81 subcol
[2] = nir_swizzle(b
, col
[3], swiz
, 3);
83 subdet
[i
] = build_mat3_det(b
, subcol
);
86 nir_ssa_def
*prod
= nir_fmul(b
, col
[0], nir_vec(b
, subdet
, 4));
88 return nir_fadd(b
, nir_fsub(b
, nir_channel(b
, prod
, 0),
89 nir_channel(b
, prod
, 1)),
90 nir_fsub(b
, nir_channel(b
, prod
, 2),
91 nir_channel(b
, prod
, 3)));
95 build_mat_det(struct vtn_builder
*b
, struct vtn_ssa_value
*src
)
97 unsigned size
= glsl_get_vector_elements(src
->type
);
100 for (unsigned i
= 0; i
< size
; i
++)
101 cols
[i
] = src
->elems
[i
]->def
;
104 case 2: return build_mat2_det(&b
->nb
, cols
);
105 case 3: return build_mat3_det(&b
->nb
, cols
);
106 case 4: return build_mat4_det(&b
->nb
, cols
);
108 vtn_fail("Invalid matrix size");
112 /* Computes the determinate of the submatrix given by taking src and
113 * removing the specified row and column.
116 build_mat_subdet(struct nir_builder
*b
, struct vtn_ssa_value
*src
,
117 unsigned size
, unsigned row
, unsigned col
)
119 assert(row
< size
&& col
< size
);
121 return nir_channel(b
, src
->elems
[1 - col
]->def
, 1 - row
);
123 /* Swizzle to get all but the specified row */
124 unsigned swiz
[NIR_MAX_VEC_COMPONENTS
] = {0};
125 for (unsigned j
= 0; j
< 3; j
++)
126 swiz
[j
] = j
+ (j
>= row
);
128 /* Grab all but the specified column */
129 nir_ssa_def
*subcol
[3];
130 for (unsigned j
= 0; j
< size
; j
++) {
132 subcol
[j
- (j
> col
)] = nir_swizzle(b
, src
->elems
[j
]->def
,
138 return build_mat2_det(b
, subcol
);
141 return build_mat3_det(b
, subcol
);
146 static struct vtn_ssa_value
*
147 matrix_inverse(struct vtn_builder
*b
, struct vtn_ssa_value
*src
)
149 nir_ssa_def
*adj_col
[4];
150 unsigned size
= glsl_get_vector_elements(src
->type
);
152 /* Build up an adjugate matrix */
153 for (unsigned c
= 0; c
< size
; c
++) {
154 nir_ssa_def
*elem
[4];
155 for (unsigned r
= 0; r
< size
; r
++) {
156 elem
[r
] = build_mat_subdet(&b
->nb
, src
, size
, c
, r
);
159 elem
[r
] = nir_fneg(&b
->nb
, elem
[r
]);
162 adj_col
[c
] = nir_vec(&b
->nb
, elem
, size
);
165 nir_ssa_def
*det_inv
= nir_frcp(&b
->nb
, build_mat_det(b
, src
));
167 struct vtn_ssa_value
*val
= vtn_create_ssa_value(b
, src
->type
);
168 for (unsigned i
= 0; i
< size
; i
++)
169 val
->elems
[i
]->def
= nir_fmul(&b
->nb
, adj_col
[i
], det_inv
);
175 * Approximate asin(x) by the piecewise formula:
176 * for |x| < 0.5, asin~(x) = x * (1 + x²(pS0 + x²(pS1 + x²*pS2)) / (1 + x²*qS1))
177 * for |x| ≥ 0.5, asin~(x) = sign(x) * (π/2 - sqrt(1 - |x|) * (π/2 + |x|(π/4 - 1 + |x|(p0 + |x|p1))))
179 * The latter is correct to first order at x=0 and x=±1 regardless of the p
180 * coefficients but can be made second-order correct at both ends by selecting
181 * the fit coefficients appropriately. Different p coefficients can be used
182 * in the asin and acos implementation to minimize some relative error metric
186 build_asin(nir_builder
*b
, nir_ssa_def
*x
, float p0
, float p1
, bool piecewise
)
188 if (x
->bit_size
== 16) {
189 /* The polynomial approximation isn't precise enough to meet half-float
190 * precision requirements. Alternatively, we could implement this using
193 * asin(x) = atan2(x, sqrt(1 - x*x))
195 * But that is very expensive, so instead we just do the polynomial
196 * approximation in 32-bit math and then we convert the result back to
199 return nir_f2f16(b
, build_asin(b
, nir_f2f32(b
, x
), p0
, p1
, piecewise
));
201 nir_ssa_def
*one
= nir_imm_floatN_t(b
, 1.0f
, x
->bit_size
);
202 nir_ssa_def
*half
= nir_imm_floatN_t(b
, 0.5f
, x
->bit_size
);
203 nir_ssa_def
*abs_x
= nir_fabs(b
, x
);
205 nir_ssa_def
*p0_plus_xp1
= nir_fadd_imm(b
, nir_fmul_imm(b
, abs_x
, p1
), p0
);
207 nir_ssa_def
*expr_tail
=
208 nir_fadd_imm(b
, nir_fmul(b
, abs_x
,
209 nir_fadd_imm(b
, nir_fmul(b
, abs_x
,
214 nir_ssa_def
*result0
= nir_fmul(b
, nir_fsign(b
, x
),
215 nir_fsub(b
, nir_imm_floatN_t(b
, M_PI_2f
, x
->bit_size
),
216 nir_fmul(b
, nir_fsqrt(b
, nir_fsub(b
, one
, abs_x
)),
219 /* approximation for |x| < 0.5 */
220 const float pS0
= 1.6666586697e-01f
;
221 const float pS1
= -4.2743422091e-02f
;
222 const float pS2
= -8.6563630030e-03f
;
223 const float qS1
= -7.0662963390e-01f
;
225 nir_ssa_def
*x2
= nir_fmul(b
, x
, x
);
226 nir_ssa_def
*p
= nir_fmul(b
,
231 nir_fadd_imm(b
, nir_fmul_imm(b
, x2
, pS2
),
235 nir_ssa_def
*q
= nir_fadd(b
, one
, nir_fmul_imm(b
, x2
, qS1
));
236 nir_ssa_def
*result1
= nir_fadd(b
, x
, nir_fmul(b
, x
, nir_fdiv(b
, p
, q
)));
237 return nir_bcsel(b
, nir_flt(b
, abs_x
, half
), result1
, result0
);
244 vtn_nir_alu_op_for_spirv_glsl_opcode(struct vtn_builder
*b
,
245 enum GLSLstd450 opcode
,
246 unsigned execution_mode
,
251 case GLSLstd450Round
: return nir_op_fround_even
;
252 case GLSLstd450RoundEven
: return nir_op_fround_even
;
253 case GLSLstd450Trunc
: return nir_op_ftrunc
;
254 case GLSLstd450FAbs
: return nir_op_fabs
;
255 case GLSLstd450SAbs
: return nir_op_iabs
;
256 case GLSLstd450FSign
: return nir_op_fsign
;
257 case GLSLstd450SSign
: return nir_op_isign
;
258 case GLSLstd450Floor
: return nir_op_ffloor
;
259 case GLSLstd450Ceil
: return nir_op_fceil
;
260 case GLSLstd450Fract
: return nir_op_ffract
;
261 case GLSLstd450Sin
: return nir_op_fsin
;
262 case GLSLstd450Cos
: return nir_op_fcos
;
263 case GLSLstd450Pow
: return nir_op_fpow
;
264 case GLSLstd450Exp2
: return nir_op_fexp2
;
265 case GLSLstd450Log2
: return nir_op_flog2
;
266 case GLSLstd450Sqrt
: return nir_op_fsqrt
;
267 case GLSLstd450InverseSqrt
: return nir_op_frsq
;
268 case GLSLstd450NMin
: *exact
= true; return nir_op_fmin
;
269 case GLSLstd450FMin
: return nir_op_fmin
;
270 case GLSLstd450UMin
: return nir_op_umin
;
271 case GLSLstd450SMin
: return nir_op_imin
;
272 case GLSLstd450NMax
: *exact
= true; return nir_op_fmax
;
273 case GLSLstd450FMax
: return nir_op_fmax
;
274 case GLSLstd450UMax
: return nir_op_umax
;
275 case GLSLstd450SMax
: return nir_op_imax
;
276 case GLSLstd450FMix
: return nir_op_flrp
;
277 case GLSLstd450Fma
: return nir_op_ffma
;
278 case GLSLstd450Ldexp
: return nir_op_ldexp
;
279 case GLSLstd450FindILsb
: return nir_op_find_lsb
;
280 case GLSLstd450FindSMsb
: return nir_op_ifind_msb
;
281 case GLSLstd450FindUMsb
: return nir_op_ufind_msb
;
283 /* Packing/Unpacking functions */
284 case GLSLstd450PackSnorm4x8
: return nir_op_pack_snorm_4x8
;
285 case GLSLstd450PackUnorm4x8
: return nir_op_pack_unorm_4x8
;
286 case GLSLstd450PackSnorm2x16
: return nir_op_pack_snorm_2x16
;
287 case GLSLstd450PackUnorm2x16
: return nir_op_pack_unorm_2x16
;
288 case GLSLstd450PackHalf2x16
: return nir_op_pack_half_2x16
;
289 case GLSLstd450PackDouble2x32
: return nir_op_pack_64_2x32
;
290 case GLSLstd450UnpackSnorm4x8
: return nir_op_unpack_snorm_4x8
;
291 case GLSLstd450UnpackUnorm4x8
: return nir_op_unpack_unorm_4x8
;
292 case GLSLstd450UnpackSnorm2x16
: return nir_op_unpack_snorm_2x16
;
293 case GLSLstd450UnpackUnorm2x16
: return nir_op_unpack_unorm_2x16
;
294 case GLSLstd450UnpackHalf2x16
:
295 if (execution_mode
& FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16
)
296 return nir_op_unpack_half_2x16_flush_to_zero
;
298 return nir_op_unpack_half_2x16
;
299 case GLSLstd450UnpackDouble2x32
: return nir_op_unpack_64_2x32
;
302 vtn_fail("No NIR equivalent");
306 #define NIR_IMM_FP(n, v) (nir_imm_floatN_t(n, v, src[0]->bit_size))
309 handle_glsl450_alu(struct vtn_builder
*b
, enum GLSLstd450 entrypoint
,
310 const uint32_t *w
, unsigned count
)
312 struct nir_builder
*nb
= &b
->nb
;
313 const struct glsl_type
*dest_type
= vtn_get_type(b
, w
[1])->type
;
315 /* Collect the various SSA sources */
316 unsigned num_inputs
= count
- 5;
317 nir_ssa_def
*src
[3] = { NULL
, };
318 for (unsigned i
= 0; i
< num_inputs
; i
++) {
319 /* These are handled specially below */
320 if (vtn_untyped_value(b
, w
[i
+ 5])->value_type
== vtn_value_type_pointer
)
323 src
[i
] = vtn_get_nir_ssa(b
, w
[i
+ 5]);
326 struct vtn_ssa_value
*dest
= vtn_create_ssa_value(b
, dest_type
);
327 switch (entrypoint
) {
328 case GLSLstd450Radians
:
329 dest
->def
= nir_radians(nb
, src
[0]);
331 case GLSLstd450Degrees
:
332 dest
->def
= nir_degrees(nb
, src
[0]);
335 dest
->def
= nir_ftan(nb
, src
[0]);
338 case GLSLstd450Modf
: {
339 nir_ssa_def
*sign
= nir_fsign(nb
, src
[0]);
340 nir_ssa_def
*abs
= nir_fabs(nb
, src
[0]);
341 dest
->def
= nir_fmul(nb
, sign
, nir_ffract(nb
, abs
));
342 nir_store_deref(nb
, vtn_nir_deref(b
, w
[6]),
343 nir_fmul(nb
, sign
, nir_ffloor(nb
, abs
)), 0xf);
347 case GLSLstd450ModfStruct
: {
348 nir_ssa_def
*sign
= nir_fsign(nb
, src
[0]);
349 nir_ssa_def
*abs
= nir_fabs(nb
, src
[0]);
350 vtn_assert(glsl_type_is_struct_or_ifc(dest_type
));
351 dest
->elems
[0]->def
= nir_fmul(nb
, sign
, nir_ffract(nb
, abs
));
352 dest
->elems
[1]->def
= nir_fmul(nb
, sign
, nir_ffloor(nb
, abs
));
357 dest
->def
= nir_sge(nb
, src
[1], src
[0]);
360 case GLSLstd450Length
:
361 dest
->def
= nir_fast_length(nb
, src
[0]);
363 case GLSLstd450Distance
:
364 dest
->def
= nir_fast_distance(nb
, src
[0], src
[1]);
366 case GLSLstd450Normalize
:
367 dest
->def
= nir_fast_normalize(nb
, src
[0]);
371 dest
->def
= nir_fexp(nb
, src
[0]);
375 dest
->def
= nir_flog(nb
, src
[0]);
378 case GLSLstd450FClamp
:
379 dest
->def
= nir_fclamp(nb
, src
[0], src
[1], src
[2]);
381 case GLSLstd450NClamp
:
383 dest
->def
= nir_fclamp(nb
, src
[0], src
[1], src
[2]);
386 case GLSLstd450UClamp
:
387 dest
->def
= nir_uclamp(nb
, src
[0], src
[1], src
[2]);
389 case GLSLstd450SClamp
:
390 dest
->def
= nir_iclamp(nb
, src
[0], src
[1], src
[2]);
393 case GLSLstd450Cross
: {
394 dest
->def
= nir_cross3(nb
, src
[0], src
[1]);
398 case GLSLstd450SmoothStep
: {
399 dest
->def
= nir_smoothstep(nb
, src
[0], src
[1], src
[2]);
403 case GLSLstd450FaceForward
:
405 nir_bcsel(nb
, nir_flt(nb
, nir_fdot(nb
, src
[2], src
[1]),
406 NIR_IMM_FP(nb
, 0.0)),
407 src
[0], nir_fneg(nb
, src
[0]));
410 case GLSLstd450Reflect
:
411 /* I - 2 * dot(N, I) * N */
413 nir_fsub(nb
, src
[0], nir_fmul(nb
, NIR_IMM_FP(nb
, 2.0),
414 nir_fmul(nb
, nir_fdot(nb
, src
[0], src
[1]),
418 case GLSLstd450Refract
: {
419 nir_ssa_def
*I
= src
[0];
420 nir_ssa_def
*N
= src
[1];
421 nir_ssa_def
*eta
= src
[2];
422 nir_ssa_def
*n_dot_i
= nir_fdot(nb
, N
, I
);
423 nir_ssa_def
*one
= NIR_IMM_FP(nb
, 1.0);
424 nir_ssa_def
*zero
= NIR_IMM_FP(nb
, 0.0);
425 /* According to the SPIR-V and GLSL specs, eta is always a float
426 * regardless of the type of the other operands. However in practice it
427 * seems that if you try to pass it a float then glslang will just
428 * promote it to a double and generate invalid SPIR-V. In order to
429 * support a hypothetical fixed version of glslang we’ll promote eta to
430 * double if the other operands are double also.
432 if (I
->bit_size
!= eta
->bit_size
) {
433 nir_op conversion_op
=
434 nir_type_conversion_op(nir_type_float
| eta
->bit_size
,
435 nir_type_float
| I
->bit_size
,
436 nir_rounding_mode_undef
);
437 eta
= nir_build_alu(nb
, conversion_op
, eta
, NULL
, NULL
, NULL
);
439 /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
441 nir_fsub(nb
, one
, nir_fmul(nb
, eta
, nir_fmul(nb
, eta
,
442 nir_fsub(nb
, one
, nir_fmul(nb
, n_dot_i
, n_dot_i
)))));
443 nir_ssa_def
*result
=
444 nir_fsub(nb
, nir_fmul(nb
, eta
, I
),
445 nir_fmul(nb
, nir_fadd(nb
, nir_fmul(nb
, eta
, n_dot_i
),
446 nir_fsqrt(nb
, k
)), N
));
447 /* XXX: bcsel, or if statement? */
448 dest
->def
= nir_bcsel(nb
, nir_flt(nb
, k
, zero
), zero
, result
);
453 /* 0.5 * (e^x - e^(-x)) */
455 nir_fmul_imm(nb
, nir_fsub(nb
, nir_fexp(nb
, src
[0]),
456 nir_fexp(nb
, nir_fneg(nb
, src
[0]))),
461 /* 0.5 * (e^x + e^(-x)) */
463 nir_fmul_imm(nb
, nir_fadd(nb
, nir_fexp(nb
, src
[0]),
464 nir_fexp(nb
, nir_fneg(nb
, src
[0]))),
468 case GLSLstd450Tanh
: {
469 /* tanh(x) := (e^x - e^(-x)) / (e^x + e^(-x))
471 * We clamp x to [-10, +10] to avoid precision problems. When x > 10,
472 * e^x dominates the sum, e^(-x) is lost and tanh(x) is 1.0 for 32 bit
475 * For 16-bit precision this we clamp x to [-4.2, +4.2].
477 const uint32_t bit_size
= src
[0]->bit_size
;
478 const double clamped_x
= bit_size
> 16 ? 10.0 : 4.2;
479 nir_ssa_def
*x
= nir_fclamp(nb
, src
[0],
480 nir_imm_floatN_t(nb
, -clamped_x
, bit_size
),
481 nir_imm_floatN_t(nb
, clamped_x
, bit_size
));
483 nir_fdiv(nb
, nir_fsub(nb
, nir_fexp(nb
, x
),
484 nir_fexp(nb
, nir_fneg(nb
, x
))),
485 nir_fadd(nb
, nir_fexp(nb
, x
),
486 nir_fexp(nb
, nir_fneg(nb
, x
))));
490 case GLSLstd450Asinh
:
491 dest
->def
= nir_fmul(nb
, nir_fsign(nb
, src
[0]),
492 nir_flog(nb
, nir_fadd(nb
, nir_fabs(nb
, src
[0]),
493 nir_fsqrt(nb
, nir_fadd_imm(nb
, nir_fmul(nb
, src
[0], src
[0]),
496 case GLSLstd450Acosh
:
497 dest
->def
= nir_flog(nb
, nir_fadd(nb
, src
[0],
498 nir_fsqrt(nb
, nir_fadd_imm(nb
, nir_fmul(nb
, src
[0], src
[0]),
501 case GLSLstd450Atanh
: {
502 nir_ssa_def
*one
= nir_imm_floatN_t(nb
, 1.0, src
[0]->bit_size
);
504 nir_fmul_imm(nb
, nir_flog(nb
, nir_fdiv(nb
, nir_fadd(nb
, src
[0], one
),
505 nir_fsub(nb
, one
, src
[0]))),
511 dest
->def
= build_asin(nb
, src
[0], 0.086566724, -0.03102955, true);
516 nir_fsub(nb
, nir_imm_floatN_t(nb
, M_PI_2f
, src
[0]->bit_size
),
517 build_asin(nb
, src
[0], 0.08132463, -0.02363318, false));
521 dest
->def
= nir_atan(nb
, src
[0]);
524 case GLSLstd450Atan2
:
525 dest
->def
= nir_atan2(nb
, src
[0], src
[1]);
528 case GLSLstd450Frexp
: {
529 nir_ssa_def
*exponent
= nir_frexp_exp(nb
, src
[0]);
530 dest
->def
= nir_frexp_sig(nb
, src
[0]);
531 nir_store_deref(nb
, vtn_nir_deref(b
, w
[6]), exponent
, 0xf);
535 case GLSLstd450FrexpStruct
: {
536 vtn_assert(glsl_type_is_struct_or_ifc(dest_type
));
537 dest
->elems
[0]->def
= nir_frexp_sig(nb
, src
[0]);
538 dest
->elems
[1]->def
= nir_frexp_exp(nb
, src
[0]);
543 unsigned execution_mode
=
544 b
->shader
->info
.float_controls_execution_mode
;
546 nir_op op
= vtn_nir_alu_op_for_spirv_glsl_opcode(b
, entrypoint
, execution_mode
, &exact
);
548 dest
->def
= nir_build_alu(&b
->nb
, op
, src
[0], src
[1], src
[2], NULL
);
554 vtn_push_ssa_value(b
, w
[2], dest
);
558 handle_glsl450_interpolation(struct vtn_builder
*b
, enum GLSLstd450 opcode
,
559 const uint32_t *w
, unsigned count
)
563 case GLSLstd450InterpolateAtCentroid
:
564 op
= nir_intrinsic_interp_deref_at_centroid
;
566 case GLSLstd450InterpolateAtSample
:
567 op
= nir_intrinsic_interp_deref_at_sample
;
569 case GLSLstd450InterpolateAtOffset
:
570 op
= nir_intrinsic_interp_deref_at_offset
;
573 vtn_fail("Invalid opcode");
576 nir_intrinsic_instr
*intrin
= nir_intrinsic_instr_create(b
->nb
.shader
, op
);
578 struct vtn_pointer
*ptr
=
579 vtn_value(b
, w
[5], vtn_value_type_pointer
)->pointer
;
580 nir_deref_instr
*deref
= vtn_pointer_to_deref(b
, ptr
);
582 /* If the value we are interpolating has an index into a vector then
583 * interpolate the vector and index the result of that instead. This is
584 * necessary because the index will get generated as a series of nir_bcsel
585 * instructions so it would no longer be an input variable.
587 const bool vec_array_deref
= deref
->deref_type
== nir_deref_type_array
&&
588 glsl_type_is_vector(nir_deref_instr_parent(deref
)->type
);
590 nir_deref_instr
*vec_deref
= NULL
;
591 if (vec_array_deref
) {
593 deref
= nir_deref_instr_parent(deref
);
595 intrin
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
598 case GLSLstd450InterpolateAtCentroid
:
600 case GLSLstd450InterpolateAtSample
:
601 case GLSLstd450InterpolateAtOffset
:
602 intrin
->src
[1] = nir_src_for_ssa(vtn_get_nir_ssa(b
, w
[6]));
605 vtn_fail("Invalid opcode");
608 intrin
->num_components
= glsl_get_vector_elements(deref
->type
);
609 nir_ssa_dest_init(&intrin
->instr
, &intrin
->dest
,
610 glsl_get_vector_elements(deref
->type
),
611 glsl_get_bit_size(deref
->type
), NULL
);
613 nir_builder_instr_insert(&b
->nb
, &intrin
->instr
);
615 nir_ssa_def
*def
= &intrin
->dest
.ssa
;
617 def
= nir_vector_extract(&b
->nb
, def
, vec_deref
->arr
.index
.ssa
);
619 vtn_push_nir_ssa(b
, w
[2], def
);
623 vtn_handle_glsl450_instruction(struct vtn_builder
*b
, SpvOp ext_opcode
,
624 const uint32_t *w
, unsigned count
)
626 switch ((enum GLSLstd450
)ext_opcode
) {
627 case GLSLstd450Determinant
: {
628 vtn_push_nir_ssa(b
, w
[2], build_mat_det(b
, vtn_ssa_value(b
, w
[5])));
632 case GLSLstd450MatrixInverse
: {
633 vtn_push_ssa_value(b
, w
[2], matrix_inverse(b
, vtn_ssa_value(b
, w
[5])));
637 case GLSLstd450InterpolateAtCentroid
:
638 case GLSLstd450InterpolateAtSample
:
639 case GLSLstd450InterpolateAtOffset
:
640 handle_glsl450_interpolation(b
, (enum GLSLstd450
)ext_opcode
, w
, count
);
644 handle_glsl450_alu(b
, (enum GLSLstd450
)ext_opcode
, w
, count
);