2 * Copyright © 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
25 #include "nir_builder.h"
 * \file nir_lower_subgroups.c
 *
 * NOTE(review): the previous \file name (nir_opt_intrinsics.c) does not match
 * this content, which is the subgroup-operation lowering pass — confirm the
 * actual on-disk filename.
31 static nir_intrinsic_instr
*
32 lower_subgroups_64bit_split_intrinsic(nir_builder
*b
, nir_intrinsic_instr
*intrin
,
33 unsigned int component
)
37 comp
= nir_unpack_64_2x32_split_x(b
, intrin
->src
[0].ssa
);
39 comp
= nir_unpack_64_2x32_split_y(b
, intrin
->src
[0].ssa
);
41 nir_intrinsic_instr
*intr
= nir_intrinsic_instr_create(b
->shader
, intrin
->intrinsic
);
42 nir_ssa_dest_init(&intr
->instr
, &intr
->dest
, 1, 32, NULL
);
43 intr
->const_index
[0] = intrin
->const_index
[0];
44 intr
->const_index
[1] = intrin
->const_index
[1];
45 intr
->src
[0] = nir_src_for_ssa(comp
);
46 if (nir_intrinsic_infos
[intrin
->intrinsic
].num_srcs
== 2)
47 nir_src_copy(&intr
->src
[1], &intrin
->src
[1], intr
);
49 intr
->num_components
= 1;
50 nir_builder_instr_insert(b
, &intr
->instr
);
55 lower_subgroup_op_to_32bit(nir_builder
*b
, nir_intrinsic_instr
*intrin
)
57 assert(intrin
->src
[0].ssa
->bit_size
== 64);
58 nir_intrinsic_instr
*intr_x
= lower_subgroups_64bit_split_intrinsic(b
, intrin
, 0);
59 nir_intrinsic_instr
*intr_y
= lower_subgroups_64bit_split_intrinsic(b
, intrin
, 1);
60 return nir_pack_64_2x32_split(b
, &intr_x
->dest
.ssa
, &intr_y
->dest
.ssa
);
64 ballot_type_to_uint(nir_builder
*b
, nir_ssa_def
*value
, unsigned bit_size
)
66 /* We only use this on uvec4 types */
67 assert(value
->num_components
== 4 && value
->bit_size
== 32);
70 return nir_channel(b
, value
, 0);
72 assert(bit_size
== 64);
73 return nir_pack_64_2x32_split(b
, nir_channel(b
, value
, 0),
74 nir_channel(b
, value
, 1));
78 /* Converts a uint32_t or uint64_t value to uint64_t or uvec4 */
80 uint_to_ballot_type(nir_builder
*b
, nir_ssa_def
*value
,
81 unsigned num_components
, unsigned bit_size
)
83 assert(value
->num_components
== 1);
84 assert(value
->bit_size
== 32 || value
->bit_size
== 64);
86 nir_ssa_def
*zero
= nir_imm_int(b
, 0);
87 if (num_components
> 1) {
88 /* SPIR-V uses a uvec4 for ballot values */
89 assert(num_components
== 4);
90 assert(bit_size
== 32);
92 if (value
->bit_size
== 32) {
93 return nir_vec4(b
, value
, zero
, zero
, zero
);
95 assert(value
->bit_size
== 64);
96 return nir_vec4(b
, nir_unpack_64_2x32_split_x(b
, value
),
97 nir_unpack_64_2x32_split_y(b
, value
),
101 /* GLSL uses a uint64_t for ballot values */
102 assert(num_components
== 1);
103 assert(bit_size
== 64);
105 if (value
->bit_size
== 32) {
106 return nir_pack_64_2x32_split(b
, value
, zero
);
108 assert(value
->bit_size
== 64);
115 lower_subgroup_op_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intrin
,
118 /* This is safe to call on scalar things but it would be silly */
119 assert(intrin
->dest
.ssa
.num_components
> 1);
121 nir_ssa_def
*value
= nir_ssa_for_src(b
, intrin
->src
[0],
122 intrin
->num_components
);
123 nir_ssa_def
*reads
[4];
125 for (unsigned i
= 0; i
< intrin
->num_components
; i
++) {
126 nir_intrinsic_instr
*chan_intrin
=
127 nir_intrinsic_instr_create(b
->shader
, intrin
->intrinsic
);
128 nir_ssa_dest_init(&chan_intrin
->instr
, &chan_intrin
->dest
,
129 1, intrin
->dest
.ssa
.bit_size
, NULL
);
130 chan_intrin
->num_components
= 1;
133 chan_intrin
->src
[0] = nir_src_for_ssa(nir_channel(b
, value
, i
));
135 if (nir_intrinsic_infos
[intrin
->intrinsic
].num_srcs
> 1) {
136 assert(nir_intrinsic_infos
[intrin
->intrinsic
].num_srcs
== 2);
137 nir_src_copy(&chan_intrin
->src
[1], &intrin
->src
[1], chan_intrin
);
140 chan_intrin
->const_index
[0] = intrin
->const_index
[0];
141 chan_intrin
->const_index
[1] = intrin
->const_index
[1];
143 if (lower_to_32bit
&& chan_intrin
->src
[0].ssa
->bit_size
== 64) {
144 reads
[i
] = lower_subgroup_op_to_32bit(b
, chan_intrin
);
146 nir_builder_instr_insert(b
, &chan_intrin
->instr
);
147 reads
[i
] = &chan_intrin
->dest
.ssa
;
151 return nir_vec(b
, reads
, intrin
->num_components
);
155 lower_vote_eq_to_scalar(nir_builder
*b
, nir_intrinsic_instr
*intrin
)
157 assert(intrin
->src
[0].is_ssa
);
158 nir_ssa_def
*value
= intrin
->src
[0].ssa
;
160 nir_ssa_def
*result
= NULL
;
161 for (unsigned i
= 0; i
< intrin
->num_components
; i
++) {
162 nir_intrinsic_instr
*chan_intrin
=
163 nir_intrinsic_instr_create(b
->shader
, intrin
->intrinsic
);
164 nir_ssa_dest_init(&chan_intrin
->instr
, &chan_intrin
->dest
,
165 1, intrin
->dest
.ssa
.bit_size
, NULL
);
166 chan_intrin
->num_components
= 1;
167 chan_intrin
->src
[0] = nir_src_for_ssa(nir_channel(b
, value
, i
));
168 nir_builder_instr_insert(b
, &chan_intrin
->instr
);
171 result
= nir_iand(b
, result
, &chan_intrin
->dest
.ssa
);
173 result
= &chan_intrin
->dest
.ssa
;
181 lower_vote_eq_to_ballot(nir_builder
*b
, nir_intrinsic_instr
*intrin
,
182 const nir_lower_subgroups_options
*options
)
184 assert(intrin
->src
[0].is_ssa
);
185 nir_ssa_def
*value
= intrin
->src
[0].ssa
;
187 /* We have to implicitly lower to scalar */
188 nir_ssa_def
*all_eq
= NULL
;
189 for (unsigned i
= 0; i
< intrin
->num_components
; i
++) {
190 nir_intrinsic_instr
*rfi
=
191 nir_intrinsic_instr_create(b
->shader
,
192 nir_intrinsic_read_first_invocation
);
193 nir_ssa_dest_init(&rfi
->instr
, &rfi
->dest
,
194 1, value
->bit_size
, NULL
);
195 rfi
->num_components
= 1;
196 rfi
->src
[0] = nir_src_for_ssa(nir_channel(b
, value
, i
));
197 nir_builder_instr_insert(b
, &rfi
->instr
);
200 if (intrin
->intrinsic
== nir_intrinsic_vote_feq
) {
201 is_eq
= nir_feq(b
, &rfi
->dest
.ssa
, nir_channel(b
, value
, i
));
203 is_eq
= nir_ieq(b
, &rfi
->dest
.ssa
, nir_channel(b
, value
, i
));
206 if (all_eq
== NULL
) {
209 all_eq
= nir_iand(b
, all_eq
, is_eq
);
213 nir_intrinsic_instr
*ballot
=
214 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_ballot
);
215 nir_ssa_dest_init(&ballot
->instr
, &ballot
->dest
,
216 1, options
->ballot_bit_size
, NULL
);
217 ballot
->num_components
= 1;
218 ballot
->src
[0] = nir_src_for_ssa(nir_inot(b
, all_eq
));
219 nir_builder_instr_insert(b
, &ballot
->instr
);
221 return nir_ieq(b
, &ballot
->dest
.ssa
,
222 nir_imm_intN_t(b
, 0, options
->ballot_bit_size
));
226 lower_shuffle_to_swizzle(nir_builder
*b
, nir_intrinsic_instr
*intrin
,
227 const nir_lower_subgroups_options
*options
)
229 unsigned mask
= nir_src_as_uint(intrin
->src
[1]);
234 nir_intrinsic_instr
*swizzle
= nir_intrinsic_instr_create(
235 b
->shader
, nir_intrinsic_masked_swizzle_amd
);
236 swizzle
->num_components
= intrin
->num_components
;
237 nir_src_copy(&swizzle
->src
[0], &intrin
->src
[0], swizzle
);
238 nir_intrinsic_set_swizzle_mask(swizzle
, (mask
<< 10) | 0x1f);
239 nir_ssa_dest_init(&swizzle
->instr
, &swizzle
->dest
,
240 intrin
->dest
.ssa
.num_components
,
241 intrin
->dest
.ssa
.bit_size
, NULL
);
243 if (options
->lower_to_scalar
&& swizzle
->num_components
> 1) {
244 return lower_subgroup_op_to_scalar(b
, swizzle
, options
->lower_shuffle_to_32bit
);
245 } else if (options
->lower_shuffle_to_32bit
&& swizzle
->src
[0].ssa
->bit_size
== 64) {
246 return lower_subgroup_op_to_32bit(b
, swizzle
);
248 nir_builder_instr_insert(b
, &swizzle
->instr
);
249 return &swizzle
->dest
.ssa
;
254 lower_shuffle(nir_builder
*b
, nir_intrinsic_instr
*intrin
,
255 const nir_lower_subgroups_options
*options
)
257 if (intrin
->intrinsic
== nir_intrinsic_shuffle_xor
&&
258 options
->lower_shuffle_to_swizzle_amd
&&
259 nir_src_is_const(intrin
->src
[1])) {
260 nir_ssa_def
*result
=
261 lower_shuffle_to_swizzle(b
, intrin
, options
);
266 nir_ssa_def
*index
= nir_load_subgroup_invocation(b
);
267 bool is_shuffle
= false;
268 switch (intrin
->intrinsic
) {
269 case nir_intrinsic_shuffle_xor
:
270 assert(intrin
->src
[1].is_ssa
);
271 index
= nir_ixor(b
, index
, intrin
->src
[1].ssa
);
274 case nir_intrinsic_shuffle_up
:
275 assert(intrin
->src
[1].is_ssa
);
276 index
= nir_isub(b
, index
, intrin
->src
[1].ssa
);
279 case nir_intrinsic_shuffle_down
:
280 assert(intrin
->src
[1].is_ssa
);
281 index
= nir_iadd(b
, index
, intrin
->src
[1].ssa
);
284 case nir_intrinsic_quad_broadcast
:
285 assert(intrin
->src
[1].is_ssa
);
286 index
= nir_ior(b
, nir_iand(b
, index
, nir_imm_int(b
, ~0x3)),
289 case nir_intrinsic_quad_swap_horizontal
:
290 /* For Quad operations, subgroups are divided into quads where
291 * (invocation % 4) is the index to a square arranged as follows:
299 index
= nir_ixor(b
, index
, nir_imm_int(b
, 0x1));
301 case nir_intrinsic_quad_swap_vertical
:
302 index
= nir_ixor(b
, index
, nir_imm_int(b
, 0x2));
304 case nir_intrinsic_quad_swap_diagonal
:
305 index
= nir_ixor(b
, index
, nir_imm_int(b
, 0x3));
308 unreachable("Invalid intrinsic");
311 nir_intrinsic_instr
*shuffle
=
312 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_shuffle
);
313 shuffle
->num_components
= intrin
->num_components
;
314 nir_src_copy(&shuffle
->src
[0], &intrin
->src
[0], shuffle
);
315 shuffle
->src
[1] = nir_src_for_ssa(index
);
316 nir_ssa_dest_init(&shuffle
->instr
, &shuffle
->dest
,
317 intrin
->dest
.ssa
.num_components
,
318 intrin
->dest
.ssa
.bit_size
, NULL
);
320 bool lower_to_32bit
= options
->lower_shuffle_to_32bit
&& is_shuffle
;
321 if (options
->lower_to_scalar
&& shuffle
->num_components
> 1) {
322 return lower_subgroup_op_to_scalar(b
, shuffle
, lower_to_32bit
);
323 } else if (lower_to_32bit
&& shuffle
->src
[0].ssa
->bit_size
== 64) {
324 return lower_subgroup_op_to_32bit(b
, shuffle
);
326 nir_builder_instr_insert(b
, &shuffle
->instr
);
327 return &shuffle
->dest
.ssa
;
332 lower_subgroups_filter(const nir_instr
*instr
, const void *_options
)
334 return instr
->type
== nir_instr_type_intrinsic
;
338 build_subgroup_mask(nir_builder
*b
, unsigned bit_size
,
339 const nir_lower_subgroups_options
*options
)
341 return nir_ushr(b
, nir_imm_intN_t(b
, ~0ull, bit_size
),
342 nir_isub(b
, nir_imm_int(b
, bit_size
),
343 nir_load_subgroup_size(b
)));
347 lower_dynamic_quad_broadcast(nir_builder
*b
, nir_intrinsic_instr
*intrin
,
348 const nir_lower_subgroups_options
*options
)
350 if (!options
->lower_quad_broadcast_dynamic_to_const
)
351 return lower_shuffle(b
, intrin
, options
);
353 nir_ssa_def
*dst
= NULL
;
355 for (unsigned i
= 0; i
< 4; ++i
) {
356 nir_intrinsic_instr
*qbcst
=
357 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_quad_broadcast
);
359 qbcst
->num_components
= intrin
->num_components
;
360 qbcst
->src
[1] = nir_src_for_ssa(nir_imm_int(b
, i
));
361 nir_src_copy(&qbcst
->src
[0], &intrin
->src
[0], qbcst
);
362 nir_ssa_dest_init(&qbcst
->instr
, &qbcst
->dest
,
363 intrin
->dest
.ssa
.num_components
,
364 intrin
->dest
.ssa
.bit_size
, NULL
);
366 nir_ssa_def
*qbcst_dst
= NULL
;
368 if (options
->lower_to_scalar
&& qbcst
->num_components
> 1) {
369 qbcst_dst
= lower_subgroup_op_to_scalar(b
, qbcst
, false);
371 nir_builder_instr_insert(b
, &qbcst
->instr
);
372 qbcst_dst
= &qbcst
->dest
.ssa
;
376 dst
= nir_bcsel(b
, nir_ieq(b
, intrin
->src
[1].ssa
,
377 nir_src_for_ssa(nir_imm_int(b
, i
)).ssa
),
387 lower_subgroups_instr(nir_builder
*b
, nir_instr
*instr
, void *_options
)
389 const nir_lower_subgroups_options
*options
= _options
;
391 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
392 switch (intrin
->intrinsic
) {
393 case nir_intrinsic_vote_any
:
394 case nir_intrinsic_vote_all
:
395 if (options
->lower_vote_trivial
)
396 return nir_ssa_for_src(b
, intrin
->src
[0], 1);
399 case nir_intrinsic_vote_feq
:
400 case nir_intrinsic_vote_ieq
:
401 if (options
->lower_vote_trivial
)
402 return nir_imm_true(b
);
404 if (options
->lower_vote_eq_to_ballot
)
405 return lower_vote_eq_to_ballot(b
, intrin
, options
);
407 if (options
->lower_to_scalar
&& intrin
->num_components
> 1)
408 return lower_vote_eq_to_scalar(b
, intrin
);
411 case nir_intrinsic_load_subgroup_size
:
412 if (options
->subgroup_size
)
413 return nir_imm_int(b
, options
->subgroup_size
);
416 case nir_intrinsic_read_invocation
:
417 case nir_intrinsic_read_first_invocation
:
418 if (options
->lower_to_scalar
&& intrin
->num_components
> 1)
419 return lower_subgroup_op_to_scalar(b
, intrin
, false);
422 case nir_intrinsic_load_subgroup_eq_mask
:
423 case nir_intrinsic_load_subgroup_ge_mask
:
424 case nir_intrinsic_load_subgroup_gt_mask
:
425 case nir_intrinsic_load_subgroup_le_mask
:
426 case nir_intrinsic_load_subgroup_lt_mask
: {
427 if (!options
->lower_subgroup_masks
)
430 /* If either the result or the requested bit size is 64-bits then we
431 * know that we have 64-bit types and using them will probably be more
432 * efficient than messing around with 32-bit shifts and packing.
434 const unsigned bit_size
= MAX2(options
->ballot_bit_size
,
435 intrin
->dest
.ssa
.bit_size
);
437 nir_ssa_def
*count
= nir_load_subgroup_invocation(b
);
439 switch (intrin
->intrinsic
) {
440 case nir_intrinsic_load_subgroup_eq_mask
:
441 val
= nir_ishl(b
, nir_imm_intN_t(b
, 1ull, bit_size
), count
);
443 case nir_intrinsic_load_subgroup_ge_mask
:
444 val
= nir_iand(b
, nir_ishl(b
, nir_imm_intN_t(b
, ~0ull, bit_size
), count
),
445 build_subgroup_mask(b
, bit_size
, options
));
447 case nir_intrinsic_load_subgroup_gt_mask
:
448 val
= nir_iand(b
, nir_ishl(b
, nir_imm_intN_t(b
, ~1ull, bit_size
), count
),
449 build_subgroup_mask(b
, bit_size
, options
));
451 case nir_intrinsic_load_subgroup_le_mask
:
452 val
= nir_inot(b
, nir_ishl(b
, nir_imm_intN_t(b
, ~1ull, bit_size
), count
));
454 case nir_intrinsic_load_subgroup_lt_mask
:
455 val
= nir_inot(b
, nir_ishl(b
, nir_imm_intN_t(b
, ~0ull, bit_size
), count
));
458 unreachable("you seriously can't tell this is unreachable?");
461 return uint_to_ballot_type(b
, val
,
462 intrin
->dest
.ssa
.num_components
,
463 intrin
->dest
.ssa
.bit_size
);
466 case nir_intrinsic_ballot
: {
467 if (intrin
->dest
.ssa
.num_components
== 1 &&
468 intrin
->dest
.ssa
.bit_size
== options
->ballot_bit_size
)
471 nir_intrinsic_instr
*ballot
=
472 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_ballot
);
473 ballot
->num_components
= 1;
474 nir_ssa_dest_init(&ballot
->instr
, &ballot
->dest
,
475 1, options
->ballot_bit_size
, NULL
);
476 nir_src_copy(&ballot
->src
[0], &intrin
->src
[0], ballot
);
477 nir_builder_instr_insert(b
, &ballot
->instr
);
479 return uint_to_ballot_type(b
, &ballot
->dest
.ssa
,
480 intrin
->dest
.ssa
.num_components
,
481 intrin
->dest
.ssa
.bit_size
);
484 case nir_intrinsic_ballot_bitfield_extract
:
485 case nir_intrinsic_ballot_bit_count_reduce
:
486 case nir_intrinsic_ballot_find_lsb
:
487 case nir_intrinsic_ballot_find_msb
: {
488 assert(intrin
->src
[0].is_ssa
);
489 nir_ssa_def
*int_val
= ballot_type_to_uint(b
, intrin
->src
[0].ssa
,
490 options
->ballot_bit_size
);
492 if (intrin
->intrinsic
!= nir_intrinsic_ballot_bitfield_extract
&&
493 intrin
->intrinsic
!= nir_intrinsic_ballot_find_lsb
) {
494 /* For OpGroupNonUniformBallotFindMSB, the SPIR-V Spec says:
496 * "Find the most significant bit set to 1 in Value, considering
497 * only the bits in Value required to represent all bits of the
498 * group’s invocations. If none of the considered bits is set to
499 * 1, the result is undefined."
501 * It has similar text for the other three. This means that, in case
502 * the subgroup size is less than 32, we have to mask off the unused
503 * bits. If the subgroup size is fixed and greater than or equal to
504 * 32, the mask will be 0xffffffff and nir_opt_algebraic will delete
507 * We only have to worry about this for BitCount and FindMSB because
508 * FindLSB counts from the bottom and BitfieldExtract selects
509 * individual bits. In either case, if run outside the range of
510 * valid bits, we hit the undefined results case and we can return
513 int_val
= nir_iand(b
, int_val
,
514 build_subgroup_mask(b
, options
->ballot_bit_size
, options
));
517 switch (intrin
->intrinsic
) {
518 case nir_intrinsic_ballot_bitfield_extract
:
519 assert(intrin
->src
[1].is_ssa
);
520 return nir_i2b(b
, nir_iand(b
, nir_ushr(b
, int_val
,
522 nir_imm_intN_t(b
, 1, options
->ballot_bit_size
)));
523 case nir_intrinsic_ballot_bit_count_reduce
:
524 return nir_bit_count(b
, int_val
);
525 case nir_intrinsic_ballot_find_lsb
:
526 return nir_find_lsb(b
, int_val
);
527 case nir_intrinsic_ballot_find_msb
:
528 return nir_ufind_msb(b
, int_val
);
530 unreachable("you seriously can't tell this is unreachable?");
534 case nir_intrinsic_ballot_bit_count_exclusive
:
535 case nir_intrinsic_ballot_bit_count_inclusive
: {
536 nir_ssa_def
*count
= nir_load_subgroup_invocation(b
);
537 nir_ssa_def
*mask
= nir_imm_intN_t(b
, ~0ull, options
->ballot_bit_size
);
538 if (intrin
->intrinsic
== nir_intrinsic_ballot_bit_count_inclusive
) {
539 const unsigned bits
= options
->ballot_bit_size
;
540 mask
= nir_ushr(b
, mask
, nir_isub(b
, nir_imm_int(b
, bits
- 1), count
));
542 mask
= nir_inot(b
, nir_ishl(b
, mask
, count
));
545 assert(intrin
->src
[0].is_ssa
);
546 nir_ssa_def
*int_val
= ballot_type_to_uint(b
, intrin
->src
[0].ssa
,
547 options
->ballot_bit_size
);
549 return nir_bit_count(b
, nir_iand(b
, int_val
, mask
));
552 case nir_intrinsic_elect
: {
553 nir_intrinsic_instr
*first
=
554 nir_intrinsic_instr_create(b
->shader
,
555 nir_intrinsic_first_invocation
);
556 nir_ssa_dest_init(&first
->instr
, &first
->dest
, 1, 32, NULL
);
557 nir_builder_instr_insert(b
, &first
->instr
);
559 return nir_ieq(b
, nir_load_subgroup_invocation(b
), &first
->dest
.ssa
);
562 case nir_intrinsic_shuffle
:
563 if (options
->lower_to_scalar
&& intrin
->num_components
> 1)
564 return lower_subgroup_op_to_scalar(b
, intrin
, options
->lower_shuffle_to_32bit
);
565 else if (options
->lower_shuffle_to_32bit
&& intrin
->src
[0].ssa
->bit_size
== 64)
566 return lower_subgroup_op_to_32bit(b
, intrin
);
568 case nir_intrinsic_shuffle_xor
:
569 case nir_intrinsic_shuffle_up
:
570 case nir_intrinsic_shuffle_down
:
571 if (options
->lower_shuffle
)
572 return lower_shuffle(b
, intrin
, options
);
573 else if (options
->lower_to_scalar
&& intrin
->num_components
> 1)
574 return lower_subgroup_op_to_scalar(b
, intrin
, options
->lower_shuffle_to_32bit
);
575 else if (options
->lower_shuffle_to_32bit
&& intrin
->src
[0].ssa
->bit_size
== 64)
576 return lower_subgroup_op_to_32bit(b
, intrin
);
579 case nir_intrinsic_quad_broadcast
:
580 case nir_intrinsic_quad_swap_horizontal
:
581 case nir_intrinsic_quad_swap_vertical
:
582 case nir_intrinsic_quad_swap_diagonal
:
583 if (options
->lower_quad
||
584 (options
->lower_quad_broadcast_dynamic
&&
585 intrin
->intrinsic
== nir_intrinsic_quad_broadcast
&&
586 !nir_src_is_const(intrin
->src
[1])))
587 return lower_dynamic_quad_broadcast(b
, intrin
, options
);
588 else if (options
->lower_to_scalar
&& intrin
->num_components
> 1)
589 return lower_subgroup_op_to_scalar(b
, intrin
, false);
592 case nir_intrinsic_reduce
: {
593 nir_ssa_def
*ret
= NULL
;
594 /* A cluster size greater than the subgroup size is implemention defined */
595 if (options
->subgroup_size
&&
596 nir_intrinsic_cluster_size(intrin
) >= options
->subgroup_size
) {
597 nir_intrinsic_set_cluster_size(intrin
, 0);
598 ret
= NIR_LOWER_INSTR_PROGRESS
;
600 if (options
->lower_to_scalar
&& intrin
->num_components
> 1)
601 ret
= lower_subgroup_op_to_scalar(b
, intrin
, false);
604 case nir_intrinsic_inclusive_scan
:
605 case nir_intrinsic_exclusive_scan
:
606 if (options
->lower_to_scalar
&& intrin
->num_components
> 1)
607 return lower_subgroup_op_to_scalar(b
, intrin
, false);
618 nir_lower_subgroups(nir_shader
*shader
,
619 const nir_lower_subgroups_options
*options
)
621 return nir_shader_lower_instructions(shader
,
622 lower_subgroups_filter
,
623 lower_subgroups_instr
,