static nir_ssa_def *
lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
- bool lower_to_scalar, bool lower_to_32bit)
+ const nir_lower_subgroups_options *options)
{
nir_ssa_def *index = nir_load_subgroup_invocation(b);
+ bool is_shuffle = false;
switch (intrin->intrinsic) {
case nir_intrinsic_shuffle_xor:
assert(intrin->src[1].is_ssa);
index = nir_ixor(b, index, intrin->src[1].ssa);
+ is_shuffle = true;
break;
case nir_intrinsic_shuffle_up:
assert(intrin->src[1].is_ssa);
index = nir_isub(b, index, intrin->src[1].ssa);
+ is_shuffle = true;
break;
case nir_intrinsic_shuffle_down:
assert(intrin->src[1].is_ssa);
index = nir_iadd(b, index, intrin->src[1].ssa);
+ is_shuffle = true;
break;
case nir_intrinsic_quad_broadcast:
assert(intrin->src[1].is_ssa);
intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size, NULL);
- if (lower_to_scalar && shuffle->num_components > 1) {
+ bool lower_to_32bit = options->lower_shuffle_to_32bit && is_shuffle;
+ if (options->lower_to_scalar && shuffle->num_components > 1) {
return lower_subgroup_op_to_scalar(b, shuffle, lower_to_32bit);
} else if (lower_to_32bit && shuffle->src[0].ssa->bit_size == 64) {
return lower_subgroup_op_to_32bit(b, shuffle);
const nir_lower_subgroups_options *options)
{
if (!options->lower_quad_broadcast_dynamic_to_const)
- return lower_shuffle(b, intrin, options->lower_to_scalar, false);
+ return lower_shuffle(b, intrin, options);
nir_ssa_def *dst = NULL;
else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)
return lower_subgroup_op_to_32bit(b, intrin);
break;
-
case nir_intrinsic_shuffle_xor:
case nir_intrinsic_shuffle_up:
case nir_intrinsic_shuffle_down:
if (options->lower_shuffle)
- return lower_shuffle(b, intrin, options->lower_to_scalar, options->lower_shuffle_to_32bit);
+ return lower_shuffle(b, intrin, options);
else if (options->lower_to_scalar && intrin->num_components > 1)
return lower_subgroup_op_to_scalar(b, intrin, options->lower_shuffle_to_32bit);
else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)