{
aco_opcode s_op = instr->src[0].src.ssa->bit_size == 64 ? s64_op : instr->src[0].src.ssa->bit_size == 32 ? s32_op : aco_opcode::num_opcodes;
aco_opcode v_op = instr->src[0].src.ssa->bit_size == 64 ? v64_op : instr->src[0].src.ssa->bit_size == 32 ? v32_op : v16_op;
- bool divergent_vals = ctx->divergent_vals[instr->dest.dest.ssa.index];
bool use_valu = s_op == aco_opcode::num_opcodes ||
- divergent_vals ||
+ nir_dest_is_divergent(instr->dest.dest) ||
ctx->allocated[instr->src[0].src.ssa->index].type() == RegType::vgpr ||
ctx->allocated[instr->src[1].src.ssa->index].type() == RegType::vgpr;
aco_opcode op = use_valu ? v_op : s_op;
assert(els.regClass() == bld.lm);
}
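For reference, the nir_dest_is_divergent()/nir_src_is_divergent() calls introduced above are thin wrappers over the per-def flag that nir_divergence_analysis() now writes in place. A rough sketch of what they boil down to (not the verbatim NIR definitions; non-SSA sources are omitted here):

static inline bool
nir_src_is_divergent(nir_src src)
{
   /* the divergent bit lives on the nir_ssa_def itself once the analysis ran */
   return src.ssa->divergent;
}

static inline bool
nir_dest_is_divergent(nir_dest dest)
{
   return dest.ssa.divergent;
}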
- if (!ctx->divergent_vals[instr->src[0].src.ssa->index]) { /* uniform condition and values in sgpr */
+ if (!nir_src_is_divergent(instr->src[0].src)) { /* uniform condition and values in sgpr */
if (dst.regClass() == s1 || dst.regClass() == s2) {
assert((then.regClass() == s1 || then.regClass() == s2) && els.regClass() == then.regClass());
assert(dst.size() == then.size());
{
Builder bld(ctx->program, ctx->block);
Temp index = get_ssa_temp(ctx, instr->src[0].ssa);
- if (!ctx->divergent_vals[instr->dest.ssa.index])
+ if (!nir_dest_is_divergent(instr->dest))
index = bld.as_uniform(index);
unsigned desc_set = nir_intrinsic_desc_set(instr);
unsigned binding = nir_intrinsic_binding(instr);
Temp rsrc = convert_pointer_to_64_bit(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
rsrc = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), rsrc, Operand(0u));
- bool smem = !ctx->divergent_vals[instr->src[2].ssa->index] &&
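+ /* take the scalar (SMEM) path only for a subgroup-uniform offset and dword-sized elements; scalar stores require GFX8+ */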
+ bool smem = !nir_src_is_divergent(instr->src[2]) &&
ctx->options->chip_class >= GFX8 &&
elem_size_bytes >= 4;
if (smem)
case nir_intrinsic_shuffle:
case nir_intrinsic_read_invocation: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
- if (!ctx->divergent_vals[instr->src[0].ssa->index]) {
+ if (!nir_src_is_divergent(instr->src[0])) {
emit_uniform_subgroup(ctx, instr, src);
} else {
Temp tid = get_ssa_temp(ctx, instr->src[1].ssa);
- if (instr->intrinsic == nir_intrinsic_read_invocation || !ctx->divergent_vals[instr->src[1].ssa->index])
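+ /* read_invocation's lane index is treated as uniform unconditionally; for shuffle we rely on divergence analysis. A uniform index can be kept in an SGPR. */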
+ if (instr->intrinsic == nir_intrinsic_read_invocation || !nir_src_is_divergent(instr->src[1]))
tid = bld.as_uniform(tid);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
if (src.regClass() == v1) {
nir_intrinsic_cluster_size(instr) : 0;
cluster_size = util_next_power_of_two(MIN2(cluster_size ? cluster_size : ctx->program->wave_size, ctx->program->wave_size));
- if (!ctx->divergent_vals[instr->src[0].ssa->index] && (op == nir_op_ior || op == nir_op_iand)) {
+ if (!nir_src_is_divergent(instr->src[0]) && (op == nir_op_ior || op == nir_op_iand)) {
emit_uniform_subgroup(ctx, instr, src);
} else if (instr->dest.ssa.bit_size == 1) {
if (op == nir_op_imul || op == nir_op_umin || op == nir_op_imin)
}
case nir_intrinsic_quad_broadcast: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
- if (!ctx->divergent_vals[instr->dest.ssa.index]) {
+ if (!nir_dest_is_divergent(instr->dest)) {
emit_uniform_subgroup(ctx, instr, src);
} else {
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
case nir_intrinsic_quad_swap_diagonal:
case nir_intrinsic_quad_swizzle_amd: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
- if (!ctx->divergent_vals[instr->dest.ssa.index]) {
+ if (!nir_dest_is_divergent(instr->dest)) {
emit_uniform_subgroup(ctx, instr, src);
break;
}
}
case nir_intrinsic_masked_swizzle_amd: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
- if (!ctx->divergent_vals[instr->dest.ssa.index]) {
+ if (!nir_dest_is_divergent(instr->dest)) {
emit_uniform_subgroup(ctx, instr, src);
break;
}
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
assert(instr->dest.ssa.bit_size != 1 || dst.regClass() == ctx->program->lane_mask);
- bool logical = !dst.is_linear() || ctx->divergent_vals[instr->dest.ssa.index];
+ bool logical = !dst.is_linear() || nir_dest_is_divergent(instr->dest);
logical |= ctx->block->kind & block_kind_merge;
aco_opcode opcode = logical ? aco_opcode::p_phi : aco_opcode::p_linear_phi;
aco_ptr<Pseudo_branch_instruction> branch;
if_context ic;
- if (!ctx->divergent_vals[if_stmt->condition.ssa->index]) { /* uniform condition */
+ if (!nir_src_is_divergent(if_stmt->condition)) { /* uniform condition */
/**
* Uniform conditionals are represented in the following way*) :
*
if (ngg_no_gs && !ngg_early_prim_export(&ctx))
ngg_emit_nogs_output(&ctx);
- ralloc_free(ctx.divergent_vals);
-
if (i == 0 && ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq) {
/* Outputs of the previous stage are inputs to the next stage */
ctx.inputs = ctx.outputs;
nir_shader *shader;
uint32_t constant_data_offset;
Block *block;
- bool *divergent_vals;
std::unique_ptr<Temp[]> allocated;
std::unordered_map<unsigned, std::array<Temp,NIR_MAX_VEC_COMPONENTS>> allocated_vec;
Stage stage; /* Stage */
* block instead. This is so that we can use any SGPR live-out of the side
* without the branch without creating a linear phi in the invert or merge block. */
bool
-sanitize_if(nir_function_impl *impl, bool *divergent, nir_if *nif)
+sanitize_if(nir_function_impl *impl, nir_if *nif)
{
//TODO: skip this if the condition is uniform and there are no divergent breaks/continues?
}
bool
-sanitize_cf_list(nir_function_impl *impl, bool *divergent, struct exec_list *cf_list)
+sanitize_cf_list(nir_function_impl *impl, struct exec_list *cf_list)
{
bool progress = false;
foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
break;
case nir_cf_node_if: {
nir_if *nif = nir_cf_node_as_if(cf_node);
- progress |= sanitize_cf_list(impl, divergent, &nif->then_list);
- progress |= sanitize_cf_list(impl, divergent, &nif->else_list);
- progress |= sanitize_if(impl, divergent, nif);
+ progress |= sanitize_cf_list(impl, &nif->then_list);
+ progress |= sanitize_cf_list(impl, &nif->else_list);
+ progress |= sanitize_if(impl, nif);
break;
}
case nir_cf_node_loop: {
nir_loop *loop = nir_cf_node_as_loop(cf_node);
- progress |= sanitize_cf_list(impl, divergent, &loop->body);
+ progress |= sanitize_cf_list(impl, &loop->body);
break;
}
case nir_cf_node_function:
unsigned lane_mask_size = ctx->program->lane_mask.size();
ctx->shader = shader;
- ctx->divergent_vals = nir_divergence_analysis(shader, nir_divergence_view_index_uniform);
+ nir_divergence_analysis(shader, nir_divergence_view_index_uniform);
/* sanitize control flow */
nir_metadata_require(impl, nir_metadata_dominance);
- sanitize_cf_list(impl, ctx->divergent_vals, &impl->body);
+ sanitize_cf_list(impl, &impl->body);
nir_metadata_preserve(impl, (nir_metadata)~nir_metadata_block_index);
/* we'll need this for isel */
case nir_op_b2f16:
case nir_op_b2f32:
case nir_op_mov:
- type = ctx->divergent_vals[alu_instr->dest.dest.ssa.index] ? RegType::vgpr : RegType::sgpr;
+ type = nir_dest_is_divergent(alu_instr->dest.dest) ? RegType::vgpr : RegType::sgpr;
break;
case nir_op_bcsel:
- type = ctx->divergent_vals[alu_instr->dest.dest.ssa.index] ? RegType::vgpr : RegType::sgpr;
+ type = nir_dest_is_divergent(alu_instr->dest.dest) ? RegType::vgpr : RegType::sgpr;
/* fallthrough */
default:
for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) {
case nir_intrinsic_load_global:
case nir_intrinsic_vulkan_resource_index:
case nir_intrinsic_load_shared:
- type = ctx->divergent_vals[intrinsic->dest.ssa.index] ? RegType::vgpr : RegType::sgpr;
+ type = nir_dest_is_divergent(intrinsic->dest) ? RegType::vgpr : RegType::sgpr;
break;
case nir_intrinsic_load_view_index:
type = ctx->stage == fragment_fs ? RegType::vgpr : RegType::sgpr;
if (tex->dest.ssa.bit_size == 64)
size *= 2;
- if (tex->op == nir_texop_texture_samples)
-    assert(!ctx->divergent_vals[tex->dest.ssa.index]);
- if (ctx->divergent_vals[tex->dest.ssa.index])
+ if (tex->op == nir_texop_texture_samples)
+    assert(!nir_dest_is_divergent(tex->dest));
+ if (nir_dest_is_divergent(tex->dest))
allocated[tex->dest.ssa.index] = Temp(0, RegClass(RegType::vgpr, size));
else
allocated[tex->dest.ssa.index] = Temp(0, RegClass(RegType::sgpr, size));
break;
}
- if (ctx->divergent_vals[phi->dest.ssa.index]) {
+ if (nir_dest_is_divergent(phi->dest)) {
type = RegType::vgpr;
} else {
type = RegType::sgpr;
*/
static bool
-visit_cf_list(bool *divergent, struct exec_list *list,
+visit_cf_list(struct exec_list *list,
nir_divergence_options options, gl_shader_stage stage);
static bool
-visit_alu(bool *divergent, nir_alu_instr *instr)
+visit_alu(nir_alu_instr *instr)
{
- if (divergent[instr->dest.dest.ssa.index])
+ if (instr->dest.dest.ssa.divergent)
return false;
unsigned num_src = nir_op_infos[instr->op].num_inputs;
for (unsigned i = 0; i < num_src; i++) {
- if (divergent[instr->src[i].src.ssa->index]) {
- divergent[instr->dest.dest.ssa.index] = true;
+ if (instr->src[i].src.ssa->divergent) {
+ instr->dest.dest.ssa.divergent = true;
return true;
}
}
}
static bool
-visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr,
+visit_intrinsic(nir_intrinsic_instr *instr,
nir_divergence_options options, gl_shader_stage stage)
{
if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
return false;
- if (divergent[instr->dest.ssa.index])
+ if (instr->dest.ssa.divergent)
return false;
bool is_divergent = false;
/* Intrinsics with divergence depending on shader stage and hardware */
case nir_intrinsic_load_input:
- is_divergent = divergent[instr->src[0].ssa->index];
+ is_divergent = instr->src[0].ssa->divergent;
if (stage == MESA_SHADER_FRAGMENT)
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
else if (stage == MESA_SHADER_TESS_EVAL)
is_divergent = true;
break;
case nir_intrinsic_load_input_vertex:
- is_divergent = divergent[instr->src[1].ssa->index];
+ is_divergent = instr->src[1].ssa->divergent;
assert(stage == MESA_SHADER_FRAGMENT);
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
break;
case nir_intrinsic_load_output:
assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);
- is_divergent = divergent[instr->src[0].ssa->index];
+ is_divergent = instr->src[0].ssa->divergent;
if (stage == MESA_SHADER_TESS_CTRL)
is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
else
break;
case nir_intrinsic_load_fs_input_interp_deltas:
assert(stage == MESA_SHADER_FRAGMENT);
- is_divergent = divergent[instr->src[0].ssa->index];
+ is_divergent = instr->src[0].ssa->divergent;
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
break;
case nir_intrinsic_load_primitive_id:
/* fallthrough */
case nir_intrinsic_inclusive_scan: {
nir_op op = nir_intrinsic_reduction_op(instr);
- is_divergent = divergent[instr->src[0].ssa->index];
+ is_divergent = instr->src[0].ssa->divergent;
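+ /* a scan of a uniform value only stays uniform for ops where combining
+  * equal values returns that same value (min/max/and/or) */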
if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
op != nir_op_iand && op != nir_op_ior)
case nir_intrinsic_masked_swizzle_amd: {
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
for (unsigned i = 0; i < num_srcs; i++) {
- if (divergent[instr->src[i].ssa->index]) {
+ if (instr->src[i].ssa->divergent) {
is_divergent = true;
break;
}
}
case nir_intrinsic_shuffle:
- is_divergent = divergent[instr->src[0].ssa->index] &&
- divergent[instr->src[1].ssa->index];
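+ /* uniform data or a uniform lane index both make the shuffle result uniform */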
+ is_divergent = instr->src[0].ssa->divergent &&
+ instr->src[1].ssa->divergent;
break;
/* Intrinsics which are always divergent */
#endif
}
- divergent[instr->dest.ssa.index] = is_divergent;
+ instr->dest.ssa.divergent = is_divergent;
return is_divergent;
}
static bool
-visit_tex(bool *divergent, nir_tex_instr *instr)
+visit_tex(nir_tex_instr *instr)
{
- if (divergent[instr->dest.ssa.index])
+ if (instr->dest.ssa.divergent)
return false;
bool is_divergent = false;
case nir_tex_src_sampler_deref:
case nir_tex_src_sampler_handle:
case nir_tex_src_sampler_offset:
- is_divergent |= divergent[instr->src[i].src.ssa->index] &&
+ is_divergent |= instr->src[i].src.ssa->divergent &&
instr->sampler_non_uniform;
break;
case nir_tex_src_texture_deref:
case nir_tex_src_texture_handle:
case nir_tex_src_texture_offset:
- is_divergent |= divergent[instr->src[i].src.ssa->index] &&
+ is_divergent |= instr->src[i].src.ssa->divergent &&
instr->texture_non_uniform;
break;
default:
- is_divergent |= divergent[instr->src[i].src.ssa->index];
+ is_divergent |= instr->src[i].src.ssa->divergent;
break;
}
}
- divergent[instr->dest.ssa.index] = is_divergent;
+ instr->dest.ssa.divergent = is_divergent;
return is_divergent;
}
static bool
-visit_phi(bool *divergent, nir_phi_instr *instr)
+visit_phi(nir_phi_instr *instr)
{
/* There are 3 types of phi instructions:
* (1) gamma: represent the joining point of different paths
* (note: there should be no phi for loop-invariant variables.)
*/
- if (divergent[instr->dest.ssa.index])
+ if (instr->dest.ssa.divergent)
return false;
nir_foreach_phi_src(src, instr) {
/* if any source value is divergent, the resulting value is divergent */
- if (divergent[src->src.ssa->index]) {
- divergent[instr->dest.ssa.index] = true;
+ if (src->src.ssa->divergent) {
+ instr->dest.ssa.divergent = true;
return true;
}
}
while (current->type != nir_cf_node_loop) {
assert (current->type == nir_cf_node_if);
nir_if *if_node = nir_cf_node_as_if(current);
- if (divergent[if_node->condition.ssa->index]) {
- divergent[instr->dest.ssa.index] = true;
+ if (if_node->condition.ssa->divergent) {
+ instr->dest.ssa.divergent = true;
return true;
}
current = current->parent;
/* gamma: check if the condition is divergent */
nir_if *if_node = nir_cf_node_as_if(prev);
- if (divergent[if_node->condition.ssa->index]) {
- divergent[instr->dest.ssa.index] = true;
+ if (if_node->condition.ssa->divergent) {
+ instr->dest.ssa.divergent = true;
return true;
}
while (current->type != nir_cf_node_loop) {
assert(current->type == nir_cf_node_if);
nir_if *if_node = nir_cf_node_as_if(current);
- if (divergent[if_node->condition.ssa->index]) {
- divergent[instr->dest.ssa.index] = true;
+ if (if_node->condition.ssa->divergent) {
+ instr->dest.ssa.divergent = true;
return true;
}
current = current->parent;
}
assert(current->type == nir_cf_node_if);
nir_if *if_node = nir_cf_node_as_if(current);
- is_divergent |= divergent[if_node->condition.ssa->index];
+ is_divergent |= if_node->condition.ssa->divergent;
current = current->parent;
}
if (is_divergent) {
- divergent[instr->dest.ssa.index] = true;
+ instr->dest.ssa.divergent = true;
return true;
}
}
}
static bool
-visit_load_const(bool *divergent, nir_load_const_instr *instr)
+visit_load_const(nir_load_const_instr *instr)
{
return false;
}
static bool
-visit_ssa_undef(bool *divergent, nir_ssa_undef_instr *instr)
+visit_ssa_undef(nir_ssa_undef_instr *instr)
{
return false;
}
}
static bool
-visit_deref(bool *divergent, nir_deref_instr *deref,
+visit_deref(nir_deref_instr *deref,
nir_divergence_options options, gl_shader_stage stage)
{
- if (divergent[deref->dest.ssa.index])
+ if (deref->dest.ssa.divergent)
return false;
bool is_divergent = false;
break;
case nir_deref_type_array:
case nir_deref_type_ptr_as_array:
- is_divergent = divergent[deref->arr.index.ssa->index];
+ is_divergent = deref->arr.index.ssa->divergent;
/* fallthrough */
case nir_deref_type_struct:
case nir_deref_type_array_wildcard:
- is_divergent |= divergent[deref->parent.ssa->index];
+ is_divergent |= deref->parent.ssa->divergent;
break;
case nir_deref_type_cast:
is_divergent = !nir_variable_mode_is_uniform(deref->mode) ||
- divergent[deref->parent.ssa->index];
+ deref->parent.ssa->divergent;
break;
}
- divergent[deref->dest.ssa.index] = is_divergent;
+ deref->dest.ssa.divergent = is_divergent;
return is_divergent;
}
static bool
-visit_block(bool *divergent, nir_block *block, nir_divergence_options options,
+visit_block(nir_block *block, nir_divergence_options options,
gl_shader_stage stage)
{
bool has_changed = false;
nir_foreach_instr(instr, block) {
switch (instr->type) {
case nir_instr_type_alu:
- has_changed |= visit_alu(divergent, nir_instr_as_alu(instr));
+ has_changed |= visit_alu(nir_instr_as_alu(instr));
break;
case nir_instr_type_intrinsic:
- has_changed |= visit_intrinsic(divergent, nir_instr_as_intrinsic(instr),
+ has_changed |= visit_intrinsic(nir_instr_as_intrinsic(instr),
options, stage);
break;
case nir_instr_type_tex:
- has_changed |= visit_tex(divergent, nir_instr_as_tex(instr));
+ has_changed |= visit_tex(nir_instr_as_tex(instr));
break;
case nir_instr_type_phi:
- has_changed |= visit_phi(divergent, nir_instr_as_phi(instr));
+ has_changed |= visit_phi(nir_instr_as_phi(instr));
break;
case nir_instr_type_load_const:
- has_changed |= visit_load_const(divergent, nir_instr_as_load_const(instr));
+ has_changed |= visit_load_const(nir_instr_as_load_const(instr));
break;
case nir_instr_type_ssa_undef:
- has_changed |= visit_ssa_undef(divergent, nir_instr_as_ssa_undef(instr));
+ has_changed |= visit_ssa_undef(nir_instr_as_ssa_undef(instr));
break;
case nir_instr_type_deref:
- has_changed |= visit_deref(divergent, nir_instr_as_deref(instr),
+ has_changed |= visit_deref(nir_instr_as_deref(instr),
options, stage);
break;
case nir_instr_type_jump:
}
static bool
-visit_if(bool *divergent, nir_if *if_stmt, nir_divergence_options options, gl_shader_stage stage)
+visit_if(nir_if *if_stmt, nir_divergence_options options, gl_shader_stage stage)
{
- return visit_cf_list(divergent, &if_stmt->then_list, options, stage) |
- visit_cf_list(divergent, &if_stmt->else_list, options, stage);
+ return visit_cf_list(&if_stmt->then_list, options, stage) |
+ visit_cf_list(&if_stmt->else_list, options, stage);
}
static bool
-visit_loop(bool *divergent, nir_loop *loop, nir_divergence_options options, gl_shader_stage stage)
+visit_loop(nir_loop *loop, nir_divergence_options options, gl_shader_stage stage)
{
bool has_changed = false;
bool repeat = true;
/* TODO: restructure this and the phi handling more efficiently */
while (repeat) {
- repeat = visit_cf_list(divergent, &loop->body, options, stage);
+ repeat = visit_cf_list(&loop->body, options, stage);
has_changed |= repeat;
}
}
static bool
-visit_cf_list(bool *divergent, struct exec_list *list,
+visit_cf_list(struct exec_list *list,
nir_divergence_options options, gl_shader_stage stage)
{
bool has_changed = false;
foreach_list_typed(nir_cf_node, node, node, list) {
switch (node->type) {
case nir_cf_node_block:
- has_changed |= visit_block(divergent, nir_cf_node_as_block(node),
+ has_changed |= visit_block(nir_cf_node_as_block(node),
options, stage);
break;
case nir_cf_node_if:
- has_changed |= visit_if(divergent, nir_cf_node_as_if(node),
+ has_changed |= visit_if(nir_cf_node_as_if(node),
options, stage);
break;
case nir_cf_node_loop:
- has_changed |= visit_loop(divergent, nir_cf_node_as_loop(node),
+ has_changed |= visit_loop(nir_cf_node_as_loop(node),
options, stage);
break;
case nir_cf_node_function:
return has_changed;
}
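+/* The visit_* functions never clear an already-set divergent flag (they
+ * early-return on divergent defs), so clearing everything up front keeps the
+ * fixed-point iteration monotone and guarantees it terminates. */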
+static bool
+set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
+{
+ def->divergent = false;
+ return true;
+}
-bool*
+void
nir_divergence_analysis(nir_shader *shader, nir_divergence_options options)
{
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
- bool *t = rzalloc_array(shader, bool, impl->ssa_alloc);
- visit_cf_list(t, &impl->body, options, shader->info.stage);
- return t;
+ /* Set all SSA defs to non-divergent to start off */
+ nir_foreach_block(block, impl) {
+    nir_foreach_instr(instr, block)
+       nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
+ }
+ visit_cf_list(&impl->body, options, shader->info.stage);
}
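From the caller's side the change looks roughly like this (abbreviated sketch based on the aco hunks above, not a complete listing):

/* before: the analysis returned a ralloc'ed bool array indexed by SSA index */
bool *divergent_vals = nir_divergence_analysis(shader, nir_divergence_view_index_uniform);
bool div = divergent_vals[def->index];
/* ... */
ralloc_free(divergent_vals);

/* after: the result is stored on each nir_ssa_def, nothing to allocate or free */
nir_divergence_analysis(shader, nir_divergence_view_index_uniform);
bool div = def->divergent;   /* or nir_src_is_divergent()/nir_dest_is_divergent() */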