From 0e8da9f60718520d1c3abd335a047282bb760b78 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 18 Nov 2019 17:26:38 +0000 Subject: [PATCH] aco: handle loop exit and IF merge phis with break/discard MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit ACO considers discards jumps and creates edges in the CFG for them but NIR does neither of these. This can be fixed instead by keeping track of whether a side of an IF had a break/discard, but this doesn't solve the issue with discards affecting loop exit phis. So this reworks phi handling a bit. Fixes these tests: dEQP-VK.graphicsfuzz.disc-and-add-in-func-in-loop dEQP-VK.graphicsfuzz.loop-call-discard dEQP-VK.graphicsfuzz.complex-nested-loops-and-call Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann --- .../compiler/aco_instruction_selection.cpp | 129 ++++++++++-------- .../aco_instruction_selection_setup.cpp | 5 + 2 files changed, 80 insertions(+), 54 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 51e22f2a822..807cc02264c 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6912,20 +6912,48 @@ Operand get_phi_operand(isel_context *ctx, nir_ssa_def *ssa) void visit_phi(isel_context *ctx, nir_phi_instr *instr) { aco_ptr phi; - unsigned num_src = exec_list_length(&instr->srcs); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); assert(instr->dest.ssa.bit_size != 1 || dst.regClass() == s2); - aco_opcode opcode = !dst.is_linear() || ctx->divergent_vals[instr->dest.ssa.index] ? aco_opcode::p_phi : aco_opcode::p_linear_phi; + bool logical = !dst.is_linear() || ctx->divergent_vals[instr->dest.ssa.index]; + logical |= ctx->block->kind & block_kind_merge; + aco_opcode opcode = logical ? aco_opcode::p_phi : aco_opcode::p_linear_phi; + /* we want a sorted list of sources, since the predecessor list is also sorted */ std::map phi_src; - bool all_undef = true; - nir_foreach_phi_src(src, instr) { + nir_foreach_phi_src(src, instr) phi_src[src->pred->index] = src->src.ssa; - if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) - all_undef = false; + + std::vector& preds = logical ? ctx->block->logical_preds : ctx->block->linear_preds; + unsigned num_operands = 0; + Operand operands[std::max(exec_list_length(&instr->srcs), (unsigned)preds.size())]; + unsigned num_defined = 0; + unsigned cur_pred_idx = 0; + for (std::pair src : phi_src) { + if (cur_pred_idx < preds.size()) { + /* handle missing preds (IF merges with discard/break) and extra preds (loop exit with discard) */ + unsigned block = ctx->cf_info.nir_to_aco[src.first]; + unsigned skipped = 0; + while (cur_pred_idx + skipped < preds.size() && preds[cur_pred_idx + skipped] != block) + skipped++; + if (cur_pred_idx + skipped < preds.size()) { + for (unsigned i = 0; i < skipped; i++) + operands[num_operands++] = Operand(dst.regClass()); + cur_pred_idx += skipped; + } else { + continue; + } + } + cur_pred_idx++; + Operand op = get_phi_operand(ctx, src.second); + operands[num_operands++] = op; + num_defined += !op.isUndefined(); } - if (all_undef) { + /* handle block_kind_continue_or_break at loop exit blocks */ + while (cur_pred_idx++ < preds.size()) + operands[num_operands++] = Operand(dst.regClass()); + + if (num_defined == 0) { Builder bld(ctx->program, ctx->block); if (dst.regClass() == s1) { bld.sop1(aco_opcode::s_mov_b32, Definition(dst), Operand(0u)); @@ -6941,17 +6969,41 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr) return; } + /* we can use a linear phi in some cases if one src is undef */ + if (dst.is_linear() && ctx->block->kind & block_kind_merge && num_defined == 1) { + phi.reset(create_instruction(aco_opcode::p_linear_phi, Format::PSEUDO, num_operands, 1)); + + Block *linear_else = &ctx->program->blocks[ctx->block->linear_preds[1]]; + Block *invert = &ctx->program->blocks[linear_else->linear_preds[0]]; + assert(invert->kind & block_kind_invert); + + unsigned then_block = invert->linear_preds[0]; + + Block* insert_block = NULL; + for (unsigned i = 0; i < num_operands; i++) { + Operand op = operands[i]; + if (op.isUndefined()) + continue; + insert_block = ctx->block->logical_preds[i] == then_block ? invert : ctx->block; + phi->operands[0] = op; + break; + } + assert(insert_block); /* should be handled by the "num_defined == 0" case above */ + phi->operands[1] = Operand(dst.regClass()); + phi->definitions[0] = Definition(dst); + insert_block->instructions.emplace(insert_block->instructions.begin(), std::move(phi)); + return; + } + /* try to scalarize vector phis */ if (instr->dest.ssa.bit_size != 1 && dst.size() > 1) { // TODO: scalarize linear phis on divergent ifs bool can_scalarize = (opcode == aco_opcode::p_phi || !(ctx->block->kind & block_kind_merge)); std::array new_vec; - for (std::pair& pair : phi_src) { - Operand src = get_phi_operand(ctx, pair.second); - if (src.isTemp() && ctx->allocated_vec.find(src.tempId()) == ctx->allocated_vec.end()) { + for (unsigned i = 0; can_scalarize && (i < num_operands); i++) { + Operand src = operands[i]; + if (src.isTemp() && ctx->allocated_vec.find(src.tempId()) == ctx->allocated_vec.end()) can_scalarize = false; - break; - } } if (can_scalarize) { unsigned num_components = instr->dest.ssa.num_components; @@ -6960,12 +7012,10 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr) aco_ptr vec{create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)}; for (unsigned k = 0; k < num_components; k++) { - phi.reset(create_instruction(opcode, Format::PSEUDO, num_src, 1)); - std::map::iterator it = phi_src.begin(); - for (unsigned i = 0; i < num_src; i++) { - Operand src = get_phi_operand(ctx, it->second); + phi.reset(create_instruction(opcode, Format::PSEUDO, num_operands, 1)); + for (unsigned i = 0; i < num_operands; i++) { + Operand src = operands[i]; phi->operands[i] = src.isTemp() ? Operand(ctx->allocated_vec[src.tempId()][k]) : Operand(rc); - ++it; } Temp phi_dst = {ctx->program->allocateId(), rc}; phi->definitions[0] = Definition(phi_dst); @@ -6980,43 +7030,9 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr) } } - unsigned extra_src = 0; - if (opcode == aco_opcode::p_linear_phi && (ctx->block->kind & block_kind_loop_exit) && - ctx->program->blocks[ctx->block->index-2].kind & block_kind_continue_or_break) { - extra_src++; - } - - phi.reset(create_instruction(opcode, Format::PSEUDO, num_src + extra_src, 1)); - - /* if we have a linear phi on a divergent if, we know that one src is undef */ - if (opcode == aco_opcode::p_linear_phi && ctx->block->kind & block_kind_merge) { - assert(extra_src == 0); - Block* block; - /* we place the phi either in the invert-block or in the current block */ - if (phi_src.begin()->second->parent_instr->type != nir_instr_type_ssa_undef) { - assert((++phi_src.begin())->second->parent_instr->type == nir_instr_type_ssa_undef); - Block& linear_else = ctx->program->blocks[ctx->block->linear_preds[1]]; - block = &ctx->program->blocks[linear_else.linear_preds[0]]; - assert(block->kind & block_kind_invert); - phi->operands[0] = get_phi_operand(ctx, phi_src.begin()->second); - } else { - assert((++phi_src.begin())->second->parent_instr->type != nir_instr_type_ssa_undef); - block = ctx->block; - phi->operands[0] = get_phi_operand(ctx, (++phi_src.begin())->second); - } - phi->operands[1] = Operand(dst.regClass()); - phi->definitions[0] = Definition(dst); - block->instructions.emplace(block->instructions.begin(), std::move(phi)); - return; - } - - std::map::iterator it = phi_src.begin(); - for (unsigned i = 0; i < num_src; i++) { - phi->operands[i] = get_phi_operand(ctx, it->second); - ++it; - } - for (unsigned i = 0; i < extra_src; i++) - phi->operands[num_src + i] = Operand(dst.regClass()); + phi.reset(create_instruction(opcode, Format::PSEUDO, num_operands, 1)); + for (unsigned i = 0; i < num_operands; i++) + phi->operands[i] = operands[i]; phi->definitions[0] = Definition(dst); ctx->block->instructions.emplace(ctx->block->instructions.begin(), std::move(phi)); } @@ -7062,6 +7078,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) return; } ctx->cf_info.parent_loop.has_divergent_branch = true; + ctx->cf_info.nir_to_aco[instr->instr.block->index] = ctx->block->index; break; case nir_jump_continue: logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx]; @@ -7073,6 +7090,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) we must ensure that they are handled correctly */ ctx->cf_info.parent_loop.has_divergent_continue = true; ctx->cf_info.parent_loop.has_divergent_branch = true; + ctx->cf_info.nir_to_aco[instr->instr.block->index] = ctx->block->index; } else { /* uniform continue - directly jump to the loop header */ ctx->block->kind |= block_kind_uniform; @@ -7144,6 +7162,9 @@ void visit_block(isel_context *ctx, nir_block *block) //abort(); } } + + if (!ctx->cf_info.parent_loop.has_divergent_branch) + ctx->cf_info.nir_to_aco[block->index] = ctx->block->index; } diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index ba7c7c12a52..ab96a4507cf 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -69,6 +69,7 @@ struct isel_context { bool is_divergent = false; } parent_if; bool exec_potentially_empty = false; + std::unique_ptr nir_to_aco; /* NIR block index to ACO block index */ } cf_info; Temp arg_temps[AC_MAX_ARGS]; @@ -133,6 +134,8 @@ void init_context(isel_context *ctx, nir_shader *shader) unsigned spi_ps_inputs = 0; + std::unique_ptr nir_to_aco{new unsigned[impl->num_blocks]()}; + bool done = false; while (!done) { done = true; @@ -538,6 +541,7 @@ void init_context(isel_context *ctx, nir_shader *shader) allocated[i] = Temp(ctx->program->allocateId(), allocated[i].regClass()); ctx->allocated.reset(allocated.release()); + ctx->cf_info.nir_to_aco.reset(nir_to_aco.release()); } Pseudo_instruction *add_startpgm(struct isel_context *ctx) @@ -899,6 +903,7 @@ setup_isel_context(Program* program, nir_function_impl *func = nir_shader_get_entrypoint(nir); nir_index_ssa_defs(func); + nir_metadata_require(func, nir_metadata_block_index); if (args->options->dump_preoptir) { fprintf(stderr, "NIR shader before instruction selection:\n"); -- 2.30.2