From 300d1181c72043afe045a155079fc152fcd1283e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 10 Jan 2020 14:07:03 -0800 Subject: [PATCH] freedreno/ir3: move atomic fixup after RA A post-RA sched pass will move the extra movs to the wrong place, so rework the fixup so it can run after RA (and therefore after postsched). Signed-off-by: Rob Clark Part-of: --- src/freedreno/ir3/ir3.h | 2 +- src/freedreno/ir3/ir3_a6xx.c | 54 ++++++++++++++++------------ src/freedreno/ir3/ir3_compiler_nir.c | 10 +++--- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index bf0f92a478a..b9cf06e636d 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1138,7 +1138,7 @@ void ir3_sun(struct ir3 *ir); void ir3_sched_add_deps(struct ir3 *ir); int ir3_sched(struct ir3 *ir); -void ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so); +bool ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so); /* register assignment: */ struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler); diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index fd18fc3aa3c..6fed98d7673 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -330,35 +330,35 @@ const struct ir3_context_funcs ir3_a6xx_funcs = { * extra mov from src1.x to dst. This way the other compiler passes * can ignore this quirk of the new instruction encoding. * - * This might cause extra complication in the future when we support - * spilling, as I think we'd want to re-run the scheduling pass. One - * possible alternative might be to do this in the RA pass after - * ra_allocate() but before destroying the SSA links. (Ie. we do - * want to know if anything consumes the result of the atomic instr, - * if there is no consumer then inserting the extra mov is pointless. + * This should run after RA. 
*/ static struct ir3_instruction * get_atomic_dest_mov(struct ir3_instruction *atomic) { + struct ir3_instruction *mov; + /* if we've already created the mov-out, then re-use it: */ if (atomic->data) return atomic->data; + /* We are already out of SSA here, so we can't use the nice builders: */ + mov = ir3_instr_create(atomic->block, OPC_MOV); + ir3_reg_create(mov, 0, 0); /* dst */ + ir3_reg_create(mov, 0, 0); /* src */ + + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + /* extract back out the 'dummy' which serves as stand-in for dest: */ - struct ir3_instruction *src = ssa(atomic->regs[3]); + struct ir3_instruction *src = atomic->regs[3]->instr; debug_assert(src->opc == OPC_META_COLLECT); - struct ir3_instruction *dummy = ssa(src->regs[1]); - struct ir3_instruction *mov = ir3_MOV(atomic->block, dummy, TYPE_U32); + *mov->regs[0] = *atomic->regs[0]; + *mov->regs[1] = *src->regs[1]->instr->regs[0]; mov->flags |= IR3_INSTR_SY; - if (atomic->regs[0]->flags & IR3_REG_ARRAY) { - mov->regs[0]->flags |= IR3_REG_ARRAY; - mov->regs[0]->array = atomic->regs[0]->array; - } - /* it will have already been appended to the end of the block, which * isn't where we want it, so fix-up the location: */ @@ -368,11 +368,13 @@ get_atomic_dest_mov(struct ir3_instruction *atomic) return atomic->data = mov; } -void +bool ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so) { + bool progress = false; + if (ir3_shader_nibo(so) == 0) - return; + return false; foreach_block (block, &ir->block_list) { foreach_instr (instr, &block->instr_list) { @@ -385,21 +387,27 @@ ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so) struct ir3_register *reg; foreach_src(reg, instr) { - struct ir3_instruction *src = ssa(reg); + struct ir3_instruction *src = reg->instr; if (!src) continue; - if (is_atomic(src->opc) && (src->flags & IR3_INSTR_G)) + if (is_atomic(src->opc) && (src->flags & IR3_INSTR_G)) { reg->instr = get_atomic_dest_mov(src); + 
progress = true; + } } } + } - /* we also need to fixup shader outputs: */ - struct ir3_instruction *out; - foreach_output_n(out, n, ir) - if (is_atomic(out->opc) && (out->flags & IR3_INSTR_G)) - ir->outputs[n] = get_atomic_dest_mov(out); + /* we also need to fixup shader outputs: */ + struct ir3_instruction *out; + foreach_output_n (out, n, ir) { + if (is_atomic(out->opc) && (out->flags & IR3_INSTR_G)) { + ir->outputs[n] = get_atomic_dest_mov(out); + progress = true; + } } + return progress; } diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 1ae1dfd8e63..c5a1f915b9c 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -3406,10 +3406,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, goto out; } - if (compiler->gpu_id >= 600) { - ir3_a6xx_fixup_atomic_dests(ir, so); - } - ir3_debug_print(ir, "AFTER SCHED"); /* Pre-assign VS inputs on a6xx+ binning pass shader, to align @@ -3484,6 +3480,12 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, ir3_debug_print(ir, "AFTER RA"); + if (compiler->gpu_id >= 600) { + if (ir3_a6xx_fixup_atomic_dests(ir, so)) { + ir3_debug_print(ir, "AFTER ATOMIC FIXUP"); + } + } + if (so->type == MESA_SHADER_FRAGMENT) pack_inlocs(ctx); -- 2.30.2