From 449e5ded9340243b68183d7fffcc838cf283c89c Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 30 Apr 2020 13:46:35 -0400 Subject: [PATCH] pan/mdg: Treat inot as a modifier With this, we may remove all invert passes and simply look at the src modifier on NIR->MIR and fixup at pack time. No shader-db changes. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/midgard/compiler.h | 3 ++ src/panfrost/midgard/midgard_compile.c | 53 ++++++++++++++++--- src/panfrost/midgard/midgard_emit.c | 48 +++++++++++++++++ src/panfrost/midgard/midgard_nir.h | 1 + src/panfrost/midgard/midgard_nir_algebraic.py | 8 +++ 5 files changed, 105 insertions(+), 8 deletions(-) diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 04cae499f43..e0dd83bb9cf 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -136,6 +136,9 @@ typedef struct midgard_instruction { bool invert; + /* For accepting ALU ops - invert the nth source */ + bool src_invert[MIR_SRC_COUNT]; + /* Hint for the register allocator not to spill the destination written * from this instruction (because it is a spill/unspill node itself). 
* Bitmask of spilled classes */ diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 96a8c1c9397..a47cc9bb791 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -486,6 +486,7 @@ optimise_nir(nir_shader *nir, unsigned quirks) /* Now that booleans are lowered, we can run out late opts */ NIR_PASS(progress, nir, midgard_nir_lower_algebraic_late); + NIR_PASS(progress, nir, midgard_nir_cancel_inot); NIR_PASS(progress, nir, nir_copy_prop); NIR_PASS(progress, nir, nir_opt_dce); @@ -599,8 +600,25 @@ reg_mode_for_nir(nir_alu_instr *instr) } } +/* Returns whether source number `src` of NIR ALU op `op` may carry an inot as a source modifier: every source of ior/iand/ixor, only source 0 of b32csel, and no source of any other op. Compare mir_lower_invert */ +static bool +nir_accepts_inot(nir_op op, unsigned src) +{ + switch (op) { + case nir_op_ior: + case nir_op_iand: + case nir_op_ixor: + return true; + case nir_op_b32csel: + /* Only the condition (source 0); the other b32csel sources do not accept an inot */ + return (src == 0); + default: + return false; + } +} + static void -mir_copy_src(midgard_instruction *ins, nir_alu_instr *instr, unsigned i, unsigned to, bool *abs, bool *neg, bool is_int, unsigned bcast_count) +mir_copy_src(midgard_instruction *ins, nir_alu_instr *instr, unsigned i, unsigned to, bool *abs, bool *neg, bool *not, bool is_int, unsigned bcast_count) { nir_alu_src src = instr->src[i]; @@ -612,6 +630,9 @@ mir_copy_src(midgard_instruction *ins, nir_alu_instr *instr, unsigned i, unsigne *abs = true; } + if (nir_accepts_inot(instr->op, i) && pan_has_source_mod(&src, nir_op_inot)) + *not = true; + unsigned bits = nir_src_bit_size(src.src); ins->src[to] = nir_src_index(NULL, &src.src); @@ -750,8 +771,9 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) ALU_CASE(fsin, fsin); ALU_CASE(fcos, fcos); - /* We'll set invert */ - ALU_CASE(inot, imov); + /* We'll get 0 in the second arg, so: + * ~a = ~(a | 0) = nor(a, 0) */ + ALU_CASE(inot, inor); ALU_CASE(iand, iand); ALU_CASE(ior, ior); ALU_CASE(ixor, ixor); @@ -899,7 +921,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) if (quirk_flipped_r24) { 
ins.src[0] = ~0; - mir_copy_src(&ins, instr, 0, 1, &abs[1], &neg[1], is_int, broadcast_swizzle); + mir_copy_src(&ins, instr, 0, 1, &abs[1], &neg[1], &ins.src_invert[1], is_int, broadcast_swizzle); } else { for (unsigned i = 0; i < nr_inputs; ++i) { unsigned to = i; @@ -912,13 +934,21 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) if (i == 0) to = 2; + else if (flip_src12) + to = 2 - i; else to = i - 1; } else if (flip_src12) { to = 1 - to; } - mir_copy_src(&ins, instr, i, to, &abs[to], &neg[to], is_int, broadcast_swizzle); + mir_copy_src(&ins, instr, i, to, &abs[to], &neg[to], &ins.src_invert[to], is_int, broadcast_swizzle); + + /* (!c) ? a : b = c ? b : a */ + if (instr->op == nir_op_b32csel && ins.src_invert[2]) { + ins.src_invert[2] = false; + flip_src12 ^= true; + } } } @@ -952,6 +982,13 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) ins.alu = alu; + /* Arrange for creation of iandnot/iornot */ + if (ins.src_invert[0] && !ins.src_invert[1]) { + mir_flip(&ins); + ins.src_invert[0] = false; + ins.src_invert[1] = true; + } + /* Late fixup for emulated instructions */ if (instr->op == nir_op_b2f32 || instr->op == nir_op_b2i32) { @@ -982,8 +1019,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) for (unsigned c = 0; c < 16; ++c) ins.swizzle[1][c] = 0; - } else if (instr->op == nir_op_inot) { - ins.invert = true; } if ((opcode_props & UNITS_ALL) == UNIT_VLUT) { @@ -2494,18 +2529,20 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b progress |= midgard_opt_dead_code_eliminate(ctx, block); progress |= midgard_opt_combine_projection(ctx, block); progress |= midgard_opt_varying_projection(ctx, block); +#if 0 progress |= midgard_opt_not_propagate(ctx, block); progress |= midgard_opt_fuse_src_invert(ctx, block); progress |= midgard_opt_fuse_dest_invert(ctx, block); progress |= midgard_opt_csel_invert(ctx, block); progress |= midgard_opt_drop_cmp_invert(ctx, block); progress |= midgard_opt_invert_branch(ctx, 
block); +#endif } } while (progress); mir_foreach_block(ctx, _block) { midgard_block *block = (midgard_block *) _block; - midgard_lower_invert(ctx, block); + //midgard_lower_invert(ctx, block); midgard_lower_derivatives(ctx, block); } diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c index b0bb3191039..8bfb76e2a4c 100644 --- a/src/panfrost/midgard/midgard_emit.c +++ b/src/panfrost/midgard/midgard_emit.c @@ -328,6 +328,51 @@ mir_pack_ldst_mask(midgard_instruction *ins) ins->load_store.mask = packed; } +static void +mir_lower_inverts(midgard_instruction *ins) +{ + bool inv[3] = { + ins->src_invert[0], + ins->src_invert[1], + ins->src_invert[2] /* copied, but no case below reads inv[2] */ + }; + /* Fold the source-invert flags into the opcode for iand/ior/ixor; any other op is left as is. Only ins->alu.op is written, src_invert itself is not cleared. For iand/ior an invert on source 0 alone is not rewritten here (emit_alu calls mir_flip and moves that flag to source 1 beforehand) */ + switch (ins->alu.op) { + case midgard_alu_op_iand: + /* a & ~b = iandnot(a, b) */ + /* ~a & ~b = ~(a | b) = inor(a, b) */ + + if (inv[0] && inv[1]) + ins->alu.op = midgard_alu_op_inor; + else if (inv[1]) + ins->alu.op = midgard_alu_op_iandnot; + + break; + case midgard_alu_op_ior: + /* a | ~b = iornot(a, b) */ + /* ~a | ~b = ~(a & b) = inand(a, b) */ + + if (inv[0] && inv[1]) + ins->alu.op = midgard_alu_op_inand; + else if (inv[1]) + ins->alu.op = midgard_alu_op_iornot; + + break; + + case midgard_alu_op_ixor: + /* ~a ^ b = a ^ ~b = ~(a ^ b) = inxor(a, b), i.e. exactly one source inverted */ + /* ~a ^ ~b = a ^ b, so the opcode is kept when both or neither are inverted */ + + if (inv[0] ^ inv[1]) + ins->alu.op = midgard_alu_op_inxor; + + break; + + default: + break; + } +} + static void emit_alu_bundle(compiler_context *ctx, midgard_bundle *bundle, @@ -361,6 +406,9 @@ emit_alu_bundle(compiler_context *ctx, /* In case we demote to a scalar */ midgard_scalar_alu scalarized; + if (!ins->compact_branch) + mir_lower_inverts(ins); + if (ins->unit & UNITS_ANY_VECTOR) { mir_pack_mask_alu(ins); mir_pack_swizzle_alu(ins); diff --git a/src/panfrost/midgard/midgard_nir.h b/src/panfrost/midgard/midgard_nir.h index 85eadd34631..4efd3c5c9f1 100644 --- a/src/panfrost/midgard/midgard_nir.h +++ b/src/panfrost/midgard/midgard_nir.h @@ -3,3 +3,4 @@ bool 
midgard_nir_lower_algebraic_late(nir_shader *shader); bool midgard_nir_scale_trig(nir_shader *shader); +bool midgard_nir_cancel_inot(nir_shader *shader); diff --git a/src/panfrost/midgard/midgard_nir_algebraic.py b/src/panfrost/midgard/midgard_nir_algebraic.py index d7eceeba869..8dc72bcf561 100644 --- a/src/panfrost/midgard/midgard_nir_algebraic.py +++ b/src/panfrost/midgard/midgard_nir_algebraic.py @@ -113,6 +113,11 @@ constant_switch = [ (('ult32', '#a', 'b'), ('inot', ('uge32', a, b))) ] +# Cancel double inversion, inot(inot(a)) -> a, since the above switching happens after algebraic stuff is done +cancel_inot = [ + (('inot', ('inot', a)), a) +] + # Midgard scales fsin/fcos arguments by pi. # Pass must be run only once, after the main loop @@ -140,6 +145,9 @@ def run(): print(nir_algebraic.AlgebraicPass("midgard_nir_scale_trig", scale_trig).render()) + print(nir_algebraic.AlgebraicPass("midgard_nir_cancel_inot", + cancel_inot).render()) + if __name__ == '__main__': main() -- 2.30.2