nir: Add nir_foreach_shader_in/out_variable helpers
[mesa.git] / src / panfrost / midgard / midgard_opt_dce.c
index 28f0886fe318ba0bedb1e9b687e00b82757fe57b..86812b5b0fe5d628403494c4ec253d42b5f88d12 100644 (file)
  */
 
 #include "compiler.h"
+#include "util/u_memory.h"
+#include "midgard_ops.h"
 
-/* Basic dead code elimination on the MIR itself */
+/* SIMD-aware dead code elimination. Perform liveness analysis step-by-step,
+ * removing dead components. If an instruction ends up with a zero mask, the
+ * instruction in total is dead and should be removed. */
 
-bool
-midgard_opt_dead_code_eliminate(compiler_context *ctx, midgard_block *block)
+/* Whether the write mask of ins is eligible for trimming. */
+static bool
+can_cull_mask(compiler_context *ctx, midgard_instruction *ins)
+{
+        /* Destinations at/past temp_count, and blend_src1, are left alone */
+        if (ins->dest >= ctx->temp_count || ins->dest == ctx->blend_src1)
+                return false;
+
+        /* Only load/store instructions have a further restriction */
+        if (ins->type != TAG_LOAD_STORE_4)
+                return true;
+
+        /* Load/store ops flagged LDST_SPECIAL_MASK keep their mask as-is */
+        return !(load_store_opcode_props[ins->load_store.op].props & LDST_SPECIAL_MASK);
+}
+
+/* True when ins has an empty write mask and is not a compact branch, a
+ * load/store flagged LDST_SIDE_FX, or a texture barrier. */
+static bool
+can_dce(midgard_instruction *ins)
+{
+        /* Anything still writing a component, and any branch, must stay */
+        if (ins->mask || ins->compact_branch)
+                return false;
+
+        switch (ins->type) {
+        case TAG_LOAD_STORE_4:
+                return !(load_store_opcode_props[ins->load_store.op].props & LDST_SIDE_FX);
+
+        case TAG_TEXTURE_4:
+                return ins->texture.op != TEXTURE_OP_BARRIER;
+
+        default:
+                return true;
+        }
+}
+
+/* Run dead code elimination over a single block. First narrows the write
+ * mask of every instruction that can_cull_mask() accepts, using a running
+ * liveness array, then removes every instruction that can_dce() accepts.
+ * Returns true if any mask changed or any instruction was removed. */
+
+static bool
+midgard_opt_dead_code_eliminate_block(compiler_context *ctx, midgard_block *block)
 {
         bool progress = false;
 
+        /* Working liveness array seeded from the block's live_out, sized for
+         * temp_count uint16_t entries (presumably a heap copy; it is released
+         * with free() below) */
+        uint16_t *live = mem_dup(block->base.live_out, ctx->temp_count * sizeof(uint16_t));
+
+        /* Pass 1: visit instructions with the _rev iterator, trimming masks */
+        mir_foreach_instr_in_block_rev(block, ins) {
+                if (can_cull_mask(ctx, ins)) {
+                        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
+                        unsigned round_size = type_size;
+                        unsigned oldmask = ins->mask;
+
+                        /* Make sure we're packable */
+                        if (type_size == 16 && ins->type == TAG_LOAD_STORE_4)
+                                round_size = 32;
+
+                        /* Presumably: widen the live bytemask of the
+                         * destination to round_size granularity, then convert
+                         * it to a per-component mask at type_size */
+                        unsigned rounded = mir_round_bytemask_up(live[ins->dest], round_size);
+                        unsigned cmask = mir_from_bytemask(rounded, type_size);
+
+                        /* Only ever clear mask bits, never set new ones */
+                        ins->mask &= cmask;
+                        progress |= (ins->mask != oldmask);
+                }
+
+                /* Update the running liveness with this instruction, after
+                 * its mask has (possibly) been trimmed */
+                mir_liveness_ins_update(live, ins, ctx->temp_count);
+        }
+
+        /* Pass 2: drop instructions that can_dce() reports as removable */
         mir_foreach_instr_in_block_safe(block, ins) {
-                if (ins->type != TAG_ALU_4) continue;
-                if (ins->compact_branch) continue;
+                if (can_dce(ins)) {
+                        mir_remove_instruction(ins);
+                        progress = true;
+                }
+        }
 
-                if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
-                if (mir_is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue;
+        free(live);
 
-                mir_remove_instruction(ins);
-                progress = true;
+        return progress;
+}
+
+bool
+midgard_opt_dead_code_eliminate(compiler_context *ctx)
+{
+        /* We track liveness. In fact, it's ok if we assume more things are
+         * live than they actually are, that just reduces the effectiveness of
+         * this iteration slightly. And DCE has the effect of strictly reducing
+         * liveness, so we can run DCE across all blocks while only computing
+         * liveness at the beginning. */
+
+        mir_invalidate_liveness(ctx);
+        mir_compute_liveness(ctx);
+
+        bool progress = false;
+
+        mir_foreach_block(ctx, block) {
+                progress |= midgard_opt_dead_code_eliminate_block(ctx, (midgard_block *) block);
         }
 
         return progress;
@@ -64,11 +145,11 @@ midgard_opt_dead_move_eliminate(compiler_context *ctx, midgard_block *block)
 
                 mir_foreach_instr_in_block_from(block, q, mir_next_op(ins)) {
                         /* Check if used */
-                        if (mir_has_arg(q, ins->ssa_args.dest))
+                        if (mir_has_arg(q, ins->dest))
                                 break;
 
                         /* Check if overwritten */
-                        if (q->ssa_args.dest == ins->ssa_args.dest) {
+                        if (q->dest == ins->dest) {
                                 /* Special case to vec4; component tracking is
                                  * harder */
 
@@ -85,51 +166,3 @@ midgard_opt_dead_move_eliminate(compiler_context *ctx, midgard_block *block)
 
         return progress;
 }
-
-/* An even further special case - to be run after RA runs but before
- * scheduling, eliminating moves that end up being useless even though they
- * appeared meaningful in the SSA. Part #2 of register coalescing. */
-
-void
-midgard_opt_post_move_eliminate(compiler_context *ctx, midgard_block *block, struct ra_graph *g)
-{
-        mir_foreach_instr_in_block_safe(block, ins) {
-                if (ins->type != TAG_ALU_4) continue;
-                if (ins->compact_branch) continue;
-                if (!OP_IS_MOVE(ins->alu.op)) continue;
-
-                /* Check we're to the same place post-RA */
-                unsigned iA = ins->ssa_args.dest;
-                unsigned iB = ins->ssa_args.src[1];
-
-                if ((iA < 0) || (iB < 0)) continue;
-
-                unsigned A = iA >= SSA_FIXED_MINIMUM ?
-                        SSA_REG_FROM_FIXED(iA) : 
-                        ra_get_node_reg(g, iA);
-
-                unsigned B = iB >= SSA_FIXED_MINIMUM ?
-                        SSA_REG_FROM_FIXED(iB) : 
-                        ra_get_node_reg(g, iB);
-
-                if (A != B) continue;
-
-                /* Check we're in the work zone. TODO: promoted
-                 * uniforms? */
-                if (A >= 16) continue;
-
-                /* Ensure there aren't side effects */
-                if (mir_nontrivial_source2_mod(ins)) continue;
-                if (mir_nontrivial_outmod(ins)) continue;
-                if (ins->mask != 0xF) continue;
-
-                /* We do want to rewrite to keep the graph sane for pipeline
-                 * register creation (TODO: is this the best approach?) */
-                mir_rewrite_index_dst(ctx, ins->ssa_args.src[1], ins->ssa_args.dest);
-
-                /* We're good to go */
-                mir_remove_instruction(ins);
-
-        }
-
-}