freedreno/ir3: rename depth->dce
author Rob Clark <robdclark@chromium.org>
Sat, 4 Apr 2020 17:29:53 +0000 (10:29 -0700)
committer Marge Bot <eric+marge@anholt.net>
Mon, 13 Apr 2020 20:47:28 +0000 (20:47 +0000)
Rename the pass from "depth" to "dce", since dead code elimination is the
only remaining function of this pass after the pre-RA scheduler rewrite.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4440>

src/freedreno/Makefile.sources
src/freedreno/ir3/ir3.c
src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_dce.c [new file with mode: 0644]
src/freedreno/ir3/ir3_depth.c [deleted file]
src/freedreno/ir3/ir3_print.c
src/freedreno/ir3/meson.build

index 07a59c97764b6a17bca13c91bdca9ee8bf9de111..045d94ec6b773feec756a22714ec73fa20eaa101 100644 (file)
@@ -29,7 +29,7 @@ ir3_SOURCES := \
        ir3/ir3_context.h \
        ir3/ir3_cp.c \
        ir3/ir3_cf.c \
-       ir3/ir3_depth.c \
+       ir3/ir3_dce.c \
        ir3/ir3_delay.c \
        ir3/ir3_group.c \
        ir3/ir3_image.c \
index f2dbc88f3bb2bd0470a5891254eb72ee104eac11..dcd5a5082a6e1d46f18a4b969dd1e8aa47d7427d 100644 (file)
@@ -1149,7 +1149,6 @@ ir3_clear_mark(struct ir3 *ir)
        }
 }
 
-/* note: this will destroy instr->depth, don't do it until after sched! */
 unsigned
 ir3_count_instructions(struct ir3 *ir)
 {
index 7fb0d823d25e8aca38f189cef94beb499b1e0048..b58cd6eb4bec7f596f4c2c357739dee1e2d55cce 100644 (file)
@@ -306,25 +306,9 @@ struct ir3_instruction {
                } input;
        };
 
-       /* transient values used during various algorithms: */
-       union {
-               /* The instruction depth is the max dependency distance to output.
-                *
-                * You can also think of it as the "cost", if we did any sort of
-                * optimization for register footprint.  Ie. a value that is  just
-                * result of moving a const to a reg would have a low cost,  so to
-                * it could make sense to duplicate the instruction at various
-                * points where the result is needed to reduce register footprint.
-                */
-               int depth;
-               /* When we get to the RA stage, we no longer need depth, but
-                * we do need instruction's position/name:
-                */
-               struct {
-                       uint16_t ip;
-                       uint16_t name;
-               };
-       };
+       /* When we get to the RA stage, we need instruction's position/name: */
+       uint16_t ip;
+       uint16_t name;
 
        /* used for per-pass extra instruction data.
         *
@@ -1199,9 +1183,9 @@ unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
                bool soft, bool pred);
 void ir3_remove_nops(struct ir3 *ir);
 
-/* depth calculation: */
+/* dead code elimination: */
 struct ir3_shader_variant;
-void ir3_depth(struct ir3 *ir, struct ir3_shader_variant *so);
+void ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so);
 
 /* fp16 conversion folding */
 void ir3_cf(struct ir3 *ir);
index 752adc8f1b7ec1b2c3e91e2478a4785f030ae06c..365870509b3dd87764c200c32401c9f7e957c7c0 100644 (file)
@@ -3640,9 +3640,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 
        ir3_debug_print(ir, "AFTER GROUPING");
 
-       ir3_depth(ir, so);
+       ir3_dce(ir, so);
 
-       ir3_debug_print(ir, "AFTER DEPTH");
+       ir3_debug_print(ir, "AFTER DCE");
 
        /* do Sethi–Ullman numbering before scheduling: */
        ir3_sun(ir);
diff --git a/src/freedreno/ir3/ir3_dce.c b/src/freedreno/ir3/ir3_dce.c
new file mode 100644 (file)
index 0000000..0bd8af5
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "util/u_math.h"
+
+#include "ir3.h"
+#include "ir3_shader.h"
+
+/*
+ * Dead code elimination:
+ */
+
+static void
+instr_dce(struct ir3_instruction *instr, bool falsedep)
+{
+       struct ir3_instruction *src;
+
+       /* don't mark falsedep's as used, but otherwise process them normally: */
+       if (!falsedep)
+               instr->flags &= ~IR3_INSTR_UNUSED;
+
+       if (ir3_instr_check_mark(instr))
+               return;
+
+       foreach_ssa_src_n (src, i, instr) {
+               instr_dce(src, __is_false_dep(instr, i));
+       }
+}
+
+static bool
+remove_unused_by_block(struct ir3_block *block)
+{
+       bool progress = false;
+       foreach_instr_safe (instr, &block->instr_list) {
+               if (instr->opc == OPC_END || instr->opc == OPC_CHSH || instr->opc == OPC_CHMASK)
+                       continue;
+               if (instr->flags & IR3_INSTR_UNUSED) {
+                       if (instr->opc == OPC_META_SPLIT) {
+                               struct ir3_instruction *src = ssa(instr->regs[1]);
+                               /* tex (cat5) instructions have a writemask, so we can
+                                * mask off unused components.  Other instructions do not.
+                                */
+                               if (src && is_tex_or_prefetch(src) && (src->regs[0]->wrmask > 1)) {
+                                       src->regs[0]->wrmask &= ~(1 << instr->split.off);
+
+                                       /* prune no-longer needed right-neighbors.  We could
+                                        * probably do the same for left-neighbors (ie. tex
+                                        * fetch that only need .yw components), but that
+                                        * makes RA a bit more confusing than it already is
+                                        */
+                                       struct ir3_instruction *n = instr;
+                                       while (n && n->cp.right)
+                                               n = n->cp.right;
+                                       while (n->flags & IR3_INSTR_UNUSED) {
+                                               n = n->cp.left;
+                                               if (!n)
+                                                       break;
+                                               n->cp.right = NULL;
+                                       }
+                               }
+                       }
+
+                       /* prune false-deps, etc: */
+                       foreach_ssa_use (use, instr)
+                               foreach_ssa_srcp_n (srcp, n, use)
+                                       if (*srcp == instr)
+                                               *srcp = NULL;
+
+                       list_delinit(&instr->node);
+                       progress = true;
+               }
+       }
+       return progress;
+}
+
+static bool
+find_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so)
+{
+       unsigned i;
+       bool progress = false;
+
+       ir3_clear_mark(ir);
+
+       /* initially mark everything as unused, we'll clear the flag as we
+        * visit the instructions:
+        */
+       foreach_block (block, &ir->block_list) {
+               foreach_instr (instr, &block->instr_list) {
+                       /* special case, if pre-fs texture fetch used, we cannot
+                        * eliminate the barycentric i/j input
+                        */
+                       if (so->num_sampler_prefetch &&
+                                       (instr->opc == OPC_META_INPUT) &&
+                                       (instr->input.sysval == SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL))
+                               continue;
+                       instr->flags |= IR3_INSTR_UNUSED;
+               }
+       }
+
+       struct ir3_instruction *out;
+       foreach_output (out, ir)
+               instr_dce(out, false);
+
+       foreach_block (block, &ir->block_list) {
+               for (i = 0; i < block->keeps_count; i++)
+                       instr_dce(block->keeps[i], false);
+
+               /* We also need to account for if-condition: */
+               if (block->condition)
+                       instr_dce(block->condition, false);
+       }
+
+       /* remove un-used instructions: */
+       foreach_block (block, &ir->block_list) {
+               progress |= remove_unused_by_block(block);
+       }
+
+       /* fixup wrmask of split instructions to account for adjusted tex
+        * wrmask's:
+        */
+       foreach_block (block, &ir->block_list) {
+               foreach_instr (instr, &block->instr_list) {
+                       if (instr->opc != OPC_META_SPLIT)
+                               continue;
+
+                       struct ir3_instruction *src = ssa(instr->regs[1]);
+                       if (!is_tex_or_prefetch(src))
+                               continue;
+
+                       instr->regs[1]->wrmask = src->regs[0]->wrmask;
+               }
+       }
+
+       /* note that we can end up with unused indirects, but we should
+        * not end up with unused predicates.
+        */
+       for (i = 0; i < ir->a0_users_count; i++) {
+               struct ir3_instruction *instr = ir->a0_users[i];
+               if (instr && (instr->flags & IR3_INSTR_UNUSED))
+                       ir->a0_users[i] = NULL;
+       }
+
+       for (i = 0; i < ir->a1_users_count; i++) {
+               struct ir3_instruction *instr = ir->a1_users[i];
+               if (instr && (instr->flags & IR3_INSTR_UNUSED))
+                       ir->a1_users[i] = NULL;
+       }
+
+       /* cleanup unused inputs: */
+       struct ir3_instruction *in;
+       foreach_input_n (in, n, ir)
+               if (in->flags & IR3_INSTR_UNUSED)
+                       ir->inputs[n] = NULL;
+
+       return progress;
+}
+
+void
+ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so)
+{
+       void *mem_ctx = ralloc_context(NULL);
+       bool progress;
+
+       ir3_find_ssa_uses(ir, mem_ctx, true);
+
+       do {
+               progress = find_and_remove_unused(ir, so);
+       } while (progress);
+
+       ralloc_free(mem_ctx);
+}
diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c
deleted file mode 100644 (file)
index 90a0dde..0000000
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- *    Rob Clark <robclark@freedesktop.org>
- */
-
-#include "util/u_math.h"
-
-#include "ir3.h"
-#include "ir3_shader.h"
-
-/*
- * Instruction Depth:
- *
- * Calculates weighted instruction depth, ie. the sum of # of needed
- * instructions plus delay slots back to original input (ie INPUT or
- * CONST).  That is to say, an instructions depth is:
- *
- *   depth(instr) {
- *     d = 0;
- *     // for each src register:
- *     foreach (src in instr->regs[1..n])
- *       d = max(d, delayslots(src->instr, n) + depth(src->instr));
- *     return d + 1;
- *   }
- *
- * After an instruction's depth is calculated, it is inserted into the
- * blocks depth sorted list, which is used by the scheduling pass.
- */
-
-static void
-ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, bool falsedep)
-{
-       struct ir3_instruction *src;
-
-       /* don't mark falsedep's as used, but otherwise process them normally: */
-       if (!falsedep)
-               instr->flags &= ~IR3_INSTR_UNUSED;
-
-       if (ir3_instr_check_mark(instr))
-               return;
-
-       instr->depth = 0;
-
-       foreach_ssa_src_n (src, i, instr) {
-               unsigned sd;
-
-               /* visit child to compute it's depth: */
-               ir3_instr_depth(src, boost, __is_false_dep(instr, i));
-
-               /* for array writes, no need to delay on previous write: */
-               if (i == 0)
-                       continue;
-
-               sd = ir3_delayslots(src, instr, i, true) + src->depth;
-               sd += boost;
-
-               instr->depth = MAX2(instr->depth, sd);
-       }
-
-       if (!is_meta(instr))
-               instr->depth++;
-}
-
-static bool
-remove_unused_by_block(struct ir3_block *block)
-{
-       bool progress = false;
-       foreach_instr_safe (instr, &block->instr_list) {
-               if (instr->opc == OPC_END || instr->opc == OPC_CHSH || instr->opc == OPC_CHMASK)
-                       continue;
-               if (instr->flags & IR3_INSTR_UNUSED) {
-                       if (instr->opc == OPC_META_SPLIT) {
-                               struct ir3_instruction *src = ssa(instr->regs[1]);
-                               /* tex (cat5) instructions have a writemask, so we can
-                                * mask off unused components.  Other instructions do not.
-                                */
-                               if (src && is_tex_or_prefetch(src) && (src->regs[0]->wrmask > 1)) {
-                                       src->regs[0]->wrmask &= ~(1 << instr->split.off);
-
-                                       /* prune no-longer needed right-neighbors.  We could
-                                        * probably do the same for left-neighbors (ie. tex
-                                        * fetch that only need .yw components), but that
-                                        * makes RA a bit more confusing than it already is
-                                        */
-                                       struct ir3_instruction *n = instr;
-                                       while (n && n->cp.right)
-                                               n = n->cp.right;
-                                       while (n->flags & IR3_INSTR_UNUSED) {
-                                               n = n->cp.left;
-                                               if (!n)
-                                                       break;
-                                               n->cp.right = NULL;
-                                       }
-                               }
-                       }
-
-                       /* prune false-deps, etc: */
-                       foreach_ssa_use (use, instr)
-                               foreach_ssa_srcp_n (srcp, n, use)
-                                       if (*srcp == instr)
-                                               *srcp = NULL;
-
-                       list_delinit(&instr->node);
-                       progress = true;
-               }
-       }
-       return progress;
-}
-
-static bool
-compute_depth_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so)
-{
-       unsigned i;
-       bool progress = false;
-
-       ir3_clear_mark(ir);
-
-       /* initially mark everything as unused, we'll clear the flag as we
-        * visit the instructions:
-        */
-       foreach_block (block, &ir->block_list) {
-               foreach_instr (instr, &block->instr_list) {
-                       /* special case, if pre-fs texture fetch used, we cannot
-                        * eliminate the barycentric i/j input
-                        */
-                       if (so->num_sampler_prefetch &&
-                                       (instr->opc == OPC_META_INPUT) &&
-                                       (instr->input.sysval == SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL))
-                               continue;
-                       instr->flags |= IR3_INSTR_UNUSED;
-               }
-       }
-
-       struct ir3_instruction *out;
-       foreach_output (out, ir)
-               ir3_instr_depth(out, 0, false);
-
-       foreach_block (block, &ir->block_list) {
-               for (i = 0; i < block->keeps_count; i++)
-                       ir3_instr_depth(block->keeps[i], 0, false);
-
-               /* We also need to account for if-condition: */
-               if (block->condition)
-                       ir3_instr_depth(block->condition, 6, false);
-       }
-
-       /* remove un-used instructions: */
-       foreach_block (block, &ir->block_list) {
-               progress |= remove_unused_by_block(block);
-       }
-
-       /* fixup wrmask of split instructions to account for adjusted tex
-        * wrmask's:
-        */
-       foreach_block (block, &ir->block_list) {
-               foreach_instr (instr, &block->instr_list) {
-                       if (instr->opc != OPC_META_SPLIT)
-                               continue;
-
-                       struct ir3_instruction *src = ssa(instr->regs[1]);
-                       if (!is_tex_or_prefetch(src))
-                               continue;
-
-                       instr->regs[1]->wrmask = src->regs[0]->wrmask;
-               }
-       }
-
-       /* note that we can end up with unused indirects, but we should
-        * not end up with unused predicates.
-        */
-       for (i = 0; i < ir->a0_users_count; i++) {
-               struct ir3_instruction *instr = ir->a0_users[i];
-               if (instr && (instr->flags & IR3_INSTR_UNUSED))
-                       ir->a0_users[i] = NULL;
-       }
-
-       for (i = 0; i < ir->a1_users_count; i++) {
-               struct ir3_instruction *instr = ir->a1_users[i];
-               if (instr && (instr->flags & IR3_INSTR_UNUSED))
-                       ir->a1_users[i] = NULL;
-       }
-
-       /* cleanup unused inputs: */
-       struct ir3_instruction *in;
-       foreach_input_n (in, n, ir)
-               if (in->flags & IR3_INSTR_UNUSED)
-                       ir->inputs[n] = NULL;
-
-       return progress;
-}
-
-void
-ir3_depth(struct ir3 *ir, struct ir3_shader_variant *so)
-{
-       void *mem_ctx = ralloc_context(NULL);
-       bool progress;
-
-       ir3_find_ssa_uses(ir, mem_ctx, true);
-
-       do {
-               progress = compute_depth_and_remove_unused(ir, so);
-       } while (progress);
-
-       ralloc_free(mem_ctx);
-}
index b18a3d27e3451716ffa493a73c02c1d1e86afe47..753a9919ca0fb7786861ef346301b9dea219bb59 100644 (file)
@@ -72,7 +72,6 @@ static void print_instr_name(struct ir3_instruction *instr, bool flags)
 #endif
        printf("%04u:", instr->name);
        printf("%04u:", instr->ip);
-       printf("%03d:", instr->depth);
        if (instr->flags & IR3_INSTR_UNUSED) {
                printf("XXX: ");
        } else {
index 69103341158753280d0c94f9ffd5d78b8a27608c..068d4262ee7acd1bec3bffcab4e2ac44d6f0428c 100644 (file)
@@ -55,8 +55,8 @@ libfreedreno_ir3_files = files(
   'ir3_context.h',
   'ir3_cf.c',
   'ir3_cp.c',
+  'ir3_dce.c',
   'ir3_delay.c',
-  'ir3_depth.c',
   'ir3_group.c',
   'ir3_image.c',
   'ir3_image.h',