From: Rob Clark Date: Sat, 4 Apr 2020 17:29:53 +0000 (-0700) Subject: freedreno/ir3: rename depth->dce X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=751c11a8c7a6f54f87c62e4b49802bf80826ec42;p=mesa.git freedreno/ir3: rename depth->dce Since DCE is the only remaining function of this pass, after the pre-RA scheduler rewrite. Signed-off-by: Rob Clark Part-of: --- diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources index 07a59c97764..045d94ec6b7 100644 --- a/src/freedreno/Makefile.sources +++ b/src/freedreno/Makefile.sources @@ -29,7 +29,7 @@ ir3_SOURCES := \ ir3/ir3_context.h \ ir3/ir3_cp.c \ ir3/ir3_cf.c \ - ir3/ir3_depth.c \ + ir3/ir3_dce.c \ ir3/ir3_delay.c \ ir3/ir3_group.c \ ir3/ir3_image.c \ diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index f2dbc88f3bb..dcd5a5082a6 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -1149,7 +1149,6 @@ ir3_clear_mark(struct ir3 *ir) } } -/* note: this will destroy instr->depth, don't do it until after sched! */ unsigned ir3_count_instructions(struct ir3 *ir) { diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 7fb0d823d25..b58cd6eb4be 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -306,25 +306,9 @@ struct ir3_instruction { } input; }; - /* transient values used during various algorithms: */ - union { - /* The instruction depth is the max dependency distance to output. - * - * You can also think of it as the "cost", if we did any sort of - * optimization for register footprint. Ie. a value that is just - * result of moving a const to a reg would have a low cost, so to - * it could make sense to duplicate the instruction at various - * points where the result is needed to reduce register footprint. - */ - int depth; - /* When we get to the RA stage, we no longer need depth, but - * we do need instruction's position/name: - */ - struct { - uint16_t ip; - uint16_t name; - }; - }; + /* When we get to the RA stage, we need instruction's position/name: */ + uint16_t ip; + uint16_t name; /* used for per-pass extra instruction data. * @@ -1199,9 +1183,9 @@ unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr, bool soft, bool pred); void ir3_remove_nops(struct ir3 *ir); -/* depth calculation: */ +/* dead code elimination: */ struct ir3_shader_variant; -void ir3_depth(struct ir3 *ir, struct ir3_shader_variant *so); +void ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so); /* fp16 conversion folding */ void ir3_cf(struct ir3 *ir); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 752adc8f1b7..365870509b3 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -3640,9 +3640,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, ir3_debug_print(ir, "AFTER GROUPING"); - ir3_depth(ir, so); + ir3_dce(ir, so); - ir3_debug_print(ir, "AFTER DEPTH"); + ir3_debug_print(ir, "AFTER DCE"); /* do Sethi–Ullman numbering before scheduling: */ ir3_sun(ir); diff --git a/src/freedreno/ir3/ir3_dce.c b/src/freedreno/ir3/ir3_dce.c new file mode 100644 index 00000000000..0bd8af537f4 --- /dev/null +++ b/src/freedreno/ir3/ir3_dce.c @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "util/u_math.h" + +#include "ir3.h" +#include "ir3_shader.h" + +/* + * Dead code elimination: + */ + +static void +instr_dce(struct ir3_instruction *instr, bool falsedep) +{ + struct ir3_instruction *src; + + /* don't mark falsedep's as used, but otherwise process them normally: */ + if (!falsedep) + instr->flags &= ~IR3_INSTR_UNUSED; + + if (ir3_instr_check_mark(instr)) + return; + + foreach_ssa_src_n (src, i, instr) { + instr_dce(src, __is_false_dep(instr, i)); + } +} + +static bool +remove_unused_by_block(struct ir3_block *block) +{ + bool progress = false; + foreach_instr_safe (instr, &block->instr_list) { + if (instr->opc == OPC_END || instr->opc == OPC_CHSH || instr->opc == OPC_CHMASK) + continue; + if (instr->flags & IR3_INSTR_UNUSED) { + if (instr->opc == OPC_META_SPLIT) { + struct ir3_instruction *src = ssa(instr->regs[1]); + /* tex (cat5) instructions have a writemask, so we can + * mask off unused components. Other instructions do not. + */ + if (src && is_tex_or_prefetch(src) && (src->regs[0]->wrmask > 1)) { + src->regs[0]->wrmask &= ~(1 << instr->split.off); + + /* prune no-longer needed right-neighbors. We could + * probably do the same for left-neighbors (ie. tex + * fetch that only need .yw components), but that + * makes RA a bit more confusing than it already is + */ + struct ir3_instruction *n = instr; + while (n && n->cp.right) + n = n->cp.right; + while (n->flags & IR3_INSTR_UNUSED) { + n = n->cp.left; + if (!n) + break; + n->cp.right = NULL; + } + } + } + + /* prune false-deps, etc: */ + foreach_ssa_use (use, instr) + foreach_ssa_srcp_n (srcp, n, use) + if (*srcp == instr) + *srcp = NULL; + + list_delinit(&instr->node); + progress = true; + } + } + return progress; +} + +static bool +find_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so) +{ + unsigned i; + bool progress = false; + + ir3_clear_mark(ir); + + /* initially mark everything as unused, we'll clear the flag as we + * visit the instructions: + */ + foreach_block (block, &ir->block_list) { + foreach_instr (instr, &block->instr_list) { + /* special case, if pre-fs texture fetch used, we cannot + * eliminate the barycentric i/j input + */ + if (so->num_sampler_prefetch && + (instr->opc == OPC_META_INPUT) && + (instr->input.sysval == SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL)) + continue; + instr->flags |= IR3_INSTR_UNUSED; + } + } + + struct ir3_instruction *out; + foreach_output (out, ir) + instr_dce(out, false); + + foreach_block (block, &ir->block_list) { + for (i = 0; i < block->keeps_count; i++) + instr_dce(block->keeps[i], false); + + /* We also need to account for if-condition: */ + if (block->condition) + instr_dce(block->condition, false); + } + + /* remove un-used instructions: */ + foreach_block (block, &ir->block_list) { + progress |= remove_unused_by_block(block); + } + + /* fixup wrmask of split instructions to account for adjusted tex + * wrmask's: + */ + foreach_block (block, &ir->block_list) { + foreach_instr (instr, &block->instr_list) { + if (instr->opc != OPC_META_SPLIT) + continue; + + struct ir3_instruction *src = ssa(instr->regs[1]); + if (!is_tex_or_prefetch(src)) + continue; + + instr->regs[1]->wrmask = src->regs[0]->wrmask; + } + } + + /* note that we can end up with unused indirects, but we should + * not end up with unused predicates. + */ + for (i = 0; i < ir->a0_users_count; i++) { + struct ir3_instruction *instr = ir->a0_users[i]; + if (instr && (instr->flags & IR3_INSTR_UNUSED)) + ir->a0_users[i] = NULL; + } + + for (i = 0; i < ir->a1_users_count; i++) { + struct ir3_instruction *instr = ir->a1_users[i]; + if (instr && (instr->flags & IR3_INSTR_UNUSED)) + ir->a1_users[i] = NULL; + } + + /* cleanup unused inputs: */ + struct ir3_instruction *in; + foreach_input_n (in, n, ir) + if (in->flags & IR3_INSTR_UNUSED) + ir->inputs[n] = NULL; + + return progress; +} + +void +ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so) +{ + void *mem_ctx = ralloc_context(NULL); + bool progress; + + ir3_find_ssa_uses(ir, mem_ctx, true); + + do { + progress = find_and_remove_unused(ir, so); + } while (progress); + + ralloc_free(mem_ctx); +} diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c deleted file mode 100644 index 90a0ddef51f..00000000000 --- a/src/freedreno/ir3/ir3_depth.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "util/u_math.h" - -#include "ir3.h" -#include "ir3_shader.h" - -/* - * Instruction Depth: - * - * Calculates weighted instruction depth, ie. the sum of # of needed - * instructions plus delay slots back to original input (ie INPUT or - * CONST). That is to say, an instructions depth is: - * - * depth(instr) { - * d = 0; - * // for each src register: - * foreach (src in instr->regs[1..n]) - * d = max(d, delayslots(src->instr, n) + depth(src->instr)); - * return d + 1; - * } - * - * After an instruction's depth is calculated, it is inserted into the - * blocks depth sorted list, which is used by the scheduling pass. - */ - -static void -ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, bool falsedep) -{ - struct ir3_instruction *src; - - /* don't mark falsedep's as used, but otherwise process them normally: */ - if (!falsedep) - instr->flags &= ~IR3_INSTR_UNUSED; - - if (ir3_instr_check_mark(instr)) - return; - - instr->depth = 0; - - foreach_ssa_src_n (src, i, instr) { - unsigned sd; - - /* visit child to compute it's depth: */ - ir3_instr_depth(src, boost, __is_false_dep(instr, i)); - - /* for array writes, no need to delay on previous write: */ - if (i == 0) - continue; - - sd = ir3_delayslots(src, instr, i, true) + src->depth; - sd += boost; - - instr->depth = MAX2(instr->depth, sd); - } - - if (!is_meta(instr)) - instr->depth++; -} - -static bool -remove_unused_by_block(struct ir3_block *block) -{ - bool progress = false; - foreach_instr_safe (instr, &block->instr_list) { - if (instr->opc == OPC_END || instr->opc == OPC_CHSH || instr->opc == OPC_CHMASK) - continue; - if (instr->flags & IR3_INSTR_UNUSED) { - if (instr->opc == OPC_META_SPLIT) { - struct ir3_instruction *src = ssa(instr->regs[1]); - /* tex (cat5) instructions have a writemask, so we can - * mask off unused components. Other instructions do not. - */ - if (src && is_tex_or_prefetch(src) && (src->regs[0]->wrmask > 1)) { - src->regs[0]->wrmask &= ~(1 << instr->split.off); - - /* prune no-longer needed right-neighbors. We could - * probably do the same for left-neighbors (ie. tex - * fetch that only need .yw components), but that - * makes RA a bit more confusing than it already is - */ - struct ir3_instruction *n = instr; - while (n && n->cp.right) - n = n->cp.right; - while (n->flags & IR3_INSTR_UNUSED) { - n = n->cp.left; - if (!n) - break; - n->cp.right = NULL; - } - } - } - - /* prune false-deps, etc: */ - foreach_ssa_use (use, instr) - foreach_ssa_srcp_n (srcp, n, use) - if (*srcp == instr) - *srcp = NULL; - - list_delinit(&instr->node); - progress = true; - } - } - return progress; -} - -static bool -compute_depth_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so) -{ - unsigned i; - bool progress = false; - - ir3_clear_mark(ir); - - /* initially mark everything as unused, we'll clear the flag as we - * visit the instructions: - */ - foreach_block (block, &ir->block_list) { - foreach_instr (instr, &block->instr_list) { - /* special case, if pre-fs texture fetch used, we cannot - * eliminate the barycentric i/j input - */ - if (so->num_sampler_prefetch && - (instr->opc == OPC_META_INPUT) && - (instr->input.sysval == SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL)) - continue; - instr->flags |= IR3_INSTR_UNUSED; - } - } - - struct ir3_instruction *out; - foreach_output (out, ir) - ir3_instr_depth(out, 0, false); - - foreach_block (block, &ir->block_list) { - for (i = 0; i < block->keeps_count; i++) - ir3_instr_depth(block->keeps[i], 0, false); - - /* We also need to account for if-condition: */ - if (block->condition) - ir3_instr_depth(block->condition, 6, false); - } - - /* remove un-used instructions: */ - foreach_block (block, &ir->block_list) { - progress |= remove_unused_by_block(block); - } - - /* fixup wrmask of split instructions to account for adjusted tex - * wrmask's: - */ - foreach_block (block, &ir->block_list) { - foreach_instr (instr, &block->instr_list) { - if (instr->opc != OPC_META_SPLIT) - continue; - - struct ir3_instruction *src = ssa(instr->regs[1]); - if (!is_tex_or_prefetch(src)) - continue; - - instr->regs[1]->wrmask = src->regs[0]->wrmask; - } - } - - /* note that we can end up with unused indirects, but we should - * not end up with unused predicates. - */ - for (i = 0; i < ir->a0_users_count; i++) { - struct ir3_instruction *instr = ir->a0_users[i]; - if (instr && (instr->flags & IR3_INSTR_UNUSED)) - ir->a0_users[i] = NULL; - } - - for (i = 0; i < ir->a1_users_count; i++) { - struct ir3_instruction *instr = ir->a1_users[i]; - if (instr && (instr->flags & IR3_INSTR_UNUSED)) - ir->a1_users[i] = NULL; - } - - /* cleanup unused inputs: */ - struct ir3_instruction *in; - foreach_input_n (in, n, ir) - if (in->flags & IR3_INSTR_UNUSED) - ir->inputs[n] = NULL; - - return progress; -} - -void -ir3_depth(struct ir3 *ir, struct ir3_shader_variant *so) -{ - void *mem_ctx = ralloc_context(NULL); - bool progress; - - ir3_find_ssa_uses(ir, mem_ctx, true); - - do { - progress = compute_depth_and_remove_unused(ir, so); - } while (progress); - - ralloc_free(mem_ctx); -} diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c index b18a3d27e34..753a9919ca0 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -72,7 +72,6 @@ static void print_instr_name(struct ir3_instruction *instr, bool flags) #endif printf("%04u:", instr->name); printf("%04u:", instr->ip); - printf("%03d:", instr->depth); if (instr->flags & IR3_INSTR_UNUSED) { printf("XXX: "); } else { diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index 69103341158..068d4262ee7 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -55,8 +55,8 @@ libfreedreno_ir3_files = files( 'ir3_context.h', 'ir3_cf.c', 'ir3_cp.c', + 'ir3_dce.c', 'ir3_delay.c', - 'ir3_depth.c', 'ir3_group.c', 'ir3_image.c', 'ir3_image.h',