From 8a5f2d9444879dc4c8b2b1f192b2a667a1278a2b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 24 Nov 2018 12:18:08 -0500 Subject: [PATCH] =?utf8?q?freedreno/ir3:=20add=20Sethi=E2=80=93Ullman=20nu?= =?utf8?q?mbering=20pass?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Rob Clark --- src/freedreno/Makefile.sources | 3 +- src/freedreno/ir3/ir3.h | 21 +++- src/freedreno/ir3/ir3_compiler_nir.c | 5 + src/freedreno/ir3/ir3_print.c | 3 +- src/freedreno/ir3/ir3_shader.c | 2 + src/freedreno/ir3/ir3_shader.h | 2 + src/freedreno/ir3/ir3_sun.c | 111 ++++++++++++++++++ src/freedreno/ir3/meson.build | 1 + .../drivers/freedreno/ir3/ir3_gallium.c | 8 +- 9 files changed, 149 insertions(+), 7 deletions(-) create mode 100644 src/freedreno/ir3/ir3_sun.c diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources index cbb5de934f7..d4c2103708d 100644 --- a/src/freedreno/Makefile.sources +++ b/src/freedreno/Makefile.sources @@ -40,7 +40,8 @@ ir3_SOURCES := \ ir3/ir3_ra.c \ ir3/ir3_sched.c \ ir3/ir3_shader.c \ - ir3/ir3_shader.h + ir3/ir3_shader.h \ + ir3/ir3_sun.c ir3_GENERATED_FILES := \ ir3/ir3_nir_trig.c diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 53e56edb3c4..e38ef9fcb66 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -29,8 +29,9 @@ #include "compiler/shader_enums.h" -#include "util/u_debug.h" +#include "util/bitscan.h" #include "util/list.h" +#include "util/u_debug.h" #include "instr-a3xx.h" @@ -292,6 +293,9 @@ struct ir3_instruction { */ void *data; + int sun; /* Sethi–Ullman number, used by sched */ + int use_count; /* currently just updated/used by cp */ + /* Used during CP and RA stages. 
For fanin and shader inputs/
 	 * outputs where we need a sequence of consecutive registers,
 	 * keep track of each src instructions left (ie 'n-1') and right
@@ -363,8 +367,6 @@ struct ir3_instruction {
 	/* Entry in ir3_block's instruction list: */
 	struct list_head node;
 
-	int use_count;      /* currently just updated/used by cp */
-
 #ifdef DEBUG
 	uint32_t serialno;
 #endif
@@ -443,6 +445,8 @@ struct ir3 {
 	/* List of ir3_array's: */
 	struct list_head array_list;
 
+	unsigned max_sun;   /* max Sethi–Ullman number */
+
 #ifdef DEBUG
 	unsigned block_count, instr_count;
 #endif
@@ -739,6 +743,14 @@ static inline bool is_meta(struct ir3_instruction *instr)
 	return (opc_cat(instr->opc) == -1);
 }
 
+static inline unsigned dest_regs(struct ir3_instruction *instr)
+{
+	/* zero unless the instr has regs and is not a store: */
+	bool has_dst = (instr->regs_count != 0) && !is_store(instr);
+
+	return has_dst ? util_last_bit(instr->regs[0]->wrmask) : 0;
+}
+
 static inline bool writes_addr(struct ir3_instruction *instr)
 {
 	if (instr->regs_count > 0) {
@@ -999,6 +1011,9 @@ void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);
 /* group neighbors and insert mov's to resolve conflicts: */
 void ir3_group(struct ir3 *ir);
 
+/* Sethi–Ullman numbering: */
+void ir3_sun(struct ir3 *ir);
+
 /* scheduling: */
 void ir3_sched_add_deps(struct ir3 *ir);
 int ir3_sched(struct ir3 *ir);
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index e6fe45daa12..1e3fbeb3117 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2610,6 +2610,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 		ir3_print(ir);
 	}
 
+	/* do Sethi–Ullman numbering before scheduling: */
+	ir3_sun(ir);
+
 	ret = ir3_sched(ir);
 	if (ret) {
 		DBG("SCHED failed!");
@@ -2708,6 +2711,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 	else
 		so->total_in = max_bary + 1;
 
+	so->max_sun = ir->max_sun;
+
 out:
 	if (ret) {
 		if (so->ir)
diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c
index 415024b985f..b69941da29e 100644
--- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -40,7 +40,8 @@ static void print_instr_name(struct ir3_instruction *instr) #endif printf("%04u:", instr->name); printf("%04u:", instr->ip); - printf("%03u: ", instr->depth); + printf("%03u:", instr->depth); + printf("%03u: ", instr->sun); if (instr->flags & IR3_INSTR_SY) printf("(sy)"); diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 3dcc3945ffe..7ebc0b4c853 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -393,6 +393,8 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) fprintf(out, "; %u (ss), %u (sy)\n", so->info.ss, so->info.sy); + fprintf(out, "; max_sun=%u\n", ir->max_sun); + /* print shader type specific info: */ switch (so->type) { case MESA_SHADER_VERTEX: diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 5fffaf9fb85..f8aa8522a55 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -344,6 +344,8 @@ struct ir3_shader_variant { */ unsigned branchstack; + unsigned max_sun; + /* the instructions length is in units of instruction groups * (4 instructions for a3xx, 16 instructions for a4xx.. 
each * instruction is 2 dwords): diff --git a/src/freedreno/ir3/ir3_sun.c b/src/freedreno/ir3/ir3_sun.c new file mode 100644 index 00000000000..7fea9a073e5 --- /dev/null +++ b/src/freedreno/ir3/ir3_sun.c @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + + +#include "util/u_math.h" + +#include "ir3.h" + +/* + * A simple pass to do Sethi–Ullman numbering, as described in "Generalizations + * of the Sethi-Ullman algorithm for register allocation"[1]. This is used by + * the scheduler pass. + * + * TODO this could probably be more clever about flow control, ie. if a src + * is computed in multiple paths into a block, I think we should only have to + * consider the worst-case. 
+ *
+ * [1] https://pdfs.semanticscholar.org/ae53/6010b214612c2571f483354c264b0b39c545.pdf
+ */
+
+/* Compute (and cache in instr->sun) the Sethi–Ullman number of 'instr',
+ * recursing into srcs from the same block; other srcs count as a[i] = 1:
+ */
+static unsigned
+number_instr(struct ir3_instruction *instr)
+{
+	if (ir3_instr_check_mark(instr))
+		return instr->sun;
+
+	struct ir3_instruction *src;
+	const unsigned nsrcs = __ssa_src_cnt(instr);
+	/* a zero-length VLA is undefined, so always keep one element: */
+	unsigned a[MAX2(nsrcs, 1)], b[MAX2(nsrcs, 1)];
+	unsigned i = 0;
+
+	/* TODO I think including false-deps in the calculation is the right
+	 * thing to do:
+	 */
+	foreach_ssa_src_n(src, n, instr) {
+		if (__is_false_dep(instr, n))
+			continue;
+		if (src->block != instr->block) {
+			a[i] = 1;
+		} else {
+			a[i] = number_instr(src);
+		}
+		b[i] = dest_regs(src);
+		i++;
+	}
+
+	/*
+	 * Rπ = max(aπ(1), bπ(1) + max(aπ(2), bπ(2) + max(..., bπ(k−1) + max(aπ(k), bπ(k)))...):
+	 *
+	 * ie. R(k) = max(a(k), b(k) + R(k+1)), so each b(k) is added only once:
+	 */
+	unsigned last_r = 0;
+
+	for (int k = i - 1; k >= 0; k--)
+		last_r = MAX2(a[k], b[k] + last_r);
+
+	instr->sun = MAX2(last_r, dest_regs(instr));
+
+	return instr->sun;
+}
+
+/* Number the shader outputs plus each block's keeps and condition, and
+ * record the largest number seen in ir->max_sun:
+ */
+void
+ir3_sun(struct ir3 *ir)
+{
+	unsigned max = 0;
+
+	ir3_clear_mark(ir);
+
+	for (unsigned i = 0; i < ir->noutputs; i++)
+		if (ir->outputs[i])
+			max = MAX2(max, number_instr(ir->outputs[i]));
+
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		for (unsigned i = 0; i < block->keeps_count; i++)
+			max = MAX2(max, number_instr(block->keeps[i]));
+		if (block->condition)
+			max = MAX2(max, number_instr(block->condition));
+	}
+
+	ir->max_sun = max;
+}
diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build
index bf99476311d..5f87ca6185b 100644
--- a/src/freedreno/ir3/meson.build
+++ b/src/freedreno/ir3/meson.build
@@ -56,6 +56,7 @@ libfreedreno_ir3_files = files(
   'ir3_sched.c',
   'ir3_shader.c',
   'ir3_shader.h',
+  'ir3_sun.c',
 )
 
 libfreedreno_ir3 = static_library(
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
index 7e7b699678f..1fe61273d32 100644
---
a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -52,7 +52,8 @@ dump_shader_info(struct ir3_shader_variant *v, struct pipe_debug_callback *debug "SHADER-DB: %s prog %d/%d: %u instructions, %u dwords\n" "SHADER-DB: %s prog %d/%d: %u half, %u full\n" "SHADER-DB: %s prog %d/%d: %u const, %u constlen\n" - "SHADER-DB: %s prog %d/%d: %u (ss), %u (sy)\n", + "SHADER-DB: %s prog %d/%d: %u (ss), %u (sy)\n" + "SHADER-DB: %s prog %d/%d: max_sun=%u\n", ir3_shader_stage(v->shader), v->shader->id, v->id, v->info.instrs_count, @@ -67,7 +68,10 @@ dump_shader_info(struct ir3_shader_variant *v, struct pipe_debug_callback *debug v->constlen, ir3_shader_stage(v->shader), v->shader->id, v->id, - v->info.ss, v->info.sy); + v->info.ss, v->info.sy, + ir3_shader_stage(v->shader), + v->shader->id, v->id, + v->max_sun); } struct ir3_shader_variant * -- 2.30.2