From d95a6e3a0ca2d4a420306dd078cea05d3f21c865 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 6 May 2020 10:20:14 -0700 Subject: [PATCH] freedreno/ir3/sched: avoid scheduling outputs If an instruction's only use is as an output, and it increases register pressure, then try to avoid scheduling it until there are no other options. A semi-common pattern is `fragcolN.a = 1.0`, this pushes all these immed loads to the end of the shader. Signed-off-by: Rob Clark Part-of: --- src/freedreno/ir3/ir3.h | 14 ++++++ src/freedreno/ir3/ir3_ra.h | 14 ------ src/freedreno/ir3/ir3_sched.c | 95 ++++++++++++++++++++++++++++++++--- 3 files changed, 101 insertions(+), 22 deletions(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 9ec324e4e4a..247dca19564 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -846,6 +846,20 @@ static inline unsigned dest_regs(struct ir3_instruction *instr) return util_last_bit(instr->regs[0]->wrmask); } +static inline bool +writes_gpr(struct ir3_instruction *instr) +{ + if (dest_regs(instr) == 0) + return false; + /* is dest a normal temp register: */ + struct ir3_register *reg = instr->regs[0]; + debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))); + if ((reg_num(reg) == REG_A0) || + (reg->num == regid(REG_P0, 0))) + return false; + return true; +} + static inline bool writes_addr0(struct ir3_instruction *instr) { if (instr->regs_count > 0) { diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h index 35fb618c49a..7acfdd0443d 100644 --- a/src/freedreno/ir3/ir3_ra.h +++ b/src/freedreno/ir3/ir3_ra.h @@ -205,20 +205,6 @@ scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n) return ra_name(ctx, &ctx->instrd[instr->ip]) + n; } -static inline bool -writes_gpr(struct ir3_instruction *instr) -{ - if (dest_regs(instr) == 0) - return false; - /* is dest a normal temp register: */ - struct ir3_register *reg = instr->regs[0]; - debug_assert(!(reg->flags & (IR3_REG_CONST | 
IR3_REG_IMMED))); - if ((reg_num(reg) == REG_A0) || - (reg->num == regid(REG_P0, 0))) - return false; - return true; -} - #define NO_NAME ~0 /* diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c index 46448c10b4b..b4f1bc0a187 100644 --- a/src/freedreno/ir3/ir3_sched.c +++ b/src/freedreno/ir3/ir3_sched.c @@ -124,6 +124,18 @@ struct ir3_sched_node { * If so, we should prioritize it when possible */ bool kill_path; + + /* This node represents a shader output. A semi-common pattern in + * shaders is something along the lines of: + * + * fragcolor.w = 1.0 + * + * Which we'd prefer to schedule as late as possible, since it + * produces a live value that is never killed/consumed. So detect + * outputs up-front, and avoid scheduling them unless they reduce + * register pressure (or at least are neutral) + */ + bool output; }; #define foreach_sched_node(__n, __list) \ @@ -394,12 +406,18 @@ live_effect(struct ir3_instruction *instr) return new_live - freed_live; } +static struct ir3_sched_node * choose_instr_inc(struct ir3_sched_ctx *ctx, + struct ir3_sched_notes *notes, bool avoid_output); + /** * Chooses an instruction to schedule using the Goodman/Hsu (1988) CSR (Code * Scheduling for Register pressure) heuristic. + * + * Only handles the case of choosing instructions that reduce register pressure + * or are even. 
*/ static struct ir3_sched_node * -choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) +choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) { struct ir3_sched_node *chosen = NULL; @@ -422,7 +440,7 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) } if (chosen) { - di(chosen->instr, "csr: chose (freed+ready)"); + di(chosen->instr, "dec: chose (freed+ready)"); return chosen; } @@ -440,7 +458,7 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) } if (chosen) { - di(chosen->instr, "csr: chose (freed)"); + di(chosen->instr, "dec: chose (freed)"); return chosen; } @@ -468,7 +486,7 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) } if (chosen) { - di(chosen->instr, "csr: chose (neutral+ready)"); + di(chosen->instr, "dec: chose (neutral+ready)"); return chosen; } @@ -484,10 +502,23 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) } if (chosen) { - di(chosen->instr, "csr: chose (neutral)"); + di(chosen->instr, "dec: chose (neutral)"); return chosen; } + return choose_instr_inc(ctx, notes, true); +} + +/** + * When we can't choose an instruction that reduces register pressure or + * is neutral, we end up here to try and pick the least bad option. + */ +static struct ir3_sched_node * +choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, + bool avoid_output) +{ + struct ir3_sched_node *chosen = NULL; + /* * From hear on out, we are picking something that increases * register pressure. So try to pick something which will @@ -497,6 +528,9 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) /* Pick the max delay of the remaining ready set. 
*/ foreach_sched_node (n, &ctx->dag->heads) { + if (avoid_output && n->output) + continue; + unsigned d = ir3_delay_calc(ctx->block, n->instr, false, false); if (d > 0) @@ -514,12 +548,15 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) } if (chosen) { - di(chosen->instr, "csr: chose (distance+ready)"); + di(chosen->instr, "inc: chose (distance+ready)"); return chosen; } /* Pick the max delay of the remaining leaders. */ foreach_sched_node (n, &ctx->dag->heads) { + if (avoid_output && n->output) + continue; + if (!check_instr(ctx, notes, n->instr)) continue; @@ -532,7 +569,7 @@ choose_instr_csr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) } if (chosen) { - di(chosen->instr, "csr: chose (distance)"); + di(chosen->instr, "inc: chose (distance)"); return chosen; } @@ -594,7 +631,11 @@ choose_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) if (chosen) return chosen->instr; - chosen = choose_instr_csr(ctx, notes); + chosen = choose_instr_dec(ctx, notes); + if (chosen) + return chosen->instr; + + chosen = choose_instr_inc(ctx, notes, false); if (chosen) return chosen->instr; @@ -759,6 +800,39 @@ mark_kill_path(struct ir3_instruction *instr) } } +/* Is it an output? */ +static bool +is_output_collect(struct ir3_instruction *instr) +{ + struct ir3 *ir = instr->block->shader; + + for (unsigned i = 0; i < ir->outputs_count; i++) { + struct ir3_instruction *collect = ir->outputs[i]; + assert(collect->opc == OPC_META_COLLECT); + if (instr == collect) + return true; + } + + return false; +} + +/* Is its only use as output? 
*/ +static bool +is_output_only(struct ir3_instruction *instr) +{ + if (!writes_gpr(instr)) + return false; + + if (!(instr->regs[0]->flags & IR3_REG_SSA)) + return false; + + foreach_ssa_use (use, instr) + if (!is_output_collect(use)) + return false; + + return true; +} + static void sched_node_add_deps(struct ir3_instruction *instr) { @@ -777,6 +851,11 @@ sched_node_add_deps(struct ir3_instruction *instr) if (is_kill(instr) || is_input(instr)) { mark_kill_path(instr); } + + if (is_output_only(instr)) { + struct ir3_sched_node *n = instr->data; + n->output = true; + } } static void -- 2.30.2