From 00f838fa730f5c765902fe2e5ce9754df5276e91 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 27 Feb 2019 09:56:18 -0500 Subject: [PATCH] freedreno/ir3: track register pressure in sched Not a perfect solution, and the "pressure" target is hard-coded. But it doesn't really seem to hurt much in the common case, and avoids exploding register usage in dEQP ssbo tests. So this should serve as a stop-gap solution until I have time to re-write the scheduler. Hurts slightly in instruction count, but gains (reduces) slightly the register usage in shader-db. Fixes ~150 dEQP-GLES31.functional.ssbo.* that were failing due to RA fail. Signed-off-by: Rob Clark --- src/freedreno/ir3/ir3_depth.c | 2 +- src/freedreno/ir3/ir3_sched.c | 97 ++++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 9 deletions(-) diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c index e0500b43161..071b5e3c528 100644 --- a/src/freedreno/ir3/ir3_depth.c +++ b/src/freedreno/ir3/ir3_depth.c @@ -90,7 +90,7 @@ int ir3_delayslots(struct ir3_instruction *assigner, * handled with sync bits */ - if (is_meta(assigner)) + if (is_meta(assigner) || is_meta(consumer)) return 0; if (writes_addr(assigner)) diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c index 6552980d90c..4380fdf2f9a 100644 --- a/src/freedreno/ir3/ir3_sched.c +++ b/src/freedreno/ir3/ir3_sched.c @@ -55,6 +55,7 @@ struct ir3_sched_ctx { struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/ struct ir3_instruction *addr; /* current a0.x user, if any */ struct ir3_instruction *pred; /* current p0.x user, if any */ + int live_values; /* estimate of current live values */ bool error; }; @@ -63,6 +64,77 @@ static bool is_sfu_or_mem(struct ir3_instruction *instr) return is_sfu(instr) || is_mem(instr); } +static void +unuse_each_src(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) +{ + struct ir3_instruction *src; + + foreach_ssa_src_n(src, n, instr) { + if
(__is_false_dep(instr, n)) + continue; + if (instr->block != src->block) + continue; + if ((src->opc == OPC_META_FI) || (src->opc == OPC_META_FO)) { + unuse_each_src(ctx, src); + } else { + debug_assert(src->use_count > 0); + + if (--src->use_count == 0) { + ctx->live_values -= dest_regs(src); + debug_assert(ctx->live_values >= 0); + } + } + } +} + +static void +use_each_src(struct ir3_instruction *instr) +{ + struct ir3_instruction *src; + + foreach_ssa_src_n(src, n, instr) { + if (__is_false_dep(instr, n)) + continue; + if (instr->block != src->block) + continue; + if ((src->opc == OPC_META_FI) || (src->opc == OPC_META_FO)) { + use_each_src(src); + } else { + src->use_count++; + } + } +} + +static void +update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) +{ + if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO)) + return; + + ctx->live_values += dest_regs(instr); + unuse_each_src(ctx, instr); +} + +/* This is *slightly* different than how ir3_cp uses use_count, in that + * we just track it per block (because we schedule a block at a time) and + * because we don't track meta instructions and false dependencies (since + * they don't contribute real register pressure). 
+ */ +static void +update_use_count(struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + instr->use_count = 0; + } + + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO)) + continue; + + use_each_src(instr); + } +} + #define NULL_INSTR ((void *)~0) static void @@ -105,6 +177,8 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) list_addtail(&instr->node, &instr->block->instr_list); ctx->scheduled = instr; + update_live_values(ctx, instr); + if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) { clear_cache(ctx, NULL); } else { @@ -126,7 +200,7 @@ deepest(struct ir3_instruction **srcs, unsigned nsrcs) return NULL; for (; i < nsrcs; i++) - if (srcs[i] && (srcs[i]->depth > d->depth)) + if (srcs[i] && (srcs[i]->sun > d->sun)) d = srcs[id = i]; srcs[id] = NULL; @@ -432,14 +506,18 @@ find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, if (!candidate) continue; - delay = delay_calc(ctx->block, candidate, soft, false); - if (delay < min_delay) { - best_instr = candidate; - min_delay = delay; + if (ctx->live_values > 16*4) { + /* under register pressure, only care about reducing live values: */ + if (!best_instr || (candidate->sun > best_instr->sun)) + best_instr = candidate; + } else { + delay = delay_calc(ctx->block, candidate, soft, false); + if ((delay < min_delay) || + ((delay <= (min_delay + 2)) && (candidate->sun > best_instr->sun))) { + best_instr = candidate; + min_delay = delay; + } } - - if (min_delay == 0) - break; } return best_instr; @@ -714,6 +792,8 @@ int ir3_sched(struct ir3 *ir) ir3_clear_mark(ir); list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + ctx.live_values = 0; + update_use_count(block); sched_block(&ctx, block); } @@ -723,6 +803,7 @@ int ir3_sched(struct ir3 *ir) if (ctx.error) return -1; + return 0; } -- 2.30.2