diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c
index f027d7b7a30..247221d3a03 100644
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@@ -59,6 +59,11 @@ struct ir3_sched_ctx {
 	bool error;
 };
 
+static bool is_scheduled(struct ir3_instruction *instr)
+{
+	return !!(instr->flags & IR3_INSTR_MARK);
+}
+
 static bool is_sfu_or_mem(struct ir3_instruction *instr)
 {
 	return is_sfu(instr) || is_mem(instr);
@@ -74,7 +79,7 @@ unuse_each_src(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 			continue;
 		if (instr->block != src->block)
 			continue;
-		if ((src->opc == OPC_META_FI) || (src->opc == OPC_META_FO)) {
+		if ((src->opc == OPC_META_COLLECT) || (src->opc == OPC_META_SPLIT)) {
 			unuse_each_src(ctx, src);
 		} else {
 			debug_assert(src->use_count > 0);
@@ -87,8 +92,32 @@ unuse_each_src(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 	}
 }
 
+static void clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr);
 static void use_instr(struct ir3_instruction *instr);
 
+/* transfers a use-count to new instruction, for cases where we
+ * "spill" address or predicate.  Note this might cause the
+ * previous instruction that loaded a0.x/p0.x to become live
+ * again, when we previously thought it was dead.
+ */
+static void
+transfer_use(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr,
+		struct ir3_instruction *new_instr)
+{
+	struct ir3_instruction *src;
+
+	debug_assert(is_scheduled(orig_instr));
+
+	foreach_ssa_src_n(src, n, new_instr) {
+		if (__is_false_dep(new_instr, n))
+			continue;
+		ctx->live_values += dest_regs(src);
+		use_instr(src);
+	}
+
+	clear_cache(ctx, orig_instr);
+}
+
 static void
 use_each_src(struct ir3_instruction *instr)
 {
@@ -104,7 +133,7 @@ use_each_src(struct ir3_instruction *instr)
 static void
 use_instr(struct ir3_instruction *instr)
 {
-	if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO)) {
+	if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT)) {
 		use_each_src(instr);
 	} else {
 		instr->use_count++;
@@ -114,7 +143,7 @@ use_instr(struct ir3_instruction *instr)
 static void
 update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 {
-	if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
+	if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
 		return;
 
 	ctx->live_values += dest_regs(instr);
@@ -132,7 +161,7 @@ update_use_count(struct ir3 *ir)
 
 	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
 		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
-			if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
+			if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
 				continue;
 
 			use_each_src(instr);
@@ -141,14 +170,9 @@ update_use_count(struct ir3 *ir)
 
 	/* Shader outputs are also used:
 	 */
-	for (unsigned i = 0; i < ir->noutputs; i++) {
-		struct ir3_instruction *out = ir->outputs[i];
-
-		if (!out)
-			continue;
-
+	struct ir3_instruction *out;
+	foreach_output(out, ir)
 		use_instr(out);
-	}
 }
 
 #define NULL_INSTR ((void *)~0)
@@ -270,10 +294,11 @@ distance(struct ir3_block *block, struct ir3_instruction *instr,
 		/* (ab)use block->data to prevent recursion: */
 		block->data = block;
 
-		for (unsigned i = 0; i < block->predecessors_count; i++) {
+		set_foreach(block->predecessors, entry) {
+			struct ir3_block *pred = (struct ir3_block *)entry->key;
 			unsigned n;
 
-			n = distance(block->predecessors[i], instr, min, pred);
+			n = distance(pred, instr, min, pred);
 
 			min = MIN2(min, n);
 		}
@@ -345,11 +370,6 @@ struct ir3_sched_notes {
 	bool addr_conflict, pred_conflict;
 };
 
-static bool is_scheduled(struct ir3_instruction *instr)
-{
-	return !!(instr->flags & IR3_INSTR_MARK);
-}
-
 /* could an instruction be scheduled if specified ssa src was scheduled? */
 static bool
 could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
@@ -371,6 +391,8 @@ static bool
 check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 		struct ir3_instruction *instr)
 {
+	debug_assert(!is_scheduled(instr));
+
 	/* For instructions that write address register we need to
 	 * make sure there is at least one instruction that uses the
 	 * addr value which is otherwise ready.
@@ -515,15 +537,15 @@ live_effect(struct ir3_instruction *instr)
 		if (instr->block != src->block)
 			continue;
 
-		/* for fanout/split, just pass things along to the real src: */
-		if (src->opc == OPC_META_FO)
+		/* for split, just pass things along to the real src: */
+		if (src->opc == OPC_META_SPLIT)
 			src = ssa(src->regs[1]);
 
-		/* for fanin/collect, if this is the last use of *each* src,
+		/* for collect, if this is the last use of *each* src,
 		 * then it will decrease the live values, since RA treats
 		 * them as a whole:
 		 */
-		if (src->opc == OPC_META_FI) {
+		if (src->opc == OPC_META_COLLECT) {
 			struct ir3_instruction *src2;
 			bool last_use = true;
 
@@ -639,6 +661,15 @@ find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 	return best_instr;
 }
 
+static struct ir3_instruction *
+split_instr(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr)
+{
+	struct ir3_instruction *new_instr = ir3_instr_clone(orig_instr);
+	ir3_insert_by_depth(new_instr, &ctx->depth_list);
+	transfer_use(ctx, orig_instr, new_instr);
+	return new_instr;
+}
+
 /* "spill" the address register by remapping any unscheduled
  * instructions which depend on the current address register
  * to a clone of the instruction which wrote the address reg.
@@ -669,10 +700,11 @@ split_addr(struct ir3_sched_ctx *ctx)
 		 */
 		if (indirect->address == ctx->addr) {
 			if (!new_addr) {
-				new_addr = ir3_instr_clone(ctx->addr);
+				new_addr = split_instr(ctx, ctx->addr);
 				/* original addr is scheduled, but new one isn't: */
 				new_addr->flags &= ~IR3_INSTR_MARK;
 			}
+			indirect->address = NULL;
 			ir3_instr_set_address(indirect, new_addr);
 		}
 	}
@@ -713,7 +745,7 @@ split_pred(struct ir3_sched_ctx *ctx)
 		 */
 		if (ssa(predicated->regs[1]) == ctx->pred) {
 			if (!new_pred) {
-				new_pred = ir3_instr_clone(ctx->pred);
+				new_pred = split_instr(ctx, ctx->pred);
 				/* original pred is scheduled, but new one isn't: */
 				new_pred->flags &= ~IR3_INSTR_MARK;
 			}
@@ -746,19 +778,30 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 	list_inithead(&block->instr_list);
 	list_inithead(&ctx->depth_list);
 
-	/* first a pre-pass to schedule all meta:input instructions
-	 * (which need to appear first so that RA knows the register is
-	 * occupied), and move remaining to depth sorted list:
+	/* First schedule all meta:input instructions, followed by
+	 * tex-prefetch.  We want all of the instructions that load
+	 * values into registers before the shader starts to go
+	 * before any other instructions.  But in particular we
+	 * want inputs to come before prefetches.  This is because
+	 * a FS's bary_ij input may not actually be live in the
+	 * shader, but it should not be scheduled on top of any
+	 * other input (but can be overwritten by a tex prefetch)
+	 *
+	 * Finally, move all the remaining instructions to the depth-
+	 * list
 	 */
-	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
-		if (instr->opc == OPC_META_INPUT) {
+	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+		if (instr->opc == OPC_META_INPUT)
 			schedule(ctx, instr);
-		} else {
-			ir3_insert_by_depth(instr, &ctx->depth_list);
-		}
-	}
 
-	while (!list_empty(&ctx->depth_list)) {
+	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+		if (instr->opc == OPC_META_TEX_PREFETCH)
+			schedule(ctx, instr);
+
+	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+		ir3_insert_by_depth(instr, &ctx->depth_list);
+
+	while (!list_is_empty(&ctx->depth_list)) {
 		struct ir3_sched_notes notes = {0};
 		struct ir3_instruction *instr;
 
@@ -880,8 +923,9 @@ sched_intra_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 	list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) {
 		unsigned delay = 0;
 
-		for (unsigned i = 0; i < block->predecessors_count; i++) {
-			unsigned d = delay_calc(block->predecessors[i], instr, false, true);
+		set_foreach(block->predecessors, entry) {
+			struct ir3_block *pred = (struct ir3_block *)entry->key;
+			unsigned d = delay_calc(pred, instr, false, true);
 			delay = MAX2(d, delay);
 		}
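
Editorial note, not part of the patch: both predecessor loops above change because block->predecessors becomes a pointer hash set (mesa's struct set from util/set.h) instead of a counted array, so iteration moves from indexing to set_foreach, with the block recovered from entry->key. That is safe here because both callers only fold the per-predecessor results through MIN2/MAX2, which do not depend on visit order. A minimal sketch of the pattern under that assumption; link_block and count_preds are hypothetical names for illustration, not functions added by this patch:

	#include "util/set.h"
	#include "ir3.h"

	/* Build side (hypothetical): a pointer set hashes the pointer value
	 * itself, and adding an already-present key is a no-op, so duplicate
	 * CFG edges collapse into a single predecessor entry.
	 */
	static void
	link_block(struct ir3_block *block, struct ir3_block *pred)
	{
		if (!block->predecessors)
			block->predecessors = _mesa_pointer_set_create(NULL);
		_mesa_set_add(block->predecessors, pred);
	}

	/* Read side (hypothetical), mirroring distance() and
	 * sched_intra_block() above; the set's `entries` field would give
	 * the same count directly.
	 */
	static unsigned
	count_preds(struct ir3_block *block)
	{
		unsigned n = 0;
		set_foreach(block->predecessors, entry) {
			struct ir3_block *pred = (struct ir3_block *)entry->key;
			if (pred)
				n++;
		}
		return n;
	}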