diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c
index f027d7b7a30..247221d3a03 100644
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@@ -59,6 +59,11 @@ struct ir3_sched_ctx {
 	bool error;
 };
 
+static bool is_scheduled(struct ir3_instruction *instr)
+{
+	return !!(instr->flags & IR3_INSTR_MARK);
+}
+
 static bool is_sfu_or_mem(struct ir3_instruction *instr)
 {
 	return is_sfu(instr) || is_mem(instr);
@@ -74,7 +79,7 @@ unuse_each_src(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 			continue;
 		if (instr->block != src->block)
 			continue;
-		if ((src->opc == OPC_META_FI) || (src->opc == OPC_META_FO)) {
+		if ((src->opc == OPC_META_COLLECT) || (src->opc == OPC_META_SPLIT)) {
 			unuse_each_src(ctx, src);
 		} else {
 			debug_assert(src->use_count > 0);
@@ -87,8 +92,32 @@ unuse_each_src(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 	}
 }
 
+static void clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr);
 static void use_instr(struct ir3_instruction *instr);
 
+/* transfers a use-count to new instruction, for cases where we
+ * "spill" address or predicate.  Note this might cause the
+ * previous instruction that loaded a0.x/p0.x to become live
+ * again, when we previously thought it was dead.
+ */
+static void
+transfer_use(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr,
+		struct ir3_instruction *new_instr)
+{
+	struct ir3_instruction *src;
+
+	debug_assert(is_scheduled(orig_instr));
+
+	foreach_ssa_src_n(src, n, new_instr) {
+		if (__is_false_dep(new_instr, n))
+			continue;
+		ctx->live_values += dest_regs(src);
+		use_instr(src);
+	}
+
+	clear_cache(ctx, orig_instr);
+}
+
 static void
 use_each_src(struct ir3_instruction *instr)
 {
@@ -104,7 +133,7 @@ use_each_src(struct ir3_instruction *instr)
 static void
 use_instr(struct ir3_instruction *instr)
 {
-	if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO)) {
+	if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT)) {
 		use_each_src(instr);
 	} else {
 		instr->use_count++;
@@ -114,7 +143,7 @@ use_instr(struct ir3_instruction *instr)
 static void
 update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 {
-	if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
+	if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
 		return;
 
 	ctx->live_values += dest_regs(instr);
@@ -132,7 +161,7 @@ update_use_count(struct ir3 *ir)
 
 	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
 		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
-			if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
+			if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
 				continue;
 
 			use_each_src(instr);
@@ -141,14 +170,9 @@ update_use_count(struct ir3 *ir)
 
 	/* Shader outputs are also used:
 	 */
-	for (unsigned i = 0; i < ir->noutputs; i++) {
-		struct ir3_instruction *out = ir->outputs[i];
-
-		if (!out)
-			continue;
-
+	struct ir3_instruction *out;
+	foreach_output(out, ir)
 		use_instr(out);
-	}
 }
 
 #define NULL_INSTR ((void *)~0)
@@ -270,10 +294,11 @@ distance(struct ir3_block *block, struct ir3_instruction *instr,
 		/* (ab)use block->data to prevent recursion: */
 		block->data = block;
 
-		for (unsigned i = 0; i < block->predecessors_count; i++) {
+		set_foreach(block->predecessors, entry) {
+			struct ir3_block *pred = (struct ir3_block *)entry->key;
 			unsigned n;
 
-			n = distance(block->predecessors[i], instr, min, pred);
+			n = distance(pred, instr, min, pred);
 
 			min = MIN2(min, n);
 		}
@@ -345,11 +370,6 @@ struct ir3_sched_notes {
 	bool addr_conflict, pred_conflict;
 };
 
-static bool is_scheduled(struct ir3_instruction *instr)
-{
-	return !!(instr->flags & IR3_INSTR_MARK);
-}
-
 /* could an instruction be scheduled if specified ssa src was scheduled? */
 static bool
 could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
@@ -371,6 +391,8 @@ static bool
 check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 		struct ir3_instruction *instr)
 {
+	debug_assert(!is_scheduled(instr));
+
 	/* For instructions that write address register we need to
 	 * make sure there is at least one instruction that uses the
 	 * addr value which is otherwise ready.
@@ -515,15 +537,15 @@ live_effect(struct ir3_instruction *instr)
 		if (instr->block != src->block)
 			continue;
 
-		/* for fanout/split, just pass things along to the real src: */
-		if (src->opc == OPC_META_FO)
+		/* for split, just pass things along to the real src: */
+		if (src->opc == OPC_META_SPLIT)
 			src = ssa(src->regs[1]);
 
-		/* for fanin/collect, if this is the last use of *each* src,
+		/* for collect, if this is the last use of *each* src,
 		 * then it will decrease the live values, since RA treats
 		 * them as a whole:
 		 */
-		if (src->opc == OPC_META_FI) {
+		if (src->opc == OPC_META_COLLECT) {
 			struct ir3_instruction *src2;
 			bool last_use = true;
 
@@ -639,6 +661,15 @@ find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 	return best_instr;
 }
 
+static struct ir3_instruction *
+split_instr(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr)
+{
+	struct ir3_instruction *new_instr = ir3_instr_clone(orig_instr);
+	ir3_insert_by_depth(new_instr, &ctx->depth_list);
+	transfer_use(ctx, orig_instr, new_instr);
+	return new_instr;
+}
+
 /* "spill" the address register by remapping any unscheduled
  * instructions which depend on the current address register
  * to a clone of the instruction which wrote the address reg.
@@ -669,10 +700,11 @@ split_addr(struct ir3_sched_ctx *ctx)
 		 */
 		if (indirect->address == ctx->addr) {
 			if (!new_addr) {
-				new_addr = ir3_instr_clone(ctx->addr);
+				new_addr = split_instr(ctx, ctx->addr);
 				/* original addr is scheduled, but new one isn't: */
 				new_addr->flags &= ~IR3_INSTR_MARK;
 			}
+			indirect->address = NULL;
 			ir3_instr_set_address(indirect, new_addr);
 		}
 	}
@@ -713,7 +745,7 @@ split_pred(struct ir3_sched_ctx *ctx)
 		 */
 		if (ssa(predicated->regs[1]) == ctx->pred) {
 			if (!new_pred) {
-				new_pred = ir3_instr_clone(ctx->pred);
+				new_pred = split_instr(ctx, ctx->pred);
 				/* original pred is scheduled, but new one isn't: */
 				new_pred->flags &= ~IR3_INSTR_MARK;
 			}
@@ -746,19 +778,30 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 	list_inithead(&block->instr_list);
 	list_inithead(&ctx->depth_list);
 
-	/* first a pre-pass to schedule all meta:input instructions
-	 * (which need to appear first so that RA knows the register is
-	 * occupied), and move remaining to depth sorted list:
+	/* First schedule all meta:input instructions, followed by
+	 * tex-prefetch.  We want all of the instructions that load
+	 * values into registers before the shader starts to go
+	 * before any other instructions.  But in particular we
+	 * want inputs to come before prefetches.  This is because
+	 * a FS's bary_ij input may not actually be live in the
+	 * shader, but it should not be scheduled on top of any
+	 * other input (but can be overwritten by a tex prefetch)
+	 *
+	 * Finally, move all the remaining instructions to the depth-
+	 * list
 	 */
-	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
-		if (instr->opc == OPC_META_INPUT) {
+	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+		if (instr->opc == OPC_META_INPUT)
 			schedule(ctx, instr);
-		} else {
-			ir3_insert_by_depth(instr, &ctx->depth_list);
-		}
-	}
 
-	while (!list_empty(&ctx->depth_list)) {
+	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+		if (instr->opc == OPC_META_TEX_PREFETCH)
+			schedule(ctx, instr);
+
+	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+		ir3_insert_by_depth(instr, &ctx->depth_list);
+
+	while (!list_is_empty(&ctx->depth_list)) {
 		struct ir3_sched_notes notes = {0};
 		struct ir3_instruction *instr;
 
@@ -880,8 +923,9 @@ sched_intra_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 	list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) {
 		unsigned delay = 0;
 
-		for (unsigned i = 0; i < block->predecessors_count; i++) {
-			unsigned d = delay_calc(block->predecessors[i], instr, false, true);
+		set_foreach(block->predecessors, entry) {
+			struct ir3_block *pred = (struct ir3_block *)entry->key;
+			unsigned d = delay_calc(pred, instr, false, true);
 			delay = MAX2(d, delay);
 		}
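
Editorial note, not part of the patch: both predecessor loops above change because block->predecessors becomes a pointer hash set (mesa's struct set from util/set.h) instead of a counted array, so iteration moves from indexing to set_foreach, with the block recovered from entry->key. That is safe here because both callers only fold the per-predecessor results through MIN2/MAX2, which do not depend on visit order. A minimal sketch of the pattern under that assumption; link_block and count_preds are hypothetical names for illustration, not functions added by this patch:

	#include "util/set.h"
	#include "ir3.h"

	/* Build side (hypothetical): a pointer set hashes the pointer value
	 * itself, and adding an already-present key is a no-op, so duplicate
	 * CFG edges collapse into a single predecessor entry.
	 */
	static void
	link_block(struct ir3_block *block, struct ir3_block *pred)
	{
		if (!block->predecessors)
			block->predecessors = _mesa_pointer_set_create(NULL);
		_mesa_set_add(block->predecessors, pred);
	}

	/* Read side (hypothetical), mirroring distance() and
	 * sched_intra_block() above; the set's `entries` field would give
	 * the same count directly.
	 */
	static unsigned
	count_preds(struct ir3_block *block)
	{
		unsigned n = 0;
		set_foreach(block->predecessors, entry) {
			struct ir3_block *pred = (struct ir3_block *)entry->key;
			if (pred)
				n++;
		}
		return n;
	}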