+ n->instr = instr;
+ instr->data = n;
+}
+
+/* Record the scheduling dependency instr -> src (ie. src must be scheduled
+ * before instr), adding a DAG edge between their sched nodes.  'i' is the
+ * src slot index within instr.  Relies on sched_node_init() having already
+ * run for both instructions so instr->data / src->data hold valid nodes
+ * (NOTE(review): assumes same-block srcs precede their uses in the
+ * unscheduled list — confirm against sched_dag_init()).
+ */
+static void
+sched_node_add_dep(struct ir3_instruction *instr, struct ir3_instruction *src, int i)
+{
+ /* don't consider dependencies in other blocks: */
+ if (src->block != instr->block)
+ return;
+
+ /* we could have false-dep's that end up unused: */
+ if (src->flags & IR3_INSTR_UNUSED) {
+ debug_assert(__is_false_dep(instr, i));
+ return;
+ }
+
+ struct ir3_sched_node *n = instr->data;
+ struct ir3_sched_node *sn = src->data;
+
+ /* If src is consumed by a collect, track that to realize that once
+ * any of the collect srcs are live, we should hurry up and schedule
+ * the rest.
+ */
+ if (instr->opc == OPC_META_COLLECT)
+ sn->collect = instr;
+
+ dag_add_edge(&sn->dag, &n->dag, NULL);
+
+ /* Track the worst-case delay any single src imposes on instr; folded
+ * into max_delay (critical-path weight) by sched_dag_max_delay_cb().
+ */
+ unsigned d = ir3_delayslots(src, instr, i, true);
+ n->delay = MAX2(n->delay, d);
+}
+
+/* Flag instr and (recursively) all of its same-block SSA sources as being
+ * on a "kill path", so the scheduler prioritizes them (all inputs must be
+ * scheduled before a kill).
+ */
+static void
+mark_kill_path(struct ir3_instruction *instr)
+{
+   struct ir3_sched_node *n = instr->data;
+
+   /* Already visited: everything reachable below this node is marked
+    * too, so stop here.  Without this guard, nodes reachable through
+    * multiple use chains get re-traversed — potentially exponentially
+    * often on diamond-shaped dependency graphs.
+    */
+   if (n->kill_path)
+      return;
+
+   n->kill_path = true;
+
+   foreach_ssa_src (src, instr) {
+      if (src->block != instr->block)
+         continue;
+
+      mark_kill_path(src);
+   }
+}
+
+/* Is instr one of the shader's output collects? */
+static bool
+is_output_collect(struct ir3_instruction *instr)
+{
+   struct ir3 *ir = instr->block->shader;
+
+   for (unsigned i = 0; i < ir->outputs_count; i++) {
+      /* every shader output is expected to be a collect meta-instr: */
+      assert(ir->outputs[i]->opc == OPC_META_COLLECT);
+
+      if (ir->outputs[i] == instr)
+         return true;
+   }
+
+   return false;
+}
+
+/* Is instr's only use as a shader output (ie. every consumer is an
+ * output collect)?
+ */
+static bool
+is_output_only(struct ir3_instruction *instr)
+{
+   /* must produce an SSA gpr value to be an output producer at all: */
+   if (!writes_gpr(instr) || !(instr->regs[0]->flags & IR3_REG_SSA))
+      return false;
+
+   /* any non-collect consumer disqualifies it: */
+   foreach_ssa_use (use, instr) {
+      if (!is_output_collect(use))
+         return false;
+   }
+
+   return true;
+}
+
+/* Build all incoming DAG edges for instr, and set the per-node flags
+ * (kill_path / output) that bias scheduling priority.
+ */
+static void
+sched_node_add_deps(struct ir3_instruction *instr)
+{
+ /* Since foreach_ssa_src() already handles false-dep's we can construct
+ * the DAG easily in a single pass.
+ */
+ foreach_ssa_src_n (src, i, instr) {
+ sched_node_add_dep(instr, src, i);
+ }
+
+ /* NOTE that all inputs must be scheduled before a kill, so
+ * mark these to be prioritized as well:
+ */
+ if (is_kill(instr) || is_input(instr)) {
+ mark_kill_path(instr);
+ }
+
+ /* NOTE(review): presumably the scheduler uses n->output to defer
+ * instructions whose only consumers are output collects — verify
+ * against the node-choosing logic.
+ */
+ if (is_output_only(instr)) {
+ struct ir3_sched_node *n = instr->data;
+ n->output = true;
+ }
+}
+
+/* dag_traverse_bottom_up() callback: accumulate each node's max_delay as
+ * its own delay plus the largest max_delay among its children (ie. the
+ * critical-path weight below this node).
+ */
+static void
+sched_dag_max_delay_cb(struct dag_node *node, void *state)
+{
+   struct ir3_sched_node *n = (struct ir3_sched_node *)node;
+   uint32_t worst_child = 0;
+
+   util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
+      struct ir3_sched_node *child = (struct ir3_sched_node *)edge->child;
+      if (child->max_delay > worst_child)
+         worst_child = child->max_delay;
+   }
+
+   n->max_delay = MAX2(n->max_delay, worst_child + n->delay);
+}
+
+/* Construct the scheduling DAG for the current block: one node per
+ * unscheduled instruction, edges for same-block dependencies, then a
+ * bottom-up pass computing each node's max_delay.
+ */
+static void
+sched_dag_init(struct ir3_sched_ctx *ctx)
+{
+ ctx->dag = dag_create(ctx);
+
+ /* Single pass works because a src's node is initialized before any of
+ * its uses look at src->data.  (NOTE(review): assumes unscheduled_list
+ * is in original program/SSA order — confirm.)
+ */
+ foreach_instr (instr, &ctx->unscheduled_list) {
+ sched_node_init(ctx, instr);
+ sched_node_add_deps(instr);
+ }
+
+ dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, NULL);
+}
+
+/* Tear down the per-block scheduling DAG. */
+static void
+sched_dag_destroy(struct ir3_sched_ctx *ctx)
+{
+   struct dag *dag = ctx->dag;
+
+   ctx->dag = NULL;
+   ralloc_free(dag);
+}
+
+static void
+sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
+{