bool error;
};
+/* True when IR3_INSTR_MARK is set on instr; the scheduler uses that
+ * flag to mark instructions it has already scheduled.
+ */
+static bool is_scheduled(struct ir3_instruction *instr)
+{
+	return (instr->flags & IR3_INSTR_MARK) != 0;
+}
+
static bool is_sfu_or_mem(struct ir3_instruction *instr)
{
return is_sfu(instr) || is_mem(instr);
continue;
if (instr->block != src->block)
continue;
- if ((src->opc == OPC_META_FI) || (src->opc == OPC_META_FO)) {
+ if ((src->opc == OPC_META_COLLECT) || (src->opc == OPC_META_SPLIT)) {
unuse_each_src(ctx, src);
} else {
debug_assert(src->use_count > 0);
}
}
+static void clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr);
static void use_instr(struct ir3_instruction *instr);
+/* Accounts for the sources of a clone made when "spilling" the address
+ * or predicate register.  orig_instr must already be scheduled (checked
+ * with debug_assert).
+ *
+ * Every SSA source of new_instr that is not a false dependency gains a
+ * use, and its dest_regs() are added back to ctx->live_values.  As a
+ * consequence the instruction that previously loaded a0.x/p0.x may
+ * become live again even though it was already considered dead.
+ *
+ * clear_cache() is then called for orig_instr.
+ */
+static void
+transfer_use(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr,
+		struct ir3_instruction *new_instr)
+{
+	struct ir3_instruction *dep;
+
+	debug_assert(is_scheduled(orig_instr));
+
+	foreach_ssa_src_n(dep, n, new_instr) {
+		if (!__is_false_dep(new_instr, n)) {
+			ctx->live_values += dest_regs(dep);
+			use_instr(dep);
+		}
+	}
+
+	clear_cache(ctx, orig_instr);
+}
+
static void
use_each_src(struct ir3_instruction *instr)
{
static void
use_instr(struct ir3_instruction *instr)
{
- if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO)) {
+ if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT)) {
use_each_src(instr);
} else {
instr->use_count++;
static void
update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
- if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
+ if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
return;
ctx->live_values += dest_regs(instr);
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
- if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
+ if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
continue;
use_each_src(instr);
/* Shader outputs are also used:
*/
- for (unsigned i = 0; i < ir->noutputs; i++) {
- struct ir3_instruction *out = ir->outputs[i];
-
- if (!out)
- continue;
-
+ struct ir3_instruction *out;
+ foreach_output(out, ir)
use_instr(out);
- }
}
#define NULL_INSTR ((void *)~0)
/* (ab)use block->data to prevent recursion: */
block->data = block;
- for (unsigned i = 0; i < block->predecessors_count; i++) {
+ set_foreach(block->predecessors, entry) {
+ /* Named pred_block so it does not shadow the outer 'pred' that
+ * is forwarded unchanged as the last argument of distance().
+ */
+ struct ir3_block *pred_block = (struct ir3_block *)entry->key;
unsigned n;
- n = distance(block->predecessors[i], instr, min, pred);
+ n = distance(pred_block, instr, min, pred);
min = MIN2(min, n);
}
bool addr_conflict, pred_conflict;
};
-static bool is_scheduled(struct ir3_instruction *instr)
-{
- return !!(instr->flags & IR3_INSTR_MARK);
-}
-
/* could an instruction be scheduled if specified ssa src was scheduled? */
static bool
could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
struct ir3_instruction *instr)
{
+ debug_assert(!is_scheduled(instr));
+
/* For instructions that write address register we need to
* make sure there is at least one instruction that uses the
* addr value which is otherwise ready.
if (instr->block != src->block)
continue;
- /* for fanout/split, just pass things along to the real src: */
- if (src->opc == OPC_META_FO)
+ /* for split, just pass things along to the real src: */
+ if (src->opc == OPC_META_SPLIT)
src = ssa(src->regs[1]);
- /* for fanin/collect, if this is the last use of *each* src,
+ /* for collect, if this is the last use of *each* src,
* then it will decrease the live values, since RA treats
* them as a whole:
*/
- if (src->opc == OPC_META_FI) {
+ if (src->opc == OPC_META_COLLECT) {
struct ir3_instruction *src2;
bool last_use = true;
return best_instr;
}
+/* Clones orig_instr, inserts the clone into ctx->depth_list by depth,
+ * and lets transfer_use() account for the clone's sources.  Returns
+ * the clone.
+ *
+ * The visible callers clear IR3_INSTR_MARK on the result themselves,
+ * since the original is scheduled but the clone is not.
+ */
+static struct ir3_instruction *
+split_instr(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr)
+{
+	struct ir3_instruction *clone = ir3_instr_clone(orig_instr);
+
+	ir3_insert_by_depth(clone, &ctx->depth_list);
+	transfer_use(ctx, orig_instr, clone);
+
+	return clone;
+}
+
/* "spill" the address register by remapping any unscheduled
* instructions which depend on the current address register
* to a clone of the instruction which wrote the address reg.
*/
if (indirect->address == ctx->addr) {
if (!new_addr) {
- new_addr = ir3_instr_clone(ctx->addr);
+ new_addr = split_instr(ctx, ctx->addr);
/* original addr is scheduled, but new one isn't: */
new_addr->flags &= ~IR3_INSTR_MARK;
}
+ indirect->address = NULL;
ir3_instr_set_address(indirect, new_addr);
}
}
*/
if (ssa(predicated->regs[1]) == ctx->pred) {
if (!new_pred) {
- new_pred = ir3_instr_clone(ctx->pred);
+ new_pred = split_instr(ctx, ctx->pred);
/* original pred is scheduled, but new one isn't: */
new_pred->flags &= ~IR3_INSTR_MARK;
}
list_inithead(&block->instr_list);
list_inithead(&ctx->depth_list);
- /* first a pre-pass to schedule all meta:input instructions
- * (which need to appear first so that RA knows the register is
- * occupied), and move remaining to depth sorted list:
+ /* First schedule all meta:input instructions, followed by
+ * tex-prefetch. We want all of the instructions that load
+ * values into registers before the shader starts to go
+ * before any other instructions. But in particular we
+ * want inputs to come before prefetches. This is because
+ * a FS's bary_ij input may not actually be live in the
+ * shader, but it should not be scheduled on top of any
+ * other input (but can be overwritten by a tex prefetch).
+ *
+ * Finally, move all the remaining instructions to the
+ * depth-sorted list.
*/
- list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
- if (instr->opc == OPC_META_INPUT) {
+ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+ if (instr->opc == OPC_META_INPUT)
schedule(ctx, instr);
- } else {
- ir3_insert_by_depth(instr, &ctx->depth_list);
- }
- }
- while (!list_empty(&ctx->depth_list)) {
+ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+ if (instr->opc == OPC_META_TEX_PREFETCH)
+ schedule(ctx, instr);
+
+ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+ ir3_insert_by_depth(instr, &ctx->depth_list);
+
+ while (!list_is_empty(&ctx->depth_list)) {
struct ir3_sched_notes notes = {0};
struct ir3_instruction *instr;
list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) {
unsigned delay = 0;
- for (unsigned i = 0; i < block->predecessors_count; i++) {
- unsigned d = delay_calc(block->predecessors[i], instr, false, true);
+ set_foreach(block->predecessors, entry) {
+ struct ir3_block *pred = (struct ir3_block *)entry->key;
+ unsigned d = delay_calc(pred, instr, false, true);
delay = MAX2(d, delay);
}