static bool
qpu_inst_is_tlb(const struct v3d_qpu_instr *inst)
{
+ if (inst->sig.ldtlb || inst->sig.ldtlbu)
+ return true;
+
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
return false;
add_write_dep(state, &state->last_tmu_config, n);
if (inst->sig.ldtlb | inst->sig.ldtlbu)
- add_read_dep(state, state->last_tlb, n);
+ add_write_dep(state, &state->last_tlb, n);
if (inst->sig.ldvpm) {
add_write_dep(state, &state->last_vpm_read, n);
struct dag *dag;
int tick;
int last_magic_sfu_write_tick;
+ int last_stallable_sfu_reg;
+ int last_stallable_sfu_tick;
int last_ldvary_tick;
int last_uniforms_reset_tick;
int last_thrsw_tick;
return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst));
}
+static bool
+qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
+ uint32_t waddr) {
+
+ if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+ return false;
+
+ if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
+ inst->raddr_a == waddr)
+ return true;
+
+ if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
+ !inst->sig.small_imm && (inst->raddr_b == waddr))
+ return true;
+
+ return false;
+}
+
+static bool
+mux_read_stalls(struct choose_scoreboard *scoreboard,
+ const struct v3d_qpu_instr *inst)
+{
+ return scoreboard->tick == scoreboard->last_stallable_sfu_tick + 1 &&
+ qpu_instruction_uses_rf(inst,
+ scoreboard->last_stallable_sfu_reg);
+}
+
+/* We define a max schedule priority to allow negative priorities as result of
+ * substracting this max when an instruction stalls. So instructions that
+ * stall have lower priority than regular instructions. */
+#define MAX_SCHEDULE_PRIORITY 16
+
static int
get_instruction_priority(const struct v3d_qpu_instr *inst)
{
return next_score;
next_score++;
- /* XXX perf: We should schedule SFU ALU ops so that the reader is 2
- * instructions after the producer if possible, not just 1.
- */
-
/* Default score for things that aren't otherwise special. */
baseline_score = next_score;
next_score++;
return next_score;
next_score++;
+ /* We should increase the maximum if we assert here */
+ assert(next_score < MAX_SCHEDULE_PRIORITY);
+
return baseline_score;
}
inst->sig.wrtmuc);
}
+static bool
+qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *a,
+ const struct v3d_qpu_instr *b)
+{
+ const bool a_uses_peripheral = qpu_accesses_peripheral(a);
+ const bool b_uses_peripheral = qpu_accesses_peripheral(b);
+
+ /* We can always do one peripheral access per instruction. */
+ if (!a_uses_peripheral || !b_uses_peripheral)
+ return true;
+
+ if (devinfo->ver < 41)
+ return false;
+
+ /* V3D 4.1 and later allow TMU read along with a VPM read or write, and
+ * WRTMUC with a TMU magic register write (other than tmuc).
+ */
+ if ((a->sig.ldtmu && v3d_qpu_uses_vpm(b)) ||
+ (b->sig.ldtmu && v3d_qpu_uses_vpm(a))) {
+ return true;
+ }
+
+ if ((a->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(b)) ||
+ (b->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(a))) {
+ return true;
+ }
+
+ return false;
+}
+
static bool
qpu_merge_inst(const struct v3d_device_info *devinfo,
struct v3d_qpu_instr *result,
return false;
}
- /* Can't do more than one peripheral access in an instruction.
- *
- * XXX: V3D 4.1 allows TMU read along with a VPM read or write, and
- * WRTMUC with a TMU magic register write (other than tmuc).
- */
- if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b))
+ if (!qpu_compatible_peripheral_access(devinfo, a, b))
return false;
struct v3d_qpu_instr merge = *a;
int prio = get_instruction_priority(inst);
+ if (mux_read_stalls(scoreboard, inst)) {
+ /* Don't merge an instruction that stalls */
+ if (prev_inst)
+ continue;
+ else {
+ /* Any instruction that don't stall will have
+ * higher scheduling priority */
+ prio -= MAX_SCHEDULE_PRIORITY;
+ assert(prio < 0);
+ }
+ }
+
/* Found a valid instruction. If nothing better comes along,
* this one works.
*/
scoreboard->last_magic_sfu_write_tick = scoreboard->tick;
}
+static void
+update_scoreboard_for_sfu_stall_waddr(struct choose_scoreboard *scoreboard,
+ const struct v3d_qpu_instr *inst)
+{
+ if (v3d_qpu_instr_is_sfu(inst)) {
+ scoreboard->last_stallable_sfu_reg = inst->alu.add.waddr;
+ scoreboard->last_stallable_sfu_tick = scoreboard->tick;
+ }
+}
+
static void
update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
const struct v3d_qpu_instr *inst)
if (inst->alu.add.magic_write) {
update_scoreboard_for_magic_waddr(scoreboard,
inst->alu.add.waddr);
+ } else {
+ update_scoreboard_for_sfu_stall_waddr(scoreboard,
+ inst);
}
}
after_inst));
}
+ if (v3d_qpu_instr_is_sfu(before_inst))
+ return 2;
+
return latency;
}
fprintf(stderr, "\n");
}
}
+ if (mux_read_stalls(scoreboard, inst))
+ c->qpu_inst_stalled_count++;
}
/* Update the uniform index for the rewritten location --
scoreboard.last_magic_sfu_write_tick = -10;
scoreboard.last_uniforms_reset_tick = -10;
scoreboard.last_thrsw_tick = -10;
+ scoreboard.last_stallable_sfu_tick = -10;
if (debug) {
fprintf(stderr, "Pre-schedule instructions\n");