enum direction { F, R };
struct schedule_state {
+ const struct v3d_device_info *devinfo;
struct schedule_node *last_r[6];
struct schedule_node *last_rf[64];
struct schedule_node *last_sf;
static void
calculate_deps(struct schedule_state *state, struct schedule_node *n)
{
+ const struct v3d_device_info *devinfo = state->devinfo;
struct qinst *qinst = n->inst;
struct v3d_qpu_instr *inst = &qinst->qpu;
process_waddr_deps(state, n, inst->alu.mul.waddr,
inst->alu.mul.magic_write);
}
+ if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
+ process_waddr_deps(state, n, inst->sig_addr,
+ inst->sig_magic);
+ }
- if (v3d_qpu_writes_r3(inst))
+ if (v3d_qpu_writes_r3(devinfo, inst))
add_write_dep(state, &state->last_r[3], n);
- if (v3d_qpu_writes_r4(inst))
+ if (v3d_qpu_writes_r4(devinfo, inst))
add_write_dep(state, &state->last_r[4], n);
- if (v3d_qpu_writes_r5(inst))
+ if (v3d_qpu_writes_r5(devinfo, inst))
add_write_dep(state, &state->last_r[5], n);
if (inst->sig.thrsw) {
struct schedule_state state;
memset(&state, 0, sizeof(state));
+ state.devinfo = c->devinfo;
state.dir = F;
list_for_each_entry(struct schedule_node, node, schedule_list, link)
struct schedule_state state;
memset(&state, 0, sizeof(state));
+ state.devinfo = c->devinfo;
state.dir = R;
for (node = schedule_list->prev; schedule_list != node; node = node->prev) {
}
static bool
-writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
+writes_too_soon_after_write(const struct v3d_device_info *devinfo,
+ struct choose_scoreboard *scoreboard,
struct qinst *qinst)
{
const struct v3d_qpu_instr *inst = &qinst->qpu;
* occur if a dead SFU computation makes it to scheduling.
*/
if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 &&
- v3d_qpu_writes_r4(inst))
+ v3d_qpu_writes_r4(devinfo, inst))
return true;
return false;
return (inst->sig.ldvpm ||
inst->sig.ldtmu ||
inst->sig.ldtlb ||
- inst->sig.ldtlbu);
+ inst->sig.ldtlbu ||
+ inst->sig.wrtmuc);
}
static bool
return false;
}
- /* Can't do more than one peripheral access in an instruction. */
+ /* Can't do more than one peripheral access in an instruction.
+ *
+ * XXX: V3D 4.1 allows TMU read along with a VPM read or write, and
+ * WRTMUC with a TMU magic register write (other than tmuc).
+ */
if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b))
return false;
merge.sig.thrsw |= b->sig.thrsw;
merge.sig.ldunif |= b->sig.ldunif;
+ merge.sig.ldunifrf |= b->sig.ldunifrf;
+ merge.sig.ldunifa |= b->sig.ldunifa;
+ merge.sig.ldunifarf |= b->sig.ldunifarf;
merge.sig.ldtmu |= b->sig.ldtmu;
merge.sig.ldvary |= b->sig.ldvary;
merge.sig.ldvpm |= b->sig.ldvpm;
merge.sig.rotate |= b->sig.rotate;
merge.sig.wrtmuc |= b->sig.wrtmuc;
+ if (v3d_qpu_sig_writes_address(devinfo, &a->sig) &&
+ v3d_qpu_sig_writes_address(devinfo, &b->sig))
+ return false;
+ merge.sig_addr |= b->sig_addr;
+ merge.sig_magic |= b->sig_magic;
+
uint64_t packed;
bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed);
if (reads_too_soon_after_write(scoreboard, n->inst))
continue;
- if (writes_too_soon_after_write(scoreboard, n->inst))
+ if (writes_too_soon_after_write(devinfo, scoreboard, n->inst))
continue;
/* "A scoreboard wait must not occur in the first two
* otherwise get scheduled so ldunif and ldvary try to update
* r5 in the same tick.
*/
- if (inst->sig.ldunif &&
+ if ((inst->sig.ldunif || inst->sig.ldunifa) &&
scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
continue;
}