struct dag *dag;
int tick;
int last_magic_sfu_write_tick;
+ int last_stallable_sfu_reg;
+ int last_stallable_sfu_tick;
int last_ldvary_tick;
int last_uniforms_reset_tick;
int last_thrsw_tick;
return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst));
}
+static bool
+qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
+ uint32_t waddr) {
+
+ if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+ return false;
+
+ if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
+ inst->raddr_a == waddr)
+ return true;
+
+ if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
+ !inst->sig.small_imm && (inst->raddr_b == waddr))
+ return true;
+
+ return false;
+}
+
+static bool
+mux_read_stalls(struct choose_scoreboard *scoreboard,
+ const struct v3d_qpu_instr *inst)
+{
+ return scoreboard->tick == scoreboard->last_stallable_sfu_tick + 1 &&
+ qpu_instruction_uses_rf(inst,
+ scoreboard->last_stallable_sfu_reg);
+}
+
static int
get_instruction_priority(const struct v3d_qpu_instr *inst)
{
scoreboard->last_magic_sfu_write_tick = scoreboard->tick;
}
+static void
+update_scoreboard_for_sfu_stall_waddr(struct choose_scoreboard *scoreboard,
+ const struct v3d_qpu_instr *inst)
+{
+ if (v3d_qpu_instr_is_sfu(inst)) {
+ scoreboard->last_stallable_sfu_reg = inst->alu.add.waddr;
+ scoreboard->last_stallable_sfu_tick = scoreboard->tick;
+ }
+}
+
static void
update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
const struct v3d_qpu_instr *inst)
if (inst->alu.add.magic_write) {
update_scoreboard_for_magic_waddr(scoreboard,
inst->alu.add.waddr);
+ } else {
+ update_scoreboard_for_sfu_stall_waddr(scoreboard,
+ inst);
}
}
fprintf(stderr, "\n");
}
}
+ if (mux_read_stalls(scoreboard, inst))
+ c->qpu_inst_stalled_count++;
}
/* Update the uniform index for the rewritten location --
scoreboard.last_magic_sfu_write_tick = -10;
scoreboard.last_uniforms_reset_tick = -10;
scoreboard.last_thrsw_tick = -10;
+ scoreboard.last_stallable_sfu_tick = -10;
if (debug) {
fprintf(stderr, "Pre-schedule instructions\n");
char *shaderdb;
int ret = asprintf(&shaderdb,
"%s shader: %d inst, %d threads, %d loops, "
- "%d uniforms, %d max-temps, %d:%d spills:fills",
+ "%d uniforms, %d max-temps, %d:%d spills:fills, "
+ "%d sfu-stalls, %d inst-and-stalls",
vir_get_stage_name(c),
c->qpu_inst_count,
c->threads,
c->num_uniforms,
vir_get_max_temps(c),
c->spills,
- c->fills);
+ c->fills,
+ c->qpu_inst_stalled_count,
+ c->qpu_inst_count + c->qpu_inst_stalled_count);
if (ret >= 0) {
if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
bool
v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
{
- if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
- switch (inst->alu.add.op) {
- case V3D_QPU_A_RECIP:
- case V3D_QPU_A_RSQRT:
- case V3D_QPU_A_EXP:
- case V3D_QPU_A_LOG:
- case V3D_QPU_A_SIN:
- case V3D_QPU_A_RSQRT2:
- return true;
- default:
- break;
- }
+ if (v3d_qpu_instr_is_sfu(inst))
+ return true;
+ if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.magic_write &&
v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
return true;
return false;
}
+bool
+v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
+{
+ if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
+ switch (inst->alu.add.op) {
+ case V3D_QPU_A_RECIP:
+ case V3D_QPU_A_RSQRT:
+ case V3D_QPU_A_EXP:
+ case V3D_QPU_A_LOG:
+ case V3D_QPU_A_SIN:
+ case V3D_QPU_A_RSQRT2:
+ return true;
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
bool
v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst)
{
bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
+bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;