c->last_thrsw_at_top_level = (c->execute.file == QFILE_NULL);
}
-static struct qreg
-vir_SFU(struct v3d_compile *c, int waddr, struct qreg src)
-{
- vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, waddr), src);
- return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4));
-}
-
static struct qreg
indirect_uniform_load(struct v3d_compile *c, nir_intrinsic_instr *intr)
{
input = vir_FADD(c, input, vir_uniform_f(c, 0.5));
struct qreg periods = vir_FROUND(c, input);
- struct qreg sin_output = vir_SFU(c, V3D_QPU_WADDR_SIN,
- vir_FSUB(c, input, periods));
+ struct qreg sin_output = vir_SIN(c, vir_FSUB(c, input, periods));
return vir_XOR(c, sin_output, vir_SHL(c,
vir_FTOIN(c, periods),
vir_uniform_ui(c, -1)));
c->inputs[attr * 4 + 0] = vir_FXCD(c);
c->inputs[attr * 4 + 1] = vir_FYCD(c);
c->inputs[attr * 4 + 2] = c->payload_z;
- c->inputs[attr * 4 + 3] = vir_SFU(c, V3D_QPU_WADDR_RECIP,
- c->payload_w);
+ c->inputs[attr * 4 + 3] = vir_RECIP(c, c->payload_w);
}
static struct qreg
break;
case nir_op_frcp:
- result = vir_SFU(c, V3D_QPU_WADDR_RECIP, src[0]);
+ result = vir_RECIP(c, src[0]);
break;
case nir_op_frsq:
- result = vir_SFU(c, V3D_QPU_WADDR_RSQRT, src[0]);
+ result = vir_RSQRT(c, src[0]);
break;
case nir_op_fexp2:
- result = vir_SFU(c, V3D_QPU_WADDR_EXP, src[0]);
+ result = vir_EXP(c, src[0]);
break;
case nir_op_flog2:
- result = vir_SFU(c, V3D_QPU_WADDR_LOG, src[0]);
+ result = vir_LOG(c, src[0]);
break;
case nir_op_fceil:
setup_default_position(c);
uint32_t vpm_index = 0;
- struct qreg rcp_w = vir_SFU(c, V3D_QPU_WADDR_RECIP,
- c->outputs[c->output_position_index + 3]);
+ struct qreg rcp_w = vir_RECIP(c,
+ c->outputs[c->output_position_index + 3]);
emit_vpm_write_setup(c);
struct choose_scoreboard {
int tick;
- int last_sfu_write_tick;
+ int last_magic_sfu_write_tick;
int last_ldvary_tick;
int last_uniforms_reset_tick;
bool tlb_locked;
{
switch (mux) {
case V3D_QPU_MUX_R4:
- if (scoreboard->tick - scoreboard->last_sfu_write_tick <= 2)
+ if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick <= 2)
return true;
break;
* This would normally be prevented by dependency tracking, but might
* occur if a dead SFU computation makes it to scheduling.
*/
- if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 &&
+ if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick < 2 &&
v3d_qpu_writes_r4(devinfo, inst))
return true;
{
if (v3d_qpu_uses_vpm(inst))
return true;
+ if (v3d_qpu_uses_sfu(inst))
+ return true;
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.op != V3D_QPU_A_NOP &&
enum v3d_qpu_waddr waddr)
{
if (v3d_qpu_magic_waddr_is_sfu(waddr))
- scoreboard->last_sfu_write_tick = scoreboard->tick;
+ scoreboard->last_magic_sfu_write_tick = scoreboard->tick;
}
static void
struct choose_scoreboard scoreboard;
memset(&scoreboard, 0, sizeof(scoreboard));
scoreboard.last_ldvary_tick = -10;
- scoreboard.last_sfu_write_tick = -10;
+ scoreboard.last_magic_sfu_write_tick = -10;
scoreboard.last_uniforms_reset_tick = -10;
if (debug) {
a, b)); \
}
+#define VIR_SFU(name) \
+static inline struct qreg \
+vir_##name(struct v3d_compile *c, struct qreg a) \
+{ \
+ if (c->devinfo->ver >= 41) { \
+ return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \
+ c->undef, \
+ a, c->undef)); \
+ } else { \
+ vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
+ return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
+ } \
+} \
+static inline struct qinst * \
+vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \
+ struct qreg a) \
+{ \
+ if (c->devinfo->ver >= 41) { \
+ return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \
+ dest, \
+ a, c->undef)); \
+ } else { \
+ vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
+ return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
+ } \
+}
+
#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name)
VIR_M_ALU1(MOV)
VIR_M_ALU1(FMOV)
+VIR_SFU(RECIP)
+VIR_SFU(RSQRT)
+VIR_SFU(EXP)
+VIR_SFU(LOG)
+VIR_SFU(SIN)
+VIR_SFU(RSQRT2)
+
static inline struct qinst *
vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
struct qreg dest, struct qreg src)
return vir_reg(QFILE_UNIF, uniform);
}
+static bool
+vir_can_set_flags(struct v3d_compile *c, struct qinst *inst)
+{
+ if (c->devinfo->ver >= 40 && (v3d_qpu_reads_vpm(&inst->qpu) ||
+ v3d_qpu_uses_sfu(&inst->qpu))) {
+ return false;
+ }
+
+ return true;
+}
+
void
vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
{
if (src.file != QFILE_TEMP ||
!c->defs[src.index] ||
- last_inst != c->defs[src.index]) {
+ last_inst != c->defs[src.index] ||
+ !vir_can_set_flags(c, last_inst)) {
/* XXX: Make the MOV be the appropriate type */
last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
}
return false;
}
+static bool
+can_write_to_null(struct v3d_compile *c, struct qinst *inst)
+{
+ /* The SFU instructions must write to a physical register. */
+ if (c->devinfo->ver >= 41 && v3d_qpu_uses_sfu(&inst->qpu))
+ return false;
+
+ return true;
+}
+
bool
vir_opt_dead_code(struct v3d_compile *c)
{
* it's nicer to read the VIR code without
* unused destination regs.
*/
- if (inst->dst.file == QFILE_TEMP) {
+ if (inst->dst.file == QFILE_TEMP &&
+ can_write_to_null(c, inst)) {
if (debug) {
fprintf(stderr,
"Removing dst from: ");
class_bits[inst->dst.index] &= CLASS_BIT_PHYS;
break;
+ case V3D_QPU_A_RECIP:
+ case V3D_QPU_A_RSQRT:
+ case V3D_QPU_A_EXP:
+ case V3D_QPU_A_LOG:
+ case V3D_QPU_A_SIN:
+ case V3D_QPU_A_RSQRT2:
+ /* The SFU instructions write directly to the
+ * phys regfile.
+ */
+ assert(inst->dst.file == QFILE_TEMP);
+ class_bits[inst->dst.index] &= CLASS_BIT_PHYS;
+ break;
+
default:
break;
}
return false;
}
+bool
+v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
+{
+ if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
+ switch (inst->alu.add.op) {
+ case V3D_QPU_A_RECIP:
+ case V3D_QPU_A_RSQRT:
+ case V3D_QPU_A_EXP:
+ case V3D_QPU_A_LOG:
+ case V3D_QPU_A_SIN:
+ case V3D_QPU_A_RSQRT2:
+ return true;
+ default:
+ break;
+ }
+
+ if (inst->alu.add.magic_write &&
+ v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
+ return true;
+ }
+
+ if (inst->alu.mul.magic_write &&
+ v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool
v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst)
{
bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
+bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;