* address.
*
* In that case, we need to move one to a temporary that can be used in the
- * instruction, instead.
+ * instruction, instead. We reserve ra31/rb31 for this purpose: rb31 holds
+ * the moved source when the conflict is over the A regfile, ra31 otherwise.
*/
-static bool
+static void
fixup_raddr_conflict(struct vc4_compile *c,
struct qpu_reg dst,
- struct qpu_reg *src0, struct qpu_reg *src1,
- bool r3_live)
+ struct qpu_reg *src0, struct qpu_reg *src1)
{
uint32_t mux0 = src0->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src0->mux;
uint32_t mux1 = src1->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src1->mux;
mux0 != mux1 ||
(src0->addr == src1->addr &&
src0->mux == src1->mux)) {
- return false;
+ return;
}
if (swap_file(src0) || swap_file(src1))
- return false;
+ return;
if (mux0 == QPU_MUX_A) {
- /* If we're conflicting over the A regfile, then we can just
- * use the reserved rb31.
- */
queue(c, qpu_a_MOV(qpu_rb(31), *src1));
*src1 = qpu_rb(31);
- return false;
} else {
- /* Otherwise, we need a non-B regfile. So, we spill r3 out to
- * rb31, then store our desired value in r3, and tell the
- * caller to put rb31 back into r3 when we're done.
- */
- if (r3_live)
- queue(c, qpu_a_MOV(qpu_rb(31), qpu_r3()));
- queue(c, qpu_a_MOV(qpu_r3(), *src1));
-
- *src1 = qpu_r3();
-
- return r3_live && dst.mux != QPU_MUX_R3;
+ queue(c, qpu_a_MOV(qpu_ra(31), *src1));
+ *src1 = qpu_ra(31);
}
}
uint32_t inputs_remaining = c->num_inputs;
uint32_t vpm_read_fifo_count = 0;
uint32_t vpm_read_offset = 0;
- bool written_r3 = false;
- bool needs_restore;
+ int last_vpm_read_index = -1;
/* Map from the QIR ops enum order to QPU unpack bits. */
static const uint32_t unpack_map[] = {
QPU_UNPACK_8A,
*/
assert(src[i].addr <= 47);
break;
+ case QFILE_VPM:
+ assert((int)qinst->src[i].index >=
+ last_vpm_read_index);
+ last_vpm_read_index = qinst->src[i].index;
+ src[i] = qpu_ra(QPU_R_VPM);
+ break;
}
}
case QFILE_TEMP:
dst = temp_registers[qinst->dst.index];
break;
+ case QFILE_VPM:
+ dst = qpu_ra(QPU_W_VPM);
+ break;
case QFILE_VARY:
case QFILE_UNIF:
case QFILE_SMALL_IMM:
}
break;
- case QOP_SF:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_NOP), src[0]));
- *last_inst(c) |= QPU_SF;
- break;
-
case QOP_SEL_X_0_ZS:
case QOP_SEL_X_0_ZC:
case QOP_SEL_X_0_NS:
break;
- case QOP_VPM_WRITE:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_VPM), src[0]));
- break;
-
- case QOP_VPM_READ:
- queue(c, qpu_a_MOV(dst, qpu_ra(QPU_R_VPM)));
- break;
-
case QOP_RCP:
case QOP_RSQ:
case QOP_EXP2:
break;
- case QOP_PACK_COLORS: {
- /* We have to be careful not to start writing over one
- * of our source values when incrementally writing the
- * destination. So, if the dst is one of the srcs, we
- * pack that one first (and we pack 4 channels at once
- * for the first pack).
- */
- struct qpu_reg first_pack = src[0];
- for (int i = 0; i < 4; i++) {
- if (src[i].mux == dst.mux &&
- src[i].addr == dst.addr) {
- first_pack = dst;
- break;
- }
- }
- queue(c, qpu_m_MOV(dst, first_pack));
+ case QOP_PACK_8888_F:
+ queue(c, qpu_m_MOV(dst, src[0]));
*last_inst(c) |= QPU_PM;
*last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
QPU_PACK);
+ break;
- for (int i = 0; i < 4; i++) {
- if (src[i].mux == first_pack.mux &&
- src[i].addr == first_pack.addr) {
- continue;
+ case QOP_PACK_8A_F:
+ case QOP_PACK_8B_F:
+ case QOP_PACK_8C_F:
+ case QOP_PACK_8D_F:
+ /* If dst doesn't happen to already contain src[0],
+ * then we have to move it in.
+ */
+ if (qinst->src[0].file != QFILE_NULL &&
+ (src[0].mux != dst.mux || src[0].addr != dst.addr)) {
+ /* Don't overwrite src1 while setting up
+ * the dst!
+ */
+ if (dst.mux == src[1].mux &&
+ dst.addr == src[1].addr) {
+ queue(c, qpu_m_MOV(qpu_rb(31), src[1]));
+ src[1] = qpu_rb(31);
}
- queue(c, qpu_m_MOV(dst, src[i]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + i,
- QPU_PACK);
+ queue(c, qpu_m_MOV(dst, src[0]));
}
+ queue(c, qpu_m_MOV(dst, src[1]));
+ *last_inst(c) |= QPU_PM;
+ *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A +
+ qinst->op - QOP_PACK_8A_F,
+ QPU_PACK);
break;
- }
case QOP_FRAG_X:
queue(c, qpu_a_ITOF(dst,
break;
case QOP_TEX_DIRECT:
- needs_restore = fixup_raddr_conflict(c, dst,
- &src[0], &src[1],
- written_r3);
+ fixup_raddr_conflict(c, dst, &src[0], &src[1]);
queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1]));
- if (needs_restore)
- queue(c, qpu_a_MOV(qpu_r3(), qpu_rb(31)));
break;
case QOP_TEX_RESULT:
if (qir_get_op_nsrc(qinst->op) == 1)
src[1] = src[0];
- needs_restore = fixup_raddr_conflict(c, dst,
- &src[0], &src[1],
- written_r3);
+ fixup_raddr_conflict(c, dst, &src[0], &src[1]);
if (translate[qinst->op].is_mul) {
queue(c, qpu_m_alu2(translate[qinst->op].op,
dst,
src[0], src[1]));
}
- if (needs_restore)
- queue(c, qpu_a_MOV(qpu_r3(), qpu_rb(31)));
break;
}
- if (dst.mux == QPU_MUX_R3)
- written_r3 = true;
+ if (qinst->sf) {
+ assert(!qir_is_multi_instruction(qinst));
+ *last_inst(c) |= QPU_SF;
+ }
}
qpu_schedule_instructions(c);