[TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
[TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
[TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
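+ /* TGSI set-on-comparison opcodes, mapped to new QIR comparison ops. */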
+ [TGSI_OPCODE_SEQ] = { QOP_SEQ, tgsi_to_qir_alu },
+ [TGSI_OPCODE_SNE] = { QOP_SNE, tgsi_to_qir_alu },
+ [TGSI_OPCODE_SGE] = { QOP_SGE, tgsi_to_qir_alu },
+ [TGSI_OPCODE_SLT] = { QOP_SLT, tgsi_to_qir_alu },
[TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
[TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
[TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
[QOP_FMAX] = { "fmax", 1, 2 },
[QOP_FMINABS] = { "fminabs", 1, 2 },
[QOP_FMAXABS] = { "fmaxabs", 1, 2 },
+
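+ /* Comparison ops take two float sources and produce 0.0 or 1.0. */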
+ [QOP_SEQ] = { "seq", 1, 2 },
+ [QOP_SNE] = { "sne", 1, 2 },
+ [QOP_SGE] = { "sge", 1, 2 },
+ [QOP_SLT] = { "slt", 1, 2 },
+
[QOP_FTOI] = { "ftoi", 1, 1 },
[QOP_RCP] = { "rcp", 1, 1 },
[QOP_RSQ] = { "rsq", 1, 1 },
QOP_FMAX,
QOP_FMINABS,
QOP_FMAXABS,
+
+ QOP_SEQ,
+ QOP_SNE,
+ QOP_SGE,
+ QOP_SLT,
+
QOP_FTOI,
QOP_RCP,
QOP_RSQ,
#define QPU_COND_MUL_SHIFT 46
#define QPU_COND_MUL_MASK QPU_MASK(48, 46)
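+/* "Set flags" bit: condition flags are updated from the add op's result,
+ * or from the mul op's result when the add op is a NOP.
+ */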
+#define QPU_SF ((uint64_t)1 << 45)
+
#define QPU_WADDR_ADD_SHIFT 38
#define QPU_WADDR_ADD_MASK QPU_MASK(43, 38)
#define QPU_WADDR_MUL_SHIFT 32
[QPU_PACK_A_8D_SAT] = ".8d.sat",
};
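+/* Suffixes for the cond_add / cond_mul write-condition codes. */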
+static const char *qpu_condflags[] = {
+ [QPU_COND_NEVER] = ".never",
+ [QPU_COND_ALWAYS] = "",
+ [QPU_COND_ZS] = ".zs",
+ [QPU_COND_ZC] = ".zc",
+ [QPU_COND_NS] = ".ns",
+ [QPU_COND_NC] = ".nc",
+ [QPU_COND_CS] = ".cs",
+ [QPU_COND_CC] = ".cc",
+};
+
#define DESC(array, index) \
((index >= ARRAY_SIZE(array) || !(array)[index]) ? \
"???" : (array)[index])
print_add_op(uint64_t inst)
{
uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
+ uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_ADD);
bool is_mov = (op_add == QPU_A_OR &&
QPU_GET_FIELD(inst, QPU_ADD_A) ==
QPU_GET_FIELD(inst, QPU_ADD_B));
- fprintf(stderr, "%s ", is_mov ? "mov" : DESC(qpu_add_opcodes, op_add));
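+ /* Print the .sf and condition suffixes; a NOP add op carries neither. */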
+ fprintf(stderr, "%s%s%s ",
+ is_mov ? "mov" : DESC(qpu_add_opcodes, op_add),
+ ((inst & QPU_SF) && op_add != QPU_A_NOP) ? ".sf" : "",
+ op_add != QPU_A_NOP ? DESC(qpu_condflags, cond) : "");
print_alu_dst(inst, false);
fprintf(stderr, ", ");
static void
print_mul_op(uint64_t inst)
{
+ uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL);
+ uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_MUL);
bool is_mov = (op_mul == QPU_M_V8MIN &&
QPU_GET_FIELD(inst, QPU_MUL_A) ==
QPU_GET_FIELD(inst, QPU_MUL_B));
- fprintf(stderr, "%s ", is_mov ? "mov" : DESC(qpu_mul_opcodes, op_mul));
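+ /* The SF bit applies to the mul op only when the add op is a NOP. */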
+ fprintf(stderr, "%s%s%s ",
+ is_mov ? "mov" : DESC(qpu_mul_opcodes, op_mul),
+ ((inst & QPU_SF) && op_add == QPU_A_NOP) ? ".sf" : "",
+ op_mul != QPU_M_NOP ? DESC(qpu_condflags, cond) : "");
print_alu_dst(inst, true);
fprintf(stderr, ", ");
print_load_imm(uint64_t inst)
{
uint32_t imm = inst;
+ uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+ uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+ uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+ uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
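+ /* A load_imm writes both the add and mul destinations, each under its
+ * own condition code.
+ */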
fprintf(stderr, "load_imm ");
print_alu_dst(inst, false);
- fprintf(stderr, ", ");
+ fprintf(stderr, "%s, ", (waddr_add != QPU_W_NOP ?
+ DESC(qpu_condflags, cond_add) : ""));
print_alu_dst(inst, true);
- fprintf(stderr, ", ");
+ fprintf(stderr, "%s, ", (waddr_mul != QPU_W_NOP ?
+ DESC(qpu_condflags, cond_mul) : ""));
fprintf(stderr, "0x%08x (%f)", imm, uif(imm));
}
}
}
+/**
+ * This is used to resolve the case where we register-allocate two
+ * different operands of an instruction to the same physical register
+ * file: each register file has only one source-address field in the
+ * instruction, so only one of the two addresses can be encoded.
+ *
+ * In that case, we need to move one to a temporary that can be used in the
+ * instruction, instead.
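+ *
+ * For example, if both operands were allocated to register file A at
+ * different addresses, only one of them can be named by raddr_a, so the
+ * other gets copied into the r5 accumulator first.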
+ */
+static void
+fixup_raddr_conflict(uint64_t *insts, uint32_t *ni,
+ struct qpu_reg src0, struct qpu_reg *src1)
+{
+ if ((src0.mux == QPU_MUX_A || src0.mux == QPU_MUX_B) &&
+ (src1->mux == QPU_MUX_A || src1->mux == QPU_MUX_B) &&
+ src0.addr != src1->addr) {
+ insts[(*ni)++] = qpu_inst(qpu_a_MOV(qpu_r5(), *src1),
+ qpu_m_NOP());
+ *src1 = qpu_r5();
+ }
+}
+
void
vc4_generate_code(struct qcompile *c)
{
M(FMUL),
};
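+ /* QPU condition under which each comparison is true, tested on the
+ * flags of (src0 - src1).
+ */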
+ static const uint32_t compareflags[] = {
+ [QOP_SEQ - QOP_SEQ] = QPU_COND_ZS,
+ [QOP_SNE - QOP_SEQ] = QPU_COND_ZC,
+ [QOP_SLT - QOP_SEQ] = QPU_COND_NS,
+ [QOP_SGE - QOP_SEQ] = QPU_COND_NC,
+ };
+
struct qpu_reg src[4];
for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
int index = qinst->src[i].index;
}
break;
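+ /* Comparisons are lowered to a flag-setting subtract whose result
+ * is discarded, an unconditional write of 0.0, and a conditional
+ * overwrite with 1.0, roughly:
+ *
+ * sub.sf -, src0, src1
+ * load_imm dst, 0.0
+ * load_imm.cond dst, 1.0 ; cond from compareflags[op - QOP_SEQ]
+ */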
+ case QOP_SEQ:
+ case QOP_SNE:
+ case QOP_SGE:
+ case QOP_SLT:
+ fixup_raddr_conflict(insts, &ni, src[0], &src[1]);
+ insts[ni++] = qpu_inst(qpu_a_SUB(qpu_ra(QPU_W_NOP),
+ src[0], src[1]),
+ qpu_m_NOP());
+ insts[ni - 1] |= QPU_SF;
+
+ insts[ni++] = qpu_load_imm_f(dst, 0.0);
+ insts[ni++] = qpu_load_imm_f(dst, 1.0);
+ insts[ni - 1] = ((insts[ni - 1] & ~QPU_COND_ADD_MASK)
+ | QPU_SET_FIELD(compareflags[qinst->op - QOP_SEQ],
+ QPU_COND_ADD));
+
+ break;
+
case QOP_VPM_WRITE:
insts[ni++] = qpu_inst(qpu_a_MOV(qpu_ra(QPU_W_VPM),
src[0]),
if (qir_get_op_nsrc(qinst->op) == 1)
src[1] = src[0];
- if ((src[0].mux == QPU_MUX_A || src[0].mux == QPU_MUX_B) &&
- (src[1].mux == QPU_MUX_A || src[1].mux == QPU_MUX_B) &&
- src[0].addr != src[1].addr) {
- insts[ni++] = qpu_inst(qpu_a_MOV(qpu_r5(), src[1]),
- qpu_m_NOP());
- src[1] = qpu_r5();
- }
+ fixup_raddr_conflict(insts, &ni, src[0], &src[1]);
if (translate[qinst->op].is_mul) {
insts[ni++] = qpu_inst(qpu_a_NOP(),