#include "vc4_qir.h"
#include "vc4_qpu.h"
+/* Encodes an ALU input mux selector into the mux field named by muxfield.
+ *
+ * QPU_MUX_SMALL_IMM is replaced by QPU_MUX_B before encoding; every other
+ * selector is encoded unchanged.  set_src_raddr() stores a small immediate's
+ * value in the QPU_RADDR_B field, which is presumably why mux B is the
+ * selector used for it -- confirm against the hardware documentation.
+ *
+ * "mux" is parenthesized so compound expressions group as intended.  It is
+ * still expanded twice, so callers must pass an expression without side
+ * effects.  "muxfield" is forwarded to QPU_SET_FIELD verbatim.
+ */
+#define QPU_MUX(mux, muxfield) \
+        QPU_SET_FIELD(((mux) != QPU_MUX_SMALL_IMM ? (mux) : QPU_MUX_B), \
+                      muxfield)
+
static uint64_t
set_src_raddr(uint64_t inst, struct qpu_reg src)
{
}
if (src.mux == QPU_MUX_B) {
- assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
- QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
+ assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
+ QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) &&
+ QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM);
return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);
}
+ if (src.mux == QPU_MUX_SMALL_IMM) {
+ if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
+ assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
+ } else {
+ inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM);
+ assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP);
+ }
+ return ((inst & ~QPU_RADDR_B_MASK) |
+ QPU_SET_FIELD(src.addr, QPU_RADDR_B));
+ }
+
return inst;
}
{
uint64_t inst = 0;
+ inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
inst |= qpu_a_dst(dst);
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
- inst |= QPU_SET_FIELD(src.mux, QPU_ADD_A);
- inst |= QPU_SET_FIELD(src.mux, QPU_ADD_B);
+ inst |= QPU_MUX(src.mux, QPU_ADD_A);
+ inst |= QPU_MUX(src.mux, QPU_ADD_B);
inst = set_src_raddr(inst, src);
- inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
return inst;
{
uint64_t inst = 0;
+ inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
inst |= qpu_m_dst(dst);
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
- inst |= QPU_SET_FIELD(src.mux, QPU_MUL_A);
- inst |= QPU_SET_FIELD(src.mux, QPU_MUL_B);
+ inst |= QPU_MUX(src.mux, QPU_MUL_A);
+ inst |= QPU_MUX(src.mux, QPU_MUL_B);
inst = set_src_raddr(inst, src);
- inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
return inst;
return inst;
}
+/* Builds the instruction returned by qpu_load_imm_ui(dst, val) and ORs
+ * QPU_LOAD_IMM_MODE_U2 into its QPU_LOAD_IMM_MODE field.
+ */
+uint64_t
+qpu_load_imm_u2(struct qpu_reg dst, uint32_t val)
+{
+        const uint64_t mode_bits = QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2,
+                                                 QPU_LOAD_IMM_MODE);
+
+        return mode_bits | qpu_load_imm_ui(dst, val);
+}
+
+/* Builds the instruction returned by qpu_load_imm_ui(dst, val) and ORs
+ * QPU_LOAD_IMM_MODE_I2 into its QPU_LOAD_IMM_MODE field.
+ */
+uint64_t
+qpu_load_imm_i2(struct qpu_reg dst, uint32_t val)
+{
+        const uint64_t mode_bits = QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2,
+                                                 QPU_LOAD_IMM_MODE);
+
+        return mode_bits | qpu_load_imm_ui(dst, val);
+}
+
+/* Assembles a branch instruction word.
+ *
+ * cond is placed in the QPU_BRANCH_COND field and target in the
+ * QPU_BRANCH_TARGET field; the signal field is set to QPU_SIG_BRANCH.  Both
+ * ALU destinations are built from QPU_W_NOP.
+ */
+uint64_t
+qpu_branch(uint32_t cond, uint32_t target)
+{
+        /* Kept as separate statements so the helper calls stay in order. */
+        const uint64_t add_dst = qpu_a_dst(qpu_ra(QPU_W_NOP));
+        const uint64_t mul_dst = qpu_m_dst(qpu_rb(QPU_W_NOP));
+
+        return add_dst |
+               mul_dst |
+               QPU_SET_FIELD(cond, QPU_BRANCH_COND) |
+               QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG) |
+               QPU_SET_FIELD(target, QPU_BRANCH_TARGET);
+}
+
uint64_t
qpu_a_alu2(enum qpu_op_add op,
struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
{
uint64_t inst = 0;
+ inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
inst |= qpu_a_dst(dst);
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
- inst |= QPU_SET_FIELD(src0.mux, QPU_ADD_A);
+ inst |= QPU_MUX(src0.mux, QPU_ADD_A);
inst = set_src_raddr(inst, src0);
- inst |= QPU_SET_FIELD(src1.mux, QPU_ADD_B);
+ inst |= QPU_MUX(src1.mux, QPU_ADD_B);
inst = set_src_raddr(inst, src1);
- inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
return inst;
{
uint64_t inst = 0;
+ inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
inst |= qpu_m_dst(dst);
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
- inst |= QPU_SET_FIELD(src0.mux, QPU_MUL_A);
+ inst |= QPU_MUX(src0.mux, QPU_MUL_A);
inst = set_src_raddr(inst, src0);
- inst |= QPU_SET_FIELD(src1.mux, QPU_MUL_B);
+ inst |= QPU_MUX(src1.mux, QPU_MUL_B);
inst = set_src_raddr(inst, src1);
- inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
return inst;
}
+/* Starts from qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0), then switches the
+ * signal field to QPU_SIG_SMALL_IMM and stores QPU_SMALL_IMM_MUL_ROT + rot
+ * in the QPU_SMALL_IMM field.  rot is not range-checked here.
+ */
+uint64_t
+qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot)
+{
+        uint64_t rotated = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0);
+
+        rotated = QPU_UPDATE_FIELD(rotated, QPU_SIG_SMALL_IMM, QPU_SIG);
+        rotated = QPU_UPDATE_FIELD(rotated,
+                                   QPU_SMALL_IMM_MUL_ROT + rot,
+                                   QPU_SMALL_IMM);
+        return rotated;
+}
+
static bool
merge_fields(uint64_t *merge,
uint64_t a, uint64_t b,
if (raddr_a == QPU_R_MUTEX_ACQUIRE)
accesses++;
- if (raddr_b == QPU_R_MUTEX_ACQUIRE)
+ if (raddr_b == QPU_R_MUTEX_ACQUIRE &&
+ QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM)
accesses++;
/* XXX: semaphore, combined color read/write? */
case QPU_W_ACC1:
case QPU_W_ACC2:
case QPU_W_ACC3:
+ case QPU_W_NOP:
case QPU_W_TLB_Z:
case QPU_W_TLB_COLOR_MS:
case QPU_W_TLB_COLOR_ALL:
return false;
}
+ if (!(*merge & QPU_PM) &&
+ QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) {
+ return false;
+ }
+
if (raddr_b_b != QPU_R_NOP &&
raddr_b_b != raddr_a_a)
return false;
return true;
}
+/* Reports whether the write address selected by the QPU_WS bit is below 32:
+ * QPU_WADDR_ADD is examined when QPU_WS is clear, QPU_WADDR_MUL when it is
+ * set.  NOTE(review): presumably addresses below 32 are the physical
+ * registers of regfile A -- confirm against the hardware documentation.
+ */
+static bool
+writes_a_file(uint64_t inst)
+{
+        const bool ws_set = (inst & QPU_WS) != 0;
+
+        return ws_set ? QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32
+                      : QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32;
+}
+
+/* Returns true when any of the four ALU input mux fields (QPU_ADD_A,
+ * QPU_ADD_B, QPU_MUL_A, QPU_MUL_B) of inst equals QPU_MUX_R4.
+ */
+static bool
+reads_r4(uint64_t inst)
+{
+        if (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4)
+                return true;
+        if (QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4)
+                return true;
+        if (QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4)
+                return true;
+
+        return QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4;
+}
+
uint64_t
qpu_merge_inst(uint64_t a, uint64_t b)
{
uint64_t merge = a | b;
bool ok = true;
+ uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG);
+ uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG);
if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
return 0;
- if (QPU_GET_FIELD(a, QPU_SIG) == QPU_SIG_LOAD_IMM ||
- QPU_GET_FIELD(b, QPU_SIG) == QPU_SIG_LOAD_IMM) {
+ if (a_sig == QPU_SIG_LOAD_IMM ||
+ b_sig == QPU_SIG_LOAD_IMM ||
+ a_sig == QPU_SIG_SMALL_IMM ||
+ b_sig == QPU_SIG_SMALL_IMM ||
+ a_sig == QPU_SIG_BRANCH ||
+ b_sig == QPU_SIG_BRANCH) {
return 0;
}
QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
/* Misc fields that have to match exactly. */
- ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_PM,
- ~0);
+ ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);
if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
return 0;
}
+ if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
+ /* If one instruction has PM bit set and the other not, the
+ * one without PM shouldn't do packing/unpacking, and we
+ * have to make sure non-NOP packing/unpacking from PM
+ * instruction aren't added to it.
+ */
+ uint64_t temp;
+
+ /* Let a be the one with PM bit */
+ if (!(a & QPU_PM)) {
+ temp = a;
+ a = b;
+ b = temp;
+ }
+
+ if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
+ return 0;
+
+ if ((a & QPU_PACK_MASK) != 0 &&
+ QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+ return 0;
+
+ if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
+ return 0;
+ } else {
+ /* packing: Make sure that non-NOP packs agree, then deal with
+ * special-case failing of adding a non-NOP pack to something
+ * with a NOP pack.
+ */
+ if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
+ return 0;
+ bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
+ QPU_GET_FIELD(merge, QPU_PACK));
+ bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
+ QPU_GET_FIELD(merge, QPU_PACK));
+ if (!(merge & QPU_PM)) {
+ /* Make sure we're not going to be putting a new
+ * a-file packing on either half.
+ */
+ if (new_a_pack && writes_a_file(a))
+ return 0;
+
+ if (new_b_pack && writes_a_file(b))
+ return 0;
+ } else {
+ /* Make sure we're not going to be putting new MUL
+ * packing on either half.
+ */
+ if (new_a_pack &&
+ QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
+ return 0;
+
+ if (new_b_pack &&
+ QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+ return 0;
+ }
+
+ /* unpacking: Make sure that non-NOP unpacks agree, then deal
+ * with special-case failing of adding a non-NOP unpack to
+ * something with a NOP unpack.
+ */
+ if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
+ return 0;
+ bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
+ QPU_GET_FIELD(merge, QPU_UNPACK));
+ bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
+ QPU_GET_FIELD(merge, QPU_UNPACK));
+ if (!(merge & QPU_PM)) {
+ /* Make sure we're not going to be putting a new
+ * a-file unpacking on either half.
+ */
+ if (new_a_unpack &&
+ QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
+ return 0;
+
+ if (new_b_unpack &&
+ QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
+ return 0;
+ } else {
+ /* Make sure we're not going to be putting new r4
+ * unpack on either half.
+ */
+ if (new_a_unpack && reads_r4(a))
+ return 0;
+
+ if (new_b_unpack && reads_r4(b))
+ return 0;
+ }
+ }
+
if (ok)
return merge;
else
sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
}
+/**
+ * Returns the small immediate value to be encoded into the raddr b field if
+ * the argument can be represented as one, or ~0 otherwise.
+ *
+ * Encodings produced:
+ *   - integers 0 .. 15                       ->  0 .. 15
+ *   - integers -16 .. -1 (two's complement)  -> 16 .. 31
+ *   - binary32 bit patterns of 1.0, 2.0, 4.0, ..., 128.0      -> 32 .. 39
+ *   - binary32 bit patterns of 1/256, 1/128, 1/64, ..., 1/2   -> 40 .. 47
+ *
+ * The argument is always interpreted as a raw 32-bit pattern; no float
+ * conversion takes place.
+ */
+uint32_t
+qpu_encode_small_immediate(uint32_t i)
+{
+        if (i <= 15)
+                return i;
+
+        /* -16 .. -1 as 32-bit two's complement.  The test is done on the
+         * unsigned value so no out-of-range unsigned->int conversion (which
+         * is implementation-defined) is needed; the addition wraps modulo
+         * 2^32 and lands on 16 .. 31.
+         */
+        if (i >= 0xfffffff0u)
+                return i + 32;
+
+        switch (i) {
+        case 0x3f800000: /* 1.0 */
+                return 32;
+        case 0x40000000: /* 2.0 */
+                return 33;
+        case 0x40800000: /* 4.0 */
+                return 34;
+        case 0x41000000: /* 8.0 */
+                return 35;
+        case 0x41800000: /* 16.0 */
+                return 36;
+        case 0x42000000: /* 32.0 */
+                return 37;
+        case 0x42800000: /* 64.0 */
+                return 38;
+        case 0x43000000: /* 128.0 */
+                return 39;
+        case 0x3b800000: /* 1/256 */
+                return 40;
+        case 0x3c000000: /* 1/128 */
+                return 41;
+        case 0x3c800000: /* 1/64 */
+                return 42;
+        case 0x3d000000: /* 1/32 */
+                return 43;
+        case 0x3d800000: /* 1/16 */
+                return 44;
+        case 0x3e000000: /* 1/8 */
+                return 45;
+        case 0x3e800000: /* 1/4 */
+                return 46;
+        case 0x3f000000: /* 1/2 */
+                return 47;
+        default:
+                break;
+        }
+
+        /* Not representable as a small immediate. */
+        return ~0;
+}
+
void
qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
{