X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_qpu.c;h=380b9f43c595cd6e55cc365cbb6092dc390524e1;hb=ce785f5ffd7dbed14a3909164e55a975a023ee97;hp=52c06ae551744a9b3aabba72a0601de94a49329b;hpb=1f0e1060503e9e700c22a07fa050c47ef5257a40;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c index 52c06ae5517..380b9f43c59 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.c +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -26,6 +26,9 @@ #include "vc4_qir.h" #include "vc4_qpu.h" +#define QPU_MUX(mux, muxfield) \ + QPU_SET_FIELD(mux != QPU_MUX_SMALL_IMM ? mux : QPU_MUX_B, muxfield) + static uint64_t set_src_raddr(uint64_t inst, struct qpu_reg src) { @@ -36,11 +39,23 @@ set_src_raddr(uint64_t inst, struct qpu_reg src) } if (src.mux == QPU_MUX_B) { - assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP || - QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr); + assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP || + QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) && + QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM); return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B); } + if (src.mux == QPU_MUX_SMALL_IMM) { + if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) { + assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr); + } else { + inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM); + assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP); + } + return ((inst & ~QPU_RADDR_B_MASK) | + QPU_SET_FIELD(src.addr, QPU_RADDR_B)); + } + return inst; } @@ -101,15 +116,15 @@ qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) { uint64_t inst = 0; + inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD); inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); inst |= qpu_a_dst(dst); inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); - inst |= QPU_SET_FIELD(src.mux, QPU_ADD_A); - inst |= QPU_SET_FIELD(src.mux, QPU_ADD_B); + inst |= QPU_MUX(src.mux, 
QPU_ADD_A); + inst |= QPU_MUX(src.mux, QPU_ADD_B); inst = set_src_raddr(inst, src); - inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); return inst; @@ -120,15 +135,15 @@ qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) { uint64_t inst = 0; + inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL); inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); inst |= qpu_m_dst(dst); inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); - inst |= QPU_SET_FIELD(src.mux, QPU_MUL_A); - inst |= QPU_SET_FIELD(src.mux, QPU_MUL_B); + inst |= QPU_MUX(src.mux, QPU_MUL_A); + inst |= QPU_MUX(src.mux, QPU_MUL_B); inst = set_src_raddr(inst, src); - inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); return inst; @@ -149,22 +164,50 @@ qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) return inst; } +uint64_t +qpu_load_imm_u2(struct qpu_reg dst, uint32_t val) +{ + return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2, + QPU_LOAD_IMM_MODE); +} + +uint64_t +qpu_load_imm_i2(struct qpu_reg dst, uint32_t val) +{ + return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2, + QPU_LOAD_IMM_MODE); +} + +uint64_t +qpu_branch(uint32_t cond, uint32_t target) +{ + uint64_t inst = 0; + + inst |= qpu_a_dst(qpu_ra(QPU_W_NOP)); + inst |= qpu_m_dst(qpu_rb(QPU_W_NOP)); + inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND); + inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG); + inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET); + + return inst; +} + uint64_t qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) { uint64_t inst = 0; + inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); inst |= QPU_SET_FIELD(op, QPU_OP_ADD); inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); inst |= qpu_a_dst(dst); inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); - 
inst |= QPU_SET_FIELD(src0.mux, QPU_ADD_A); + inst |= QPU_MUX(src0.mux, QPU_ADD_A); inst = set_src_raddr(inst, src0); - inst |= QPU_SET_FIELD(src1.mux, QPU_ADD_B); + inst |= QPU_MUX(src1.mux, QPU_ADD_B); inst = set_src_raddr(inst, src1); - inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); return inst; @@ -176,21 +219,34 @@ qpu_m_alu2(enum qpu_op_mul op, { uint64_t inst = 0; + inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); inst |= QPU_SET_FIELD(op, QPU_OP_MUL); inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); inst |= qpu_m_dst(dst); inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); - inst |= QPU_SET_FIELD(src0.mux, QPU_MUL_A); + inst |= QPU_MUX(src0.mux, QPU_MUL_A); inst = set_src_raddr(inst, src0); - inst |= QPU_SET_FIELD(src1.mux, QPU_MUL_B); + inst |= QPU_MUX(src1.mux, QPU_MUL_B); inst = set_src_raddr(inst, src1); - inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); return inst; } +uint64_t +qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot) +{ + uint64_t inst = 0; + inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0); + + inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG); + inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot, + QPU_SMALL_IMM); + + return inst; +} + static bool merge_fields(uint64_t *merge, uint64_t a, uint64_t b, @@ -243,7 +299,8 @@ qpu_num_sf_accesses(uint64_t inst) if (raddr_a == QPU_R_MUTEX_ACQUIRE) accesses++; - if (raddr_b == QPU_R_MUTEX_ACQUIRE) + if (raddr_b == QPU_R_MUTEX_ACQUIRE && + QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM) accesses++; /* XXX: semaphore, combined color read/write? 
*/ @@ -266,6 +323,7 @@ qpu_waddr_ignores_ws(uint32_t waddr) case QPU_W_ACC1: case QPU_W_ACC2: case QPU_W_ACC3: + case QPU_W_NOP: case QPU_W_TLB_Z: case QPU_W_TLB_COLOR_MS: case QPU_W_TLB_COLOR_ALL: @@ -321,6 +379,11 @@ try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b) return false; } + if (!(*merge & QPU_PM) && + QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) { + return false; + } + if (raddr_b_b != QPU_R_NOP && raddr_b_b != raddr_a_a) return false; @@ -378,11 +441,31 @@ convert_mov(uint64_t *inst) return true; } +static bool +writes_a_file(uint64_t inst) +{ + if (!(inst & QPU_WS)) + return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32; + else + return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32; +} + +static bool +reads_r4(uint64_t inst) +{ + return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 || + QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 || + QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 || + QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4); +} + uint64_t qpu_merge_inst(uint64_t a, uint64_t b) { uint64_t merge = a | b; bool ok = true; + uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG); + uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG); if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP && QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) { @@ -402,8 +485,12 @@ qpu_merge_inst(uint64_t a, uint64_t b) if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b)) return 0; - if (QPU_GET_FIELD(a, QPU_SIG) == QPU_SIG_LOAD_IMM || - QPU_GET_FIELD(b, QPU_SIG) == QPU_SIG_LOAD_IMM) { + if (a_sig == QPU_SIG_LOAD_IMM || + b_sig == QPU_SIG_LOAD_IMM || + a_sig == QPU_SIG_SMALL_IMM || + b_sig == QPU_SIG_SMALL_IMM || + a_sig == QPU_SIG_BRANCH || + b_sig == QPU_SIG_BRANCH) { return 0; } @@ -411,8 +498,7 @@ qpu_merge_inst(uint64_t a, uint64_t b) QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); /* Misc fields that have to match exactly. 
*/ - ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_PM, - ~0); + ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0); if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK, QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) { @@ -450,6 +536,96 @@ qpu_merge_inst(uint64_t a, uint64_t b) return 0; } + if (!merge_fields(&merge, a, b, QPU_PM, ~0)) { + /* If one instruction has PM bit set and the other not, the + * one without PM shouldn't do packing/unpacking, and we + * have to make sure non-NOP packing/unpacking from the PM + * instruction aren't added to it. + */ + uint64_t temp; + + /* Let a be the one with PM bit */ + if (!(a & QPU_PM)) { + temp = a; + a = b; + b = temp; + } + + if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0) + return 0; + + if ((a & QPU_PACK_MASK) != 0 && + QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) + return 0; + + if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b)) + return 0; + } else { + /* packing: Make sure that non-NOP packs agree, then deal with + * special-case failing of adding a non-NOP pack to something + * with a NOP pack. + */ + if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0)) + return 0; + bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) != + QPU_GET_FIELD(merge, QPU_PACK)); + bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) != + QPU_GET_FIELD(merge, QPU_PACK)); + if (!(merge & QPU_PM)) { + /* Make sure we're not going to be putting a new + * a-file packing on either half. + */ + if (new_a_pack && writes_a_file(a)) + return 0; + + if (new_b_pack && writes_a_file(b)) + return 0; + } else { + /* Make sure we're not going to be putting new MUL + * packing on either half. + */ + if (new_a_pack && + QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP) + return 0; + + if (new_b_pack && + QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) + return 0; + } + + /* unpacking: Make sure that non-NOP unpacks agree, then deal + * with special-case failing of adding a non-NOP unpack to + * something with a NOP unpack. 
+ */ + if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0)) + return 0; + bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) != + QPU_GET_FIELD(merge, QPU_UNPACK)); + bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) != + QPU_GET_FIELD(merge, QPU_UNPACK)); + if (!(merge & QPU_PM)) { + /* Make sure we're not going to be putting a new + * a-file unpacking on either half. + */ + if (new_a_unpack && + QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP) + return 0; + + if (new_b_unpack && + QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP) + return 0; + } else { + /* Make sure we're not going to be putting new r4 + * unpack on either half. + */ + if (new_a_unpack && reads_r4(a)) + return 0; + + if (new_b_unpack && reads_r4(b)) + return 0; + } + } + if (ok) return merge; else @@ -501,6 +677,56 @@ qpu_inst_is_tlb(uint64_t inst) sig == QPU_SIG_WAIT_FOR_SCOREBOARD); } +/** + * Returns the small immediate value to be encoded into the raddr b field if + * the argument can be represented as one, or ~0 otherwise. + */ +uint32_t +qpu_encode_small_immediate(uint32_t i) +{ + if (i <= 15) + return i; + if ((int)i < 0 && (int)i >= -16) + return i + 32; + + switch (i) { + case 0x3f800000: + return 32; + case 0x40000000: + return 33; + case 0x40800000: + return 34; + case 0x41000000: + return 35; + case 0x41800000: + return 36; + case 0x42000000: + return 37; + case 0x42800000: + return 38; + case 0x43000000: + return 39; + case 0x3b800000: + return 40; + case 0x3c000000: + return 41; + case 0x3c800000: + return 42; + case 0x3d000000: + return 43; + case 0x3d800000: + return 44; + case 0x3e000000: + return 45; + case 0x3e800000: + return 46; + case 0x3f000000: + return 47; + } + + return ~0; +} + void qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst) {