+/* Returns true when waddr is one of the accumulator, NOP, TLB, VPM, SFU or
+ * TMU write addresses enumerated below, and false for every other value.
+ *
+ * NOTE(review): going by the name, these are destinations for which the
+ * QPU_WS bit makes no difference -- confirm against the QPU documentation.
+ */
+static bool
+qpu_waddr_ignores_ws(uint32_t waddr)
+{
+        bool ignores_ws;
+
+        switch (waddr) {
+        /* Accumulators and the NOP destination. */
+        case QPU_W_ACC0:
+        case QPU_W_ACC1:
+        case QPU_W_ACC2:
+        case QPU_W_ACC3:
+        case QPU_W_NOP:
+        /* TLB and VPM writes. */
+        case QPU_W_TLB_Z:
+        case QPU_W_TLB_COLOR_MS:
+        case QPU_W_TLB_COLOR_ALL:
+        case QPU_W_TLB_ALPHA_MASK:
+        case QPU_W_VPM:
+        /* SFU operand writes. */
+        case QPU_W_SFU_RECIP:
+        case QPU_W_SFU_RECIPSQRT:
+        case QPU_W_SFU_EXP:
+        case QPU_W_SFU_LOG:
+        /* TMU0/TMU1 coordinate writes. */
+        case QPU_W_TMU0_S:
+        case QPU_W_TMU0_T:
+        case QPU_W_TMU0_R:
+        case QPU_W_TMU0_B:
+        case QPU_W_TMU1_S:
+        case QPU_W_TMU1_T:
+        case QPU_W_TMU1_R:
+        case QPU_W_TMU1_B:
+                ignores_ws = true;
+                break;
+        default:
+                ignores_ws = false;
+                break;
+        }
+
+        return ignores_ws;
+}
+
+/* If the 3-bit mux field located at mux_shift in *a selects QPU_MUX_A,
+ * rewrites that same field to QPU_MUX_B in both *a and *merge.  When the
+ * field in *a holds anything else, neither instruction is touched.
+ */
+static void
+swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)
+{
+        const uint64_t field = (uint64_t)0x7 << mux_shift;
+        const uint64_t selects_a = (uint64_t)QPU_MUX_A << mux_shift;
+        const uint64_t selects_b = (uint64_t)QPU_MUX_B << mux_shift;
+
+        /* Only operands currently routed from mux A need redirecting. */
+        if ((*a & field) != selects_a)
+                return;
+
+        *a &= ~field;
+        *a |= selects_b;
+
+        *merge &= ~field;
+        *merge |= selects_b;
+}
+
+/* Attempts to move instruction a's raddr A read over to the raddr B slot.
+ *
+ * On success, *a has QPU_R_NOP in raddr A and its former raddr A value in
+ * raddr B; *merge gets b's raddr A in raddr A and a's former raddr A value
+ * in raddr B; and every ALU input mux of *a that selected QPU_MUX_A is
+ * switched to QPU_MUX_B in both *a and *merge.  *b is only read.
+ *
+ * Returns false, without modifying anything, when the swap is refused.
+ */
+static bool
+try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
+{
+        static const uint32_t mux_shifts[] = {
+                QPU_ADD_A_SHIFT,
+                QPU_ADD_B_SHIFT,
+                QPU_MUL_A_SHIFT,
+                QPU_MUL_B_SHIFT,
+        };
+        const uint32_t a_ra = QPU_GET_FIELD(*a, QPU_RADDR_A);
+        const uint32_t a_rb = QPU_GET_FIELD(*a, QPU_RADDR_B);
+        const uint32_t b_ra = QPU_GET_FIELD(*b, QPU_RADDR_A);
+        const uint32_t b_rb = QPU_GET_FIELD(*b, QPU_RADDR_B);
+
+        /* Instruction a must have its raddr B slot free. */
+        if (a_rb != QPU_R_NOP)
+                return false;
+
+        /* Only uniform and varying reads are candidates for the move. */
+        if (a_ra != QPU_R_UNIF && a_ra != QPU_R_VARY)
+                return false;
+
+        /* Refuse when the merged instruction has PM clear and a non-NOP
+         * unpack mode.
+         * NOTE(review): presumably because that unpack applies to the raddr A
+         * read and would be lost by moving it -- confirm.
+         */
+        if (!(*merge & QPU_PM)) {
+                if (QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP)
+                        return false;
+        }
+
+        /* b's raddr B must be unused or already equal to the moved value. */
+        if (b_rb != QPU_R_NOP && b_rb != a_ra)
+                return false;
+
+        /* Move raddr A to B in instruction a. */
+        *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
+        *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(a_ra, QPU_RADDR_B);
+
+        /* The merged instruction reads b's raddr A plus the moved value. */
+        *merge = QPU_UPDATE_FIELD(*merge, b_ra, QPU_RADDR_A);
+        *merge = QPU_UPDATE_FIELD(*merge, a_ra, QPU_RADDR_B);
+
+        /* Redirect each ALU input that was fed from mux A. */
+        for (unsigned i = 0; i < sizeof(mux_shifts) / sizeof(mux_shifts[0]); i++)
+                swap_ra_file_mux_helper(merge, a, mux_shifts[i]);
+
+        return true;
+}
+
+/* Rewrites a MOV issued on the add ALU (an OR whose two inputs use the same
+ * mux) so that it is issued on the mul ALU instead, as a V8MIN of the source
+ * with itself.  The add ALU is left as a NOP with QPU_MUX_R0 inputs, a
+ * QPU_W_NOP destination and QPU_COND_NEVER; the original destination and
+ * condition move to the mul fields.
+ *
+ * Returns false and leaves *inst unchanged when the instruction is not such
+ * a MOV, carries a signal other than QPU_SIG_NONE, or has QPU_PM set.
+ */
+static bool
+convert_mov(uint64_t *inst)
+{
+        uint64_t mov = *inst;
+        const uint32_t src_mux = QPU_GET_FIELD(mov, QPU_ADD_A);
+        const uint32_t dst = QPU_GET_FIELD(mov, QPU_WADDR_ADD);
+        const uint32_t cond = QPU_GET_FIELD(mov, QPU_COND_ADD);
+
+        /* Is it a MOV? */
+        if (QPU_GET_FIELD(mov, QPU_OP_ADD) != QPU_A_OR)
+                return false;
+        if (src_mux != QPU_GET_FIELD(mov, QPU_ADD_B))
+                return false;
+
+        if (QPU_GET_FIELD(mov, QPU_SIG) != QPU_SIG_NONE)
+                return false;
+
+        /* We could maybe support this in the .8888 and .8a-.8d cases. */
+        if (mov & QPU_PM)
+                return false;
+
+        /* Swap the opcodes: add ALU goes idle, mul ALU does the move. */
+        mov = QPU_UPDATE_FIELD(mov, QPU_A_NOP, QPU_OP_ADD);
+        mov = QPU_UPDATE_FIELD(mov, QPU_M_V8MIN, QPU_OP_MUL);
+
+        /* Route the source into both mul inputs and park the add inputs. */
+        mov = QPU_UPDATE_FIELD(mov, src_mux, QPU_MUL_A);
+        mov = QPU_UPDATE_FIELD(mov, src_mux, QPU_MUL_B);
+        mov = QPU_UPDATE_FIELD(mov, QPU_MUX_R0, QPU_ADD_A);
+        mov = QPU_UPDATE_FIELD(mov, QPU_MUX_R0, QPU_ADD_B);
+
+        /* Hand the destination over to the mul ALU. */
+        mov = QPU_UPDATE_FIELD(mov, dst, QPU_WADDR_MUL);
+        mov = QPU_UPDATE_FIELD(mov, QPU_W_NOP, QPU_WADDR_ADD);
+
+        /* Likewise for the condition code. */
+        mov = QPU_UPDATE_FIELD(mov, cond, QPU_COND_MUL);
+        mov = QPU_UPDATE_FIELD(mov, QPU_COND_NEVER, QPU_COND_ADD);
+
+        /* Flip the write-swap bit unless the destination is one of the
+         * addresses qpu_waddr_ignores_ws() reports as unaffected by it.
+         */
+        if (!qpu_waddr_ignores_ws(dst))
+                mov ^= QPU_WS;
+
+        *inst = mov;
+        return true;
+}
+
+/* Returns true when the write address selected by the QPU_WS bit is below
+ * 32: the add ALU's write address is tested when QPU_WS is clear, the mul
+ * ALU's when it is set.
+ *
+ * NOTE(review): per the function name, that is taken to be the write that
+ * targets register file A -- confirm against the QPU documentation.
+ */
+static bool
+writes_a_file(uint64_t inst)
+{
+        return (inst & QPU_WS)
+                ? QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32
+                : QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32;
+}
+
+/* Returns true if any of the four ALU input muxes of inst (add A, add B,
+ * mul A, mul B) selects QPU_MUX_R4.
+ */
+static bool
+reads_r4(uint64_t inst)
+{
+        if (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4)
+                return true;
+        if (QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4)
+                return true;
+        if (QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4)
+                return true;
+
+        return QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4;
+}
+