vc4: Allow pairing of some instructions that disagree about the WS bit.
author Eric Anholt <eric@anholt.net>
Fri, 5 Dec 2014 20:34:30 +0000 (12:34 -0800)
committer Eric Anholt <eric@anholt.net>
Sat, 6 Dec 2014 00:27:06 +0000 (16:27 -0800)
No difference on shader-db, because we tend to have a lot of other
conflicts going on as well (like RADDR_A disagreements).

src/gallium/drivers/vc4/vc4_qpu.c

index fc5d4b09dc23c2e563709307765bced43bcf3688..0e388905228f4ddf1a4cc65fd232571ff5ce18bf 100644 (file)
@@ -259,6 +259,37 @@ qpu_num_sf_accesses(uint64_t inst)
         return accesses;
 }
 
+/**
+ * Returns true if a write to the given write address behaves the same no
+ * matter how the instruction's WS bit is set.
+ *
+ * qpu_merge_inst() uses this to decide whether one instruction's WS value
+ * may be replaced by the WS value of the instruction it is paired with.
+ *
+ * Note that despite the "_pm" in the name, the only field this helper is
+ * consulted for is QPU_WS; QPU_PM still has to match exactly when merging.
+ *
+ * @param waddr  A QPU_W_* write address taken from the QPU_WADDR_ADD or
+ *               QPU_WADDR_MUL field of an instruction.
+ * @return true if the destination is unaffected by WS, false otherwise
+ *         (including for any write address not listed below).
+ */
+static bool
+qpu_waddr_ignores_pm(uint32_t waddr)
+{
+        switch(waddr) {
+        case QPU_W_ACC0:
+        case QPU_W_ACC1:
+        case QPU_W_ACC2:
+        case QPU_W_ACC3:
+        /* QPU_W_NOP is the placeholder qpu_merge_inst() hands to
+         * merge_fields() for the WADDR fields.  A NOP destination writes
+         * nothing, so WS can't matter for it; without this case an
+         * instruction that only writes an accumulator from one of its
+         * ADD/MUL halves would never count as ignoring WS.
+         */
+        case QPU_W_NOP:
+        case QPU_W_TLB_Z:
+        case QPU_W_TLB_COLOR_MS:
+        case QPU_W_TLB_COLOR_ALL:
+        case QPU_W_TLB_ALPHA_MASK:
+        case QPU_W_VPM:
+        case QPU_W_SFU_RECIP:
+        case QPU_W_SFU_RECIPSQRT:
+        case QPU_W_SFU_EXP:
+        case QPU_W_SFU_LOG:
+        case QPU_W_TMU0_S:
+        case QPU_W_TMU0_T:
+        case QPU_W_TMU0_R:
+        case QPU_W_TMU0_B:
+        case QPU_W_TMU1_S:
+        case QPU_W_TMU1_T:
+        case QPU_W_TMU1_R:
+        case QPU_W_TMU1_B:
+                return true;
+        }
+
+        /* Anything else is conservatively treated as depending on WS. */
+        return false;
+}
+
 uint64_t
 qpu_merge_inst(uint64_t a, uint64_t b)
 {
@@ -280,7 +311,7 @@ qpu_merge_inst(uint64_t a, uint64_t b)
                                 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
 
         /* Misc fields that have to match exactly. */
-        ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_WS | QPU_PM,
+        ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_PM,
                                 ~0);
 
         ok = ok && merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
@@ -293,6 +324,21 @@ qpu_merge_inst(uint64_t a, uint64_t b)
         ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
                                 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
 
+        /* Allow disagreement on WS (swapping A vs B physical reg file as the
+         * destination for ADD/MUL) if one of the original instructions
+         * ignores it (probably because it's just writing to accumulators).
+         */
+        if (qpu_waddr_ignores_pm(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
+            qpu_waddr_ignores_pm(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
+                merge = (merge & ~QPU_WS) | (b & QPU_WS);
+        } else if (qpu_waddr_ignores_pm(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
+                   qpu_waddr_ignores_pm(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
+                merge = (merge & ~QPU_WS) | (a & QPU_WS);
+        } else {
+                if ((a & QPU_WS) != (b & QPU_WS))
+                        return 0;
+        }
+
         if (ok)
                 return merge;
         else