vc4: Add shader-db dumping of NIR instruction count.

[mesa.git] / src / gallium / drivers / vc4 / vc4_qpu.c
diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c

index 7e38ede334278e67ca6afaf4b13470cc9ea52fc8..f67e3f8b768a84b69b8b512dbcf82ab6ef406e1b 100644 (file)
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -337,6 +337,11 @@ try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
                  return false;
          }
  
+        if (!(*merge & QPU_PM) &&
+            QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) {
+                return false;
+        }
+
          if (raddr_b_b != QPU_R_NOP &&
              raddr_b_b != raddr_a_a)
                  return false;
@@ -394,6 +399,24 @@ convert_mov(uint64_t *inst)
          return true;
  }
  
+static bool
+writes_a_file(uint64_t inst) /* Tests the write address chosen by QPU_WS. */
+{
+        if (!(inst & QPU_WS)) /* QPU_WS clear: look at the ADD write address */
+                return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32; /* NOTE(review): presumably < 32 == physical regfile address; confirm */
+        else /* QPU_WS set: look at the MUL write address instead */
+                return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32;
+}
+
+static bool
+reads_r4(uint64_t inst) /* True if any of the four input muxes is QPU_MUX_R4. */
+{
+        return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 || /* ADD_A mux */
+                QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 || /* ADD_B mux */
+                QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 || /* MUL_A mux */
+                QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4); /* MUL_B mux */
+}
+
  uint64_t
  qpu_merge_inst(uint64_t a, uint64_t b)
  {
@@ -470,6 +493,66 @@ qpu_merge_inst(uint64_t a, uint64_t b)
                          return 0;
          }
  
+        /* packing: Make sure that non-NOP packs agree, then deal with
+         * special-case failing of adding a non-NOP pack to something with a
+         * NOP pack.
+         */
+        if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
+                return 0;
+        bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
+                           QPU_GET_FIELD(merge, QPU_PACK));
+        bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
+                           QPU_GET_FIELD(merge, QPU_PACK));
+        if (!(merge & QPU_PM)) {
+                /* Make sure we're not going to be putting a new
+                 * a-file packing on either half.
+                 */
+                if (new_a_pack && writes_a_file(a))
+                        return 0;
+
+                if (new_b_pack && writes_a_file(b))
+                        return 0;
+        } else {
+                /* Make sure we're not going to be putting new MUL packing on
+                 * either half.
+                 */
+                if (new_a_pack && QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
+                        return 0;
+
+                if (new_b_pack && QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+                        return 0;
+        }
+
+        /* unpacking: Make sure that non-NOP unpacks agree, then deal with
+         * special-case failing of adding a non-NOP unpack to something with a
+         * NOP unpack.
+         */
+        if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
+                return 0;
+        bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
+                             QPU_GET_FIELD(merge, QPU_UNPACK));
+        bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
+                             QPU_GET_FIELD(merge, QPU_UNPACK));
+        if (!(merge & QPU_PM)) {
+                /* Make sure we're not going to be putting a new
+                 * a-file unpacking on either half.
+                 */
+                if (new_a_unpack && QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
+                        return 0;
+
+                if (new_b_unpack && QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
+                        return 0;
+        } else {
+                /* Make sure we're not going to be putting new r4 unpack on
+                 * either half.
+                 */
+                if (new_a_unpack && reads_r4(a))
+                        return 0;
+
+                if (new_b_unpack && reads_r4(b))
+                        return 0;
+        }
+
          if (ok)
                  return merge;
          else