vc4: Add QPU support for generating BRANCH instructions.
author Eric Anholt <eric@anholt.net>
Tue, 15 Mar 2016 20:43:54 +0000 (13:43 -0700)
committer Eric Anholt <eric@anholt.net>
Wed, 13 Jul 2016 00:42:38 +0000 (17:42 -0700)
src/gallium/drivers/vc4/vc4_qpu.c
src/gallium/drivers/vc4/vc4_qpu.h
src/gallium/drivers/vc4/vc4_qpu_defines.h
src/gallium/drivers/vc4/vc4_qpu_disasm.c
src/gallium/drivers/vc4/vc4_qpu_validate.c

index 6aa6b24d94c9ea6b6ec4ca89064bdac969184bf5..cf74c42439100c61f28be8e759654af1c2ea1e73 100644 (file)
@@ -164,6 +164,20 @@ qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
         return inst;
 }
 
+uint64_t
+qpu_branch(uint32_t cond, uint32_t target)
+{ /* Returns a 64-bit instruction word whose QPU_SIG field is QPU_SIG_BRANCH. */
+        uint64_t inst = 0; /* QPU_BRANCH_REL / QPU_BRANCH_REG are not ORed in below */
+
+        inst |= qpu_a_dst(qpu_ra(QPU_W_NOP)); /* presumably: add-side write goes to NOP -- confirm */
+        inst |= qpu_m_dst(qpu_rb(QPU_W_NOP)); /* presumably: mul-side write goes to NOP -- confirm */
+        inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND); /* cond: expected to be an enum qpu_branch_cond value */
+        inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG);
+        inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET); /* QPU_BRANCH_TARGET is the shift-0 field */
+
+        return inst;
+}
+
 uint64_t
 qpu_a_alu2(enum qpu_op_add op,
            struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
@@ -446,7 +460,9 @@ qpu_merge_inst(uint64_t a, uint64_t b)
         if (a_sig == QPU_SIG_LOAD_IMM ||
             b_sig == QPU_SIG_LOAD_IMM ||
             a_sig == QPU_SIG_SMALL_IMM ||
-            b_sig == QPU_SIG_SMALL_IMM) {
+            b_sig == QPU_SIG_SMALL_IMM ||
+            a_sig == QPU_SIG_BRANCH ||
+            b_sig == QPU_SIG_BRANCH) {
                 return 0;
         }
 
index 83fa36e6b2c89ae313273bb44eea7d4f03e25a34..a0aac1587decd0cc17b71751da16411376e4392c 100644 (file)
@@ -143,6 +143,7 @@ uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
                     struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
 uint64_t qpu_merge_inst(uint64_t a, uint64_t b) ATTRIBUTE_CONST;
 uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_branch(uint32_t cond, uint32_t target) ATTRIBUTE_CONST;
 uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
 uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
 uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
@@ -238,4 +239,7 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst);
 void
 vc4_qpu_disasm_cond(FILE *out, uint32_t cond);
 
+void
+vc4_qpu_disasm_cond_branch(FILE *out, uint32_t cond);
+
 #endif /* VC4_QPU_H */
index 626dc3be6bed7f5c463e13383bb1547f4d75388e..2ffa68663b1cc601fc3007ad29ce6c6817f07fbe 100644 (file)
@@ -165,6 +165,23 @@ enum qpu_cond {
         QPU_COND_CC,
 };
 
+enum qpu_branch_cond { /* values stored in the QPU_BRANCH_COND instruction field */
+        QPU_COND_BRANCH_ALL_ZS, /* 0 (implicit numbering starts here) */
+        QPU_COND_BRANCH_ALL_ZC, /* 1 */
+        QPU_COND_BRANCH_ANY_ZS, /* 2 */
+        QPU_COND_BRANCH_ANY_ZC, /* 3 */
+        QPU_COND_BRANCH_ALL_NS, /* 4 */
+        QPU_COND_BRANCH_ALL_NC, /* 5 */
+        QPU_COND_BRANCH_ANY_NS, /* 6 */
+        QPU_COND_BRANCH_ANY_NC, /* 7 */
+        QPU_COND_BRANCH_ALL_CS, /* 8 */
+        QPU_COND_BRANCH_ALL_CC, /* 9 */
+        QPU_COND_BRANCH_ANY_CS, /* 10 */
+        QPU_COND_BRANCH_ANY_CC, /* 11 */
+        /* Values 12-14 have no enumerator. */
+        QPU_COND_BRANCH_ALWAYS = 15
+}; /* NOTE(review): ZS/ZC, NS/NC, CS/CC presumably mean Z/N/C flag set/clear -- confirm */
+
 enum qpu_pack_mul {
         QPU_PACK_MUL_NOP,
         QPU_PACK_MUL_8888 = 3, /* replicated to each 8 bits of the 32-bit dst. */
@@ -243,6 +260,16 @@ enum qpu_unpack {
 #define QPU_COND_MUL_SHIFT              46
 #define QPU_COND_MUL_MASK               QPU_MASK(48, 46)
 
+
+#define QPU_BRANCH_COND_SHIFT           52 /* field written by qpu_branch(), read by the disassembler */
+#define QPU_BRANCH_COND_MASK            QPU_MASK(55, 52)
+/* Single-bit flags; qpu_branch() does not set either of them explicitly. */
+#define QPU_BRANCH_REL                  ((uint64_t)1 << 51)
+#define QPU_BRANCH_REG                  ((uint64_t)1 << 50)
+/* Shift 45 is also QPU_SF's bit, and QPU_COND_MUL starts at shift 46. */
+#define QPU_BRANCH_RADDR_A_SHIFT        45
+#define QPU_BRANCH_RADDR_A_MASK         QPU_MASK(49, 45)
+
 #define QPU_SF                          ((uint64_t)1 << 45)
 
 #define QPU_WADDR_ADD_SHIFT             38
@@ -274,4 +301,7 @@ enum qpu_unpack {
 #define QPU_OP_ADD_SHIFT                24
 #define QPU_OP_ADD_MASK                 QPU_MASK(28, 24)
 
+#define QPU_BRANCH_TARGET_SHIFT         0 /* field that qpu_branch() fills from its target argument */
+#define QPU_BRANCH_TARGET_MASK          QPU_MASK(31, 0)
+
 #endif /* VC4_QPU_DEFINES_H */
index d48e753df3d1d7726c17c67ad43a045d468501d5..c902d4fdac96415c45af7937248cbce22552132b 100644 (file)
@@ -224,6 +224,22 @@ static const char *qpu_cond[] = {
         [QPU_COND_CC] = ".cc",
 };
 
+static const char *qpu_cond_branch[] = { /* disassembly suffix, indexed by enum qpu_branch_cond */
+        [QPU_COND_BRANCH_ALL_ZS] = ".all_zs",
+        [QPU_COND_BRANCH_ALL_ZC] = ".all_zc",
+        [QPU_COND_BRANCH_ANY_ZS] = ".any_zs",
+        [QPU_COND_BRANCH_ANY_ZC] = ".any_zc",
+        [QPU_COND_BRANCH_ALL_NS] = ".all_ns",
+        [QPU_COND_BRANCH_ALL_NC] = ".all_nc",
+        [QPU_COND_BRANCH_ANY_NS] = ".any_ns",
+        [QPU_COND_BRANCH_ANY_NC] = ".any_nc",
+        [QPU_COND_BRANCH_ALL_CS] = ".all_cs",
+        [QPU_COND_BRANCH_ALL_CC] = ".all_cc",
+        [QPU_COND_BRANCH_ANY_CS] = ".any_cs",
+        [QPU_COND_BRANCH_ANY_CC] = ".any_cc",
+        [QPU_COND_BRANCH_ALWAYS] = "", /* QPU_COND_BRANCH_ALWAYS prints no suffix */
+}; /* indices 12-14 are left NULL; DESC() turns a NULL entry into "???" */
+
 #define DESC(array, index)                                        \
         ((index >= ARRAY_SIZE(array) || !(array)[index]) ?         \
          "???" : (array)[index])
@@ -270,6 +286,12 @@ vc4_qpu_disasm_cond(FILE *out, uint32_t cond)
         fprintf(out, "%s", DESC(qpu_cond, cond));
 }
 
+void
+vc4_qpu_disasm_cond_branch(FILE *out, uint32_t cond)
+{ /* Writes the qpu_cond_branch[] suffix string for cond to out. */
+        fprintf(out, "%s", DESC(qpu_cond_branch, cond)); /* DESC() gives "???" if cond has no table entry */
+}
+
 static void
 print_alu_dst(uint64_t inst, bool is_mul)
 {
@@ -434,7 +456,13 @@ vc4_qpu_disasm(const uint64_t *instructions, int num_instructions)
                 switch (sig) {
                 case QPU_SIG_BRANCH:
                         fprintf(stderr, "branch");
+                        vc4_qpu_disasm_cond_branch(stderr,
+                                                   QPU_GET_FIELD(inst,
+                                                                 QPU_BRANCH_COND));
+
+                        fprintf(stderr, " %d", (uint32_t)inst);
                         break;
+
                 case QPU_SIG_LOAD_IMM:
                         print_load_imm(inst);
                         break;
index e9a45e30277c0edf859a07ace1d00e648c00edec..fade360e8a64ee1a5e100d35fcd12081cc02ddb3 100644 (file)
@@ -52,6 +52,12 @@ _reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
                 { QPU_GET_FIELD(inst, QPU_MUL_B) },
         };
 
+        /* Branches only reference raddr_a (no mux), and we don't use that
+         * feature of branching.
+         */
+        if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_BRANCH)
+                return false;
+
         for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
                 if (!ignore_a &&
                     src_regs[i].mux == QPU_MUX_A &&