From c73aa0a09b996feff5aec42e0347b99b35b2f981 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 15 Mar 2016 13:43:54 -0700
Subject: [PATCH] vc4: Add QPU support for generating BRANCH instructions.

---
 src/gallium/drivers/vc4/vc4_qpu.c          | 18 ++++++++++++-
 src/gallium/drivers/vc4/vc4_qpu.h          |  4 +++
 src/gallium/drivers/vc4/vc4_qpu_defines.h  | 30 ++++++++++++++++++++++
 src/gallium/drivers/vc4/vc4_qpu_disasm.c   | 28 ++++++++++++++++++++
 src/gallium/drivers/vc4/vc4_qpu_validate.c |  6 +++++
 5 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c
index 6aa6b24d94c..cf74c424391 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -164,6 +164,20 @@ qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
         return inst;
 }
 
+uint64_t
+qpu_branch(uint32_t cond, uint32_t target)
+{
+        uint64_t inst = 0;
+
+        inst |= qpu_a_dst(qpu_ra(QPU_W_NOP));
+        inst |= qpu_m_dst(qpu_rb(QPU_W_NOP));
+        inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND);
+        inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG);
+        inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET);
+
+        return inst;
+}
+
 uint64_t
 qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst,
            struct qpu_reg src0, struct qpu_reg src1)
@@ -446,7 +460,9 @@ qpu_merge_inst(uint64_t a, uint64_t b)
         if (a_sig == QPU_SIG_LOAD_IMM ||
             b_sig == QPU_SIG_LOAD_IMM ||
             a_sig == QPU_SIG_SMALL_IMM ||
-            b_sig == QPU_SIG_SMALL_IMM) {
+            b_sig == QPU_SIG_SMALL_IMM ||
+            a_sig == QPU_SIG_BRANCH ||
+            b_sig == QPU_SIG_BRANCH) {
                 return 0;
         }
 
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index 83fa36e6b2c..a0aac1587de 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -143,6 +143,7 @@ uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
                     struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
 uint64_t qpu_merge_inst(uint64_t a, uint64_t b) ATTRIBUTE_CONST;
 uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_branch(uint32_t cond, uint32_t target) ATTRIBUTE_CONST;
 uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
 uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
 uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
@@ -238,4 +239,7 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst);
 void
 vc4_qpu_disasm_cond(FILE *out, uint32_t cond);
 
+void
+vc4_qpu_disasm_cond_branch(FILE *out, uint32_t cond);
+
 #endif /* VC4_QPU_H */
diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h
index 626dc3be6be..2ffa68663b1 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_defines.h
+++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h
@@ -165,6 +165,23 @@ enum qpu_cond {
         QPU_COND_CC,
 };
 
+enum qpu_branch_cond {
+        QPU_COND_BRANCH_ALL_ZS,
+        QPU_COND_BRANCH_ALL_ZC,
+        QPU_COND_BRANCH_ANY_ZS,
+        QPU_COND_BRANCH_ANY_ZC,
+        QPU_COND_BRANCH_ALL_NS,
+        QPU_COND_BRANCH_ALL_NC,
+        QPU_COND_BRANCH_ANY_NS,
+        QPU_COND_BRANCH_ANY_NC,
+        QPU_COND_BRANCH_ALL_CS,
+        QPU_COND_BRANCH_ALL_CC,
+        QPU_COND_BRANCH_ANY_CS,
+        QPU_COND_BRANCH_ANY_CC,
+
+        QPU_COND_BRANCH_ALWAYS = 15
+};
+
 enum qpu_pack_mul {
         QPU_PACK_MUL_NOP,
         QPU_PACK_MUL_8888 = 3, /* replicated to each 8 bits of the 32-bit dst. */
@@ -243,6 +260,16 @@ enum qpu_unpack {
 #define QPU_COND_MUL_SHIFT              46
 #define QPU_COND_MUL_MASK               QPU_MASK(48, 46)
 
+
+#define QPU_BRANCH_COND_SHIFT           52
+#define QPU_BRANCH_COND_MASK            QPU_MASK(55, 52)
+
+#define QPU_BRANCH_REL                  ((uint64_t)1 << 51)
+#define QPU_BRANCH_REG                  ((uint64_t)1 << 50)
+
+#define QPU_BRANCH_RADDR_A_SHIFT        45
+#define QPU_BRANCH_RADDR_A_MASK         QPU_MASK(49, 45)
+
 #define QPU_SF                          ((uint64_t)1 << 45)
 
 #define QPU_WADDR_ADD_SHIFT             38
@@ -274,4 +301,7 @@ enum qpu_unpack {
 #define QPU_OP_ADD_SHIFT                24
 #define QPU_OP_ADD_MASK                 QPU_MASK(28, 24)
 
+#define QPU_BRANCH_TARGET_SHIFT         0
+#define QPU_BRANCH_TARGET_MASK          QPU_MASK(31, 0)
+
 #endif /* VC4_QPU_DEFINES_H */
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index d48e753df3d..c902d4fdac9 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -224,6 +224,22 @@ static const char *qpu_cond[] = {
         [QPU_COND_CC] = ".cc",
 };
 
+static const char *qpu_cond_branch[] = {
+        [QPU_COND_BRANCH_ALL_ZS] = ".all_zs",
+        [QPU_COND_BRANCH_ALL_ZC] = ".all_zc",
+        [QPU_COND_BRANCH_ANY_ZS] = ".any_zs",
+        [QPU_COND_BRANCH_ANY_ZC] = ".any_zc",
+        [QPU_COND_BRANCH_ALL_NS] = ".all_ns",
+        [QPU_COND_BRANCH_ALL_NC] = ".all_nc",
+        [QPU_COND_BRANCH_ANY_NS] = ".any_ns",
+        [QPU_COND_BRANCH_ANY_NC] = ".any_nc",
+        [QPU_COND_BRANCH_ALL_CS] = ".all_cs",
+        [QPU_COND_BRANCH_ALL_CC] = ".all_cc",
+        [QPU_COND_BRANCH_ANY_CS] = ".any_cs",
+        [QPU_COND_BRANCH_ANY_CC] = ".any_cc",
+        [QPU_COND_BRANCH_ALWAYS] = "",
+};
+
 #define DESC(array, index)                                        \
         ((index >= ARRAY_SIZE(array) || !(array)[index]) ?         \
          "???" : (array)[index])
@@ -270,6 +286,12 @@ vc4_qpu_disasm_cond(FILE *out, uint32_t cond)
         fprintf(out, "%s", DESC(qpu_cond, cond));
 }
 
+void
+vc4_qpu_disasm_cond_branch(FILE *out, uint32_t cond)
+{
+        fprintf(out, "%s", DESC(qpu_cond_branch, cond));
+}
+
 static void
 print_alu_dst(uint64_t inst, bool is_mul)
 {
@@ -434,7 +456,13 @@ vc4_qpu_disasm(const uint64_t *instructions, int num_instructions)
                 switch (sig) {
                 case QPU_SIG_BRANCH:
                         fprintf(stderr, "branch");
+                        vc4_qpu_disasm_cond_branch(stderr,
+                                                   QPU_GET_FIELD(inst,
+                                                                 QPU_BRANCH_COND));
+
+                        fprintf(stderr, " %d", (int32_t)inst);
                         break;
+
                 case QPU_SIG_LOAD_IMM:
                         print_load_imm(inst);
                         break;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c b/src/gallium/drivers/vc4/vc4_qpu_validate.c
index e9a45e30277..fade360e8a6 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c
@@ -52,6 +52,12 @@ _reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
                 { QPU_GET_FIELD(inst, QPU_MUL_B) },
         };
 
+        /* Branches only reference raddr_a (no mux), and we don't use that
+         * feature of branching.
+         */
+        if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_BRANCH)
+                return false;
+
         for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
                 if (!ignore_a &&
                     src_regs[i].mux == QPU_MUX_A &&
-- 
2.30.2