From 05c7d9715b8a419fd6fb952715ee8fde9401aacb Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 2 Feb 2017 16:15:18 -0800 Subject: [PATCH] broadcom: Add V3D 3.3 QPU instruction pack, unpack, and disasm. Unlike VC4, I've defined an unpacked instruction format with pack/unpack functions to convert to 64-bit encoded instructions. This will let us incrementally put together our instructions and validate them in a more natural way than the QPU_GET_FIELD/QPU_SET_FIELD used to. The pack/unpack unfortunately are written by hand. While I could define genxml for parts of it, there are many special cases (like operand order of commutative binops choosing which binop is being performed!) and it probably wouldn't come out much cleaner. The disasm unit test ensures that we have the same assembly format as Broadcom's internal tools, other than whitespace changes. v2: Fix automake variable redefinition complaints, add test to .gitignore --- src/broadcom/.gitignore | 1 + src/broadcom/Makefile.am | 3 + src/broadcom/Makefile.sources | 5 + src/broadcom/Makefile.vc5.am | 19 + src/broadcom/qpu/qpu_disasm.c | 298 +++++++ src/broadcom/qpu/qpu_disasm.h | 39 + src/broadcom/qpu/qpu_instr.c | 645 ++++++++++++++ src/broadcom/qpu/qpu_instr.h | 411 +++++++++ src/broadcom/qpu/qpu_pack.c | 1206 +++++++++++++++++++++++++++ src/broadcom/qpu/qpu_validate.c | 0 src/broadcom/qpu/tests/.gitignore | 1 + src/broadcom/qpu/tests/qpu_disasm.c | 146 ++++ 12 files changed, 2774 insertions(+) create mode 100644 src/broadcom/Makefile.vc5.am create mode 100644 src/broadcom/qpu/qpu_disasm.c create mode 100644 src/broadcom/qpu/qpu_disasm.h create mode 100644 src/broadcom/qpu/qpu_instr.c create mode 100644 src/broadcom/qpu/qpu_instr.h create mode 100644 src/broadcom/qpu/qpu_pack.c create mode 100644 src/broadcom/qpu/qpu_validate.c create mode 100644 src/broadcom/qpu/tests/.gitignore create mode 100644 src/broadcom/qpu/tests/qpu_disasm.c diff --git a/src/broadcom/.gitignore b/src/broadcom/.gitignore index 
5442872127e..92edd58f8c9 100644 --- a/src/broadcom/.gitignore +++ b/src/broadcom/.gitignore @@ -1,2 +1,3 @@ cle/v3d_xml.h cle/*_pack.h +qpu/tests/qpu_disasm diff --git a/src/broadcom/Makefile.am b/src/broadcom/Makefile.am index 9ebfe4584bf..ce2fd7df41f 100644 --- a/src/broadcom/Makefile.am +++ b/src/broadcom/Makefile.am @@ -24,6 +24,8 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/include \ -I$(top_builddir)/src \ -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/broadcom/ \ + -I$(top_srcdir)/src/broadcom/include \ $(VALGRIND_CFLAGS) \ $(DEFINES) @@ -49,5 +51,6 @@ PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) include Makefile.genxml.am include Makefile.cle.am +include Makefile.vc5.am CLEANFILES += $(BUILT_SOURCES) diff --git a/src/broadcom/Makefile.sources b/src/broadcom/Makefile.sources index 92f972754c7..d40febae678 100644 --- a/src/broadcom/Makefile.sources +++ b/src/broadcom/Makefile.sources @@ -14,6 +14,11 @@ BROADCOM_FILES = \ common/v3d_debug.c \ common/v3d_debug.h \ common/v3d_device_info.h \ + qpu/qpu_disasm.c \ + qpu/qpu_disasm.h \ + qpu/qpu_instr.c \ + qpu/qpu_instr.h \ + qpu/qpu_pack.c \ $() BROADCOM_DECODER_FILES = \ diff --git a/src/broadcom/Makefile.vc5.am b/src/broadcom/Makefile.vc5.am new file mode 100644 index 00000000000..e88afc20423 --- /dev/null +++ b/src/broadcom/Makefile.vc5.am @@ -0,0 +1,19 @@ +noinst_LTLIBRARIES += libbroadcom.la + +if USE_VC5_SIMULATOR +AM_CFLAGS += $(VC5_SIMULATOR_CFLAGS) +libbroadcom_la_LDFLAGS = $(VC5_SIMULATOR_LIBS) +endif + +libbroadcom_la_SOURCES = $(BROADCOM_FILES) + +check_PROGRAMS += \ + qpu/tests/qpu_disasm \ + $(NULL) + +LDADD = \ + libbroadcom.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(NULL) + +TESTS += $(check_PROGRAMS) diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c new file mode 100644 index 00000000000..5ee834852bd --- /dev/null +++ b/src/broadcom/qpu/qpu_disasm.c @@ -0,0 +1,298 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include "util/ralloc.h" + +#include "broadcom/common/v3d_device_info.h" +#include "qpu_instr.h" +#include "qpu_disasm.h" + +struct disasm_state { + const struct v3d_device_info *devinfo; + char *string; + size_t offset; +}; + +static void +append(struct disasm_state *disasm, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + ralloc_vasprintf_rewrite_tail(&disasm->string, + &disasm->offset, + fmt, args); + va_end(args); +} + +static void +pad_to(struct disasm_state *disasm, int n) +{ + /* FIXME: Do a single append somehow. 
*/ + while (disasm->offset < n) + append(disasm, " "); +} + + +static void +v3d_qpu_disasm_raddr(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr, uint8_t mux) +{ + if (mux == V3D_QPU_MUX_A) { + append(disasm, "rf%d", instr->raddr_a); + } else if (mux == V3D_QPU_MUX_B) { + append(disasm, "rf%d", instr->raddr_b); + } else { + append(disasm, "r%d", mux); + } +} + +static void +v3d_qpu_disasm_waddr(struct disasm_state *disasm, uint32_t waddr, bool magic) +{ + if (!magic) { + append(disasm, "rf%d", waddr); + return; + } + + const char *name = v3d_qpu_magic_waddr_name(waddr); + if (name) + append(disasm, "%s", name); + else + append(disasm, "waddr UNKNOWN %d", waddr); +} + +static void +v3d_qpu_disasm_add(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + bool has_dst = v3d_qpu_add_op_has_dst(instr->alu.add.op); + int num_src = v3d_qpu_add_op_num_src(instr->alu.add.op); + + append(disasm, "%s", v3d_qpu_add_op_name(instr->alu.add.op)); + append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac)); + append(disasm, "%s", v3d_qpu_pf_name(instr->flags.apf)); + append(disasm, "%s", v3d_qpu_uf_name(instr->flags.auf)); + + append(disasm, " "); + + if (has_dst) { + v3d_qpu_disasm_waddr(disasm, instr->alu.add.waddr, + instr->alu.add.magic_write); + append(disasm, v3d_qpu_pack_name(instr->alu.add.output_pack)); + } + + if (num_src >= 1) { + if (has_dst) + append(disasm, ", "); + v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a); + append(disasm, "%s", + v3d_qpu_unpack_name(instr->alu.add.a_unpack)); + } + + if (num_src >= 2) { + append(disasm, ", "); + v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b); + append(disasm, "%s", + v3d_qpu_unpack_name(instr->alu.add.b_unpack)); + } +} + +static void +v3d_qpu_disasm_mul(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + bool has_dst = v3d_qpu_mul_op_has_dst(instr->alu.mul.op); + int num_src = v3d_qpu_mul_op_num_src(instr->alu.mul.op); + + pad_to(disasm, 21); + append(disasm, 
"; "); + + append(disasm, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op)); + append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc)); + append(disasm, "%s", v3d_qpu_pf_name(instr->flags.mpf)); + append(disasm, "%s", v3d_qpu_uf_name(instr->flags.muf)); + + if (instr->alu.mul.op == V3D_QPU_M_NOP) + return; + + append(disasm, " "); + + if (has_dst) { + v3d_qpu_disasm_waddr(disasm, instr->alu.mul.waddr, + instr->alu.mul.magic_write); + append(disasm, v3d_qpu_pack_name(instr->alu.mul.output_pack)); + } + + if (num_src >= 1) { + if (has_dst) + append(disasm, ", "); + v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a); + append(disasm, "%s", + v3d_qpu_unpack_name(instr->alu.mul.a_unpack)); + } + + if (num_src >= 2) { + append(disasm, ", "); + v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b); + append(disasm, "%s", + v3d_qpu_unpack_name(instr->alu.mul.b_unpack)); + } +} + +static void +v3d_qpu_disasm_sig(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + const struct v3d_qpu_sig *sig = &instr->sig; + + if (!sig->thrsw && + !sig->ldvary && + !sig->ldvpm && + !sig->ldtmu && + !sig->ldunif && + !sig->wrtmuc) { + return; + } + + pad_to(disasm, 41); + + if (sig->thrsw) + append(disasm, "; thrsw"); + if (sig->ldvary) + append(disasm, "; ldvary"); + if (sig->ldvpm) + append(disasm, "; ldvpm"); + if (sig->ldtmu) + append(disasm, "; ldtmu"); + if (sig->ldunif) + append(disasm, "; ldunif"); + if (sig->wrtmuc) + append(disasm, "; wrtmuc"); +} + +static void +v3d_qpu_disasm_alu(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + v3d_qpu_disasm_add(disasm, instr); + v3d_qpu_disasm_mul(disasm, instr); + v3d_qpu_disasm_sig(disasm, instr); +} + +static void +v3d_qpu_disasm_branch(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + append(disasm, "b"); + if (instr->branch.ub) + append(disasm, "u"); + append(disasm, "%s", v3d_qpu_branch_cond_name(instr->branch.cond)); + append(disasm, "%s", 
v3d_qpu_msfign_name(instr->branch.msfign)); + + switch (instr->branch.bdi) { + case V3D_QPU_BRANCH_DEST_ABS: + append(disasm, " zero_addr+0x%08x", instr->branch.offset); + break; + + case V3D_QPU_BRANCH_DEST_REL: + append(disasm, " %d", instr->branch.offset); + break; + + case V3D_QPU_BRANCH_DEST_LINK_REG: + append(disasm, " lri"); + break; + + case V3D_QPU_BRANCH_DEST_REGFILE: + append(disasm, " rf%d", instr->branch.raddr_a); + break; + } + + if (instr->branch.ub) { + switch (instr->branch.bdu) { + case V3D_QPU_BRANCH_DEST_ABS: + append(disasm, ", a:unif"); + break; + + case V3D_QPU_BRANCH_DEST_REL: + append(disasm, ", r:unif"); + break; + + case V3D_QPU_BRANCH_DEST_LINK_REG: + append(disasm, ", lri"); + break; + + case V3D_QPU_BRANCH_DEST_REGFILE: + append(disasm, ", rf%d", instr->branch.raddr_a); + break; + } + } +} + +const char * +v3d_qpu_decode(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) +{ + struct disasm_state disasm = { + .string = rzalloc_size(NULL, 1), + .offset = 0, + .devinfo = devinfo, + }; + + switch (instr->type) { + case V3D_QPU_INSTR_TYPE_ALU: + v3d_qpu_disasm_alu(&disasm, instr); + break; + + case V3D_QPU_INSTR_TYPE_BRANCH: + v3d_qpu_disasm_branch(&disasm, instr); + break; + } + + return disasm.string; +} + +/** + * Returns a string containing the disassembled representation of the QPU + * instruction. It is the caller's responsibility to free the return value + * with ralloc_free(). 
+ */ +const char * +v3d_qpu_disasm(const struct v3d_device_info *devinfo, uint64_t inst) +{ + struct v3d_qpu_instr instr; + bool ok = v3d_qpu_instr_unpack(devinfo, inst, &instr); + assert(ok); (void)ok; + + return v3d_qpu_decode(devinfo, &instr); +} + +void +v3d_qpu_dump(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) +{ + const char *decoded = v3d_qpu_decode(devinfo, instr); + fprintf(stderr, "%s", decoded); + ralloc_free((char *)decoded); +} diff --git a/src/broadcom/qpu/qpu_disasm.h b/src/broadcom/qpu/qpu_disasm.h new file mode 100644 index 00000000000..efdf8ddb588 --- /dev/null +++ b/src/broadcom/qpu/qpu_disasm.h @@ -0,0 +1,39 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VC5_QPU_DISASM_H +#define VC5_QPU_DISASM_H + +#include "broadcom/common/v3d_device_info.h" + +struct v3d_qpu_instr; + +const char *v3d_qpu_decode(const struct v3d_device_info *devinfo, const + struct v3d_qpu_instr *instr); + +const char *v3d_qpu_disasm(const struct v3d_device_info *devinfo, uint64_t inst); + +void v3d_qpu_dump(const struct v3d_device_info *devinfo, const + struct v3d_qpu_instr *instr); + +#endif /* VC5_QPU_DISASM_H */ diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c new file mode 100644 index 00000000000..7499170de3d --- /dev/null +++ b/src/broadcom/qpu/qpu_instr.c @@ -0,0 +1,645 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include "util/macros.h" +#include "qpu_instr.h" + +#ifndef QPU_MASK +#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low)) +/* Using the GNU statement expression extension */ +#define QPU_SET_FIELD(value, field) \ + ({ \ + uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \ + assert((fieldval & ~ field ## _MASK) == 0); \ + fieldval & field ## _MASK; \ + }) + +#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) + +#define QPU_UPDATE_FIELD(inst, value, field) \ + (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field)) +#endif /* QPU_MASK */ + +#define VC5_QPU_OP_MUL_SHIFT 58 +#define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58) + +#define VC5_QPU_SIG_SHIFT 53 +#define VC5_QPU_SIG_MASK QPU_MASK(57, 53) +# define VC5_QPU_SIG_THRSW_BIT 0x1 +# define VC5_QPU_SIG_LDUNIF_BIT 0x2 +# define VC5_QPU_SIG_LDTMU_BIT 0x4 +# define VC5_QPU_SIG_LDVARY_BIT 0x8 + +#define VC5_QPU_COND_SHIFT 46 +#define VC5_QPU_COND_MASK QPU_MASK(52, 46) + +#define VC5_QPU_COND_IFA 0 +#define VC5_QPU_COND_IFB 1 +#define VC5_QPU_COND_IFNA 2 +#define VC5_QPU_COND_IFNB 3 + +#define VC5_QPU_MM QPU_MASK(45, 45) +#define VC5_QPU_MA QPU_MASK(44, 44) + +#define V3D_QPU_WADDR_M_SHIFT 38 +#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38) + +#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35 +#define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35) + +#define V3D_QPU_WADDR_A_SHIFT 32 +#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32) + +#define VC5_QPU_BRANCH_COND_SHIFT 32 +#define VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32) + +#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24 +#define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24) + +#define VC5_QPU_OP_ADD_SHIFT 24 +#define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24) + +#define VC5_QPU_MUL_B_SHIFT 21 +#define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21) + +#define VC5_QPU_BRANCH_MSFIGN_SHIFT 21 +#define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21) + +#define VC5_QPU_MUL_A_SHIFT 18 +#define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18) 
+ +#define VC5_QPU_ADD_B_SHIFT 15 +#define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15) + +#define VC5_QPU_BRANCH_BDU_SHIFT 15 +#define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15) + +#define VC5_QPU_BRANCH_UB QPU_MASK(14, 14) + +#define VC5_QPU_ADD_A_SHIFT 12 +#define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12) + +#define VC5_QPU_BRANCH_BDI_SHIFT 12 +#define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12) + +#define VC5_QPU_RADDR_A_SHIFT 6 +#define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6) + +#define VC5_QPU_RADDR_B_SHIFT 0 +#define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0) + +const char * +v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr) +{ + static const char *waddr_magic[] = { + [V3D_QPU_WADDR_R0] = "r0", + [V3D_QPU_WADDR_R1] = "r1", + [V3D_QPU_WADDR_R2] = "r2", + [V3D_QPU_WADDR_R3] = "r3", + [V3D_QPU_WADDR_R4] = "r4", + [V3D_QPU_WADDR_R5] = "r5", + [V3D_QPU_WADDR_NOP] = "-", + [V3D_QPU_WADDR_TLB] = "tlb", + [V3D_QPU_WADDR_TLBU] = "tlbu", + [V3D_QPU_WADDR_TMU] = "tmu", + [V3D_QPU_WADDR_TMUL] = "tmul", + [V3D_QPU_WADDR_TMUD] = "tmud", + [V3D_QPU_WADDR_TMUA] = "tmua", + [V3D_QPU_WADDR_TMUAU] = "tmuau", + [V3D_QPU_WADDR_VPM] = "vpm", + [V3D_QPU_WADDR_VPMU] = "vpmu", + [V3D_QPU_WADDR_SYNC] = "sync", + [V3D_QPU_WADDR_SYNCU] = "syncu", + [V3D_QPU_WADDR_RECIP] = "recip", + [V3D_QPU_WADDR_RSQRT] = "rsqrt", + [V3D_QPU_WADDR_EXP] = "exp", + [V3D_QPU_WADDR_LOG] = "log", + [V3D_QPU_WADDR_SIN] = "sin", + [V3D_QPU_WADDR_RSQRT2] = "rsqrt2", + }; + + return waddr_magic[waddr]; +} + +const char * +v3d_qpu_add_op_name(enum v3d_qpu_add_op op) +{ + static const char *op_names[] = { + [V3D_QPU_A_FADD] = "fadd", + [V3D_QPU_A_FADDNF] = "faddnf", + [V3D_QPU_A_VFPACK] = "vfpack", + [V3D_QPU_A_ADD] = "add", + [V3D_QPU_A_SUB] = "sub", + [V3D_QPU_A_FSUB] = "fsub", + [V3D_QPU_A_MIN] = "min", + [V3D_QPU_A_MAX] = "max", + [V3D_QPU_A_UMIN] = "umin", + [V3D_QPU_A_UMAX] = "umax", + [V3D_QPU_A_SHL] = "shl", + [V3D_QPU_A_SHR] = "shr", + [V3D_QPU_A_ASR] = "asr", + [V3D_QPU_A_ROR] = "ror", + [V3D_QPU_A_FMIN] = "fmin", + 
[V3D_QPU_A_FMAX] = "fmax", + [V3D_QPU_A_VFMIN] = "vfmin", + [V3D_QPU_A_AND] = "and", + [V3D_QPU_A_OR] = "or", + [V3D_QPU_A_XOR] = "xor", + [V3D_QPU_A_VADD] = "vadd", + [V3D_QPU_A_VSUB] = "vsub", + [V3D_QPU_A_NOT] = "not", + [V3D_QPU_A_NEG] = "neg", + [V3D_QPU_A_FLAPUSH] = "flapush", + [V3D_QPU_A_FLBPUSH] = "flbpush", + [V3D_QPU_A_FLBPOP] = "flbpop", + [V3D_QPU_A_SETMSF] = "setmsf", + [V3D_QPU_A_SETREVF] = "setrevf", + [V3D_QPU_A_NOP] = "nop", + [V3D_QPU_A_TIDX] = "tidx", + [V3D_QPU_A_EIDX] = "eidx", + [V3D_QPU_A_LR] = "lr", + [V3D_QPU_A_VFLA] = "vfla", + [V3D_QPU_A_VFLNA] = "vflna", + [V3D_QPU_A_VFLB] = "vflb", + [V3D_QPU_A_VFLNB] = "vflnb", + [V3D_QPU_A_FXCD] = "fxcd", + [V3D_QPU_A_XCD] = "xcd", + [V3D_QPU_A_FYCD] = "fycd", + [V3D_QPU_A_YCD] = "ycd", + [V3D_QPU_A_MSF] = "msf", + [V3D_QPU_A_REVF] = "revf", + [V3D_QPU_A_VDWWT] = "vdwwt", + [V3D_QPU_A_IID] = "iid", + [V3D_QPU_A_SAMPID] = "sampid", + [V3D_QPU_A_PATCHID] = "patchid", + [V3D_QPU_A_TMUWT] = "tmuwt", + [V3D_QPU_A_VPMSETUP] = "vpmsetup", + [V3D_QPU_A_VPMWT] = "vpmwt", + [V3D_QPU_A_LDVPMV] = "ldvpmv", + [V3D_QPU_A_LDVPMD] = "ldvpmd", + [V3D_QPU_A_LDVPMP] = "ldvpmp", + [V3D_QPU_A_LDVPMG] = "ldvpmg", + [V3D_QPU_A_FCMP] = "fcmp", + [V3D_QPU_A_VFMAX] = "vfmax", + [V3D_QPU_A_FROUND] = "fround", + [V3D_QPU_A_FTOIN] = "ftoin", + [V3D_QPU_A_FTRUNC] = "ftrunc", + [V3D_QPU_A_FTOIZ] = "ftoiz", + [V3D_QPU_A_FFLOOR] = "ffloor", + [V3D_QPU_A_FTOUZ] = "ftouz", + [V3D_QPU_A_FCEIL] = "fceil", + [V3D_QPU_A_FTOC] = "ftoc", + [V3D_QPU_A_FDX] = "fdx", + [V3D_QPU_A_FDY] = "fdy", + [V3D_QPU_A_STVPMV] = "stvpmv", + [V3D_QPU_A_STVPMD] = "stvpmd", + [V3D_QPU_A_STVPMP] = "stvpmp", + [V3D_QPU_A_ITOF] = "itof", + [V3D_QPU_A_CLZ] = "clz", + [V3D_QPU_A_UTOF] = "utof", + }; + + if (op >= ARRAY_SIZE(op_names)) + return NULL; + + return op_names[op]; +} + +const char * +v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op) +{ + static const char *op_names[] = { + [V3D_QPU_M_ADD] = "add", + [V3D_QPU_M_SUB] = "sub", + [V3D_QPU_M_UMUL24] = "umul24", + 
[V3D_QPU_M_VFMUL] = "vfmul", + [V3D_QPU_M_SMUL24] = "smul24", + [V3D_QPU_M_MULTOP] = "multop", + [V3D_QPU_M_FMOV] = "fmov", + [V3D_QPU_M_MOV] = "mov", + [V3D_QPU_M_NOP] = "nop", + [V3D_QPU_M_FMUL] = "fmul", + }; + + if (op >= ARRAY_SIZE(op_names)) + return NULL; + + return op_names[op]; +} + +const char * +v3d_qpu_cond_name(enum v3d_qpu_cond cond) +{ + switch (cond) { + case V3D_QPU_COND_NONE: + return ""; + case V3D_QPU_COND_IFA: + return ".ifa"; + case V3D_QPU_COND_IFB: + return ".ifb"; + case V3D_QPU_COND_IFNA: + return ".ifna"; + case V3D_QPU_COND_IFNB: + return ".ifnb"; + default: + unreachable("bad cond value"); + } +} + +const char * +v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond) +{ + switch (cond) { + case V3D_QPU_BRANCH_COND_ALWAYS: + return ""; + case V3D_QPU_BRANCH_COND_A0: + return ".a0"; + case V3D_QPU_BRANCH_COND_NA0: + return ".na0"; + case V3D_QPU_BRANCH_COND_ALLA: + return ".alla"; + case V3D_QPU_BRANCH_COND_ANYNA: + return ".anyna"; + case V3D_QPU_BRANCH_COND_ANYA: + return ".anya"; + case V3D_QPU_BRANCH_COND_ALLNA: + return ".allna"; + default: + unreachable("bad branch cond value"); + } +} + +const char * +v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign) +{ + switch (msfign) { + case V3D_QPU_MSFIGN_NONE: + return ""; + case V3D_QPU_MSFIGN_P: + return "p"; + case V3D_QPU_MSFIGN_Q: + return "q"; + default: + unreachable("bad branch cond value"); + } +} + +const char * +v3d_qpu_pf_name(enum v3d_qpu_pf pf) +{ + switch (pf) { + case V3D_QPU_PF_NONE: + return ""; + case V3D_QPU_PF_PUSHZ: + return ".pushz"; + case V3D_QPU_PF_PUSHN: + return ".pushn"; + case V3D_QPU_PF_PUSHC: + return ".pushc"; + default: + unreachable("bad pf value"); + } +} + +const char * +v3d_qpu_uf_name(enum v3d_qpu_uf uf) +{ + switch (uf) { + case V3D_QPU_UF_NONE: + return ""; + case V3D_QPU_UF_ANDZ: + return ".andz"; + case V3D_QPU_UF_ANDNZ: + return ".andnz"; + case V3D_QPU_UF_NORZ: + return ".norz"; + case V3D_QPU_UF_NORNZ: + return ".nornz"; + case V3D_QPU_UF_ANDN: + 
return ".andn"; + case V3D_QPU_UF_ANDNN: + return ".andnn"; + case V3D_QPU_UF_NORN: + return ".norn"; + case V3D_QPU_UF_NORNN: + return ".nornn"; + case V3D_QPU_UF_ANDC: + return ".andc"; + case V3D_QPU_UF_ANDNC: + return ".andnc"; + case V3D_QPU_UF_NORC: + return ".norc"; + case V3D_QPU_UF_NORNC: + return ".nornc"; + default: + unreachable("bad pf value"); + } +} + +const char * +v3d_qpu_pack_name(enum v3d_qpu_output_pack pack) +{ + switch (pack) { + case V3D_QPU_PACK_NONE: + return ""; + case V3D_QPU_PACK_L: + return ".l"; + case V3D_QPU_PACK_H: + return ".h"; + default: + unreachable("bad pack value"); + } +} + +const char * +v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack) +{ + switch (unpack) { + case V3D_QPU_UNPACK_NONE: + return ""; + case V3D_QPU_UNPACK_L: + return ".l"; + case V3D_QPU_UNPACK_H: + return ".h"; + case V3D_QPU_UNPACK_ABS: + return ".abs"; + case V3D_QPU_UNPACK_REPLICATE_32F_16: + return ".ff"; + case V3D_QPU_UNPACK_REPLICATE_L_16: + return ".ll"; + case V3D_QPU_UNPACK_REPLICATE_H_16: + return ".hh"; + case V3D_QPU_UNPACK_SWAP_16: + return ".swp"; + default: + unreachable("bad unpack value"); + } +} + +#define D 1 +#define A 2 +#define B 4 +static const uint8_t add_op_args[] = { + [V3D_QPU_A_FADD] = D | A | B, + [V3D_QPU_A_FADDNF] = D | A | B, + [V3D_QPU_A_VFPACK] = D | A | B, + [V3D_QPU_A_ADD] = D | A | B, + [V3D_QPU_A_VFPACK] = D | A | B, + [V3D_QPU_A_SUB] = D | A | B, + [V3D_QPU_A_VFPACK] = D | A | B, + [V3D_QPU_A_FSUB] = D | A | B, + [V3D_QPU_A_MIN] = D | A | B, + [V3D_QPU_A_MAX] = D | A | B, + [V3D_QPU_A_UMIN] = D | A | B, + [V3D_QPU_A_UMAX] = D | A | B, + [V3D_QPU_A_SHL] = D | A | B, + [V3D_QPU_A_SHR] = D | A | B, + [V3D_QPU_A_ASR] = D | A | B, + [V3D_QPU_A_ROR] = D | A | B, + [V3D_QPU_A_FMIN] = D | A | B, + [V3D_QPU_A_FMAX] = D | A | B, + [V3D_QPU_A_VFMIN] = D | A | B, + + [V3D_QPU_A_AND] = D | A | B, + [V3D_QPU_A_OR] = D | A | B, + [V3D_QPU_A_XOR] = D | A | B, + + [V3D_QPU_A_VADD] = D | A | B, + [V3D_QPU_A_VSUB] = D | A | B, + 
[V3D_QPU_A_NOT] = D | A, + [V3D_QPU_A_NEG] = D | A, + [V3D_QPU_A_FLAPUSH] = D | A, + [V3D_QPU_A_FLBPUSH] = D | A, + [V3D_QPU_A_FLBPOP] = D | A, + [V3D_QPU_A_SETMSF] = D | A, + [V3D_QPU_A_SETREVF] = D | A, + [V3D_QPU_A_NOP] = 0, + [V3D_QPU_A_TIDX] = D, + [V3D_QPU_A_EIDX] = D, + [V3D_QPU_A_LR] = D, + [V3D_QPU_A_VFLA] = D, + [V3D_QPU_A_VFLNA] = D, + [V3D_QPU_A_VFLB] = D, + [V3D_QPU_A_VFLNB] = D, + + [V3D_QPU_A_FXCD] = D, + [V3D_QPU_A_XCD] = D, + [V3D_QPU_A_FYCD] = D, + [V3D_QPU_A_YCD] = D, + + [V3D_QPU_A_MSF] = D, + [V3D_QPU_A_REVF] = D, + [V3D_QPU_A_VDWWT] = D, + [V3D_QPU_A_IID] = D, + [V3D_QPU_A_SAMPID] = D, + [V3D_QPU_A_PATCHID] = D, + [V3D_QPU_A_TMUWT] = D, + [V3D_QPU_A_VPMWT] = D, + + [V3D_QPU_A_VPMSETUP] = D | A, + + [V3D_QPU_A_LDVPMV] = D | A, + [V3D_QPU_A_LDVPMD] = D | A, + [V3D_QPU_A_LDVPMP] = D | A, + [V3D_QPU_A_LDVPMG] = D | A | B, + + /* FIXME: MOVABSNEG */ + + [V3D_QPU_A_FCMP] = D | A | B, + [V3D_QPU_A_VFMAX] = D | A | B, + + [V3D_QPU_A_FROUND] = D | A, + [V3D_QPU_A_FTOIN] = D | A, + [V3D_QPU_A_FTRUNC] = D | A, + [V3D_QPU_A_FTOIZ] = D | A, + [V3D_QPU_A_FFLOOR] = D | A, + [V3D_QPU_A_FTOUZ] = D | A, + [V3D_QPU_A_FCEIL] = D | A, + [V3D_QPU_A_FTOC] = D | A, + + [V3D_QPU_A_FDX] = D | A, + [V3D_QPU_A_FDY] = D | A, + + [V3D_QPU_A_STVPMV] = A | B, + [V3D_QPU_A_STVPMD] = A | B, + [V3D_QPU_A_STVPMP] = A | B, + + [V3D_QPU_A_ITOF] = D | A, + [V3D_QPU_A_CLZ] = D | A, + [V3D_QPU_A_UTOF] = D | A, +}; + +static const uint8_t mul_op_args[] = { + [V3D_QPU_M_ADD] = D | A | B, + [V3D_QPU_M_SUB] = D | A | B, + [V3D_QPU_M_UMUL24] = D | A | B, + [V3D_QPU_M_VFMUL] = D | A | B, + [V3D_QPU_M_SMUL24] = D | A | B, + [V3D_QPU_M_MULTOP] = D | A | B, + [V3D_QPU_M_FMOV] = D | A, + [V3D_QPU_M_NOP] = 0, + [V3D_QPU_M_MOV] = D | A, + [V3D_QPU_M_FMUL] = D | A | B, +}; + +bool +v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op) +{ + assert(op < ARRAY_SIZE(add_op_args)); + + return add_op_args[op] & D; +} + +bool +v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op) +{ + assert(op < 
ARRAY_SIZE(mul_op_args)); + + return mul_op_args[op] & D; +} + +int +v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op) +{ + assert(op < ARRAY_SIZE(add_op_args)); + + uint8_t args = add_op_args[op]; + if (args & B) + return 2; + else if (args & A) + return 1; + else + return 0; +} + +int +v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op) +{ + assert(op < ARRAY_SIZE(mul_op_args)); + + uint8_t args = mul_op_args[op]; + if (args & B) + return 2; + else if (args & A) + return 1; + else + return 0; +} + +bool +v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) +{ + switch (waddr) { + case V3D_QPU_WADDR_RECIP: + case V3D_QPU_WADDR_RSQRT: + case V3D_QPU_WADDR_EXP: + case V3D_QPU_WADDR_LOG: + case V3D_QPU_WADDR_SIN: + case V3D_QPU_WADDR_RSQRT2: + return true; + default: + return false; + } +} + +bool +v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) +{ + switch (waddr) { + case V3D_QPU_WADDR_TMU: + case V3D_QPU_WADDR_TMUL: + case V3D_QPU_WADDR_TMUD: + case V3D_QPU_WADDR_TMUA: + case V3D_QPU_WADDR_TMUAU: + return true; + default: + return false; + } +} + +bool +v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) +{ + return (waddr == V3D_QPU_WADDR_TLB || + waddr == V3D_QPU_WADDR_TLBU); +} + +bool +v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) +{ + return (waddr == V3D_QPU_WADDR_VPM || + waddr == V3D_QPU_WADDR_VPMU); +} + +bool +v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) +{ + return (waddr == V3D_QPU_WADDR_SYNC || + waddr == V3D_QPU_WADDR_SYNCU); +} + +bool +v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst) +{ + return inst->sig.ldvary || inst->sig.ldvpm; +} + +bool +v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst) +{ + if (inst->sig.ldtmu) + return true; + + if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { + if (inst->alu.add.magic_write && + v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) { + return true; + } + + if (inst->alu.mul.magic_write && + v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) { + return true; + } + } + + return false; +} + +bool 
+v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst) +{ + return inst->sig.ldvary || inst->sig.ldunif; +} + +bool +v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux) +{ + int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op); + int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op); + + return ((add_nsrc > 0 && inst->alu.add.a == mux) || + (add_nsrc > 1 && inst->alu.add.b == mux) || + (mul_nsrc > 0 && inst->alu.mul.a == mux) || + (mul_nsrc > 1 && inst->alu.mul.b == mux)); +} diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h new file mode 100644 index 00000000000..a425fae8b25 --- /dev/null +++ b/src/broadcom/qpu/qpu_instr.h @@ -0,0 +1,411 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file qpu_instr.h + * + * Definitions of the unpacked form of QPU instructions. 
Assembly and + * disassembly will use this for talking about instructions, with qpu_pack.c + * handling the pack and unpack of the actual 64-bit QPU + * instruction. + */ + +#ifndef QPU_INSTR_H +#define QPU_INSTR_H + +#include +#include +#include "util/macros.h" + +struct v3d_device_info; + +struct v3d_qpu_sig { + bool thrsw:1; + bool ldunif:1; + bool ldtmu:1; + bool ldvary:1; + bool ldvpm:1; + bool ldtlb:1; + bool ldtlbu:1; + bool small_imm:1; + bool ucb:1; + bool rotate:1; + bool wrtmuc:1; +}; + +enum v3d_qpu_cond { + V3D_QPU_COND_NONE, + V3D_QPU_COND_IFA, + V3D_QPU_COND_IFB, + V3D_QPU_COND_IFNA, + V3D_QPU_COND_IFNB, +}; + +enum v3d_qpu_pf { + V3D_QPU_PF_NONE, + V3D_QPU_PF_PUSHZ, + V3D_QPU_PF_PUSHN, + V3D_QPU_PF_PUSHC, +}; + +enum v3d_qpu_uf { + V3D_QPU_UF_NONE, + V3D_QPU_UF_ANDZ, + V3D_QPU_UF_ANDNZ, + V3D_QPU_UF_NORNZ, + V3D_QPU_UF_NORZ, + V3D_QPU_UF_ANDN, + V3D_QPU_UF_ANDNN, + V3D_QPU_UF_NORNN, + V3D_QPU_UF_NORN, + V3D_QPU_UF_ANDC, + V3D_QPU_UF_ANDNC, + V3D_QPU_UF_NORNC, + V3D_QPU_UF_NORC, +}; + +enum v3d_qpu_waddr { + V3D_QPU_WADDR_R0 = 0, + V3D_QPU_WADDR_R1 = 1, + V3D_QPU_WADDR_R2 = 2, + V3D_QPU_WADDR_R3 = 3, + V3D_QPU_WADDR_R4 = 4, + V3D_QPU_WADDR_R5 = 5, + /* 6 is reserved, but note 3.2.2.8: "Result Writes" */ + V3D_QPU_WADDR_NOP = 6, + V3D_QPU_WADDR_TLB = 7, + V3D_QPU_WADDR_TLBU = 8, + V3D_QPU_WADDR_TMU = 9, + V3D_QPU_WADDR_TMUL = 10, + V3D_QPU_WADDR_TMUD = 11, + V3D_QPU_WADDR_TMUA = 12, + V3D_QPU_WADDR_TMUAU = 13, + V3D_QPU_WADDR_VPM = 14, + V3D_QPU_WADDR_VPMU = 15, + V3D_QPU_WADDR_SYNC = 16, + V3D_QPU_WADDR_SYNCU = 17, + /* reserved */ + V3D_QPU_WADDR_RECIP = 19, + V3D_QPU_WADDR_RSQRT = 20, + V3D_QPU_WADDR_EXP = 21, + V3D_QPU_WADDR_LOG = 22, + V3D_QPU_WADDR_SIN = 23, + V3D_QPU_WADDR_RSQRT2 = 24, +}; + +struct v3d_qpu_flags { + enum v3d_qpu_cond ac, mc; + enum v3d_qpu_pf apf, mpf; + enum v3d_qpu_uf auf, muf; +}; + +enum v3d_qpu_add_op { + V3D_QPU_A_FADD, + V3D_QPU_A_FADDNF, + V3D_QPU_A_VFPACK, + V3D_QPU_A_ADD, + V3D_QPU_A_SUB, + 
V3D_QPU_A_FSUB, + V3D_QPU_A_MIN, + V3D_QPU_A_MAX, + V3D_QPU_A_UMIN, + V3D_QPU_A_UMAX, + V3D_QPU_A_SHL, + V3D_QPU_A_SHR, + V3D_QPU_A_ASR, + V3D_QPU_A_ROR, + V3D_QPU_A_FMIN, + V3D_QPU_A_FMAX, + V3D_QPU_A_VFMIN, + V3D_QPU_A_AND, + V3D_QPU_A_OR, + V3D_QPU_A_XOR, + V3D_QPU_A_VADD, + V3D_QPU_A_VSUB, + V3D_QPU_A_NOT, + V3D_QPU_A_NEG, + V3D_QPU_A_FLAPUSH, + V3D_QPU_A_FLBPUSH, + V3D_QPU_A_FLBPOP, + V3D_QPU_A_SETMSF, + V3D_QPU_A_SETREVF, + V3D_QPU_A_NOP, + V3D_QPU_A_TIDX, + V3D_QPU_A_EIDX, + V3D_QPU_A_LR, + V3D_QPU_A_VFLA, + V3D_QPU_A_VFLNA, + V3D_QPU_A_VFLB, + V3D_QPU_A_VFLNB, + V3D_QPU_A_FXCD, + V3D_QPU_A_XCD, + V3D_QPU_A_FYCD, + V3D_QPU_A_YCD, + V3D_QPU_A_MSF, + V3D_QPU_A_REVF, + V3D_QPU_A_VDWWT, + V3D_QPU_A_IID, + V3D_QPU_A_SAMPID, + V3D_QPU_A_PATCHID, + V3D_QPU_A_TMUWT, + V3D_QPU_A_VPMSETUP, + V3D_QPU_A_VPMWT, + V3D_QPU_A_LDVPMV, + V3D_QPU_A_LDVPMD, + V3D_QPU_A_LDVPMP, + V3D_QPU_A_LDVPMG, + V3D_QPU_A_FCMP, + V3D_QPU_A_VFMAX, + V3D_QPU_A_FROUND, + V3D_QPU_A_FTOIN, + V3D_QPU_A_FTRUNC, + V3D_QPU_A_FTOIZ, + V3D_QPU_A_FFLOOR, + V3D_QPU_A_FTOUZ, + V3D_QPU_A_FCEIL, + V3D_QPU_A_FTOC, + V3D_QPU_A_FDX, + V3D_QPU_A_FDY, + V3D_QPU_A_STVPMV, + V3D_QPU_A_STVPMD, + V3D_QPU_A_STVPMP, + V3D_QPU_A_ITOF, + V3D_QPU_A_CLZ, + V3D_QPU_A_UTOF, +}; + +enum v3d_qpu_mul_op { + V3D_QPU_M_ADD, + V3D_QPU_M_SUB, + V3D_QPU_M_UMUL24, + V3D_QPU_M_VFMUL, + V3D_QPU_M_SMUL24, + V3D_QPU_M_MULTOP, + V3D_QPU_M_FMOV, + V3D_QPU_M_MOV, + V3D_QPU_M_NOP, + V3D_QPU_M_FMUL, +}; + +enum v3d_qpu_output_pack { + V3D_QPU_PACK_NONE, + /** + * Convert to 16-bit float, put in low 16 bits of destination leaving + * high unmodified. + */ + V3D_QPU_PACK_L, + /** + * Convert to 16-bit float, put in high 16 bits of destination leaving + * low unmodified. + */ + V3D_QPU_PACK_H, +}; + +enum v3d_qpu_input_unpack { + /** + * No-op input unpacking. Note that this enum's value doesn't match + * the packed QPU instruction value of the field (we use 0 so that the + * default on new instruction creation is no-op). 
+ */ + V3D_QPU_UNPACK_NONE, + /** Absolute value. Only available for some operations. */ + V3D_QPU_UNPACK_ABS, + /** Convert low 16 bits from 16-bit float to 32-bit float. */ + V3D_QPU_UNPACK_L, + /** Convert high 16 bits from 16-bit float to 32-bit float. */ + V3D_QPU_UNPACK_H, + + /** Convert to 16f and replicate it to the high bits. */ + V3D_QPU_UNPACK_REPLICATE_32F_16, + + /** Replicate low 16 bits to high */ + V3D_QPU_UNPACK_REPLICATE_L_16, + + /** Replicate high 16 bits to low */ + V3D_QPU_UNPACK_REPLICATE_H_16, + + /** Swap high and low 16 bits */ + V3D_QPU_UNPACK_SWAP_16, +}; + +enum v3d_qpu_mux { + V3D_QPU_MUX_R0, + V3D_QPU_MUX_R1, + V3D_QPU_MUX_R2, + V3D_QPU_MUX_R3, + V3D_QPU_MUX_R4, + V3D_QPU_MUX_R5, + V3D_QPU_MUX_A, + V3D_QPU_MUX_B, +}; + +struct v3d_qpu_alu_instr { + struct { + enum v3d_qpu_add_op op; + enum v3d_qpu_mux a, b; + uint8_t waddr; + bool magic_write; + enum v3d_qpu_output_pack output_pack; + enum v3d_qpu_input_unpack a_unpack; + enum v3d_qpu_input_unpack b_unpack; + } add; + + struct { + enum v3d_qpu_mul_op op; + enum v3d_qpu_mux a, b; + uint8_t waddr; + bool magic_write; + enum v3d_qpu_output_pack output_pack; + enum v3d_qpu_input_unpack a_unpack; + enum v3d_qpu_input_unpack b_unpack; + } mul; +}; + +enum v3d_qpu_branch_cond { + V3D_QPU_BRANCH_COND_ALWAYS, + V3D_QPU_BRANCH_COND_A0, + V3D_QPU_BRANCH_COND_NA0, + V3D_QPU_BRANCH_COND_ALLA, + V3D_QPU_BRANCH_COND_ANYNA, + V3D_QPU_BRANCH_COND_ANYA, + V3D_QPU_BRANCH_COND_ALLNA, +}; + +enum v3d_qpu_msfign { + /** Ignore multisample flags when determining branch condition. */ + V3D_QPU_MSFIGN_NONE, + /** + * If no multisample flags are set in the lane (a pixel in the FS, a + * vertex in the VS), ignore the lane's condition when computing the + * branch condition. + */ + V3D_QPU_MSFIGN_P, + /** + * If no multisample flags are set in a 2x2 quad in the FS, ignore the + * quad's a/b conditions. 
+ */ + V3D_QPU_MSFIGN_Q, +}; + +enum v3d_qpu_branch_dest { + V3D_QPU_BRANCH_DEST_ABS, + V3D_QPU_BRANCH_DEST_REL, + V3D_QPU_BRANCH_DEST_LINK_REG, + V3D_QPU_BRANCH_DEST_REGFILE, +}; + +struct v3d_qpu_branch_instr { + enum v3d_qpu_branch_cond cond; + enum v3d_qpu_msfign msfign; + + /** Selects how to compute the new IP if the branch is taken. */ + enum v3d_qpu_branch_dest bdi; + + /** + * Selects how to compute the new uniforms pointer if the branch is + * taken. (ABS/REL implicitly load a uniform and use that) + */ + enum v3d_qpu_branch_dest bdu; + + /** + * If set, then udest determines how the uniform stream will branch, + * otherwise the uniform stream is left as is. + */ + bool ub; + + uint8_t raddr_a; + + uint32_t offset; +}; + +enum v3d_qpu_instr_type { + V3D_QPU_INSTR_TYPE_ALU, + V3D_QPU_INSTR_TYPE_BRANCH, +}; + +struct v3d_qpu_instr { + enum v3d_qpu_instr_type type; + + struct v3d_qpu_sig sig; + uint8_t raddr_a; + uint8_t raddr_b; + struct v3d_qpu_flags flags; + + union { + struct v3d_qpu_alu_instr alu; + struct v3d_qpu_branch_instr branch; + }; +}; + +const char *v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr); +const char *v3d_qpu_add_op_name(enum v3d_qpu_add_op op); +const char *v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op); +const char *v3d_qpu_cond_name(enum v3d_qpu_cond cond); +const char *v3d_qpu_pf_name(enum v3d_qpu_pf pf); +const char *v3d_qpu_uf_name(enum v3d_qpu_uf uf); +const char *v3d_qpu_pack_name(enum v3d_qpu_output_pack pack); +const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack); +const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond); +const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign); + +bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op); +bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op); +int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op); +int v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op); + +bool v3d_qpu_sig_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_sig *sig, + uint32_t 
*packed_sig); +bool v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, + uint32_t packed_sig, + struct v3d_qpu_sig *sig); + +bool +v3d_qpu_flags_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_flags *cond, + uint32_t *packed_cond); +bool +v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo, + uint32_t packed_cond, + struct v3d_qpu_flags *cond); + +bool +v3d_qpu_instr_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr, + uint64_t *packed_instr); +bool +v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo, + uint64_t packed_instr, + struct v3d_qpu_instr *instr); + +bool v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r3(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r4(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r5(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); + +#endif diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c new file mode 100644 index 00000000000..0ecce86662a --- /dev/null +++ b/src/broadcom/qpu/qpu_pack.c @@ -0,0 +1,1206 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* memcmp() and ffs() are used below. */ +#include <string.h> +#include "util/macros.h" + +#include "broadcom/common/v3d_device_info.h" +#include "qpu_instr.h" + +#ifndef QPU_MASK +#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low)) +/* Using the GNU statement expression extension */ +#define QPU_SET_FIELD(value, field) \ + ({ \ + uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \ + assert((fieldval & ~ field ## _MASK) == 0); \ + fieldval & field ## _MASK; \ + }) + +#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) + +#define QPU_UPDATE_FIELD(inst, value, field) \ + (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field)) +#endif /* QPU_MASK */ + +#define VC5_QPU_OP_MUL_SHIFT 58 +#define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58) + +#define VC5_QPU_SIG_SHIFT 53 +#define VC5_QPU_SIG_MASK QPU_MASK(57, 53) +# define VC5_QPU_SIG_THRSW_BIT 0x1 +# define VC5_QPU_SIG_LDUNIF_BIT 0x2 +# define VC5_QPU_SIG_LDTMU_BIT 0x4 +# define VC5_QPU_SIG_LDVARY_BIT 0x8 + +#define VC5_QPU_COND_SHIFT 46 +#define VC5_QPU_COND_MASK QPU_MASK(52, 46) + +#define VC5_QPU_COND_IFA 0 +#define VC5_QPU_COND_IFB 1 +#define VC5_QPU_COND_IFNA 2 +#define VC5_QPU_COND_IFNB 3 + +#define VC5_QPU_MM QPU_MASK(45, 45) +#define VC5_QPU_MA QPU_MASK(44, 44) + +#define V3D_QPU_WADDR_M_SHIFT 38 +#define
V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38) + +#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35 +#define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35) + +#define V3D_QPU_WADDR_A_SHIFT 32 +#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32) + +#define VC5_QPU_BRANCH_COND_SHIFT 32 +#define VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32) + +#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24 +#define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24) + +#define VC5_QPU_OP_ADD_SHIFT 24 +#define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24) + +#define VC5_QPU_MUL_B_SHIFT 21 +#define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21) + +#define VC5_QPU_BRANCH_MSFIGN_SHIFT 21 +#define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21) + +#define VC5_QPU_MUL_A_SHIFT 18 +#define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18) + +#define VC5_QPU_ADD_B_SHIFT 15 +#define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15) + +#define VC5_QPU_BRANCH_BDU_SHIFT 15 +#define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15) + +#define VC5_QPU_BRANCH_UB QPU_MASK(14, 14) + +#define VC5_QPU_ADD_A_SHIFT 12 +#define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12) + +#define VC5_QPU_BRANCH_BDI_SHIFT 12 +#define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12) + +#define VC5_QPU_RADDR_A_SHIFT 6 +#define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6) + +#define VC5_QPU_RADDR_B_SHIFT 0 +#define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0) + +#define THRSW .thrsw = true +#define LDUNIF .ldunif = true +#define LDTMU .ldtmu = true +#define LDVARY .ldvary = true +#define LDVPM .ldvpm = true +#define SMIMM .small_imm = true +#define LDTLB .ldtlb = true +#define LDTLBU .ldtlbu = true +#define UCB .ucb = true +#define ROT .rotate = true +#define WRTMUC .wrtmuc = true + +static const struct v3d_qpu_sig v33_sig_map[] = { + /* MISC R3 R4 R5 */ + [0] = { }, + [1] = { THRSW, }, + [2] = { LDUNIF }, + [3] = { THRSW, LDUNIF }, + [4] = { LDTMU, }, + [5] = { THRSW, LDTMU, }, + [6] = { LDTMU, LDUNIF }, + [7] = { THRSW, LDTMU, LDUNIF }, + [8] = { LDVARY, }, + [9] = { THRSW, LDVARY, }, + [10] = { LDVARY, LDUNIF }, + [11] = { THRSW, LDVARY, LDUNIF }, + 
[12] = { LDVARY, LDTMU, }, + [13] = { THRSW, LDVARY, LDTMU, }, + [14] = { SMIMM, LDVARY, }, + [15] = { SMIMM, }, + [16] = { LDTLB, }, + [17] = { LDTLBU, }, + /* 18-21 reserved */ + [22] = { UCB, }, + [23] = { ROT, }, + [24] = { LDVPM, }, + [25] = { THRSW, LDVPM, }, + [26] = { LDVPM, LDUNIF }, + [27] = { THRSW, LDVPM, LDUNIF }, + [28] = { LDVPM, LDTMU, }, + [29] = { THRSW, LDVPM, LDTMU, }, + [30] = { SMIMM, LDVPM, }, + [31] = { SMIMM, }, +}; + +/* Decodes the packed signal field into *sig. Returns false for out-of-range + * or reserved encodings. + */ +bool +v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, + uint32_t packed_sig, + struct v3d_qpu_sig *sig) +{ + if (packed_sig >= ARRAY_SIZE(v33_sig_map)) + return false; + + *sig = v33_sig_map[packed_sig]; + + /* Signals with zeroed unpacked contents after element 0 are reserved. + * The "!= 0" must apply to memcmp()'s result, not to its length + * argument, so that the whole struct is compared. + */ + return (packed_sig == 0 || + memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0); +} + +/* Finds the first table entry equal to *sig and stores its index in + * *packed_sig. Returns false if no entry matches. + */ +bool +v3d_qpu_sig_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_sig *sig, + uint32_t *packed_sig) +{ + const struct v3d_qpu_sig *map = v33_sig_map; + + for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) { + if (memcmp(&map[i], sig, sizeof(*sig)) == 0) { + *packed_sig = i; + return true; + } + } + + return false; +} + +bool +v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo, + uint32_t packed_cond, + struct v3d_qpu_flags *cond) +{ + static const enum v3d_qpu_cond cond_map[4] = { + [0] = V3D_QPU_COND_IFA, + [1] = V3D_QPU_COND_IFB, + [2] = V3D_QPU_COND_IFNA, + [3] = V3D_QPU_COND_IFNB, + }; + + cond->ac = V3D_QPU_COND_NONE; + cond->mc = V3D_QPU_COND_NONE; + cond->apf = V3D_QPU_PF_NONE; + cond->mpf = V3D_QPU_PF_NONE; + cond->auf = V3D_QPU_UF_NONE; + cond->muf = V3D_QPU_UF_NONE; + + if (packed_cond == 0) { + return true; + } else if (packed_cond >> 2 == 0) { + cond->apf = packed_cond & 0x3; + } else if (packed_cond >> 4 == 0) { + cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; + } else if (packed_cond == 0x10) { + return false; + } else if (packed_cond >> 2 == 0x4) { + cond->mpf = packed_cond & 0x3; + } else
if (packed_cond >> 4 == 0x1) { + cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; + } else if (packed_cond >> 4 == 0x2) { + cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA; + cond->mpf = packed_cond & 0x3; + } else if (packed_cond >> 4 == 0x3) { + cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA; + cond->apf = packed_cond & 0x3; + } else if (packed_cond >> 6) { + cond->mc = cond_map[(packed_cond >> 4) & 0x3]; + if (((packed_cond >> 2) & 0x3) == 0) { + cond->ac = cond_map[packed_cond & 0x3]; + } else { + cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; + } + } + + return true; +} + +bool +v3d_qpu_flags_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_flags *cond, + uint32_t *packed_cond) +{ +#define AC (1 << 0) +#define MC (1 << 1) +#define APF (1 << 2) +#define MPF (1 << 3) +#define AUF (1 << 4) +#define MUF (1 << 5) + static const struct { + uint8_t flags_present; + uint8_t bits; + } flags_table[] = { + { 0, 0 }, + { APF, 0 }, + { AUF, 0 }, + { MPF, (1 << 4) }, + { MUF, (1 << 4) }, + { AC, (1 << 5) }, + { AC | MPF, (1 << 5) }, + { MC, (1 << 5) | (1 << 4) }, + { MC | APF, (1 << 5) | (1 << 4) }, + { MC | AC, (1 << 6) }, + { MC | AUF, (1 << 6) }, + }; + + uint8_t flags_present = 0; + if (cond->ac != V3D_QPU_COND_NONE) + flags_present |= AC; + if (cond->mc != V3D_QPU_COND_NONE) + flags_present |= MC; + if (cond->apf != V3D_QPU_PF_NONE) + flags_present |= APF; + if (cond->mpf != V3D_QPU_PF_NONE) + flags_present |= MPF; + if (cond->auf != V3D_QPU_UF_NONE) + flags_present |= AUF; + if (cond->muf != V3D_QPU_UF_NONE) + flags_present |= MUF; + + for (int i = 0; i < ARRAY_SIZE(flags_table); i++) { + if (flags_table[i].flags_present != flags_present) + continue; + + *packed_cond = flags_table[i].bits; + + *packed_cond |= cond->apf; + *packed_cond |= cond->mpf; + + if (flags_present & AUF) + *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4; + if (flags_present & MUF) + *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4; + + if 
(flags_present & AC) { + /* When the table entry has bit 6 set (both ac and mc + * present), v3d_qpu_flags_unpack() reads ac from bits 1:0 + * and treats non-zero bits 3:2 as auf, so ac must go in + * bits 1:0 there. Otherwise ac lives in bits 3:2. + */ + if (*packed_cond & (1 << 6)) + *packed_cond |= cond->ac - V3D_QPU_COND_IFA; + else + *packed_cond |= (cond->ac - + V3D_QPU_COND_IFA) << 2; + } + + if (flags_present & MC) { + if (*packed_cond & (1 << 6)) + *packed_cond |= (cond->mc - + V3D_QPU_COND_IFA) << 4; + else + *packed_cond |= (cond->mc - + V3D_QPU_COND_IFA) << 2; + } + + return true; + } + + return false; +} + +/* Make a mapping of the table of opcodes in the spec. The opcode is + * determined by a combination of the opcode field, and in the case of 0 or + * 1-arg opcodes, the mux_b field as well. + */ +#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1)) +#define ANYMUX MUX_MASK(0, 7) + +struct opcode_desc { + uint8_t opcode_first; + uint8_t opcode_last; + uint8_t mux_b_mask; + uint8_t mux_a_mask; + uint8_t op; + /* 0 if it's the same across V3D versions, or a specific V3D version. */ + uint8_t ver; +}; + +static const struct opcode_desc add_ops[] = { + /* FADD is FADDNF depending on the order of the mux_a/mux_b. */ + { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD }, + { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF }, + { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, + { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD }, + { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, + { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB }, + { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, + { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB }, + { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN }, + { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX }, + { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN }, + { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX }, + { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL }, + { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR }, + { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR }, + { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR }, + /* FMIN is instead FMAX depending on the order of the mux_a/mux_b.
*/ + { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN }, + { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX }, + { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN }, + + { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND }, + { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR }, + { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR }, + + { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD }, + { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB }, + { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT }, + { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG }, + { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH }, + { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH }, + { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLBPOP }, + { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF }, + { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF }, + { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 }, + { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX }, + { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX }, + { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR }, + { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA }, + { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA }, + { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB }, + { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB }, + + { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD }, + { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD }, + { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD }, + { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD }, + + { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF }, + { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF }, + { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT }, + { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT }, + { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT }, + + { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP }, + + /* FIXME: MORE COMPLICATED */ + /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */ + + { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP }, + { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX }, + + { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND }, + { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN }, + { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC }, + { 245, 
245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ }, + { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR }, + { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ }, + { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL }, + { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC }, + + { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX }, + { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY }, + + /* The stvpms are distinguished by the waddr field. */ + { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV }, + { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD }, + { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP }, + + { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF }, + { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ }, + { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF }, +}; + +static const struct opcode_desc mul_ops[] = { + { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD }, + { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB }, + { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 }, + { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL }, + { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 }, + { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP }, + { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV }, + { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV }, + { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 }, + { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV }, + { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL }, +}; + +static const struct opcode_desc * +lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes, + uint32_t opcode, uint32_t mux_a, uint32_t mux_b) +{ + for (int i = 0; i < num_opcodes; i++) { + const struct opcode_desc *op_desc = &opcodes[i]; + + if (opcode < op_desc->opcode_first || + opcode > op_desc->opcode_last) + continue; + + if (!(op_desc->mux_b_mask & (1 << mux_b))) + continue; + + if (!(op_desc->mux_a_mask & (1 << mux_a))) + continue; + + return op_desc; + } + + return NULL; +} + +static bool +v3d_qpu_float32_unpack_unpack(uint32_t packed, + enum v3d_qpu_input_unpack *unpacked) +{ + switch (packed) { + case 0: + *unpacked = V3D_QPU_UNPACK_ABS; + return true; + case 1: + 
*unpacked = V3D_QPU_UNPACK_NONE; + return true; + case 2: + *unpacked = V3D_QPU_UNPACK_L; + return true; + case 3: + *unpacked = V3D_QPU_UNPACK_H; + return true; + default: + return false; + } +} + +static bool +v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked, + uint32_t *packed) +{ + switch (unpacked) { + case V3D_QPU_UNPACK_ABS: + *packed = 0; + return true; + case V3D_QPU_UNPACK_NONE: + *packed = 1; + return true; + case V3D_QPU_UNPACK_L: + *packed = 2; + return true; + case V3D_QPU_UNPACK_H: + *packed = 3; + return true; + default: + return false; + } +} + +static bool +v3d_qpu_float16_unpack_unpack(uint32_t packed, + enum v3d_qpu_input_unpack *unpacked) +{ + switch (packed) { + case 0: + *unpacked = V3D_QPU_UNPACK_NONE; + return true; + case 1: + *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16; + return true; + case 2: + *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16; + return true; + case 3: + *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16; + return true; + case 4: + *unpacked = V3D_QPU_UNPACK_SWAP_16; + return true; + default: + return false; + } +} + +static bool +v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked, + uint32_t *packed) +{ + switch (unpacked) { + case V3D_QPU_UNPACK_NONE: + *packed = 0; + return true; + case V3D_QPU_UNPACK_REPLICATE_32F_16: + *packed = 1; + return true; + case V3D_QPU_UNPACK_REPLICATE_L_16: + *packed = 2; + return true; + case V3D_QPU_UNPACK_REPLICATE_H_16: + *packed = 3; + return true; + case V3D_QPU_UNPACK_SWAP_16: + *packed = 4; + return true; + default: + return false; + } +} + +/* Encodes an output pack mode as its instruction field value. Takes + * enum v3d_qpu_output_pack, which is what the case labels below and every + * caller use. Returns false for values outside that enum. + */ +static bool +v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack unpacked, + uint32_t *packed) +{ + switch (unpacked) { + case V3D_QPU_PACK_NONE: + *packed = 0; + return true; + case V3D_QPU_PACK_L: + *packed = 1; + return true; + case V3D_QPU_PACK_H: + *packed = 2; + return true; + default: + return false; + } +} + +static bool +v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, + struct v3d_qpu_instr
*instr) +{ + uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD); + uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A); + uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B); + uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); + + uint32_t map_op = op; + /* Some big clusters of opcodes are replicated with unpack + * flags + */ + if (map_op >= 249 && map_op <= 251) + map_op = (map_op - 249 + 245); + if (map_op >= 253 && map_op <= 255) + map_op = (map_op - 253 + 245); + + const struct opcode_desc *desc = + lookup_opcode(add_ops, ARRAY_SIZE(add_ops), + map_op, mux_a, mux_b); + if (!desc) + return false; + + instr->alu.add.op = desc->op; + + /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the + * operands. + */ + if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) { + if (instr->alu.add.op == V3D_QPU_A_FMIN) + instr->alu.add.op = V3D_QPU_A_FMAX; + if (instr->alu.add.op == V3D_QPU_A_FADD) + instr->alu.add.op = V3D_QPU_A_FADDNF; + } + + /* Some QPU ops require a bit more than just basic opcode and mux a/b + * comparisons to distinguish them. 
+ */ + switch (instr->alu.add.op) { + case V3D_QPU_A_STVPMV: + case V3D_QPU_A_STVPMD: + case V3D_QPU_A_STVPMP: + switch (waddr) { + case 0: + instr->alu.add.op = V3D_QPU_A_STVPMV; + break; + case 1: + instr->alu.add.op = V3D_QPU_A_STVPMD; + break; + case 2: + instr->alu.add.op = V3D_QPU_A_STVPMP; + break; + default: + return false; + } + break; + default: + break; + } + + switch (instr->alu.add.op) { + case V3D_QPU_A_FADD: + case V3D_QPU_A_FADDNF: + case V3D_QPU_A_FSUB: + case V3D_QPU_A_FMIN: + case V3D_QPU_A_FMAX: + case V3D_QPU_A_FCMP: + instr->alu.add.output_pack = (op >> 4) & 0x3; + + if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + &instr->alu.add.a_unpack)) { + return false; + } + + if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, + &instr->alu.add.b_unpack)) { + return false; + } + break; + + case V3D_QPU_A_FFLOOR: + case V3D_QPU_A_FROUND: + case V3D_QPU_A_FTRUNC: + case V3D_QPU_A_FCEIL: + case V3D_QPU_A_FDX: + case V3D_QPU_A_FDY: + instr->alu.add.output_pack = mux_b & 0x3; + + if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + &instr->alu.add.a_unpack)) { + return false; + } + break; + + case V3D_QPU_A_FTOIN: + case V3D_QPU_A_FTOIZ: + case V3D_QPU_A_FTOUZ: + case V3D_QPU_A_FTOC: + instr->alu.add.output_pack = V3D_QPU_PACK_NONE; + + if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + &instr->alu.add.a_unpack)) { + return false; + } + break; + + case V3D_QPU_A_VFMIN: + case V3D_QPU_A_VFMAX: + if (!v3d_qpu_float16_unpack_unpack(op & 0x7, + &instr->alu.add.a_unpack)) { + return false; + } + + instr->alu.add.output_pack = V3D_QPU_PACK_NONE; + instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; + break; + + default: + instr->alu.add.output_pack = V3D_QPU_PACK_NONE; + instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE; + instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; + break; + } + + instr->alu.add.a = mux_a; + instr->alu.add.b = mux_b; + instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); + instr->alu.add.magic_write = packed_inst & VC5_QPU_MA; 
+ + return true; +} + +static bool +v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, + struct v3d_qpu_instr *instr) +{ + uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL); + uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A); + uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B); + + { + const struct opcode_desc *desc = + lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops), + op, mux_a, mux_b); + if (!desc) + return false; + + instr->alu.mul.op = desc->op; + } + + switch (instr->alu.mul.op) { + case V3D_QPU_M_FMUL: + instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1; + + if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + &instr->alu.mul.a_unpack)) { + return false; + } + + if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, + &instr->alu.mul.b_unpack)) { + return false; + } + + break; + + case V3D_QPU_M_FMOV: + instr->alu.mul.output_pack = (((op & 1) << 1) + + ((mux_b >> 2) & 1)); + + if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3, + &instr->alu.mul.a_unpack)) { + return false; + } + + break; + default: + instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; + instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE; + instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE; + break; + } + + instr->alu.mul.a = mux_a; + instr->alu.mul.b = mux_b; + instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M); + instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM; + + return true; +} + +static bool +v3d_qpu_add_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr, uint64_t *packed_instr) +{ + uint32_t waddr = instr->alu.add.waddr; + uint32_t mux_a = instr->alu.add.a; + uint32_t mux_b = instr->alu.add.b; + int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op); + const struct opcode_desc *desc; + + int opcode; + for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)]; + desc++) { + if (desc->op == instr->alu.add.op) + break; + } + if (desc == &add_ops[ARRAY_SIZE(add_ops)]) + return false; + + opcode = 
desc->opcode_first; + + /* If an operation doesn't use an arg, its mux values may be used to + * identify the operation type. + */ + if (nsrc < 2) + mux_b = ffs(desc->mux_b_mask) - 1; + + if (nsrc < 1) + mux_a = ffs(desc->mux_a_mask) - 1; + + switch (instr->alu.add.op) { + case V3D_QPU_A_STVPMV: + waddr = 0; + break; + case V3D_QPU_A_STVPMD: + waddr = 1; + break; + case V3D_QPU_A_STVPMP: + waddr = 2; + break; + default: + break; + } + + switch (instr->alu.add.op) { + case V3D_QPU_A_FADD: + case V3D_QPU_A_FADDNF: + case V3D_QPU_A_FSUB: + case V3D_QPU_A_FMIN: + case V3D_QPU_A_FMAX: + case V3D_QPU_A_FCMP: { + uint32_t output_pack; + uint32_t a_unpack; + uint32_t b_unpack; + + if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, + &output_pack)) { + return false; + } + opcode |= output_pack << 4; + + if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, + &a_unpack)) { + return false; + } + + if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack, + &b_unpack)) { + return false; + } + + /* These operations with commutative operands are + * distinguished by which order their operands come in. 
+ */ + bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b; + if (((instr->alu.add.op == V3D_QPU_A_FMIN || + instr->alu.add.op == V3D_QPU_A_FADD) && ordering) || + ((instr->alu.add.op == V3D_QPU_A_FMAX || + instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) { + uint32_t temp; + + temp = a_unpack; + a_unpack = b_unpack; + b_unpack = temp; + + temp = mux_a; + mux_a = mux_b; + mux_b = temp; + } + + opcode |= a_unpack << 2; + opcode |= b_unpack << 0; + break; + } + + case V3D_QPU_A_FFLOOR: + case V3D_QPU_A_FROUND: + case V3D_QPU_A_FTRUNC: + case V3D_QPU_A_FCEIL: + case V3D_QPU_A_FDX: + case V3D_QPU_A_FDY: { + uint32_t packed; + + if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, + &packed)) { + return false; + } + mux_b |= packed; + + if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, + &packed)) { + return false; + } + if (packed == 0) + return false; + /* desc->opcode_first already has 01 in bits 3:2, so replace + * the field rather than OR into it; otherwise UNPACK_L (2) + * would be emitted as 3 and decode as UNPACK_H. + */ + opcode = (opcode & ~(0x3 << 2)) | (packed << 2); + break; + } + + case V3D_QPU_A_FTOIN: + case V3D_QPU_A_FTOIZ: + case V3D_QPU_A_FTOUZ: + case V3D_QPU_A_FTOC: + if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE) + return false; + + uint32_t packed; + if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, + &packed)) { + return false; + } + if (packed == 0) + return false; + /* Replace bits 3:2 instead of OR-ing, as in the case above. */ + opcode = (opcode & ~(0x3 << 2)) | (packed << 2); + + break; + + case V3D_QPU_A_VFMIN: + case V3D_QPU_A_VFMAX: + if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || + instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) { + return false; + } + + if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack, + &packed)) { + return false; + } + opcode |= packed; + break; + + default: + if (instr->alu.add.op != V3D_QPU_A_NOP && + (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || + instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE || + instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) { + return false; + } + break; + } + + *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A); + *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B); + *packed_instr |= QPU_SET_FIELD(opcode,
VC5_QPU_OP_ADD);
+        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
+        if (instr->alu.add.magic_write)
+                *packed_instr |= VC5_QPU_MA;
+
+        return true;
+}
+
+/**
+ * Encodes the mul-ALU half of an ALU instruction.
+ *
+ * ORs the mul mux A/B, mul opcode and mul write-address fields (plus the MM
+ * bit when instr->alu.mul.magic_write is set) into *packed_instr.  Bits that
+ * are already set in *packed_instr are left alone.
+ *
+ * Returns false when the operation is not listed in mul_ops[], or when one of
+ * the requested output-pack / input-unpack modes is rejected by the float32
+ * pack helpers.  devinfo is not consulted here.
+ */
+static bool
+v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
+                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
+{
+        const struct opcode_desc *const ops_end =
+                mul_ops + ARRAY_SIZE(mul_ops);
+        const struct opcode_desc *match = mul_ops;
+        uint32_t src_a = instr->alu.mul.a;
+        uint32_t src_b = instr->alu.mul.b;
+        int num_src = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
+        uint32_t field;
+
+        /* Walk the opcode table until the entry for this operation. */
+        while (match != ops_end && match->op != instr->alu.mul.op)
+                match++;
+        if (match == ops_end)
+                return false;
+
+        uint32_t op_bits = match->opcode_first;
+
+        /* An operand slot the op does not use takes the lowest mux value
+         * its table entry allows.  The FMOV path below overrides mux B
+         * again, because there the field carries packing information.
+         */
+        if (num_src < 2) {
+                src_b = ffs(match->mux_b_mask) - 1;
+                if (num_src < 1)
+                        src_a = ffs(match->mux_a_mask) - 1;
+        }
+
+        if (instr->alu.mul.op == V3D_QPU_M_FMUL) {
+                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
+                                               &field))
+                        return false;
+                /* Added rather than ORed, and without a +1, because
+                 * match->opcode_first already has a 1 in this field.
+                 */
+                op_bits += field << 4;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
+                                                 &field))
+                        return false;
+                op_bits |= field << 2;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
+                                                 &field))
+                        return false;
+                op_bits |= field;
+        } else if (instr->alu.mul.op == V3D_QPU_M_FMOV) {
+                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
+                                               &field))
+                        return false;
+                /* The output pack mode is split in two: bit 1 goes into the
+                 * opcode, bit 0 into bit 2 of mux B.
+                 */
+                op_bits |= (field >> 1) & 1;
+                src_b = (field & 1) << 2;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
+                                                 &field))
+                        return false;
+                src_b |= field;
+        }
+
+        uint64_t bits = 0;
+
+        bits |= QPU_SET_FIELD(src_a, VC5_QPU_MUL_A);
+        bits |= QPU_SET_FIELD(src_b, VC5_QPU_MUL_B);
+        bits |= QPU_SET_FIELD(op_bits, VC5_QPU_OP_MUL);
+        bits |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
+        if (instr->alu.mul.magic_write)
+                bits |= VC5_QPU_MM;
+
+        *packed_instr |= bits;
+
+        return true;
+}
+
+/**
+ * Decodes a packed ALU instruction into *instr.
+ *
+ * Fills in, in order: the instruction type, the signal bits, the flags, both
+ * read addresses, then the add and mul ALU halves.  Decoding stops at the
+ * first helper that fails, so *instr may be only partly written when false is
+ * returned.
+ */
+static bool
+v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
+                         uint64_t packed_instr,
+                         struct v3d_qpu_instr *instr)
+{
+        const uint32_t packed_sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);
+        const uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
+
+        instr->type = V3D_QPU_INSTR_TYPE_ALU;
+
+        if (!v3d_qpu_sig_unpack(devinfo, packed_sig, &instr->sig) ||
+            !v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
+                return false;
+
+        instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
+        instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
+
+        return v3d_qpu_add_unpack(devinfo, packed_instr, instr) &&
+               v3d_qpu_mul_unpack(devinfo, packed_instr, instr);
+}
+
+/**
+ * Decodes a packed branch instruction into *instr: condition, msfign, bdi,
+ * ub/bdu, raddr_a and the branch offset.  Returns false for encodings it
+ * does not recognise.
+ */
+static bool
+v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
+                            uint64_t packed_instr,
+                            struct v3d_qpu_instr *instr)
+{
+        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
+
+        uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
+        if (cond == 0)
+                instr->branch.cond =
V3D_QPU_BRANCH_COND_ALWAYS; + else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <= + V3D_QPU_BRANCH_COND_ALLNA) + instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2); + else + return false; + + uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN); + if (msfign == 3) + return false; + instr->branch.msfign = msfign; + + instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI); + + instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB; + if (instr->branch.ub) { + instr->branch.bdu = QPU_GET_FIELD(packed_instr, + VC5_QPU_BRANCH_BDU); + } + + instr->branch.raddr_a = QPU_GET_FIELD(packed_instr, + VC5_QPU_RADDR_A); + + instr->branch.offset = 0; + + instr->branch.offset += + QPU_GET_FIELD(packed_instr, + VC5_QPU_BRANCH_ADDR_LOW) << 3; + + instr->branch.offset += + QPU_GET_FIELD(packed_instr, + VC5_QPU_BRANCH_ADDR_HIGH) << 24; + + return true; +} + +bool +v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo, + uint64_t packed_instr, + struct v3d_qpu_instr *instr) +{ + if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) { + return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr); + } else { + uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG); + + if ((sig & 24) == 16) { + return v3d_qpu_instr_unpack_branch(devinfo, packed_instr, + instr); + } else { + return false; + } + } +} + +static bool +v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr, + uint64_t *packed_instr) +{ + uint32_t sig; + if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig)) + return false; + *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG); + + if (instr->type == V3D_QPU_INSTR_TYPE_ALU) { + *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A); + *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B); + + if (!v3d_qpu_add_pack(devinfo, instr, packed_instr)) + return false; + if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr)) + return false; + + uint32_t flags; + if (!v3d_qpu_flags_pack(devinfo, 
&instr->flags, &flags)) + return false; + *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND); + } + + return true; +} + +static bool +v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr, + uint64_t *packed_instr) +{ + *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG); + + if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) { + *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond - + V3D_QPU_BRANCH_COND_A0), + VC5_QPU_BRANCH_COND); + } + + *packed_instr |= QPU_SET_FIELD(instr->branch.msfign, + VC5_QPU_BRANCH_MSFIGN); + + *packed_instr |= QPU_SET_FIELD(instr->branch.bdi, + VC5_QPU_BRANCH_BDI); + + if (instr->branch.ub) { + *packed_instr |= VC5_QPU_BRANCH_UB; + *packed_instr |= QPU_SET_FIELD(instr->branch.bdu, + VC5_QPU_BRANCH_BDU); + } + + switch (instr->branch.bdi) { + case V3D_QPU_BRANCH_DEST_ABS: + case V3D_QPU_BRANCH_DEST_REL: + *packed_instr |= QPU_SET_FIELD(instr->branch.msfign, + VC5_QPU_BRANCH_MSFIGN); + + *packed_instr |= QPU_SET_FIELD((instr->branch.offset & + ~0xff000000) >> 3, + VC5_QPU_BRANCH_ADDR_LOW); + + *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24, + VC5_QPU_BRANCH_ADDR_HIGH); + + case V3D_QPU_BRANCH_DEST_REGFILE: + *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a, + VC5_QPU_RADDR_A); + break; + + default: + break; + } + + return true; +} + +bool +v3d_qpu_instr_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr, + uint64_t *packed_instr) +{ + *packed_instr = 0; + + switch (instr->type) { + case V3D_QPU_INSTR_TYPE_ALU: + return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr); + case V3D_QPU_INSTR_TYPE_BRANCH: + return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr); + default: + return false; + } +} diff --git a/src/broadcom/qpu/qpu_validate.c b/src/broadcom/qpu/qpu_validate.c new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/broadcom/qpu/tests/.gitignore b/src/broadcom/qpu/tests/.gitignore new file mode 100644 index 
00000000000..d2cf70a7cab --- /dev/null +++ b/src/broadcom/qpu/tests/.gitignore @@ -0,0 +1 @@ +v3d_qpu_disasm diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c new file mode 100644 index 00000000000..c7f6476def5 --- /dev/null +++ b/src/broadcom/qpu/tests/qpu_disasm.c @@ -0,0 +1,146 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include "util/macros.h" +#include "broadcom/common/v3d_device_info.h" +#include "broadcom/qpu/qpu_disasm.h" +#include "broadcom/qpu/qpu_instr.h" + +static const struct { + int ver; + uint64_t inst; + const char *expected; +} tests[] = { + { 33, 0x3d003186bb800000ull, "nop ; nop ; ldvary" }, + { 33, 0x3c20318105829000ull, "fadd r1, r1, r5 ; nop ; thrsw" }, + { 33, 0x3c403186bb81d000ull, "vpmsetup -, r5 ; nop ; ldunif" }, + { 33, 0x3f003186bb800000ull, "nop ; nop ; ldvpm" }, + { 33, 0x3c002380b6edb000ull, "or rf0, r3, r3 ; mov vpm, r3" }, + { 33, 0x57403006bbb80000ull, "nop ; fmul r0, rf0, r5 ; ldvpm; ldunif" }, + + /* branch conditions */ + { 33, 0x02000006002034c0ull, "b.anyap rf19" }, + { 33, 0x02679356b4201000ull, "b.anyap -1268280496" }, + { 33, 0x02b76a2dd0400000ull, "b.anynaq zero_addr+0xd0b76a28" }, + { 33, 0x0200000500402000ull, "b.anynaq lri" }, + { 33, 0x0216fe167301c8c0ull, "bu.anya zero_addr+0x7316fe10, rf35" }, + { 33, 0x020000050040e000ull, "bu.anynaq lri, r:unif" }, + { 33, 0x0200000300006000ull, "bu.na0 lri, a:unif" }, + + /* Special waddr names */ + { 33, 0x3c00318735808000ull, "vfpack tlb, r0, r1 ; nop" }, + { 33, 0xe0571c938e8d5000ull, "fmax.andc recip, r5.h, r2.l; fmul.ifb rf50.h, r3.l, r4.abs; ldunif" }, + { 33, 0xc04098d4382c9000ull, "add.pushn rsqrt, r1, r1; fmul rf35.h, r3.abs, r1.abs; ldunif" }, + { 33, 0x481edcd6b3184500ull, "vfmin.norn log, r4.hh, r0; fmul.ifnb rf51, rf20.abs, r0.l" }, + { 33, 0x041618d57c453000ull, "shl.andn exp, r3, r2; add.ifb rf35, r1, r2" }, + { 33, 0x7048e5da49272800ull, "fsub.ifa rf26, r2.l, rf32; fmul.pushc sin, r1.h, r1.abs; ldunif" }, + +}; + +static void +swap_mux(enum v3d_qpu_mux *a, enum v3d_qpu_mux *b) +{ + enum v3d_qpu_mux t = *a; + *a = *b; + *b = t; +} + +static void +swap_pack(enum v3d_qpu_input_unpack *a, enum v3d_qpu_input_unpack *b) +{ + enum v3d_qpu_input_unpack t = *a; + *a = *b; + *b = t; +} + +int +main(int argc, char **argv) +{ + struct v3d_device_info devinfo = { }; + 
int retval = 0; + + for (int i = 0; i < ARRAY_SIZE(tests); i++) { + devinfo.ver = tests[i].ver; + + printf("Testing v%d.%d 0x%016llx... ", + devinfo.ver / 10, devinfo.ver % 10, + (long long)tests[i].inst); + + const char *disasm_output = v3d_qpu_disasm(&devinfo, + tests[i].inst); + + if (strcmp(disasm_output, tests[i].expected) != 0) { + printf("FAIL\n"); + printf(" Expected: \"%s\"\n", tests[i].expected); + printf(" Got: \"%s\"\n", disasm_output); + retval = 1; + continue; + } + + struct v3d_qpu_instr instr; + if (!v3d_qpu_instr_unpack(&devinfo, tests[i].inst, &instr)) { + printf("FAIL (unpack) %s\n", tests[i].expected); + retval = 1; + continue; + } + + if (instr.type == V3D_QPU_INSTR_TYPE_ALU) { + switch (instr.alu.add.op) { + case V3D_QPU_A_FADD: + case V3D_QPU_A_FADDNF: + case V3D_QPU_A_FMIN: + case V3D_QPU_A_FMAX: + /* Swap the operands to be sure that we test + * how the QPUs distinguish between these ops. + */ + swap_mux(&instr.alu.add.a, + &instr.alu.add.b); + swap_pack(&instr.alu.add.a_unpack, + &instr.alu.add.b_unpack); + default: + break; + } + } + + uint64_t repack; + if (!v3d_qpu_instr_pack(&devinfo, &instr, &repack)) { + printf("FAIL (pack) %s\n", tests[i].expected); + retval = 1; + continue; + } + + if (repack != tests[i].inst) { + printf("FAIL (repack) 0x%016llx\n", (long long)repack); + printf(" Expected: \"%s\"\n", tests[i].expected); + const char *redisasm = v3d_qpu_disasm(&devinfo, repack); + printf(" Got: \"%s\"\n", redisasm); + retval = 1; + } + + printf("PASS\n"); + } + + return retval; +} -- 2.30.2