From: Rob Clark Date: Fri, 24 Jul 2020 00:32:36 +0000 (-0700) Subject: freedreno: slurp in decode tools X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1ea4ef0d3be829e392922f5d26fbc89bf69a8a67;p=mesa.git freedreno: slurp in decode tools cffdump, crashdec, etc At this point there is some duplication with other files in-tree (ie. a2xx and a3xx+ disassembly), which will be cleaned up in a later commit. Signed-off-by: Rob Clark Part-of: --- diff --git a/src/freedreno/decode/buffers.c b/src/freedreno/decode/buffers.c new file mode 100644 index 00000000000..8e696f857df --- /dev/null +++ b/src/freedreno/decode/buffers.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/*
 * Helper lib to track gpu buffers contents/address, and map between gpu and
 * host address while decoding cmdstream/crashdumps
 */

#ifndef ARRAY_SIZE
#  define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif

struct buffer {
	void *hostptr;       /* host copy of the contents, owned by this module */
	unsigned int len;    /* size of the buffer in bytes */
	uint64_t gpuaddr;    /* gpu address of the first byte */

	/* for 'once' mode, for buffers containing cmdstream keep track per offset
	 * into buffer of which modes it has already been dumped;
	 */
	struct {
		unsigned offset;
		unsigned dumped_mask;
	} offsets[64];
	unsigned noffsets;
};

static struct buffer buffers[512];
static int nbuffers;

/*
 * Returns non-zero if the `len` bytes starting at `gpuaddr` lie inside `buf`.
 * With len == 0 this only checks that `gpuaddr` itself is inside the buffer.
 */
static int
buffer_contains_gpuaddr(struct buffer *buf, uint64_t gpuaddr, uint32_t len)
{
	if (gpuaddr < buf->gpuaddr)
		return 0;

	/* compare offsets rather than end addresses so the sum cannot wrap: */
	uint64_t off = gpuaddr - buf->gpuaddr;
	if (off >= buf->len)
		return 0;

	return len <= (buf->len - off);
}

/* Returns non-zero if `hostptr` points into the host copy of `buf`. */
static int
buffer_contains_hostptr(struct buffer *buf, void *hostptr)
{
	/* arithmetic on void* is a GNU extension, so go through char*: */
	const char *start = buf->hostptr;
	const char *p = hostptr;

	return (start <= p) && (p < (start + buf->len));
}

/*
 * Find the first tracked buffer containing `gpuaddr`, or NULL if there is
 * none.  A zero address never matches.
 */
static struct buffer *
find_buffer(uint64_t gpuaddr)
{
	if (!gpuaddr)
		return NULL;

	for (int i = 0; i < nbuffers; i++)
		if (buffer_contains_gpuaddr(&buffers[i], gpuaddr, 0))
			return &buffers[i];

	return NULL;
}

/* Translate a host pointer into a tracked buffer to its gpu address (0 if unknown). */
uint64_t
gpuaddr(void *hostptr)
{
	for (int i = 0; i < nbuffers; i++) {
		struct buffer *buf = &buffers[i];

		if (buffer_contains_hostptr(buf, hostptr))
			return buf->gpuaddr +
				(uint64_t)((const char *)hostptr - (const char *)buf->hostptr);
	}

	return 0;
}

/* Return the gpu address of the start of the buffer containing `gpuaddr` (0 if unknown). */
uint64_t
gpubaseaddr(uint64_t gpuaddr)
{
	struct buffer *buf = find_buffer(gpuaddr);

	return buf ? buf->gpuaddr : 0;
}

/* Translate a gpu address to a pointer into the host copy (NULL if unknown). */
void *
hostptr(uint64_t gpuaddr)
{
	struct buffer *buf = find_buffer(gpuaddr);

	if (!buf)
		return NULL;

	return (char *)buf->hostptr + (gpuaddr - buf->gpuaddr);
}

/* Number of bytes from `gpuaddr` to the end of its buffer (0 if unknown). */
unsigned
hostlen(uint64_t gpuaddr)
{
	struct buffer *buf = find_buffer(gpuaddr);

	if (!buf)
		return 0;

	return buf->len + buf->gpuaddr - gpuaddr;
}

/*
 * 'once' mode bookkeeping: returns true if `gpuaddr` has already been dumped
 * for every mode in `enable_mask`; otherwise records those modes as dumped
 * and returns false.
 *
 * Each buffer can track ARRAY_SIZE(offsets) distinct offsets.  Once that
 * table is full, further offsets are not tracked and are always reported as
 * "not yet dumped" instead of writing past the end of the table.
 */
bool
has_dumped(uint64_t gpuaddr, unsigned enable_mask)
{
	struct buffer *b = find_buffer(gpuaddr);

	if (!b)
		return false;

	assert(gpuaddr >= b->gpuaddr);
	unsigned offset = gpuaddr - b->gpuaddr;

	unsigned n;
	for (n = 0; n < b->noffsets; n++)
		if (b->offsets[n].offset == offset)
			break;

	/* if needed, allocate a new offset entry: */
	if (n == b->noffsets) {
		if (b->noffsets >= ARRAY_SIZE(b->offsets))
			return false;

		b->offsets[n].dumped_mask = 0;
		b->offsets[n].offset = offset;
		b->noffsets++;
	}

	if ((b->offsets[n].dumped_mask & enable_mask) == enable_mask)
		return true;

	b->offsets[n].dumped_mask |= enable_mask;

	return false;
}

/* Forget all tracked buffers, freeing the host copies they own. */
void
reset_buffers(void)
{
	for (int i = 0; i < nbuffers; i++) {
		free(buffers[i].hostptr);
		buffers[i].hostptr = NULL;
		buffers[i].len = 0;
		buffers[i].noffsets = 0;
	}
	nbuffers = 0;
}

/**
 * Record buffer contents, takes ownership of hostptr (freed in
 * reset_buffers(), or earlier if the same gpuaddr is recorded again
 * with different contents)
 */
void
add_buffer(uint64_t gpuaddr, unsigned int len, void *hostptr)
{
	int i;

	for (i = 0; i < nbuffers; i++) {
		if (buffers[i].gpuaddr == gpuaddr)
			break;
	}

	if (i == nbuffers) {
		/* some traces, like test-perf, with some blob versions,
		 * seem to generate an unreasonable # of gpu buffers (a
		 * leak?), so just ignore them.
		 */
		if ((size_t)nbuffers >= ARRAY_SIZE(buffers)) {
			free(hostptr);
			return;
		}
		nbuffers++;
	} else if (buffers[i].hostptr != hostptr) {
		/* replacing an entry: we own the old copy and nothing else
		 * references it any more, so release it here.
		 */
		free(buffers[i].hostptr);
	}

	buffers[i].hostptr = hostptr;
	buffers[i].len = len;
	buffers[i].gpuaddr = gpuaddr;
}
#ifndef __BUFFERS_H__
#define __BUFFERS_H__

/* the declarations below need uint64_t and bool: */
#include <stdint.h>
#include <stdbool.h>

/* Map a host pointer inside a recorded buffer to its gpu address. */
uint64_t gpuaddr(void *hostptr);
/* Start address of the recorded buffer that contains gpuaddr. */
uint64_t gpubaseaddr(uint64_t gpuaddr);
/* Map a gpu address to a pointer into the recorded host copy. */
void * hostptr(uint64_t gpuaddr);
/* Bytes remaining from gpuaddr to the end of its recorded buffer. */
unsigned hostlen(uint64_t gpuaddr);
/* 'once' mode: has gpuaddr already been dumped for all modes in enable_mask? */
bool has_dumped(uint64_t gpuaddr, unsigned enable_mask);

/* Drop (and free) all recorded buffers. */
void reset_buffers(void);
/* Record a buffer; takes ownership of hostptr. */
void add_buffer(uint64_t gpuaddr, unsigned int len, void *hostptr);

/* Element count of a true array (not of a pointer). */
#ifndef ARRAY_SIZE
#  define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif

#endif /* __BUFFERS_H__ */
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "redump.h" +#include "disasm.h" +#include "script.h" +#include "rnnutil.h" +#include "buffers.h" +#include "cffdec.h" + +/* ************************************************************************* */ +/* originally based on kernel recovery dump code: */ + +static const struct cffdec_options *options; + +static bool needs_wfi = false; +static bool summary = false; +static bool in_summary = false; +static int vertices; + +static inline unsigned regcnt(void) +{ + if (options->gpu_id >= 500) + return 0xffff; + else + return 0x7fff; +} + +static int is_64b(void) +{ + return options->gpu_id >= 500; +} + + +static int draws[3]; +static struct { + uint64_t base; + uint32_t size; /* in dwords */ + /* Generally cmdstream consists of multiple IB calls to different + * buffers, which are themselves often re-used for each tile. The + * triggered flag serves two purposes to help make it more clear + * what part of the cmdstream is before vs after the the GPU hang: + * + * 1) if in IB2 we are passed the point within the IB2 buffer where + * the GPU hung, but IB1 is not passed the point within its + * buffer where the GPU had hung, then we know the GPU hang + * happens on a future use of that IB2 buffer. + * + * 2) if in an IB1 or IB2 buffer that is not the one where the GPU + * hung, but we've already passed the trigger point at the same + * IB level, we know that we are passed the point where the GPU + * had hung. + * + * So this is a one way switch, false->true. And a higher #'d + * IB level isn't considered triggered unless the lower #'d IB + * level is. + */ + bool triggered; +} ibs[4]; +static int ib; + +static int draw_count; +static int current_draw_count; + +/* query mode.. 
to handle symbolic register name queries, we need to + * defer parsing query string until after gpu_id is know and rnn db + * loaded: + */ +static int *queryvals; + +static bool +quiet(int lvl) +{ + if ((options->draw_filter != -1) && (options->draw_filter != current_draw_count)) + return true; + if ((lvl >= 3) && (summary || options->querystrs || options->script)) + return true; + if ((lvl >= 2) && (options->querystrs || options->script)) + return true; + return false; +} + +void +printl(int lvl, const char *fmt, ...) +{ + va_list args; + if (quiet(lvl)) + return; + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); +} + +static const char *levels[] = { + "\t", + "\t\t", + "\t\t\t", + "\t\t\t\t", + "\t\t\t\t\t", + "\t\t\t\t\t\t", + "\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t\t", + "x", + "x", + "x", + "x", + "x", + "x", +}; + +enum state_src_t { + STATE_SRC_DIRECT, + STATE_SRC_INDIRECT, + STATE_SRC_BINDLESS, +}; + +/* SDS (CP_SET_DRAW_STATE) helpers: */ +static void load_all_groups(int level); +static void disable_all_groups(void); + +static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level); +static void dump_tex_const(uint32_t *texsamp, int num_unit, int level); + +static bool +highlight_gpuaddr(uint64_t gpuaddr) +{ + if (!options->color) + return false; + + if (!options->ibs[ib].base) + return false; + + if ((ib > 0) && options->ibs[ib-1].base && !ibs[ib-1].triggered) + return false; + + if (ibs[ib].triggered) + return true; + + if (options->ibs[ib].base != ibs[ib].base) + return false; + + uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem); + uint64_t end = ibs[ib].base + 4 * ibs[ib].size; + + bool triggered = (start <= gpuaddr) && (gpuaddr <= end); + + ibs[ib].triggered |= triggered; + + if (triggered) + printf("ESTIMATED CRASH LOCATION!\n"); + + return triggered; +} + +static void +dump_hex(uint32_t *dwords, uint32_t sizedwords, int level) +{ + int i, j; + int lastzero = 1; + + 
if (quiet(2)) + return; + + for (i = 0; i < sizedwords; i += 8) { + int zero = 1; + + /* always show first row: */ + if (i == 0) + zero = 0; + + for (j = 0; (j < 8) && (i+j < sizedwords) && zero; j++) + if (dwords[i+j]) + zero = 0; + + if (zero && !lastzero) + printf("*\n"); + + lastzero = zero; + + if (zero) + continue; + + uint64_t addr = gpuaddr(&dwords[i]); + bool highlight = highlight_gpuaddr(addr); + + if (highlight) + printf("\x1b[0;1;31m"); + + if (is_64b()) { + printf("%016lx:%s", addr, levels[level]); + } else { + printf("%08x:%s", (uint32_t)addr, levels[level]); + } + + if (highlight) + printf("\x1b[0m"); + + printf("%04x:", i * 4); + + for (j = 0; (j < 8) && (i+j < sizedwords); j++) { + printf(" %08x", dwords[i+j]); + } + + printf("\n"); + } +} + +static void +dump_float(float *dwords, uint32_t sizedwords, int level) +{ + int i; + for (i = 0; i < sizedwords; i++) { + if ((i % 8) == 0) { + if (is_64b()) { + printf("%016lx:%s", gpuaddr(dwords), levels[level]); + } else { + printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]); + } + } else { + printf(" "); + } + printf("%8f", *(dwords++)); + if ((i % 8) == 7) + printf("\n"); + } + if (i % 8) + printf("\n"); +} + +/* I believe the surface format is low bits: +#define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL +comments in sys2gmem_tex_const indicate that address is [31:12], but +looks like at least some of the bits above the format have different meaning.. 
+*/ +static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, + uint32_t *flags, uint32_t mask) +{ + assert(!is_64b()); /* this is only used on a2xx */ + *gpuaddr = dword & ~mask; + *flags = dword & mask; +} + +static uint32_t type0_reg_vals[0xffff + 1]; +static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals)/8]; /* written since last draw */ +static uint8_t type0_reg_written[sizeof(type0_reg_vals)/8]; +static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)]; + +static bool reg_rewritten(uint32_t regbase) +{ + return !!(type0_reg_rewritten[regbase/8] & (1 << (regbase % 8))); +} + +bool reg_written(uint32_t regbase) +{ + return !!(type0_reg_written[regbase/8] & (1 << (regbase % 8))); +} + +static void clear_rewritten(void) +{ + memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten)); +} + +static void clear_written(void) +{ + memset(type0_reg_written, 0, sizeof(type0_reg_written)); + clear_rewritten(); +} + +uint32_t reg_lastval(uint32_t regbase) +{ + return lastvals[regbase]; +} + +static void +clear_lastvals(void) +{ + memset(lastvals, 0, sizeof(lastvals)); +} + +uint32_t +reg_val(uint32_t regbase) +{ + return type0_reg_vals[regbase]; +} + +void +reg_set(uint32_t regbase, uint32_t val) +{ + assert(regbase < regcnt()); + type0_reg_vals[regbase] = val; + type0_reg_written[regbase/8] |= (1 << (regbase % 8)); + type0_reg_rewritten[regbase/8] |= (1 << (regbase % 8)); +} + +static void +reg_dump_scratch(const char *name, uint32_t dword, int level) +{ + unsigned r; + + if (quiet(3)) + return; + + r = regbase("CP_SCRATCH[0].REG"); + + // if not, try old a2xx/a3xx version: + if (!r) + r = regbase("CP_SCRATCH_REG0"); + + if (!r) + return; + + printf("%s:%u,%u,%u,%u\n", levels[level], + reg_val(r + 4), reg_val(r + 5), + reg_val(r + 6), reg_val(r + 7)); +} + +static void +dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl) +{ + void *buf; + + if (quiet(quietlvl)) + return; + + buf = hostptr(gpuaddr); + if (buf) { + dump_hex(buf, sizedwords, 
level+1); + } +} + +static void +dump_gpuaddr(uint64_t gpuaddr, int level) +{ + dump_gpuaddr_size(gpuaddr, level, 64, 3); +} + +static void +reg_dump_gpuaddr(const char *name, uint32_t dword, int level) +{ + dump_gpuaddr(dword, level); +} + +uint32_t gpuaddr_lo; +static void +reg_gpuaddr_lo(const char *name, uint32_t dword, int level) +{ + gpuaddr_lo = dword; +} + +static void +reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level) +{ + dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level); +} + + +static void +dump_shader(const char *ext, void *buf, int bufsz) +{ + if (options->dump_shaders) { + static int n = 0; + char filename[8]; + int fd; + sprintf(filename, "%04d.%s", n++, ext); + fd = open(filename, O_WRONLY| O_TRUNC | O_CREAT, 0644); + write(fd, buf, bufsz); + close(fd); + } +} + +static void +disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level) +{ + void *buf; + + gpuaddr &= 0xfffffffffffffff0; + + if (quiet(3)) + return; + + buf = hostptr(gpuaddr); + if (buf) { + uint32_t sizedwords = hostlen(gpuaddr) / 4; + const char *ext; + + dump_hex(buf, min(64, sizedwords), level+1); + disasm_a3xx(buf, sizedwords, level+2, stdout, options->gpu_id); + + /* this is a bit ugly way, but oh well.. */ + if (strstr(name, "SP_VS_OBJ")) { + ext = "vo3"; + } else if (strstr(name, "SP_FS_OBJ")) { + ext = "fo3"; + } else if (strstr(name, "SP_GS_OBJ")) { + ext = "go3"; + } else if (strstr(name, "SP_CS_OBJ")) { + ext = "co3"; + } else { + ext = NULL; + } + + if (ext) + dump_shader(ext, buf, sizedwords * 4); + } +} + +static void +reg_disasm_gpuaddr(const char *name, uint32_t dword, int level) +{ + disasm_gpuaddr(name, dword, level); +} + +static void +reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level) +{ + disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level); +} + +/* Find the value of the TEX_COUNT register that corresponds to the named + * TEX_SAMP/TEX_CONST reg. 
/* Look up the value of the TEX_COUNT register that belongs to the given
 * TEX_CONST/TEX_SAMP register name.
 *
 * The name of the count register is derived by replacing everything from
 * "CONST" (or, failing that, "SAMP") onwards with "COUNT".  Returns 0 when
 * the name contains neither.
 *
 * Note, this kinda assumes an equal # of samplers and textures.
 */
static int
get_tex_count(const char *name)
{
	static const char replacement[] = "COUNT";
	char count_reg[strlen(name) + 5];
	const char *match = strstr(name, "CONST");

	if (match == NULL) {
		match = strstr(name, "SAMP");
		if (match == NULL)
			return 0;
	}

	/* prefix up to the match, then "COUNT" including its terminator: */
	int prefix_len = match - name;
	memcpy(count_reg, name, prefix_len);
	memcpy(count_reg + prefix_len, replacement, sizeof(replacement));

	return reg_val(regbase(count_reg));
}
reg_dump_scratch), + REG(CP_SCRATCH_REG5, reg_dump_scratch), + REG(CP_SCRATCH_REG6, reg_dump_scratch), + REG(CP_SCRATCH_REG7, reg_dump_scratch), + REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr), + REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr), + REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr), + REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr), + REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr), + REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), + {NULL}, +}, reg_a4xx[] = { + REG(CP_SCRATCH[0].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x1].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x2].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x3].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x4].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x5].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x6].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x7].REG, reg_dump_scratch), + REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr), + REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr), + REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr), + REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr), + REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr), + REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr), + REG(SP_VS_OBJ_START, reg_disasm_gpuaddr), + REG(SP_FS_OBJ_START, reg_disasm_gpuaddr), + REG(SP_GS_OBJ_START, reg_disasm_gpuaddr), + REG(SP_HS_OBJ_START, reg_disasm_gpuaddr), + REG(SP_DS_OBJ_START, reg_disasm_gpuaddr), + REG(SP_CS_OBJ_START, reg_disasm_gpuaddr), + REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), + REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), + REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), + REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), + REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), + {NULL}, +}, reg_a5xx[] = { + REG(CP_SCRATCH[0x4].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x5].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x6].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x7].REG, reg_dump_scratch), + REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + 
REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo), + REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi), +// REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo), +// REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi), +// REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo), +// REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi), +// 
REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo), +// REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi), +// REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo), +// REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi), +// REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo), +// REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi), +// REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo), +// REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi), +// REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo), +// REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi), +// REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo), +// REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi), +// REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo), +// REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi), +// REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo), +// REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi), +// REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo), +// REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi), +// REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo), +// REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi), +// REG(RB_BLIT_DST_LO, reg_gpuaddr_lo), +// REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi), + +// REG(RB_2D_SRC_LO, reg_gpuaddr_lo), +// REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi), +// REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo), +// REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi), +// REG(RB_2D_DST_LO, reg_gpuaddr_lo), +// REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi), +// REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo), +// REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi), + + {NULL}, +}, reg_a6xx[] = { + REG(CP_SCRATCH[0x4].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x5].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x6].REG, reg_dump_scratch), + REG(CP_SCRATCH[0x7].REG, reg_dump_scratch), + + REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo), 
+ REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo), + REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi), + + REG(SP_VS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(SP_VS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(SP_VS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(SP_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(SP_HS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(SP_HS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(SP_HS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(SP_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(SP_DS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(SP_DS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(SP_DS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(SP_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(SP_GS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(SP_GS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(SP_GS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(SP_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(SP_FS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(SP_FS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(SP_FS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(SP_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + REG(SP_CS_TEX_CONST_LO, reg_gpuaddr_lo), + REG(SP_CS_TEX_CONST_HI, reg_dump_tex_const_hi), + REG(SP_CS_TEX_SAMP_LO, reg_gpuaddr_lo), + REG(SP_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi), + + {NULL}, +}, *type0_reg; + +static struct rnn *rnn; + +static void +init_rnn(const char *gpuname) +{ + rnn = rnn_new(!options->color); + + rnn_load(rnn, gpuname); + + if (options->querystrs) { + int i; + queryvals = calloc(options->nquery, sizeof(queryvals[0])); + + for (i = 0; i < options->nquery; i++) { + int val = strtol(options->querystrs[i], NULL, 0); + + if (val == 0) + val = regbase(options->querystrs[i]); + + queryvals[i] = val; + printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]); + } + } + + for (unsigned idx = 0; 
type0_reg[idx].regname; idx++) { + type0_reg[idx].regbase = regbase(type0_reg[idx].regname); + if (!type0_reg[idx].regbase) { + printf("invalid register name: %s\n", type0_reg[idx].regname); + exit(1); + } + } +} + +void +reset_regs(void) +{ + clear_written(); + clear_lastvals(); + memset(&ibs, 0, sizeof(ibs)); +} + +void +cffdec_init(const struct cffdec_options *_options) +{ + options = _options; + summary = options->summary; + + /* in case we're decoding multiple files: */ + free(queryvals); + reset_regs(); + draw_count = 0; + + /* TODO we need an API to free/cleanup any previous rnn */ + + switch (options->gpu_id) { + case 200 ... 299: + type0_reg = reg_a2xx; + init_rnn("a2xx"); + break; + case 300 ... 399: + type0_reg = reg_a3xx; + init_rnn("a3xx"); + break; + case 400 ... 499: + type0_reg = reg_a4xx; + init_rnn("a4xx"); + break; + case 500 ... 599: + type0_reg = reg_a5xx; + init_rnn("a5xx"); + break; + case 600 ... 699: + type0_reg = reg_a6xx; + init_rnn("a6xx"); + break; + default: + errx(-1, "unsupported gpu"); + } +} + +const char * +pktname(unsigned opc) +{ + return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc); +} + +const char * +regname(uint32_t regbase, int color) +{ + return rnn_regname(rnn, regbase, color); +} + +uint32_t +regbase(const char *name) +{ + return rnn_regbase(rnn, name); +} + +static int +endswith(uint32_t regbase, const char *suffix) +{ + const char *name = regname(regbase, 0); + const char *s = strstr(name, suffix); + if (!s) + return 0; + return (s - strlen(name) + strlen(suffix)) == name; +} + +void +dump_register_val(uint32_t regbase, uint32_t dword, int level) +{ + struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase); + + if (info && info->typeinfo) { + uint64_t gpuaddr = 0; + char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword); + printf("%s%s: %s", levels[level], info->name, decoded); + + /* Try and figure out if we are looking at a gpuaddr.. 
/* True for register offsets in the range [0x2000, 0x2400). */
static bool
is_banked_reg(uint32_t regbase)
{
	if (regbase < 0x2000)
		return false;

	return regbase < 0x2400;
}
+ */ + if (needs_wfi && !is_banked_reg(regbase)) + printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase); + + reg_set(regbase, *dwords); + dump_register(regbase, *dwords, level); + regbase++; + dwords++; + summary = last_summary; + } +} + +static void +dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, + const char *name) +{ + struct rnndomain *dom; + int i; + + dom = rnn_finddomain(rnn->db, name); + + if (!dom) + return; + + if (script_packet) + script_packet(dwords, sizedwords, rnn, dom); + + if (quiet(2)) + return; + + for (i = 0; i < sizedwords; i++) { + struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0); + char *decoded; + if (!(info && info->typeinfo)) + break; + uint64_t value = dwords[i]; + if (info->typeinfo->high >= 32 && i < sizedwords - 1) { + value |= (uint64_t) dwords[i + 1] << 32; + i++; /* skip the next dword since we're printing it now */ + } + decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value); + /* Unlike the register printing path, we don't print the name + * of the register, so if it doesn't contain other named + * things (i.e. it isn't a bitset) then print the register + * name as if it's a bitset with a single entry. This avoids + * having to create a dummy register with a single entry to + * get a name in the decoding. 
+ */ + if (info->typeinfo->type == RNN_TTYPE_BITSET || + info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) { + printf("%s%s\n", levels[level], decoded); + } else { + printf("%s{ %s%s%s = %s }\n", levels[level], + rnn->vc->colors->rname, info->name, + rnn->vc->colors->reset, decoded); + } + free(decoded); + free(info->name); + free(info); + } +} + + +static uint32_t bin_x1, bin_x2, bin_y1, bin_y2; +static unsigned mode; +static const char *render_mode; +static enum { + MODE_BINNING = 0x1, + MODE_GMEM = 0x2, + MODE_BYPASS = 0x4, + MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS, +} enable_mask = MODE_ALL; +static bool skip_ib2_enable_global; +static bool skip_ib2_enable_local; + +static void +print_mode(int level) +{ + if ((options->gpu_id >= 500) && !quiet(2)) { + printf("%smode: %s\n", levels[level], render_mode); + printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global, skip_ib2_enable_local); + } +} + +static bool +skip_query(void) +{ + switch (options->query_mode) { + case QUERY_ALL: + /* never skip: */ + return false; + case QUERY_WRITTEN: + for (int i = 0; i < options->nquery; i++) { + uint32_t regbase = queryvals[i]; + if (!reg_written(regbase)) { + continue; + } + if (reg_rewritten(regbase)) { + return false; + } + } + return true; + case QUERY_DELTA: + for (int i = 0; i < options->nquery; i++) { + uint32_t regbase = queryvals[i]; + if (!reg_written(regbase)) { + continue; + } + uint32_t lastval = reg_val(regbase); + if (lastval != lastvals[regbase]) { + return false; + } + } + return true; + } + return true; +} + +static void +__do_query(const char *primtype, uint32_t num_indices) +{ + int n = 0; + + if ((500 <= options->gpu_id) && (options->gpu_id < 700)) { + uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL")); + uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR")); + + bin_x1 = scissor_tl & 0xffff; + bin_y1 = scissor_tl >> 16; + bin_x2 = scissor_br & 0xffff; + bin_y2 = scissor_br >> 16; + } + + for (int i 
= 0; i < options->nquery; i++) { + uint32_t regbase = queryvals[i]; + if (reg_written(regbase)) { + uint32_t lastval = reg_val(regbase); + printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, + bin_x1, bin_y1, bin_x2, bin_y2, num_indices); + if (options->gpu_id >= 500) + printf("%s:", render_mode); + printf("\t%08x", lastval); + if (lastval != lastvals[regbase]) { + printf("!"); + } else { + printf(" "); + } + if (reg_rewritten(regbase)) { + printf("+"); + } else { + printf(" "); + } + dump_register_val(regbase, lastval, 0); + n++; + } + } + + if (n > 1) + printf("\n"); +} + +static void +do_query_compare(const char *primtype, uint32_t num_indices) +{ + unsigned saved_enable_mask = enable_mask; + const char *saved_render_mode = render_mode; + + /* in 'query-compare' mode, we want to see if the register is writtten + * or changed in any mode: + * + * (NOTE: this could cause false-positive for 'query-delta' if the reg + * is written with different values in binning vs sysmem/gmem mode, as + * we don't track previous values per-mode, but I think we can live with + * that) + */ + enable_mask = MODE_ALL; + + clear_rewritten(); + load_all_groups(0); + + if (!skip_query()) { + /* dump binning pass values: */ + enable_mask = MODE_BINNING; + render_mode = "BINNING"; + clear_rewritten(); + load_all_groups(0); + __do_query(primtype, num_indices); + + /* dump draw pass values: */ + enable_mask = MODE_GMEM | MODE_BYPASS; + render_mode = "DRAW"; + clear_rewritten(); + load_all_groups(0); + __do_query(primtype, num_indices); + + printf("\n"); + } + + enable_mask = saved_enable_mask; + render_mode = saved_render_mode; + + disable_all_groups(); +} + +/* well, actually query and script.. 
+ * NOTE: call this before dump_register_summary() + */ +static void +do_query(const char *primtype, uint32_t num_indices) +{ + if (script_draw) + script_draw(primtype, num_indices); + + if (options->query_compare) { + do_query_compare(primtype, num_indices); + return; + } + + if (skip_query()) + return; + + __do_query(primtype, num_indices); +} + +static void +cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t start = dwords[1] >> 16; + uint32_t size = dwords[1] & 0xffff; + const char *type = NULL, *ext = NULL; + enum shader_t disasm_type; + + switch (dwords[0]) { + case 0: + type = "vertex"; + ext = "vo"; + disasm_type = SHADER_VERTEX; + break; + case 1: + type = "fragment"; + ext = "fo"; + disasm_type = SHADER_FRAGMENT; + break; + default: + type = ""; + disasm_type = 0; + break; + } + + printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start, size); + disasm_a2xx(dwords + 2, sizedwords - 2, level+2, disasm_type); + + /* dump raw shader: */ + if (ext) + dump_shader(ext, dwords + 2, (sizedwords - 2) * 4); +} + +static void +cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t reg = dwords[0] & 0xffff; + int i; + for (i = 1; i < sizedwords; i++) { + dump_register(reg, dwords[i], level+1); + reg_set(reg, dwords[i]); + reg++; + } +} + +enum state_t { + TEX_SAMP = 1, + TEX_CONST, + TEX_MIPADDR, /* a3xx only */ + SHADER_PROG, + SHADER_CONST, + + // image/ssbo state: + SSBO_0, + SSBO_1, + SSBO_2, + + UBO, + + // unknown things, just to hexdumps: + UNKNOWN_DWORDS, + UNKNOWN_2DWORDS, + UNKNOWN_4DWORDS, +}; + +enum adreno_state_block { + SB_VERT_TEX = 0, + SB_VERT_MIPADDR = 1, + SB_FRAG_TEX = 2, + SB_FRAG_MIPADDR = 3, + SB_VERT_SHADER = 4, + SB_GEOM_SHADER = 5, + SB_FRAG_SHADER = 6, + SB_COMPUTE_SHADER = 7, +}; + +/* TODO there is probably a clever way to let rnndec parse things so + * we don't have to care about packet format differences across gens + */ + +static void +a3xx_get_state_type(uint32_t 
*dwords, enum shader_t *stage, enum state_t *state, + enum state_src_t *src) +{ + unsigned state_block_id = (dwords[0] >> 19) & 0x7; + unsigned state_type = dwords[1] & 0x3; + static const struct { + enum shader_t stage; + enum state_t state; + } lookup[0xf][0x3] = { + [SB_VERT_TEX][0] = { SHADER_VERTEX, TEX_SAMP }, + [SB_VERT_TEX][1] = { SHADER_VERTEX, TEX_CONST }, + [SB_FRAG_TEX][0] = { SHADER_FRAGMENT, TEX_SAMP }, + [SB_FRAG_TEX][1] = { SHADER_FRAGMENT, TEX_CONST }, + [SB_VERT_SHADER][0] = { SHADER_VERTEX, SHADER_PROG }, + [SB_VERT_SHADER][1] = { SHADER_VERTEX, SHADER_CONST }, + [SB_FRAG_SHADER][0] = { SHADER_FRAGMENT, SHADER_PROG }, + [SB_FRAG_SHADER][1] = { SHADER_FRAGMENT, SHADER_CONST }, + }; + + *stage = lookup[state_block_id][state_type].stage; + *state = lookup[state_block_id][state_type].state; + unsigned state_src = (dwords[0] >> 16) & 0x7; + if (state_src == 0 /* SS_DIRECT */) + *src = STATE_SRC_DIRECT; + else + *src = STATE_SRC_INDIRECT; +} + +static enum state_src_t +_get_state_src(unsigned dword0) +{ + switch ((dword0 >> 16) & 0x3) { + case 0: /* SS4_DIRECT / SS6_DIRECT */ + return STATE_SRC_DIRECT; + case 2: /* SS4_INDIRECT / SS6_INDIRECT */ + return STATE_SRC_INDIRECT; + case 1: /* SS6_BINDLESS */ + return STATE_SRC_BINDLESS; + default: + return STATE_SRC_DIRECT; + } +} + +static void +_get_state_type(unsigned state_block_id, unsigned state_type, + enum shader_t *stage, enum state_t *state) +{ + static const struct { + enum shader_t stage; + enum state_t state; + } lookup[0x10][0x4] = { + // SB4_VS_TEX: + [0x0][0] = { SHADER_VERTEX, TEX_SAMP }, + [0x0][1] = { SHADER_VERTEX, TEX_CONST }, + [0x0][2] = { SHADER_VERTEX, UBO }, + // SB4_HS_TEX: + [0x1][0] = { SHADER_TCS, TEX_SAMP }, + [0x1][1] = { SHADER_TCS, TEX_CONST }, + [0x1][2] = { SHADER_TCS, UBO }, + // SB4_DS_TEX: + [0x2][0] = { SHADER_TES, TEX_SAMP }, + [0x2][1] = { SHADER_TES, TEX_CONST }, + [0x2][2] = { SHADER_TES, UBO }, + // SB4_GS_TEX: + [0x3][0] = { SHADER_GEOM, TEX_SAMP }, + [0x3][1] = 
{ SHADER_GEOM, TEX_CONST }, + [0x3][2] = { SHADER_GEOM, UBO }, + // SB4_FS_TEX: + [0x4][0] = { SHADER_FRAGMENT, TEX_SAMP }, + [0x4][1] = { SHADER_FRAGMENT, TEX_CONST }, + [0x4][2] = { SHADER_FRAGMENT, UBO }, + // SB4_CS_TEX: + [0x5][0] = { SHADER_COMPUTE, TEX_SAMP }, + [0x5][1] = { SHADER_COMPUTE, TEX_CONST }, + [0x5][2] = { SHADER_COMPUTE, UBO }, + // SB4_VS_SHADER: + [0x8][0] = { SHADER_VERTEX, SHADER_PROG }, + [0x8][1] = { SHADER_VERTEX, SHADER_CONST }, + [0x8][2] = { SHADER_VERTEX, UBO }, + // SB4_HS_SHADER + [0x9][0] = { SHADER_TCS, SHADER_PROG }, + [0x9][1] = { SHADER_TCS, SHADER_CONST }, + [0x9][2] = { SHADER_TCS, UBO }, + // SB4_DS_SHADER + [0xa][0] = { SHADER_TES, SHADER_PROG }, + [0xa][1] = { SHADER_TES, SHADER_CONST }, + [0xa][2] = { SHADER_TES, UBO }, + // SB4_GS_SHADER + [0xb][0] = { SHADER_GEOM, SHADER_PROG }, + [0xb][1] = { SHADER_GEOM, SHADER_CONST }, + [0xb][2] = { SHADER_GEOM, UBO }, + // SB4_FS_SHADER: + [0xc][0] = { SHADER_FRAGMENT, SHADER_PROG }, + [0xc][1] = { SHADER_FRAGMENT, SHADER_CONST }, + [0xc][2] = { SHADER_FRAGMENT, UBO }, + // SB4_CS_SHADER: + [0xd][0] = { SHADER_COMPUTE, SHADER_PROG }, + [0xd][1] = { SHADER_COMPUTE, SHADER_CONST }, + [0xd][2] = { SHADER_COMPUTE, UBO }, + [0xd][3] = { SHADER_COMPUTE, SSBO_0 }, /* a6xx location */ + // SB4_SSBO (shared across all stages) + [0xe][0] = { 0, SSBO_0 }, /* a5xx (and a4xx?) location */ + [0xe][1] = { 0, SSBO_1 }, + [0xe][2] = { 0, SSBO_2 }, + // SB4_CS_SSBO + [0xf][0] = { SHADER_COMPUTE, SSBO_0 }, + [0xf][1] = { SHADER_COMPUTE, SSBO_1 }, + [0xf][2] = { SHADER_COMPUTE, SSBO_2 }, + // unknown things + /* This looks like combined UBO state for 3d stages (a5xx and + * before?? 
I think a6xx has UBO state per shader stage: + */ + [0x6][2] = { 0, UBO }, + [0x7][1] = { 0, UNKNOWN_2DWORDS }, + }; + + *stage = lookup[state_block_id][state_type].stage; + *state = lookup[state_block_id][state_type].state; +} + +static void +a4xx_get_state_type(uint32_t *dwords, enum shader_t *stage, enum state_t *state, + enum state_src_t *src) +{ + unsigned state_block_id = (dwords[0] >> 18) & 0xf; + unsigned state_type = dwords[1] & 0x3; + _get_state_type(state_block_id, state_type, stage, state); + *src = _get_state_src(dwords[0]); +} + +static void +a6xx_get_state_type(uint32_t *dwords, enum shader_t *stage, enum state_t *state, + enum state_src_t *src) +{ + unsigned state_block_id = (dwords[0] >> 18) & 0xf; + unsigned state_type = (dwords[0] >> 14) & 0x3; + _get_state_type(state_block_id, state_type, stage, state); + *src = _get_state_src(dwords[0]); +} + +static void +dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level) +{ + for (int i = 0; i < num_unit; i++) { + /* work-around to reduce noise for opencl blob which always + * writes the max # regardless of # of textures used + */ + if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0)) + break; + + if ((300 <= options->gpu_id) && (options->gpu_id < 400)) { + dump_domain(texsamp, 2, level+2, "A3XX_TEX_SAMP"); + dump_hex(texsamp, 2, level+1); + texsamp += 2; + } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) { + dump_domain(texsamp, 2, level+2, "A4XX_TEX_SAMP"); + dump_hex(texsamp, 2, level+1); + texsamp += 2; + } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) { + dump_domain(texsamp, 4, level+2, "A5XX_TEX_SAMP"); + dump_hex(texsamp, 4, level+1); + texsamp += 4; + } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) { + dump_domain(texsamp, 4, level+2, "A6XX_TEX_SAMP"); + dump_hex(texsamp, 4, level+1); + texsamp += src == STATE_SRC_BINDLESS ? 
16 : 4; + } + } +} + +static void +dump_tex_const(uint32_t *texconst, int num_unit, int level) +{ + for (int i = 0; i < num_unit; i++) { + /* work-around to reduce noise for opencl blob which always + * writes the max # regardless of # of textures used + */ + if ((num_unit == 16) && + (texconst[0] == 0) && (texconst[1] == 0) && + (texconst[2] == 0) && (texconst[3] == 0)) + break; + + if ((300 <= options->gpu_id) && (options->gpu_id < 400)) { + dump_domain(texconst, 4, level+2, "A3XX_TEX_CONST"); + dump_hex(texconst, 4, level+1); + texconst += 4; + } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) { + dump_domain(texconst, 8, level+2, "A4XX_TEX_CONST"); + if (options->dump_textures) { + uint32_t addr = texconst[4] & ~0x1f; + dump_gpuaddr(addr, level-2); + } + dump_hex(texconst, 8, level+1); + texconst += 8; + } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) { + dump_domain(texconst, 12, level+2, "A5XX_TEX_CONST"); + if (options->dump_textures) { + uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4]; + dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3); + } + dump_hex(texconst, 12, level+1); + texconst += 12; + } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) { + dump_domain(texconst, 16, level+2, "A6XX_TEX_CONST"); + if (options->dump_textures) { + uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4]; + dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3); + } + dump_hex(texconst, 16, level+1); + texconst += 16; + } + } +} + +static void +cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level) +{ + enum shader_t stage; + enum state_t state; + enum state_src_t src; + uint32_t num_unit = (dwords[0] >> 22) & 0x1ff; + uint64_t ext_src_addr; + void *contents; + int i; + + if (quiet(2) && !options->script) + return; + + if (options->gpu_id >= 600) + a6xx_get_state_type(dwords, &stage, &state, &src); + else if (options->gpu_id >= 400) + a4xx_get_state_type(dwords, &stage, 
&state, &src); + else + a3xx_get_state_type(dwords, &stage, &state, &src); + + switch (src) { + case STATE_SRC_DIRECT: ext_src_addr = 0; break; + case STATE_SRC_INDIRECT: + if (is_64b()) { + ext_src_addr = dwords[1] & 0xfffffffc; + ext_src_addr |= ((uint64_t)dwords[2]) << 32; + } else { + ext_src_addr = dwords[1] & 0xfffffffc; + } + + break; + case STATE_SRC_BINDLESS: { + const unsigned base_reg = + stage == SHADER_COMPUTE ? regbase("HLSQ_CS_BINDLESS_BASE[0]") : regbase("HLSQ_BINDLESS_BASE[0]"); + + if (is_64b()) { + const unsigned reg = base_reg + (dwords[1] >> 28) * 2; + ext_src_addr = reg_val(reg) & 0xfffffffc; + ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32; + } else { + const unsigned reg = base_reg + (dwords[1] >> 28); + ext_src_addr = reg_val(reg) & 0xfffffffc; + } + + ext_src_addr += 4 * (dwords[1] & 0xffffff); + break; + } + } + + if (ext_src_addr) + contents = hostptr(ext_src_addr); + else + contents = is_64b() ? dwords + 3 : dwords + 2; + + if (!contents) + return; + + switch (state) { + case SHADER_PROG: { + const char *ext = NULL; + + if (quiet(2)) + return; + + if (options->gpu_id >= 400) + num_unit *= 16; + else if (options->gpu_id >= 300) + num_unit *= 4; + + /* shaders: + * + * note: num_unit seems to be # of instruction groups, where + * an instruction group has 4 64bit instructions. + */ + if (stage == SHADER_VERTEX) { + ext = "vo3"; + } else if (stage == SHADER_GEOM) { + ext = "go3"; + } else if (stage == SHADER_COMPUTE) { + ext = "co3"; + } else if (stage == SHADER_FRAGMENT){ + ext = "fo3"; + } + + if (contents) + disasm_a3xx(contents, num_unit * 2, level+2, stdout, options->gpu_id); + + /* dump raw shader: */ + if (ext) + dump_shader(ext, contents, num_unit * 2 * 4); + + break; + } + case SHADER_CONST: { + if (quiet(2)) + return; + + /* uniforms/consts: + * + * note: num_unit seems to be # of pairs of dwords?? 
+ */ + + if (options->gpu_id >= 400) + num_unit *= 2; + + dump_float(contents, num_unit*2, level+1); + dump_hex(contents, num_unit*2, level+1); + + break; + } + case TEX_MIPADDR: { + uint32_t *addrs = contents; + + if (quiet(2)) + return; + + /* mipmap consts block just appears to be array of num_unit gpu addr's: */ + for (i = 0; i < num_unit; i++) { + void *ptr = hostptr(addrs[i]); + printf("%s%2d: %08x\n", levels[level+1], i, addrs[i]); + if (options->dump_textures) { + printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i])); + dump_hex(ptr, hostlen(addrs[i])/4, level+1); + } + } + break; + } + case TEX_SAMP: { + dump_tex_samp(contents, src, num_unit, level); + break; + } + case TEX_CONST: { + dump_tex_const(contents, num_unit, level); + break; + } + case SSBO_0: { + uint32_t *ssboconst = (uint32_t *)contents; + + for (i = 0; i < num_unit; i++) { + int sz = 4; + if (400 <= options->gpu_id && options->gpu_id < 500) { + dump_domain(ssboconst, 4, level+2, "A4XX_SSBO_0"); + } else if (500 <= options->gpu_id && options->gpu_id < 600) { + dump_domain(ssboconst, 4, level+2, "A5XX_SSBO_0"); + } else if (600 <= options->gpu_id && options->gpu_id < 700) { + sz = 16; + dump_domain(ssboconst, 16, level+2, "A6XX_IBO"); + } + dump_hex(ssboconst, sz, level+1); + ssboconst += sz; + } + break; + } + case SSBO_1: { + uint32_t *ssboconst = (uint32_t *)contents; + + for (i = 0; i < num_unit; i++) { + if (400 <= options->gpu_id && options->gpu_id < 500) + dump_domain(ssboconst, 2, level+2, "A4XX_SSBO_1"); + else if (500 <= options->gpu_id && options->gpu_id < 600) + dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_1"); + dump_hex(ssboconst, 2, level+1); + ssboconst += 2; + } + break; + } + case SSBO_2: { + uint32_t *ssboconst = (uint32_t *)contents; + + for (i = 0; i < num_unit; i++) { + /* TODO a4xx and a5xx might be same: */ + if ((500 <= options->gpu_id) && (options->gpu_id < 600)) { + dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_2"); + dump_hex(ssboconst, 2, level+1); + } + if 
(options->dump_textures) { + uint64_t addr = (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0]; + dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3); + } + ssboconst += 2; + } + break; + } + case UBO: { + uint32_t *uboconst = (uint32_t *)contents; + + for (i = 0; i < num_unit; i++) { + // TODO probably similar on a4xx.. + if (500 <= options->gpu_id && options->gpu_id < 600) + dump_domain(uboconst, 2, level+2, "A5XX_UBO"); + else if (600 <= options->gpu_id && options->gpu_id < 700) + dump_domain(uboconst, 2, level+2, "A6XX_UBO"); + dump_hex(uboconst, 2, level+1); + uboconst += src == STATE_SRC_BINDLESS ? 16 : 2; + } + break; + } + case UNKNOWN_DWORDS: { + if (quiet(2)) + return; + dump_hex(contents, num_unit, level+1); + break; + } + case UNKNOWN_2DWORDS: { + if (quiet(2)) + return; + dump_hex(contents, num_unit * 2, level+1); + break; + } + case UNKNOWN_4DWORDS: { + if (quiet(2)) + return; + dump_hex(contents, num_unit * 4, level+1); + break; + } + default: + if (quiet(2)) + return; + /* hmm.. 
*/ + dump_hex(contents, num_unit, level+1); + break; + } +} + +static void +cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level) +{ + bin_x1 = dwords[1] & 0xffff; + bin_y1 = dwords[1] >> 16; + bin_x2 = dwords[2] & 0xffff; + bin_y2 = dwords[2] >> 16; +} + +static void +dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level) +{ + uint32_t w, h, p; + uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags; + uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z; + static const char *filter[] = { + "point", "bilinear", "bicubic", + }; + static const char *clamp[] = { + "wrap", "mirror", "clamp-last-texel", + }; + static const char swiznames[] = "xyzw01??"; + + /* see sys2gmem_tex_const[] in adreno_a2xxx.c */ + + /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat, + * RFMode=ZeroClamp-1, Dim=1:2d, pitch + */ + p = (dwords[0] >> 22) << 5; + clamp_x = (dwords[0] >> 10) & 0x3; + clamp_y = (dwords[0] >> 13) & 0x3; + clamp_z = (dwords[0] >> 16) & 0x3; + + /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0, + * NearestClamp=1:OGL Mode + */ + parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff); + + /* Width, Height, EndianSwap=0:None */ + w = (dwords[2] & 0x1fff) + 1; + h = ((dwords[2] >> 13) & 0x1fff) + 1; + + /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point, + * Mip=2:BaseMap + */ + mag = (dwords[3] >> 19) & 0x3; + min = (dwords[3] >> 21) & 0x3; + swiz = (dwords[3] >> 1) & 0xfff; + + /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0, + * Dim3d=0 + */ + // XXX + + /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0, + * Dim=1:2d, MipPacking=0 + */ + parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff); + + printf("%sset texture const %04x\n", levels[level], val); + printf("%sclamp x/y/z: %s/%s/%s\n", levels[level+1], + clamp[clamp_x], clamp[clamp_y], clamp[clamp_z]); + printf("%sfilter min/mag: %s/%s\n", levels[level+1], filter[min], filter[mag]); + printf("%sswizzle: 
%c%c%c%c\n", levels[level+1], + swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7], + swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]); + printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n", + levels[level+1], gpuaddr, flags, w, h, p, + rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf)); + printf("%smipaddr=%08x (flags=%03x)\n", levels[level+1], + mip_gpuaddr, mip_flags); +} + +static void +dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level) +{ + int i; + printf("%sset shader const %04x\n", levels[level], val); + for (i = 0; i < sizedwords; ) { + uint32_t gpuaddr, flags; + parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf); + void *addr = hostptr(gpuaddr); + if (addr) { + const char * fmt = + rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf); + uint32_t size = dwords[i++]; + printf("%saddr=%08x, size=%d, format=%s\n", levels[level+1], + gpuaddr, size, fmt); + // TODO maybe dump these as bytes instead of dwords? + size = (size + 3) / 4; // for now convert to dwords + dump_hex(addr, min(size, 64), level + 1); + if (size > min(size, 64)) + printf("%s\t\t...\n", levels[level+1]); + dump_float(addr, min(size, 64), level + 1); + if (size > min(size, 64)) + printf("%s\t\t...\n", levels[level+1]); + } + } +} + +static void +cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t val = dwords[0] & 0xffff; + switch((dwords[0] >> 16) & 0xf) { + case 0x0: + dump_float((float *)(dwords+1), sizedwords-1, level+1); + break; + case 0x1: + /* need to figure out how const space is partitioned between + * attributes, textures, etc.. 
+ */ + if (val < 0x78) { + dump_a2xx_tex_const(dwords+1, sizedwords-1, val, level); + } else { + dump_a2xx_shader_const(dwords+1, sizedwords-1, val, level); + } + break; + case 0x2: + printf("%sset bool const %04x\n", levels[level], val); + break; + case 0x3: + printf("%sset loop const %04x\n", levels[level], val); + break; + case 0x4: + val += 0x2000; + if (dwords[0] & 0x80000000) { + uint32_t srcreg = dwords[1]; + uint32_t dstval = dwords[2]; + + /* TODO: not sure what happens w/ payload != 2.. */ + assert(sizedwords == 3); + assert(srcreg < ARRAY_SIZE(type0_reg_vals)); + + /* note: rnn_regname uses a static buf so we can't do + * two regname() calls for one printf.. + */ + printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval); + printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]); + + dstval += type0_reg_vals[srcreg]; + + dump_registers(val, &dstval, 1, level+1); + } else { + dump_registers(val, dwords+1, sizedwords-1, level+1); + } + break; + } +} + +static void dump_register_summary(int level); + +static void +cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level) +{ + const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]); + printl(2, "%sevent %s\n", levels[level], name); + + if (name && (options->gpu_id > 500)) { + char eventname[64]; + snprintf(eventname, sizeof(eventname), "EVENT:%s", name); + if (!strcmp(name, "BLIT")) { + do_query(eventname, 0); + print_mode(level); + dump_register_summary(level); + } + } +} + +static void +dump_register_summary(int level) +{ + uint32_t i; + bool saved_summary = summary; + summary = false; + + in_summary = true; + + /* dump current state of registers: */ + printl(2, "%sdraw[%i] register values\n", levels[level], draw_count); + for (i = 0; i < regcnt(); i++) { + uint32_t regbase = i; + uint32_t lastval = reg_val(regbase); + /* skip registers that haven't been updated since last draw/blit: */ + if (!(options->allregs || reg_rewritten(regbase))) + continue; + if 
(!reg_written(regbase)) + continue; + if (lastval != lastvals[regbase]) { + printl(2, "!"); + lastvals[regbase] = lastval; + } else { + printl(2, " "); + } + if (reg_rewritten(regbase)) { + printl(2, "+"); + } else { + printl(2, " "); + } + printl(2, "\t%08x", lastval); + if (!quiet(2)) { + dump_register(regbase, lastval, level); + } + } + + clear_rewritten(); + + in_summary = false; + + draw_count++; + summary = saved_summary; +} + +static uint32_t +draw_indx_common(uint32_t *dwords, int level) +{ + uint32_t prim_type = dwords[1] & 0x1f; + uint32_t source_select = (dwords[1] >> 6) & 0x3; + uint32_t num_indices = dwords[2]; + const char *primtype; + + primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type); + + do_query(primtype, num_indices); + + printl(2, "%sdraw: %d\n", levels[level], draws[ib]); + printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype, + prim_type); + printl(2, "%ssource_select: %s (%d)\n", levels[level], + rnn_enumname(rnn, "pc_di_src_sel", source_select), + source_select); + printl(2, "%snum_indices: %d\n", levels[level], num_indices); + + vertices += num_indices; + + draws[ib]++; + + return num_indices; +} + +enum pc_di_index_size { + INDEX_SIZE_IGN = 0, + INDEX_SIZE_16_BIT = 0, + INDEX_SIZE_32_BIT = 1, + INDEX_SIZE_8_BIT = 2, + INDEX_SIZE_INVALID = 0, +}; + +static void +cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t num_indices = draw_indx_common(dwords, level); + + assert(!is_64b()); + + /* if we have an index buffer, dump that: */ + if (sizedwords == 5) { + void *ptr = hostptr(dwords[3]); + printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]); + printl(2, "%sidx_size: %d\n", levels[level], dwords[4]); + if (ptr) { + enum pc_di_index_size size = + ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2); + if (!quiet(2)) { + int i; + printf("%sidxs: ", levels[level]); + if (size == INDEX_SIZE_8_BIT) { + uint8_t *idx = ptr; + for (i = 0; i < dwords[4]; i++) + printf(" %u", idx[i]); + } else if (size == 
INDEX_SIZE_16_BIT) { + uint16_t *idx = ptr; + for (i = 0; i < dwords[4]/2; i++) + printf(" %u", idx[i]); + } else if (size == INDEX_SIZE_32_BIT) { + uint32_t *idx = ptr; + for (i = 0; i < dwords[4]/4; i++) + printf(" %u", idx[i]); + } + printf("\n"); + dump_hex(ptr, dwords[4]/4, level+1); + } + } + } + + /* don't bother dumping registers for the dummy draw_indx's.. */ + if (num_indices > 0) + dump_register_summary(level); + + needs_wfi = true; +} + +static void +cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t num_indices = draw_indx_common(dwords, level); + enum pc_di_index_size size = + ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2); + void *ptr = &dwords[3]; + int sz = 0; + + assert(!is_64b()); + + /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */ + if (!quiet(2)) { + int i; + printf("%sidxs: ", levels[level]); + if (size == INDEX_SIZE_8_BIT) { + uint8_t *idx = ptr; + for (i = 0; i < num_indices; i++) + printf(" %u", idx[i]); + sz = num_indices; + } else if (size == INDEX_SIZE_16_BIT) { + uint16_t *idx = ptr; + for (i = 0; i < num_indices; i++) + printf(" %u", idx[i]); + sz = num_indices * 2; + } else if (size == INDEX_SIZE_32_BIT) { + uint32_t *idx = ptr; + for (i = 0; i < num_indices; i++) + printf(" %u", idx[i]); + sz = num_indices * 4; + } + printf("\n"); + dump_hex(ptr, sz / 4, level+1); + } + + /* don't bother dumping registers for the dummy draw_indx's.. */ + if (num_indices > 0) + dump_register_summary(level); +} + +static void +cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t num_indices = dwords[2]; + uint32_t prim_type = dwords[0] & 0x1f; + + do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices); + print_mode(level); + + /* don't bother dumping registers for the dummy draw_indx's.. 
*/ + if (num_indices > 0) + dump_register_summary(level); +} + +static void +cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t prim_type = dwords[0] & 0x1f; + uint64_t addr; + + do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0); + print_mode(level); + + if (is_64b()) + addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1]; + else + addr = dwords[1]; + dump_gpuaddr_size(addr, level, 0x10, 2); + + if (is_64b()) + addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4]; + else + addr = dwords[3]; + dump_gpuaddr_size(addr, level, 0x10, 2); + + dump_register_summary(level); +} + +static void +cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t prim_type = dwords[0] & 0x1f; + uint64_t addr; + + do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0); + print_mode(level); + + addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1]; + dump_gpuaddr_size(addr, level, 0x10, 2); + + dump_register_summary(level); +} + +static void +cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level) +{ + do_query("COMPUTE", 1); + dump_register_summary(level); +} + +static void +cp_nop(uint32_t *dwords, uint32_t sizedwords, int level) +{ + const char *buf = (void *)dwords; + int i; + + if (quiet(3)) + return; + + // blob doesn't use CP_NOP for string_marker but it does + // use it for things that end up looking like, but aren't + // ascii chars: + if (!options->decode_markers) + return; + + for (i = 0; i < 4 * sizedwords; i++) { + if (buf[i] == '\0') + break; + if (isascii(buf[i])) + printf("%c", buf[i]); + } + printf("\n"); +} + +static void +cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level) +{ + /* traverse indirect buffers */ + uint64_t ibaddr; + uint32_t ibsize; + uint32_t *ptr = NULL; + + if (is_64b()) { + /* a5xx+.. 
high 32b of gpu addr, then size: */ + ibaddr = dwords[0]; + ibaddr |= ((uint64_t)dwords[1]) << 32; + ibsize = dwords[2]; + } else { + ibaddr = dwords[0]; + ibsize = dwords[1]; + } + + if (!quiet(3)) { + if (is_64b()) { + printf("%sibaddr:%016lx\n", levels[level], ibaddr); + } else { + printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr); + } + printf("%sibsize:%08x\n", levels[level], ibsize); + } + + if (options->once && has_dumped(ibaddr, enable_mask)) + return; + + /* 'query-compare' mode implies 'once' mode, although we need only to + * process the cmdstream for *any* enable_mask mode, since we are + * comparing binning vs draw reg values at the same time, ie. it is + * not useful to process the same draw in both binning and draw pass. + */ + if (options->query_compare && has_dumped(ibaddr, MODE_ALL)) + return; + + /* map gpuaddr back to hostptr: */ + ptr = hostptr(ibaddr); + + if (ptr) { + /* If the GPU hung within the target IB, the trigger point will be + * just after the current CP_INDIRECT_BUFFER. Because the IB is + * executed but never returns. Account for this by checking if + * the IB returned: + */ + highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 
3 : 2])); + + ib++; + ibs[ib].base = ibaddr; + ibs[ib].size = ibsize; + + dump_commands(ptr, ibsize, level); + ib--; + } else { + fprintf(stderr, "could not find: %016"PRIx64" (%d)\n", ibaddr, ibsize); + } +} + +static void +cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level) +{ + needs_wfi = false; +} + +static void +cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level) +{ + if (quiet(2)) + return; + + if (is_64b()) { + uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32); + printf("%sgpuaddr:%016lx\n", levels[level], gpuaddr); + dump_hex(&dwords[2], sizedwords-2, level+1); + + if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2])) + dump_commands(&dwords[2], sizedwords-2, level+1); + } else { + uint32_t gpuaddr = dwords[0]; + printf("%sgpuaddr:%08x\n", levels[level], gpuaddr); + dump_float((float *)&dwords[1], sizedwords-1, level+1); + } +} + +static void +cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t val = dwords[0] & 0xffff; + uint32_t and = dwords[1]; + uint32_t or = dwords[2]; + printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1), and, or); + if (needs_wfi) + printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1), and, or); + reg_set(val, (reg_val(val) & and) | or); +} + +static void +cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t val = dwords[0] & 0xffff; + printl(3, "%sbase register: %s\n", levels[level], regname(val, 1)); + + if (quiet(2)) + return; + + uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32); + printf("%sgpuaddr:%016lx\n", levels[level], gpuaddr); + void *ptr = hostptr(gpuaddr); + if (ptr) { + uint32_t cnt = (dwords[0] >> 19) & 0x3ff; + dump_hex(ptr, cnt, level + 1); + } +} + +struct draw_state { + uint16_t enable_mask; + uint16_t flags; + uint32_t count; + uint64_t addr; +}; + +struct draw_state state[32]; + +#define FLAG_DIRTY 0x1 +#define FLAG_DISABLE 0x2 +#define FLAG_DISABLE_ALL_GROUPS 0x4 +#define FLAG_LOAD_IMMED 
0x8 + +static int draw_mode; + +static void +disable_group(unsigned group_id) +{ + struct draw_state *ds = &state[group_id]; + memset(ds, 0, sizeof(*ds)); +} + +static void +disable_all_groups(void) +{ + for (unsigned i = 0; i < ARRAY_SIZE(state); i++) + disable_group(i); +} + +static void +load_group(unsigned group_id, int level) +{ + struct draw_state *ds = &state[group_id]; + + if (!ds->count) + return; + + printl(2, "%sgroup_id: %u\n", levels[level], group_id); + printl(2, "%scount: %d\n", levels[level], ds->count); + printl(2, "%saddr: %016llx\n", levels[level], ds->addr); + printl(2, "%sflags: %x\n", levels[level], ds->flags); + + if (options->gpu_id >= 600) { + printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask); + + if (!(ds->enable_mask & enable_mask)) { + printl(2, "%s\tskipped!\n\n", levels[level]); + return; + } + } + + void *ptr = hostptr(ds->addr); + if (ptr) { + if (!quiet(2)) + dump_hex(ptr, ds->count, level+1); + + ib++; + dump_commands(ptr, ds->count, level+1); + ib--; + } +} + +static void +load_all_groups(int level) +{ + /* sanity check, we should never recursively hit recursion here, and if + * we do bad things happen: + */ + static bool loading_groups = false; + if (loading_groups) { + printf("ERROR: nothing in draw state should trigger recursively loading groups!\n"); + return; + } + loading_groups = true; + for (unsigned i = 0; i < ARRAY_SIZE(state); i++) + load_group(i, level); + loading_groups = false; + + /* in 'query-compare' mode, defer disabling all groups until we have a + * chance to process the query: + */ + if (!options->query_compare) + disable_all_groups(); +} + +static void +cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t i; + + for (i = 0; i < sizedwords; ) { + struct draw_state *ds; + uint32_t count = dwords[i] & 0xffff; + uint32_t group_id = (dwords[i] >> 24) & 0x1f; + uint32_t enable_mask = (dwords[i] >> 20) & 0xf; + uint32_t flags = (dwords[i] >> 16) & 0xf; + uint64_t addr; + 
+ if (is_64b()) { + addr = dwords[i + 1]; + addr |= ((uint64_t)dwords[i + 2]) << 32; + i += 3; + } else { + addr = dwords[i + 1]; + i += 2; + } + + if (flags & FLAG_DISABLE_ALL_GROUPS) { + disable_all_groups(); + continue; + } + + if (flags & FLAG_DISABLE) { + disable_group(group_id); + continue; + } + + assert(group_id < ARRAY_SIZE(state)); + disable_group(group_id); + + ds = &state[group_id]; + + ds->enable_mask = enable_mask; + ds->flags = flags; + ds->count = count; + ds->addr = addr; + + if (flags & FLAG_LOAD_IMMED) { + load_group(group_id, level); + disable_group(group_id); + } + } +} + +static void +cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level) +{ + draw_mode = dwords[0]; +} + +/* execute compute shader */ +static void +cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level) +{ + do_query("compute", 0); + dump_register_summary(level); +} + +static void +cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint64_t addr; + + if (is_64b()) { + addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1]; + } else { + addr = dwords[1]; + } + + printl(3, "%saddr: %016llx\n", levels[level], addr); + dump_gpuaddr_size(addr, level, 0x10, 2); + + do_query("compute", 0); + dump_register_summary(level); +} + +static void +cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level) +{ + render_mode = rnn_enumname(rnn, "a6xx_render_mode", dwords[0] & 0xf); + + if (!strcmp(render_mode, "RM6_BINNING")) { + enable_mask = MODE_BINNING; + } else if (!strcmp(render_mode, "RM6_GMEM")) { + enable_mask = MODE_GMEM; + } else if (!strcmp(render_mode, "RM6_BYPASS")) { + enable_mask = MODE_BYPASS; + } +} + +static void +cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint64_t addr; + uint32_t *ptr, len; + + assert(is_64b()); + + /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr).. + * not sure if this can come in different sizes. + * + * First ptr doesn't seem to be cmdstream, second one does. 
+ * + * Comment from downstream kernel: + * + * SRM -- set render mode (ex binning, direct render etc) + * SRM is set by UMD usually at start of IB to tell CP the type of + * preemption. + * KMD needs to set SRM to NULL to indicate CP that rendering is + * done by IB. + * ------------------------------------------------------------------ + * + * Seems to always be one of these two: + * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000 + * 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000 + * + */ + + assert(options->gpu_id >= 500); + + render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]); + + if (sizedwords == 1) + return; + + addr = dwords[1]; + addr |= ((uint64_t)dwords[2]) << 32; + + mode = dwords[3]; + + dump_gpuaddr(addr, level+1); + + if (sizedwords == 5) + return; + + assert(sizedwords == 8); + + len = dwords[5]; + addr = dwords[6]; + addr |= ((uint64_t)dwords[7]) << 32; + + printl(3, "%saddr: 0x%016lx\n", levels[level], addr); + printl(3, "%slen: 0x%x\n", levels[level], len); + + ptr = hostptr(addr); + + if (ptr) { + if (!quiet(2)) { + ib++; + dump_commands(ptr, len, level+1); + ib--; + dump_hex(ptr, len, level+1); + } + } +} + +static void +cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint64_t addr; + uint32_t *ptr, len; + + assert(is_64b()); + assert(options->gpu_id >= 500); + + assert(sizedwords == 8); + + addr = dwords[5]; + addr |= ((uint64_t)dwords[6]) << 32; + len = dwords[7]; + + printl(3, "%saddr: 0x%016lx\n", levels[level], addr); + printl(3, "%slen: 0x%x\n", levels[level], len); + + ptr = hostptr(addr); + + if (ptr) { + if (!quiet(2)) { + ib++; + dump_commands(ptr, len, level+1); + ib--; + dump_hex(ptr, len, level+1); + } + } +} + +static void +cp_blit(uint32_t *dwords, uint32_t sizedwords, int level) +{ + do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0); + print_mode(level); + dump_register_summary(level); +} + +static void 
+cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level) +{ + int i; + + /* NOTE: seems to write same reg multiple times.. not sure if different parts of + * these are triggered by the FLUSH_SO_n events?? (if that is what they actually + * are?) + */ + bool saved_summary = summary; + summary = false; + + for (i = 0; i < sizedwords; i += 2) { + dump_register(dwords[i+0], dwords[i+1], level+1); + reg_set(dwords[i+0], dwords[i+1]); + } + + summary = saved_summary; +} + +static void +cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint32_t reg = dwords[1] & 0xffff; + + dump_register(reg, dwords[2], level+1); + reg_set(reg, dwords[2]); +} + +static void +cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level) +{ + uint64_t addr; + uint32_t size = dwords[2] & 0xffff; + void *ptr; + + addr = dwords[0] | ((uint64_t)dwords[1] << 32); + + printf("addr=%lx\n", addr); + ptr = hostptr(addr); + if (ptr) { + dump_commands(ptr, size, level+1); + } +} + +static void +cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level) +{ + skip_ib2_enable_global = dwords[0]; +} + +static void +cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level) +{ + skip_ib2_enable_local = dwords[0]; +} + +#define CP(x, fxn, ...) 
{ "CP_" #x, fxn, ##__VA_ARGS__ } +static const struct type3_op { + const char *name; + void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level); + struct { + bool load_all_groups; + } options; +} type3_op[] = { + CP(NOP, cp_nop), + CP(INDIRECT_BUFFER, cp_indirect), + CP(INDIRECT_BUFFER_PFD, cp_indirect), + CP(WAIT_FOR_IDLE, cp_wfi), + CP(REG_RMW, cp_rmw), + CP(REG_TO_MEM, cp_reg_mem), + CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */ + CP(MEM_WRITE, cp_mem_write), + CP(EVENT_WRITE, cp_event_write), + CP(RUN_OPENCL, cp_run_cl), + CP(DRAW_INDX, cp_draw_indx, {.load_all_groups=true}), + CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups=true}), + CP(SET_CONSTANT, cp_set_const), + CP(IM_LOAD_IMMEDIATE, cp_im_loadi), + CP(WIDE_REG_WRITE, cp_wide_reg_write), + + /* for a3xx */ + CP(LOAD_STATE, cp_load_state), + CP(SET_BIN, cp_set_bin), + + /* for a4xx */ + CP(LOAD_STATE4, cp_load_state), + CP(SET_DRAW_STATE, cp_set_draw_state), + CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups=true}), + CP(EXEC_CS, cp_exec_cs, {.load_all_groups=true}), + CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups=true}), + + /* for a5xx */ + CP(SET_RENDER_MODE, cp_set_render_mode), + CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint), + CP(BLIT, cp_blit), + CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch), + CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups=true}), + CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups=true}), + CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global), + CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local), + + /* for a6xx */ + CP(LOAD_STATE6_GEOM, cp_load_state), + CP(LOAD_STATE6_FRAG, cp_load_state), + CP(LOAD_STATE6, cp_load_state), + CP(SET_MODE, cp_set_mode), + CP(SET_MARKER, cp_set_marker), + CP(REG_WRITE, cp_reg_write), + + CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib), +}; + +static void +noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level) +{ +} + +static const struct type3_op * +get_type3_op(unsigned opc) +{ + 
static const struct type3_op dummy_op = { + .fxn = noop_fxn, + }; + const char *name = pktname(opc); + + if (!name) + return &dummy_op; + + for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++) + if (!strcmp(name, type3_op[i].name)) + return &type3_op[i]; + + return &dummy_op; +} + +void +dump_commands(uint32_t *dwords, uint32_t sizedwords, int level) +{ + int dwords_left = sizedwords; + uint32_t count = 0; /* dword count including packet header */ + uint32_t val; + +// assert(dwords); + if (!dwords) { + printf("NULL cmd buffer!\n"); + return; + } + + draws[ib] = 0; + + while (dwords_left > 0) { + + current_draw_count = draw_count; + + /* hack, this looks like a -1 underflow, in some versions + * when it tries to write zero registers via pkt0 + */ +// if ((dwords[0] >> 16) == 0xffff) +// goto skip; + + if (pkt_is_type0(dwords[0])) { + printl(3, "t0"); + count = type0_pkt_size(dwords[0]) + 1; + val = type0_pkt_offset(dwords[0]); + assert(val < regcnt()); + printl(3, "%swrite %s%s (%04x)\n", levels[level+1], regname(val, 1), + (dwords[0] & 0x8000) ? 
" (same register)" : "", val); + dump_registers(val, dwords+1, count-1, level+2); + if (!quiet(3)) + dump_hex(dwords, count, level+1); + } else if (pkt_is_type4(dwords[0])) { + /* basically the same(ish) as type0 prior to a5xx */ + printl(3, "t4"); + count = type4_pkt_size(dwords[0]) + 1; + val = type4_pkt_offset(dwords[0]); + assert(val < regcnt()); + printl(3, "%swrite %s (%04x)\n", levels[level+1], regname(val, 1), val); + dump_registers(val, dwords+1, count-1, level+2); + if (!quiet(3)) + dump_hex(dwords, count, level+1); +#if 0 + } else if (pkt_is_type1(dwords[0])) { + printl(3, "t1"); + count = 3; + val = dwords[0] & 0xfff; + printl(3, "%swrite %s\n", levels[level+1], regname(val, 1)); + dump_registers(val, dwords+1, 1, level+2); + val = (dwords[0] >> 12) & 0xfff; + printl(3, "%swrite %s\n", levels[level+1], regname(val, 1)); + dump_registers(val, dwords+2, 1, level+2); + if (!quiet(3)) + dump_hex(dwords, count, level+1); + } else if (pkt_is_type2(dwords[0])) { + printl(3, "t2"); + printf("%sNOP\n", levels[level+1]); + count = 1; + if (!quiet(3)) + dump_hex(dwords, count, level+1); +#endif + } else if (pkt_is_type3(dwords[0])) { + count = type3_pkt_size(dwords[0]) + 1; + val = cp_type3_opcode(dwords[0]); + const struct type3_op *op = get_type3_op(val); + if (op->options.load_all_groups) + load_all_groups(level+1); + printl(3, "t3"); + const char *name = pktname(val); + if (!quiet(2)) { + printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level], + rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, + val, count, (dwords[0] & 0x1) ? 
" (predicated)" : ""); + } + if (name) + dump_domain(dwords+1, count-1, level+2, name); + op->fxn(dwords+1, count-1, level+1); + if (!quiet(2)) + dump_hex(dwords, count, level+1); + } else if (pkt_is_type7(dwords[0])) { + count = type7_pkt_size(dwords[0]) + 1; + val = cp_type7_opcode(dwords[0]); + const struct type3_op *op = get_type3_op(val); + if (op->options.load_all_groups) + load_all_groups(level+1); + printl(3, "t7"); + const char *name = pktname(val); + if (!quiet(2)) { + printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level], + rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, + val, count); + } + if (name) { + /* special hack for two packets that decode the same way + * on a6xx: + */ + if (!strcmp(name, "CP_LOAD_STATE6_FRAG") || + !strcmp(name, "CP_LOAD_STATE6_GEOM")) + name = "CP_LOAD_STATE6"; + dump_domain(dwords+1, count-1, level+2, name); + } + op->fxn(dwords+1, count-1, level+1); + if (!quiet(2)) + dump_hex(dwords, count, level+1); + } else if (pkt_is_type2(dwords[0])) { + printl(3, "t2"); + printl(3, "%snop\n", levels[level+1]); + } else { + /* for 5xx+ we can do a passable job of looking for start of next valid packet: */ + if (options->gpu_id >= 500) { + while (dwords_left > 0) { + if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0])) + break; + printf("bad type! %08x\n", dwords[0]); + dwords++; + dwords_left--; + } + } else { + printf("bad type! %08x\n", dwords[0]); + return; + } + } + + dwords += count; + dwords_left -= count; + + } + + if (dwords_left < 0) + printf("**** this ain't right!! 
dwords_left=%d\n", dwords_left); +} diff --git a/src/freedreno/decode/cffdec.h b/src/freedreno/decode/cffdec.h new file mode 100644 index 00000000000..695aec39de8 --- /dev/null +++ b/src/freedreno/decode/cffdec.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/*
 * cffdec.h - options and entry points of the cmdstream decoder, plus
 * helpers for classifying PM4 packet headers.
 */

#ifndef __CFFDEC_H__
#define __CFFDEC_H__

#include <stdbool.h>
#include <stdint.h>

enum query_mode {
	/* default mode, dump all queried regs on each draw: */
	QUERY_ALL = 0,

	/* only dump if any of the queried regs were written
	 * since last draw:
	 */
	QUERY_WRITTEN,

	/* only dump if any of the queried regs changed since
	 * last draw:
	 */
	QUERY_DELTA,
};

struct cffdec_options {
	unsigned gpu_id;
	int draw_filter;
	int color;
	int dump_shaders;
	int summary;
	int allregs;
	int dump_textures;
	int decode_markers;
	char *script;

	int query_compare;  /* binning vs SYSMEM/GMEM compare mode */
	int query_mode;     /* enum query_mode */
	char **querystrs;
	int nquery;

	/* In "once" mode, only decode a cmdstream buffer once (per draw
	 * mode, in the case of a6xx+ where a single cmdstream buffer can
	 * be used for both binning and draw pass), rather than each time
	 * encountered (ie. once per tile/bin in GMEM draw passes)
	 */
	int once;

	/* for crashdec, where we know CP_IBx_REM_SIZE, we can use this
	 * to highlight the cmdstream not parsed yet, to make it easier
	 * to see how far along the CP is.
	 */
	struct {
		uint64_t base;
		uint32_t rem;
	} ibs[4];
};

void printl(int lvl, const char *fmt, ...);
const char * pktname(unsigned opc);
uint32_t regbase(const char *name);
const char * regname(uint32_t regbase, int color);
bool reg_written(uint32_t regbase);
uint32_t reg_lastval(uint32_t regbase);
uint32_t reg_val(uint32_t regbase);
void reg_set(uint32_t regbase, uint32_t val);
void reset_regs(void);
void cffdec_init(const struct cffdec_options *options);
void dump_register_val(uint32_t regbase, uint32_t dword, int level);
void dump_commands(uint32_t *dwords, uint32_t sizedwords, int level);

/*
 * Helpers for packet parsing:
 */


#define CP_TYPE0_PKT 0x00000000
#define CP_TYPE2_PKT 0x80000000
#define CP_TYPE3_PKT 0xc0000000
#define CP_TYPE4_PKT 0x40000000
#define CP_TYPE7_PKT 0x70000000

#define pkt_is_type0(pkt) (((pkt) & 0xC0000000) == CP_TYPE0_PKT)
#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)
#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF)

#define pkt_is_type2(pkt) ((pkt) == CP_TYPE2_PKT)

/*
 * Parity bit expected in type4/type7 headers for the field `val`: XOR-folds
 * the eight nibbles of `val` into one and looks the result up in 0x9669,
 * which yields 1 when `val` has an even number of set bits and 0 otherwise
 * (so field plus parity bit always carry an odd number of set bits).
 *
 * Spelled with `unsigned int` rather than the non-standard `uint` typedef
 * so the header also builds in strict ISO C mode.
 */
static inline unsigned int pm4_calc_odd_parity_bit(unsigned int val)
{
	return (0x9669 >> (0xf & ((val) ^
			((val) >> 4) ^ ((val) >> 8) ^ ((val) >> 12) ^
			((val) >> 16) ^ ((val) >> 20) ^ ((val) >> 24) ^
			((val) >> 28)))) & 1;
}

/*
 * Check both for the type3 opcode and make sure that the reserved bits [1:7]
 * and 15 are 0
 */
#define pkt_is_type3(pkt) \
	((((pkt) & 0xC0000000) == CP_TYPE3_PKT) && \
	 (((pkt) & 0x80FE) == 0))

#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF)
#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)

/* type4: bit 27 is the parity of the offset field, bit 7 of the size field */
#define pkt_is_type4(pkt) \
	((((pkt) & 0xF0000000) == CP_TYPE4_PKT) && \
	 ((((pkt) >> 27) & 0x1) == \
	 pm4_calc_odd_parity_bit(type4_pkt_offset(pkt))) \
	 && ((((pkt) >> 7) & 0x1) == \
	 pm4_calc_odd_parity_bit(type4_pkt_size(pkt))))

#define type4_pkt_offset(pkt) (((pkt) >> 8) & 0x7FFFF)
#define type4_pkt_size(pkt) ((pkt) & 0x7F)

/* type7: bit 23 is the parity of the opcode field, bit 15 of the size field */
#define pkt_is_type7(pkt) \
	((((pkt) & 0xF0000000) == CP_TYPE7_PKT) && \
	 (((pkt) & 0x0F000000) == 0) && \
	 ((((pkt) >> 23) & 0x1) == \
	 pm4_calc_odd_parity_bit(cp_type7_opcode(pkt))) \
	 && ((((pkt) >> 15) & 0x1) == \
	 pm4_calc_odd_parity_bit(type7_pkt_size(pkt))))

#define cp_type7_opcode(pkt) (((pkt) >> 16) & 0x7F)
#define type7_pkt_size(pkt) ((pkt) & 0x3FFF)

#endif /* __CFFDEC_H__ */

/*
 * diff --git a/src/freedreno/decode/cffdump.c b/src/freedreno/decode/cffdump.c new file mode 100644 index 00000000000..7fec7dcbd10 --- /dev/null +++ b/src/freedreno/decode/cffdump.c @@ -0,0 +1,370 @@
 */
/*
 * Copyright (c) 2012 Rob Clark
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "redump.h" +#include "disasm.h" +#include "script.h" +#include "io.h" +#include "rnnutil.h" +#include "pager.h" +#include "buffers.h" +#include "cffdec.h" + +static struct cffdec_options options = { + .gpu_id = 220, +}; + +static bool needs_wfi = false; +static bool is_blob = false; +static int show_comp = false; +static int interactive; +static int vertices; + +static int handle_file(const char *filename, int start, int end, int draw); + +static void print_usage(const char *name) +{ + fprintf(stderr, "Usage:\n\n" + "\t%s [OPTSIONS]... FILE...\n\n" + "Options:\n" + "\t-v, --verbose - more verbose disassembly\n" + "\t--dump-shaders - dump each shader to a raw file\n" + "\t--no-color - disable colorized output (default for non-console\n" + "\t output)\n" + "\t--color - enable colorized output (default for tty output)\n" + "\t--no-pager - disable pager (default for non-console output)\n" + "\t--pager - enable pager (default for tty output)\n" + "\t-s, --summary - don't show individual register writes, but just\n" + "\t register values on draws\n" + "\t-a, --allregs - show all registers (including ones not written\n" + "\t since previous draw) on each draw\n" + "\t-S, --start=N - start decoding from frame N\n" + "\t-E, --end=N - stop decoding after frame N\n" + "\t-F, --frame=N - decode only frame N\n" + "\t-D, --draw=N - decode only draw N\n" + "\t--textures - dump texture contents (if possible)\n" + "\t-L, --script=LUA - run specified lua script to analyze state\n" + "\t-q, --query=REG - query mode, dump only specified query registers on\n" + "\t each draw; multiple --query/-q args can be given to\n" + "\t dump multiple registers; register can be specified\n" + "\t either by name or numeric offset\n" + "\t--query-all - in query mode, show all queried regs on each draw\n" 
+ "\t (default query mode)\n" + "\t--query-written - in query mode, show queried regs on draws if any of\n" + "\t them have been written since previous draw\n" + "\t--query-delta - in query mode, show queried regs on draws if any of\n" + "\t them have changed since previous draw\n" + "\t--query-compare - dump registers for BINNING vs GMEM/BYPASS per draw;\n" + "\t only applicable for regs set via SDS group (a6xx+),\n" + "\t implies --once, can be combined with --query-all,\n" + "\t --query-written, or --query-delta\n" + "\t--once - decode cmdstream only once (per draw mode); if same\n" + "\t cmdstream is executed for each tile, this will decode\n" + "\t it only for the first tile and skip the remainder,\n" + "\t which can be useful when looking at state that does\n" + "\t not change per tile\n" + "\t--not-once - decode cmdstream for each IB (default)\n" + "\t-h, --help - show this message\n" + , name); + exit(2); +} + +static const struct option opts[] = { + /* Long opts that simply set a flag (no corresponding short alias: */ + { "dump-shaders", no_argument, &options.dump_shaders, 1 }, + { "no-color", no_argument, &options.color, 0 }, + { "color", no_argument, &options.color, 1 }, + { "no-pager", no_argument, &interactive, 0 }, + { "pager", no_argument, &interactive, 1 }, + { "textures", no_argument, &options.dump_textures, 1 }, + { "show-compositor", no_argument, &show_comp, 1 }, + { "query-all", no_argument, &options.query_mode, QUERY_ALL }, + { "query-written", no_argument, &options.query_mode, QUERY_WRITTEN }, + { "query-delta", no_argument, &options.query_mode, QUERY_DELTA }, + { "query-compare", no_argument, &options.query_compare, 1 }, + { "once", no_argument, &options.once, 1 }, + { "not-once", no_argument, &options.once, 0 }, + + /* Long opts with short alias: */ + { "verbose", no_argument, 0, 'v' }, + { "summary", no_argument, 0, 's' }, + { "allregs", no_argument, 0, 'a' }, + { "start", required_argument, 0, 'S' }, + { "end", required_argument, 0, 'E' }, 
+ { "frame", required_argument, 0, 'F' }, + { "draw", required_argument, 0, 'D' }, + { "script", required_argument, 0, 'L' }, + { "query", required_argument, 0, 'q' }, + { "help", no_argument, 0, 'h' }, +}; + +int main(int argc, char **argv) +{ + int ret = -1; + int start = 0, end = 0x7ffffff, draw = -1; + int c; + + interactive = isatty(STDOUT_FILENO); + + options.color = interactive; + + while ((c = getopt_long(argc, argv, "vsaS:E:F:D:L:q:h", opts, NULL)) != -1) { + switch (c) { + case 0: + /* option that set a flag, nothing to do */ + break; + case 'v': + disasm_set_debug(PRINT_RAW | EXPAND_REPEAT | PRINT_VERBOSE); + break; + case 's': + options.summary = true; + break; + case 'a': + options.allregs = true; + break; + case 'S': + start = atoi(optarg); + break; + case 'E': + end = atoi(optarg); + break; + case 'F': + start = end = atoi(optarg); + break; + case 'D': + draw = atoi(optarg); + break; + case 'L': + options.script = optarg; + if (script_load(options.script)) { + errx(-1, "error loading %s\n", options.script); + } + break; + case 'q': + options.querystrs = realloc(options.querystrs, + (options.nquery + 1) * sizeof(*options.querystrs)); + options.querystrs[options.nquery] = optarg; + options.nquery++; + interactive = 0; + break; + case 'h': + default: + print_usage(argv[0]); + } + } + + if (interactive) { + pager_open(); + } + + while (optind < argc) { + ret = handle_file(argv[optind], start, end, draw); + if (ret) { + fprintf(stderr, "error reading: %s\n", argv[optind]); + fprintf(stderr, "continuing..\n"); + } + optind++; + } + + if (ret) + print_usage(argv[0]); + + if ((options.query_mode || options.query_compare) && !options.nquery) { + fprintf(stderr, "query options only valid in query mode!\n"); + print_usage(argv[0]); + } + + script_finish(); + + if (interactive) { + pager_close(); + } + + return ret; +} + +static void parse_addr(uint32_t *buf, int sz, unsigned int *len, uint64_t *gpuaddr) +{ + *gpuaddr = buf[0]; + *len = buf[1]; + if (sz > 8) + 
*gpuaddr |= ((uint64_t)(buf[2])) << 32; +} + +static int handle_file(const char *filename, int start, int end, int draw) +{ + enum rd_sect_type type = RD_NONE; + void *buf = NULL; + struct io *io; + int submit = 0, got_gpu_id = 0; + int sz, ret = 0; + bool needs_reset = false; + bool skip = false; + + options.draw_filter = draw; + + cffdec_init(&options); + + printf("Reading %s...\n", filename); + + script_start_cmdstream(filename); + + if (!strcmp(filename, "-")) + io = io_openfd(0); + else + io = io_open(filename); + + if (!io) { + fprintf(stderr, "could not open: %s\n", filename); + return -1; + } + + struct { + unsigned int len; + uint64_t gpuaddr; + } gpuaddr = {0}; + + while (true) { + uint32_t arr[2]; + + ret = io_readn(io, arr, 8); + if (ret <= 0) + goto end; + + while ((arr[0] == 0xffffffff) && (arr[1] == 0xffffffff)) { + ret = io_readn(io, arr, 8); + if (ret <= 0) + goto end; + } + + type = arr[0]; + sz = arr[1]; + + if (sz < 0) { + ret = -1; + goto end; + } + + free(buf); + + needs_wfi = false; + + buf = malloc(sz + 1); + ((char *)buf)[sz] = '\0'; + ret = io_readn(io, buf, sz); + if (ret < 0) + goto end; + + switch(type) { + case RD_TEST: + printl(1, "test: %s\n", (char *)buf); + break; + case RD_CMD: + is_blob = true; + printl(2, "cmd: %s\n", (char *)buf); + skip = false; + if (!show_comp) { + skip |= (strstr(buf, "fdperf") == buf); + skip |= (strstr(buf, "chrome") == buf); + skip |= (strstr(buf, "surfaceflinger") == buf); + skip |= ((char *)buf)[0] == 'X'; + } + break; + case RD_VERT_SHADER: + printl(2, "vertex shader:\n%s\n", (char *)buf); + break; + case RD_FRAG_SHADER: + printl(2, "fragment shader:\n%s\n", (char *)buf); + break; + case RD_GPUADDR: + if (needs_reset) { + reset_buffers(); + needs_reset = false; + } + parse_addr(buf, sz, &gpuaddr.len, &gpuaddr.gpuaddr); + break; + case RD_BUFFER_CONTENTS: + add_buffer(gpuaddr.gpuaddr, gpuaddr.len, buf); + buf = NULL; + break; + case RD_CMDSTREAM_ADDR: + if ((start <= submit) && (submit <= end)) { + 
unsigned int sizedwords; + uint64_t gpuaddr; + parse_addr(buf, sz, &sizedwords, &gpuaddr); + printl(2, "############################################################\n"); + printl(2, "cmdstream: %d dwords\n", sizedwords); + if (!skip) { + script_start_submit(); + dump_commands(hostptr(gpuaddr), sizedwords, 0); + script_end_submit(); + } + printl(2, "############################################################\n"); + printl(2, "vertices: %d\n", vertices); + } + needs_reset = true; + submit++; + break; + case RD_GPU_ID: + if (!got_gpu_id) { + options.gpu_id = *((unsigned int *)buf); + printl(2, "gpu_id: %d\n", options.gpu_id); + cffdec_init(&options); + got_gpu_id = 1; + } + break; + default: + break; + } + } + +end: + script_end_cmdstream(); + + io_close(io); + fflush(stdout); + + if (ret < 0) { + printf("corrupt file\n"); + } + return 0; +} diff --git a/src/freedreno/decode/crashdec.c b/src/freedreno/decode/crashdec.c new file mode 100644 index 00000000000..3b17d831d57 --- /dev/null +++ b/src/freedreno/decode/crashdec.c @@ -0,0 +1,1114 @@ +/* + * Copyright © 2020 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Decoder for devcoredump traces from drm/msm. In case of a gpu crash/hang, + * the coredump should be found in: + * + * /sys/class/devcoredump/devcd/data + * + * The crashdump will hang around for 5min, it can be cleared by writing to + * the file, ie: + * + * echo 1 > /sys/class/devcoredump/devcd/data + * + * (the driver won't log any new crashdumps until the previous one is cleared + * or times out after 5min) + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "buffers.h" +#include "cffdec.h" +#include "disasm.h" +#include "pager.h" +#include "rnnutil.h" +#include "util.h" +#include "instr-a3xx.h" + + +static FILE *in; +static bool verbose; + +static struct rnn *rnn_gmu; +static struct rnn *rnn_control; +static struct rnn *rnn_pipe; + +static struct cffdec_options options = { + .draw_filter = -1, +}; + +static inline bool is_a6xx(void) { return (600 <= options.gpu_id) && (options.gpu_id < 700); } +static inline bool is_a5xx(void) { return (500 <= options.gpu_id) && (options.gpu_id < 600); } +static inline bool is_64b(void) { return options.gpu_id >= 500; } + +/* + * Helpers to read register values: + */ + +/* read registers that are 64b on 64b GPUs (ie. 
/*
 * Read the next input line and decode it as ascii85 data.
 *
 * Each group of five characters decodes to one dword and a single 'z'
 * decodes to a zero dword.  At most sizedwords dwords are stored; any
 * further data on the line is dropped with a warning instead of being
 * written past the end of the buffer.
 *
 * Returns a zero-initialized heap buffer of sizedwords dwords which the
 * caller owns.  Exits the process if the allocation fails.
 */
static uint32_t *
popline_ascii85(uint32_t sizedwords)
{
	const char *line = popline();

	/* At this point we expect the ascii85 data to be indented *some*
	 * amount, and to terminate at the end of the line.  So just eat
	 * up the leading whitespace.
	 */
	assert(*line == ' ');
	while (*line == ' ')
		line++;

	/* Allocate at least one element so that a zero-sized request cannot
	 * be mistaken for an allocation failure; calloc() also checks the
	 * count * size multiplication for overflow.
	 */
	uint32_t *buf = calloc(sizedwords ? sizedwords : 1, sizeof(*buf));
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		exit(1);
	}

	uint32_t idx = 0;

	/* Stop at the NUL as well as the newline: the last line of the
	 * input is not guaranteed to end in '\n'.
	 */
	while ((*line != '\n') && (*line != '\0')) {
		if (idx >= sizedwords) {
			fprintf(stderr, "ascii85 data exceeds %u dwords, truncating\n",
					sizedwords);
			break;
		}

		if (*line == 'z') {
			buf[idx++] = 0;
			line++;
			continue;
		}

		uint32_t accum = 0;
		for (int i = 0; (i < 5) && (*line != '\n') && (*line != '\0'); i++) {
			accum *= 85;
			accum += *line - '!';
			line++;
		}

		buf[idx++] = accum;
	}

	return buf;
}
we have %% */ + n--; + l = 0; + } else { + n++; + l = i; + } + } + } + + va_list ap; + va_start(ap, fmt); + if (vsscanf(line, fmt, ap) != n) { + fprintf(stderr, "parse error scanning: '%s'\n", fmt); + exit(1); + } + va_end(ap); +} + +#define foreach_line_in_section(_line) \ + for (const char *_line = popline(); _line; _line = popline()) \ + /* check for start of next section */ \ + if (_line[0] != ' ') { \ + pushline(); \ + break; \ + } else + +/* + * Provide our own disasm assert() handler, so that we can recover + * after attempting to disassemble things that might not be valid + * instructions: + */ + +static bool jmp_env_valid; +static jmp_buf jmp_env; + +void +ir3_assert_handler(const char *expr, const char *file, int line, + const char *func) +{ + printf("%s:%u: %s: Assertion `%s' failed.\n", file, line, func, expr); + if (jmp_env_valid) + longjmp(jmp_env, 1); + abort(); +} + +#define TRY(x) do { \ + assert(!jmp_env_valid); \ + if (setjmp(jmp_env) == 0) { \ + jmp_env_valid = true; \ + x; \ + } \ + jmp_env_valid = false; \ + } while (0) + +/* + * Decode ringbuffer section: + */ + +static struct { + uint64_t iova; + uint32_t rptr; + uint32_t wptr; + uint32_t size; + uint32_t *buf; +} ringbuffers[5]; + +static void +decode_ringbuffer(void) +{ + int id = 0; + + foreach_line_in_section (line) { + if (startswith(line, " - id:")) { + parseline(line, " - id: %d", &id); + assert(id < ARRAY_SIZE(ringbuffers)); + } else if (startswith(line, " iova:")) { + parseline(line, " iova: %"PRIx64, &ringbuffers[id].iova); + } else if (startswith(line, " rptr:")) { + parseline(line, " rptr: %d", &ringbuffers[id].rptr); + } else if (startswith(line, " wptr:")) { + parseline(line, " wptr: %d", &ringbuffers[id].wptr); + } else if (startswith(line, " size:")) { + parseline(line, " size: %d", &ringbuffers[id].size); + } else if (startswith(line, " data: !!ascii85 |")) { + ringbuffers[id].buf = popline_ascii85(ringbuffers[id].size / 4); + add_buffer(ringbuffers[id].iova, 
ringbuffers[id].size, ringbuffers[id].buf); + continue; + } + + printf("%s", line); + } +} + +static bool +valid_header(uint32_t pkt) +{ + if (options.gpu_id >= 500) { + return pkt_is_type4(pkt) || pkt_is_type7(pkt); + } else { + /* TODO maybe we can check validish looking pkt3 opc or pkt0 + * register offset.. the cmds sent by kernel are usually + * fairly limited (other than initialization) which confines + * the search space a bit.. + */ + return true; + } +} + +static void +dump_cmdstream(void) +{ + uint64_t rb_base = regval64("CP_RB_BASE"); + + printf("got rb_base=%"PRIx64"\n", rb_base); + + options.ibs[1].base = regval64("CP_IB1_BASE"); + options.ibs[1].rem = regval("CP_IB1_REM_SIZE"); + options.ibs[2].base = regval64("CP_IB2_BASE"); + options.ibs[2].rem = regval("CP_IB2_REM_SIZE"); + + /* Adjust remaining size to account for cmdstream slurped into ROQ + * but not yet consumed by SQE + * + * TODO add support for earlier GPUs once we tease out the needed + * registers.. see crashit.c in msmtest for hints. + * + * TODO it would be nice to be able to extract out register bitfields + * by name rather than hard-coding this. + */ + if (is_a6xx()) { + options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16; + options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16; + } + + printf("IB1: %"PRIx64", %u\n", options.ibs[1].base, options.ibs[1].rem); + printf("IB2: %"PRIx64", %u\n", options.ibs[2].base, options.ibs[2].rem); + + /* now that we've got the regvals we want, reset register state + * so we aren't seeing values from decode_registers(); + */ + reset_regs(); + + for (int id = 0; id < ARRAY_SIZE(ringbuffers); id++) { + if (ringbuffers[id].iova != rb_base) + continue; + if (!ringbuffers[id].size) + continue; + + printf("found ring!\n"); + + /* The kernel level ringbuffer (RB) wraps around, which + * cffdec doesn't really deal with.. 
so figure out how + * many dwords are unread + */ + unsigned ringszdw = ringbuffers[id].size >> 2; /* in dwords */ + +/* helper macro to deal with modulo size math: */ +#define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw) + + /* The rptr will (most likely) have moved past the IB to + * userspace cmdstream, so back up a bit, and then advance + * until we find a valid start of a packet.. this is going + * to be less reliable on a4xx and before (pkt0/pkt3), + * compared to pkt4/pkt7 with parity bits + */ + const int lookback = 12; + unsigned rptr = mod_add(ringbuffers[id].rptr, -lookback); + + for (int idx = 0; idx < lookback; idx++) { + if (valid_header(ringbuffers[id].buf[rptr])) + break; + rptr = mod_add(rptr, 1); + } + + unsigned cmdszdw = mod_add(ringbuffers[id].wptr, -rptr); + + printf("got cmdszdw=%d\n", cmdszdw); + uint32_t *buf = malloc(cmdszdw * 4); + + for (int idx = 0; idx < cmdszdw; idx++) { + int p = mod_add(rptr, idx); + buf[idx] = ringbuffers[id].buf[p]; + } + + dump_commands(buf, cmdszdw, 0); + free(buf); + } +} + +/* + * Decode 'bos' (buffers) section: + */ + +static void +decode_bos(void) +{ + uint32_t size = 0; + uint64_t iova = 0; + + foreach_line_in_section (line) { + if (startswith(line, " - iova:")) { + parseline(line, " - iova: %"PRIx64, &iova); + } else if (startswith(line, " size:")) { + parseline(line, " size: %u", &size); + } else if (startswith(line, " data: !!ascii85 |")) { + uint32_t *buf = popline_ascii85(size / 4); + + if (verbose) + dump_hex_ascii(buf, size, 1); + + add_buffer(iova, size, buf); + + continue; + } + + printf("%s", line); + } +} + +/* + * Decode registers section: + */ + +static void +dump_register(struct rnn *rnn, uint32_t offset, uint32_t value) +{ + struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset); + if (info && info->typeinfo) { + char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value); + printf("%s: %s\n", info->name, decoded); + } else if (info) { + printf("%s: %08x\n", info->name, 
/*
 * Print the CP_SEQ_STAT indexed registers: the first dword as the PC, the
 * name of the type-7 packet found in the following dword (a6xx only, when
 * it looks like a valid header), then the next 32 dwords as two columns
 * labelled $01..$10 and $11..$20.
 */
static void
dump_cp_seq_stat(uint32_t *stat)
{
	const uint32_t *regs = &stat[1];
	const uint32_t hdr = regs[0];

	printf("\t PC: %04x\n", stat[0]);

	/* Not sure if a valid non-type7 header can show up here; if it
	 * does, nothing is printed for it.
	 */
	if (is_a6xx() && valid_header(hdr) && pkt_is_type7(hdr)) {
		const char *pkt = pktname(cp_type7_opcode(hdr));
		if (pkt)
			printf("\tPKT: %s\n", pkt);
	}

	for (int row = 1; row <= 16; row++) {
		printf("\t$%02x: %08x\t\t$%02x: %08x\n",
				row, regs[row - 1], row + 16, regs[row + 15]);
	}
}
+ */ + for (uint32_t i = 0; i < 0x80; i++) { + printf("\t%08x\t", regs[i]); + dump_register(rnn_control, i + 0x100, regs[i]); + } +} + +static void +dump_cp_ucode_dbg(uint32_t *dbg) +{ + /* Notes on the data: + * There seems to be a section every 4096 DWORD's. The sections aren't + * all the same size, so the rest of the 4096 DWORD's are filled with + * mirrors of the actual data. + */ + + for (int section = 0; section < 6; section++, dbg += 0x1000) { + switch (section) { + case 0: + /* Contains scattered data from a630_sqe.fw: */ + printf("\tSQE instruction cache:\n"); + dump_hex_ascii(dbg, 4 * 0x400, 1); + break; + case 1: + printf("\tUnknown 1:\n"); + dump_hex_ascii(dbg, 4 * 0x80, 1); + break; + case 2: + printf("\tUnknown 2:\n"); + dump_hex_ascii(dbg, 4 * 0x200, 1); + break; + case 3: + printf("\tUnknown 3:\n"); + dump_hex_ascii(dbg, 4 * 0x80, 1); + break; + case 4: + /* Don't bother printing this normally */ + if (verbose) { + printf("\tSQE packet jumptable contents:\n"); + dump_hex_ascii(dbg, 4 * 0x80, 1); + } + break; + case 5: + printf("\tSQE scratch control regs:\n"); + dump_control_regs(dbg); + break; + } + } +} + +static void +dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context, bool pipe) +{ + if (pipe) { + struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg); + printf("\t\twrite %s (%02x) pipe\n", info->name, reg); + + if (!strcmp(info->typeinfo->name, "void")) { + /* registers that ignore their payload */ + } else { + printf("\t\t\t"); + dump_register(rnn_pipe, reg, data); + } + } else { + printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context); + dump_register_val(reg, data, 2); + } +} + +static void +dump_mem_pool_chunk(const uint32_t *chunk) +{ + struct __attribute__((packed)) { + bool reg0_enabled : 1; + bool reg1_enabled : 1; + uint32_t data0 : 32; + uint32_t data1 : 32; + uint32_t reg0 : 18; + uint32_t reg1 : 18; + bool reg0_pipe : 1; + bool reg1_pipe : 1; + uint32_t reg0_context : 1; + uint32_t 
reg1_context : 1; + uint32_t padding : 22; + } fields; + + memcpy(&fields, chunk, 4 * sizeof(uint32_t)); + + if (fields.reg0_enabled) { + dump_mem_pool_reg_write(fields.reg0, fields.data0, fields.reg0_context, fields.reg0_pipe); + } + + if (fields.reg1_enabled) { + dump_mem_pool_reg_write(fields.reg1, fields.data1, fields.reg1_context, fields.reg1_pipe); + } +} + +static void +dump_cp_mem_pool(uint32_t *mempool) +{ + /* The mem pool is a shared pool of memory used for storing in-flight + * register writes. There are 6 different queues, one for each + * cluster. Writing to $data (or for some special registers, $addr) + * pushes data onto the appropriate queue, and each queue is pulled + * from by the appropriate cluster. The queues are thus written to + * in-order, but may be read out-of-order. + * + * The queues are conceptually divided into 128-bit "chunks", and the + * read and write pointers are in units of chunks. These chunks are + * organized internally into 8-chunk "blocks", and memory is allocated + * dynamically in terms of blocks. Each queue is represented as a + * singly-linked list of blocks, as well as 3-bit start/end chunk + * pointers that point within the first/last block. The next pointers + * are located in a separate array, rather than inline. + */ + + /* TODO: The firmware CP_MEM_POOL save/restore routines do something + * like: + * + * cread $02, [ $00 + 0 ] + * and $02, $02, 0x118 + * ... + * brne $02, 0, #label + * mov $03, 0x2000 + * mov $03, 0x1000 + * label: + * ... + * + * I think that control register 0 is the GPU version, and some + * versions have a smaller mem pool. It seems some models have a mem + * pool that's half the size, and a bunch of offsets are shifted + * accordingly. Unfortunately the kernel driver's dumping code doesn't + * seem to take this into account, even the downstream android driver, + * and we don't know which versions 0x8, 0x10, or 0x100 correspond + * to. 
Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out? + */ + bool small_mem_pool = false; + + /* The array of next pointers for each block. */ + const uint32_t *next_pointers = small_mem_pool ? &mempool[0x800] : &mempool[0x1000]; + + /* Maximum number of blocks in the pool, also the size of the pointers + * array. + */ + const int num_blocks = small_mem_pool ? 0x30 : 0x80; + + /* Number of queues */ + const unsigned num_queues = 6; + + /* Unfortunately the per-queue state is a little more complicated than + * a simple pair of begin/end pointers. Instead of a single beginning + * block, there are *two*, with the property that either the two are + * equal or the second is the "next" of the first. Similarly there are + * two end blocks. Thus the queue either looks like this: + * + * A -> B -> ... -> C -> D + * + * Or like this, or some combination: + * + * A/B -> ... -> C/D + * + * However, there's only one beginning/end chunk offset. Now the + * question is, which of A or B is the actual start? I.e. is the chunk + * offset an offset inside A or B? It depends. 
I'll show a typical read + * cycle, starting here (read pointer marked with a *) with a chunk + * offset of 0: + * + * A B + * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_| + * + * Once the pointer advances far enough, the hardware decides to free + * A, after which the read-side state looks like: + * + * (free) A/B + * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + * |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_| + * + * Then after advancing the pointer a bit more, the hardware fetches + * the "next" pointer for A and stores it in B: + * + * (free) A B + * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_| + * + * Then the read pointer advances into B, at which point we've come + * back to the first state having advanced a whole block: + * + * (free) A B + * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| + * + * + * There is a similar cycle for the write pointer. Now, the question + * is, how do we know which state we're in? We need to know this to + * know whether the pointer (*) is in A or B if they're different. It + * seems like there should be some bit somewhere describing this, but + * after lots of experimentation I've come up empty-handed. For now we + * assume that if the pointer is in the first half, then we're in + * either the first or second state and use B, and otherwise we're in + * the second or third state and use A. So far I haven't seen anything + * that violates this assumption. 
+ */ + + struct { + uint32_t unk0; + uint32_t padding0[7]; /* Mirrors of unk0 */ + + struct { + uint32_t chunk : 3; + uint32_t first_block : 32 - 3; + } writer[6]; + uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */ + + uint32_t unk1; + uint32_t padding2[7]; /* Mirrors of unk1 */ + + uint32_t writer_second_block[6]; + uint32_t padding3[2]; + + uint32_t unk2[6]; + uint32_t padding4[2]; + + struct { + uint32_t chunk : 3; + uint32_t first_block : 32 - 3; + } reader[6]; + uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */ + + uint32_t unk3; + uint32_t padding6[7]; /* Mirrors of unk3 */ + + uint32_t reader_second_block[6]; + uint32_t padding7[2]; + + uint32_t block_count[6]; + uint32_t padding[2]; + + uint32_t unk4; + uint32_t padding9[7]; /* Mirrors of unk4 */ + } data1; + + const uint32_t *data1_ptr = small_mem_pool ? &mempool[0xc00] : &mempool[0x1800]; + memcpy(&data1, data1_ptr, sizeof(data1)); + + /* Based on the kernel, the first dword is the mem pool size (in + * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE. + */ + const uint32_t *data2_ptr = small_mem_pool ? &mempool[0x1000] : &mempool[0x2000]; + const int data2_size = 0x60; + + /* This seems to be the size of each queue in chunks. 
*/ + const uint32_t *queue_sizes = &data2_ptr[0x18]; + + printf("\tdata2:\n"); + dump_hex_ascii(data2_ptr, 4 * data2_size, 1); + + /* These seem to be some kind of counter of allocated/deallocated blocks */ + if (verbose) { + printf("\tunk0: %x\n", data1.unk0); + printf("\tunk1: %x\n", data1.unk1); + printf("\tunk3: %x\n", data1.unk3); + printf("\tunk4: %x\n\n", data1.unk4); + } + + for (int queue = 0; queue < num_queues; queue++) { + const char *cluster_names[6] = { + "FE", "SP_VS", "PC_VS", "GRAS", "SP_PS", "PS" + }; + printf("\tCLUSTER_%s:\n\n", cluster_names[queue]); + + if (verbose) { + printf("\t\twriter_first_block: 0x%x\n", data1.writer[queue].first_block); + printf("\t\twriter_second_block: 0x%x\n", data1.writer_second_block[queue]); + printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk); + printf("\t\treader_first_block: 0x%x\n", data1.reader[queue].first_block); + printf("\t\treader_second_block: 0x%x\n", data1.reader_second_block[queue]); + printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk); + printf("\t\tblock_count: %d\n", data1.block_count[queue]); + printf("\t\tunk2: 0x%x\n", data1.unk2[queue]); + printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]); + } + + uint32_t cur_chunk = data1.reader[queue].chunk; + uint32_t cur_block = cur_chunk > 3 ? + data1.reader[queue].first_block : + data1.reader_second_block[queue]; + uint32_t last_chunk = data1.writer[queue].chunk; + uint32_t last_block = last_chunk > 3 ? 
+ data1.writer[queue].first_block : + data1.writer_second_block[queue]; + + if (verbose) + printf("\tblock %x\n", cur_block); + if (cur_block >= num_blocks) { + fprintf(stderr, "block %x too large\n", cur_block); + exit(1); + } + unsigned calculated_queue_size = 0; + while (cur_block != last_block || cur_chunk != last_chunk) { + calculated_queue_size++; + uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4]; + + dump_mem_pool_chunk(chunk_ptr); + + printf("\t%05x: %08x %08x %08x %08x\n", + 4 * (cur_block * 0x20 + cur_chunk + 4), + chunk_ptr[0], chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]); + + cur_chunk++; + if (cur_chunk == 8) { + cur_block = next_pointers[cur_block]; + if (verbose) + printf("\tblock %x\n", cur_block); + if (cur_block >= num_blocks) { + fprintf(stderr, "block %x too large\n", cur_block); + exit(1); + } + cur_chunk = 0; + } + } + if (calculated_queue_size != queue_sizes[queue]) { + printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n", calculated_queue_size); + } + printf("\n"); + } +} + +static void +decode_indexed_registers(void) +{ + char *name = NULL; + uint32_t sizedwords = 0; + + foreach_line_in_section (line) { + if (startswith(line, " - regs-name:")) { + free(name); + parseline(line, " - regs-name: %ms", &name); + } else if (startswith(line, " dwords:")) { + parseline(line, " dwords: %u", &sizedwords); + } else if (startswith(line, " data: !!ascii85 |")) { + uint32_t *buf = popline_ascii85(sizedwords); + + /* some of the sections are pretty large, and are (at least + * so far) not useful, so skip them if not in verbose mode: + */ + bool dump = verbose || + !strcmp(name, "CP_SEQ_STAT") || + !strcmp(name, "CP_DRAW_STATE") || + !strcmp(name, "CP_ROQ") || + 0; + + if (!strcmp(name, "CP_SEQ_STAT")) + dump_cp_seq_stat(buf); + + if (!strcmp(name, "CP_UCODE_DBG_DATA")) + dump_cp_ucode_dbg(buf); + + /* note that name was typo'd in earlier kernels: */ + if (!strcmp(name, "CP_MEMPOOL") || !strcmp(name, "CP_MEMPOOOL")) + dump_cp_mem_pool(buf); + + 
if (dump) + dump_hex_ascii(buf, 4 * sizedwords, 1); + free(buf); + + continue; + } + + printf("%s", line); + } +} + +/* + * Decode shader-blocks: + */ + +static void +decode_shader_blocks(void) +{ + char *type = NULL; + uint32_t sizedwords = 0; + + foreach_line_in_section (line) { + if (startswith(line, " - type:")) { + free(type); + parseline(line, " - type: %ms", &type); + } else if (startswith(line, " size:")) { + parseline(line, " size: %u", &sizedwords); + } else if (startswith(line, " data: !!ascii85 |")) { + uint32_t *buf = popline_ascii85(sizedwords); + + /* some of the sections are pretty large, and are (at least + * so far) not useful, so skip them if not in verbose mode: + */ + bool dump = verbose || + !strcmp(type, "A6XX_SP_INST_DATA") || + !strcmp(type, "A6XX_HLSQ_INST_RAM") || + 0; + + if (!strcmp(type, "A6XX_SP_INST_DATA") || + !strcmp(type, "A6XX_HLSQ_INST_RAM")) { + /* TODO this section actually contains multiple shaders + * (or parts of shaders?), so perhaps we should search + * for ends of shaders and decode each? 
+ */ + TRY(disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id)); + } + + if (dump) + dump_hex_ascii(buf, 4 * sizedwords, 1); + + free(buf); + + continue; + } + + printf("%s", line); + } + + free(type); +} + +/* + * Decode debugbus section: + */ + +static void +decode_debugbus(void) +{ + char *block = NULL; + uint32_t sizedwords = 0; + + foreach_line_in_section (line) { + if (startswith(line, " - debugbus-block:")) { + free(block); + parseline(line, " - debugbus-block: %ms", &block); + } else if (startswith(line, " count:")) { + parseline(line, " count: %u", &sizedwords); + } else if (startswith(line, " data: !!ascii85 |")) { + uint32_t *buf = popline_ascii85(sizedwords); + + /* some of the sections are pretty large, and are (at least + * so far) not useful, so skip them if not in verbose mode: + */ + bool dump = verbose || + 0; + + if (dump) + dump_hex_ascii(buf, 4 * sizedwords, 1); + + free(buf); + + continue; + } + + printf("%s", line); + } +} + +/* + * Main crashdump decode loop: + */ + +static void +decode(void) +{ + const char *line; + + while ((line = popline())) { + printf("%s", line); + if (startswith(line, "revision:")) { + parseline(line, "revision: %u", &options.gpu_id); + printf("Got gpu_id=%u\n", options.gpu_id); + + cffdec_init(&options); + + if (is_a6xx()) { + rnn_gmu = rnn_new(!options.color); + rnn_load_file(rnn_gmu, "adreno/a6xx_gmu.xml", "A6XX"); + rnn_control = rnn_new(!options.color); + rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", "A6XX_CONTROL_REG"); + rnn_pipe = rnn_new(!options.color); + rnn_load_file(rnn_pipe, "adreno/adreno_pipe_regs.xml", "A6XX_PIPE_REG"); + } else if (is_a5xx()) { + rnn_control = rnn_new(!options.color); + rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", "A5XX_CONTROL_REG"); + } else { + rnn_control = NULL; + } + } else if (startswith(line, "bos:")) { + decode_bos(); + } else if (startswith(line, "ringbuffer:")) { + decode_ringbuffer(); + } else if (startswith(line, "registers:")) { + 
decode_registers(); + + /* after we've recorded buffer contents, and CP register values, + * we can take a stab at decoding the cmdstream: + */ + dump_cmdstream(); + } else if (startswith(line, "registers-gmu:")) { + decode_gmu_registers(); + } else if (startswith(line, "indexed-registers:")) { + decode_indexed_registers(); + } else if (startswith(line, "shader-blocks:")) { + decode_shader_blocks(); + } else if (startswith(line, "clusters:")) { + decode_clusters(); + } else if (startswith(line, "debugbus:")) { + decode_debugbus(); + } + } +} + +/* + * Usage and argument parsing: + */ + +static void +usage(void) +{ + fprintf(stderr, "Usage:\n\n" + "\tcrashdec [-achmsv] [-f FILE]\n\n" + "Options:\n" + "\t-a, --allregs - show all registers (including ones not written since\n" + "\t previous draw) at each draw\n" + "\t-c, --color - use colors\n" + "\t-f, --file=FILE - read input from specified file (rather than stdin)\n" + "\t-h, --help - this usage message\n" + "\t-m, --markers - try to decode CP_NOP string markers\n" + "\t-s, --summary - don't show individual register writes, but just show\n" + "\t register values on draws\n" + "\t-v, --verbose - dump more verbose output, including contents of\n" + "\t less interesting buffers\n" + "\n" + ); + exit(2); +} + +static const struct option opts[] = { + { .name = "allregs", .has_arg = 0, NULL, 'a' }, + { .name = "color", .has_arg = 0, NULL, 'c' }, + { .name = "file", .has_arg = 1, NULL, 'f' }, + { .name = "help", .has_arg = 0, NULL, 'h' }, + { .name = "markers", .has_arg = 0, NULL, 'm' }, + { .name = "summary", .has_arg = 0, NULL, 's' }, + { .name = "verbose", .has_arg = 0, NULL, 'v' }, + {} +}; + +static bool interactive; + +static void +cleanup(void) +{ + fflush(stdout); + + if (interactive) { + pager_close(); + } +} + +int +main(int argc, char **argv) +{ + int c; + + interactive = isatty(STDOUT_FILENO); + options.color = interactive; + + /* default to read from stdin: */ + in = stdin; + + while ((c = getopt_long(argc, 
argv, "acf:hmsv", opts, NULL)) != -1) { + switch (c) { + case 'a': + options.allregs = true; + break; + case 'c': + options.color = true; + break; + case 'f': + in = fopen(optarg, "r"); + break; + case 'm': + options.decode_markers = true; + break; + case 's': + options.summary = true; + break; + case 'v': + verbose = true; + break; + case 'h': + default: + usage(); + } + } + + if (interactive) { + pager_open(); + } + + atexit(cleanup); + + decode(); + cleanup(); +} diff --git a/src/freedreno/decode/disasm-a2xx.c b/src/freedreno/decode/disasm-a2xx.c new file mode 100644 index 00000000000..314c9c1500b --- /dev/null +++ b/src/freedreno/decode/disasm-a2xx.c @@ -0,0 +1,623 @@ +/* + * Copyright (c) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include + +#include "disasm.h" +#include "instr-a2xx.h" +#include "rnnutil.h" + +static const char *levels[] = { + "", + "\t", + "\t\t", + "\t\t\t", + "\t\t\t\t", + "\t\t\t\t\t", + "\t\t\t\t\t\t", + "\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t\t", + "x", + "x", + "x", + "x", + "x", + "x", +}; + +enum debug_t debug; + +static struct rnn *rnn; + +/* + * ALU instructions: + */ + +static const char chan_names[] = { + 'x', 'y', 'z', 'w', + /* these only apply to FETCH dst's: */ + '0', '1', '?', '_', +}; + +static void print_srcreg(uint32_t num, uint32_t type, + uint32_t swiz, uint32_t negate, uint32_t abs) +{ + if (negate) + printf("-"); + if (abs) + printf("|"); + printf("%c%u", type ? 'R' : 'C', num); + if (swiz) { + int i; + printf("."); + for (i = 0; i < 4; i++) { + printf("%c", chan_names[(swiz + i) & 0x3]); + swiz >>= 2; + } + } + if (abs) + printf("|"); +} + +static void print_dstreg(uint32_t num, uint32_t mask, uint32_t dst_exp) +{ + printf("%s%u", dst_exp ? "export" : "R", num); + if (mask != 0xf) { + int i; + printf("."); + for (i = 0; i < 4; i++) { + printf("%c", (mask & 0x1) ? chan_names[i] : '_'); + mask >>= 1; + } + } +} + +static void print_export_comment(uint32_t num, enum shader_t type) +{ + const char *name = NULL; + switch (type) { + case SHADER_VERTEX: + switch (num) { + case 62: name = "gl_Position"; break; + case 63: name = "gl_PointSize"; break; + } + break; + case SHADER_FRAGMENT: + switch (num) { + case 0: name = "gl_FragColor"; break; + } + break; + default: + break; + } + /* if we had a symbol table here, we could look + * up the name of the varying.. 
+ */ + if (name) { + printf("\t; %s", name); + } +} + +struct { + uint32_t num_srcs; + const char *name; +} vector_instructions[0x20] = { +#define INSTR(opc, num_srcs) [opc] = { num_srcs, #opc } + INSTR(ADDv, 2), + INSTR(MULv, 2), + INSTR(MAXv, 2), + INSTR(MINv, 2), + INSTR(SETEv, 2), + INSTR(SETGTv, 2), + INSTR(SETGTEv, 2), + INSTR(SETNEv, 2), + INSTR(FRACv, 1), + INSTR(TRUNCv, 1), + INSTR(FLOORv, 1), + INSTR(MULADDv, 3), + INSTR(CNDEv, 3), + INSTR(CNDGTEv, 3), + INSTR(CNDGTv, 3), + INSTR(DOT4v, 2), + INSTR(DOT3v, 2), + INSTR(DOT2ADDv, 3), // ??? + INSTR(CUBEv, 2), + INSTR(MAX4v, 1), + INSTR(PRED_SETE_PUSHv, 2), + INSTR(PRED_SETNE_PUSHv, 2), + INSTR(PRED_SETGT_PUSHv, 2), + INSTR(PRED_SETGTE_PUSHv, 2), + INSTR(KILLEv, 2), + INSTR(KILLGTv, 2), + INSTR(KILLGTEv, 2), + INSTR(KILLNEv, 2), + INSTR(DSTv, 2), + INSTR(MOVAv, 1), +}, scalar_instructions[0x40] = { + INSTR(ADDs, 1), + INSTR(ADD_PREVs, 1), + INSTR(MULs, 1), + INSTR(MUL_PREVs, 1), + INSTR(MUL_PREV2s, 1), + INSTR(MAXs, 1), + INSTR(MINs, 1), + INSTR(SETEs, 1), + INSTR(SETGTs, 1), + INSTR(SETGTEs, 1), + INSTR(SETNEs, 1), + INSTR(FRACs, 1), + INSTR(TRUNCs, 1), + INSTR(FLOORs, 1), + INSTR(EXP_IEEE, 1), + INSTR(LOG_CLAMP, 1), + INSTR(LOG_IEEE, 1), + INSTR(RECIP_CLAMP, 1), + INSTR(RECIP_FF, 1), + INSTR(RECIP_IEEE, 1), + INSTR(RECIPSQ_CLAMP, 1), + INSTR(RECIPSQ_FF, 1), + INSTR(RECIPSQ_IEEE, 1), + INSTR(MOVAs, 1), + INSTR(MOVA_FLOORs, 1), + INSTR(SUBs, 1), + INSTR(SUB_PREVs, 1), + INSTR(PRED_SETEs, 1), + INSTR(PRED_SETNEs, 1), + INSTR(PRED_SETGTs, 1), + INSTR(PRED_SETGTEs, 1), + INSTR(PRED_SET_INVs, 1), + INSTR(PRED_SET_POPs, 1), + INSTR(PRED_SET_CLRs, 1), + INSTR(PRED_SET_RESTOREs, 1), + INSTR(KILLEs, 1), + INSTR(KILLGTs, 1), + INSTR(KILLGTEs, 1), + INSTR(KILLNEs, 1), + INSTR(KILLONEs, 1), + INSTR(SQRT_IEEE, 1), + INSTR(MUL_CONST_0, 1), + INSTR(MUL_CONST_1, 1), + INSTR(ADD_CONST_0, 1), + INSTR(ADD_CONST_1, 1), + INSTR(SUB_CONST_0, 1), + INSTR(SUB_CONST_1, 1), + INSTR(SIN, 1), + INSTR(COS, 1), + INSTR(RETAIN_PREV, 1), 
+#undef INSTR +}; + +static int disasm_alu(uint32_t *dwords, uint32_t alu_off, + int level, int sync, enum shader_t type) +{ + instr_alu_t *alu = (instr_alu_t *)dwords; + + printf("%s", levels[level]); + if (debug & PRINT_RAW) { + printf("%02x: %08x %08x %08x\t", alu_off, + dwords[0], dwords[1], dwords[2]); + } + + printf(" %sALU:\t", sync ? "(S)" : " "); + + printf("%s", vector_instructions[alu->vector_opc].name); + + if (alu->pred_select & 0x2) { + /* seems to work similar to conditional execution in ARM instruction + * set, so let's use a similar syntax for now: + */ + printf((alu->pred_select & 0x1) ? "EQ" : "NE"); + } + + printf("\t"); + + print_dstreg(alu->vector_dest, alu->vector_write_mask, alu->export_data); + printf(" = "); + if (vector_instructions[alu->vector_opc].num_srcs == 3) { + print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->src3_reg_abs); + printf(", "); + } + print_srcreg(alu->src1_reg, alu->src1_sel, alu->src1_swiz, + alu->src1_reg_negate, alu->src1_reg_abs); + if (vector_instructions[alu->vector_opc].num_srcs > 1) { + printf(", "); + print_srcreg(alu->src2_reg, alu->src2_sel, alu->src2_swiz, + alu->src2_reg_negate, alu->src2_reg_abs); + } + + if (alu->vector_clamp) + printf(" CLAMP"); + + if (alu->export_data) + print_export_comment(alu->vector_dest, type); + + printf("\n"); + + if (alu->scalar_write_mask || !alu->vector_write_mask) { + /* 2nd optional scalar op: */ + + printf("%s", levels[level]); + if (debug & PRINT_RAW) + printf(" \t"); + + if (scalar_instructions[alu->scalar_opc].name) { + printf("\t \t%s\t", scalar_instructions[alu->scalar_opc].name); + } else { + printf("\t \tOP(%u)\t", alu->scalar_opc); + } + + print_dstreg(alu->scalar_dest, alu->scalar_write_mask, alu->export_data); + printf(" = "); + print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->src3_reg_abs); + // TODO ADD/MUL must have another src?!? 
+ if (alu->scalar_clamp) + printf(" CLAMP"); + if (alu->export_data) + print_export_comment(alu->scalar_dest, type); + printf("\n"); + } + + return 0; +} + + +/* + * FETCH instructions: + */ + +static void print_fetch_dst(uint32_t dst_reg, uint32_t dst_swiz) +{ + int i; + printf("\tR%u.", dst_reg); + for (i = 0; i < 4; i++) { + printf("%c", chan_names[dst_swiz & 0x7]); + dst_swiz >>= 3; + } +} + +static void print_fetch_vtx(instr_fetch_t *fetch) +{ + instr_fetch_vtx_t *vtx = &fetch->vtx; + + if (vtx->pred_select) { + /* seems to work similar to conditional execution in ARM instruction + * set, so let's use a similar syntax for now: + */ + printf(vtx->pred_condition ? "EQ" : "NE"); + } + + print_fetch_dst(vtx->dst_reg, vtx->dst_swiz); + printf(" = R%u.", vtx->src_reg); + printf("%c", chan_names[vtx->src_swiz & 0x3]); + + const char *fmt = rnn_enumname(rnn, "a2xx_sq_surfaceformat", vtx->format); + if (fmt) { + printf(" %s", fmt); + } else { + printf(" TYPE(0x%x)", vtx->format); + } + printf(" %s", vtx->format_comp_all ? 
"SIGNED" : "UNSIGNED"); + if (!vtx->num_format_all) + printf(" NORMALIZED"); + printf(" STRIDE(%u)", vtx->stride); + if (vtx->offset) + printf(" OFFSET(%u)", vtx->offset); + printf(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel); + if (0) { + // XXX + printf(" src_reg_am=%u", vtx->src_reg_am); + printf(" dst_reg_am=%u", vtx->dst_reg_am); + printf(" num_format_all=%u", vtx->num_format_all); + printf(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all); + printf(" exp_adjust_all=%u", vtx->exp_adjust_all); + } +} + +static void print_fetch_tex(instr_fetch_t *fetch) +{ + static const char *filter[] = { + [TEX_FILTER_POINT] = "POINT", + [TEX_FILTER_LINEAR] = "LINEAR", + [TEX_FILTER_BASEMAP] = "BASEMAP", + }; + static const char *aniso_filter[] = { + [ANISO_FILTER_DISABLED] = "DISABLED", + [ANISO_FILTER_MAX_1_1] = "MAX_1_1", + [ANISO_FILTER_MAX_2_1] = "MAX_2_1", + [ANISO_FILTER_MAX_4_1] = "MAX_4_1", + [ANISO_FILTER_MAX_8_1] = "MAX_8_1", + [ANISO_FILTER_MAX_16_1] = "MAX_16_1", + }; + static const char *arbitrary_filter[] = { + [ARBITRARY_FILTER_2X4_SYM] = "2x4_SYM", + [ARBITRARY_FILTER_2X4_ASYM] = "2x4_ASYM", + [ARBITRARY_FILTER_4X2_SYM] = "4x2_SYM", + [ARBITRARY_FILTER_4X2_ASYM] = "4x2_ASYM", + [ARBITRARY_FILTER_4X4_SYM] = "4x4_SYM", + [ARBITRARY_FILTER_4X4_ASYM] = "4x4_ASYM", + }; + static const char *sample_loc[] = { + [SAMPLE_CENTROID] = "CENTROID", + [SAMPLE_CENTER] = "CENTER", + }; + instr_fetch_tex_t *tex = &fetch->tex; + uint32_t src_swiz = tex->src_swiz; + int i; + + if (tex->pred_select) { + /* seems to work similar to conditional execution in ARM instruction + * set, so let's use a similar syntax for now: + */ + printf(tex->pred_condition ? 
"EQ" : "NE"); + } + + print_fetch_dst(tex->dst_reg, tex->dst_swiz); + printf(" = R%u.", tex->src_reg); + for (i = 0; i < 3; i++) { + printf("%c", chan_names[src_swiz & 0x3]); + src_swiz >>= 2; + } + printf(" CONST(%u)", tex->const_idx); + if (tex->fetch_valid_only) + printf(" VALID_ONLY"); + if (tex->tx_coord_denorm) + printf(" DENORM"); + if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) + printf(" MAG(%s)", filter[tex->mag_filter]); + if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) + printf(" MIN(%s)", filter[tex->min_filter]); + if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) + printf(" MIP(%s)", filter[tex->mip_filter]); + if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) + printf(" ANISO(%s)", aniso_filter[tex->aniso_filter]); + if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) + printf(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]); + if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) + printf(" VOL_MAG(%s)", filter[tex->vol_mag_filter]); + if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) + printf(" VOL_MIN(%s)", filter[tex->vol_min_filter]); + if (!tex->use_comp_lod) { + printf(" LOD(%u)", tex->use_comp_lod); + printf(" LOD_BIAS(%u)", tex->lod_bias); + } + if (tex->use_reg_lod) { + printf(" REG_LOD(%u)", tex->use_reg_lod); + } + if (tex->use_reg_gradients) + printf(" USE_REG_GRADIENTS"); + printf(" LOCATION(%s)", sample_loc[tex->sample_location]); + if (tex->offset_x || tex->offset_y || tex->offset_z) + printf(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z); +} + +struct { + const char *name; + void (*fxn)(instr_fetch_t *cf); +} fetch_instructions[] = { +#define INSTR(opc, name, fxn) [opc] = { name, fxn } + INSTR(VTX_FETCH, "VERTEX", print_fetch_vtx), + INSTR(TEX_FETCH, "SAMPLE", print_fetch_tex), + INSTR(TEX_GET_BORDER_COLOR_FRAC, "?", print_fetch_tex), + INSTR(TEX_GET_COMP_TEX_LOD, "?", print_fetch_tex), + INSTR(TEX_GET_GRADIENTS, "?", print_fetch_tex), + INSTR(TEX_GET_WEIGHTS, "?", 
print_fetch_tex), + INSTR(TEX_SET_TEX_LOD, "SET_TEX_LOD", print_fetch_tex), + INSTR(TEX_SET_GRADIENTS_H, "?", print_fetch_tex), + INSTR(TEX_SET_GRADIENTS_V, "?", print_fetch_tex), + INSTR(TEX_RESERVED_4, "?", print_fetch_tex), +#undef INSTR +}; + +static int disasm_fetch(uint32_t *dwords, uint32_t alu_off, int level, int sync) +{ + instr_fetch_t *fetch = (instr_fetch_t *)dwords; + + printf("%s", levels[level]); + if (debug & PRINT_RAW) { + printf("%02x: %08x %08x %08x\t", alu_off, + dwords[0], dwords[1], dwords[2]); + } + + printf(" %sFETCH:\t", sync ? "(S)" : " "); + printf("%s", fetch_instructions[fetch->opc].name); + fetch_instructions[fetch->opc].fxn(fetch); + printf("\n"); + + return 0; +} + +/* + * CF instructions: + */ + +static int cf_exec(instr_cf_t *cf) +{ + return (cf->opc == EXEC) || + (cf->opc == EXEC_END) || + (cf->opc == COND_EXEC) || + (cf->opc == COND_EXEC_END) || + (cf->opc == COND_PRED_EXEC) || + (cf->opc == COND_PRED_EXEC_END) || + (cf->opc == COND_EXEC_PRED_CLEAN) || + (cf->opc == COND_EXEC_PRED_CLEAN_END); +} + +static int cf_cond_exec(instr_cf_t *cf) +{ + return (cf->opc == COND_EXEC) || + (cf->opc == COND_EXEC_END) || + (cf->opc == COND_PRED_EXEC) || + (cf->opc == COND_PRED_EXEC_END) || + (cf->opc == COND_EXEC_PRED_CLEAN) || + (cf->opc == COND_EXEC_PRED_CLEAN_END); +} + +static void print_cf_nop(instr_cf_t *cf) +{ +} + +static void print_cf_exec(instr_cf_t *cf) +{ + printf(" ADDR(0x%x) CNT(0x%x)", cf->exec.address, cf->exec.count); + if (cf->exec.yeild) + printf(" YIELD"); + if (cf->exec.vc) + printf(" VC(0x%x)", cf->exec.vc); + if (cf->exec.bool_addr) + printf(" BOOL_ADDR(0x%x)", cf->exec.bool_addr); + if (cf->exec.address_mode == ABSOLUTE_ADDR) + printf(" ABSOLUTE_ADDR"); + if (cf_cond_exec(cf)) + printf(" COND(%d)", cf->exec.condition); +} + +static void print_cf_loop(instr_cf_t *cf) +{ + printf(" ADDR(0x%x) LOOP_ID(%d)", cf->loop.address, cf->loop.loop_id); + if (cf->loop.address_mode == ABSOLUTE_ADDR) + printf(" ABSOLUTE_ADDR"); +} + 
+static void print_cf_jmp_call(instr_cf_t *cf) +{ + printf(" ADDR(0x%x) DIR(%d)", cf->jmp_call.address, cf->jmp_call.direction); + if (cf->jmp_call.force_call) + printf(" FORCE_CALL"); + if (cf->jmp_call.predicated_jmp) + printf(" COND(%d)", cf->jmp_call.condition); + if (cf->jmp_call.bool_addr) + printf(" BOOL_ADDR(0x%x)", cf->jmp_call.bool_addr); + if (cf->jmp_call.address_mode == ABSOLUTE_ADDR) + printf(" ABSOLUTE_ADDR"); +} + +static void print_cf_alloc(instr_cf_t *cf) +{ + static const char *bufname[] = { + [SQ_NO_ALLOC] = "NO ALLOC", + [SQ_POSITION] = "POSITION", + [SQ_PARAMETER_PIXEL] = "PARAM/PIXEL", + [SQ_MEMORY] = "MEMORY", + }; + printf(" %s SIZE(0x%x)", bufname[cf->alloc.buffer_select], cf->alloc.size); + if (cf->alloc.no_serial) + printf(" NO_SERIAL"); + if (cf->alloc.alloc_mode) // ??? + printf(" ALLOC_MODE"); +} + +struct { + const char *name; + void (*fxn)(instr_cf_t *cf); +} cf_instructions[] = { +#define INSTR(opc, fxn) [opc] = { #opc, fxn } + INSTR(NOP, print_cf_nop), + INSTR(EXEC, print_cf_exec), + INSTR(EXEC_END, print_cf_exec), + INSTR(COND_EXEC, print_cf_exec), + INSTR(COND_EXEC_END, print_cf_exec), + INSTR(COND_PRED_EXEC, print_cf_exec), + INSTR(COND_PRED_EXEC_END, print_cf_exec), + INSTR(LOOP_START, print_cf_loop), + INSTR(LOOP_END, print_cf_loop), + INSTR(COND_CALL, print_cf_jmp_call), + INSTR(RETURN, print_cf_jmp_call), + INSTR(COND_JMP, print_cf_jmp_call), + INSTR(ALLOC, print_cf_alloc), + INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec), + INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec), + INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ?? 
+#undef INSTR +}; + +static void print_cf(instr_cf_t *cf, int level) +{ + printf("%s", levels[level]); + if (debug & PRINT_RAW) { + uint16_t *words = (uint16_t *)cf; + printf(" %04x %04x %04x \t", + words[0], words[1], words[2]); + } + printf("%s", cf_instructions[cf->opc].name); + cf_instructions[cf->opc].fxn(cf); + printf("\n"); +} + +/* + * The adreno shader microcode consists of two parts: + * 1) A CF (control-flow) program, at the header of the compiled shader, + * which refers to ALU/FETCH instructions that follow it by address. + * 2) ALU and FETCH instructions + */ + +int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type) +{ + instr_cf_t *cfs = (instr_cf_t *)dwords; + int idx, max_idx; + + if (!rnn) { + rnn = rnn_new(1); + rnn_load(rnn, "a2xx"); + } + + for (idx = 0; ; idx++) { + instr_cf_t *cf = &cfs[idx]; + if (cf_exec(cf)) { + max_idx = 2 * cf->exec.address; + break; + } + } + + for (idx = 0; idx < max_idx; idx++) { + instr_cf_t *cf = &cfs[idx]; + + print_cf(cf, level); + + if (cf_exec(cf)) { + uint32_t sequence = cf->exec.serialize; + uint32_t i; + for (i = 0; i < cf->exec.count; i++) { + uint32_t alu_off = (cf->exec.address + i); + if (sequence & 0x1) { + disasm_fetch(dwords + alu_off * 3, alu_off, level, sequence & 0x2); + } else { + disasm_alu(dwords + alu_off * 3, alu_off, level, sequence & 0x2, type); + } + sequence >>= 2; + } + } + } + + return 0; +} + +void disasm_set_debug(enum debug_t d) +{ + debug = d; +} diff --git a/src/freedreno/decode/disasm-a3xx.c b/src/freedreno/decode/disasm-a3xx.c new file mode 100644 index 00000000000..9645dc5f41b --- /dev/null +++ b/src/freedreno/decode/disasm-a3xx.c @@ -0,0 +1,1641 @@ +/* + * Copyright (c) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, 
merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "disasm.h" +#include "instr-a3xx.h" + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +extern enum debug_t debug; + +static const char *levels[] = { + "", + "\t", + "\t\t", + "\t\t\t", + "\t\t\t\t", + "\t\t\t\t\t", + "\t\t\t\t\t\t", + "\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t\t", + "x", + "x", + "x", + "x", + "x", + "x", +}; + +static const char *component = "xyzw"; + +static const char *type[] = { + [TYPE_F16] = "f16", + [TYPE_F32] = "f32", + [TYPE_U16] = "u16", + [TYPE_U32] = "u32", + [TYPE_S16] = "s16", + [TYPE_S32] = "s32", + [TYPE_U8] = "u8", + [TYPE_S8] = "s8", +}; + + +#define MAX_REG 4096 + +typedef struct { + uint8_t full[MAX_REG/8]; + uint8_t half[MAX_REG/8]; +} regmask_t; + +struct disasm_ctx { + FILE *out; + int level; + unsigned gpu_id; + + struct shader_stats *stats; + + /* we have to process the dst register after src to avoid tripping up + * the read-before-write detection + */ + unsigned last_dst; + bool last_dst_full; + bool last_dst_valid; + + /* current instruction repeat flag: */ + unsigned repeat; + 
/* current instruction repeat indx/offset (for --expand): */ + unsigned repeatidx; + + /* tracking for register usage */ + struct { + regmask_t used; + regmask_t used_merged; + regmask_t rbw; /* read before write */ + regmask_t war; /* write after read */ + regmask_t cnst; /* used consts */ + } regs; +}; + +static const char *float_imms[] = { + "0.0", + "0.5", + "1.0", + "2.0", + "e", + "pi", + "1/pi", + "1/log2(e)", + "log2(e)", + "1/log2(10)", + "log2(10)", + "4.0", +}; + +static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, + bool is_float, bool r, + bool c, bool im, bool neg, bool abs, bool addr_rel) +{ + const char type = c ? 'c' : 'r'; + + // XXX I prefer - and || for neg/abs, but preserving format used + // by libllvm-a3xx for easy diffing.. + + if (abs && neg) + fprintf(ctx->out, "(absneg)"); + else if (neg) + fprintf(ctx->out, "(neg)"); + else if (abs) + fprintf(ctx->out, "(abs)"); + + if (r) + fprintf(ctx->out, "(r)"); + + if (im) { + if (is_float && full && reg.iim_val < ARRAY_SIZE(float_imms)) { + fprintf(ctx->out, "(%s)", float_imms[reg.iim_val]); + } else { + fprintf(ctx->out, "%d", reg.iim_val); + } + } else if (addr_rel) { + /* I would just use %+d but trying to make it diff'able with + * libllvm-a3xx... + */ + if (reg.iim_val < 0) + fprintf(ctx->out, "%s%c", full ? "" : "h", type, -reg.iim_val); + else if (reg.iim_val > 0) + fprintf(ctx->out, "%s%c", full ? "" : "h", type, reg.iim_val); + else + fprintf(ctx->out, "%s%c", full ? "" : "h", type); + } else if ((reg.num == REG_A0) && !c) { + /* This matches libllvm output, the second (scalar) address register + * seems to be called a1.x instead of a0.y. + */ + fprintf(ctx->out, "a%d.x", reg.comp); + } else if ((reg.num == REG_P0) && !c) { + fprintf(ctx->out, "p0.%c", component[reg.comp]); + } else { + fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]); + } +} + +/* Tracking for registers used, read-before-write (input), and + * write-after-read (output.. 
but not 100%).. + */ + +static void regmask_set(regmask_t *regmask, unsigned num, bool full, unsigned val) +{ + unsigned i = num / 8; + unsigned j = num % 8; + ir3_assert(num < MAX_REG); + if (full) { + regmask->full[i] = (regmask->full[i] & ~(1 << j)) | (val << j); + } else { + regmask->half[i] = (regmask->half[i] & ~(1 << j)) | (val << j); + } +} + +static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full) +{ + unsigned i = num / 8; + unsigned j = num % 8; + ir3_assert(num < MAX_REG); + if (full) { + return (regmask->full[i] >> j) & 0x1; + } else { + return (regmask->half[i] >> j) & 0x1; + } +} + +static unsigned regidx(reg_t reg) +{ + return (4 * reg.num) + reg.comp; +} + +static reg_t idxreg(unsigned idx) +{ + return (reg_t){ + .comp = idx & 0x3, + .num = idx >> 2, + }; +} + +static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full) +{ + int num, max = 0, cnt = 0; + int first, last; + + void print_sequence(void) + { + if (first != MAX_REG) { + if (first == last) { + fprintf(ctx->out, " %d", first); + } else { + fprintf(ctx->out, " %d-%d", first, last); + } + } + } + + first = last = MAX_REG; + + for (num = 0; num < MAX_REG; num++) { + if (regmask_get(regmask, num, full)) { + if (num != (last + 1)) { + print_sequence(); + first = num; + } + last = num; + if (num < (48*4)) + max = num; + cnt++; + } + } + + print_sequence(); + + fprintf(ctx->out, " (cnt=%d, max=%d)", cnt, max); + + return max; +} + +static void print_reg_stats(struct disasm_ctx *ctx) +{ + int fullreg, halfreg; + + fprintf(ctx->out, "%sRegister Stats:\n", levels[ctx->level]); + fprintf(ctx->out, "%s- used (half):", levels[ctx->level]); + halfreg = print_regs(ctx, &ctx->regs.used, false); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- used (full):", levels[ctx->level]); + fullreg = print_regs(ctx, &ctx->regs.used, true); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- used (merged):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.used_merged, false); + 
fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- input (half):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.rbw, false); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- input (full):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.rbw, true); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- const (half):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.cnst, false); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- const (full):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.cnst, true); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- output (half):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.war, false); + fprintf(ctx->out, " (estimated)\n"); + fprintf(ctx->out, "%s- output (full):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.war, true); + fprintf(ctx->out, " (estimated)\n"); + + /* convert to vec4, which is the granularity that registers are + * assigned to shader: + */ + fullreg = (fullreg + 3) / 4; + halfreg = (halfreg + 3) / 4; + + // Note this count of instructions includes rptN, which matches + // up to how mesa prints this: + fprintf(ctx->out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, " + "(%d instlen), %d half, %d full\n", + levels[ctx->level], ctx->stats->instructions, ctx->stats->nops, + ctx->stats->instructions - ctx->stats->nops, ctx->stats->instlen, + halfreg, fullreg); + fprintf(ctx->out, "%s- shaderdb: %d (ss), %d (sy)\n", levels[ctx->level], + ctx->stats->ss, ctx->stats->sy); +} + +static void process_reg_dst(struct disasm_ctx *ctx) +{ + int i; + + if (!ctx->last_dst_valid) + return; + + for (i = 0; i <= ctx->repeat; i++) { + unsigned dst = ctx->last_dst + i; + + regmask_set(&ctx->regs.war, dst, ctx->last_dst_full, 1); + regmask_set(&ctx->regs.used, dst, ctx->last_dst_full, 1); + + if (ctx->last_dst_full) { + regmask_set(&ctx->regs.used_merged, (dst*2)+0, false, 1); + regmask_set(&ctx->regs.used_merged, (dst*2)+1, false, 1); + } else { + regmask_set(&ctx->regs.used_merged, dst, false, 1); 
+ } + } + + ctx->last_dst_valid = false; +} + +static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel) +{ + /* presumably the special registers a0.c and p0.c don't count.. */ + if (!(addr_rel || (reg.num == 61) || (reg.num == 62))) { + ctx->last_dst = regidx(reg); + ctx->last_dst_full = full; + ctx->last_dst_valid = true; + } + reg = idxreg(regidx(reg) + ctx->repeatidx); + print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel); +} + +static void print_reg_src(struct disasm_ctx *ctx, reg_t reg, bool full, bool f, bool r, + bool c, bool im, bool neg, bool abs, bool addr_rel) +{ + /* presumably the special registers a0.c and p0.c don't count.. */ + if (!(addr_rel || c || im || (reg.num == 61) || (reg.num == 62))) { + int i, num = regidx(reg); + for (i = 0; i <= ctx->repeat; i++) { + unsigned src = num + i; + + if (!regmask_get(&ctx->regs.used, src, full)) + regmask_set(&ctx->regs.rbw, src, full, 1); + + regmask_set(&ctx->regs.war, src, full, 0); + regmask_set(&ctx->regs.used, src, full, 1); + + if (full) { + regmask_set(&ctx->regs.used_merged, (src*2)+0, false, 1); + regmask_set(&ctx->regs.used_merged, (src*2)+1, false, 1); + } else { + regmask_set(&ctx->regs.used_merged, src, false, 1); + } + + if (!r) + break; + } + } else if (c) { + int i, num = regidx(reg); + for (i = 0; i <= ctx->repeat; i++) { + unsigned src = num + i; + + regmask_set(&ctx->regs.cnst, src, full, 1); + + if (!r) + break; + } + + unsigned max = (num + ctx->repeat + 1 + 3) / 4; + if (max > ctx->stats->constlen) + ctx->stats->constlen = max; + } + + if (r) + reg = idxreg(regidx(reg) + ctx->repeatidx); + + print_reg(ctx, reg, full, f, r, c, im, neg, abs, addr_rel); +} + +/* TODO switch to using reginfo struct everywhere, since more readable + * than passing a bunch of bools to print_reg_src + */ + +struct reginfo { + reg_t reg; + bool full; + bool r; + bool c; + bool f; /* src reg is interpreted as float, used for printing immediates */ + bool im; 
+ bool neg; + bool abs; + bool addr_rel; +}; + +static void print_src(struct disasm_ctx *ctx, struct reginfo *info) +{ + reg_t reg = info->reg; + + if (info->r) + reg = idxreg(regidx(info->reg) + ctx->repeatidx); + + print_reg_src(ctx, reg, info->full, info->f, info->r, info->c, info->im, + info->neg, info->abs, info->addr_rel); +} + +//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info) +//{ +// print_reg_dst(ctx, info->reg, info->full, info->addr_rel); +//} + +static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr) +{ + static const struct { + const char *suffix; + int nsrc; + bool idx; + } brinfo[7] = { + [BRANCH_PLAIN] = { "r", 1, false }, + [BRANCH_OR] = { "rao", 2, false }, + [BRANCH_AND] = { "raa", 2, false }, + [BRANCH_CONST] = { "rac", 0, true }, + [BRANCH_ANY] = { "any", 1, false }, + [BRANCH_ALL] = { "all", 1, false }, + [BRANCH_X] = { "rax", 0, false }, + }; + instr_cat0_t *cat0 = &instr->cat0; + + switch (instr_opc(instr, ctx->gpu_id)) { + case OPC_KILL: + case OPC_PREDT: + case OPC_PREDF: + fprintf(ctx->out, " %sp0.%c", cat0->inv0 ? "!" : "", + component[cat0->comp0]); + break; + case OPC_B: + fprintf(ctx->out, "%s", brinfo[cat0->brtype].suffix); + if (brinfo[cat0->brtype].idx) { + fprintf(ctx->out, ".%u", cat0->idx); + } + if (brinfo[cat0->brtype].nsrc >= 1) { + fprintf(ctx->out, " %sp0.%c,", cat0->inv0 ? "!" : "", + component[cat0->comp0]); + } + if (brinfo[cat0->brtype].nsrc >= 2) { + fprintf(ctx->out, " %sp0.%c,", cat0->inv1 ? "!" 
: "", + component[cat0->comp1]); + } + fprintf(ctx->out, " #%d", cat0->a3xx.immed); + break; + case OPC_JUMP: + case OPC_CALL: + case OPC_BKT: + case OPC_GETONE: + case OPC_SHPS: + fprintf(ctx->out, " #%d", cat0->a3xx.immed); + break; + } + + if ((debug & PRINT_VERBOSE) && (cat0->dummy3|cat0->dummy4)) + fprintf(ctx->out, "\t{0: %x,%x}", cat0->dummy3, cat0->dummy4); +} + +static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat1_t *cat1 = &instr->cat1; + + if (cat1->ul) + fprintf(ctx->out, "(ul)"); + + if (cat1->src_type == cat1->dst_type) { + if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { + /* special case (nmemonic?): */ + fprintf(ctx->out, "mova"); + } else { + fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); + } + } else { + fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); + } + + fprintf(ctx->out, " "); + + if (cat1->even) + fprintf(ctx->out, "(even)"); + + if (cat1->pos_inf) + fprintf(ctx->out, "(pos_infinity)"); + + print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, + cat1->dst_rel); + + fprintf(ctx->out, ", "); + + /* ugg, have to special case this.. vs print_reg().. */ + if (cat1->src_im) { + if (type_float(cat1->src_type)) + fprintf(ctx->out, "(%f)", cat1->fim_val); + else if (type_uint(cat1->src_type)) + fprintf(ctx->out, "0x%08x", cat1->uim_val); + else + fprintf(ctx->out, "%d", cat1->iim_val); + } else if (cat1->src_rel && !cat1->src_c) { + /* I would just use %+d but trying to make it diff'able with + * libllvm-a3xx... + */ + char type = cat1->src_rel_c ? 'c' : 'r'; + const char *full = (type_size(cat1->src_type) == 32) ? 
"" : "h"; + if (cat1->off < 0) + fprintf(ctx->out, "%s%c", full, type, -cat1->off); + else if (cat1->off > 0) + fprintf(ctx->out, "%s%c", full, type, cat1->off); + else + fprintf(ctx->out, "%s%c", full, type); + } else { + struct reginfo src = { + .reg = (reg_t)cat1->src, + .full = type_size(cat1->src_type) == 32, + .r = cat1->src_r, + .c = cat1->src_c, + .im = cat1->src_im, + }; + print_src(ctx, &src); + } + + if ((debug & PRINT_VERBOSE) && (cat1->must_be_0)) + fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0); +} + +static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat2_t *cat2 = &instr->cat2; + int opc = _OPC(2, cat2->opc); + static const char *cond[] = { + "lt", + "le", + "gt", + "ge", + "eq", + "ne", + "?6?", + }; + + switch (opc) { + case OPC_CMPS_F: + case OPC_CMPS_U: + case OPC_CMPS_S: + case OPC_CMPV_F: + case OPC_CMPV_U: + case OPC_CMPV_S: + fprintf(ctx->out, ".%s", cond[cat2->cond]); + break; + } + + fprintf(ctx->out, " "); + if (cat2->ei) + fprintf(ctx->out, "(ei)"); + print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); + fprintf(ctx->out, ", "); + + struct reginfo src1 = { + .full = cat2->full, + .r = cat2->repeat ? cat2->src1_r : 0, + .f = is_cat2_float(opc), + .im = cat2->src1_im, + .abs = cat2->src1_abs, + .neg = cat2->src1_neg, + }; + + if (cat2->c1.src1_c) { + src1.reg = (reg_t)(cat2->c1.src1); + src1.c = true; + } else if (cat2->rel1.src1_rel) { + src1.reg = (reg_t)(cat2->rel1.src1); + src1.c = cat2->rel1.src1_c; + src1.addr_rel = true; + } else { + src1.reg = (reg_t)(cat2->src1); + } + print_src(ctx, &src1); + + struct reginfo src2 = { + .r = cat2->repeat ? 
cat2->src2_r : 0, + .full = cat2->full, + .f = is_cat2_float(opc), + .abs = cat2->src2_abs, + .neg = cat2->src2_neg, + .im = cat2->src2_im, + }; + switch (opc) { + case OPC_ABSNEG_F: + case OPC_ABSNEG_S: + case OPC_CLZ_B: + case OPC_CLZ_S: + case OPC_SIGN_F: + case OPC_FLOOR_F: + case OPC_CEIL_F: + case OPC_RNDNE_F: + case OPC_RNDAZ_F: + case OPC_TRUNC_F: + case OPC_NOT_B: + case OPC_BFREV_B: + case OPC_SETRM: + case OPC_CBITS_B: + /* these only have one src reg */ + break; + default: + fprintf(ctx->out, ", "); + if (cat2->c2.src2_c) { + src2.reg = (reg_t)(cat2->c2.src2); + src2.c = true; + } else if (cat2->rel2.src2_rel) { + src2.reg = (reg_t)(cat2->rel2.src2); + src2.c = cat2->rel2.src2_c; + src2.addr_rel = true; + } else { + src2.reg = (reg_t)(cat2->src2); + } + print_src(ctx, &src2); + break; + } +} + +static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat3_t *cat3 = &instr->cat3; + bool full = instr_cat3_full(cat3); + + fprintf(ctx->out, " "); + print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false); + fprintf(ctx->out, ", "); + + struct reginfo src1 = { + .r = cat3->repeat ? cat3->src1_r : 0, + .full = full, + .neg = cat3->src1_neg, + }; + if (cat3->c1.src1_c) { + src1.reg = (reg_t)(cat3->c1.src1); + src1.c = true; + } else if (cat3->rel1.src1_rel) { + src1.reg = (reg_t)(cat3->rel1.src1); + src1.c = cat3->rel1.src1_c; + src1.addr_rel = true; + } else { + src1.reg = (reg_t)(cat3->src1); + } + print_src(ctx, &src1); + + fprintf(ctx->out, ", "); + struct reginfo src2 = { + .reg = (reg_t)cat3->src2, + .full = full, + .r = cat3->repeat ? 
cat3->src2_r : 0, + .c = cat3->src2_c, + .neg = cat3->src2_neg, + }; + print_src(ctx, &src2); + + fprintf(ctx->out, ", "); + struct reginfo src3 = { + .r = cat3->src3_r, + .full = full, + .neg = cat3->src3_neg, + }; + if (cat3->c2.src3_c) { + src3.reg = (reg_t)(cat3->c2.src3); + src3.c = true; + } else if (cat3->rel2.src3_rel) { + src3.reg = (reg_t)(cat3->rel2.src3); + src3.c = cat3->rel2.src3_c; + src3.addr_rel = true; + } else { + src3.reg = (reg_t)(cat3->src3); + } + print_src(ctx, &src3); +} + +static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat4_t *cat4 = &instr->cat4; + + fprintf(ctx->out, " "); + print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); + fprintf(ctx->out, ", "); + + struct reginfo src = { + .r = cat4->src_r, + .im = cat4->src_im, + .full = cat4->full, + .neg = cat4->src_neg, + .abs = cat4->src_abs, + }; + if (cat4->c.src_c) { + src.reg = (reg_t)(cat4->c.src); + src.c = true; + } else if (cat4->rel.src_rel) { + src.reg = (reg_t)(cat4->rel.src); + src.c = cat4->rel.src_c; + src.addr_rel = true; + } else { + src.reg = (reg_t)(cat4->src); + } + print_src(ctx, &src); + + if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) + fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); +} + +static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr) +{ + static const struct { + bool src1, src2, samp, tex; + } info[0x1f] = { + [opc_op(OPC_ISAM)] = { true, false, true, true, }, + [opc_op(OPC_ISAML)] = { true, true, true, true, }, + [opc_op(OPC_ISAMM)] = { true, false, true, true, }, + [opc_op(OPC_SAM)] = { true, false, true, true, }, + [opc_op(OPC_SAMB)] = { true, true, true, true, }, + [opc_op(OPC_SAML)] = { true, true, true, true, }, + [opc_op(OPC_SAMGQ)] = { true, false, true, true, }, + [opc_op(OPC_GETLOD)] = { true, false, true, true, }, + [opc_op(OPC_CONV)] = { true, true, true, true, }, + [opc_op(OPC_CONVM)] = { true, true, true, true, }, + [opc_op(OPC_GETSIZE)] = { true, 
false, false, true, }, + [opc_op(OPC_GETBUF)] = { false, false, false, true, }, + [opc_op(OPC_GETPOS)] = { true, false, false, true, }, + [opc_op(OPC_GETINFO)] = { false, false, false, true, }, + [opc_op(OPC_DSX)] = { true, false, false, false, }, + [opc_op(OPC_DSY)] = { true, false, false, false, }, + [opc_op(OPC_GATHER4R)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4G)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4B)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4A)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP0)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP1)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP2)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP3)] = { true, false, true, true, }, + [opc_op(OPC_DSXPP_1)] = { true, false, false, false, }, + [opc_op(OPC_DSYPP_1)] = { true, false, false, false, }, + [opc_op(OPC_RGETPOS)] = { true, false, false, false, }, + [opc_op(OPC_RGETINFO)] = { false, false, false, false, }, + }; + + static const struct { + bool indirect; + bool bindless; + bool use_a1; + bool uniform; + } desc_features[8] = { + [CAT5_NONUNIFORM] = { .indirect = true, }, + [CAT5_UNIFORM] = { .indirect = true, .uniform = true, }, + [CAT5_BINDLESS_IMM] = { .bindless = true, }, + [CAT5_BINDLESS_UNIFORM] = { + .bindless = true, + .indirect = true, + .uniform = true, + }, + [CAT5_BINDLESS_NONUNIFORM] = { + .bindless = true, + .indirect = true, + }, + [CAT5_BINDLESS_A1_IMM] = { + .bindless = true, + .use_a1 = true, + }, + [CAT5_BINDLESS_A1_UNIFORM] = { + .bindless = true, + .indirect = true, + .uniform = true, + .use_a1 = true, + }, + [CAT5_BINDLESS_A1_NONUNIFORM] = { + .bindless = true, + .indirect = true, + .use_a1 = true, + }, + }; + + instr_cat5_t *cat5 = &instr->cat5; + int i; + + bool desc_indirect = + cat5->is_s2en_bindless && + desc_features[cat5->s2en_bindless.desc_mode].indirect; + bool bindless = + cat5->is_s2en_bindless && + desc_features[cat5->s2en_bindless.desc_mode].bindless; + bool use_a1 = + 
cat5->is_s2en_bindless && + desc_features[cat5->s2en_bindless.desc_mode].use_a1; + bool uniform = + cat5->is_s2en_bindless && + desc_features[cat5->s2en_bindless.desc_mode].uniform; + + if (cat5->is_3d) fprintf(ctx->out, ".3d"); + if (cat5->is_a) fprintf(ctx->out, ".a"); + if (cat5->is_o) fprintf(ctx->out, ".o"); + if (cat5->is_p) fprintf(ctx->out, ".p"); + if (cat5->is_s) fprintf(ctx->out, ".s"); + if (desc_indirect) fprintf(ctx->out, ".s2en"); + if (uniform) fprintf(ctx->out, ".uniform"); + + if (bindless) { + unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo; + fprintf(ctx->out, ".base%d", base); + } + + fprintf(ctx->out, " "); + + switch (_OPC(5, cat5->opc)) { + case OPC_DSXPP_1: + case OPC_DSYPP_1: + break; + default: + fprintf(ctx->out, "(%s)", type[cat5->type]); + break; + } + + fprintf(ctx->out, "("); + for (i = 0; i < 4; i++) + if (cat5->wrmask & (1 << i)) + fprintf(ctx->out, "%c", "xyzw"[i]); + fprintf(ctx->out, ")"); + + print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false); + + if (info[cat5->opc].src1) { + fprintf(ctx->out, ", "); + struct reginfo src = { .reg = (reg_t)(cat5->src1), .full = cat5->full }; + print_src(ctx, &src); + } + + if (cat5->is_o || info[cat5->opc].src2) { + fprintf(ctx->out, ", "); + struct reginfo src = { .reg = (reg_t)(cat5->src2), .full = cat5->full }; + print_src(ctx, &src); + } + if (cat5->is_s2en_bindless) { + if (!desc_indirect) { + if (info[cat5->opc].samp) { + if (use_a1) + fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3); + else + fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf); + } + + if (info[cat5->opc].tex && !use_a1) { + fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4); + } + } + } else { + if (info[cat5->opc].samp) + fprintf(ctx->out, ", s#%d", cat5->norm.samp); + if (info[cat5->opc].tex) + fprintf(ctx->out, ", t#%d", cat5->norm.tex); + } + + if (desc_indirect) { + fprintf(ctx->out, ", "); + struct reginfo src = { .reg = 
(reg_t)(cat5->s2en_bindless.src3), .full = bindless }; + print_src(ctx, &src); + } + + if (use_a1) + fprintf(ctx->out, ", a1.x"); + + if (debug & PRINT_VERBOSE) { + if (cat5->is_s2en_bindless) { + if ((debug & PRINT_VERBOSE) && cat5->s2en_bindless.dummy1) + fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1); + } else { + if ((debug & PRINT_VERBOSE) && cat5->norm.dummy1) + fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1); + } + } +} + +static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat6_t *cat6 = &instr->cat6; + char sd = 0, ss = 0; /* dst/src address space */ + bool nodst = false; + struct reginfo dst, src1, src2; + int src1off = 0, dstoff = 0; + + memset(&dst, 0, sizeof(dst)); + memset(&src1, 0, sizeof(src1)); + memset(&src2, 0, sizeof(src2)); + + switch (_OPC(6, cat6->opc)) { + case OPC_RESINFO: + case OPC_RESFMT: + dst.full = type_size(cat6->type) == 32; + src1.full = type_size(cat6->type) == 32; + src2.full = type_size(cat6->type) == 32; + break; + case OPC_L2G: + case OPC_G2L: + dst.full = true; + src1.full = true; + src2.full = true; + break; + case OPC_STG: + case OPC_STL: + case OPC_STP: + case OPC_STLW: + case OPC_STIB: + dst.full = type_size(cat6->type) == 32; + src1.full = type_size(cat6->type) == 32; + src2.full = type_size(cat6->type) == 32; + break; + default: + dst.full = type_size(cat6->type) == 32; + src1.full = true; + src2.full = true; + break; + } + + switch (_OPC(6, cat6->opc)) { + case OPC_PREFETCH: + break; + case OPC_RESINFO: + fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); + break; + case OPC_LDGB: + fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); + break; + case OPC_STGB: + case OPC_STIB: + fprintf(ctx->out, ".%s", cat6->stgb.typed ? 
"typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->stgb.d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1); + break; + case OPC_ATOMIC_ADD: + case OPC_ATOMIC_SUB: + case OPC_ATOMIC_XCHG: + case OPC_ATOMIC_INC: + case OPC_ATOMIC_DEC: + case OPC_ATOMIC_CMPXCHG: + case OPC_ATOMIC_MIN: + case OPC_ATOMIC_MAX: + case OPC_ATOMIC_AND: + case OPC_ATOMIC_OR: + case OPC_ATOMIC_XOR: + ss = cat6->g ? 'g' : 'l'; + fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); + fprintf(ctx->out, ".%c", ss); + break; + default: + dst.im = cat6->g && !cat6->dst_off; + fprintf(ctx->out, ".%s", type[cat6->type]); + break; + } + fprintf(ctx->out, " "); + + switch (_OPC(6, cat6->opc)) { + case OPC_STG: + sd = 'g'; + break; + case OPC_STP: + sd = 'p'; + break; + case OPC_STL: + case OPC_STLW: + sd = 'l'; + break; + + case OPC_LDG: + case OPC_LDC: + ss = 'g'; + break; + case OPC_LDP: + ss = 'p'; + break; + case OPC_LDL: + case OPC_LDLW: + case OPC_LDLV: + ss = 'l'; + break; + + case OPC_L2G: + ss = 'l'; + sd = 'g'; + break; + + case OPC_G2L: + ss = 'g'; + sd = 'l'; + break; + + case OPC_PREFETCH: + ss = 'g'; + nodst = true; + break; + } + + if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) { + struct reginfo src3; + + memset(&src3, 0, sizeof(src3)); + + src1.reg = (reg_t)(cat6->stgb.src1); + src2.reg = (reg_t)(cat6->stgb.src2); + src2.im = cat6->stgb.src2_im; + src3.reg = (reg_t)(cat6->stgb.src3); + src3.im = cat6->stgb.src3_im; + src3.full = true; + + fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo); + print_src(ctx, &src1); + fprintf(ctx->out, ", "); + print_src(ctx, &src2); + fprintf(ctx->out, ", "); + print_src(ctx, &src3); + + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3); + + return; + } + + 
if (is_atomic(_OPC(6, cat6->opc))) { + + src1.reg = (reg_t)(cat6->ldgb.src1); + src1.im = cat6->ldgb.src1_im; + src2.reg = (reg_t)(cat6->ldgb.src2); + src2.im = cat6->ldgb.src2_im; + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(ctx, &dst); + fprintf(ctx->out, ", "); + if (ss == 'g') { + struct reginfo src3; + memset(&src3, 0, sizeof(src3)); + + src3.reg = (reg_t)(cat6->ldgb.src3); + src3.full = true; + + /* For images, the ".typed" variant is used and src2 is + * the ivecN coordinates, ie ivec2 for 2d. + * + * For SSBOs, the ".untyped" variant is used and src2 is + * a simple dword offset.. src3 appears to be + * uvec2(offset * 4, 0). Not sure the point of that. + */ + + fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); + print_src(ctx, &src1); /* value */ + fprintf(ctx->out, ", "); + print_src(ctx, &src2); /* offset/coords */ + fprintf(ctx->out, ", "); + print_src(ctx, &src3); /* 64b byte offset.. */ + + if (debug & PRINT_VERBOSE) { + fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, + cat6->ldgb.pad3, cat6->ldgb.mustbe0); + } + } else { /* ss == 'l' */ + fprintf(ctx->out, "l["); + print_src(ctx, &src1); /* simple byte offset */ + fprintf(ctx->out, "], "); + print_src(ctx, &src2); /* value */ + + if (debug & PRINT_VERBOSE) { + fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)", + cat6->ldgb.src3, cat6->ldgb.pad0, + cat6->ldgb.pad3, cat6->ldgb.mustbe0); + } + } + + return; + } else if (_OPC(6, cat6->opc) == OPC_RESINFO) { + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(ctx, &dst); + fprintf(ctx->out, ", "); + fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo); + + return; + } else if (_OPC(6, cat6->opc) == OPC_LDGB) { + + src1.reg = (reg_t)(cat6->ldgb.src1); + src1.im = cat6->ldgb.src1_im; + src2.reg = (reg_t)(cat6->ldgb.src2); + src2.im = cat6->ldgb.src2_im; + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(ctx, &dst); + fprintf(ctx->out, ", "); + fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); + print_src(ctx, &src1); + 
fprintf(ctx->out, ", "); + print_src(ctx, &src2); + + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0); + + return; + } else if (_OPC(6, cat6->opc) == OPC_LDG && cat6->a.src1_im && cat6->a.src2_im) { + struct reginfo src3; + + memset(&src3, 0, sizeof(src3)); + src1.reg = (reg_t)(cat6->a.src1); + src2.reg = (reg_t)(cat6->a.src2); + src2.im = cat6->a.src2_im; + src3.reg = (reg_t)(cat6->a.off); + src3.full = true; + dst.reg = (reg_t)(cat6->d.dst); + + print_src(ctx, &dst); + fprintf(ctx->out, ", g["); + print_src(ctx, &src1); + fprintf(ctx->out, "+"); + print_src(ctx, &src3); + fprintf(ctx->out, "], "); + print_src(ctx, &src2); + + return; + } + if (cat6->dst_off) { + dst.reg = (reg_t)(cat6->c.dst); + dstoff = cat6->c.off; + } else { + dst.reg = (reg_t)(cat6->d.dst); + } + + if (cat6->src_off) { + src1.reg = (reg_t)(cat6->a.src1); + src1.im = cat6->a.src1_im; + src2.reg = (reg_t)(cat6->a.src2); + src2.im = cat6->a.src2_im; + src1off = cat6->a.off; + } else { + src1.reg = (reg_t)(cat6->b.src1); + src1.im = cat6->b.src1_im; + src2.reg = (reg_t)(cat6->b.src2); + src2.im = cat6->b.src2_im; + } + + if (!nodst) { + if (sd) + fprintf(ctx->out, "%c[", sd); + /* note: dst might actually be a src (ie. 
address to store to) */ + print_src(ctx, &dst); + if (cat6->dst_off && cat6->g) { + struct reginfo dstoff_reg = {0}; + dstoff_reg.reg = (reg_t) cat6->c.off; + dstoff_reg.full = true; + fprintf(ctx->out, "+"); + print_src(ctx, &dstoff_reg); + } else if (dstoff) + fprintf(ctx->out, "%+d", dstoff); + if (sd) + fprintf(ctx->out, "]"); + fprintf(ctx->out, ", "); + } + + if (ss) + fprintf(ctx->out, "%c[", ss); + + /* can have a larger than normal immed, so hack: */ + if (src1.im) { + fprintf(ctx->out, "%u", src1.reg.dummy13); + } else { + print_src(ctx, &src1); + } + + if (cat6->src_off && cat6->g) + print_src(ctx, &src2); + else if (src1off) + fprintf(ctx->out, "%+d", src1off); + if (ss) + fprintf(ctx->out, "]"); + + switch (_OPC(6, cat6->opc)) { + case OPC_RESINFO: + case OPC_RESFMT: + break; + default: + fprintf(ctx->out, ", "); + print_src(ctx, &src2); + break; + } +} +/* Print the suffixes (type, size, descriptor mode, optional .baseN) and then the src2, src1 and ssbo operands of an a6xx-encoded cat6 instruction. */ +static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx; + struct reginfo src1, src2, ssbo; + bool uses_type = _OPC(6, cat6->opc) != OPC_LDC; + + static const struct { + bool indirect; + bool bindless; + const char *name; + } desc_features[8] = { + [CAT6_IMM] = { + .name = "imm" + }, + [CAT6_UNIFORM] = { + .indirect = true, + .name = "uniform" + }, + [CAT6_NONUNIFORM] = { + .indirect = true, + .name = "nonuniform" + }, + [CAT6_BINDLESS_IMM] = { + .bindless = true, + .name = "imm" + }, + [CAT6_BINDLESS_UNIFORM] = { + .bindless = true, + .indirect = true, + .name = "uniform" + }, + [CAT6_BINDLESS_NONUNIFORM] = { + .bindless = true, + .indirect = true, + .name = "nonuniform" + }, + }; + + bool indirect_ssbo = desc_features[cat6->desc_mode].indirect; + bool bindless = desc_features[cat6->desc_mode].bindless; + bool type_full = cat6->type != TYPE_U16; + + + memset(&src1, 0, sizeof(src1)); + memset(&src2, 0, sizeof(src2)); + memset(&ssbo, 0, sizeof(ssbo)); + + if (uses_type) { + fprintf(ctx->out, ".%s", cat6->typed ? 
"typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + } else { + fprintf(ctx->out, ".offset%d", cat6->d); + } + fprintf(ctx->out, ".%u", cat6->type_size + 1); + /* only 6 of the 8 desc_features slots carry a name; never pass a NULL name to %s */ + fprintf(ctx->out, ".%s", desc_features[cat6->desc_mode].name ? desc_features[cat6->desc_mode].name : "unknown"); + if (bindless) + fprintf(ctx->out, ".base%d", cat6->base); + fprintf(ctx->out, " "); + + src2.reg = (reg_t)(cat6->src2); + src2.full = type_full; + print_src(ctx, &src2); + fprintf(ctx->out, ", "); + + src1.reg = (reg_t)(cat6->src1); + src1.full = true; // XXX + print_src(ctx, &src1); + fprintf(ctx->out, ", "); + ssbo.reg = (reg_t)(cat6->ssbo); + ssbo.im = !indirect_ssbo; + ssbo.full = true; + print_src(ctx, &ssbo); + + if (debug & PRINT_VERBOSE) { + fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)", + cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5); + } +} +/* Dispatch to the a6xx or the legacy (a3xx) cat6 printer, as decided by is_cat6_legacy(); verbose mode tags which one ran. */ +static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr) +{ + if (!is_cat6_legacy(instr, ctx->gpu_id)) { + print_instr_cat6_a6xx(ctx, instr); + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, " NEW"); + } else { + print_instr_cat6_a3xx(ctx, instr); + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, " LEGACY"); + } +} +static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat7_t *cat7 = &instr->cat7; + + if (cat7->g) + fprintf(ctx->out, ".g"); + if (cat7->l) + fprintf(ctx->out, ".l"); + + if (_OPC(7, cat7->opc) == OPC_FENCE) { + if (cat7->r) + fprintf(ctx->out, ".r"); + if (cat7->w) + fprintf(ctx->out, ".w"); + } +} + +/* size of largest OPC field of all the instruction categories: */ +#define NOPC_BITS 6 + +static const struct opc_info { + uint16_t cat; + uint16_t opc; + const char *name; + void (*print)(struct disasm_ctx *ctx, instr_t *instr); +} opcs[1 << (3+NOPC_BITS)] = { +#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat } + /* category 0: */ + OPC(0, OPC_NOP, nop), + OPC(0, OPC_B, b), + OPC(0, OPC_JUMP, jump), + OPC(0, OPC_CALL, 
call), + OPC(0, OPC_RET, ret), + OPC(0, OPC_KILL, kill), + OPC(0, OPC_END, end), + OPC(0, OPC_EMIT, emit), + OPC(0, OPC_CUT, cut), + OPC(0, OPC_CHMASK, chmask), + OPC(0, OPC_CHSH, chsh), + OPC(0, OPC_FLOW_REV, flow_rev), + OPC(0, OPC_PREDT, predt), + OPC(0, OPC_PREDF, predf), + OPC(0, OPC_PREDE, prede), + OPC(0, OPC_BKT, bkt), + OPC(0, OPC_STKS, stks), + OPC(0, OPC_STKR, stkr), + OPC(0, OPC_XSET, xset), + OPC(0, OPC_XCLR, xclr), + OPC(0, OPC_GETONE, getone), + OPC(0, OPC_DBG, dbg), + OPC(0, OPC_SHPS, shps), + OPC(0, OPC_SHPE, shpe), + + /* category 1: */ + OPC(1, OPC_MOV, ), + + /* category 2: */ + OPC(2, OPC_ADD_F, add.f), + OPC(2, OPC_MIN_F, min.f), + OPC(2, OPC_MAX_F, max.f), + OPC(2, OPC_MUL_F, mul.f), + OPC(2, OPC_SIGN_F, sign.f), + OPC(2, OPC_CMPS_F, cmps.f), + OPC(2, OPC_ABSNEG_F, absneg.f), + OPC(2, OPC_CMPV_F, cmpv.f), + OPC(2, OPC_FLOOR_F, floor.f), + OPC(2, OPC_CEIL_F, ceil.f), + OPC(2, OPC_RNDNE_F, rndne.f), + OPC(2, OPC_RNDAZ_F, rndaz.f), + OPC(2, OPC_TRUNC_F, trunc.f), + OPC(2, OPC_ADD_U, add.u), + OPC(2, OPC_ADD_S, add.s), + OPC(2, OPC_SUB_U, sub.u), + OPC(2, OPC_SUB_S, sub.s), + OPC(2, OPC_CMPS_U, cmps.u), + OPC(2, OPC_CMPS_S, cmps.s), + OPC(2, OPC_MIN_U, min.u), + OPC(2, OPC_MIN_S, min.s), + OPC(2, OPC_MAX_U, max.u), + OPC(2, OPC_MAX_S, max.s), + OPC(2, OPC_ABSNEG_S, absneg.s), + OPC(2, OPC_AND_B, and.b), + OPC(2, OPC_OR_B, or.b), + OPC(2, OPC_NOT_B, not.b), + OPC(2, OPC_XOR_B, xor.b), + OPC(2, OPC_CMPV_U, cmpv.u), + OPC(2, OPC_CMPV_S, cmpv.s), + OPC(2, OPC_MUL_U24, mul.u24), + OPC(2, OPC_MUL_S24, mul.s24), + OPC(2, OPC_MULL_U, mull.u), + OPC(2, OPC_BFREV_B, bfrev.b), + OPC(2, OPC_CLZ_S, clz.s), + OPC(2, OPC_CLZ_B, clz.b), + OPC(2, OPC_SHL_B, shl.b), + OPC(2, OPC_SHR_B, shr.b), + OPC(2, OPC_ASHR_B, ashr.b), + OPC(2, OPC_BARY_F, bary.f), + OPC(2, OPC_MGEN_B, mgen.b), + OPC(2, OPC_GETBIT_B, getbit.b), + OPC(2, OPC_SETRM, setrm), + OPC(2, OPC_CBITS_B, cbits.b), + OPC(2, OPC_SHB, shb), + OPC(2, OPC_MSAD, msad), + + /* category 3: */ + OPC(3, 
OPC_MAD_U16, mad.u16), + OPC(3, OPC_MADSH_U16, madsh.u16), + OPC(3, OPC_MAD_S16, mad.s16), + OPC(3, OPC_MADSH_M16, madsh.m16), + OPC(3, OPC_MAD_U24, mad.u24), + OPC(3, OPC_MAD_S24, mad.s24), + OPC(3, OPC_MAD_F16, mad.f16), + OPC(3, OPC_MAD_F32, mad.f32), + OPC(3, OPC_SEL_B16, sel.b16), + OPC(3, OPC_SEL_B32, sel.b32), + OPC(3, OPC_SEL_S16, sel.s16), + OPC(3, OPC_SEL_S32, sel.s32), + OPC(3, OPC_SEL_F16, sel.f16), + OPC(3, OPC_SEL_F32, sel.f32), + OPC(3, OPC_SAD_S16, sad.s16), + OPC(3, OPC_SAD_S32, sad.s32), + + /* category 4: */ + OPC(4, OPC_RCP, rcp), + OPC(4, OPC_RSQ, rsq), + OPC(4, OPC_LOG2, log2), + OPC(4, OPC_EXP2, exp2), + OPC(4, OPC_SIN, sin), + OPC(4, OPC_COS, cos), + OPC(4, OPC_SQRT, sqrt), + OPC(4, OPC_HRSQ, hrsq), + OPC(4, OPC_HLOG2, hlog2), + OPC(4, OPC_HEXP2, hexp2), + + /* category 5: */ + OPC(5, OPC_ISAM, isam), + OPC(5, OPC_ISAML, isaml), + OPC(5, OPC_ISAMM, isamm), + OPC(5, OPC_SAM, sam), + OPC(5, OPC_SAMB, samb), + OPC(5, OPC_SAML, saml), + OPC(5, OPC_SAMGQ, samgq), + OPC(5, OPC_GETLOD, getlod), + OPC(5, OPC_CONV, conv), + OPC(5, OPC_CONVM, convm), + OPC(5, OPC_GETSIZE, getsize), + OPC(5, OPC_GETBUF, getbuf), + OPC(5, OPC_GETPOS, getpos), + OPC(5, OPC_GETINFO, getinfo), + OPC(5, OPC_DSX, dsx), + OPC(5, OPC_DSY, dsy), + OPC(5, OPC_GATHER4R, gather4r), + OPC(5, OPC_GATHER4G, gather4g), + OPC(5, OPC_GATHER4B, gather4b), + OPC(5, OPC_GATHER4A, gather4a), + OPC(5, OPC_SAMGP0, samgp0), + OPC(5, OPC_SAMGP1, samgp1), + OPC(5, OPC_SAMGP2, samgp2), + OPC(5, OPC_SAMGP3, samgp3), + OPC(5, OPC_DSXPP_1, dsxpp.1), + OPC(5, OPC_DSYPP_1, dsypp.1), + OPC(5, OPC_RGETPOS, rgetpos), + OPC(5, OPC_RGETINFO, rgetinfo), + + + /* category 6: */ + OPC(6, OPC_LDG, ldg), + OPC(6, OPC_LDL, ldl), + OPC(6, OPC_LDP, ldp), + OPC(6, OPC_STG, stg), + OPC(6, OPC_STL, stl), + OPC(6, OPC_STP, stp), + OPC(6, OPC_LDIB, ldib), + OPC(6, OPC_G2L, g2l), + OPC(6, OPC_L2G, l2g), + OPC(6, OPC_PREFETCH, prefetch), + OPC(6, OPC_LDLW, ldlw), + OPC(6, OPC_STLW, stlw), + OPC(6, OPC_RESFMT, resfmt), + 
OPC(6, OPC_RESINFO, resinfo), + OPC(6, OPC_ATOMIC_ADD, atomic.add), + OPC(6, OPC_ATOMIC_SUB, atomic.sub), + OPC(6, OPC_ATOMIC_XCHG, atomic.xchg), + OPC(6, OPC_ATOMIC_INC, atomic.inc), + OPC(6, OPC_ATOMIC_DEC, atomic.dec), + OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg), + OPC(6, OPC_ATOMIC_MIN, atomic.min), + OPC(6, OPC_ATOMIC_MAX, atomic.max), + OPC(6, OPC_ATOMIC_AND, atomic.and), + OPC(6, OPC_ATOMIC_OR, atomic.or), + OPC(6, OPC_ATOMIC_XOR, atomic.xor), + OPC(6, OPC_LDGB, ldgb), + OPC(6, OPC_STGB, stgb), + OPC(6, OPC_STIB, stib), + OPC(6, OPC_LDC, ldc), + OPC(6, OPC_LDLV, ldlv), + + OPC(7, OPC_BAR, bar), + OPC(7, OPC_FENCE, fence), + + +#undef OPC +}; + +#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)])) + +static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr) +{ + const char *name = GETINFO(instr)->name; + uint32_t opc = instr_opc(instr, ctx->gpu_id); + + if (name) { + fprintf(ctx->out, "%s", name); + GETINFO(instr)->print(ctx, instr); + } else { + fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc); + + switch (instr->opc_cat) { + case 0: print_instr_cat0(ctx, instr); break; + case 1: print_instr_cat1(ctx, instr); break; + case 2: print_instr_cat2(ctx, instr); break; + case 3: print_instr_cat3(ctx, instr); break; + case 4: print_instr_cat4(ctx, instr); break; + case 5: print_instr_cat5(ctx, instr); break; + case 6: print_instr_cat6(ctx, instr); break; + case 7: print_instr_cat7(ctx, instr); break; + } + } +} + +static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) +{ + instr_t *instr = (instr_t *)dwords; + uint32_t opc = instr_opc(instr, ctx->gpu_id); + unsigned nop = 0; + unsigned cycles = ctx->stats->instructions; + + fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level], + instr->opc_cat, n, cycles++, dwords[1], dwords[0]); + +#if 0 + /* print unknown bits: */ + if (debug & PRINT_RAW) + fprintf(ctx->out, "[%08xx_%08xx] ", dwords[1] & 0x001ff800, dwords[0] & 
0x00000000); + + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, "%d,%02d ", instr->opc_cat, opc); +#endif + + /* NOTE: order flags are printed is a bit fugly.. but for now I + * try to match the order in llvm-a3xx disassembler for easy + * diff'ing.. + */ + + ctx->repeat = instr_repeat(instr); + ctx->stats->instructions += 1 + ctx->repeat; + ctx->stats->instlen++; + + if (instr->sync) { + fprintf(ctx->out, "(sy)"); + ctx->stats->sy++; + } + if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) { + fprintf(ctx->out, "(ss)"); + ctx->stats->ss++; + } + if (instr->jmp_tgt) + fprintf(ctx->out, "(jp)"); + if ((instr->opc_cat == 0) && instr->cat0.eq) + fprintf(ctx->out, "(eq)"); + if (instr_sat(instr)) + fprintf(ctx->out, "(sat)"); + if (ctx->repeat) + fprintf(ctx->out, "(rpt%d)", ctx->repeat); + else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r)) + nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r; + else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r)) + nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r; + ctx->stats->instructions += nop; + ctx->stats->nops += nop; + if (opc == OPC_NOP) + ctx->stats->nops += 1 + ctx->repeat; + if (nop) + fprintf(ctx->out, "(nop%d) ", nop); + + if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) + fprintf(ctx->out, "(ul)"); + + print_single_instr(ctx, instr); + fprintf(ctx->out, "\n"); + + process_reg_dst(ctx); + + if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) { + int i; + for (i = 0; i < nop; i++) { + fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ", + levels[ctx->level], instr->opc_cat, n, cycles++); + fprintf(ctx->out, "nop\n"); + } + for (i = 0; i < ctx->repeat; i++) { + ctx->repeatidx = i + 1; + fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ", + levels[ctx->level], instr->opc_cat, n, cycles++); + + print_single_instr(ctx, instr); + fprintf(ctx->out, "\n"); + } + ctx->repeatidx = 0; + } + + return (instr->opc_cat == 0) && + ((opc == OPC_END) || (opc == 
OPC_CHSH)); +} +/* Convenience wrapper around disasm_a3xx_stat() for callers that do not need the statistics. */ +int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id) +{ + struct shader_stats stats; + return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats); +} +/* Disassemble dwords[0..sizedwords) to out, two dwords per instruction, filling *stats (zeroed first). Stops early once print_instr() has reported an end instruction and more than three all-zero instructions follow it. Always returns 0. */ +int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out, + unsigned gpu_id, struct shader_stats *stats) +{ + struct disasm_ctx ctx; + int i; + int nop_count = 0; + bool has_end = false; + + /* an instruction is two dwords: the loop bound below skips a trailing odd dword instead of reading past the buffer */ + + memset(&ctx, 0, sizeof(ctx)); + ctx.out = out; + ctx.level = level; + ctx.gpu_id = gpu_id; + ctx.stats = stats; + memset(ctx.stats, 0, sizeof(*ctx.stats)); + + for (i = 0; i + 1 < sizedwords; i += 2) { + has_end |= print_instr(&ctx, &dwords[i], i/2); + if (!has_end) + continue; + if (dwords[i] == 0 && dwords[i + 1] == 0) + nop_count++; + else + nop_count = 0; + if (nop_count > 3) + break; + } + + print_reg_stats(&ctx); + + return 0; +} diff --git a/src/freedreno/decode/disasm.h b/src/freedreno/decode/disasm.h new file mode 100644 index 00000000000..21ae5a11cd2 --- /dev/null +++ b/src/freedreno/decode/disasm.h @@ -0,0 +1,59 @@ +/* + * Copyright © 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef DISASM_H_ +#define DISASM_H_ + +#include + +enum shader_t { + SHADER_VERTEX, + SHADER_TCS, + SHADER_TES, + SHADER_GEOM, + SHADER_FRAGMENT, + SHADER_COMPUTE, +}; + +/* bitmask of debug flags */ +enum debug_t { + PRINT_RAW = 0x1, /* dump raw hexdump */ + PRINT_VERBOSE = 0x2, /* also print pad/dummy encoding bits */ + EXPAND_REPEAT = 0x4, /* print one extra line per (nopN)/(rptN) slot */ +}; + +struct shader_stats { + /* instructions includes (rptN) repeats and (nopN) slots, instlen does not */ + int instructions, instlen; + int nops; /* (nopN) slots plus nop instructions and their repeats */ + int ss, sy; /* number of instructions printed with (ss) / (sy) */ + int constlen; +}; + +int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type); +int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id); +int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out, + unsigned gpu_id, struct shader_stats *stats); +void disasm_set_debug(enum debug_t debug); + +#endif /* DISASM_H_ */ diff --git a/src/freedreno/decode/instr-a2xx.h b/src/freedreno/decode/instr-a2xx.h new file mode 100644 index 00000000000..03d1991bcac --- /dev/null +++ b/src/freedreno/decode/instr-a2xx.h @@ -0,0 +1,385 @@ +/* + * Copyright (c) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef INSTR_A2XX_H_ +#define INSTR_A2XX_H_ + +#define PACKED __attribute__((__packed__)) + + +/* + * ALU instructions: + */ + +typedef enum { + ADDs = 0, + ADD_PREVs = 1, + MULs = 2, + MUL_PREVs = 3, + MUL_PREV2s = 4, + MAXs = 5, + MINs = 6, + SETEs = 7, + SETGTs = 8, + SETGTEs = 9, + SETNEs = 10, + FRACs = 11, + TRUNCs = 12, + FLOORs = 13, + EXP_IEEE = 14, + LOG_CLAMP = 15, + LOG_IEEE = 16, + RECIP_CLAMP = 17, + RECIP_FF = 18, + RECIP_IEEE = 19, + RECIPSQ_CLAMP = 20, + RECIPSQ_FF = 21, + RECIPSQ_IEEE = 22, + MOVAs = 23, + MOVA_FLOORs = 24, + SUBs = 25, + SUB_PREVs = 26, + PRED_SETEs = 27, + PRED_SETNEs = 28, + PRED_SETGTs = 29, + PRED_SETGTEs = 30, + PRED_SET_INVs = 31, + PRED_SET_POPs = 32, + PRED_SET_CLRs = 33, + PRED_SET_RESTOREs = 34, + KILLEs = 35, + KILLGTs = 36, + KILLGTEs = 37, + KILLNEs = 38, + KILLONEs = 39, + SQRT_IEEE = 40, + MUL_CONST_0 = 42, + MUL_CONST_1 = 43, + ADD_CONST_0 = 44, + ADD_CONST_1 = 45, + SUB_CONST_0 = 46, + SUB_CONST_1 = 47, + SIN = 48, + COS = 49, + RETAIN_PREV = 50, +} instr_scalar_opc_t; + +typedef enum { + ADDv = 0, + MULv = 1, + MAXv = 2, + MINv = 3, + SETEv = 4, + SETGTv = 5, + SETGTEv = 6, + SETNEv = 7, + FRACv = 8, + TRUNCv = 9, + FLOORv = 10, + MULADDv = 11, + CNDEv = 12, + CNDGTEv = 13, + CNDGTv = 14, + DOT4v = 15, + DOT3v = 16, + DOT2ADDv = 17, + CUBEv = 18, + MAX4v = 19, + PRED_SETE_PUSHv = 20, + PRED_SETNE_PUSHv = 21, + PRED_SETGT_PUSHv = 22, + PRED_SETGTE_PUSHv = 23, + KILLEv = 24, + KILLGTv = 25, + KILLGTEv = 26, + KILLNEv = 
27, + DSTv = 28, + MOVAv = 29, +} instr_vector_opc_t; + +typedef struct PACKED { + /* dword0: */ + uint8_t vector_dest : 6; + uint8_t vector_dest_rel : 1; + uint8_t low_precision_16b_fp : 1; + uint8_t scalar_dest : 6; + uint8_t scalar_dest_rel : 1; + uint8_t export_data : 1; + uint8_t vector_write_mask : 4; + uint8_t scalar_write_mask : 4; + uint8_t vector_clamp : 1; + uint8_t scalar_clamp : 1; + instr_scalar_opc_t scalar_opc : 6; + /* dword1: */ + uint8_t src3_swiz : 8; + uint8_t src2_swiz : 8; + uint8_t src1_swiz : 8; + uint8_t src3_reg_negate : 1; + uint8_t src2_reg_negate : 1; + uint8_t src1_reg_negate : 1; + uint8_t pred_select : 2; + uint8_t relative_addr : 1; + uint8_t const_1_rel_abs : 1; + uint8_t const_0_rel_abs : 1; + /* dword2: */ + uint8_t src3_reg : 6; + uint8_t src3_reg_select : 1; + uint8_t src3_reg_abs : 1; + uint8_t src2_reg : 6; + uint8_t src2_reg_select : 1; + uint8_t src2_reg_abs : 1; + uint8_t src1_reg : 6; + uint8_t src1_reg_select : 1; + uint8_t src1_reg_abs : 1; + instr_vector_opc_t vector_opc : 5; + uint8_t src3_sel : 1; + uint8_t src2_sel : 1; + uint8_t src1_sel : 1; +} instr_alu_t; + + + +/* + * CF instructions: + */ + +typedef enum { + NOP = 0, + EXEC = 1, + EXEC_END = 2, + COND_EXEC = 3, + COND_EXEC_END = 4, + COND_PRED_EXEC = 5, + COND_PRED_EXEC_END = 6, + LOOP_START = 7, + LOOP_END = 8, + COND_CALL = 9, + RETURN = 10, + COND_JMP = 11, + ALLOC = 12, + COND_EXEC_PRED_CLEAN = 13, + COND_EXEC_PRED_CLEAN_END = 14, + MARK_VS_FETCH_DONE = 15, +} instr_cf_opc_t; + +typedef enum { + RELATIVE_ADDR = 0, + ABSOLUTE_ADDR = 1, +} instr_addr_mode_t; + +typedef enum { + SQ_NO_ALLOC = 0, + SQ_POSITION = 1, + SQ_PARAMETER_PIXEL = 2, + SQ_MEMORY = 3, +} instr_alloc_type_t; + +typedef struct PACKED { + uint16_t address : 9; + uint8_t reserved0 : 3; + uint8_t count : 3; + uint8_t yeild : 1; + uint16_t serialize : 12; + uint8_t vc : 6; /* vertex cache? 
*/ + uint8_t bool_addr : 8; + uint8_t condition : 1; + instr_addr_mode_t address_mode : 1; + instr_cf_opc_t opc : 4; +} instr_cf_exec_t; + +typedef struct PACKED { + uint16_t address : 10; + uint8_t reserved0 : 6; + uint8_t loop_id : 5; + uint32_t reserved1 : 22; + instr_addr_mode_t address_mode : 1; + instr_cf_opc_t opc : 4; +} instr_cf_loop_t; + +typedef struct PACKED { + uint16_t address : 10; + uint8_t reserved0 : 3; + uint8_t force_call : 1; + uint8_t predicated_jmp : 1; + uint32_t reserved1 : 18; + uint8_t direction : 1; + uint8_t bool_addr : 8; + uint8_t condition : 1; + instr_addr_mode_t address_mode : 1; + instr_cf_opc_t opc : 4; +} instr_cf_jmp_call_t; + +typedef struct PACKED { + uint8_t size : 4; + uint64_t reserved0 : 36; + uint8_t no_serial : 1; + instr_alloc_type_t buffer_select : 2; + uint8_t alloc_mode : 1; + instr_cf_opc_t opc : 4; +} instr_cf_alloc_t; + +typedef union PACKED { + instr_cf_exec_t exec; + instr_cf_loop_t loop; + instr_cf_jmp_call_t jmp_call; + instr_cf_alloc_t alloc; + struct PACKED { + uint64_t dummy : 44; + instr_cf_opc_t opc : 4; + }; +} instr_cf_t; + + + +/* + * FETCH instructions: + */ + +typedef enum { + VTX_FETCH = 0, + TEX_FETCH = 1, + TEX_GET_BORDER_COLOR_FRAC = 16, + TEX_GET_COMP_TEX_LOD = 17, + TEX_GET_GRADIENTS = 18, + TEX_GET_WEIGHTS = 19, + TEX_SET_TEX_LOD = 24, + TEX_SET_GRADIENTS_H = 25, + TEX_SET_GRADIENTS_V = 26, + TEX_RESERVED_4 = 27, +} instr_fetch_opc_t; + +typedef enum { + TEX_FILTER_POINT = 0, + TEX_FILTER_LINEAR = 1, + TEX_FILTER_BASEMAP = 2, /* only applicable for mip-filter */ + TEX_FILTER_USE_FETCH_CONST = 3, +} instr_tex_filter_t; + +typedef enum { + ANISO_FILTER_DISABLED = 0, + ANISO_FILTER_MAX_1_1 = 1, + ANISO_FILTER_MAX_2_1 = 2, + ANISO_FILTER_MAX_4_1 = 3, + ANISO_FILTER_MAX_8_1 = 4, + ANISO_FILTER_MAX_16_1 = 5, + ANISO_FILTER_USE_FETCH_CONST = 7, +} instr_aniso_filter_t; + +typedef enum { + ARBITRARY_FILTER_2X4_SYM = 0, + ARBITRARY_FILTER_2X4_ASYM = 1, + ARBITRARY_FILTER_4X2_SYM = 2, + 
ARBITRARY_FILTER_4X2_ASYM = 3, + ARBITRARY_FILTER_4X4_SYM = 4, + ARBITRARY_FILTER_4X4_ASYM = 5, + ARBITRARY_FILTER_USE_FETCH_CONST = 7, +} instr_arbitrary_filter_t; + +typedef enum { + SAMPLE_CENTROID = 0, + SAMPLE_CENTER = 1, +} instr_sample_loc_t; + +typedef unsigned instr_surf_fmt_t; + +typedef struct PACKED { + /* dword0: */ + instr_fetch_opc_t opc : 5; + uint8_t src_reg : 6; + uint8_t src_reg_am : 1; + uint8_t dst_reg : 6; + uint8_t dst_reg_am : 1; + uint8_t fetch_valid_only : 1; + uint8_t const_idx : 5; + uint8_t tx_coord_denorm : 1; + uint8_t src_swiz : 6; + /* dword1: */ + uint16_t dst_swiz : 12; + instr_tex_filter_t mag_filter : 2; + instr_tex_filter_t min_filter : 2; + instr_tex_filter_t mip_filter : 2; + instr_aniso_filter_t aniso_filter : 3; + instr_arbitrary_filter_t arbitrary_filter : 3; + instr_tex_filter_t vol_mag_filter : 2; + instr_tex_filter_t vol_min_filter : 2; + uint8_t use_comp_lod : 1; + uint8_t use_reg_lod : 2; + uint8_t pred_select : 1; + /* dword2: */ + uint8_t use_reg_gradients : 1; + instr_sample_loc_t sample_location : 1; + uint8_t lod_bias : 7; + uint8_t unused : 7; + uint8_t offset_x : 5; + uint8_t offset_y : 5; + uint8_t offset_z : 5; + uint8_t pred_condition : 1; +} instr_fetch_tex_t; + +typedef struct PACKED { + /* dword0: */ + instr_fetch_opc_t opc : 5; + uint8_t src_reg : 6; + uint8_t src_reg_am : 1; + uint8_t dst_reg : 6; + uint8_t dst_reg_am : 1; + uint8_t must_be_one : 1; + uint8_t const_index : 5; + uint8_t const_index_sel : 2; + uint8_t reserved0 : 3; + uint8_t src_swiz : 2; + /* dword1: */ + uint16_t dst_swiz : 12; + uint8_t format_comp_all : 1; /* '1' for signed, '0' for unsigned? 
*/ + uint8_t num_format_all : 1; /* '0' for normalized, '1' for unnormalized */ + uint8_t signed_rf_mode_all : 1; + uint8_t reserved1 : 1; + instr_surf_fmt_t format : 6; + uint8_t reserved2 : 1; + uint8_t exp_adjust_all : 7; + uint8_t reserved3 : 1; + uint8_t pred_select : 1; + /* dword2: */ + uint8_t stride : 8; + /* possibly offset and reserved4 are swapped on a200? */ + uint8_t offset : 8; + uint8_t reserved4 : 8; + uint8_t reserved5 : 7; + uint8_t pred_condition : 1; +} instr_fetch_vtx_t; + +typedef union PACKED { + instr_fetch_tex_t tex; + instr_fetch_vtx_t vtx; + struct PACKED { + /* dword0: */ + instr_fetch_opc_t opc : 5; + uint32_t dummy0 : 27; + /* dword1: */ + uint32_t dummy1 : 32; + /* dword2: */ + uint32_t dummy2 : 32; + }; +} instr_fetch_t; + +#endif /* INSTR_H_ */ diff --git a/src/freedreno/decode/instr-a3xx.h b/src/freedreno/decode/instr-a3xx.h new file mode 100644 index 00000000000..218bdc3e17c --- /dev/null +++ b/src/freedreno/decode/instr-a3xx.h @@ -0,0 +1,1115 @@ +/* + * Copyright (c) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

#define PACKED __attribute__((__packed__))

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

void ir3_assert_handler(const char *expr, const char *file, int line,
		const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));

/* A wrapper for assert() that allows overriding handling of a failed
 * assert.  This is needed for tools like crashdec which can want to
 * attempt to disassemble memory that might not actually be valid
 * instructions.
 *
 * If the (weak) ir3_assert_handler symbol is provided it is called first;
 * otherwise the failure falls through to plain assert().
 */
#define ir3_assert(expr) do { \
		if (!(expr)) { \
			if (ir3_assert_handler) { \
				ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
			} \
			assert(expr); \
		} \
	} while (0)

/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Pack (category, opcode-within-category) into a single opc_t value.
 * Both arguments are parenthesized so that arbitrary expressions expand
 * correctly.
 */
#define _OPC(cat, opc)   (((cat) << NOPC_BITS) | (opc))

typedef enum {
	/* category 0: */
	OPC_NOP             = _OPC(0, 0),
	OPC_B               = _OPC(0, 1),
	OPC_JUMP            = _OPC(0, 2),
	OPC_CALL            = _OPC(0, 3),
	OPC_RET             = _OPC(0, 4),
	OPC_KILL            = _OPC(0, 5),
	OPC_END             = _OPC(0, 6),
	OPC_EMIT            = _OPC(0, 7),
	OPC_CUT             = _OPC(0, 8),
	OPC_CHMASK          = _OPC(0, 9),
	OPC_CHSH            = _OPC(0, 10),
	OPC_FLOW_REV        = _OPC(0, 11),

	OPC_BKT             = _OPC(0, 16),
	OPC_STKS            = _OPC(0, 17),
	OPC_STKR            = _OPC(0, 18),
	OPC_XSET            = _OPC(0, 19),
	OPC_XCLR            = _OPC(0, 20),
	OPC_GETONE          = _OPC(0, 21),
	OPC_DBG             = _OPC(0, 22),
	OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
	OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */

	OPC_PREDT           = _OPC(0, 29),   /* predicated true */
	OPC_PREDF           = _OPC(0, 30),   /* predicated false */
	OPC_PREDE           = _OPC(0, 31),   /* predicated end */

	/* category 1: */
	OPC_MOV             = _OPC(1, 0),

	/* category 2: */
	OPC_ADD_F           = _OPC(2, 0),
	OPC_MIN_F           = _OPC(2, 1),
	OPC_MAX_F           = _OPC(2, 2),
	OPC_MUL_F           = _OPC(2, 3),
	OPC_SIGN_F          = _OPC(2, 4),
	OPC_CMPS_F          = _OPC(2, 5),
	OPC_ABSNEG_F        = _OPC(2, 6),
	OPC_CMPV_F          = _OPC(2, 7),
	/* 8 - invalid */
	OPC_FLOOR_F         = _OPC(2, 9),
	OPC_CEIL_F          = _OPC(2, 10),
	OPC_RNDNE_F         = _OPC(2, 11),
	OPC_RNDAZ_F         = _OPC(2, 12),
	OPC_TRUNC_F         = _OPC(2, 13),
	/* 14-15 - invalid */
	OPC_ADD_U           = _OPC(2, 16),
	OPC_ADD_S           = _OPC(2, 17),
	OPC_SUB_U           = _OPC(2, 18),
	OPC_SUB_S           = _OPC(2, 19),
	OPC_CMPS_U          = _OPC(2, 20),
	OPC_CMPS_S          = _OPC(2, 21),
	OPC_MIN_U           = _OPC(2, 22),
	OPC_MIN_S           = _OPC(2, 23),
	OPC_MAX_U           = _OPC(2, 24),
	OPC_MAX_S           = _OPC(2, 25),
	OPC_ABSNEG_S        = _OPC(2, 26),
	/* 27 - invalid */
	OPC_AND_B           = _OPC(2, 28),
	OPC_OR_B            = _OPC(2, 29),
	OPC_NOT_B           = _OPC(2, 30),
	OPC_XOR_B           = _OPC(2, 31),
	/* 32 - invalid */
	OPC_CMPV_U          = _OPC(2, 33),
	OPC_CMPV_S          = _OPC(2, 34),
	/* 35-47 - invalid */
	OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
	OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
	OPC_MULL_U          = _OPC(2, 50),
	OPC_BFREV_B         = _OPC(2, 51),
	OPC_CLZ_S           = _OPC(2, 52),
	OPC_CLZ_B           = _OPC(2, 53),
	OPC_SHL_B           = _OPC(2, 54),
	OPC_SHR_B           = _OPC(2, 55),
	OPC_ASHR_B          = _OPC(2, 56),
	OPC_BARY_F          = _OPC(2, 57),
	OPC_MGEN_B          = _OPC(2, 58),
	OPC_GETBIT_B        = _OPC(2, 59),
	OPC_SETRM           = _OPC(2, 60),
	OPC_CBITS_B         = _OPC(2, 61),
	OPC_SHB             = _OPC(2, 62),
	OPC_MSAD            = _OPC(2, 63),

	/* category 3: */
	OPC_MAD_U16         = _OPC(3, 0),
	OPC_MADSH_U16       = _OPC(3, 1),
	OPC_MAD_S16         = _OPC(3, 2),
	OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
	OPC_MAD_U24         = _OPC(3, 4),
	OPC_MAD_S24         = _OPC(3, 5),
	OPC_MAD_F16         = _OPC(3, 6),
	OPC_MAD_F32         = _OPC(3, 7),
	OPC_SEL_B16         = _OPC(3, 8),
	OPC_SEL_B32         = _OPC(3, 9),
	OPC_SEL_S16         = _OPC(3, 10),
	OPC_SEL_S32         = _OPC(3, 11),
	OPC_SEL_F16         = _OPC(3, 12),
	OPC_SEL_F32         = _OPC(3, 13),
	OPC_SAD_S16         = _OPC(3, 14),
	OPC_SAD_S32         = _OPC(3, 15),

	/* category 4: */
	OPC_RCP             = _OPC(4, 0),
	OPC_RSQ             = _OPC(4, 1),
	OPC_LOG2            = _OPC(4, 2),
	OPC_EXP2            = _OPC(4, 3),
	OPC_SIN             = _OPC(4, 4),
	OPC_COS             = _OPC(4, 5),
	OPC_SQRT            = _OPC(4, 6),
	/* NOTE that these are 8+opc from their highp equivs, so it's possible
	 * that the high order bit in the opc field has been repurposed for
	 * half-precision use?  But note that other ops (rcp/sin/cos/sqrt)
	 * still use the same opc as highp
	 */
	OPC_HRSQ            = _OPC(4, 9),
	OPC_HLOG2           = _OPC(4, 10),
	OPC_HEXP2           = _OPC(4, 11),

	/* category 5: */
	OPC_ISAM            = _OPC(5, 0),
	OPC_ISAML           = _OPC(5, 1),
	OPC_ISAMM           = _OPC(5, 2),
	OPC_SAM             = _OPC(5, 3),
	OPC_SAMB            = _OPC(5, 4),
	OPC_SAML            = _OPC(5, 5),
	OPC_SAMGQ           = _OPC(5, 6),
	OPC_GETLOD          = _OPC(5, 7),
	OPC_CONV            = _OPC(5, 8),
	OPC_CONVM           = _OPC(5, 9),
	OPC_GETSIZE         = _OPC(5, 10),
	OPC_GETBUF          = _OPC(5, 11),
	OPC_GETPOS          = _OPC(5, 12),
	OPC_GETINFO         = _OPC(5, 13),
	OPC_DSX             = _OPC(5, 14),
	OPC_DSY             = _OPC(5, 15),
	OPC_GATHER4R        = _OPC(5, 16),
	OPC_GATHER4G        = _OPC(5, 17),
	OPC_GATHER4B        = _OPC(5, 18),
	OPC_GATHER4A        = _OPC(5, 19),
	OPC_SAMGP0          = _OPC(5, 20),
	OPC_SAMGP1          = _OPC(5, 21),
	OPC_SAMGP2          = _OPC(5, 22),
	OPC_SAMGP3          = _OPC(5, 23),
	OPC_DSXPP_1         = _OPC(5, 24),
	OPC_DSYPP_1         = _OPC(5, 25),
	OPC_RGETPOS         = _OPC(5, 26),
	OPC_RGETINFO        = _OPC(5, 27),

	/* category 6: */
	OPC_LDG             = _OPC(6, 0),        /* load-global */
	OPC_LDL             = _OPC(6, 1),
	OPC_LDP             = _OPC(6, 2),
	OPC_STG             = _OPC(6, 3),        /* store-global */
	OPC_STL             = _OPC(6, 4),
	OPC_STP             = _OPC(6, 5),
	OPC_LDIB            = _OPC(6, 6),
	OPC_G2L             = _OPC(6, 7),
	OPC_L2G             = _OPC(6, 8),
	OPC_PREFETCH        = _OPC(6, 9),
	OPC_LDLW            = _OPC(6, 10),
	OPC_STLW            = _OPC(6, 11),
	OPC_RESFMT          = _OPC(6, 14),
	OPC_RESINFO         = _OPC(6, 15),
	OPC_ATOMIC_ADD      = _OPC(6, 16),
	OPC_ATOMIC_SUB      = _OPC(6, 17),
	OPC_ATOMIC_XCHG     = _OPC(6, 18),
	OPC_ATOMIC_INC      = _OPC(6, 19),
	OPC_ATOMIC_DEC      = _OPC(6, 20),
	OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
	OPC_ATOMIC_MIN      = _OPC(6, 22),
	OPC_ATOMIC_MAX      = _OPC(6, 23),
	OPC_ATOMIC_AND      = _OPC(6, 24),
	OPC_ATOMIC_OR       = _OPC(6, 25),
	OPC_ATOMIC_XOR      = _OPC(6, 26),
	OPC_LDGB            = _OPC(6, 27),
	OPC_STGB            = _OPC(6, 28),
	OPC_STIB            = _OPC(6, 29),
	OPC_LDC             = _OPC(6, 30),
	OPC_LDLV            = _OPC(6, 31),

	/* category 7: */
	OPC_BAR             = _OPC(7, 0),
	OPC_FENCE           = _OPC(7, 1),
} opc_t;

/* Inverse of _OPC(): extract the category / the opcode within the category. */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))

typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  // XXX I assume?
} type_t;

/* Width in bits of a value of the given type; asserts (and returns 0) for
 * a value outside type_t.
 */
static inline uint32_t type_size(type_t type)
{
	switch (type) {
	case TYPE_F32:
	case TYPE_U32:
	case TYPE_S32:
		return 32;
	case TYPE_F16:
	case TYPE_U16:
	case TYPE_S16:
		return 16;
	case TYPE_U8:
	case TYPE_S8:
		return 8;
	default:
		ir3_assert(0); /* invalid type */
		return 0;
	}
}

/* Non-zero if type is one of the float types. */
static inline int type_float(type_t type)
{
	return (type == TYPE_F32) || (type == TYPE_F16);
}

/* Non-zero if type is one of the unsigned integer types. */
static inline int type_uint(type_t type)
{
	return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}

/* Non-zero if type is one of the signed integer types. */
static inline int type_sint(type_t type)
{
	return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}

typedef union PACKED {
	/* normal gpr or const src register: */
	struct PACKED {
		uint32_t comp  : 2;
		uint32_t num   : 10;
	};
	/* for immediate val: */
	int32_t  iim_val   : 11;
	/* to make compiler happy: */
	uint32_t dummy32;
	uint32_t dummy10   : 10;
	int32_t  idummy10  : 10;
	uint32_t dummy11   : 11;
	uint32_t dummy12   : 12;
	uint32_t dummy13   : 13;
	uint32_t dummy8    : 8;
	int32_t  idummy13  : 13;
	int32_t  idummy8   : 8;
} reg_t;

/* special registers: */
#define REG_A0 61       /* address register */
#define REG_P0 62       /* predicate register */

/* Non-zero if reg names the address or predicate register. */
static inline int reg_special(reg_t reg)
{
	return (reg.num == REG_A0) || (reg.num == REG_P0);
}

typedef enum {
	BRANCH_PLAIN = 0,   /* br */
	BRANCH_OR    = 1,   /* brao */
	BRANCH_AND   = 2,   /* braa */
	BRANCH_CONST = 3,   /* brac */
	BRANCH_ANY   = 4,   /* bany */
	BRANCH_ALL   = 5,   /* ball */
	BRANCH_X     = 6,   /* brax ??? */
} brtype_t;

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			int16_t  immed    : 16;
			uint32_t dummy1   : 16;
		} a3xx;
		struct PACKED {
			int32_t  immed    : 20;
			uint32_t dummy1   : 12;
		} a4xx;
		struct PACKED {
			int32_t  immed    : 32;
		} a5xx;
	};

	/* dword1: */
	uint32_t idx      : 5;  /* brac.N index */
	uint32_t brtype   : 3;  /* branch type, see brtype_t */
	uint32_t repeat   : 3;
	uint32_t dummy3   : 1;
	uint32_t ss       : 1;
	uint32_t inv1     : 1;
	uint32_t comp1    : 2;
	uint32_t eq       : 1;
	uint32_t opc_hi   : 1;  /* at least one bit */
	uint32_t dummy4   : 2;
	uint32_t inv0     : 1;
	uint32_t comp0    : 2;  /* component for first src */
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat0_t;

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* for normal src register: */
		struct PACKED {
			uint32_t src : 11;
			/* at least low bit of pad must be zero or it will
			 * look like an address relative src
			 */
			uint32_t pad : 21;
		};
		/* for address relative: */
		struct PACKED {
			int32_t  off : 10;
			uint32_t src_rel_c : 1;
			uint32_t src_rel : 1;
			uint32_t unknown : 20;
		};
		/* for immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
	};

	/* dword1: */
	uint32_t dst        : 8;
	uint32_t repeat     : 3;
	uint32_t src_r      : 1;
	uint32_t ss         : 1;
	uint32_t ul         : 1;
	uint32_t dst_type   : 3;
	uint32_t dst_rel    : 1;
	uint32_t src_type   : 3;
	uint32_t src_c      : 1;
	uint32_t src_im     : 1;
	uint32_t even       : 1;
	uint32_t pos_inf    : 1;
	uint32_t must_be_0  : 2;
	uint32_t jmp_tgt    : 1;
	uint32_t sync       : 1;
	uint32_t opc_cat    : 3;
} instr_cat1_t;

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src1_im      : 1;   /* immediate */
			uint32_t src1_neg     : 1;   /* negate */
			uint32_t src1_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;   /* relative-const */
			uint32_t src1_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src2         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src2_im      : 1;   /* immediate */
			uint32_t src2_neg     : 1;   /* negate */
			uint32_t src2_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src2         : 10;
			uint32_t src2_c       : 1;   /* relative-const */
			uint32_t src2_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src2         : 12;
			uint32_t src2_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 2;
	uint32_t sat      : 1;
	uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
	uint32_t ss       : 1;
	uint32_t ul       : 1;   /* dunno */
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t ei       : 1;
	uint32_t cond     : 3;
	uint32_t src2_r   : 1;   /* doubles as nop1 if repeat==0 */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat2_t;

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src2_c       : 1;
			uint32_t src1_neg     : 1;
			uint32_t src2_r       : 1;   /* doubles as nop1 if repeat==0 */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;
			uint32_t src1_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src3         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src3_r       : 1;
			uint32_t src2_neg     : 1;
			uint32_t src3_neg     : 1;
		};
		struct PACKED {
			uint32_t src3         : 10;
			uint32_t src3_c       : 1;
			uint32_t src3_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src3         : 12;
			uint32_t src3_c       : 1;
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 2;
	uint32_t sat      : 1;
	uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t src2     : 8;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat3_t;

/* Returns false for the cat3 opcodes listed below (the 16-bit variants,
 * plus SAD_S32), true for every other cat3 opcode.
 */
static inline bool instr_cat3_full(instr_cat3_t *cat3)
{
	switch (_OPC(3, cat3->opc)) {
	case OPC_MAD_F16:
	case OPC_MAD_U16:
	case OPC_MAD_S16:
	case OPC_SEL_B16:
	case OPC_SEL_S16:
	case OPC_SEL_F16:
	case OPC_SAD_S16:
	case OPC_SAD_S32:  // really??
		return false;
	default:
		return true;
	}
}

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src          : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src_im       : 1;   /* immediate */
			uint32_t src_neg      : 1;   /* negate */
			uint32_t src_abs      : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src          : 10;
			uint32_t src_c        : 1;   /* relative-const */
			uint32_t src_rel      : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel;
		struct PACKED {
			uint32_t src          : 12;
			uint32_t src_c        : 1;   /* const */
			uint32_t dummy        : 3;
		} c;
	};
	uint32_t dummy1   : 16;  /* seem to be ignored */

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 2;
	uint32_t sat      : 1;
	uint32_t src_r    : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t dummy2   : 5;   /* seem to be ignored */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat4_t;

/* With is_s2en_bindless = 1, this determines whether bindless is enabled and
 * if so, how to get the (base, index) pair for both sampler and texture.
 * There is a single base embedded in the instruction, which is always used
 * for the texture.
 */
typedef enum {
	/* Use traditional GL binding model, get texture and sampler index
	 * from src3 which is not presumed to be uniform. This is
	 * backwards-compatible with earlier generations, where this field was
	 * always 0 and nonuniform-indexed sampling always worked.
	 */
	CAT5_NONUNIFORM = 0,

	/* The sampler base comes from the low 3 bits of a1.x, and the sampler
	 * and texture index come from src3 which is presumed to be uniform.
	 */
	CAT5_BINDLESS_A1_UNIFORM = 1,

	/* The texture and sampler share the same base, and the sampler and
	 * texture index come from src3 which is *not* presumed to be uniform.
	 */
	CAT5_BINDLESS_NONUNIFORM = 2,

	/* The sampler base comes from the low 3 bits of a1.x, and the sampler
	 * and texture index come from src3 which is *not* presumed to be
	 * uniform.
	 */
	CAT5_BINDLESS_A1_NONUNIFORM = 3,

	/* Use traditional GL binding model, get texture and sampler index
	 * from src3 which is presumed to be uniform.
	 */
	CAT5_UNIFORM = 4,

	/* The texture and sampler share the same base, and the sampler and
	 * texture index come from src3 which is presumed to be uniform.
	 */
	CAT5_BINDLESS_UNIFORM = 5,

	/* The texture and sampler share the same base, get sampler index from low
	 * 4 bits of src3 and texture index from high 4 bits.
	 */
	CAT5_BINDLESS_IMM = 6,

	/* The sampler base comes from the low 3 bits of a1.x, and the texture
	 * index comes from the next 8 bits of a1.x. The sampler index is an
	 * immediate in src3.
	 */
	CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* normal case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t dummy1   : 4;   /* seem to be ignored */
			uint32_t samp     : 4;
			uint32_t tex      : 7;
		} norm;
		/* s2en case: */
		struct PACKED {
			uint32_t full         : 1;   /* not half */
			uint32_t src1         : 8;
			uint32_t src2         : 8;
			uint32_t dummy1       : 2;
			uint32_t base_hi      : 2;
			uint32_t src3         : 8;
			uint32_t desc_mode    : 3;
		} s2en_bindless;
		/* same in either case: */
		// XXX I think, confirm this
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t pad      : 15;
		};
	};

	/* dword1: */
	uint32_t dst              : 8;
	uint32_t wrmask           : 4;   /* write-mask */
	uint32_t type             : 3;
	uint32_t base_lo          : 1;   /* used with bindless */
	uint32_t is_3d            : 1;

	uint32_t is_a             : 1;
	uint32_t is_s             : 1;
	uint32_t is_s2en_bindless : 1;
	uint32_t is_o             : 1;
	uint32_t is_p             : 1;

	uint32_t opc              : 5;
	uint32_t jmp_tgt          : 1;
	uint32_t sync             : 1;
	uint32_t opc_cat          : 3;
} instr_cat5_t;

/* dword0 encoding for src_off: [src1 + off], src2: */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;

/* dword0 encoding for !src_off: [src1], src2 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 13;
	uint32_t ignore0  : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;

/* dword1 encoding for dst_off: */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* note: there is some weird stuff going on where sometimes
	 * cat6->a.off is involved.. but that seems like a bug in
	 * the blob, since it is used even if !cat6->src_off
	 * It would make sense for there to be some more bits to
	 * bring us to 11 bits worth of offset, but not sure..
	 */
	int32_t  off      : 8;
	uint32_t mustbe1  : 1;
	uint32_t dst      : 8;
	uint32_t pad1     : 15;
} instr_cat6c_t;

/* dword1 encoding for !dst_off: */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t idx      : 8;
	uint32_t pad0     : 15;
} instr_cat6d_t;

/* ldgb and atomics..
 *
 * ldgb:      pad0=0, pad3=1
 * atomic .g: pad0=1, pad3=1
 *        .l: pad0=1, pad3=0
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad0     : 1;
	uint32_t src3     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t src_ssbo : 8;
	uint32_t pad2     : 3;  // type
	uint32_t g        : 1;
	uint32_t pad3     : 1;
	uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6ldgb_t;

/* stgb, pad0=0, pad3=2
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;  // ???
	uint32_t src1     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t pad0     : 9;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t src3     : 8;
	uint32_t src3_im  : 1;
	uint32_t dst_ssbo : 8;
	uint32_t pad2     : 3;  // type
	uint32_t pad3     : 2;
	uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6stgb_t;

typedef union PACKED {
	instr_cat6a_t a;
	instr_cat6b_t b;
	instr_cat6c_t c;
	instr_cat6d_t d;
	instr_cat6ldgb_t ldgb;
	instr_cat6stgb_t stgb;
	struct PACKED {
		/* dword0: */
		uint32_t src_off  : 1;
		uint32_t pad1     : 31;

		/* dword1: */
		uint32_t pad2     : 8;
		uint32_t dst_off  : 1;
		uint32_t pad3     : 8;
		uint32_t type     : 3;
		uint32_t g        : 1;  /* or in some cases it means dst immed */
		uint32_t pad4     : 1;
		uint32_t opc      : 5;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;
	};
} instr_cat6_t;

/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 */
typedef enum {
	/* Use old GL binding model with an immediate index. */
	CAT6_IMM = 0,

	CAT6_UNIFORM = 1,

	CAT6_NONUNIFORM = 2,

	/* Use the bindless model, with an immediate index.
	 */
	CAT6_BINDLESS_IMM = 4,

	/* Use the bindless model, with a uniform register index.
	 */
	CAT6_BINDLESS_UNIFORM = 5,

	/* Use the bindless model, with a register index that isn't guaranteed
	 * to be uniform. This presumably checks if the indices are equal and
	 * splits up the load/store, because it works the way you would
	 * expect.
	 */
	CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;

/**
 * For atomic ops (which return a value):
 *
 *    pad1=1, pad3=c, pad5=3
 *    src1    - vecN offset/coords
 *    src2.x  - is actually dest register
 *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
 *              and src2.z is 'data'
 *
 * For stib (which does not return a value):
 *    pad1=0, pad3=c, pad5=2
 *    src1    - vecN offset/coords
 *    src2    - value to store
 *
 * For ldib:
 *    pad1=1, pad3=c, pad5=2
 *    src1    - vecN offset/coords
 *
 * for ldc (load from UBO using descriptor):
 *    pad1=0, pad3=8, pad5=2
 *
 * pad2 and pad5 are only observed to be 0.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 1;
	uint32_t base     : 3;
	uint32_t pad2     : 2;
	uint32_t desc_mode : 3;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t opc      : 5;
	uint32_t pad3     : 5;
	uint32_t src1     : 8;  /* coordinate/offset */

	/* dword1: */
	uint32_t src2     : 8;  /* or the dst for load instructions */
	uint32_t pad4     : 1;  //mustbe0 ??
	uint32_t ssbo     : 8;  /* ssbo/image binding point */
	uint32_t type     : 3;
	uint32_t pad5     : 7;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6_a6xx_t;

typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 32;

	/* dword1: */
	uint32_t pad2     : 12;
	uint32_t ss       : 1;  /* maybe in the encoding, but blob only uses (sy) */
	uint32_t pad3     : 6;
	uint32_t w        : 1;  /* write */
	uint32_t r        : 1;  /* read */
	uint32_t l        : 1;  /* local */
	uint32_t g        : 1;  /* global */
	uint32_t opc      : 4;  /* presumed, but only a couple known OPCs */
	uint32_t jmp_tgt  : 1;  /* (jp) */
	uint32_t sync     : 1;  /* (sy) */
	uint32_t opc_cat  : 3;
} instr_cat7_t;

typedef union PACKED {
	instr_cat0_t cat0;
	instr_cat1_t cat1;
	instr_cat2_t cat2;
	instr_cat3_t cat3;
	instr_cat4_t cat4;
	instr_cat5_t cat5;
	instr_cat6_t cat6;
	instr_cat6_a6xx_t cat6_a6xx;
	instr_cat7_t cat7;
	struct PACKED {
		/* dword0: */
		uint32_t pad1     : 32;

		/* dword1: */
		uint32_t pad2     : 12;
		uint32_t ss       : 1;  /* cat1-cat4 (cat0??) and cat7 (?) */
		uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
		uint32_t pad3     : 13;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;

	};
} instr_t;

/* Repeat field of the instruction for categories 0-4, otherwise 0. */
static inline uint32_t instr_repeat(instr_t *instr)
{
	switch (instr->opc_cat) {
	case 0:  return instr->cat0.repeat;
	case 1:  return instr->cat1.repeat;
	case 2:  return instr->cat2.repeat;
	case 3:  return instr->cat3.repeat;
	case 4:  return instr->cat4.repeat;
	default: return 0;
	}
}

/* Saturate flag of the instruction for categories 2-4, otherwise false. */
static inline bool instr_sat(instr_t *instr)
{
	switch (instr->opc_cat) {
	case 2:  return instr->cat2.sat;
	case 3:  return instr->cat3.sat;
	case 4:  return instr->cat4.sat;
	default: return false;
	}
}

/* We can probably drop the gpu_id arg, but keeping it for now so we can
 * assert if we see something we think should be new encoding on an older
 * gpu.
 */
static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
{
	instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;

	/* At least one of these two bits is pad in all the possible
	 * "legacy" cat6 encodings, and an analysis of all the pre-a6xx
	 * cmdstream traces I have indicates that the pad bit is zero
	 * in all cases.  So we can use this to detect new encoding:
	 */
	if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
		ir3_assert(gpu_id >= 600);
		ir3_assert(instr->cat6.opc == 0);
		return false;
	}

	return true;
}

/* Opcode field (within its category) of the instruction.  cat0 combines
 * opc with opc_hi as bit 4; cat1 always yields 0; cat6 picks the legacy or
 * a6xx field based on is_cat6_legacy().
 */
static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
{
	switch (instr->opc_cat) {
	case 0:  return instr->cat0.opc | (instr->cat0.opc_hi << 4);
	case 1:  return 0;
	case 2:  return instr->cat2.opc;
	case 3:  return instr->cat3.opc;
	case 4:  return instr->cat4.opc;
	case 5:  return instr->cat5.opc;
	case 6:
		if (!is_cat6_legacy(instr, gpu_id))
			return instr->cat6_a6xx.opc;
		return instr->cat6.opc;
	case 7:  return instr->cat7.opc;
	default: return 0;
	}
}

/* True for the cat3 mad.* opcodes (not madsh). */
static inline bool is_mad(opc_t opc)
{
	switch (opc) {
	case OPC_MAD_U16:
	case OPC_MAD_S16:
	case OPC_MAD_U24:
	case OPC_MAD_S24:
	case OPC_MAD_F16:
	case OPC_MAD_F32:
		return true;
	default:
		return false;
	}
}

/* True for the cat3 madsh.* opcodes. */
static inline bool is_madsh(opc_t opc)
{
	switch (opc) {
	case OPC_MADSH_U16:
	case OPC_MADSH_M16:
		return true;
	default:
		return false;
	}
}

/* True for the cat6 atomic.* opcodes. */
static inline bool is_atomic(opc_t opc)
{
	switch (opc) {
	case OPC_ATOMIC_ADD:
	case OPC_ATOMIC_SUB:
	case OPC_ATOMIC_XCHG:
	case OPC_ATOMIC_INC:
	case OPC_ATOMIC_DEC:
	case OPC_ATOMIC_CMPXCHG:
	case OPC_ATOMIC_MIN:
	case OPC_ATOMIC_MAX:
	case OPC_ATOMIC_AND:
	case OPC_ATOMIC_OR:
	case OPC_ATOMIC_XOR:
		return true;
	default:
		return false;
	}
}

/* True for resfmt, resinfo, ldgb, stgb and stib. */
static inline bool is_ssbo(opc_t opc)
{
	switch (opc) {
	case OPC_RESFMT:
	case OPC_RESINFO:
	case OPC_LDGB:
	case OPC_STGB:
	case OPC_STIB:
		return true;
	default:
		return false;
	}
}

/* True for isam, isaml and isamm. */
static inline bool is_isam(opc_t opc)
{
	switch (opc) {
	case OPC_ISAM:
	case OPC_ISAML:
	case OPC_ISAMM:
		return true;
	default:
		return false;
	}
}


/* True for the cat2 opcodes with an _F suffix, except bary.f. */
static inline bool is_cat2_float(opc_t opc)
{
	switch (opc) {
	case OPC_ADD_F:
	case OPC_MIN_F:
	case OPC_MAX_F:
	case OPC_MUL_F:
	case OPC_SIGN_F:
	case OPC_CMPS_F:
	case OPC_ABSNEG_F:
	case OPC_CMPV_F:
	case OPC_FLOOR_F:
	case OPC_CEIL_F:
	case OPC_RNDNE_F:
	case OPC_RNDAZ_F:
	case OPC_TRUNC_F:
		return true;

	default:
		return false;
	}
}

/* True for the cat3 mad.f16/f32 and sel.f16/f32 opcodes. */
static inline bool is_cat3_float(opc_t opc)
{
	switch (opc) {
	case OPC_MAD_F16:
	case OPC_MAD_F32:
	case OPC_SEL_F16:
	case OPC_SEL_F32:
		return true;
	default:
		return false;
	}
}

int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);

#endif /* INSTR_A3XX_H_ */
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "io.h" + +struct io { + struct archive *a; + struct archive_entry *entry; + unsigned offset; +}; + +static void io_error(struct io *io) +{ + fprintf(stderr, "%s\n", archive_error_string(io->a)); + io_close(io); +} + +static struct io * io_new(void) +{ + struct io *io = calloc(1, sizeof(*io)); + int ret; + + if (!io) + return NULL; + + io->a = archive_read_new(); + ret = archive_read_support_filter_gzip(io->a); + if (ret != ARCHIVE_OK) { + io_error(io); + return NULL; + } + + ret = archive_read_support_filter_none(io->a); + if (ret != ARCHIVE_OK) { + io_error(io); + return NULL; + } + + ret = archive_read_support_format_all(io->a); + if (ret != ARCHIVE_OK) { + io_error(io); + return NULL; + } + + ret = archive_read_support_format_raw(io->a); + if (ret != ARCHIVE_OK) { + io_error(io); + return NULL; + } + + return io; +} + +struct io * io_open(const char *filename) +{ + struct io *io = io_new(); + int ret; + + if (!io) + return NULL; + + ret = archive_read_open_filename(io->a, filename, 10240); + if (ret != ARCHIVE_OK) { + io_error(io); + return NULL; + } + + ret = archive_read_next_header(io->a, &io->entry); + if (ret != ARCHIVE_OK) { + io_error(io); + return NULL; + } + + return io; +} + +struct io * io_openfd(int fd) +{ + struct io *io = io_new(); + int ret; + + if (!io) + return NULL; + + ret = archive_read_open_fd(io->a, fd, 10240); + if (ret != ARCHIVE_OK) { + io_error(io); + return NULL; + } + + ret = archive_read_next_header(io->a, &io->entry); + if (ret != ARCHIVE_OK) { + io_error(io); + return NULL; + } + + return io; +} + +void io_close(struct io *io) +{ + 
archive_read_free(io->a); + free(io); +} + +unsigned io_offset(struct io *io) +{ + return io->offset; +} + +#include +int io_readn(struct io *io, void *buf, int nbytes) +{ + char *ptr = buf; + int ret = 0; + while (nbytes > 0) { + int n = archive_read_data(io->a, ptr, nbytes); + if (n < 0) { + fprintf(stderr, "%s\n", archive_error_string(io->a)); + return n; + } + if (n == 0) + break; + ptr += n; + nbytes -= n; + ret += n; + io->offset += n; + } + return ret; +} diff --git a/src/freedreno/decode/io.h b/src/freedreno/decode/io.h new file mode 100644 index 00000000000..d26ba4b82ae --- /dev/null +++ b/src/freedreno/decode/io.h @@ -0,0 +1,51 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef IO_H_ +#define IO_H_ + +/* Simple API to abstract reading from file which might be compressed. + * Maybe someday I'll add writing.. + */ + +struct io; + +struct io * io_open(const char *filename); +struct io * io_openfd(int fd); +void io_close(struct io *io); +unsigned io_offset(struct io *io); +int io_readn(struct io *io, void *buf, int nbytes); + + +static inline int +check_extension(const char *path, const char *ext) +{ + return strcmp(path + strlen(path) - strlen(ext), ext) == 0; +} + +#endif /* IO_H_ */ diff --git a/src/freedreno/decode/meson.build b/src/freedreno/decode/meson.build new file mode 100644 index 00000000000..0ec9995aa74 --- /dev/null +++ b/src/freedreno/decode/meson.build @@ -0,0 +1,144 @@ +# Copyright © 2020 Google, Inc + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +dep_lua = dependency('lua53', required: false) +if not dep_lua.found() + dep_lua = dependency('lua52', required: false) +endif +if not dep_lua.found() + dep_lua = dependency('lua', required: false) +endif + +dep_libarchive = dependency('libarchive', required: false) + +# Shared cmdstream decoding: +libfreedreno_cffdec = static_library( + 'freedreno_cffdec', + [ + 'buffers.c', + 'buffers.h', + 'cffdec.c', + 'cffdec.h', + 'disasm-a2xx.c', + 'disasm-a3xx.c', + 'disasm.h', + 'instr-a2xx.h', + 'instr-a3xx.h', + 'pager.c', + 'pager.h', + 'rnnutil.c', + 'rnnutil.h', + 'util.h', + ], + include_directories: [ + inc_freedreno_rnn, + ], + c_args : [ no_override_init_args ], + gnu_symbol_visibility: 'hidden', + dependencies: [], + link_with: libfreedreno_rnn, + build_by_default: false, +) + +if dep_libarchive.found() + libfreedreno_io = static_library( + 'libfreedreno_io', + [ + 'io.c', + 'io.h', + ], + include_directories: [], + c_args : [no_override_init_args], + gnu_symbol_visibility: 'hidden', + dependencies: [ + dep_libarchive, + ], + build_by_default: false, + ) +endif + +if dep_lua.found() and dep_libarchive.found() + cffdump = executable( + 'cffdump', + [ + 'cffdump.c', + 'script.c', + 'script.h' + ], + include_directories: [ + inc_freedreno_rnn, + ], + c_args : [no_override_init_args], + gnu_symbol_visibility: 'hidden', + dependencies: [ + dep_lua, + ], + link_with: [ + libfreedreno_cffdec, + libfreedreno_io, + ], + build_by_default: with_tools.contains('freedreno'), + install : with_tools.contains('freedreno'), + ) +endif + +crashdec = executable( + 'crashdec', + 'crashdec.c', + include_directories: [ + inc_freedreno_rnn, + ], + gnu_symbol_visibility: 'hidden', + dependencies: [], + link_with: [ + libfreedreno_cffdec, + ], + build_by_default: with_tools.contains('freedreno'), + install : with_tools.contains('freedreno'), +) + +if dep_libarchive.found() + pgmdump = executable( + 'pgmdump', + 'pgmdump.c', + include_directories: [], + gnu_symbol_visibility: 'hidden', 
+ dependencies: [], + link_with: [ + libfreedreno_cffdec, + libfreedreno_io, + ], + build_by_default: with_tools.contains('freedreno'), + install: false, + ) + pgmdump2 = executable( + 'pgmdump2', + 'pgmdump2.c', + include_directories: [], + gnu_symbol_visibility: 'hidden', + dependencies: [], + link_with: [ + libfreedreno_cffdec, + libfreedreno_io, + ], + build_by_default: with_tools.contains('freedreno'), + install: false, + ) +endif diff --git a/src/freedreno/decode/pager.c b/src/freedreno/decode/pager.c new file mode 100644 index 00000000000..fa07c10a17f --- /dev/null +++ b/src/freedreno/decode/pager.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pager.h" + +static pid_t pager_pid; + + +static void +pager_death(int n) +{ + exit(0); +} + +void +pager_open(void) +{ + int fd[2]; + + if (pipe(fd) < 0) { + fprintf(stderr, "Failed to create pager pipe: %m\n"); + exit(-1); + } + + pager_pid = fork(); + if (pager_pid < 0) { + fprintf(stderr, "Failed to fork pager: %m\n"); + exit(-1); + } + + if (pager_pid == 0) { + const char* less_opts; + + dup2(fd[0], STDIN_FILENO); + close(fd[0]); + close(fd[1]); + + less_opts = "FRSMKX"; + setenv("LESS", less_opts, 1); + + execlp("less", "less", NULL); + + } else { + /* we want to kill the parent process when pager exits: */ + signal(SIGCHLD, pager_death); + dup2(fd[1], STDOUT_FILENO); + close(fd[0]); + close(fd[1]); + } +} + +int +pager_close(void) +{ + siginfo_t status; + + close(STDOUT_FILENO); + + while (true) { + memset(&status, 0, sizeof(status)); + if (waitid(P_PID, pager_pid, &status, WEXITED) < 0) { + if (errno == EINTR) + continue; + return -errno; + } + + return 0; + } +} diff --git a/src/freedreno/decode/pager.h b/src/freedreno/decode/pager.h new file mode 100644 index 00000000000..022786eb7da --- /dev/null +++ b/src/freedreno/decode/pager.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __PAGER_H__ +#define __PAGER_H__ + +void pager_open(void); +int pager_close(void); + +#endif /* __PAGER_H__ */ diff --git a/src/freedreno/decode/pgmdump.c b/src/freedreno/decode/pgmdump.c new file mode 100644 index 00000000000..b8d7cd3837a --- /dev/null +++ b/src/freedreno/decode/pgmdump.c @@ -0,0 +1,1054 @@ +/* + * Copyright (c) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "redump.h" +#include "disasm.h" +#include "io.h" + +#define ASCII_XOR 0xff +#include "util.h" + +struct pgm_header { + uint32_t size; + uint32_t unknown1; + uint32_t unknown2; + uint32_t revision; + uint32_t unknown4; + uint32_t unknown5; + uint32_t unknown6; + uint32_t unknown7; + uint32_t unknown8; + uint32_t num_attribs; + uint32_t num_uniforms; + uint32_t num_samplers; + uint32_t num_varyings; + uint32_t num_uniformblocks; +}; + +struct vs_header { + uint32_t unknown1; /* seems to be # of sections up to and including shader */ + uint32_t unknown2; /* seems to be low byte or so of SQ_PROGRAM_CNTL */ + uint32_t unknown3; + uint32_t unknown4; + uint32_t unknown5; + uint32_t unknown6; + uint32_t unknown7; + uint32_t unknown8; + uint32_t unknown9; /* seems to be # of sections following shader */ +}; + +struct fs_header { + uint32_t unknown1; +}; +/* + // Covers a lot of type_info + // varying, attribute, uniform, sampler + type_info & 0xFF + if ((type_info >> 8) == 0x8b) // vector + 0x50 = vec2 + 0x51 = vec3 + 0x52 = vec4 + 0x53 = ivec2 + 0x54 = ivec3 + 0x55 = ivec4 + 0x56 = bool // Why is this in vector? + 0x57 = bvec2 + 0x58 = bvec3 + 0x59 = bvec4 + 0x5a = mat2 + 0x5b = mat3 + 0x5c = mat4 + 0x5a = mat2x2 // Same as mat2 + 0x65 = mat2x3 + 0x66 = mat2x4 + 0x67 = mat3x2 + 0x5b = mat3x3 // Same as mat3 + 0x68 = mat3x4 + 0x69 = mat4x2 + 0x6a = mat4x3 + 0x5c = mat4x4 // same as mat4 + 0x5e = sampler2D + 0x5f = sampler3D + 0x60 = samplerCube // XXX: Doesn't work + 0x62 = sampler2DShadow + 0xc6 = uvec2 + 0xc7 = uvec3 + 0xc8 = uvec4 + else if ((type_info >> 8) == 0x8d) // GLES3 samplers + 0xC1 = sampler2DArray + 0xC4 = sampler2DArrayShadow + 0xC5 = samplerCubeShadow + 0xCA = isampler2D + 0xCB = isampler3D + 0xCC = isamplerCube + 0xD2 = usampler2D + 0xD3 = usampler3D + 0xD4 = usamplerCube + 0xD7 = isampler2DArray + 0xD7 = usampler2DArray // Is the same as isampler2DArray? 
+ else // 0x14 = single + 0x04 = int + 0x05 = uint + 0x06 = float +*/ +struct attribute { + uint32_t type_info; + uint32_t reg; /* seems to be the register the fetch instruction loads to */ + uint32_t const_idx; /* the CONST() indx value for sampler */ + uint32_t unknown2; + uint32_t unknown3; + uint32_t unknown4; + uint32_t unknown5; + char name[]; +}; + +struct uniform { + uint32_t type_info; + uint32_t unknown2; + uint32_t unknown3; + uint32_t unknown4; + uint32_t const_base; /* const base register (for uniforms that take more than one const reg, ie. matrices) */ + uint32_t unknown6; + uint32_t const_reg; /* the const register holding the value */ + uint32_t unknown7; + uint32_t unknown8; + uint32_t unknown9; + union { + struct { + char name[1]; + } v1; + struct { + uint32_t unknown10; + uint32_t unknown11; + uint32_t unknown12; + char name[]; + } v2; + }; +}; + +struct uniformblockmember { + uint32_t type_info; + uint32_t is_array; + uint32_t array_size; /* elements in the array */ + uint32_t unknown2; /* Same as array_size */ + uint32_t unknown3; /* Seems to be a offset within UBO in vertex (by components) */ + uint32_t unknown4; + uint32_t unknown5; /* Seems to be a offset within UBO in fragment (by vec4) */ + uint32_t unknown6; + uint32_t unknown7; + uint32_t unknown8; + uint32_t unknown9; /* UBO block index? 
*/ + uint32_t unknown10; + uint32_t unknown11; + uint32_t unknown12; + char name[]; +}; + +struct uniformblock +{ + uint32_t type_info; + uint32_t unknown1; + uint32_t unknown2; + uint32_t unknown3; + uint32_t unknown4; + uint32_t num_members; + uint32_t num_members2; + uint32_t unknown5; + uint32_t unknown6; + uint32_t unknown7; + char name[]; +}; + + +struct sampler { + uint32_t type_info; + uint32_t is_array; + uint32_t array_size; /* elements in the array */ + uint32_t unknown4; /* same as array_size */ + uint32_t unknown5; + uint32_t unknown6; + uint32_t const_idx; /* the CONST() indx value for the sampler */ + uint32_t unknown7; + char name[]; +}; + +struct varying { + uint32_t type_info; + uint32_t unknown2; + uint32_t unknown3; + uint32_t reg; /* the register holding the value (on entry to the shader) */ + char name[]; +}; + +struct output { + uint32_t type_info; + uint32_t unknown2; + uint32_t unknown3; + uint32_t unknown4; + uint32_t unknown5; + uint32_t unknown6; + uint32_t unknown7; + uint32_t unknown8; + char name[]; +}; + +struct constant { + uint32_t unknown1; + uint32_t unknown2; + uint32_t unknown3; + uint32_t const_idx; + float val[]; +}; + +struct state { + char *buf; + int sz; + struct pgm_header *hdr; + struct attribute *attribs[32]; /* don't really know the upper limit.. */ + struct uniform *uniforms[32]; + struct sampler *samplers[32]; + struct varying *varyings[32]; + struct { + struct uniformblock *header; + struct uniformblockmember **members; /* GL ES 3.0 spec mandates minimum 16K support. a3xx supports 65K */ + } uniformblocks[24]; /* Maximum a330 supports */ + struct output *outputs[0]; /* I guess only one?? 
*/ +}; + +static const char *infile; +static int full_dump = 1; +static int dump_shaders = 0; +static int gpu_id; + +static char *find_sect_end(char *buf, int sz) +{ + uint8_t *ptr = (uint8_t *)buf; + uint8_t *end = ptr + sz - 3; + + while (ptr < end) { + uint32_t d = 0; + + d |= ptr[0] << 0; + d |= ptr[1] << 8; + d |= ptr[2] << 16; + d |= ptr[3] << 24; + + /* someone at QC likes baseball */ + if (d == 0xba5eba11) + return (char *)ptr; + + ptr++; + } + return NULL; +} + +static void *next_sect(struct state *state, int *sect_size) +{ + char *end = find_sect_end(state->buf, state->sz); + void *sect; + + if (!end) + return NULL; + + *sect_size = end - state->buf; + + /* copy the section to keep things nicely 32b aligned: */ + sect = malloc(ALIGN(*sect_size, 4)); + memcpy(sect, state->buf, *sect_size); + + state->sz -= *sect_size + 4; + state->buf = end + 4; + + return sect; +} + +static int valid_type(uint32_t type_info) +{ + switch ((type_info >> 8) & 0xff) { + case 0x8b: /* vector */ + case 0x8d: /* GLES3 samplers */ + case 0x14: /* float */ + return 1; + default: + return 0; + } +} + +#if 0 +static int valid_uniformblock(uint32_t type_info) +{ + if (type_info == 0x128) + return 1; + return 0; +} +#endif + +static void dump_attribute(struct attribute *attrib) +{ + printf("\tR%d, CONST(%d): %s\n", attrib->reg, + attrib->const_idx, attrib->name); +} + +static inline int is_uniform_v2(struct uniform *uniform) +{ + /* TODO maybe this should be based on revision #? */ + if (uniform->v2.unknown10 == 0) + return 1; + return 0; +} + +static void dump_uniform(struct uniform *uniform) +{ + char *name = is_uniform_v2(uniform) ? 
uniform->v2.name : uniform->v1.name;
+	if (uniform->const_reg == -1) {
+		printf("\tC%d+: %s\n", uniform->const_base, name);
+	} else {
+		printf("\tC%d: %s\n", uniform->const_reg, name);
+	}
+}
+
+static void dump_sampler(struct sampler *sampler)
+{
+	printf("\tCONST(%d): %s\n", sampler->const_idx, sampler->name);
+}
+
+static void dump_varying(struct varying *varying)
+{
+	printf("\tR%d: %s\n", varying->reg, varying->name);
+}
+
+static void dump_uniformblock(struct uniformblock *uniformblock)
+{
+	printf("\tUniform Block: %s(%d)\n", uniformblock->name, uniformblock->num_members);
+}
+
+static void dump_uniformblockmember(struct uniformblockmember *member)
+{
+	printf("Uniform Block member: %s\n", member->name);
+}
+
+static void dump_output(struct output *output)
+{
+	printf("\tR?: %s\n", output->name);
+}
+
+static void dump_constant(struct constant *constant)
+{
+	printf("\tC%d: %f, %f, %f, %f\n", constant->const_idx,
+			constant->val[0], constant->val[1],
+			constant->val[2], constant->val[3]);
+}
+
+/* dump attr/uniform/sampler/varying/const summary: */
+static void dump_short_summary(struct state *state, int nconsts,
+		struct constant **constants)
+{
+	int i;
+
+	/* dump attr/uniform/sampler/varying/const summary: */
+	for (i = 0; i < state->hdr->num_varyings; i++) {
+		dump_varying(state->varyings[i]);
+	}
+	for (i = 0; i < state->hdr->num_attribs; i++) {
+		dump_attribute(state->attribs[i]);
+	}
+	for (i = 0; i < state->hdr->num_uniforms; i++) {
+		dump_uniform(state->uniforms[i]);
+	}
+	for (i = 0; i < state->hdr->num_samplers; i++) {
+		dump_sampler(state->samplers[i]);
+	}
+	for (i = 0; i < nconsts - 1; i++) {
+		if (constants[i]->unknown2 == 0) {
+			dump_constant(constants[i]);
+		}
+	}
+	printf("\n");
+}
+
+/* When --dump-shaders is set, write 'sizedwords' dwords of raw shader to a
+ * file named after the input file (last three characters dropped) with
+ * "-<n>.<ext>" appended.  Failures are reported on stderr and otherwise
+ * ignored, since the dump is only a debugging aid.
+ */
+static void dump_raw_shader(uint32_t *dwords, uint32_t sizedwords, int n, char *ext)
+{
+	static char filename[256];
+	int fd;
+
+	if (!dump_shaders)
+		return;
+
+	/* bounded: infile comes from the command line and may be long */
+	snprintf(filename, sizeof(filename), "%.*s-%d.%s", (int)strlen(infile)-3, infile, n, ext);
+	fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
+	if (fd < 0) {
+		fprintf(stderr, "could not open %s: %m\n", filename);
+		return;
+	}
+	if (write(fd, dwords, sizedwords * 4) < 0)
+		fprintf(stderr, "could not write %s: %m\n", filename);
+	close(fd);
+}
+
+/* Dump (and disassemble) the three vertex shader variants followed by the
+ * fragment shader of an a2xx program.
+ */
+static void dump_shaders_a2xx(struct state *state)
+{
+	int i, sect_size;
+	uint8_t *ptr;
+
+	/* dump vertex shaders: */
+	for (i = 0; i < 3; i++) {
+		struct vs_header *vs_hdr = next_sect(state, &sect_size);
+		struct constant *constants[32];
+		int j, level = 0;
+
+		printf("\n");
+
+		if (full_dump) {
+			printf("#######################################################\n");
+			printf("######## VS%d HEADER: (size %d)\n", i, sect_size);
+			dump_hex((void *)vs_hdr, sect_size);
+		}
+
+		for (j = 0; j < (int)vs_hdr->unknown1 - 1; j++) {
+			constants[j] = next_sect(state, &sect_size);
+			if (full_dump) {
+				printf("######## VS%d CONST: (size=%d)\n", i, sect_size);
+				dump_constant(constants[j]);
+				dump_hex((char *)constants[j], sect_size);
+			}
+		}
+
+		ptr = next_sect(state, &sect_size);
+		printf("######## VS%d SHADER: (size=%d)\n", i, sect_size);
+		if (full_dump) {
+			dump_hex(ptr, sect_size);
+			level = 1;
+		} else {
+			dump_short_summary(state, vs_hdr->unknown1 - 1, constants);
+		}
+		disasm_a2xx((uint32_t *)(ptr + 32), (sect_size - 32) / 4, level+1, SHADER_VERTEX);
+		dump_raw_shader((uint32_t *)(ptr + 32), (sect_size - 32) / 4, i, "vo");
+		free(ptr);
+
+		for (j = 0; j < vs_hdr->unknown9; j++) {
+			ptr = next_sect(state, &sect_size);
+			if (full_dump) {
+				printf("######## VS%d CONST?: (size=%d)\n", i, sect_size);
+				dump_hex(ptr, sect_size);
+			}
+			free(ptr);
+		}
+
+		/* signed bound, matching the loop that filled constants[]: the
+		 * unsigned 'unknown1 - 1' wraps to a huge count when unknown1 is 0
+		 */
+		for (j = 0; j < (int)vs_hdr->unknown1 - 1; j++) {
+			free(constants[j]);
+		}
+
+		free(vs_hdr);
+	}
+
+	/* dump fragment shaders: */
+	for (i = 0; i < 1; i++) {
+		struct fs_header *fs_hdr = next_sect(state, &sect_size);
+		struct constant *constants[32];
+		int j, level = 0;
+
+		printf("\n");
+
+		if (full_dump) {
+			printf("#######################################################\n");
+			printf("######## FS%d HEADER: (size %d)\n", i, sect_size);
+			dump_hex((void *)fs_hdr, sect_size);
+		}
+
+		/* signed bound, see the vertex shader loops above */
+		for (j = 0; j < (int)fs_hdr->unknown1 - 1; j++) {
+			constants[j] = next_sect(state, &sect_size);
+ if (full_dump) { + printf("######## FS%d CONST: (size=%d)\n", i, sect_size); + dump_constant(constants[j]); + dump_hex((char *)constants[j], sect_size); + } + } + + ptr = next_sect(state, §_size); + printf("######## FS%d SHADER: (size=%d)\n", i, sect_size); + if (full_dump) { + dump_hex(ptr, sect_size); + level = 1; + } else { + dump_short_summary(state, fs_hdr->unknown1 - 1, constants); + } + disasm_a2xx((uint32_t *)(ptr + 32), (sect_size - 32) / 4, level+1, SHADER_FRAGMENT); + dump_raw_shader((uint32_t *)(ptr + 32), (sect_size - 32) / 4, i, "fo"); + free(ptr); + + for (j = 0; j < fs_hdr->unknown1 - 1; j++) { + free(constants[j]); + } + + free(fs_hdr); + } +} + +static void dump_shaders_a3xx(struct state *state) +{ + int i, j; + + /* dump vertex shaders: */ + for (i = 0; i < 2; i++) { + int instrs_size, hdr_size, sect_size, nconsts = 0, level = 0, compact = 0; + uint8_t *vs_hdr; + struct constant *constants[32]; + uint8_t *instrs = NULL; + + vs_hdr = next_sect(state, &hdr_size); +printf("hdr_size=%d\n", hdr_size); + + /* seems like there are two cases, either: + * 1) 152 byte header, + * 2) zero or more 32 byte compiler const sections + * 3) followed by shader instructions + * or, if there are no compiler consts, this can be + * all smashed in one large section + */ + int n; + if (state->hdr->revision >= 0xb) + n = 160; + else if (state->hdr->revision >= 7) + n = 156; + else + n = 152; + if (hdr_size > n) { + instrs = &vs_hdr[n]; + instrs_size = hdr_size - n; + hdr_size = n; + compact = 1; + } else { + while (1) { + void *ptr = next_sect(state, §_size); + + if ((sect_size != 32) && (sect_size != 44)) { + /* end of constants: */ + instrs = ptr; + instrs_size = sect_size; + break; + } + dump_hex_ascii(ptr, sect_size, 0); + constants[nconsts++] = ptr; + } + } + + printf("\n"); + + if (full_dump) { + printf("#######################################################\n"); + printf("######## VS%d HEADER: (size %d)\n", i, hdr_size); + dump_hex((void *)vs_hdr, hdr_size); + 
for (j = 0; j < nconsts; j++) { + printf("######## VS%d CONST: (size=%d)\n", i, (int)sizeof(constants[i])); + dump_constant(constants[j]); + dump_hex((char *)constants[j], sizeof(constants[j])); + } + } + + printf("######## VS%d SHADER: (size=%d)\n", i, instrs_size); + if (full_dump) { + dump_hex(instrs, instrs_size); + level = 1; + } else { + dump_short_summary(state, nconsts, constants); + } + + if (!compact) { + if (state->hdr->revision >= 7) { + instrs += ALIGN(instrs_size, 8) - instrs_size; + instrs_size = ALIGN(instrs_size, 8); + } + instrs += 32; + instrs_size -= 32; + } + + disasm_a3xx((uint32_t *)instrs, instrs_size / 4, level+1, SHADER_VERTEX, gpu_id); + dump_raw_shader((uint32_t *)instrs, instrs_size / 4, i, "vo3"); + free(vs_hdr); + } + + /* dump fragment shaders: */ + for (i = 0; i < 1; i++) { + int instrs_size, hdr_size, sect_size, nconsts = 0, level = 0, compact = 0; + uint8_t *fs_hdr; + struct constant *constants[32]; + uint8_t *instrs = NULL; + + fs_hdr = next_sect(state, &hdr_size); + +printf("hdr_size=%d\n", hdr_size); + /* two cases, similar to vertex shader, but magic # is 200 + * (or 208 for newer?).. 
+ */ + int n; + if (state->hdr->revision >= 0xb) + n = 256; + else if (state->hdr->revision >= 8) + n = 208; + else if (state->hdr->revision == 7) + n = 204; + else + n = 200; + + if (hdr_size > n) { + instrs = &fs_hdr[n]; + instrs_size = hdr_size - n; + hdr_size = n; + compact = 1; + } else { + while (1) { + void *ptr = next_sect(state, §_size); + + if ((sect_size != 32) && (sect_size != 44)) { + /* end of constants: */ + instrs = ptr; + instrs_size = sect_size; + break; + } + + dump_hex_ascii(ptr, sect_size, 0); + constants[nconsts++] = ptr; + } + } + + printf("\n"); + + if (full_dump) { + printf("#######################################################\n"); + printf("######## FS%d HEADER: (size %d)\n", i, hdr_size); + dump_hex((void *)fs_hdr, hdr_size); + for (j = 0; j < nconsts; j++) { + printf("######## FS%d CONST: (size=%d)\n", i, (int)sizeof(constants[i])); + dump_constant(constants[j]); + dump_hex((char *)constants[j], sizeof(constants[j])); + } + } + + printf("######## FS%d SHADER: (size=%d)\n", i, instrs_size); + if (full_dump) { + dump_hex(instrs, instrs_size); + level = 1; + } else { + dump_short_summary(state, nconsts, constants); + } + + if (!compact) { + if (state->hdr->revision >= 7) { + instrs += 44; + instrs_size -= 44; + } else { + instrs += 32; + instrs_size -= 32; + } + } + disasm_a3xx((uint32_t *)instrs, instrs_size / 4, level+1, stdout, gpu_id); + dump_raw_shader((uint32_t *)instrs, instrs_size / 4, i, "fo3"); + free(fs_hdr); + } +} + +static void dump_program(struct state *state) +{ + int i, sect_size; + uint8_t *ptr; + + state->hdr = next_sect(state, §_size); + + printf("######## HEADER: (size %d)\n", sect_size); + printf("\tsize: %d\n", state->hdr->size); + printf("\trevision: %d\n", state->hdr->revision); + printf("\tattributes: %d\n", state->hdr->num_attribs); + printf("\tuniforms: %d\n", state->hdr->num_uniforms); + printf("\tsamplers: %d\n", state->hdr->num_samplers); + printf("\tvaryings: %d\n", state->hdr->num_varyings); + 
printf("\tuniform blocks: %d\n", state->hdr->num_uniformblocks); + if (full_dump) + dump_hex((void *)state->hdr, sect_size); + printf("\n"); + + /* there seems to be two 0xba5eba11's at the end of the header, possibly + * with some other stuff between them: + */ + ptr = next_sect(state, §_size); + if (full_dump) { + dump_hex_ascii(ptr, sect_size, 0); + } + + for (i = 0; (i < state->hdr->num_attribs) && (state->sz > 0); i++) { + state->attribs[i] = next_sect(state, §_size); + + /* hmm, for a3xx (or maybe just newer driver version), we have some + * extra sections that don't seem useful, so skip these: + */ + while (!valid_type(state->attribs[i]->type_info)) { + dump_hex_ascii(state->attribs[i], sect_size, 0); + state->attribs[i] = next_sect(state, §_size); + } + + clean_ascii(state->attribs[i]->name, sect_size - 28); + if (full_dump) { + printf("######## ATTRIBUTE: (size %d)\n", sect_size); + dump_attribute(state->attribs[i]); + dump_hex((char *)state->attribs[i], sect_size); + } + } + + for (i = 0; (i < state->hdr->num_uniforms) && (state->sz > 0); i++) { + state->uniforms[i] = next_sect(state, §_size); + + /* hmm, for a3xx (or maybe just newer driver version), we have some + * extra sections that don't seem useful, so skip these: + */ + while (!valid_type(state->uniforms[i]->type_info)) { + dump_hex_ascii(state->uniforms[i], sect_size, 0); + state->uniforms[i] = next_sect(state, §_size); + } + + if (is_uniform_v2(state->uniforms[i])) { + clean_ascii(state->uniforms[i]->v2.name, sect_size - 53); + } else { + clean_ascii(state->uniforms[i]->v1.name, sect_size - 41); + } + + if (full_dump) { + printf("######## UNIFORM: (size %d)\n", sect_size); + dump_uniform(state->uniforms[i]); + dump_hex((char *)state->uniforms[i], sect_size); + } + } + + for (i = 0; (i < state->hdr->num_samplers) && (state->sz > 0); i++) { + state->samplers[i] = next_sect(state, §_size); + + /* hmm, for a3xx (or maybe just newer driver version), we have some + * extra sections that don't seem 
useful, so skip these: + */ + while (!valid_type(state->samplers[i]->type_info)) { + dump_hex_ascii(state->samplers[i], sect_size, 0); + state->samplers[i] = next_sect(state, §_size); + } + + clean_ascii(state->samplers[i]->name, sect_size - 33); + if (full_dump) { + printf("######## SAMPLER: (size %d)\n", sect_size); + dump_sampler(state->samplers[i]); + dump_hex((char *)state->samplers[i], sect_size); + } + + } + + // These sections show up after all of the other sampler sections + // Loops through them all since we don't deal with them + if (state->hdr->revision >= 7) { + for (i = 0; (i < state->hdr->num_samplers) && (state->sz > 0); i++) { + ptr = next_sect(state, §_size); + dump_hex_ascii(ptr, sect_size, 0); + } + } + + + for (i = 0; (i < state->hdr->num_varyings) && (state->sz > 0); i++) { + state->varyings[i] = next_sect(state, §_size); + + /* hmm, for a3xx (or maybe just newer driver version), we have some + * extra sections that don't seem useful, so skip these: + */ + while (!valid_type(state->varyings[i]->type_info)) { + dump_hex_ascii(state->varyings[i], sect_size, 0); + state->varyings[i] = next_sect(state, §_size); + } + + clean_ascii(state->varyings[i]->name, sect_size - 16); + if (full_dump) { + printf("######## VARYING: (size %d)\n", sect_size); + dump_varying(state->varyings[i]); + dump_hex((char *)state->varyings[i], sect_size); + } + } + + /* show up again for revision >= 14?? */ + if (state->hdr->revision >= 14) { + for (i = 0; (i < state->hdr->num_varyings) && (state->sz > 0); i++) { + ptr = next_sect(state, §_size); + dump_hex_ascii(ptr, sect_size, 0); + } + } + + /* not sure exactly which revision started this, but seems at least + * rev7 and rev8 implicitly include a new section for gl_FragColor: + */ + if (state->hdr->revision >= 7) { + /* I guess only one? 
*/ + state->outputs[0] = next_sect(state, §_size); + + clean_ascii(state->outputs[0]->name, sect_size - 32); + if (full_dump) { + printf("######## OUTPUT: (size %d)\n", sect_size); + dump_output(state->outputs[0]); + dump_hex((char *)state->outputs[0], sect_size); + } + } + + for (i = 0; (i < state->hdr->num_uniformblocks) && (state->sz > 0); i++) { + state->uniformblocks[i].header = next_sect(state, §_size); + + clean_ascii(state->uniformblocks[i].header->name, sect_size - 40); + if (full_dump) { + printf("######## UNIFORM BLOCK: (size %d)\n", sect_size); + dump_uniformblock(state->uniformblocks[i].header); + dump_hex((char *)state->uniformblocks[i].header, sect_size); + } + + /* + * OpenGL ES 3.0 spec mandates a minimum amount of 16K members supported + * a330 supports a minimum of 65K + */ + state->uniformblocks[i].members = malloc(state->uniformblocks[i].header->num_members * sizeof(void*)); + + int member = 0; + for (member = 0; (member < state->uniformblocks[i].header->num_members) && (state->sz > 0); member++) { + state->uniformblocks[i].members[member] = next_sect(state, §_size); + + clean_ascii(state->uniformblocks[i].members[member]->name, sect_size - 56); + if (full_dump) { + printf("######## UNIFORM BLOCK MEMBER: (size %d)\n", sect_size); + dump_uniformblockmember(state->uniformblocks[i].members[member]); + dump_hex((char *)state->uniformblocks[i].members[member], sect_size); + } + } + /* + * Qualcomm saves the UBO members twice for each UBO + * Don't ask me why + */ + for (member = 0; (member < state->uniformblocks[i].header->num_members) && (state->sz > 0); member++) { + state->uniformblocks[i].members[member] = next_sect(state, §_size); + + clean_ascii(state->uniformblocks[i].members[member]->name, sect_size - 56); + if (full_dump) { + printf("######## UNIFORM BLOCK MEMBER2: (size %d)\n", sect_size); + dump_uniformblockmember(state->uniformblocks[i].members[member]); + dump_hex((char *)state->uniformblocks[i].members[member], sect_size); + } + } + } 
+ + if (gpu_id >= 300) { + dump_shaders_a3xx(state); + } else { + dump_shaders_a2xx(state); + } + + if (!full_dump) + return; + + /* dump ascii version of shader program: */ + ptr = next_sect(state, §_size); + printf("\n#######################################################\n"); + printf("######## SHADER SRC: (size=%d)\n", sect_size); + dump_ascii(ptr, sect_size); + free(ptr); + + /* dump remaining sections (there shouldn't be any): */ + while (state->sz > 0) { + ptr = next_sect(state, §_size); + printf("######## section (size=%d)\n", sect_size); + printf("as hex:\n"); + dump_hex(ptr, sect_size); + printf("as float:\n"); + dump_float(ptr, sect_size); + printf("as ascii:\n"); + dump_ascii(ptr, sect_size); + free(ptr); + } + /* cleanup the uniform buffer members we allocated */ + if (state->hdr->num_uniformblocks > 0) + free (state->uniformblocks[i].members); +} + +int main(int argc, char **argv) +{ + enum rd_sect_type type = RD_NONE; + enum debug_t debug = 0; + void *buf = NULL; + int sz; + struct io *io; + int raw_program = 0; + + /* lame argument parsing: */ + + while (1) { + if ((argc > 1) && !strcmp(argv[1], "--verbose")) { + debug |= PRINT_RAW | PRINT_VERBOSE; + argv++; + argc--; + continue; + } + if ((argc > 1) && !strcmp(argv[1], "--expand")) { + debug |= EXPAND_REPEAT; + argv++; + argc--; + continue; + } + if ((argc > 1) && !strcmp(argv[1], "--short")) { + /* only short dump, original shader, symbol table, and disassembly */ + full_dump = 0; + argv++; + argc--; + continue; + } + if ((argc > 1) && !strcmp(argv[1], "--dump-shaders")) { + dump_shaders = 1; + argv++; + argc--; + continue; + } + if ((argc > 1) && !strcmp(argv[1], "--raw")) { + raw_program = 1; + argv++; + argc--; + continue; + } + if ((argc > 1) && !strcmp(argv[1], "--gpu300")) { + gpu_id = 320; + argv++; + argc--; + continue; + } + break; + } + + if (argc != 2) { + fprintf(stderr, "usage: pgmdump [--verbose] [--short] [--dump-shaders] testlog.rd\n"); + return -1; + } + + 
disasm_set_debug(debug); + + infile = argv[1]; + + io = io_open(infile); + if (!io) { + fprintf(stderr, "could not open: %s\n", infile); + return -1; + } + + if (raw_program) + { + io_readn(io, &sz, 4); + free(buf); + + /* note: allow hex dumps to go a bit past the end of the buffer.. + * might see some garbage, but better than missing the last few bytes.. + */ + buf = calloc(1, sz + 3); + io_readn(io, buf + 4, sz); + (*(int*)buf) = sz; + + struct state state = { + .buf = buf, + .sz = sz, + }; + printf("############################################################\n"); + printf("program:\n"); + dump_program(&state); + printf("############################################################\n"); + return 0; + } + + /* figure out what sort of input we are dealing with: */ + if (!(check_extension(infile, ".rd") || check_extension(infile, ".rd.gz"))) { + enum shader_t shader = ~0; + int ret; + if (check_extension(infile, ".vo")) { + shader = SHADER_VERTEX; + } else if (check_extension(infile, ".fo")) { + shader = SHADER_FRAGMENT; + } else if (check_extension(infile, ".vo3")) { + } else if (check_extension(infile, ".fo3")) { + } else if (check_extension(infile, ".co3")) { + } else { + fprintf(stderr, "invalid input file: %s\n", infile); + return -1; + } + buf = calloc(1, 100 * 1024); + ret = io_readn(io, buf, 100 * 1024); + if (ret < 0) { + fprintf(stderr, "error: %m"); + return -1; + } + if (shader != ~0) { + return disasm_a2xx(buf, ret/4, 0, shader); + } else { + /* disassembly does not depend on shader stage on a3xx+: */ + return disasm_a3xx(buf, ret/4, 0, stdout, gpu_id); + } + } + + while ((io_readn(io, &type, sizeof(type)) > 0) && (io_readn(io, &sz, 4) > 0)) { + free(buf); + + /* note: allow hex dumps to go a bit past the end of the buffer.. + * might see some garbage, but better than missing the last few bytes.. 
+ */ + buf = calloc(1, sz + 3); + io_readn(io, buf, sz); + + switch(type) { + case RD_TEST: + if (full_dump) + printf("test: %s\n", (char *)buf); + break; + case RD_VERT_SHADER: + printf("vertex shader:\n%s\n", (char *)buf); + break; + case RD_FRAG_SHADER: + printf("fragment shader:\n%s\n", (char *)buf); + break; + case RD_PROGRAM: { + struct state state = { + .buf = buf, + .sz = sz, + }; + printf("############################################################\n"); + printf("program:\n"); + dump_program(&state); + printf("############################################################\n"); + break; + } + case RD_GPU_ID: + gpu_id = *((unsigned int *)buf); + printf("gpu_id: %d\n", gpu_id); + break; + default: + break; + } + } + + io_close(io); + + return 0; +} + diff --git a/src/freedreno/decode/pgmdump2.c b/src/freedreno/decode/pgmdump2.c new file mode 100644 index 00000000000..7410bcd3179 --- /dev/null +++ b/src/freedreno/decode/pgmdump2.c @@ -0,0 +1,585 @@ +/* + * Copyright (c) 2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Decoder for "new" GL_OES_get_program_binary format. + * + * Overall structure is: + * + * - header at top, contains, amongst other things, offsets of + * per shader stage sections. + * - per shader stage section (shader_info) starts with a header, + * followed by a variably length list of descriptors. Each + * descriptor has a type/count/size plus offset from the start + * of shader_info section where the data is found + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "redump.h" +#include "disasm.h" +#include "io.h" +#include "util.h" + +const char *infile; +static int dump_full = 0; +static int dump_offsets = 0; +static int gpu_id = 320; +static int shaderdb = 0; /* output shaderdb style traces to stderr */ + +struct state { + char *buf; + int sz; + int lvl; + + /* current shader_info section, some offsets calculated relative to + * this, rather than relative to start of buffer. 
+ */ + void *shader; + + /* size of each entry within a shader_descriptor_blk: */ + int desc_size; + + const char *shader_type; + int full_regs; + int half_regs; +}; + +#define PACKED __attribute__((__packed__)) + +#define OFF(field) do { \ + if (dump_offsets) \ + printf("%08x: ", (uint32_t)((char *)&field - state->buf));\ + } while (0) + +/* decode field as hex */ +#define X(s, field) do { \ + OFF(s->field); \ + printf("%s%12s:\t0x%x\n", tab(state->lvl), #field, s->field); \ + } while (0) + +/* decode field as digit */ +#define D(s, field) do { \ + OFF(s->field); \ + printf("%s%12s:\t%u\n", tab(state->lvl), #field, s->field); \ + } while (0) + +/* decode field as float/hex */ +#define F(s, field) do { \ + OFF(s->field); \ + printf("%s%12s:\t%f (0x%0x)\n", tab(state->lvl), #field, \ + d2f(s->field), s->field); \ + } while (0) + +/* decode field as register: (type is 'r' or 'c') */ +#define R(s, field, type) do { \ + OFF(s->field); \ + printf("%s%12s:\t%c%u.%c\n", tab(state->lvl), #field, type, \ + (s->field >> 2), "xyzw"[s->field & 0x3]); \ + } while (0) + +/* decode inline string (presumably null terminated?) 
*/ +#define S(s, field) do { \ + OFF(s->field); \ + printf("%s%12s:\t%s\n", tab(state->lvl), #field, s->field); \ + } while (0) + +/* decode string-table string */ +#define T(s, field) TODO + +/* decode field as unknown */ +#define U(s, start, end) \ + dump_unknown(state, s->unk_ ## start ## _ ## end, 0x ## start, (4 + 0x ## end - 0x ## start) / 4) + +/* decode field as offset to other section */ +#define O(s, field, type) do { \ + X(s, field); \ + assert(s->field < state->sz); \ + void *_p = &state->buf[s->field]; \ + state->lvl++; \ + decode_ ## type (state, _p); \ + state->lvl--; \ + } while (0) + +struct shader_info; +static void decode_shader_info(struct state *state, struct shader_info *info); + +static void dump_unknown(struct state *state, void *buf, unsigned start, unsigned n) +{ + uint32_t *ptr = buf; + uint8_t *ascii = buf; + + for (unsigned i = 0; i < n; i++) { + uint32_t d = ptr[i]; + + if (dump_offsets) + printf("%08x:", (uint32_t)((char *)&ptr[i] - state->buf)); + + printf("%s %04x:\t%08x", tab(state->lvl), start + i * 4, d); + + printf("\t|"); + for (unsigned j = 0; j < 4; j++) { + uint8_t c = *(ascii++); + printf("%c", (isascii(c) && !iscntrl(c)) ? c : '.'); + } + printf("|\t%f", d2f(d)); + + /* TODO maybe scan for first non-null and non-ascii char starting from + * end of shader binary to (roughly) establish the start of the string + * table.. that would be a bit better filter for deciding if something + * might be a pointer into the string table. Also, the previous char + * to what it points to should probably be null. + */ + if ((d < state->sz) && + isascii(state->buf[d]) && + (strlen(&state->buf[d]) > 2) && + isascii(state->buf[d+1])) + printf("\t<== %s", &state->buf[d]); + + printf("\n"); + } +} + +struct PACKED header { + uint32_t version; /* I guess, always b10bcace ? */ + uint32_t unk_0004_0014[5]; + uint32_t size; + uint32_t size2; /* just to be sure? */ + uint32_t unk_0020_0020[1]; + uint32_t chksum; /* I guess? 
Small changes seem to result in big diffs here */ + uint32_t unk_0028_0050[11]; + uint32_t fs_info; /* offset of FS shader_info section */ + uint32_t unk_0058_0090[15]; + uint32_t vs_info; /* offset of VS shader_info section */ + uint32_t unk_0098_00b0[7]; + uint32_t vs_info2; /* offset of VS shader_info section (again?) */ + uint32_t unk_00b8_0110[23]; + uint32_t bs_info; /* offset of binning shader_info section */ +}; + +static void decode_header(struct state *state, struct header *hdr) +{ + X(hdr, version); + U(hdr, 0004, 0014); + X(hdr, size); + X(hdr, size2); + U(hdr, 0020, 0020); + X(hdr, chksum); + U(hdr, 0028, 0050); + state->shader_type = "FRAG"; + O(hdr, fs_info, shader_info); + U(hdr, 0058, 0090); + state->shader_type = "VERT"; + O(hdr, vs_info, shader_info); + U(hdr, 0098, 00b0); + assert(hdr->vs_info == hdr->vs_info2); /* not sure what this if it is ever different */ + X(hdr, vs_info2); + U(hdr, 00b8, 0110); + state->shader_type = "BVERT"; + O(hdr, bs_info, shader_info); + + /* not sure how much of the rest of contents before start of fs_info + * is the header, vs other things.. just dump it all as unknown for + * now: + */ + dump_unknown(state, (void *)hdr + sizeof(*hdr), + sizeof(*hdr), (hdr->fs_info - sizeof(*hdr)) / 4); +} + +struct PACKED shader_entry_point { + /* entry point name, ie. 
"main" of TBD length, followed by unknown */ + char name[8]; +}; + +static void decode_shader_entry_point(struct state *state, + struct shader_entry_point *e) +{ + S(e, name); +} + +struct PACKED shader_config { + uint32_t unk_0000_0008[3]; + uint32_t full_regs; + uint32_t half_regs; +}; + +static void decode_shader_config(struct state *state, struct shader_config *cfg) +{ + U(cfg, 0000, 0008); + D(cfg, full_regs); + D(cfg, half_regs); + + state->full_regs = cfg->full_regs; + state->half_regs = cfg->half_regs; + + /* dump reset of unknown (size differs btwn versions) */ + dump_unknown(state, (void *)cfg + sizeof(*cfg), sizeof(*cfg), + (state->desc_size - sizeof(*cfg))/4); +} + +struct PACKED shader_io_block { + /* name of TBD length followed by unknown.. 42 dwords total */ + char name[20]; + uint32_t unk_0014_00a4[37]; +}; + +static void decode_shader_io_block(struct state *state, + struct shader_io_block *io) +{ + S(io, name); + U(io, 0014, 00a4); +} + +struct PACKED shader_constant_block { + uint32_t value; + uint32_t unk_0004_000c[3]; + uint32_t regid; + uint32_t unk_0014_0024[5]; +}; + +static void decode_shader_constant_block(struct state *state, + struct shader_constant_block *c) +{ + F(c, value); + U(c, 0004, 000c); + R(c, regid, 'c'); + U(c, 0014, 0024); +} + +enum { + ENTRY_POINT = 0, /* shader_entry_point */ + SHADER_CONFIG = 1, /* XXX placeholder name */ + SHADER_INPUT = 2, /* shader_io_block */ + SHADER_OUTPUT = 3, /* shader_io_block */ + CONSTANTS = 6, /* shader_constant_block */ + INTERNAL = 8, /* internal input, like bary.f coord */ + SHADER = 10, +} shader_info_block_type; + +/* Refers to location of some type of records, with an offset relative to + * start of shader_info block. 
+ */ +struct PACKED shader_descriptor_block { + uint32_t type; /* block type */ + uint32_t offset; /* offset (relative to start of shader_info block) */ + uint32_t size; /* size in bytes */ + uint32_t count; /* number of records */ + uint32_t unk_0010_0010[1]; +}; + +static void decode_shader_descriptor_block(struct state *state, + struct shader_descriptor_block *blk) +{ + D(blk, type); + X(blk, offset); + D(blk, size); + D(blk, count); + U(blk, 0010, 0010); + + /* offset relative to current shader block: */ + void *ptr = state->shader + blk->offset; + + if (blk->count == 0) { + assert(blk->size == 0); + } else { + assert((blk->size % blk->count) == 0); + } + + state->desc_size = blk->size / blk->count; + state->lvl++; + for (unsigned i = 0; i < blk->count; i++) { + switch (blk->type) { + case ENTRY_POINT: + printf("%sentry point %u:\n", tab(state->lvl-1), i); + decode_shader_entry_point(state, ptr); + break; + case SHADER_CONFIG: + printf("%sconfig %u:\n", tab(state->lvl-1), i); + decode_shader_config(state, ptr); + break; + case SHADER_INPUT: + printf("%sinput %u:\n", tab(state->lvl-1), i); + decode_shader_io_block(state, ptr); + break; + case SHADER_OUTPUT: + printf("%soutput %u:\n", tab(state->lvl-1), i); + decode_shader_io_block(state, ptr); + break; + case INTERNAL: + printf("%sinternal input %u:\n", tab(state->lvl-1), i); + decode_shader_io_block(state, ptr); + break; + case CONSTANTS: + printf("%sconstant %u:\n", tab(state->lvl-1), i); + decode_shader_constant_block(state, ptr); + break; + case SHADER: { + struct shader_stats stats; + printf("%sshader %u:\n", tab(state->lvl-1), i); + disasm_a3xx_stat(ptr, blk->size/4, state->lvl, stdout, gpu_id, &stats); + if (shaderdb) { + unsigned dwords = 2 * stats.instlen; + + if (gpu_id >= 400) { + dwords = ALIGN(dwords, 16 * 2); + } else { + dwords = ALIGN(dwords, 4 * 2); + } + + unsigned half_regs = state->half_regs; + unsigned full_regs = state->full_regs; + + /* On a6xx w/ merged/conflicting half and full regs, the 
+ * full_regs footprint will be max of full_regs and half + * of half_regs.. we only care about which value is higher. + */ + if (gpu_id >= 600) { + /* footprint of half_regs in units of full_regs: */ + unsigned half_full = (half_regs + 1) / 2; + if (half_full > full_regs) + full_regs = half_full; + half_regs = 0; + } + + fprintf(stderr, + "%s shader: %u inst, %u nops, %u non-nops, %u dwords, " + "%u half, %u full, %u constlen, " + "%u (ss), %u (sy), %d max_sun, %d loops\n", + state->shader_type, stats.instructions, + stats.nops, stats.instructions - stats.nops, + dwords, half_regs, full_regs, + stats.constlen, stats.ss, stats.sy, + 0, 0); /* max_sun or loops not possible */ + } + /* this is a special case in a way, blk->count is # of + * instructions but disasm_a3xx() decodes all instructions, + * so just bail. + */ + i = blk->count; + break; + } + default: + dump_unknown(state, ptr, 0, state->desc_size/4); + break; + } + ptr += state->desc_size; + } + state->lvl--; +} + +/* there looks like one of these per shader, followed by "main" and + * some more info, and then the shader itself. 
+ */ +struct PACKED shader_info { + uint32_t unk_0000_0010[5]; + uint32_t desc_off; /* offset to first descriptor block */ + uint32_t num_blocks; +}; + +static void decode_shader_info(struct state *state, struct shader_info *info) +{ + assert((info->desc_off % 4) == 0); + + U(info, 0000, 0010); + X(info, desc_off); + D(info, num_blocks); + + dump_unknown(state, &info[1], 0, (info->desc_off - sizeof(*info))/4); + + state->shader = info; + + struct shader_descriptor_block *blocks = ((void *)info) + info->desc_off; + for (unsigned i = 0; i < info->num_blocks; i++) { + printf("%sdescriptor %u:\n", tab(state->lvl), i); + state->lvl++; + decode_shader_descriptor_block(state, &blocks[i]); + state->lvl--; + } +} + +static void dump_program(struct state *state) +{ + struct header *hdr = (void *)state->buf; + + if (dump_full) + dump_unknown(state, state->buf, 0, state->sz/4); + + decode_header(state, hdr); +} + +int main(int argc, char **argv) +{ + enum rd_sect_type type = RD_NONE; + enum debug_t debug = 0; + void *buf = NULL; + int sz; + struct io *io; + int raw_program = 0; + + /* lame argument parsing: */ + + while (1) { + if ((argc > 1) && !strcmp(argv[1], "--verbose")) { + debug |= PRINT_RAW | PRINT_VERBOSE; + argv++; + argc--; + continue; + } + if ((argc > 1) && !strcmp(argv[1], "--expand")) { + debug |= EXPAND_REPEAT; + argv++; + argc--; + continue; + } + if ((argc > 1) && !strcmp(argv[1], "--full")) { + /* only short dump, original shader, symbol table, and disassembly */ + dump_full = 1; + argv++; + argc--; + continue; + } + if ((argc > 1) && !strcmp(argv[1], "--dump-offsets")) { + dump_offsets = 1; + argv++; + argc--; + continue; + } + if ((argc > 1) && !strcmp(argv[1], "--raw")) { + raw_program = 1; + argv++; + argc--; + continue; + } + if ((argc > 1) && !strcmp(argv[1], "--shaderdb")) { + shaderdb = 1; + argv++; + argc--; + continue; + } + break; + } + + if (argc != 2) { + fprintf(stderr, "usage: pgmdump2 [--verbose] [--expand] [--full] [--dump-offsets] [--raw] 
[--shaderdb] testlog.rd\n"); + return -1; + } + + disasm_set_debug(debug); + + infile = argv[1]; + + io = io_open(infile); + if (!io) { + fprintf(stderr, "could not open: %s\n", infile); + return -1; + } + + if (raw_program) + { + io_readn(io, &sz, 4); + free(buf); + + /* note: allow hex dumps to go a bit past the end of the buffer.. + * might see some garbage, but better than missing the last few bytes.. + */ + buf = calloc(1, sz + 3); + io_readn(io, buf + 4, sz); + (*(int*)buf) = sz; + + struct state state = { + .buf = buf, + .sz = sz, + }; + printf("############################################################\n"); + printf("program:\n"); + dump_program(&state); + printf("############################################################\n"); + return 0; + } + + /* figure out what sort of input we are dealing with: */ + if (!(check_extension(infile, ".rd") || check_extension(infile, ".rd.gz"))) { + int ret; + buf = calloc(1, 100 * 1024); + ret = io_readn(io, buf, 100 * 1024); + if (ret < 0) { + fprintf(stderr, "error: %m"); + return -1; + } + return disasm_a3xx(buf, ret/4, 0, stdout, gpu_id); + } + + while ((io_readn(io, &type, sizeof(type)) > 0) && (io_readn(io, &sz, 4) > 0)) { + free(buf); + + /* note: allow hex dumps to go a bit past the end of the buffer.. + * might see some garbage, but better than missing the last few bytes.. 
+ */ + buf = calloc(1, sz + 3); + io_readn(io, buf, sz); + + switch(type) { + case RD_TEST: + if (dump_full) + printf("test: %s\n", (char *)buf); + break; + case RD_VERT_SHADER: + printf("vertex shader:\n%s\n", (char *)buf); + break; + case RD_FRAG_SHADER: + printf("fragment shader:\n%s\n", (char *)buf); + break; + case RD_PROGRAM: { + struct state state = { + .buf = buf, + .sz = sz, + }; + printf("############################################################\n"); + printf("program:\n"); + dump_program(&state); + printf("############################################################\n"); + break; + } + case RD_GPU_ID: + gpu_id = *((unsigned int *)buf); + printf("gpu_id: %d\n", gpu_id); + break; + default: + break; + } + } + + io_close(io); + + return 0; +} diff --git a/src/freedreno/decode/redump.h b/src/freedreno/decode/redump.h new file mode 100644 index 00000000000..c77344e69c1 --- /dev/null +++ b/src/freedreno/decode/redump.h @@ -0,0 +1,76 @@ +/* + * Copyright © 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef REDUMP_H_ +#define REDUMP_H_ + +enum rd_sect_type { + RD_NONE, + RD_TEST, /* ascii text */ + RD_CMD, /* ascii text */ + RD_GPUADDR, /* u32 gpuaddr, u32 size */ + RD_CONTEXT, /* raw dump */ + RD_CMDSTREAM, /* raw dump */ + RD_CMDSTREAM_ADDR, /* gpu addr of cmdstream */ + RD_PARAM, /* u32 param_type, u32 param_val, u32 bitlen */ + RD_FLUSH, /* empty, clear previous params */ + RD_PROGRAM, /* shader program, raw dump */ + RD_VERT_SHADER, + RD_FRAG_SHADER, + RD_BUFFER_CONTENTS, + RD_GPU_ID, +}; + +/* RD_PARAM types: */ +enum rd_param_type { + RD_PARAM_SURFACE_WIDTH, + RD_PARAM_SURFACE_HEIGHT, + RD_PARAM_SURFACE_PITCH, + RD_PARAM_COLOR, + RD_PARAM_BLIT_X, + RD_PARAM_BLIT_Y, + RD_PARAM_BLIT_WIDTH, + RD_PARAM_BLIT_HEIGHT, + RD_PARAM_BLIT_X2, /* BLIT_X + BLIT_WIDTH */ + RD_PARAM_BLIT_Y2, /* BLIT_Y + BLIT_WIDTH */ +}; + +void rd_start(const char *name, const char *fmt, ...) __attribute__((weak)); +void rd_end(void) __attribute__((weak)); +void rd_write_section(enum rd_sect_type type, const void *buf, int sz) __attribute__((weak)); + +/* for code that should run with and without libwrap, use the following + * macros which check if the fxns are present before calling + */ +#define RD_START(n,f,...) do { if (rd_start) rd_start(n,f,##__VA_ARGS__); } while (0) +#define RD_END() do { if (rd_end) rd_end(); } while (0) +#define RD_WRITE_SECTION(t,b,s) do { if (rd_write_section) rd_write_section(t,b,s); } while (0) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#undef ALIGN +#define ALIGN(v,a) (((v) + (a) - 1) & ~((a) - 1)) + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? 
(a) : (b)) + +#endif /* REDUMP_H_ */ diff --git a/src/freedreno/decode/rnnutil.c b/src/freedreno/decode/rnnutil.c new file mode 100644 index 00000000000..78915977de4 --- /dev/null +++ b/src/freedreno/decode/rnnutil.c @@ -0,0 +1,217 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include +#include +#include +#include +#include + +#include "rnnutil.h" + +static struct rnndomain *finddom(struct rnn *rnn, uint32_t regbase) +{ + if (rnndec_checkaddr(rnn->vc, rnn->dom[0], regbase, 0)) + return rnn->dom[0]; + return rnn->dom[1]; +} + +void _rnn_init(struct rnn *rnn, int nocolor) +{ + rnn_init(); + + rnn->db = rnn_newdb(); + rnn->vc_nocolor = rnndec_newcontext(rnn->db); + rnn->vc_nocolor->colors = &envy_null_colors; + if (nocolor) { + rnn->vc = rnn->vc_nocolor; + } else { + rnn->vc = rnndec_newcontext(rnn->db); + rnn->vc->colors = &envy_def_colors; + } +} + +struct rnn *rnn_new(int nocolor) +{ + struct rnn *rnn = calloc(sizeof(*rnn), 1); + + if (!rnn) + return NULL; + + _rnn_init(rnn, nocolor); + + return rnn; +} + +static void init(struct rnn *rnn, char *file, char *domain) +{ + /* prepare rnn stuff for lookup */ + rnn_parsefile(rnn->db, file); + rnn_prepdb(rnn->db); + rnn->dom[0] = rnn_finddomain(rnn->db, domain); + if ((strcmp(domain, "A2XX") == 0) || (strcmp(domain, "A3XX") == 0)) { + rnn->dom[1] = rnn_finddomain(rnn->db, "AXXX"); + } else { + rnn->dom[1] = rnn->dom[0]; + } + if (!rnn->dom[0] && rnn->dom[1]) { + fprintf(stderr, "Could not find domain %s in %s\n", domain, file); + } + rnn->variant = domain; + + rnndec_varadd(rnn->vc, "chip", domain); + if (rnn->vc != rnn->vc_nocolor) + rnndec_varadd(rnn->vc_nocolor, "chip", domain); +} + +void rnn_load_file(struct rnn *rnn, char *file, char *domain) +{ + init(rnn, file, domain); +} + +void rnn_load(struct rnn *rnn, const char *gpuname) +{ + if (strstr(gpuname, "a2")) { + init(rnn, "adreno/a2xx.xml", "A2XX"); + } else if (strstr(gpuname, "a3")) { + init(rnn, "adreno/a3xx.xml", "A3XX"); + } else if (strstr(gpuname, "a4")) { + init(rnn, "adreno/a4xx.xml", "A4XX"); + } else if (strstr(gpuname, "a5")) { + init(rnn, "adreno/a5xx.xml", "A5XX"); + } else if (strstr(gpuname, "a6")) { + init(rnn, "adreno/a6xx.xml", "A6XX"); + } +} + +uint32_t rnn_regbase(struct rnn 
*rnn, const char *name) +{ + uint32_t regbase = rnndec_decodereg(rnn->vc_nocolor, rnn->dom[0], name); + if (!regbase) + regbase = rnndec_decodereg(rnn->vc_nocolor, rnn->dom[1], name); + return regbase; +} + +const char *rnn_regname(struct rnn *rnn, uint32_t regbase, int color) +{ + static char buf[128]; + struct rnndecaddrinfo *info; + + info = rnndec_decodeaddr(color ? rnn->vc : rnn->vc_nocolor, + finddom(rnn, regbase), regbase, 0); + if (info) { + strcpy(buf, info->name); + free(info->name); + free(info); + return buf; + } + return NULL; +} + +struct rnndecaddrinfo *rnn_reginfo(struct rnn *rnn, uint32_t regbase) +{ + return rnndec_decodeaddr(rnn->vc, finddom(rnn, regbase), regbase, 0); +} + +const char *rnn_enumname(struct rnn *rnn, const char *name, uint32_t val) +{ + struct rnndeccontext *ctx = rnn->vc; + struct rnnenum *en = rnn_findenum(ctx->db, name); + if (en) { + int i; + for (i = 0; i < en->valsnum; i++) { + struct rnnvalue *eval = en->vals[i]; + if (eval->valvalid && eval->value == val && + rnndec_varmatch(ctx, &eval->varinfo)) { + return en->vals[i]->name; + } + } + } + return NULL; +} + +static struct rnndelem *regelem(struct rnndomain *domain, const char *name) +{ + int i; + for (i = 0; i < domain->subelemsnum; i++) { + struct rnndelem *elem = domain->subelems[i]; + if (!strcmp(elem->name, name)) + return elem; + } + return NULL; +} + +/* Lookup rnndelem by name: */ +struct rnndelem *rnn_regelem(struct rnn *rnn, const char *name) +{ + struct rnndelem *elem = regelem(rnn->dom[0], name); + if (elem) + return elem; + return regelem(rnn->dom[1], name); +} + +static struct rnndelem *regoff(struct rnndomain *domain, uint32_t offset) +{ + int i; + for (i = 0; i < domain->subelemsnum; i++) { + struct rnndelem *elem = domain->subelems[i]; + if (elem->offset == offset) + return elem; + } + return NULL; +} + +/* Lookup rnndelem by offset: */ +struct rnndelem *rnn_regoff(struct rnn *rnn, uint32_t offset) +{ + struct rnndelem *elem = regoff(rnn->dom[0], offset); + 
if (elem) + return elem; + return regoff(rnn->dom[1], offset); +} + +enum rnnttype rnn_decodelem(struct rnn *rnn, struct rnntypeinfo *info, + uint32_t regval, union rnndecval *val) +{ + val->u = regval; + switch (info->type) { + case RNN_TTYPE_INLINE_ENUM: + case RNN_TTYPE_ENUM: + case RNN_TTYPE_HEX: + case RNN_TTYPE_INT: + case RNN_TTYPE_UINT: + case RNN_TTYPE_FLOAT: + case RNN_TTYPE_BOOLEAN: + return info->type; + case RNN_TTYPE_FIXED: + case RNN_TTYPE_UFIXED: + /* TODO */ + default: + return RNN_TTYPE_INVALID; + } +} diff --git a/src/freedreno/decode/rnnutil.h b/src/freedreno/decode/rnnutil.h new file mode 100644 index 00000000000..ea667470a89 --- /dev/null +++ b/src/freedreno/decode/rnnutil.h @@ -0,0 +1,66 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef RNNUTIL_H_ +#define RNNUTIL_H_ + +#include +#include +#include + +#include "rnn.h" +#include "rnndec.h" + +struct rnn { + struct rnndb *db; + struct rnndeccontext *vc, *vc_nocolor; + struct rnndomain *dom[2]; + const char *variant; +}; + +union rnndecval { + uint32_t u; + int32_t i; + float f; +}; + +void _rnn_init(struct rnn *rnn, int nocolor); +struct rnn *rnn_new(int nocolor); +void rnn_load_file(struct rnn *rnn, char *file, char *domain); +void rnn_load(struct rnn *rnn, const char *gpuname); +uint32_t rnn_regbase(struct rnn *rnn, const char *name); +const char *rnn_regname(struct rnn *rnn, uint32_t regbase, int color); +struct rnndecaddrinfo *rnn_reginfo(struct rnn *rnn, uint32_t regbase); +const char *rnn_enumname(struct rnn *rnn, const char *name, uint32_t val); + +struct rnndelem *rnn_regelem(struct rnn *rnn, const char *name); +struct rnndelem *rnn_regoff(struct rnn *rnn, uint32_t offset); +enum rnnttype rnn_decodelem(struct rnn *rnn, struct rnntypeinfo *info, + uint32_t regval, union rnndecval *val); + +#endif /* RNNUTIL_H_ */ diff --git a/src/freedreno/decode/script.c b/src/freedreno/decode/script.c new file mode 100644 index 00000000000..a882dd25af6 --- /dev/null +++ b/src/freedreno/decode/script.c @@ -0,0 +1,775 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all 
copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#define _GNU_SOURCE +#define LUA_COMPAT_APIINTCASTS + +#include +#include +#include +#include +#include +#include +#include + +#include "script.h" +#include "cffdec.h" +#include "rnnutil.h" + +static lua_State *L; + +#if 0 +#define DBG(fmt, ...) \ + do { printf(" ** %s:%d ** "fmt "\n", \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) +#else +#define DBG(fmt, ...) do {} while (0) +#endif + +/* An rnn based decoder, which can either be decoding current register + * values, or domain based decoding of a pm4 packet. 
+ * + */ +struct rnndec { + struct rnn base; + + /* for pm4 packet decoding: */ + uint32_t sizedwords; + uint32_t *dwords; +}; + +static inline struct rnndec *to_rnndec(struct rnn *rnn) +{ + return (struct rnndec *)rnn; +} + +static uint32_t rnn_val(struct rnn *rnn, uint32_t regbase) +{ + struct rnndec *rnndec = to_rnndec(rnn); + + if (!rnndec->sizedwords) { + return reg_val(regbase); + } else if (regbase < rnndec->sizedwords) { + return rnndec->dwords[regbase]; + } else { + // XXX throw an error + return -1; + } +} + +/* does not return */ +static void error(const char *fmt) +{ + fprintf(stderr, fmt, lua_tostring(L, -1)); + exit(1); +} + +/* + * An enum type that can be used as string or number: + */ + +struct rnndenum { + const char *str; + int val; +}; + +static int l_meta_rnn_enum_tostring(lua_State *L) +{ + struct rnndenum *e = lua_touserdata(L, 1); + if (e->str) { + lua_pushstring(L, e->str); + } else { + char buf[32]; + sprintf(buf, "%u", e->val); + lua_pushstring(L, buf); + } + return 1; +} + +/* so, this doesn't actually seem to be implemented yet, but hopefully + * some day lua comes to it's senses + */ +static int l_meta_rnn_enum_tonumber(lua_State *L) +{ + struct rnndenum *e = lua_touserdata(L, 1); + lua_pushinteger(L, e->val); + return 1; +} + +static const struct luaL_Reg l_meta_rnn_enum[] = { + {"__tostring", l_meta_rnn_enum_tostring}, + {"__tonumber", l_meta_rnn_enum_tonumber}, + {NULL, NULL} /* sentinel */ +}; + +static void pushenum(struct lua_State *L, int val, struct rnnenum *info) +{ + struct rnndenum *e = lua_newuserdata(L, sizeof(*e)); + + e->val = val; + e->str = NULL; + + for (int i = 0; i < info->valsnum; i++) { + if (info->vals[i]->valvalid && (info->vals[i]->value == val)) { + e->str = info->vals[i]->name; + break; + } + } + + luaL_newmetatable(L, "rnnmetaenum"); + luaL_setfuncs(L, l_meta_rnn_enum, 0); + lua_pop(L, 1); + + luaL_setmetatable(L, "rnnmetaenum"); +} + +/* Expose rnn decode to script environment as "rnn" library: + */ + 
+struct rnndoff { + struct rnn *rnn; + struct rnndelem *elem; + uint64_t offset; +}; + +static void push_rnndoff(lua_State *L, struct rnn *rnn, + struct rnndelem *elem, uint64_t offset) +{ + struct rnndoff *rnndoff = lua_newuserdata(L, sizeof(*rnndoff)); + rnndoff->rnn = rnn; + rnndoff->elem = elem; + rnndoff->offset = offset; +} + +static int l_rnn_etype_array(lua_State *L, struct rnn *rnn, + struct rnndelem *elem, uint64_t offset); +static int l_rnn_etype_reg(lua_State *L, struct rnn *rnn, + struct rnndelem *elem, uint64_t offset); + +static int pushdecval(struct lua_State *L, struct rnn *rnn, + uint32_t regval, struct rnntypeinfo *info) +{ + union rnndecval val; + switch (rnn_decodelem(rnn, info, regval, &val)) { + case RNN_TTYPE_ENUM: + case RNN_TTYPE_INLINE_ENUM: + pushenum(L, val.i, info->eenum); + return 1; + case RNN_TTYPE_INT: + lua_pushinteger(L, val.i); + return 1; + case RNN_TTYPE_UINT: + case RNN_TTYPE_HEX: + lua_pushunsigned(L, val.u); + return 1; + case RNN_TTYPE_FLOAT: + lua_pushnumber(L, val.f); + return 1; + case RNN_TTYPE_BOOLEAN: + lua_pushboolean(L, val.u); + return 1; + case RNN_TTYPE_INVALID: + default: + return 0; + } + +} + +static int l_rnn_etype(lua_State *L, struct rnn *rnn, + struct rnndelem *elem, uint64_t offset) +{ + int ret; + uint32_t regval; + DBG("elem=%p (%d), offset=%lu", elem, elem->type, offset); + switch (elem->type) { + case RNN_ETYPE_REG: + /* if a register has no bitfields, just return + * the raw value: + */ + regval = rnn_val(rnn, offset); + regval <<= elem->typeinfo.shr; + ret = pushdecval(L, rnn, regval, &elem->typeinfo); + if (ret) + return ret; + return l_rnn_etype_reg(L, rnn, elem, offset); + case RNN_ETYPE_ARRAY: + return l_rnn_etype_array(L, rnn, elem, offset); + default: + /* hmm.. */ + printf("unhandled type: %d\n", elem->type); + return 0; + } +} + +/* + * Struct Object: + * To implement stuff like 'RB_MRT[n].CONTROL' we need a struct-object + * to represent the current array index (ie. 
'RB_MRT[n]') + */ + +static int l_rnn_struct_meta_index(lua_State *L) +{ + struct rnndoff *rnndoff = lua_touserdata(L, 1); + const char *name = lua_tostring(L, 2); + struct rnndelem *elem = rnndoff->elem; + int i; + + for (i = 0; i < elem->subelemsnum; i++) { + struct rnndelem *subelem = elem->subelems[i]; + if (!strcmp(name, subelem->name)) { + return l_rnn_etype(L, rnndoff->rnn, subelem, + rnndoff->offset + subelem->offset); + } + } + + return 0; +} + +static const struct luaL_Reg l_meta_rnn_struct[] = { + {"__index", l_rnn_struct_meta_index}, + {NULL, NULL} /* sentinel */ +}; + +static int l_rnn_etype_struct(lua_State *L, struct rnn *rnn, + struct rnndelem *elem, uint64_t offset) +{ + push_rnndoff(L, rnn, elem, offset); + + luaL_newmetatable(L, "rnnmetastruct"); + luaL_setfuncs(L, l_meta_rnn_struct, 0); + lua_pop(L, 1); + + luaL_setmetatable(L, "rnnmetastruct"); + + return 1; +} + +/* + * Array Object: + */ + +static int l_rnn_array_meta_index(lua_State *L) +{ + struct rnndoff *rnndoff = lua_touserdata(L, 1); + int idx = lua_tointeger(L, 2); + struct rnndelem *elem = rnndoff->elem; + uint64_t offset = rnndoff->offset + (elem->stride * idx); + + DBG("rnndoff=%p, idx=%d, numsubelems=%d", + rnndoff, idx, rnndoff->elem->subelemsnum); + + /* if just a single sub-element, it is directly a register, + * otherwise we need to accumulate the array index while + * we wait for the register name within the array.. 
+ */ + if (elem->subelemsnum == 1) { + return l_rnn_etype(L, rnndoff->rnn, elem->subelems[0], offset); + } else { + return l_rnn_etype_struct(L, rnndoff->rnn, elem, offset); + } + + return 0; +} + +static const struct luaL_Reg l_meta_rnn_array[] = { + {"__index", l_rnn_array_meta_index}, + {NULL, NULL} /* sentinel */ +}; + +static int l_rnn_etype_array(lua_State *L, struct rnn *rnn, + struct rnndelem *elem, uint64_t offset) +{ + push_rnndoff(L, rnn, elem, offset); + + luaL_newmetatable(L, "rnnmetaarray"); + luaL_setfuncs(L, l_meta_rnn_array, 0); + lua_pop(L, 1); + + luaL_setmetatable(L, "rnnmetaarray"); + + return 1; +} + +/* + * Register element: + */ + +static int l_rnn_reg_meta_index(lua_State *L) +{ + struct rnndoff *rnndoff = lua_touserdata(L, 1); + const char *name = lua_tostring(L, 2); + struct rnndelem *elem = rnndoff->elem; + struct rnntypeinfo *info = &elem->typeinfo; + struct rnnbitfield **bitfields; + int bitfieldsnum; + int i; + + switch (info->type) { + case RNN_TTYPE_BITSET: + bitfields = info->ebitset->bitfields; + bitfieldsnum = info->ebitset->bitfieldsnum; + break; + case RNN_TTYPE_INLINE_BITSET: + bitfields = info->bitfields; + bitfieldsnum = info->bitfieldsnum; + break; + default: + printf("invalid register type: %d\n", info->type); + return 0; + } + + for (i = 0; i < bitfieldsnum; i++) { + struct rnnbitfield *bf = bitfields[i]; + if (!strcmp(name, bf->name)) { + uint32_t regval = rnn_val(rnndoff->rnn, rnndoff->offset); + + regval &= typeinfo_mask(&bf->typeinfo); + regval >>= bf->typeinfo.low; + regval <<= bf->typeinfo.shr; + + DBG("name=%s, info=%p, subelemsnum=%d, type=%d, regval=%x", + name, info, rnndoff->elem->subelemsnum, + bf->typeinfo.type, regval); + + return pushdecval(L, rnndoff->rnn, regval, &bf->typeinfo); + } + } + + printf("invalid member: %s\n", name); + return 0; +} + +static int l_rnn_reg_meta_tostring(lua_State *L) +{ + struct rnndoff *rnndoff = lua_touserdata(L, 1); + uint32_t regval = rnn_val(rnndoff->rnn, rnndoff->offset); 
+ struct rnndecaddrinfo *info = rnn_reginfo(rnndoff->rnn, rnndoff->offset); + char *decoded; + if (info && info->typeinfo) { + decoded = rnndec_decodeval(rnndoff->rnn->vc, + info->typeinfo, regval); + } else { + asprintf(&decoded, "%08x", regval); + } + lua_pushstring(L, decoded); + free(decoded); + if (info) { + free(info->name); + free(info); + } + return 1; +} + +static int l_rnn_reg_meta_tonumber(lua_State *L) +{ + struct rnndoff *rnndoff = lua_touserdata(L, 1); + uint32_t regval = rnn_val(rnndoff->rnn, rnndoff->offset); + + regval <<= rnndoff->elem->typeinfo.shr; + + lua_pushnumber(L, regval); + return 1; +} + +static const struct luaL_Reg l_meta_rnn_reg[] = { + {"__index", l_rnn_reg_meta_index}, + {"__tostring", l_rnn_reg_meta_tostring}, + {"__tonumber", l_rnn_reg_meta_tonumber}, + {NULL, NULL} /* sentinel */ +}; + +static int l_rnn_etype_reg(lua_State *L, struct rnn *rnn, + struct rnndelem *elem, uint64_t offset) +{ + push_rnndoff(L, rnn, elem, offset); + + luaL_newmetatable(L, "rnnmetareg"); + luaL_setfuncs(L, l_meta_rnn_reg, 0); + lua_pop(L, 1); + + luaL_setmetatable(L, "rnnmetareg"); + + return 1; +} + +/* + * + */ + +static int l_rnn_meta_index(lua_State *L) +{ + struct rnn *rnn = lua_touserdata(L, 1); + const char *name = lua_tostring(L, 2); + struct rnndelem *elem; + + elem = rnn_regelem(rnn, name); + if (!elem) + return 0; + + return l_rnn_etype(L, rnn, elem, elem->offset); +} + +static int l_rnn_meta_gc(lua_State *L) +{ + // TODO + //struct rnn *rnn = lua_touserdata(L, 1); + //rnn_deinit(rnn); + return 0; +} + +static const struct luaL_Reg l_meta_rnn[] = { + {"__index", l_rnn_meta_index}, + {"__gc", l_rnn_meta_gc}, + {NULL, NULL} /* sentinel */ +}; + +static int l_rnn_init(lua_State *L) +{ + const char *gpuname = lua_tostring(L, 1); + struct rnndec *rnndec = lua_newuserdata(L, sizeof(*rnndec)); + _rnn_init(&rnndec->base, 0); + rnn_load(&rnndec->base, gpuname); + rnndec->sizedwords = 0; + + luaL_newmetatable(L, "rnnmeta"); + luaL_setfuncs(L, 
l_meta_rnn, 0); + lua_pop(L, 1); + + luaL_setmetatable(L, "rnnmeta"); + + return 1; +} + +static int l_rnn_enumname(lua_State *L) +{ + struct rnn *rnn = lua_touserdata(L, 1); + const char *name = lua_tostring(L, 2); + uint32_t val = (uint32_t)lua_tonumber(L, 3); + lua_pushstring(L, rnn_enumname(rnn, name, val)); + return 1; +} + +static int l_rnn_regname(lua_State *L) +{ + struct rnn *rnn = lua_touserdata(L, 1); + uint32_t regbase = (uint32_t)lua_tonumber(L, 2); + lua_pushstring(L, rnn_regname(rnn, regbase, 1)); + return 1; +} + +static int l_rnn_regval(lua_State *L) +{ + struct rnn *rnn = lua_touserdata(L, 1); + uint32_t regbase = (uint32_t)lua_tonumber(L, 2); + uint32_t regval = (uint32_t)lua_tonumber(L, 3); + struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase); + char *decoded; + if (info && info->typeinfo) { + decoded = rnndec_decodeval(rnn->vc, info->typeinfo, regval); + } else { + asprintf(&decoded, "%08x", regval); + } + lua_pushstring(L, decoded); + free(decoded); + if (info) { + free(info->name); + free(info); + } + return 1; +} + +static const struct luaL_Reg l_rnn[] = { + {"init", l_rnn_init}, + {"enumname", l_rnn_enumname}, + {"regname", l_rnn_regname}, + {"regval", l_rnn_regval}, + {NULL, NULL} /* sentinel */ +}; + + + +/* Expose the register state to script enviroment as a "regs" library: + */ + +static int l_reg_written(lua_State *L) +{ + uint32_t regbase = (uint32_t)lua_tonumber(L, 1); + lua_pushnumber(L, reg_written(regbase)); + return 1; +} + +static int l_reg_lastval(lua_State *L) +{ + uint32_t regbase = (uint32_t)lua_tonumber(L, 1); + lua_pushnumber(L, reg_lastval(regbase)); + return 1; +} + +static int l_reg_val(lua_State *L) +{ + uint32_t regbase = (uint32_t)lua_tonumber(L, 1); + lua_pushnumber(L, reg_val(regbase)); + return 1; +} + +static const struct luaL_Reg l_regs[] = { + {"written", l_reg_written}, + {"lastval", l_reg_lastval}, + {"val", l_reg_val}, + {NULL, NULL} /* sentinel */ +}; + +/* Expose API to lookup snapshot buffers: + */ + 
+uint64_t gpubaseaddr(uint64_t gpuaddr); +unsigned hostlen(uint64_t gpuaddr); + +/* given address, return base-address of buffer: */ +static int l_bo_base(lua_State *L) +{ + uint64_t addr = (uint64_t)lua_tonumber(L, 1); + lua_pushnumber(L, gpubaseaddr(addr)); + return 1; +} + +/* given address, return the remaining size of the buffer: */ +static int l_bo_size(lua_State *L) +{ + uint64_t addr = (uint64_t)lua_tonumber(L, 1); + lua_pushnumber(L, hostlen(addr)); + return 1; +} + +static const struct luaL_Reg l_bos[] = { + {"base", l_bo_base}, + {"size", l_bo_size}, + {NULL, NULL} /* sentinel */ +}; + +static void openlib(const char *lib, const luaL_Reg *reg) +{ + lua_newtable(L); + luaL_setfuncs(L, reg, 0); + lua_setglobal(L, lib); +} + +/* called at start to load the script: */ +int script_load(const char *file) +{ + int ret; + + assert(!L); + + L = luaL_newstate(); + luaL_openlibs(L); + openlib("bos", l_bos); + openlib("regs", l_regs); + openlib("rnn", l_rnn); + + ret = luaL_loadfile(L, file); + if (ret) + error("%s\n"); + + ret = lua_pcall(L, 0, LUA_MULTRET, 0); + if (ret) + error("%s\n"); + + return 0; +} + + +/* called at start of each cmdstream file: */ +void script_start_cmdstream(const char *name) +{ + if (!L) + return; + + lua_getglobal(L, "start_cmdstream"); + + /* if no handler just ignore it: */ + if (!lua_isfunction(L, -1)) { + lua_pop(L, 1); + return; + } + + lua_pushstring(L, name); + + /* do the call (1 arguments, 0 result) */ + if (lua_pcall(L, 1, 0, 0) != 0) + error("error running function `f': %s\n"); +} + +/* called at each DRAW_INDX, calls script drawidx fxn to process + * the current state + */ +void script_draw(const char *primtype, uint32_t nindx) +{ + if (!L) + return; + + lua_getglobal(L, "draw"); + + /* if no handler just ignore it: */ + if (!lua_isfunction(L, -1)) { + lua_pop(L, 1); + return; + } + + lua_pushstring(L, primtype); + lua_pushnumber(L, nindx); + + /* do the call (2 arguments, 0 result) */ + if (lua_pcall(L, 2, 0, 0) != 0) + 
error("error running function `f': %s\n"); +} + + +static int l_rnn_meta_dom_index(lua_State *L) +{ + struct rnn *rnn = lua_touserdata(L, 1); + uint32_t offset = (uint32_t)lua_tonumber(L, 2); + struct rnndelem *elem; + + /* TODO might be nicer if the arg isn't a number, to search the domain + * for matching bitfields.. so that the script could do something like + * 'pkt.WIDTH' insteadl of 'pkt[1].WIDTH', ie. not have to remember the + * offset of the dword containing the bitfield.. + */ + + elem = rnn_regoff(rnn, offset); + if (!elem) + return 0; + + return l_rnn_etype(L, rnn, elem, elem->offset); +} + +/* + * A wrapper object for rnndomain based decoding of an array of dwords + * (ie. for pm4 packet decoding). Mostly re-uses the register-value + * decoding for the individual dwords and bitfields. + */ + +static int l_rnn_meta_dom_gc(lua_State *L) +{ + // TODO + //struct rnn *rnn = lua_touserdata(L, 1); + //rnn_deinit(rnn); + return 0; +} + +static const struct luaL_Reg l_meta_rnn_dom[] = { + {"__index", l_rnn_meta_dom_index}, + {"__gc", l_rnn_meta_dom_gc}, + {NULL, NULL} /* sentinel */ +}; + +/* called to general pm4 packet decoding, such as texture/sampler state + */ +void script_packet(uint32_t *dwords, uint32_t sizedwords, + struct rnn *rnn, struct rnndomain *dom) +{ + if (!L) + return; + + lua_getglobal(L, dom->name); + + /* if no handler for the packet, just ignore it: */ + if (!lua_isfunction(L, -1)) { + lua_pop(L, 1); + return; + } + + struct rnndec *rnndec = lua_newuserdata(L, sizeof(*rnndec)); + + rnndec->base = *rnn; + rnndec->base.dom[0] = dom; + rnndec->base.dom[1] = NULL; + rnndec->dwords = dwords; + rnndec->sizedwords = sizedwords; + + luaL_newmetatable(L, "rnnmetadom"); + luaL_setfuncs(L, l_meta_rnn_dom, 0); + lua_pop(L, 1); + + luaL_setmetatable(L, "rnnmetadom"); + + lua_pushnumber(L, sizedwords); + + if (lua_pcall(L, 2, 0, 0) != 0) + error("error running function `f': %s\n"); +} + +/* helper to call fxn that takes and returns void: */ +static void 
simple_call(const char *name) +{ + if (!L) + return; + + lua_getglobal(L, name); + + /* if no handler just ignore it: */ + if (!lua_isfunction(L, -1)) { + lua_pop(L, 1); + return; + } + + /* do the call (0 arguments, 0 result) */ + if (lua_pcall(L, 0, 0, 0) != 0) + error("error running function `f': %s\n"); +} + +/* called at end of each cmdstream file: */ +void script_end_cmdstream(void) +{ + simple_call("end_cmdstream"); +} + +/* called at start of submit/issueibcmds: */ +void script_start_submit(void) +{ + simple_call("start_submit"); +} + +/* called at end of submit/issueibcmds: */ +void script_end_submit(void) +{ + simple_call("end_submit"); +} + +/* called after last cmdstream file: */ +void script_finish(void) +{ + if (!L) + return; + + simple_call("finish"); + + lua_close(L); + L = NULL; +} diff --git a/src/freedreno/decode/script.h b/src/freedreno/decode/script.h new file mode 100644 index 00000000000..d14b69ae8f7 --- /dev/null +++ b/src/freedreno/decode/script.h @@ -0,0 +1,76 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef SCRIPT_H_ +#define SCRIPT_H_ + +#include + + +// XXX make script support optional +#define ENABLE_SCRIPTING 1 + +#ifdef ENABLE_SCRIPTING + +/* called at start to load the script: */ +int script_load(const char *file); + +/* called at start of each cmdstream file: */ +void script_start_cmdstream(const char *name); + +/* called at each DRAW_INDX, calls script drawidx fxn to process + * the current state + */ +__attribute__((weak)) +void script_draw(const char *primtype, uint32_t nindx); + +struct rnn; +struct rnndomain; +__attribute__((weak)) +void script_packet(uint32_t *dwords, uint32_t sizedwords, + struct rnn *rnn, struct rnndomain *dom); + +/* maybe at some point it is interesting to add additional script + * hooks for CP_EVENT_WRITE, etc? + */ + +/* called at end of each cmdstream file: */ +void script_end_cmdstream(void); + +void script_start_submit(void); +void script_end_submit(void); + +/* called after last cmdstream file: */ +void script_finish(void); + +#else +// TODO no-op stubs.. +#endif + + +#endif /* SCRIPT_H_ */ diff --git a/src/freedreno/decode/scripts/analyze.lua b/src/freedreno/decode/scripts/analyze.lua new file mode 100644 index 00000000000..27e97ecd4a5 --- /dev/null +++ b/src/freedreno/decode/scripts/analyze.lua @@ -0,0 +1,178 @@ +-- A script that compares a set of equivalent cmdstream captures from +-- various generations, looking for equivalencies between registers. +-- +-- This would be run across a group of similar tests for various +-- generations, for example: +-- +-- cffdump --script scripts/analyze.lua a320/quad-flat-*.rd a420/quad-flat-*.rd +-- +-- This is done by comparing unique register values. Ie. 
for each +-- generation, find the set of registers that have different values +-- between equivalent draw calls. + +local posix = require "posix" + +io.write("Analyzing Data...\n") + +-- results - table structure: +-- * [gpuname] - gpu +-- * tests +-- * [testname] - current test +-- * draws +-- * [1..n] - the draws +-- * primtype - the primitive type +-- * regs - table of values for draw +-- * [regbase] - regval +-- * regvals - table of unique values across all draws +-- * [regbase] +-- * [regval] - list of test names +-- * [1..n] - testname "." didx +local results = {} + +local test = nil +local gpuname = nil +local testname = nil + + +-- srsly, no sparse table size() op? +function tblsz(tbl) + local n = 0; + for k,v in pairs(tbl) do + n = n + 1 + end + return n +end + + +function start_cmdstream(name) + testname = posix.basename(name) + gpuname = posix.basename(posix.dirname(name)) + --io.write("START: gpuname=" .. gpuname .. ", testname=" .. testname .. "\n"); + local gpu = results[gpuname] + if gpu == nil then + gpu = {["tests"] = {}, ["regvals"] = {}} + results[gpuname] = gpu + end + test = {["draws"] = {}} + gpu["tests"][testname] = test +end + +function draw(primtype, nindx) + -- RECTLIST is only used internally.. we want to ignore it for + -- now, although it could potentially be interesting to track + -- these separately (separating clear/restore/resolve) just to + -- figure out which registers are used for which.. + if primtype == "DI_PT_RECTLIST" then + return + end + local regtbl = {} + local draw = {["primtype"] = primtype, ["regs"] = regtbl} + local didx = tblsz(test["draws"]) + + test["draws"][didx] = draw + + -- populate current regs. For now just consider ones that have + -- been written.. maybe we need to make that configurable in + -- case it filters out too many registers. 
+ for regbase=0,0xffff do + if regs.written(regbase) ~= 0 then + local regval = regs.val(regbase) + + -- track reg vals per draw: + regtbl[regbase] = regval + + -- also track which reg vals appear in which tests: + local uniq_regvals = results[gpuname]["regvals"][regbase] + if uniq_regvals == nil then + uniq_regvals = {} + results[gpuname]["regvals"][regbase] = uniq_regvals; + end + local drawlist = uniq_regvals[regval] + if drawlist == nil then + drawlist = {} + uniq_regvals[regval] = drawlist + end + table.insert(drawlist, testname .. "." .. didx) + end + end + + -- TODO maybe we want to whitelist a few well known regs, for the + -- convenience of the code that runs at the end to analyze the data? + -- TODO also would be useful to somehow capture CP_SET_BIN.. + +end + +function end_cmdstream() + test = nil + gpuname = nil + testname = nil +end + +function print_draws(gpuname, gpu) + io.write(" " .. gpuname .. "\n") + for testname,test in pairs(gpu["tests"]) do + io.write(" " .. testname .. ", draws=" .. #test["draws"] .. "\n") + for didx,draw in pairs(test["draws"]) do + io.write(" " .. didx .. ": " .. draw["primtype"] .. "\n") + end + end +end + +-- sort and concat a list of draw names to form a key which can be +-- compared to other drawlists to check for equality +-- TODO maybe we instead want a scheme that allows for some fuzzyness +-- in the matching?? +function drawlistname(drawlist) + local name = nil + for idx,draw in pairs(drawlist) do + if name == nil then + name = draw + else + name = name .. ":" .. draw + end + end + return name +end + +local rnntbl = {} + +function dumpmatches(name) + for gpuname,gpu in pairs(results) do + local r = rnntbl[gpuname] + if r == nil then + io.write("loading rnn database: \n" .. 
gpuname) + r = rnn.init(gpuname) + rnntbl[gpuname] = r + end + for regbase,regvals in pairs(gpu["regvals"]) do + for regval,drawlist in pairs(regvals) do + local name2 = drawlistname(drawlist) + if name == name2 then + io.write(string.format(" %s:%s:\t%08x %s\n", + gpuname, rnn.regname(r, regbase), + regval, rnn.regval(r, regbase, regval))) + end + end + end + end +end + +function finish() + -- drawlistnames that we've already dumped: + local dumped = {} + + for gpuname,gpu in pairs(results) do + -- print_draws(gpuname, gpu) + for regbase,regvals in pairs(gpu["regvals"]) do + for regval,drawlist in pairs(regvals) do + local name = drawlistname(drawlist) + if dumped[name] == nil then + io.write("\n" .. name .. ":\n") + dumpmatches(name) + dumped[name] = 1 + end + end + end + end +end + diff --git a/src/freedreno/decode/scripts/parse-submits.lua b/src/freedreno/decode/scripts/parse-submits.lua new file mode 100644 index 00000000000..1d21716503d --- /dev/null +++ b/src/freedreno/decode/scripts/parse-submits.lua @@ -0,0 +1,413 @@ +-- Parse cmdstream dump and analyse blits and batches + +--local posix = require "posix" + +function printf(fmt, ...) + return io.write(string.format(fmt, ...)) +end + +function dbg(fmt, ...) + --printf(fmt, ...) 
+end + +printf("Analyzing Data...\n") + +local r = rnn.init("a630") + +-- Each submit, all draws will target the same N MRTs: +local mrts = {} +local allmrts = {} -- includes historical render targets +function push_mrt(fmt, w, h, samples, base, flag, gmem) + dbg("MRT: %s %ux%u 0x%x\n", fmt, w, h, base) + + local mrt = {} + mrt.format = fmt + mrt.w = w + mrt.h = h + mrt.samples = samples + mrt.base = base + mrt.flag = flag + mrt.gmem = gmem + + mrts[base] = mrt + allmrts[base] = mrt +end + +-- And each each draw will read from M sources/textures: +local sources = {} +function push_source(fmt, w, h, samples, base, flag) + dbg("SRC: %s %ux%u 0x%x\n", fmt, w, h, base) + + local source = {} + source.format = fmt + source.w = w + source.h = h + source.samples = samples + source.base = base + source.flag = flag + + sources[base] = source +end + +local binw +local binh +local nbins +local blits = 0 +local draws = 0 +local drawmode +local cleared +local restored +local resolved +local nullbatch +local depthtest +local depthwrite +local stenciltest +local stencilwrite + +function start_cmdstream(name) + printf("Parsing %s\n", name) +end + +function reset() + dbg("reset\n") + mrts = {} + sources = {} + draws = 0 + blits = 0 + cleared = {} + restored = {} + resolved = {} + depthtest = false + depthwrite = false + stenciltest = false + stencilwrite = false + drawmode = Nil +end + +function start_submit() + dbg("start_submit\n") + reset() + nullbatch = true +end + +function finish() + dbg("finish\n") + + printf("\n") + + -- TODO we get false-positives for 'NULL BATCH!' because we don't have + -- a really good way to differentiate between submits and cmds. Ie. + -- with growable cmdstream, and a large # of tiles, IB1 can get split + -- across multiple buffers. 
Since we ignore GMEM draws for window- + -- offset != 0,0, the later cmds will appear as null batches + if draws == 0 and blits == 0 then + if nullbatch then + printf("NULL BATCH!\n"); + end + return + end + + if draws > 0 then + printf("Batch:\n") + printf("-------\n") + printf(" # of draws: %u\n", draws) + printf(" mode: %s\n", drawmode) + if drawmode == "RM6_GMEM" then + printf(" bin size: %ux%u (%u bins)\n", binw, binh, nbins) + end + if depthtest or depthwrite then + printf(" ") + if depthtest then + printf("DEPTHTEST ") + end + if depthwrite then + printf("DEPTHWRITE") + end + printf("\n") + end + if stenciltest or stencilwrite then + printf(" ") + if stenciltest then + printf("STENCILTEST ") + end + if stencilwrite then + printf("STENCILWRITE") + end + printf("\n") + end + else + printf("Blit:\n") + printf("-----\n") + end + + for base,mrt in pairs(mrts) do + printf(" MRT[0x%x:0x%x]:\t%ux%u\t\t%s (%s)", base, mrt.flag, mrt.w, mrt.h, mrt.format, mrt.samples) + if drawmode == "RM6_GMEM" then + if cleared[mrt.gmem] then + printf("\tCLEARED") + end + if restored[mrt.gmem] then + printf("\tRESTORED") + end + if resolved[mrt.gmem] then + printf("\tRESOLVED") + end + else + if cleared[mrt.base] then + printf("\tCLEARED") + end + end + printf("\n") + end + + function print_source(source) + printf(" SRC[0x%x:0x%x]:\t%ux%u\t\t%s (%s)\n", source.base, source.flag, source.w, source.h, source.format, source.samples) + end + + for base,source in pairs(sources) do + -- only show sources that have been previously rendered to, other + -- textures are less interesting. 
Possibly this should be an + -- option somehow + if draws < 10 then + print_source(source) + elseif allmrts[base] or draws == 0 then + print_source(source) + elseif source.flag and allmrts[source.flag] then + print_source(source) + end + end + reset() +end + +function end_submit() + dbg("end_submit\n") + finish() +end + +-- Track the current mode: +local mode = "" +function CP_SET_MARKER(pkt, size) + mode = pkt[0].MARKER + dbg("mode: %s\n", mode) +end + +function CP_EVENT_WRITE(pkt, size) + if tostring(pkt[0].EVENT) ~= "BLIT" then + return + end + nullbatch = false + local m = tostring(mode) + if m == "RM6_GMEM" then + -- either clear or restore: + if r.RB_BLIT_INFO.CLEAR_MASK == 0 then + restored[r.RB_BLIT_BASE_GMEM] = 1 + else + cleared[r.RB_BLIT_BASE_GMEM] = 1 + end + -- push_mrt() because we could have GMEM + -- passes with only a clear and no draws: + local flag = 0 + local sysmem = 0; + -- try to match up the GMEM addr with the MRT/DEPTH state, + -- to avoid relying on RB_BLIT_DST also getting written: + for n = 0,r.RB_FS_OUTPUT_CNTL1.MRT-1 do + if r.RB_MRT[n].BASE_GMEM == r.RB_BLIT_BASE_GMEM then + sysmem = r.RB_MRT[n].BASE_LO | (r.RB_MRT[n].BASE_HI << 32) + flag = r.RB_MRT_FLAG_BUFFER[n].ADDR_LO | (r.RB_MRT_FLAG_BUFFER[n].ADDR_HI << 32) + break + end + end + if sysmem == 0 and r.RB_BLIT_BASE_GMEM == r.RB_DEPTH_BUFFER_BASE_GMEM then + sysmem = r.RB_DEPTH_BUFFER_BASE_LO | (r.RB_DEPTH_BUFFER_BASE_HI << 32) + flag = r.RB_DEPTH_FLAG_BUFFER_BASE_LO | (r.RB_DEPTH_FLAG_BUFFER_BASE_HI << 32) + + end + --NOTE this can get confused by previous blits: + --if sysmem == 0 then + -- -- fallback: + -- sysmem = r.RB_BLIT_DST_LO | (r.RB_BLIT_DST_HI << 32) + -- flag = r.RB_BLIT_FLAG_DST_LO | (r.RB_BLIT_FLAG_DST_HI << 32) + --end + if not r.RB_BLIT_DST_INFO.FLAGS then + flag = 0 + end + -- TODO maybe just emit RB_BLIT_DST_LO/HI for clears.. otherwise + -- we get confused by stale values in registers.. 
not sure + -- if this is a problem w/ blob + push_mrt(r.RB_BLIT_DST_INFO.COLOR_FORMAT, + r.RB_BLIT_SCISSOR_BR.X + 1, + r.RB_BLIT_SCISSOR_BR.Y + 1, + r.RB_BLIT_DST_INFO.SAMPLES, + sysmem, + flag, + r.RB_BLIT_BASE_GMEM) + elseif m == "RM6_RESOLVE" then + resolved[r.RB_BLIT_BASE_GMEM] = 1 + else + printf("I am confused!!!\n") + end +end + +function A6XX_TEX_CONST(pkt, size) + push_source(pkt[0].FMT, + pkt[1].WIDTH, pkt[1].HEIGHT, + pkt[0].SAMPLES, + pkt[4].BASE_LO | (pkt[5].BASE_HI << 32), + pkt[7].FLAG_LO | (pkt[8].FLAG_HI << 32)) +end + +function handle_blit() + -- blob sometimes uses CP_BLIT for resolves, so filter those out: + -- TODO it would be nice to not hard-code GMEM addr: + -- TODO I guess the src can be an offset from GMEM addr.. + if r.SP_PS_2D_SRC_LO == 0x100000 and not r.RB_2D_BLIT_CNTL.SOLID_COLOR then + resolved[0] = 1 + return + end + if draws > 0 then + finish() + end + reset() + drawmode = "BLIT" + -- This kinda assumes that we are doing full img blits, which is maybe + -- Not completely legit. We could perhaps instead just track pitch and + -- size/pitch?? 
Or maybe the size doesn't matter much + push_mrt(r.RB_2D_DST_INFO.COLOR_FORMAT, + r.GRAS_2D_DST_BR.X + 1, + r.GRAS_2D_DST_BR.Y + 1, + "MSAA_ONE", + r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32), + r.RB_2D_DST_FLAGS_LO | (r.RB_2D_DST_FLAGS_HI << 32), + -1) + if r.RB_2D_BLIT_CNTL.SOLID_COLOR then + dbg("CLEAR=%x\n", r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32)) + cleared[r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32)] = 1 + else + push_source(r.SP_2D_SRC_FORMAT.COLOR_FORMAT, + r.GRAS_2D_SRC_BR_X.X + 1, + r.GRAS_2D_SRC_BR_Y.Y + 1, + "MSAA_ONE", + r.SP_PS_2D_SRC_LO | (r.SP_PS_2D_SRC_HI << 32), + r.SP_PS_2D_SRC_FLAGS_LO | (r.SP_PS_2D_SRC_FLAGS_HI << 32)) + end + blits = blits + 1 + finish() +end + +function valid_transition(curmode, newmode) + if curmode == "RM6_BINNING" and newmode == "RM6_GMEM" then + return true + end + if curmode == "RM6_GMEM" and newmode == "RM6_RESOLVE" then + return true + end + return false +end + +function draw(primtype, nindx) + dbg("draw: %s (%s)\n", primtype, mode) + nullbatch = false + if primtype == "BLIT_OP_SCALE" then + handle_blit() + return + elseif primtype == "EVENT:BLIT" then + return + end + + local m = tostring(mode) + + -- detect changes in drawmode which indicate a different + -- pass.. 
BINNING->GMEM means same pass, but other + -- transitions mean different pass: + if drawmode and m ~= drawmode then + dbg("%s -> %s transition\n", drawmode, m) + if not valid_transition(drawmode, m) then + dbg("invalid transition, new render pass!\n") + finish() + reset() + end + end + + if m ~= "RM6_GMEM" and m ~= "RM6_BYPASS" then + if m == "RM6_BINNING" then + drawmode = m + return + end + if m == "RM6_RESOLVE" and primtype == "EVENT:BLIT" then + return + end + printf("unknown MODE %s for primtype %s\n", m, primtype) + return + end + + -- Only count the first tile for GMEM mode to avoid counting + -- each draw for each tile + if m == "RM6_GMEM" then + if r.RB_WINDOW_OFFSET.X ~= 0 or r.RB_WINDOW_OFFSET.Y ~= 0 then + return + end + end + + drawmode = m + local render_components = {} + render_components[0] = r.RB_RENDER_COMPONENTS.RT0; + render_components[1] = r.RB_RENDER_COMPONENTS.RT1; + render_components[2] = r.RB_RENDER_COMPONENTS.RT2; + render_components[3] = r.RB_RENDER_COMPONENTS.RT3; + render_components[4] = r.RB_RENDER_COMPONENTS.RT4; + render_components[5] = r.RB_RENDER_COMPONENTS.RT5; + render_components[6] = r.RB_RENDER_COMPONENTS.RT6; + render_components[7] = r.RB_RENDER_COMPONENTS.RT7; + for n = 0,r.RB_FS_OUTPUT_CNTL1.MRT-1 do + if render_components[n] ~= 0 then + push_mrt(r.RB_MRT[n].BUF_INFO.COLOR_FORMAT, + r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1, + r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1, + r.RB_MSAA_CNTL.SAMPLES, + r.RB_MRT[n].BASE_LO | (r.RB_MRT[n].BASE_HI << 32), + r.RB_MRT_FLAG_BUFFER[n].ADDR_LO | (r.RB_MRT_FLAG_BUFFER[n].ADDR_HI << 32), + r.RB_MRT[n].BASE_GMEM) + end + end + + local depthbase = r.RB_DEPTH_BUFFER_BASE_LO | + (r.RB_DEPTH_BUFFER_BASE_HI << 32) + + if depthbase ~= 0 then + push_mrt(r.RB_DEPTH_BUFFER_INFO.DEPTH_FORMAT, + r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1, + r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1, + r.RB_MSAA_CNTL.SAMPLES, + depthbase, + r.RB_DEPTH_FLAG_BUFFER_BASE_LO | (r.RB_DEPTH_FLAG_BUFFER_BASE_HI << 32), + 
r.RB_DEPTH_BUFFER_BASE_GMEM) + end + + if r.RB_DEPTH_CNTL.Z_WRITE_ENABLE then + depthwrite = true + end + + if r.RB_DEPTH_CNTL.Z_ENABLE then + depthtest = true + end + + -- clearly 0 != false.. :-/ + if r.RB_STENCILWRMASK.WRMASK ~= 0 then + stencilwrite = true + end + + if r.RB_STENCIL_CONTROL.STENCIL_ENABLE then + stenciltest = true + end + + -- TODO should also check for stencil buffer for z32+s8 case + + if m == "RM6_GMEM" then + binw = r.VSC_BIN_SIZE.WIDTH + binh = r.VSC_BIN_SIZE.HEIGHT + nbins = r.VSC_BIN_COUNT.NX * r.VSC_BIN_COUNT.NY + end + + draws = draws + 1 +end + diff --git a/src/freedreno/decode/scripts/sanity-a6xx.lua b/src/freedreno/decode/scripts/sanity-a6xx.lua new file mode 100644 index 00000000000..68e4c73c4f0 --- /dev/null +++ b/src/freedreno/decode/scripts/sanity-a6xx.lua @@ -0,0 +1,76 @@ +-- Parse cmdstream dump and check for common errors +-- 1) Check for overflowing HLSQ_xS_CNTL.CONSTLEN +-- 2) Check for constant uploads that overwrite each other. The +-- range checking is reset on each draw, since it is a valid +-- use-case to do partial constant upload. But if we see two +-- CP_LOAD_STATE* that overwrite the same range of constants +-- within the same draw, that is almost certainly unintentional. +-- +-- TODO add more checks +-- TODO maybe some parts could be shared across +-- different generations + +--local posix = require "posix" + +function printf(fmt, ...) + return io.write(string.format(fmt, ...)) +end + +function dbg(fmt, ...) + --printf(fmt, ...) 
+end + +stages = { + "SB6_VS_SHADER", + "SB6_HS_SHADER", + "SB6_DS_SHADER", + "SB6_GS_SHADER", + "SB6_FS_SHADER", + "SB6_CS_SHADER", +} + +-- maps shader stage to HLSQ_xS_CNTL register name: +cntl_regs = { + ["SB6_VS_SHADER"] = "HLSQ_VS_CNTL", + ["SB6_HS_SHADER"] = "HLSQ_HS_CNTL", + ["SB6_DS_SHADER"] = "HLSQ_DS_CNTL", + ["SB6_GS_SHADER"] = "HLSQ_GS_CNTL", + ["SB6_FS_SHADER"] = "HLSQ_FS_CNTL", + ["SB6_CS_SHADER"] = "HLSQ_CS_CNTL", +} + +-- initialize constant updated ranges: +-- constranges[stagename] -> table of offsets that have been uploaded +constranges = {} +function reset_constranges() + for i,stage in ipairs(stages) do + constranges[stage] = {} + end +end + +reset_constranges() + +printf("Checking cmdstream...\n") + +local r = rnn.init("a630") + +function draw(primtype, nindx) + printf("draw!\n") + -- reset ranges of uploaded consts on each draw: + reset_constranges() +end + +function CP_LOAD_STATE6(pkt, size) + if tostring(pkt[0].STATE_TYPE) ~= "ST6_CONSTANTS" then + return + end + dbg("got CP_LOAD_STATE6\n") + local stage = tostring(pkt[0].STATE_BLOCK) + local max = pkt[0].DST_OFF + pkt[0].NUM_UNIT + local cntl_reg = cntl_regs[stage] + dbg("looking for %s.. max=%d vs %d\n", cntl_reg, max, r[cntl_reg].CONSTLEN) + if max > r[cntl_reg].CONSTLEN then + printf("ERROR: invalid max constant offset for stage %s: %d vs %d\n", stage, max, r[cntl_reg].CONSTLEN) + end + +end diff --git a/src/freedreno/decode/scripts/test.lua b/src/freedreno/decode/scripts/test.lua new file mode 100644 index 00000000000..e9d8db2b6ae --- /dev/null +++ b/src/freedreno/decode/scripts/test.lua @@ -0,0 +1,31 @@ +io.write("HELLO WORLD\n") + +r = rnn.init("a630") + +function start_cmdstream(name) + io.write("START: " .. name .. "\n") +end + +function draw(primtype, nindx) + io.write("DRAW: " .. primtype .. ", " .. nindx .. "\n") + -- io.write("GRAS_CL_VPORT_XOFFSET: " .. r.GRAS_CL_VPORT_XOFFSET .. "\n") + io.write("RB_MRT[0].CONTROL.ROP_CODE: " .. r.RB_MRT[0].CONTROL.ROP_CODE .. 
"\n") + io.write("SP_VS_OUT[0].A_COMPMASK: " .. r.SP_VS_OUT[0].A_COMPMASK .. "\n") + --io.write("RB_DEPTH_CONTROL.Z_ENABLE: " .. tostring(r.RB_DEPTH_CONTROL.Z_ENABLE) .. "\n") + io.write("0x2280: written=" .. regs.written(0x2280) .. ", lastval=" .. regs.lastval(0x2280) .. ", val=" .. regs.val(0x2280) .. "\n") +end + +function A6XX_TEX_CONST(pkt, size) + io.write("\n-------- " .. size .. "\n") + io.write("-------- w=" .. pkt[1].WIDTH .. ", h=" .. pkt[1].HEIGHT .. "\n") + io.write("\n"); +end + +function end_cmdstream() + io.write("END\n") +end + +function finish() + io.write("FINISH\n") +end + diff --git a/src/freedreno/decode/scripts/tex3d-layout.lua b/src/freedreno/decode/scripts/tex3d-layout.lua new file mode 100644 index 00000000000..2d5069f09ac --- /dev/null +++ b/src/freedreno/decode/scripts/tex3d-layout.lua @@ -0,0 +1,137 @@ +-- Parse logs from test-quad-textured-3d.c to exctract layer/level +-- offsets +-- +-- We figure out the offsets from blits, but there may be some +-- unrelated blits. So just save all of them until we find the +-- texture state for the 3d texture. This gives us the base +-- address, and the miplevel #0 width/height/depth. Then work +-- backwards from there finding the blits to the same dst buffer +-- and deducing the miplevel from the minified dimensions + +local posix = require "posix" + +io.write("Analyzing Data...\n") + +local allblits = {} +local nallblits = 0 +local r = rnn.init("a630") + +function minify(val, lvls) + val = val >> lvls + if val < 1 then + return 1 + end + return val +end + +function printf(fmt, ...) + return io.write(string.format(fmt, ...)) +end + +function start_cmdstream(name) + io.write("Parsing " .. name .. 
"\n") + allblits = {} + nallblits = 0 +end + +function draw(primtype, nindx) + if primtype ~= "BLIT_OP_SCALE" then + return + end + + -- Just in case, filter out anything that isn't starting + -- at 0,0 + if r.GRAS_2D_DST_TL.X ~= 0 or r.GRAS_2D_DST_TL.Y ~= 0 then + return + end + + local blit = {} + + blit.width = r.GRAS_2D_DST_BR.X + 1 + blit.height = r.GRAS_2D_DST_BR.Y + 1 + blit.pitch = r.RB_2D_DST_SIZE.PITCH + blit.addr = r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32) + blit.base = bos.base(blit.addr) + blit.endaddr = 0 -- filled in later + --printf("Found blit: 0x%x (0x%x)\n", blit.addr, blit.base) + + allblits[nallblits] = blit + nallblits = nallblits + 1 +end + +function A6XX_TEX_CONST(pkt, size) + -- ignore any texture state w/ DEPTH=1, these aren't the 3d tex state we + -- are looking for + if pkt[5].DEPTH <= 1 then + return + end + + local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32) + local width0 = pkt[1].WIDTH + local height0 = pkt[1].HEIGHT + local depth0 = pkt[5].DEPTH + + printf("Found texture state: %ux%ux%u (MIN_LAYERSZ=0x%x)\n", + width0, height0, depth0, pkt[3].MIN_LAYERSZ) + + -- Note that in some case the texture has some extra page or so + -- at the beginning: + local basebase = bos.base(base) + printf("base: 0x%x (0x%x)\n", base, basebase) + + -- see if we can find the associated blits.. The blob always seems to + -- start from the lower (larger) mipmap levels and layers, so we don't + -- need to sort by dst address. 
Also, while we are at it, fill in the + -- end-addr (at least for everything but the last blit) + local blits = {} + local nblits = 0 + local lastblit = nil + for n = 0,nallblits-1 do + local blit = allblits[n] + --printf("blit addr: 0x%x (0x%x)\n", blit.addr, blit.base) + if blit.base == basebase and blit.addr >= base then + blits[nblits] = blit + nblits = nblits + 1 + if lastblit then + lastblit.endaddr = blit.addr + end + lastblit = blit + end + end + + -- now go thru the relevant blits and print out interesting details + local level = 0 + local layer = 0 + local w = width0 -- track current width/height to detect changing + local h = height0 -- mipmap level + for n = 0,nblits-1 do + local blit = blits[n] + --printf("%u: %ux%u, addr=%x\n", n, blit.width, blit.height, blit.addr) + if w ~= blit.width or h ~= blit.height then + level = level + 1 + layer = 0 + + if blit.width ~= minify(w, 1) or blit.height ~= minify(h, 1) then + printf("I am confused! %ux%u vs %ux%u\n", blit.width, blit.height, minify(w, 1), minify(h, 1)) + printf("addr=%x\n", blit.addr) + --return + end + + w = blit.width + h = blit.height + end + + printf("level=%u, layer=%u, sz=%ux%u, pitch=%u, offset=0x%x, addr=%x", + level, layer, w, h, blit.pitch, blit.addr - base, blit.addr) + if blit.endaddr ~= 0 then + local layersz = blit.endaddr - blit.addr + local alignedheight = layersz / blit.pitch + printf(", layersz=0x%x, alignedheight=%f", layersz, alignedheight) + end + printf("\n") + + layer = layer + 1 + end + printf("\n\n") +end + diff --git a/src/freedreno/decode/scripts/texturator-to-unit-test-5xx.lua b/src/freedreno/decode/scripts/texturator-to-unit-test-5xx.lua new file mode 100644 index 00000000000..b0ac8cb4e03 --- /dev/null +++ b/src/freedreno/decode/scripts/texturator-to-unit-test-5xx.lua @@ -0,0 +1,200 @@ +-- Parse logs from https://github.com/freedreno/freedreno/ +-- test-texturator.c to generate a src/freedreno/fdl/fd5_layout_test.c +-- block. 
We figure out the offsets from blits, but there may be some +-- unrelated blits. So just save all of them until we find the +-- texture state. This gives us the base address, and the miplevel #0 +-- width/height/depth. Then work backwards from there finding the +-- blits to the same dst buffer and deducing the miplevel from the +-- minified dimensions + +local posix = require "posix" + +io.write("Analyzing Data...\n") + +local r = rnn.init("a530") +local found_tex = 0 + +local allblits = {} +local nallblits = 0 + +function get_first_blit(base, width, height) + local first_blit = nil + + for n = 0,nallblits-1 do + local blit = allblits[n] + if blit.base == base and blit.width == width and blit.height == height then + if not first_blit or blit.addr < first_blit.addr then + first_blit = blit + end + end + end + + return first_blit +end + +function minify(val, lvls) + val = val >> lvls + if val < 1 then + return 1 + end + return val +end + +function printf(fmt, ...) + return io.write(string.format(fmt, ...)) +end + +function start_cmdstream(name) + io.write("Parsing " .. name .. 
"\n") + allblits = {} + nallblits = 0 +end + +-- Record texture upload blits done through CP_EVENT_WRITE +function CP_EVENT_WRITE(pkt, size) + if tostring(pkt[0].EVENT) ~= "BLIT" then + return + end + + local blit = {} + + blit.width = r.RB_RESOLVE_CNTL_2.X + 1 + blit.height = r.RB_RESOLVE_CNTL_2.Y + 1 + blit.pitch = r.RB_BLIT_DST_PITCH + blit.addr = r.RB_BLIT_DST_LO | (r.RB_BLIT_DST_HI << 32) + blit.base = bos.base(blit.addr) + blit.ubwc_addr = r.RB_BLIT_FLAG_DST_LO | (r.RB_BLIT_FLAG_DST_HI << 32) + blit.ubwc_base = bos.base(blit.ubwc_addr) + blit.ubwc_pitch = r.RB_BLIT_FLAG_DST_PITCH + blit.endaddr = 0 -- filled in later + printf("Found event blit: 0x%x (0x%x) %dx%d UBWC 0x%x (0x%x) tiled %s\n", blit.addr, blit.base, blit.width, blit.height, blit.ubwc_addr, blit.ubwc_base, r.RB_RESOLVE_CNTL_3.TILED) + + allblits[nallblits] = blit + nallblits = nallblits + 1 +end + +function CP_BLIT(pkt, size) + -- Just in case, filter out anything that isn't starting + -- at 0,0 + if pkt[1].SRC_X1 ~= 0 or pkt[1].SRC_Y1 ~= 0 then + return + end + + local blit = {} + + blit.width = pkt[2].SRC_X2 + 1 + blit.height = pkt[2].SRC_Y2 + 1 + blit.pitch = r.RB_2D_DST_SIZE.PITCH + blit.addr = r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32) + blit.base = bos.base(blit.addr) + blit.ubwc_addr = r.RB_2D_DST_FLAGS_LO | (r.RB_2D_DST_FLAGS_HI << 32) + blit.ubwc_base = bos.base(blit.ubwc_addr) + blit.ubwc_pitch = r.RB_2D_DST_FLAGS_PITCH + blit.endaddr = 0 -- filled in later + printf("Found cp blit: 0x%x (0x%x) %dx%d UBWC 0x%x (0x%x) %s\n", blit.addr, blit.base, blit.width, blit.height, blit.ubwc_addr, blit.ubwc_base, r.RB_2D_DST_INFO.TILE_MODE) + + allblits[nallblits] = blit + nallblits = nallblits + 1 +end + +function A5XX_TEX_CONST(pkt, size) + -- ignore any texture state w/ DEPTH=1, these aren't the 3d tex state we + -- are looking for + + local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32) + -- UBWC base on a5xx seems to be at the start of each miplevel, followed by pixels + -- somewhere past that. 
+ local ubwc_base = base + local width0 = pkt[1].WIDTH + local height0 = pkt[1].HEIGHT + local depth0 = pkt[5].DEPTH + + if (found_tex ~= 0) then + return + end + found_tex = 1 + + printf("Found texture state:\n %ux%ux%u (%s, %s, UBWC=%s)\n", + width0, height0, depth0, pkt[0].FMT, pkt[0].TILE_MODE, tostring(pkt[3].FLAG)) + + -- Note that in some case the texture has some extra page or so + -- at the beginning: + local basebase = bos.base(base) + printf("base: 0x%x (0x%x)\n", base, basebase) + printf("ubwcbase: 0x%x (0x%x)\n", ubwc_base, bos.base(ubwc_base)) + + -- see if we can find the associated blits.. The blob always seems to + -- start from the lower (larger) mipmap levels and layers, so we don't + -- need to sort by dst address. Also, while we are at it, fill in the + -- end-addr (at least for everything but the last blit) + local blits = {} + local nblits = 0 + local lastblit = nil + for n = 0,nallblits-1 do + local blit = allblits[n] + --printf("blit addr: 0x%x (0x%x)\n", blit.addr, blit.base) + if blit.base == basebase and blit.addr >= base then + blits[nblits] = blit + nblits = nblits + 1 + if lastblit then + lastblit.endaddr = blit.addr + end + lastblit = blit + end + end + + printf(" {\n") + printf(" .format = %s,\n", pkt[0].FMT) + if (tostring(pkt[2].TYPE) == "A5XX_TEX_3D") then + printf(" .is_3d = true,\n") + end + + printf(" .layout = {\n") + printf(" .tile_mode = %s,\n", pkt[0].TILE_MODE) + printf(" .ubwc = %s,\n", tostring(pkt[3].FLAG)) + + if (tostring(pkt[2].TYPE) == "A5XX_TEX_3D") then + printf(" .width0 = %d, .height0 = %d, .depth0 = %d,\n", width0, height0, depth0) + else + printf(" .width0 = %d, .height0 = %d,\n", width0, height0) + end + + printf(" .slices = {\n") + local w = 0 + local h = 0 + local level = 0 + repeat + local w = minify(width0, level) + local h = minify(height0, level) + local blit = get_first_blit(basebase, w, h) + if blit then + printf(" { .offset = %d, .pitch = %u },\n", + blit.addr - base, + blit.pitch); + end + level = 
level + 1 + until w == 1 and h == 1 + printf(" },\n") + + if pkt[3].FLAG then + printf(" .ubwc_slices = {\n") + level = 0 + repeat + local w = minify(width0, level) + local h = minify(height0, level) + local blit = get_first_blit(basebase, w, h) + if blit then + printf(" { .offset = %d, .pitch = %u },\n", + blit.ubwc_addr - ubwc_base, + blit.ubwc_pitch); + end + level = level + 1 + until w == 1 and h == 1 + printf(" },\n") + end + + printf(" },\n") + printf(" },\n") + printf("\n\n") +end + diff --git a/src/freedreno/decode/scripts/texturator-to-unit-test.lua b/src/freedreno/decode/scripts/texturator-to-unit-test.lua new file mode 100644 index 00000000000..8836d594545 --- /dev/null +++ b/src/freedreno/decode/scripts/texturator-to-unit-test.lua @@ -0,0 +1,179 @@ +-- Parse logs from https://github.com/freedreno/freedreno/ +-- test-texturator.c to generate a src/freedreno/fdl/fd6_layout_test.c +-- block. We figure out the offsets from blits, but there may be some +-- unrelated blits. So just save all of them until we find the +-- texture state. This gives us the base address, and the miplevel #0 +-- width/height/depth. Then work backwards from there finding the +-- blits to the same dst buffer and deducing the miplevel from the +-- minified dimensions + +local posix = require "posix" + +io.write("Analyzing Data...\n") + +local r = rnn.init("a630") +local found_tex = 0 + +local allblits = {} +local nallblits = 0 + +function get_first_blit(base, width, height) + local first_blit = nil + + for n = 0,nallblits-1 do + local blit = allblits[n] + if blit.base == base and blit.width == width and blit.height == height then + if not first_blit or blit.addr < first_blit.addr then + first_blit = blit + end + end + end + + return first_blit +end + +function minify(val, lvls) + val = val >> lvls + if val < 1 then + return 1 + end + return val +end + +function printf(fmt, ...) + return io.write(string.format(fmt, ...)) +end + +function start_cmdstream(name) + io.write("Parsing " .. 
name .. "\n") + allblits = {} + nallblits = 0 +end + +function draw(primtype, nindx) + if primtype ~= "BLIT_OP_SCALE" then + return + end + + -- Just in case, filter out anything that isn't starting + -- at 0,0 + if r.GRAS_2D_DST_TL.X ~= 0 or r.GRAS_2D_DST_TL.Y ~= 0 then + return + end + + local blit = {} + + blit.width = r.GRAS_2D_DST_BR.X + 1 + blit.height = r.GRAS_2D_DST_BR.Y + 1 + blit.pitch = r.RB_2D_DST_SIZE.PITCH + blit.addr = r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32) + blit.base = bos.base(blit.addr) + blit.ubwc_addr = r.RB_2D_DST_FLAGS_LO | (r.RB_2D_DST_FLAGS_HI << 32) + blit.ubwc_base = bos.base(blit.ubwc_addr) + blit.ubwc_pitch = r.RB_2D_DST_FLAGS_PITCH.PITCH + blit.endaddr = 0 -- filled in later + printf("Found blit: 0x%x (0x%x) %dx%d UBWC 0x%x (0x%x)\n", blit.addr, blit.base, blit.width, blit.height, blit.ubwc_addr, blit.ubwc_base) + + allblits[nallblits] = blit + nallblits = nallblits + 1 +end + +function A6XX_TEX_CONST(pkt, size) + -- ignore any texture state w/ DEPTH=1, these aren't the 3d tex state we + -- are looking for + + local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32) + local ubwc_base = pkt[7].FLAG_LO | (pkt[8].FLAG_HI << 32) + local width0 = pkt[1].WIDTH + local height0 = pkt[1].HEIGHT + local depth0 = pkt[5].DEPTH + + if (found_tex ~= 0) then + return + end + found_tex = 1 + + printf("Found texture state:\n %ux%ux%u (%s, %s, MIN_LAYERSZ=0x%x, TILE_ALL=%s, UBWC=%s FLAG_LOG2=%ux%u)\n", + width0, height0, depth0, pkt[0].FMT, pkt[0].TILE_MODE, pkt[3].MIN_LAYERSZ, tostring(pkt[3].TILE_ALL), tostring(pkt[3].FLAG), pkt[10].FLAG_BUFFER_LOGW, pkt[10].FLAG_BUFFER_LOGH) + + -- Note that in some case the texture has some extra page or so + -- at the beginning: + local basebase = bos.base(base) + printf("base: 0x%x (0x%x)\n", base, basebase) + printf("ubwcbase: 0x%x (0x%x)\n", ubwc_base, bos.base(ubwc_base)) + + -- see if we can find the associated blits.. 
The blob always seems to + -- start from the lower (larger) mipmap levels and layers, so we don't + -- need to sort by dst address. Also, while we are at it, fill in the + -- end-addr (at least for everything but the last blit) + local blits = {} + local nblits = 0 + local lastblit = nil + for n = 0,nallblits-1 do + local blit = allblits[n] + --printf("blit addr: 0x%x (0x%x)\n", blit.addr, blit.base) + if blit.base == basebase and blit.addr >= base then + blits[nblits] = blit + nblits = nblits + 1 + if lastblit then + lastblit.endaddr = blit.addr + end + lastblit = blit + end + end + + printf(" {\n") + printf(" .format = %s,\n", pkt[0].FMT) + if (tostring(pkt[2].TYPE) == "A6XX_TEX_3D") then + printf(" .is_3d = true,\n") + end + + printf(" .layout = {\n") + printf(" .tile_mode = %s,\n", pkt[0].TILE_MODE) + printf(" .ubwc = %s,\n", tostring(pkt[3].FLAG)) + + if (tostring(pkt[2].TYPE) == "A6XX_TEX_3D") then + printf(" .width0 = %d, .height0 = %d, .depth = %d,\n", width0, height0, depth0) + else + printf(" .width0 = %d, .height0 = %d,\n", width0, height0) + end + + printf(" .slices = {\n") + local w = 0 + local h = 0 + local level = 0 + repeat + local w = minify(width0, level) + local h = minify(height0, level) + local blit = get_first_blit(basebase, w, h) + if blit then + printf(" { .offset = %d, .pitch = %u },\n", + blit.addr - base, + blit.pitch); + end + level = level + 1 + until w == 1 and h == 1 + printf(" },\n") + + if pkt[3].FLAG then + printf(" .ubwc_slices = {\n") + level = 0 + repeat + local w = minify(width0, level) + local h = minify(height0, level) + local blit = get_first_blit(basebase, w, h) + if blit then + printf(" { .offset = %d, .pitch = %u },\n", + blit.ubwc_addr - ubwc_base, + blit.ubwc_pitch); + end + level = level + 1 + until w == 1 and h == 1 + printf(" },\n") + end + + printf(" },\n") + printf(" },\n") + printf("\n\n") +end + diff --git a/src/freedreno/decode/util.h b/src/freedreno/decode/util.h new file mode 100644 index 
00000000000..1ec02023b49 --- /dev/null +++ b/src/freedreno/decode/util.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __UTIL_H__ +#define __UTIL_H__ + +#include +#include +#include + +/* old-style program binary XOR'd ascii w/ 0xff */ +#ifndef ASCII_XOR +# define ASCII_XOR 0 +#endif + +static inline const char *tab(int lvl) +{ + const char *TAB = "\t\t\t\t\t\t\t\t\0"; + return &TAB[strlen(TAB) - lvl]; +} + +/* convert dword to float */ +static inline float d2f(uint32_t d) +{ + union { + float f; + uint32_t d; + } u = { + .d = d, + }; + return u.f; +} + +static inline void dump_hex(const void *buf, int sz) +{ + uint8_t *ptr = (uint8_t *)buf; + uint8_t *end = ptr + sz; + int i = 0; + + while (ptr < end) { + uint32_t d = 0; + + printf((i % 8) ? 
" " : "\t"); + + d |= *(ptr++) << 0; + d |= *(ptr++) << 8; + d |= *(ptr++) << 16; + d |= *(ptr++) << 24; + + printf("%08x", d); + + if ((i % 8) == 7) { + printf("\n"); + } + + i++; + } + + if (i % 8) { + printf("\n"); + } +} + +static inline void +dump_float(const void *buf, int sz) +{ + uint8_t *ptr = (uint8_t *)buf; + uint8_t *end = ptr + sz - 3; + int i = 0; + + while (ptr < end) { + uint32_t d = 0; + + printf((i % 8) ? " " : "\t"); + + d |= *(ptr++) << 0; + d |= *(ptr++) << 8; + d |= *(ptr++) << 16; + d |= *(ptr++) << 24; + + printf("%8f", d2f(d)); + + if ((i % 8) == 7) { + printf("\n"); + } + + i++; + } + + if (i % 8) { + printf("\n"); + } +} + +#define is_ok_ascii(c) \ + (isascii(c) && ((c == '\t') || !iscntrl(c))) + +static inline void +clean_ascii(char *buf, int sz) +{ + uint8_t *ptr = (uint8_t *)buf; + uint8_t *end = ptr + sz; + while (ptr < end) { + *(ptr++) ^= ASCII_XOR; + } +} + +static inline void +dump_ascii(const void *buf, int sz) +{ + uint8_t *ptr = (uint8_t *)buf; + uint8_t *end = ptr + sz; + printf("\t"); + while (ptr < end) { + uint8_t c = *(ptr++) ^ ASCII_XOR; + if (c == '\n') { + printf("\n\t"); + } else if (c == '\0') { + printf("\n\t-----------------------------------\n\t"); + } else if (is_ok_ascii(c)) { + printf("%c", c); + } else { + printf("?"); + } + } + printf("\n"); +} + +static inline void +dump_hex_ascii(const void *buf, int sz, int level) +{ + uint8_t *ptr = (uint8_t *)buf; + uint8_t *end = ptr + sz; + uint8_t *ascii = ptr; + int i = 0; + + printf("%s-----------------------------------------------\n", tab(level)); + printf("%s%d (0x%x) bytes\n", tab(level), sz, sz); + + while (ptr < end) { + uint32_t d = 0; + + if (i % 4) { + printf(" "); + } else { + printf("%s%06x: ", tab(level), (uint32_t)(ptr - (uint8_t *)buf)); + } + + d |= *(ptr++) << 0; + d |= *(ptr++) << 8; + d |= *(ptr++) << 16; + d |= *(ptr++) << 24; + + printf("%08x", d); + + if ((i % 4) == 3) { + int j; + printf("\t|"); + for (j = 0; j < 16; j++) { + uint8_t c = 
*(ascii++); + c ^= ASCII_XOR; + printf("%c", (isascii(c) && !iscntrl(c)) ? c : '.'); + } + printf("|\n"); + } + + i++; + } + + if (i % 4) { + for (int j = 4 - (i % 4); j > 0; j--) { + printf(" "); + } + printf("\t|"); + while (ascii < end) { + uint8_t c = *(ascii++); + c ^= ASCII_XOR; + printf("%c", (isascii(c) && !iscntrl(c)) ? c : '.'); + } + printf("|\n"); + } +} + +#endif /* __UTIL_H__ */ diff --git a/src/freedreno/meson.build b/src/freedreno/meson.build index 7b6ab5392b1..6405a7d51dc 100644 --- a/src/freedreno/meson.build +++ b/src/freedreno/meson.build @@ -19,6 +19,7 @@ # SOFTWARE. inc_freedreno = include_directories(['.', './registers']) +inc_freedreno_rnn = include_directories('rnn') subdir('common') subdir('registers') @@ -33,6 +34,7 @@ dep_libxml2 = dependency('libxml-2.0', required: false) # Everything that depends on rnn requires (indirectly) libxml2: if dep_libxml2.found() subdir('rnn') + subdir('decode') endif if with_tools.contains('drm-shim')