--- /dev/null
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Helper lib to track gpu buffers contents/address, and map between gpu and
+ * host address while decoding cmdstream/crashdumps
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "buffers.h"
+
+/*
+ * One tracked GPU buffer: a host-side copy of its contents together with
+ * the GPU address range that the copy corresponds to.
+ */
+struct buffer {
+ void *hostptr; /* host copy of the contents; free()d in reset_buffers() */
+ unsigned int len; /* extent of the buffer, in the same units as gpuaddr */
+ uint64_t gpuaddr; /* GPU address of the start of the buffer */
+
+ /* for 'once' mode, for buffers containing cmdstream keep track per offset
+ * into buffer of which modes it has already been dumped;
+ */
+ struct {
+ unsigned offset; /* offset relative to gpuaddr */
+ unsigned dumped_mask; /* enable_mask bits already recorded by has_dumped() */
+ } offsets[64];
+ unsigned noffsets; /* number of offsets[] entries in use */
+};
+
+/* Fixed-size table of tracked buffers; the first nbuffers entries are in use. */
+static struct buffer buffers[512];
+static int nbuffers;
+
+/*
+ * Return non-zero when gpuaddr lies inside the half-open range
+ * [buf->gpuaddr, buf->gpuaddr + buf->len).  The len argument is accepted
+ * but not consulted.
+ */
+static int
+buffer_contains_gpuaddr(struct buffer *buf, uint64_t gpuaddr, uint32_t len)
+{
+	const uint64_t first = buf->gpuaddr;
+	const uint64_t limit = buf->gpuaddr + buf->len;
+
+	if (gpuaddr < first)
+		return 0;
+
+	return gpuaddr < limit;
+}
+
+/*
+ * Return non-zero when hostptr points into the host copy of buf, i.e. into
+ * [buf->hostptr, buf->hostptr + buf->len).
+ *
+ * The comparison is done on char pointers: arithmetic on void pointers is
+ * a compiler extension rather than standard C.
+ */
+static int
+buffer_contains_hostptr(struct buffer *buf, void *hostptr)
+{
+	const char *base = buf->hostptr;
+	const char *ptr = hostptr;
+
+	return (base <= ptr) && (ptr < (base + buf->len));
+}
+
+
+/*
+ * Translate a host pointer into the GPU address it corresponds to.
+ *
+ * Returns the GPU address at the same offset within the first tracked
+ * buffer whose host copy contains hostptr, or 0 when no tracked buffer
+ * contains it.
+ */
+uint64_t
+gpuaddr(void *hostptr)
+{
+	for (int i = 0; i < nbuffers; i++) {
+		struct buffer *buf = &buffers[i];
+
+		if (!buffer_contains_hostptr(buf, hostptr))
+			continue;
+
+		/* subtract as char pointers; void pointer arithmetic is not standard C */
+		return buf->gpuaddr +
+			(uint64_t)((const char *)hostptr - (const char *)buf->hostptr);
+	}
+
+	return 0;
+}
+
+/*
+ * Return the starting GPU address of the first tracked buffer containing
+ * gpuaddr.  Returns 0 for a zero address or when no tracked buffer
+ * contains it.
+ */
+uint64_t
+gpubaseaddr(uint64_t gpuaddr)
+{
+	if (gpuaddr) {
+		for (int idx = 0; idx < nbuffers; idx++) {
+			struct buffer *buf = &buffers[idx];
+
+			if (buffer_contains_gpuaddr(buf, gpuaddr, 0))
+				return buf->gpuaddr;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Translate a GPU address into a pointer into the host copy of the buffer
+ * that contains it.
+ *
+ * Returns NULL for a zero address or when no tracked buffer contains
+ * gpuaddr.
+ */
+void *
+hostptr(uint64_t gpuaddr)
+{
+	if (!gpuaddr)
+		return NULL;
+
+	for (int i = 0; i < nbuffers; i++) {
+		struct buffer *buf = &buffers[i];
+
+		if (!buffer_contains_gpuaddr(buf, gpuaddr, 0))
+			continue;
+
+		/* offset as a char pointer; void pointer arithmetic is not standard C */
+		return (char *)buf->hostptr + (gpuaddr - buf->gpuaddr);
+	}
+
+	return NULL;
+}
+
+/*
+ * Return how much of the containing buffer remains from gpuaddr to the end
+ * of that buffer (buffer length minus the offset of gpuaddr within it).
+ * Returns 0 for a zero address or when no tracked buffer contains gpuaddr.
+ */
+unsigned
+hostlen(uint64_t gpuaddr)
+{
+	if (gpuaddr) {
+		for (int idx = 0; idx < nbuffers; idx++) {
+			struct buffer *buf = &buffers[idx];
+
+			if (buffer_contains_gpuaddr(buf, gpuaddr, 0))
+				return buf->len + buf->gpuaddr - gpuaddr;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * 'once' mode bookkeeping: report whether the location gpuaddr has already
+ * been dumped for every mode bit in enable_mask, and record enable_mask as
+ * dumped for that location.
+ *
+ * Returns true only when all bits of enable_mask were already recorded for
+ * the exact offset of gpuaddr within its buffer.  Returns false for a zero
+ * address, for an address outside every tracked buffer, and when the
+ * per-buffer offset table is full (the location can then not be tracked
+ * and is reported as not yet dumped).
+ */
+bool
+has_dumped(uint64_t gpuaddr, unsigned enable_mask)
+{
+	if (!gpuaddr)
+		return false;
+
+	for (int i = 0; i < nbuffers; i++) {
+		struct buffer *b = &buffers[i];
+
+		if (!buffer_contains_gpuaddr(b, gpuaddr, 0))
+			continue;
+
+		assert(gpuaddr >= b->gpuaddr);
+		unsigned offset = gpuaddr - b->gpuaddr;
+
+		/* look for an existing entry for this offset: */
+		unsigned n = 0;
+		while ((n < b->noffsets) && (b->offsets[n].offset != offset))
+			n++;
+
+		/* if needed, allocate a new offset entry: */
+		if (n == b->noffsets) {
+			/* check capacity before claiming the slot, so that every
+			 * slot is usable and nothing is written past the table
+			 */
+			if (n >= ARRAY_SIZE(b->offsets))
+				return false;
+
+			b->offsets[n].dumped_mask = 0;
+			b->offsets[n].offset = offset;
+			b->noffsets++;
+		}
+
+		if ((b->offsets[n].dumped_mask & enable_mask) == enable_mask)
+			return true;
+
+		b->offsets[n].dumped_mask |= enable_mask;
+
+		return false;
+	}
+
+	return false;
+}
+
+/*
+ * Forget every tracked buffer: free each host copy, clear the per-buffer
+ * length and offset bookkeeping, and mark the table as empty.
+ */
+void
+reset_buffers(void)
+{
+	int idx = 0;
+
+	while (idx < nbuffers) {
+		struct buffer *buf = &buffers[idx++];
+
+		free(buf->hostptr);
+		buf->hostptr = NULL;
+		buf->len = 0;
+		buf->noffsets = 0;
+	}
+
+	nbuffers = 0;
+}
+
+/**
+ * Record buffer contents, takes ownership of hostptr (freed in
+ * reset_buffers(), or here when the entry is later replaced or when the
+ * table is full).
+ *
+ * If a buffer with the same gpuaddr is already tracked, its entry is
+ * updated in place: the previously owned host copy is released and the new
+ * pointer and length are stored.  The per-offset 'once' bookkeeping of a
+ * replaced entry is left untouched.
+ */
+void
+add_buffer(uint64_t gpuaddr, unsigned int len, void *hostptr)
+{
+	int i;
+
+	for (i = 0; i < nbuffers; i++) {
+		if (buffers[i].gpuaddr == gpuaddr)
+			break;
+	}
+
+	if (i == nbuffers) {
+		/* some traces, like test-perf, with some blob versions,
+		 * seem to generate an unreasonable # of gpu buffers (a
+		 * leak?), so just ignore them.
+		 */
+		if (nbuffers >= ARRAY_SIZE(buffers)) {
+			free(hostptr);
+			return;
+		}
+		nbuffers++;
+	} else if (buffers[i].hostptr != hostptr) {
+		/* replacing an existing entry: we own the old host copy, so
+		 * release it rather than leaking it (guarding against the same
+		 * pointer being handed in again)
+		 */
+		free(buffers[i].hostptr);
+	}
+
+	buffers[i].hostptr = hostptr;
+	buffers[i].len = len;
+	buffers[i].gpuaddr = gpuaddr;
+}
--- /dev/null
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __BUFFERS_H__
+#define __BUFFERS_H__
+
+#include <stdint.h>
+#include <stdbool.h>
+
+/* Map a host pointer back to its GPU address; 0 if no tracked buffer contains it. */
+uint64_t gpuaddr(void *hostptr);
+/* Starting GPU address of the tracked buffer containing gpuaddr; 0 if none. */
+uint64_t gpubaseaddr(uint64_t gpuaddr);
+/* Host pointer corresponding to gpuaddr; NULL if no tracked buffer contains it. */
+void * hostptr(uint64_t gpuaddr);
+/* Remaining length of the containing buffer, counted from gpuaddr; 0 if none. */
+unsigned hostlen(uint64_t gpuaddr);
+/* 'once' mode: true if gpuaddr was already recorded as dumped for all bits of
+ * enable_mask; otherwise records them and returns false.
+ */
+bool has_dumped(uint64_t gpuaddr, unsigned enable_mask);
+
+/* Drop all tracked buffers, freeing their host copies. */
+void reset_buffers(void);
+/* Track a buffer; takes ownership of hostptr. */
+void add_buffer(uint64_t gpuaddr, unsigned int len, void *hostptr);
+
+/* Element count of a true array (not valid for pointers). */
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+#endif /* __BUFFERS_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <err.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <string.h>
+#include <assert.h>
+#include <signal.h>
+#include <errno.h>
+
+#include "redump.h"
+#include "disasm.h"
+#include "script.h"
+#include "rnnutil.h"
+#include "buffers.h"
+#include "cffdec.h"
+
+/* ************************************************************************* */
+/* originally based on kernel recovery dump code: */
+
+/* decoder options, set by cffdec_init() */
+static const struct cffdec_options *options;
+
+/* when set, dump_registers() reports writes to non-banked registers */
+static bool needs_wfi = false;
+/* copy of options->summary taken in cffdec_init(); consulted by quiet() */
+static bool summary = false;
+/* the TEX_SAMP/TEX_CONST register handlers do nothing unless this is set */
+static bool in_summary = false;
+static int vertices;
+
+/*
+ * Upper bound used to validate register offsets: 0xffff when
+ * options->gpu_id is 500 or above, 0x7fff otherwise.
+ */
+static inline unsigned regcnt(void)
+{
+	return (options->gpu_id >= 500) ? 0xffff : 0x7fff;
+}
+
+/* Non-zero when options->gpu_id is 500 or above (addresses printed as 64 bit). */
+static int is_64b(void)
+{
+	if (options->gpu_id >= 500)
+		return 1;
+
+	return 0;
+}
+
+
+static int draws[3];
+/* per-IB-level decode state, indexed by the current level 'ib' */
+static struct {
+ uint64_t base;
+ uint32_t size; /* in dwords */
+ /* Generally cmdstream consists of multiple IB calls to different
+ * buffers, which are themselves often re-used for each tile. The
+ * triggered flag serves two purposes to help make it more clear
+ * what part of the cmdstream is before vs after the GPU hang:
+ *
+ * 1) if in IB2 we are past the point within the IB2 buffer where
+ * the GPU hung, but IB1 is not past the point within its
+ * buffer where the GPU had hung, then we know the GPU hang
+ * happens on a future use of that IB2 buffer.
+ *
+ * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
+ * hung, but we've already passed the trigger point at the same
+ * IB level, we know that we are past the point where the GPU
+ * had hung.
+ *
+ * So this is a one way switch, false->true. And a higher #'d
+ * IB level isn't considered triggered unless the lower #'d IB
+ * level is.
+ */
+ bool triggered;
+} ibs[4];
+/* index into ibs[] (and options->ibs[]) of the level being decoded */
+static int ib;
+
+static int draw_count;
+static int current_draw_count;
+
+/* query mode.. to handle symbolic register name queries, we need to
+ * defer parsing the query strings until after gpu_id is known and the
+ * rnn db is loaded:
+ */
+static int *queryvals;
+
+/*
+ * Decide whether output at verbosity level lvl should be suppressed.
+ *
+ * Everything is suppressed while a draw filter is active and the current
+ * draw is not the selected one.  Level 3 and above is also suppressed in
+ * summary, query or script mode; level 2 and above in query or script
+ * mode.
+ */
+static bool
+quiet(int lvl)
+{
+	const bool filtered_out = (options->draw_filter != -1) &&
+			(options->draw_filter != current_draw_count);
+
+	if (filtered_out)
+		return true;
+
+	const bool query_or_script = options->querystrs || options->script;
+
+	if (lvl >= 3)
+		return summary || query_or_script;
+	if (lvl >= 2)
+		return query_or_script;
+
+	return false;
+}
+
+/*
+ * printf() wrapper that prints to stdout only when output at verbosity
+ * level lvl is not suppressed by quiet().
+ */
+void
+printl(int lvl, const char *fmt, ...)
+{
+	if (!quiet(lvl)) {
+		va_list ap;
+
+		va_start(ap, fmt);
+		vprintf(fmt, ap);
+		va_end(ap);
+	}
+}
+
+/*
+ * Indentation prefixes indexed by nesting level: entry N holds N+1 tabs
+ * for the first nine levels; deeper levels all map to the literal "x".
+ */
+static const char *levels[] = {
+ "\t",
+ "\t\t",
+ "\t\t\t",
+ "\t\t\t\t",
+ "\t\t\t\t\t",
+ "\t\t\t\t\t\t",
+ "\t\t\t\t\t\t\t",
+ "\t\t\t\t\t\t\t\t",
+ "\t\t\t\t\t\t\t\t\t",
+ "x",
+ "x",
+ "x",
+ "x",
+ "x",
+ "x",
+};
+
+/* Source of texture/sampler state, passed to dump_tex_samp().
+ * NOTE(review): presumably mirrors the state-source field of the state
+ * load packets - confirm against the callers further down the file.
+ */
+enum state_src_t {
+ STATE_SRC_DIRECT,
+ STATE_SRC_INDIRECT,
+ STATE_SRC_BINDLESS,
+};
+
+/* SDS (CP_SET_DRAW_STATE) helpers: */
+static void load_all_groups(int level);
+static void disable_all_groups(void);
+
+/* forward declarations, needed by the register handlers below */
+static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level);
+static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
+
+/*
+ * Decide whether the row at gpuaddr should be highlighted as being at or
+ * after the estimated crash location for the current IB level.
+ *
+ * Once a level has triggered it stays triggered.  A level only triggers
+ * when the buffer being decoded is the one named in options->ibs[ib] and
+ * gpuaddr falls between the position derived from the remaining dword
+ * count and the end of the buffer; the first time that happens a marker
+ * line is printed.
+ */
+static bool
+highlight_gpuaddr(uint64_t gpuaddr)
+{
+	/* nothing to highlight without color, or without a known location */
+	if (!options->color || !options->ibs[ib].base)
+		return false;
+
+	/* a nested level cannot trigger before its parent level has */
+	if (ib > 0) {
+		if (options->ibs[ib-1].base && !ibs[ib-1].triggered)
+			return false;
+	}
+
+	if (ibs[ib].triggered)
+		return true;
+
+	if (options->ibs[ib].base != ibs[ib].base)
+		return false;
+
+	const uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
+	const uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
+
+	if ((gpuaddr < start) || (end < gpuaddr))
+		return false;
+
+	ibs[ib].triggered = true;
+	printf("ESTIMATED CRASH LOCATION!\n");
+
+	return true;
+}
+
+/*
+ * Hex-dump sizedwords dwords starting at dwords, eight dwords per row.
+ *
+ * Each row is prefixed with the GPU address of its first dword (64 or 32
+ * bit wide depending on is_64b()), the indentation for level, and the
+ * byte offset of the row.  Rows after the first that are entirely zero
+ * are skipped; a single "*" line marks the start of each skipped run.
+ * Nothing is printed when quiet(2) says level-2 output is suppressed.
+ */
+static void
+dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	bool lastzero = true;
+
+	if (quiet(2))
+		return;
+
+	for (uint32_t i = 0; i < sizedwords; i += 8) {
+		/* always show first row: */
+		bool zero = (i != 0);
+
+		for (uint32_t j = 0; (j < 8) && (i+j < sizedwords) && zero; j++)
+			if (dwords[i+j])
+				zero = false;
+
+		if (zero && !lastzero)
+			printf("*\n");
+
+		lastzero = zero;
+
+		if (zero)
+			continue;
+
+		uint64_t addr = gpuaddr(&dwords[i]);
+		bool highlight = highlight_gpuaddr(addr);
+
+		if (highlight)
+			printf("\x1b[0;1;31m");
+
+		if (is_64b()) {
+			/* PRIx64 rather than %lx: long is not 64 bit everywhere */
+			printf("%016" PRIx64 ":%s", addr, levels[level]);
+		} else {
+			printf("%08x:%s", (uint32_t)addr, levels[level]);
+		}
+
+		if (highlight)
+			printf("\x1b[0m");
+
+		printf("%04x:", i * 4);
+
+		for (uint32_t j = 0; (j < 8) && (i+j < sizedwords); j++) {
+			printf(" %08x", dwords[i+j]);
+		}
+
+		printf("\n");
+	}
+}
+
+/*
+ * Print sizedwords floats starting at dwords, eight per row.  Each row is
+ * prefixed with the GPU address of its first value (64 or 32 bit wide
+ * depending on is_64b()) and the indentation for level.
+ */
+static void
+dump_float(float *dwords, uint32_t sizedwords, int level)
+{
+	for (uint32_t i = 0; i < sizedwords; i++) {
+		if ((i % 8) == 0) {
+			uint64_t addr = gpuaddr(&dwords[i]);
+
+			if (is_64b()) {
+				/* PRIx64 rather than %lx: long is not 64 bit everywhere */
+				printf("%016" PRIx64 ":%s", addr, levels[level]);
+			} else {
+				printf("%08x:%s", (uint32_t)addr, levels[level]);
+			}
+		} else {
+			printf(" ");
+		}
+		printf("%8f", dwords[i]);
+		if ((i % 8) == 7)
+			printf("\n");
+	}
+
+	/* terminate a partially filled last row */
+	if (sizedwords % 8)
+		printf("\n");
+}
+
+/*
+ * Split a packed dword into an address part and a flags part: the bits
+ * selected by mask are returned in *flags, all other bits in *gpuaddr.
+ *
+ * Original author's note: the surface format is believed to live in the
+ * low bits (RB_COLOR_INFO__COLOR_FORMAT_MASK is 0x0000000f); comments in
+ * sys2gmem_tex_const indicate that the address is [31:12], but at least
+ * some of the bits above the format appear to have a different meaning.
+ */
+static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr,
+		uint32_t *flags, uint32_t mask)
+{
+	const uint32_t addr_bits = dword & ~mask;
+	const uint32_t flag_bits = dword & mask;
+
+	assert(!is_64b()); /* this is only used on a2xx */
+
+	*gpuaddr = addr_bits;
+	*flags = flag_bits;
+}
+
+/* shadow copy of the register file, indexed by register offset */
+static uint32_t type0_reg_vals[0xffff + 1];
+/* bitmaps with one bit per register (bit regbase%8 of byte regbase/8) */
+static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals)/8]; /* written since last draw */
+static uint8_t type0_reg_written[sizeof(type0_reg_vals)/8];
+/* per-register values returned by reg_lastval() */
+static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
+
+/* True when the "rewritten" bit of register regbase is set. */
+static bool reg_rewritten(uint32_t regbase)
+{
+	const unsigned bit = 1 << (regbase % 8);
+
+	return (type0_reg_rewritten[regbase/8] & bit) != 0;
+}
+
+/* True when the "written" bit of register regbase is set. */
+bool reg_written(uint32_t regbase)
+{
+	const unsigned bit = 1 << (regbase % 8);
+
+	return (type0_reg_written[regbase/8] & bit) != 0;
+}
+
+/* Clear the "rewritten" bit of every register. */
+static void clear_rewritten(void)
+{
+	memset(type0_reg_rewritten, 0, sizeof type0_reg_rewritten);
+}
+
+/* Clear both the "written" and the "rewritten" bit of every register. */
+static void clear_written(void)
+{
+	memset(type0_reg_written, 0, sizeof type0_reg_written);
+	clear_rewritten();
+}
+
+/* Return the lastvals[] entry of register regbase (no range check). */
+uint32_t reg_lastval(uint32_t regbase)
+{
+	const uint32_t *slot = &lastvals[regbase];
+
+	return *slot;
+}
+
+/* Zero every lastvals[] entry. */
+static void
+clear_lastvals(void)
+{
+	memset(lastvals, 0, sizeof lastvals);
+}
+
+/* Return the shadowed value of register regbase (no range check). */
+uint32_t
+reg_val(uint32_t regbase)
+{
+	const uint32_t *slot = &type0_reg_vals[regbase];
+
+	return *slot;
+}
+
+/*
+ * Record a write of val to register regbase: store the value in the shadow
+ * register file and set the register's "written" and "rewritten" bits.
+ * Asserts that regbase is below regcnt().
+ */
+void
+reg_set(uint32_t regbase, uint32_t val)
+{
+	const uint32_t byte = regbase / 8;
+	const unsigned bit = 1 << (regbase % 8);
+
+	assert(regbase < regcnt());
+
+	type0_reg_vals[regbase] = val;
+	type0_reg_written[byte] |= bit;
+	type0_reg_rewritten[byte] |= bit;
+}
+
+/*
+ * Register handler: print the shadowed values of scratch registers 4..7 as
+ * a comma separated list.  The scratch base is looked up by name, trying
+ * "CP_SCRATCH[0].REG" first and then "CP_SCRATCH_REG0"; nothing is printed
+ * when neither resolves or when level-3 output is suppressed.  The name
+ * and dword arguments are not used.
+ */
+static void
+reg_dump_scratch(const char *name, uint32_t dword, int level)
+{
+	if (quiet(3))
+		return;
+
+	unsigned scratch0 = regbase("CP_SCRATCH[0].REG");
+
+	if (!scratch0) {
+		// if not, try old a2xx/a3xx version:
+		scratch0 = regbase("CP_SCRATCH_REG0");
+		if (!scratch0)
+			return;
+	}
+
+	printf("%s:%u,%u,%u,%u\n", levels[level],
+			reg_val(scratch0 + 4), reg_val(scratch0 + 5),
+			reg_val(scratch0 + 6), reg_val(scratch0 + 7));
+}
+
+/*
+ * Hex-dump up to sizedwords dwords of the tracked buffer that contains
+ * gpuaddr, starting at gpuaddr, one indentation level below level.
+ *
+ * The dump is clamped to what remains of the buffer after gpuaddr (as
+ * reported by hostlen()), so a request larger than the buffer never reads
+ * past the end of the host copy.  Nothing is printed when output at
+ * quietlvl is suppressed or when no tracked buffer contains gpuaddr.
+ */
+static void
+dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
+{
+	if (quiet(quietlvl))
+		return;
+
+	void *buf = hostptr(gpuaddr);
+	if (!buf)
+		return;
+
+	/* dwords available from gpuaddr to the end of the buffer */
+	uint32_t avail = hostlen(gpuaddr) / 4;
+	uint32_t count = (uint32_t)sizedwords;
+
+	if (count > avail)
+		count = avail;
+
+	dump_hex(buf, count, level+1);
+}
+
+/* Hex-dump 64 dwords at gpuaddr, as level-3 output. */
+static void
+dump_gpuaddr(uint64_t gpuaddr, int level)
+{
+	const int sizedwords = 64;
+	const int quietlvl = 3;
+
+	dump_gpuaddr_size(gpuaddr, level, sizedwords, quietlvl);
+}
+
+/*
+ * Register handler: treat the register value as a 32-bit GPU address and
+ * hex-dump the memory it points to.  The name argument is not used.
+ */
+static void
+reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
+{
+	const uint64_t addr = dword;
+
+	dump_gpuaddr(addr, level);
+}
+
+/* low 32 bits of a 64-bit address, latched until the matching _HI register is seen */
+uint32_t gpuaddr_lo;
+
+/*
+ * Register handler for the _LO half of an address pair: remember the value
+ * in gpuaddr_lo.  The name and level arguments are not used.
+ */
+static void
+reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
+{
+	gpuaddr_lo = dword;
+}
+
+/*
+ * Register handler for the _HI half of an address pair: combine dword
+ * (upper 32 bits) with the latched gpuaddr_lo and hex-dump the memory at
+ * the resulting address.  The name argument is not used.
+ */
+static void
+reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
+{
+	const uint64_t hi = ((uint64_t)dword) << 32;
+
+	dump_gpuaddr(gpuaddr_lo | hi, level);
+}
+
+
+/*
+ * When options->dump_shaders is set, write bufsz bytes from buf to a new
+ * file named "NNNN.<ext>" in the current directory, where NNNN is a
+ * counter that is incremented on every call.
+ *
+ * A failure to create or fully write the file is reported with warn() and
+ * otherwise ignored; decoding continues.
+ */
+static void
+dump_shader(const char *ext, void *buf, int bufsz)
+{
+	if (!options->dump_shaders)
+		return;
+
+	static int n = 0;
+	/* "%04d." plus a three character extension already needs 9 bytes,
+	 * so leave generous room and let snprintf() bound the write
+	 */
+	char filename[32];
+
+	snprintf(filename, sizeof(filename), "%04d.%s", n++, ext);
+
+	int fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
+	if (fd < 0) {
+		warn("could not open %s", filename);
+		return;
+	}
+
+	if (write(fd, buf, bufsz) != (ssize_t)bufsz)
+		warn("could not write %s", filename);
+
+	close(fd);
+}
+
+/*
+ * Hex-dump and disassemble the shader located at gpuaddr (with the low
+ * four address bits masked off).
+ *
+ * The first 64 dwords (or fewer, if less remains in the buffer) are
+ * hex-dumped, then everything from gpuaddr to the end of the containing
+ * buffer is passed to the disassembler.  If name contains one of the
+ * known SP_xS_OBJ markers the same bytes are also handed to dump_shader()
+ * with the matching file extension.  Nothing happens when level-3 output
+ * is suppressed or no tracked buffer contains the address.
+ */
+static void
+disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
+{
+	/* this is a bit ugly way, but oh well.. */
+	static const struct {
+		const char *marker;
+		const char *ext;
+	} shader_exts[] = {
+		{ "SP_VS_OBJ", "vo3" },
+		{ "SP_FS_OBJ", "fo3" },
+		{ "SP_GS_OBJ", "go3" },
+		{ "SP_CS_OBJ", "co3" },
+	};
+
+	gpuaddr &= 0xfffffffffffffff0;
+
+	if (quiet(3))
+		return;
+
+	void *buf = hostptr(gpuaddr);
+	if (!buf)
+		return;
+
+	uint32_t sizedwords = hostlen(gpuaddr) / 4;
+
+	dump_hex(buf, min(64, sizedwords), level+1);
+	disasm_a3xx(buf, sizedwords, level+2, stdout, options->gpu_id);
+
+	for (unsigned i = 0; i < ARRAY_SIZE(shader_exts); i++) {
+		if (strstr(name, shader_exts[i].marker)) {
+			dump_shader(shader_exts[i].ext, buf, sizedwords * 4);
+			break;
+		}
+	}
+}
+
+/*
+ * Register handler: treat the register value as a 32-bit GPU address and
+ * disassemble the shader it points to.
+ */
+static void
+reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
+{
+	const uint64_t addr = dword;
+
+	disasm_gpuaddr(name, addr, level);
+}
+
+/*
+ * Register handler for the _HI half of a shader address pair: combine
+ * dword (upper 32 bits) with the latched gpuaddr_lo and disassemble the
+ * shader at the resulting address.
+ */
+static void
+reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
+{
+	const uint64_t hi = ((uint64_t)dword) << 32;
+
+	disasm_gpuaddr(name, gpuaddr_lo | hi, level);
+}
+
+/* Look up the value of the TEX_COUNT register belonging to the named
+ * TEX_SAMP/TEX_CONST register.
+ *
+ * The count register name is formed by replacing everything from the
+ * first "CONST" (or, failing that, the first "SAMP") in name with
+ * "COUNT".  Returns 0 when name contains neither.
+ *
+ * Note, this kinda assumes an equal # of samplers and textures, but not
+ * really sure if there is a much better option. I suppose on a6xx we
+ * could instead decode the bitfields in SP_xS_CONFIG
+ */
+static int
+get_tex_count(const char *name)
+{
+	char count_reg[strlen(name) + 5];
+	const char *kind = strstr(name, "CONST");
+
+	if (!kind) {
+		kind = strstr(name, "SAMP");
+		if (!kind)
+			return 0;
+	}
+
+	/* keep the prefix up to the marker, then append "COUNT" and its NUL */
+	const size_t prefix = kind - name;
+	memcpy(count_reg, name, prefix);
+	memcpy(count_reg + prefix, "COUNT", sizeof("COUNT"));
+
+	return reg_val(regbase(count_reg));
+}
+
+/*
+ * Register handler for the _HI half of a TEX_SAMP address pair.  Only
+ * active while in_summary is set: combines dword with the latched
+ * gpuaddr_lo and, if the address maps to a tracked buffer, dumps the
+ * sampler state found there for get_tex_count(name) units.
+ */
+static void
+reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
+{
+	if (in_summary) {
+		const int num_unit = get_tex_count(name);
+		const uint64_t addr = (((uint64_t)dword) << 32) | gpuaddr_lo;
+		void *samp = hostptr(addr);
+
+		if (samp)
+			dump_tex_samp(samp, STATE_SRC_DIRECT, num_unit, level+1);
+	}
+}
+
+/*
+ * Register handler for the _HI half of a TEX_CONST address pair.  Only
+ * active while in_summary is set: combines dword with the latched
+ * gpuaddr_lo and, if the address maps to a tracked buffer, dumps the
+ * texture state found there for get_tex_count(name) units.
+ */
+static void
+reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
+{
+	if (in_summary) {
+		const int num_unit = get_tex_count(name);
+		const uint64_t addr = (((uint64_t)dword) << 32) | gpuaddr_lo;
+		void *texconst = hostptr(addr);
+
+		if (texconst)
+			dump_tex_const(texconst, num_unit, level+1);
+	}
+}
+
+/*
+ * Registers with special handling (rnndec_decode() handles rest):
+ *
+ * One table per GPU generation, each terminated by an entry with a NULL
+ * regname.  cffdec_init() points type0_reg at the table for the selected
+ * gpu_id, init_rnn() resolves every regname to its register offset
+ * (regbase), and dump_register() calls fxn for a register whose offset
+ * matches.
+ *
+ * For 64-bit address pairs the _LO entry only latches the low half
+ * (reg_gpuaddr_lo); the work is done by the handler on the _HI entry.
+ */
+#define REG(x, fxn) { #x, fxn }
+static struct {
+ const char *regname;
+ void (*fxn)(const char *name, uint32_t dword, int level);
+ uint32_t regbase;
+} reg_a2xx[] = {
+ REG(CP_SCRATCH_REG0, reg_dump_scratch),
+ REG(CP_SCRATCH_REG1, reg_dump_scratch),
+ REG(CP_SCRATCH_REG2, reg_dump_scratch),
+ REG(CP_SCRATCH_REG3, reg_dump_scratch),
+ REG(CP_SCRATCH_REG4, reg_dump_scratch),
+ REG(CP_SCRATCH_REG5, reg_dump_scratch),
+ REG(CP_SCRATCH_REG6, reg_dump_scratch),
+ REG(CP_SCRATCH_REG7, reg_dump_scratch),
+ {NULL},
+}, reg_a3xx[] = {
+ REG(CP_SCRATCH_REG0, reg_dump_scratch),
+ REG(CP_SCRATCH_REG1, reg_dump_scratch),
+ REG(CP_SCRATCH_REG2, reg_dump_scratch),
+ REG(CP_SCRATCH_REG3, reg_dump_scratch),
+ REG(CP_SCRATCH_REG4, reg_dump_scratch),
+ REG(CP_SCRATCH_REG5, reg_dump_scratch),
+ REG(CP_SCRATCH_REG6, reg_dump_scratch),
+ REG(CP_SCRATCH_REG7, reg_dump_scratch),
+ REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
+ REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
+ REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
+ REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
+ REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
+ REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
+ {NULL},
+}, reg_a4xx[] = {
+ REG(CP_SCRATCH[0].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
+ REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
+ REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
+ REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
+ REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
+ REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
+ REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
+ REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
+ REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
+ REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
+ REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
+ REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
+ REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
+ REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
+ REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
+ REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
+ REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
+ REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
+ {NULL},
+}, reg_a5xx[] = {
+ REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
+ REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
+ REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
+// REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
+// REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
+// REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
+// REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
+// REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
+// REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
+// REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
+// REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
+// REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
+// REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
+// REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
+// REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
+// REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
+// REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
+// REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
+// REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
+// REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
+// REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
+// REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
+// REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
+// REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
+// REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
+// REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
+// REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
+// REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
+// REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
+
+// REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
+// REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
+// REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
+// REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
+// REG(RB_2D_DST_LO, reg_gpuaddr_lo),
+// REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
+// REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
+// REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
+
+ {NULL},
+}, reg_a6xx[] = {
+ REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
+ REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
+
+ REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+ REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
+ REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
+
+ REG(SP_VS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(SP_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(SP_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(SP_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(SP_HS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(SP_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(SP_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(SP_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(SP_DS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(SP_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(SP_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(SP_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(SP_GS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(SP_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(SP_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(SP_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(SP_FS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(SP_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(SP_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(SP_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+ REG(SP_CS_TEX_CONST_LO, reg_gpuaddr_lo),
+ REG(SP_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
+ REG(SP_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
+ REG(SP_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
+
+ {NULL},
+}, *type0_reg;
+
+/* register database handle, created and loaded by init_rnn() */
+static struct rnn *rnn;
+
+/*
+ * Create the register database for gpuname and resolve everything that
+ * depends on it:
+ *
+ *  - each query string is converted to a register offset, either by
+ *    parsing it as a number or, if that yields 0, by looking it up as a
+ *    register name; the result is stored in queryvals[] and echoed;
+ *  - every entry of the selected type0_reg table gets its register offset
+ *    looked up by name.  An unknown name is reported and terminates the
+ *    process with exit status 1.
+ */
+static void
+init_rnn(const char *gpuname)
+{
+	rnn = rnn_new(!options->color);
+
+	rnn_load(rnn, gpuname);
+
+	if (options->querystrs) {
+		queryvals = calloc(options->nquery, sizeof(queryvals[0]));
+
+		for (int q = 0; q < options->nquery; q++) {
+			const char *querystr = options->querystrs[q];
+			int val = strtol(querystr, NULL, 0);
+
+			/* not a (non-zero) number, so treat it as a register name */
+			if (val == 0)
+				val = regbase(querystr);
+
+			queryvals[q] = val;
+			printf("querystr: %s -> 0x%x\n", querystr, queryvals[q]);
+		}
+	}
+
+	unsigned idx = 0;
+	while (type0_reg[idx].regname) {
+		const char *name = type0_reg[idx].regname;
+
+		type0_reg[idx].regbase = regbase(name);
+		if (!type0_reg[idx].regbase) {
+			printf("invalid register name: %s\n", name);
+			exit(1);
+		}
+		idx++;
+	}
+}
+
+/*
+ * Reset the register tracking state: clear the written/rewritten bitmaps,
+ * the lastvals[] table and the per-IB-level state.
+ */
+void
+reset_regs(void)
+{
+	memset(&ibs, 0, sizeof ibs);
+	clear_lastvals();
+	clear_written();
+}
+
+/*
+ * (Re)initialise the decoder for the GPU described by _options.
+ *
+ * Stores the options pointer, resets the register tracking state and the
+ * draw counter, selects the special-register table for the GPU generation
+ * and loads the matching register database.  May be called again to
+ * decode another file.  Exits via errx() for an unsupported gpu_id.
+ */
+void
+cffdec_init(const struct cffdec_options *_options)
+{
+	options = _options;
+	summary = options->summary;
+
+	/* in case we're decoding multiple files: */
+	free(queryvals);
+	/* init_rnn() only allocates a new table when there are query
+	 * strings, so do not keep the stale pointer around: it would be
+	 * freed a second time by the next call
+	 */
+	queryvals = NULL;
+	reset_regs();
+	draw_count = 0;
+
+	/* TODO we need an API to free/cleanup any previous rnn */
+
+	switch (options->gpu_id) {
+	case 200 ... 299:
+		type0_reg = reg_a2xx;
+		init_rnn("a2xx");
+		break;
+	case 300 ... 399:
+		type0_reg = reg_a3xx;
+		init_rnn("a3xx");
+		break;
+	case 400 ... 499:
+		type0_reg = reg_a4xx;
+		init_rnn("a4xx");
+		break;
+	case 500 ... 599:
+		type0_reg = reg_a5xx;
+		init_rnn("a5xx");
+		break;
+	case 600 ... 699:
+		type0_reg = reg_a6xx;
+		init_rnn("a6xx");
+		break;
+	default:
+		errx(-1, "unsupported gpu");
+	}
+}
+
+/* Look up the name of type-3 packet opcode opc in the "adreno_pm4_type3_packets" enum. */
+const char *
+pktname(unsigned opc)
+{
+	const char *name = rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
+
+	return name;
+}
+
+/* Look up the name of the register at offset regbase; color is passed through to the database. */
+const char *
+regname(uint32_t regbase, int color)
+{
+	const char *name = rnn_regname(rnn, regbase, color);
+
+	return name;
+}
+
+/*
+ * Look up the offset of the register called name.  Callers in this file
+ * treat a result of 0 as "no such register".
+ */
+uint32_t
+regbase(const char *name)
+{
+	const uint32_t offset = rnn_regbase(rnn, name);
+
+	return offset;
+}
+
+/*
+ * Return non-zero when the (uncolored) name of register regbase ends with
+ * suffix.
+ *
+ * Only the tail of the name is compared, so a name that also contains the
+ * suffix somewhere earlier is still matched correctly.  A register without
+ * a name never matches.
+ */
+static int
+endswith(uint32_t regbase, const char *suffix)
+{
+	const char *name = regname(regbase, 0);
+
+	/* NOTE(review): regname() may not be able to return NULL; the check
+	 * is purely defensive
+	 */
+	if (!name)
+		return 0;
+
+	const size_t namelen = strlen(name);
+	const size_t suffixlen = strlen(suffix);
+
+	if (suffixlen > namelen)
+		return 0;
+
+	return strcmp(name + (namelen - suffixlen), suffix) == 0;
+}
+
+/*
+ * Print one decoded register line: indentation for level, the register
+ * name and its decoded value.
+ *
+ * On gpu_id 500 and above, a register whose name ends in _HI/_LO and
+ * whose neighbour carries the matching other suffix is treated as half of
+ * a 64-bit GPU address; if that address falls inside a tracked buffer the
+ * buffer base, the offset into it and the buffer size are appended.
+ *
+ * Registers that have no type information are printed as raw hex, and
+ * registers unknown to the database as "<offset>: value".
+ */
+void
+dump_register_val(uint32_t regbase, uint32_t dword, int level)
+{
+	struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
+
+	if (info && info->typeinfo) {
+		uint64_t gpuaddr = 0;
+		char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
+		printf("%s%s: %s", levels[level], info->name, decoded);
+
+		/* Try and figure out if we are looking at a gpuaddr.. this
+		 * might be useful for other gen's too, but at least a5xx has
+		 * the _HI/_LO suffix we can look for. Maybe a better approach
+		 * would be some special annotation in the xml..
+		 */
+		if (options->gpu_id >= 500) {
+			if (endswith(regbase, "_HI") && endswith(regbase-1, "_LO")) {
+				gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase-1);
+			} else if (endswith(regbase, "_LO") && endswith(regbase+1, "_HI")) {
+				gpuaddr = (((uint64_t)reg_val(regbase+1)) << 32) | dword;
+			}
+		}
+
+		if (gpuaddr && hostptr(gpuaddr)) {
+			uint64_t base = gpubaseaddr(gpuaddr);
+
+			/* PRIx64/PRIu64 rather than %lx/%lu: long is not 64 bit
+			 * everywhere
+			 */
+			printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
+					base, gpuaddr - base, hostlen(base));
+		}
+
+		printf("\n");
+
+		free(decoded);
+	} else if (info) {
+		printf("%s%s: %08x\n", levels[level], info->name, dword);
+	} else {
+		printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
+	}
+
+	if (info) {
+		free(info->name);
+		free(info);
+	}
+}
+
+/*
+ * Handle one register write: print the decoded value (unless level-3
+ * output is suppressed) and then run the special handler from the active
+ * type0_reg table, if the register has one.
+ */
+static void
+dump_register(uint32_t regbase, uint32_t dword, int level)
+{
+	if (!quiet(3))
+		dump_register_val(regbase, dword, level);
+
+	/* find the first table entry for this register, if any */
+	unsigned idx = 0;
+	while (type0_reg[idx].regname && (type0_reg[idx].regbase != regbase))
+		idx++;
+
+	if (type0_reg[idx].regname)
+		type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
+}
+
+/* True when regbase lies in the banked register range [0x2000, 0x2400). */
+static bool
+is_banked_reg(uint32_t regbase)
+{
+	if (regbase < 0x2000)
+		return false;
+
+	return regbase < 0x2400;
+}
+
+/*
+ * Process a run of sizedwords consecutive register writes starting at
+ * register regbase, taking the values from dwords.  Each value is stored
+ * in the shadow register file and then dumped.  The summary flag is
+ * restored after every register in case a handler changed it.
+ */
+static void
+dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	for (uint32_t n = 0; n < sizedwords; n++) {
+		const uint32_t reg = regbase + n;
+		const uint32_t val = dwords[n];
+		const int saved_summary = summary;
+
+		/* access to non-banked registers needs a WFI:
+		 * TODO banked register range for a2xx??
+		 */
+		if (needs_wfi && !is_banked_reg(reg))
+			printl(2, "NEEDS WFI: %s (%x)\n", regname(reg, 1), reg);
+
+		reg_set(reg, val);
+		dump_register(reg, val, level);
+
+		summary = saved_summary;
+	}
+}
+
+/*
+ * Decode a packet payload of sizedwords dwords against the rnn domain
+ * called name and print one line per decoded field.
+ *
+ * If a script packet hook is installed it is called first with the raw
+ * payload.  Decoding stops at the first dword offset for which the domain
+ * has no typed entry.  A field whose type extends to bit 32 or above
+ * consumes two dwords.  Nothing is printed when level-2 output is
+ * suppressed, and nothing happens at all if the domain does not exist.
+ */
+static void
+dump_domain(uint32_t *dwords, uint32_t sizedwords, int level,
+		const char *name)
+{
+	struct rnndomain *dom;
+
+	dom = rnn_finddomain(rnn->db, name);
+
+	if (!dom)
+		return;
+
+	if (script_packet)
+		script_packet(dwords, sizedwords, rnn, dom);
+
+	if (quiet(2))
+		return;
+
+	for (uint32_t i = 0; i < sizedwords; i++) {
+		struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
+		char *decoded;
+		if (!(info && info->typeinfo)) {
+			/* release an entry that exists but carries no type info,
+			 * instead of leaking it
+			 */
+			if (info) {
+				free(info->name);
+				free(info);
+			}
+			break;
+		}
+		uint64_t value = dwords[i];
+		if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
+			value |= (uint64_t) dwords[i + 1] << 32;
+			i++; /* skip the next dword since we're printing it now */
+		}
+		decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
+		/* Unlike the register printing path, we don't print the name
+		 * of the register, so if it doesn't contain other named
+		 * things (i.e. it isn't a bitset) then print the register
+		 * name as if it's a bitset with a single entry. This avoids
+		 * having to create a dummy register with a single entry to
+		 * get a name in the decoding.
+		 */
+		if (info->typeinfo->type == RNN_TTYPE_BITSET ||
+		    info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
+			printf("%s%s\n", levels[level], decoded);
+		} else {
+			printf("%s{ %s%s%s = %s }\n", levels[level],
+					rnn->vc->colors->rname, info->name,
+					rnn->vc->colors->reset, decoded);
+		}
+		free(decoded);
+		free(info->name);
+		free(info);
+	}
+}
+
+
+/* Current bin rectangle; refreshed by cp_set_bin() and, for gpu_id in
+ * [500, 700), from the window scissor registers in __do_query().
+ */
+static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
+/* NOTE(review): not referenced in this part of the file -- confirm usage. */
+static unsigned mode;
+/* Label of the pass currently being decoded, printed by print_mode() and
+ * __do_query().
+ */
+static const char *render_mode;
+/* Bitmask of render modes whose draw-state groups should be processed;
+ * load_group() skips groups whose own enable_mask does not intersect it.
+ */
+static enum {
+ MODE_BINNING = 0x1,
+ MODE_GMEM = 0x2,
+ MODE_BYPASS = 0x4,
+ MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
+} enable_mask = MODE_ALL;
+/* Printed by print_mode() as "skip_ib2: g=.., l=..". */
+static bool skip_ib2_enable_global;
+static bool skip_ib2_enable_local;
+
+/* Print the current render mode and skip_ib2 flags, prefixed with
+ * levels[level].  Only emitted for gpu_id >= 500 and when not quiet(2).
+ */
+static void
+print_mode(int level)
+{
+	if (options->gpu_id < 500)
+		return;
+
+	if (quiet(2))
+		return;
+
+	printf("%smode: %s\n", levels[level], render_mode);
+	printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global, skip_ib2_enable_local);
+}
+
+/* Decide whether the query output for the current draw should be
+ * suppressed.
+ *
+ *   QUERY_ALL:     never skip.
+ *   QUERY_WRITTEN: skip unless a queried, written register was rewritten.
+ *   QUERY_DELTA:   skip unless a queried, written register differs from
+ *                  its entry in lastvals[].
+ *   anything else: skip.
+ */
+static bool
+skip_query(void)
+{
+	bool by_delta;
+
+	if (options->query_mode == QUERY_ALL)
+		return false;	/* never skip: */
+
+	if (options->query_mode == QUERY_DELTA)
+		by_delta = true;
+	else if (options->query_mode == QUERY_WRITTEN)
+		by_delta = false;
+	else
+		return true;
+
+	for (int q = 0; q < options->nquery; q++) {
+		const uint32_t reg = queryvals[q];
+
+		if (!reg_written(reg))
+			continue;
+
+		if (by_delta) {
+			if (reg_val(reg) != lastvals[reg])
+				return false;
+		} else if (reg_rewritten(reg)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Print one line per queried register that has been written.
+ *
+ * Each line carries the draw number, primitive type, bin rectangle, index
+ * count, (gpu_id >= 500) render mode, the register value, a '!' marker when
+ * the value differs from lastvals[], a '+' marker when it was rewritten,
+ * and finally the decoded register.  A blank line follows when more than
+ * one register was printed.
+ */
+static void
+__do_query(const char *primtype, uint32_t num_indices)
+{
+	int printed = 0;
+
+	/* on a5xx/a6xx the bin rectangle comes from the window scissor: */
+	if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
+		const uint32_t tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
+		const uint32_t br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
+
+		bin_x1 = tl & 0xffff;
+		bin_y1 = tl >> 16;
+		bin_x2 = br & 0xffff;
+		bin_y2 = br >> 16;
+	}
+
+	for (int q = 0; q < options->nquery; q++) {
+		const uint32_t reg = queryvals[q];
+		uint32_t value;
+
+		if (!reg_written(reg))
+			continue;
+
+		value = reg_val(reg);
+
+		printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype,
+				bin_x1, bin_y1, bin_x2, bin_y2, num_indices);
+		if (options->gpu_id >= 500)
+			printf("%s:", render_mode);
+		printf("\t%08x", value);
+		fputs((value != lastvals[reg]) ? "!" : " ", stdout);
+		fputs(reg_rewritten(reg) ? "+" : " ", stdout);
+		dump_register_val(reg, value, 0);
+		printed++;
+	}
+
+	if (printed > 1)
+		printf("\n");
+}
+
+/* 'query-compare' handling for one draw: print the queried registers as
+ * seen by the binning pass and then by the draw pass.
+ *
+ * enable_mask and render_mode are overridden while this runs and restored
+ * before returning; all draw-state groups are disabled on the way out.
+ */
+static void
+do_query_compare(const char *primtype, uint32_t num_indices)
+{
+	static const struct {
+		unsigned mask;
+		const char *label;
+	} passes[] = {
+		/* dump binning pass values: */
+		{ MODE_BINNING,            "BINNING" },
+		/* dump draw pass values: */
+		{ MODE_GMEM | MODE_BYPASS, "DRAW"    },
+	};
+	const unsigned prev_enable_mask = enable_mask;
+	const char *prev_render_mode = render_mode;
+
+	/* in 'query-compare' mode, we want to see if the register is written
+	 * or changed in any mode:
+	 *
+	 * (NOTE: this could cause false-positive for 'query-delta' if the reg
+	 * is written with different values in binning vs sysmem/gmem mode, as
+	 * we don't track previous values per-mode, but I think we can live with
+	 * that)
+	 */
+	enable_mask = MODE_ALL;
+
+	clear_rewritten();
+	load_all_groups(0);
+
+	if (!skip_query()) {
+		for (unsigned p = 0; p < ARRAY_SIZE(passes); p++) {
+			enable_mask = passes[p].mask;
+			render_mode = passes[p].label;
+			clear_rewritten();
+			load_all_groups(0);
+			__do_query(primtype, num_indices);
+		}
+
+		printf("\n");
+	}
+
+	enable_mask = prev_enable_mask;
+	render_mode = prev_render_mode;
+
+	disable_all_groups();
+}
+
+/* Per-draw hook for both scripting and register queries.
+ *
+ * The script_draw callback (if any) always runs first.  After that, either
+ * the 'query-compare' path is taken, or the plain query is printed unless
+ * skip_query() says otherwise.
+ *
+ * NOTE: call this before dump_register_summary()
+ */
+static void
+do_query(const char *primtype, uint32_t num_indices)
+{
+	if (script_draw)
+		script_draw(primtype, num_indices);
+
+	if (options->query_compare)
+		do_query_compare(primtype, num_indices);
+	else if (!skip_query())
+		__do_query(primtype, num_indices);
+}
+
+/* Handle an immediate shader load: dwords[0] selects the shader kind
+ * (0 = vertex, 1 = fragment), dwords[1] packs start (high 16 bits) and size
+ * (low 16 bits), and the remaining dwords are the shader body, which is
+ * disassembled and, for the known kinds, also written out raw.
+ */
+static void
+cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t start = dwords[1] >> 16;
+	const uint32_t size = dwords[1] & 0xffff;
+	uint32_t *body = dwords + 2;
+	const char *type = "<unknown>";
+	const char *ext = NULL;
+	enum shader_t disasm_type = 0;
+
+	if (dwords[0] == 0) {
+		type = "vertex";
+		ext = "vo";
+		disasm_type = SHADER_VERTEX;
+	} else if (dwords[0] == 1) {
+		type = "fragment";
+		ext = "fo";
+		disasm_type = SHADER_FRAGMENT;
+	}
+
+	printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start, size);
+	disasm_a2xx(body, sizedwords - 2, level+2, disasm_type);
+
+	/* dump raw shader: */
+	if (ext)
+		dump_shader(ext, body, (sizedwords - 2) * 4);
+}
+
+/* Write a run of registers: the low 16 bits of dwords[0] give the first
+ * register, and dwords[1..] are the values for consecutive registers.
+ * Each value is printed and then recorded.
+ */
+static void
+cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t first_reg = dwords[0] & 0xffff;
+
+	for (uint32_t n = 1; n < sizedwords; n++) {
+		const uint32_t reg = first_reg + (n - 1);
+
+		dump_register(reg, dwords[n], level+1);
+		reg_set(reg, dwords[n]);
+	}
+}
+
+/* Kind of state carried by a load-state packet, as resolved by the
+ * *_get_state_type() lookup tables and dispatched on in cp_load_state().
+ * Values start at 1, so a zero-filled (unlisted) table entry matches none
+ * of them and lands in cp_load_state()'s default case.
+ */
+enum state_t {
+ TEX_SAMP = 1,
+ TEX_CONST,
+ TEX_MIPADDR, /* a3xx only */
+ SHADER_PROG,
+ SHADER_CONST,
+
+ // image/ssbo state:
+ SSBO_0,
+ SSBO_1,
+ SSBO_2,
+
+ UBO,
+
+ // unknown things, which are just hexdumped:
+ UNKNOWN_DWORDS,
+ UNKNOWN_2DWORDS,
+ UNKNOWN_4DWORDS,
+};
+
+/* State block ids; used as the row index of the lookup table in
+ * a3xx_get_state_type().
+ */
+enum adreno_state_block {
+ SB_VERT_TEX = 0,
+ SB_VERT_MIPADDR = 1,
+ SB_FRAG_TEX = 2,
+ SB_FRAG_MIPADDR = 3,
+ SB_VERT_SHADER = 4,
+ SB_GEOM_SHADER = 5,
+ SB_FRAG_SHADER = 6,
+ SB_COMPUTE_SHADER = 7,
+};
+
+/* TODO there is probably a clever way to let rnndec parse things so
+ * we don't have to care about packet format differences across gens
+ */
+
+/* Resolve stage, state kind and payload source from a load-state header in
+ * the layout cp_load_state() uses for gpu_id < 400.
+ *
+ *   dwords[0] bits 19..21: state block id (enum adreno_state_block)
+ *   dwords[0] bits 16..18: state source, 0 meaning direct (inline payload)
+ *   dwords[1] bits  0..1 : state type within the block
+ *
+ * Combinations that are not listed in the table yield a zero stage and a
+ * zero state.
+ */
+static void
+a3xx_get_state_type(uint32_t *dwords, enum shader_t *stage, enum state_t *state,
+		enum state_src_t *src)
+{
+	unsigned state_block_id = (dwords[0] >> 19) & 0x7;
+	unsigned state_type = dwords[1] & 0x3;
+	/* Dimensions match the masks above (3 bit block id, 2 bit type), so
+	 * every possible index pair stays inside the table.
+	 */
+	static const struct {
+		enum shader_t stage;
+		enum state_t state;
+	} lookup[0x8][0x4] = {
+		[SB_VERT_TEX][0]    = { SHADER_VERTEX,   TEX_SAMP },
+		[SB_VERT_TEX][1]    = { SHADER_VERTEX,   TEX_CONST },
+		[SB_FRAG_TEX][0]    = { SHADER_FRAGMENT, TEX_SAMP },
+		[SB_FRAG_TEX][1]    = { SHADER_FRAGMENT, TEX_CONST },
+		[SB_VERT_SHADER][0] = { SHADER_VERTEX,   SHADER_PROG },
+		[SB_VERT_SHADER][1] = { SHADER_VERTEX,   SHADER_CONST },
+		[SB_FRAG_SHADER][0] = { SHADER_FRAGMENT, SHADER_PROG },
+		[SB_FRAG_SHADER][1] = { SHADER_FRAGMENT, SHADER_CONST },
+	};
+
+	*stage = lookup[state_block_id][state_type].stage;
+	*state = lookup[state_block_id][state_type].state;
+	unsigned state_src = (dwords[0] >> 16) & 0x7;
+	if (state_src == 0 /* SS_DIRECT */)
+		*src = STATE_SRC_DIRECT;
+	else
+		*src = STATE_SRC_INDIRECT;
+}
+
+/* Map the 2-bit state-source field (bits 16..17 of the header dword) to a
+ * state_src_t.  The unlisted encoding (3) is treated as direct.
+ */
+static enum state_src_t
+_get_state_src(unsigned dword0)
+{
+	const unsigned sel = (dword0 >> 16) & 0x3;
+
+	if (sel == 2)	/* SS4_INDIRECT / SS6_INDIRECT */
+		return STATE_SRC_INDIRECT;
+
+	if (sel == 1)	/* SS6_BINDLESS */
+		return STATE_SRC_BINDLESS;
+
+	/* 0 is SS4_DIRECT / SS6_DIRECT; anything else falls back to it */
+	return STATE_SRC_DIRECT;
+}
+
+static void
+_get_state_type(unsigned state_block_id, unsigned state_type,
+ enum shader_t *stage, enum state_t *state)
+{
+ static const struct {
+ enum shader_t stage;
+ enum state_t state;
+ } lookup[0x10][0x4] = {
+ // SB4_VS_TEX:
+ [0x0][0] = { SHADER_VERTEX, TEX_SAMP },
+ [0x0][1] = { SHADER_VERTEX, TEX_CONST },
+ [0x0][2] = { SHADER_VERTEX, UBO },
+ // SB4_HS_TEX:
+ [0x1][0] = { SHADER_TCS, TEX_SAMP },
+ [0x1][1] = { SHADER_TCS, TEX_CONST },
+ [0x1][2] = { SHADER_TCS, UBO },
+ // SB4_DS_TEX:
+ [0x2][0] = { SHADER_TES, TEX_SAMP },
+ [0x2][1] = { SHADER_TES, TEX_CONST },
+ [0x2][2] = { SHADER_TES, UBO },
+ // SB4_GS_TEX:
+ [0x3][0] = { SHADER_GEOM, TEX_SAMP },
+ [0x3][1] = { SHADER_GEOM, TEX_CONST },
+ [0x3][2] = { SHADER_GEOM, UBO },
+ // SB4_FS_TEX:
+ [0x4][0] = { SHADER_FRAGMENT, TEX_SAMP },
+ [0x4][1] = { SHADER_FRAGMENT, TEX_CONST },
+ [0x4][2] = { SHADER_FRAGMENT, UBO },
+ // SB4_CS_TEX:
+ [0x5][0] = { SHADER_COMPUTE, TEX_SAMP },
+ [0x5][1] = { SHADER_COMPUTE, TEX_CONST },
+ [0x5][2] = { SHADER_COMPUTE, UBO },
+ // SB4_VS_SHADER:
+ [0x8][0] = { SHADER_VERTEX, SHADER_PROG },
+ [0x8][1] = { SHADER_VERTEX, SHADER_CONST },
+ [0x8][2] = { SHADER_VERTEX, UBO },
+ // SB4_HS_SHADER
+ [0x9][0] = { SHADER_TCS, SHADER_PROG },
+ [0x9][1] = { SHADER_TCS, SHADER_CONST },
+ [0x9][2] = { SHADER_TCS, UBO },
+ // SB4_DS_SHADER
+ [0xa][0] = { SHADER_TES, SHADER_PROG },
+ [0xa][1] = { SHADER_TES, SHADER_CONST },
+ [0xa][2] = { SHADER_TES, UBO },
+ // SB4_GS_SHADER
+ [0xb][0] = { SHADER_GEOM, SHADER_PROG },
+ [0xb][1] = { SHADER_GEOM, SHADER_CONST },
+ [0xb][2] = { SHADER_GEOM, UBO },
+ // SB4_FS_SHADER:
+ [0xc][0] = { SHADER_FRAGMENT, SHADER_PROG },
+ [0xc][1] = { SHADER_FRAGMENT, SHADER_CONST },
+ [0xc][2] = { SHADER_FRAGMENT, UBO },
+ // SB4_CS_SHADER:
+ [0xd][0] = { SHADER_COMPUTE, SHADER_PROG },
+ [0xd][1] = { SHADER_COMPUTE, SHADER_CONST },
+ [0xd][2] = { SHADER_COMPUTE, UBO },
+ [0xd][3] = { SHADER_COMPUTE, SSBO_0 }, /* a6xx location */
+ // SB4_SSBO (shared across all stages)
+ [0xe][0] = { 0, SSBO_0 }, /* a5xx (and a4xx?) location */
+ [0xe][1] = { 0, SSBO_1 },
+ [0xe][2] = { 0, SSBO_2 },
+ // SB4_CS_SSBO
+ [0xf][0] = { SHADER_COMPUTE, SSBO_0 },
+ [0xf][1] = { SHADER_COMPUTE, SSBO_1 },
+ [0xf][2] = { SHADER_COMPUTE, SSBO_2 },
+ // unknown things
+ /* This looks like combined UBO state for 3d stages (a5xx and
+ * before?? I think a6xx has UBO state per shader stage:
+ */
+ [0x6][2] = { 0, UBO },
+ [0x7][1] = { 0, UNKNOWN_2DWORDS },
+ };
+
+ *stage = lookup[state_block_id][state_type].stage;
+ *state = lookup[state_block_id][state_type].state;
+}
+
+/* Header decode for the layout cp_load_state() uses when
+ * 400 <= gpu_id < 600: block id in dwords[0] bits 18..21, state type in
+ * dwords[1] bits 0..1.
+ */
+static void
+a4xx_get_state_type(uint32_t *dwords, enum shader_t *stage, enum state_t *state,
+		enum state_src_t *src)
+{
+	_get_state_type((dwords[0] >> 18) & 0xf, dwords[1] & 0x3, stage, state);
+	*src = _get_state_src(dwords[0]);
+}
+
+/* Header decode for the layout cp_load_state() uses when gpu_id >= 600:
+ * both the block id (bits 18..21) and the state type (bits 14..15) live in
+ * dwords[0].
+ */
+static void
+a6xx_get_state_type(uint32_t *dwords, enum shader_t *stage, enum state_t *state,
+		enum state_src_t *src)
+{
+	const uint32_t hdr = dwords[0];
+	const unsigned block_id = (hdr >> 18) & 0xf;
+	const unsigned type = (hdr >> 14) & 0x3;
+
+	_get_state_type(block_id, type, stage, state);
+	*src = _get_state_src(hdr);
+}
+
+/* Decode and hexdump num_unit texture sampler descriptors.
+ *
+ * The descriptor domain, its size in dwords and the distance to the next
+ * descriptor depend on the gpu generation (and, on a6xx, on whether the
+ * state is bindless).  Nothing is printed for generations outside
+ * [300, 700).
+ */
+static void
+dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
+{
+	const char *domain = NULL;
+	int ndwords = 0;
+	int stride = 0;
+
+	if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
+		domain = "A3XX_TEX_SAMP";
+		ndwords = stride = 2;
+	} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
+		domain = "A4XX_TEX_SAMP";
+		ndwords = stride = 2;
+	} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
+		domain = "A5XX_TEX_SAMP";
+		ndwords = stride = 4;
+	} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
+		domain = "A6XX_TEX_SAMP";
+		ndwords = 4;
+		stride = (src == STATE_SRC_BINDLESS) ? 16 : 4;
+	}
+
+	for (int unit = 0; unit < num_unit; unit++) {
+		/* work-around to reduce noise for opencl blob which always
+		 * writes the max # regardless of # of textures used
+		 */
+		if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
+			break;
+
+		if (!domain)
+			continue;
+
+		dump_domain(texsamp, ndwords, level+2, domain);
+		dump_hex(texsamp, ndwords, level+1);
+		texsamp += stride;
+	}
+}
+
+/* Decode and hexdump num_unit texture constant descriptors.
+ *
+ * The descriptor size grows with the gpu generation (4/8/12/16 dwords for
+ * a3xx..a6xx).  With options->dump_textures set, the buffer the descriptor
+ * points at is dumped too (a4xx: 32 bit address, a5xx/a6xx: 64 bit address).
+ * Nothing is printed for generations outside [300, 700).
+ */
+static void
+dump_tex_const(uint32_t *texconst, int num_unit, int level)
+{
+	const char *domain = NULL;
+	int ndwords = 0;
+	bool has_addr32 = false;
+	bool has_addr64 = false;
+
+	if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
+		domain = "A3XX_TEX_CONST";
+		ndwords = 4;
+	} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
+		domain = "A4XX_TEX_CONST";
+		ndwords = 8;
+		has_addr32 = true;
+	} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
+		domain = "A5XX_TEX_CONST";
+		ndwords = 12;
+		has_addr64 = true;
+	} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
+		domain = "A6XX_TEX_CONST";
+		ndwords = 16;
+		has_addr64 = true;
+	}
+
+	for (int unit = 0; unit < num_unit; unit++) {
+		/* work-around to reduce noise for opencl blob which always
+		 * writes the max # regardless of # of textures used
+		 */
+		if ((num_unit == 16) &&
+		    (texconst[0] == 0) && (texconst[1] == 0) &&
+		    (texconst[2] == 0) && (texconst[3] == 0))
+			break;
+
+		if (!domain)
+			continue;
+
+		dump_domain(texconst, ndwords, level+2, domain);
+
+		if (options->dump_textures) {
+			if (has_addr32) {
+				uint32_t addr = texconst[4] & ~0x1f;
+				dump_gpuaddr(addr, level-2);
+			} else if (has_addr64) {
+				uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
+				dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
+			}
+		}
+
+		dump_hex(texconst, ndwords, level+1);
+		texconst += ndwords;
+	}
+}
+
+/* Decode a load-state packet.
+ *
+ * Steps:
+ *  1. resolve shader stage, state kind and payload source from the header,
+ *     using the header layout that matches options->gpu_id;
+ *  2. locate the payload: inline after the header for direct state,
+ *     otherwise through hostptr() on the indirect / bindless address;
+ *  3. dump the payload according to the state kind.
+ *
+ * num_unit is the 9-bit field at bits 22..30 of dwords[0]; its unit depends
+ * on the state kind (see the individual cases).
+ *
+ * Returns early when quiet(2) is set and no script is active, or when the
+ * payload address cannot be mapped.
+ */
+static void
+cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+ enum shader_t stage;
+ enum state_t state;
+ enum state_src_t src;
+ uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
+ uint64_t ext_src_addr;
+ void *contents;
+ int i;
+
+ if (quiet(2) && !options->script)
+ return;
+
+ /* header layout differs per generation: */
+ if (options->gpu_id >= 600)
+ a6xx_get_state_type(dwords, &stage, &state, &src);
+ else if (options->gpu_id >= 400)
+ a4xx_get_state_type(dwords, &stage, &state, &src);
+ else
+ a3xx_get_state_type(dwords, &stage, &state, &src);
+
+ /* Work out where the payload lives; zero means "inline in the packet".
+ * NOTE(review): there is no default case, so ext_src_addr relies on the
+ * getters above only ever producing one of these three sources.
+ */
+ switch (src) {
+ case STATE_SRC_DIRECT: ext_src_addr = 0; break;
+ case STATE_SRC_INDIRECT:
+ /* the low two bits of dwords[1] are not part of the address: */
+ if (is_64b()) {
+ ext_src_addr = dwords[1] & 0xfffffffc;
+ ext_src_addr |= ((uint64_t)dwords[2]) << 32;
+ } else {
+ ext_src_addr = dwords[1] & 0xfffffffc;
+ }
+
+ break;
+ case STATE_SRC_BINDLESS: {
+ const unsigned base_reg =
+ stage == SHADER_COMPUTE ? regbase("HLSQ_CS_BINDLESS_BASE[0]") : regbase("HLSQ_BINDLESS_BASE[0]");
+
+ /* the top four bits of dwords[1] select the bindless base register
+ * (a register pair when addresses are 64 bit):
+ */
+ if (is_64b()) {
+ const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
+ ext_src_addr = reg_val(reg) & 0xfffffffc;
+ ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
+ } else {
+ const unsigned reg = base_reg + (dwords[1] >> 28);
+ ext_src_addr = reg_val(reg) & 0xfffffffc;
+ }
+
+ /* the low 24 bits of dwords[1] are a dword offset from that base: */
+ ext_src_addr += 4 * (dwords[1] & 0xffffff);
+ break;
+ }
+ }
+
+ if (ext_src_addr)
+ contents = hostptr(ext_src_addr);
+ else
+ contents = is_64b() ? dwords + 3 : dwords + 2;
+
+ if (!contents)
+ return;
+
+ switch (state) {
+ case SHADER_PROG: {
+ const char *ext = NULL;
+
+ if (quiet(2))
+ return;
+
+ if (options->gpu_id >= 400)
+ num_unit *= 16;
+ else if (options->gpu_id >= 300)
+ num_unit *= 4;
+
+ /* shaders:
+ *
+ * note: num_unit seems to be # of instruction groups, where
+ * an instruction group has 4 64bit instructions.
+ */
+ if (stage == SHADER_VERTEX) {
+ ext = "vo3";
+ } else if (stage == SHADER_GEOM) {
+ ext = "go3";
+ } else if (stage == SHADER_COMPUTE) {
+ ext = "co3";
+ } else if (stage == SHADER_FRAGMENT){
+ ext = "fo3";
+ }
+
+ /* contents was already checked for NULL above */
+ if (contents)
+ disasm_a3xx(contents, num_unit * 2, level+2, stdout, options->gpu_id);
+
+ /* dump raw shader: */
+ if (ext)
+ dump_shader(ext, contents, num_unit * 2 * 4);
+
+ break;
+ }
+ case SHADER_CONST: {
+ if (quiet(2))
+ return;
+
+ /* uniforms/consts:
+ *
+ * note: num_unit seems to be # of pairs of dwords??
+ */
+
+ if (options->gpu_id >= 400)
+ num_unit *= 2;
+
+ dump_float(contents, num_unit*2, level+1);
+ dump_hex(contents, num_unit*2, level+1);
+
+ break;
+ }
+ case TEX_MIPADDR: {
+ uint32_t *addrs = contents;
+
+ if (quiet(2))
+ return;
+
+ /* mipmap consts block just appears to be array of num_unit gpu addr's: */
+ for (i = 0; i < num_unit; i++) {
+ void *ptr = hostptr(addrs[i]);
+ printf("%s%2d: %08x\n", levels[level+1], i, addrs[i]);
+ if (options->dump_textures) {
+ printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
+ /* NOTE(review): ptr is not checked for NULL here -- confirm
+ * dump_hex()/hostlen() cope with an unmapped address.
+ */
+ dump_hex(ptr, hostlen(addrs[i])/4, level+1);
+ }
+ }
+ break;
+ }
+ case TEX_SAMP: {
+ dump_tex_samp(contents, src, num_unit, level);
+ break;
+ }
+ case TEX_CONST: {
+ dump_tex_const(contents, num_unit, level);
+ break;
+ }
+ case SSBO_0: {
+ uint32_t *ssboconst = (uint32_t *)contents;
+
+ /* 4 dwords per unit, except on a6xx where a unit is 16 dwords: */
+ for (i = 0; i < num_unit; i++) {
+ int sz = 4;
+ if (400 <= options->gpu_id && options->gpu_id < 500) {
+ dump_domain(ssboconst, 4, level+2, "A4XX_SSBO_0");
+ } else if (500 <= options->gpu_id && options->gpu_id < 600) {
+ dump_domain(ssboconst, 4, level+2, "A5XX_SSBO_0");
+ } else if (600 <= options->gpu_id && options->gpu_id < 700) {
+ sz = 16;
+ dump_domain(ssboconst, 16, level+2, "A6XX_IBO");
+ }
+ dump_hex(ssboconst, sz, level+1);
+ ssboconst += sz;
+ }
+ break;
+ }
+ case SSBO_1: {
+ uint32_t *ssboconst = (uint32_t *)contents;
+
+ /* 2 dwords per unit: */
+ for (i = 0; i < num_unit; i++) {
+ if (400 <= options->gpu_id && options->gpu_id < 500)
+ dump_domain(ssboconst, 2, level+2, "A4XX_SSBO_1");
+ else if (500 <= options->gpu_id && options->gpu_id < 600)
+ dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_1");
+ dump_hex(ssboconst, 2, level+1);
+ ssboconst += 2;
+ }
+ break;
+ }
+ case SSBO_2: {
+ uint32_t *ssboconst = (uint32_t *)contents;
+
+ /* 2 dwords per unit, read below as a 64 bit gpu address: */
+ for (i = 0; i < num_unit; i++) {
+ /* TODO a4xx and a5xx might be same: */
+ if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
+ dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_2");
+ dump_hex(ssboconst, 2, level+1);
+ }
+ if (options->dump_textures) {
+ uint64_t addr = (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
+ dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
+ }
+ ssboconst += 2;
+ }
+ break;
+ }
+ case UBO: {
+ uint32_t *uboconst = (uint32_t *)contents;
+
+ /* 2 dwords are decoded per unit; bindless units are 16 dwords apart: */
+ for (i = 0; i < num_unit; i++) {
+ // TODO probably similar on a4xx..
+ if (500 <= options->gpu_id && options->gpu_id < 600)
+ dump_domain(uboconst, 2, level+2, "A5XX_UBO");
+ else if (600 <= options->gpu_id && options->gpu_id < 700)
+ dump_domain(uboconst, 2, level+2, "A6XX_UBO");
+ dump_hex(uboconst, 2, level+1);
+ uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
+ }
+ break;
+ }
+ case UNKNOWN_DWORDS: {
+ if (quiet(2))
+ return;
+ dump_hex(contents, num_unit, level+1);
+ break;
+ }
+ case UNKNOWN_2DWORDS: {
+ if (quiet(2))
+ return;
+ dump_hex(contents, num_unit * 2, level+1);
+ break;
+ }
+ case UNKNOWN_4DWORDS: {
+ if (quiet(2))
+ return;
+ dump_hex(contents, num_unit * 4, level+1);
+ break;
+ }
+ default:
+ /* state kinds the lookup tables did not recognise (zero) end up here */
+ if (quiet(2))
+ return;
+ /* hmm.. */
+ dump_hex(contents, num_unit, level+1);
+ break;
+ }
+}
+
+/* Latch the bin rectangle: dwords[1] packs the first corner and dwords[2]
+ * the second one, each as x in the low and y in the high 16 bits.
+ */
+static void
+cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t corner1 = dwords[1];
+	const uint32_t corner2 = dwords[2];
+
+	bin_x1 = corner1 & 0xffff;
+	bin_y1 = corner1 >> 16;
+	bin_x2 = corner2 & 0xffff;
+	bin_y2 = corner2 >> 16;
+}
+
+/* Pretty-print an a2xx texture constant.
+ *
+ * dwords: the constant's payload (dwords[0..5] are read).
+ * val:    constant index, printed in the heading.
+ * level:  index into levels[] used as the line prefix.
+ *
+ * The clamp and filter selectors are 2-bit fields, so the name tables carry
+ * four entries; the encoding without a known name prints as "???" instead
+ * of indexing past the end of the table.
+ */
+static void
+dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
+{
+	uint32_t w, h, p;
+	uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
+	uint32_t min_filt, mag_filt, swiz, clamp_x, clamp_y, clamp_z;
+	static const char *filter[4] = {
+		"point", "bilinear", "bicubic", "???",
+	};
+	static const char *clamp[4] = {
+		"wrap", "mirror", "clamp-last-texel", "???",
+	};
+	static const char swiznames[] = "xyzw01??";
+
+	/* see sys2gmem_tex_const[] in adreno_a2xxx.c */
+
+	/* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
+	 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
+	 */
+	p = (dwords[0] >> 22) << 5;
+	clamp_x = (dwords[0] >> 10) & 0x3;
+	clamp_y = (dwords[0] >> 13) & 0x3;
+	clamp_z = (dwords[0] >> 16) & 0x3;
+
+	/* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
+	 * NearestClamp=1:OGL Mode
+	 */
+	parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
+
+	/* Width, Height, EndianSwap=0:None */
+	w = (dwords[2] & 0x1fff) + 1;
+	h = ((dwords[2] >> 13) & 0x1fff) + 1;
+
+	/* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
+	 * Mip=2:BaseMap
+	 */
+	mag_filt = (dwords[3] >> 19) & 0x3;
+	min_filt = (dwords[3] >> 21) & 0x3;
+	swiz = (dwords[3] >> 1) & 0xfff;
+
+	/* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
+	 * Dim3d=0
+	 */
+	// XXX
+
+	/* BorderColor=0:ABGRBlack, ForceBC=0:disable, TriJuice=0, Aniso=0,
+	 * Dim=1:2d, MipPacking=0
+	 */
+	parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
+
+	printf("%sset texture const %04x\n", levels[level], val);
+	printf("%sclamp x/y/z: %s/%s/%s\n", levels[level+1],
+			clamp[clamp_x], clamp[clamp_y], clamp[clamp_z]);
+	printf("%sfilter min/mag: %s/%s\n", levels[level+1],
+			filter[min_filt], filter[mag_filt]);
+	printf("%sswizzle: %c%c%c%c\n", levels[level+1],
+			swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
+			swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
+	/* w, h and p are unsigned, so print them as such: */
+	printf("%saddr=%08x (flags=%03x), size=%ux%u, pitch=%u, format=%s\n",
+			levels[level+1], gpuaddr, flags, w, h, p,
+			rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
+	printf("%smipaddr=%08x (flags=%03x)\n", levels[level+1],
+			mip_gpuaddr, mip_flags);
+}
+
+/* Pretty-print an a2xx shader constant payload.
+ *
+ * The payload is walked as address dwords, each followed by a size dword
+ * when the address maps to a known buffer.  For every mapped buffer up to
+ * 64 dwords are dumped as hex and as floats, with a "..." line when the
+ * buffer is larger than that.
+ *
+ * dwords/sizedwords: payload and its length in dwords.
+ * val:               constant index, printed in the heading.
+ * level:             index into levels[] used as the line prefix.
+ */
+static void
+dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
+{
+	uint32_t i = 0;
+
+	printf("%sset shader const %04x\n", levels[level], val);
+
+	while (i < sizedwords) {
+		uint32_t gpuaddr, flags;
+		parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
+		void *addr = hostptr(gpuaddr);
+
+		if (!addr)
+			continue;
+
+		/* a truncated payload has no size dword left; don't read past
+		 * the end of the packet:
+		 */
+		if (i >= sizedwords)
+			break;
+
+		const char * fmt =
+			rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
+		uint32_t size = dwords[i++];
+		printf("%saddr=%08x, size=%u, format=%s\n", levels[level+1],
+				gpuaddr, size, fmt);
+		// TODO maybe dump these as bytes instead of dwords?
+		size = (size + 3) / 4; // for now convert to dwords
+
+		const uint32_t shown = (size < 64) ? size : 64;
+
+		dump_hex(addr, shown, level + 1);
+		if (size > shown)
+			printf("%s\t\t...\n", levels[level+1]);
+		dump_float(addr, shown, level + 1);
+		if (size > shown)
+			printf("%s\t\t...\n", levels[level+1]);
+	}
+}
+
+/* Decode a set-constant packet.
+ *
+ * dwords[0] carries the constant index (low 16 bits) and the constant kind
+ * (bits 16..19):
+ *   0: float constants, dumped as floats
+ *   1: texture constant (index < 0x78) or shader constant
+ *   2: bool constant
+ *   3: loop constant
+ *   4: register write at 0x2000 + index; with bit 31 set the value written
+ *      is dwords[2] plus the tracked value of the register named by
+ *      dwords[1]
+ * Other kinds are ignored.
+ */
+static void
+cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t val = dwords[0] & 0xffff;
+	const uint32_t kind = (dwords[0] >> 16) & 0xf;
+	uint32_t *payload = dwords + 1;
+	const uint32_t npayload = sizedwords - 1;
+
+	if (kind == 0x0) {
+		dump_float((float *)payload, npayload, level+1);
+	} else if (kind == 0x1) {
+		/* need to figure out how const space is partitioned between
+		 * attributes, textures, etc..
+		 */
+		if (val < 0x78)
+			dump_a2xx_tex_const(payload, npayload, val, level);
+		else
+			dump_a2xx_shader_const(payload, npayload, val, level);
+	} else if (kind == 0x2) {
+		printf("%sset bool const %04x\n", levels[level], val);
+	} else if (kind == 0x3) {
+		printf("%sset loop const %04x\n", levels[level], val);
+	} else if (kind == 0x4) {
+		const uint32_t reg = val + 0x2000;
+
+		if (!(dwords[0] & 0x80000000)) {
+			dump_registers(reg, payload, npayload, level+1);
+			return;
+		}
+
+		uint32_t srcreg = dwords[1];
+		uint32_t dstval = dwords[2];
+
+		/* TODO: not sure what happens w/ payload != 2.. */
+		assert(sizedwords == 3);
+		assert(srcreg < ARRAY_SIZE(type0_reg_vals));
+
+		/* note: rnn_regname uses a static buf so we can't do
+		 * two regname() calls for one printf..
+		 */
+		printf("%s%s = %08x + ", levels[level], regname(reg, 1), dstval);
+		printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
+
+		dstval += type0_reg_vals[srcreg];
+
+		dump_registers(reg, &dstval, 1, level+1);
+	}
+}
+
+static void dump_register_summary(int level);
+
+/* Print the event named by dwords[0].  For gpu_id > 500 a "BLIT" event is
+ * additionally treated like a draw: the query hook runs (labelled
+ * "EVENT:BLIT"), then the mode and the register summary are printed.
+ */
+static void
+cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
+	char eventname[64];
+
+	printl(2, "%sevent %s\n", levels[level], name);
+
+	if (!name || (options->gpu_id <= 500))
+		return;
+
+	if (strcmp(name, "BLIT") != 0)
+		return;
+
+	snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
+	do_query(eventname, 0);
+	print_mode(level);
+	dump_register_summary(level);
+}
+
+/* Print the registers touched since the previous summary (or every written
+ * register with options->allregs), then start a new draw.
+ *
+ * Per register: '!' when the value differs from lastvals[] (which is then
+ * updated), '+' when it was rewritten, followed by the value and its
+ * decoding.  Afterwards the rewritten flags are cleared and draw_count is
+ * bumped.  'summary' is forced off and 'in_summary' on for the duration.
+ */
+static void
+dump_register_summary(int level)
+{
+	const bool prev_summary = summary;
+
+	summary = false;
+	in_summary = true;
+
+	/* dump current state of registers: */
+	printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
+
+	for (uint32_t reg = 0; reg < regcnt(); reg++) {
+		const uint32_t value = reg_val(reg);
+
+		/* skip registers that haven't been updated since last draw/blit: */
+		if (!options->allregs && !reg_rewritten(reg))
+			continue;
+		if (!reg_written(reg))
+			continue;
+
+		if (value != lastvals[reg]) {
+			printl(2, "!");
+			lastvals[reg] = value;
+		} else {
+			printl(2, " ");
+		}
+
+		if (reg_rewritten(reg))
+			printl(2, "+");
+		else
+			printl(2, " ");
+
+		printl(2, "\t%08x", value);
+
+		if (!quiet(2))
+			dump_register(reg, value, level);
+	}
+
+	clear_rewritten();
+
+	in_summary = false;
+	draw_count++;
+	summary = prev_summary;
+}
+
+/* Shared front half of the CP_DRAW_INDX style handlers.
+ *
+ * Reads the primitive type (dwords[1] bits 0..4), the index source
+ * (dwords[1] bits 6..7) and the index count (dwords[2]), runs the query
+ * hook, prints the draw header and updates the vertex / per-IB draw
+ * counters.
+ *
+ * Returns the index count.
+ */
+static uint32_t
+draw_indx_common(uint32_t *dwords, int level)
+{
+	const uint32_t num_indices = dwords[2];
+	const uint32_t prim_type = dwords[1] & 0x1f;
+	const uint32_t source_select = (dwords[1] >> 6) & 0x3;
+	const char *primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
+
+	do_query(primtype, num_indices);
+
+	printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
+	printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype,
+			prim_type);
+	printl(2, "%ssource_select: %s (%d)\n", levels[level],
+			rnn_enumname(rnn, "pc_di_src_sel", source_select),
+			source_select);
+	printl(2, "%snum_indices: %d\n", levels[level], num_indices);
+
+	vertices += num_indices;
+	draws[ib]++;
+
+	return num_indices;
+}
+
+/* Index element size as decoded from the draw initiator dword by
+ * cp_draw_indx() / cp_draw_indx_2().  Note that IGN, 16_BIT and INVALID all
+ * share the value 0, so they cannot be told apart after decoding.
+ */
+enum pc_di_index_size {
+ INDEX_SIZE_IGN = 0,
+ INDEX_SIZE_16_BIT = 0,
+ INDEX_SIZE_32_BIT = 1,
+ INDEX_SIZE_8_BIT = 2,
+ INDEX_SIZE_INVALID = 0,
+};
+
+/* Handle a draw whose (optional) index buffer is referenced by address.
+ *
+ * A five dword packet carries the index buffer gpu address in dwords[3] and
+ * its size in bytes in dwords[4]; when that buffer is mapped, the indices
+ * are printed followed by a hexdump.  The register summary is printed for
+ * draws with a non-zero index count, and the draw marks a WFI as needed.
+ */
+static void
+cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t num_indices = draw_indx_common(dwords, level);
+
+	assert(!is_64b());
+
+	/* if we have an index buffer, dump that: */
+	if (sizedwords == 5) {
+		void *ptr = hostptr(dwords[3]);
+		const uint32_t nbytes = dwords[4];
+
+		printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
+		printl(2, "%sidx_size: %d\n", levels[level], nbytes);
+
+		if (ptr && !quiet(2)) {
+			/* two-bit size code, split across bits 11 and 13: */
+			const unsigned size =
+				((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
+
+			printf("%sidxs: ", levels[level]);
+			switch (size) {
+			case INDEX_SIZE_8_BIT: {
+				const uint8_t *idx = ptr;
+				for (uint32_t n = 0; n < nbytes; n++)
+					printf(" %u", idx[n]);
+				break;
+			}
+			case INDEX_SIZE_16_BIT: {
+				const uint16_t *idx = ptr;
+				for (uint32_t n = 0; n < nbytes/2; n++)
+					printf(" %u", idx[n]);
+				break;
+			}
+			case INDEX_SIZE_32_BIT: {
+				const uint32_t *idx = ptr;
+				for (uint32_t n = 0; n < nbytes/4; n++)
+					printf(" %u", idx[n]);
+				break;
+			}
+			default:
+				break;
+			}
+			printf("\n");
+			dump_hex(ptr, nbytes/4, level+1);
+		}
+	}
+
+	/* don't bother dumping registers for the dummy draw_indx's.. */
+	if (num_indices > 0)
+		dump_register_summary(level);
+
+	needs_wfi = true;
+}
+
+/* Handle a draw whose index buffer is embedded in the packet, starting at
+ * dwords[3].  Unless quiet(2), the indices are printed followed by a
+ * hexdump of the embedded buffer; the register summary is printed for draws
+ * with a non-zero index count.
+ */
+static void
+cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t num_indices = draw_indx_common(dwords, level);
+	/* two-bit size code, split across bits 11 and 13: */
+	const unsigned size =
+		((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
+	void *ptr = &dwords[3];
+
+	assert(!is_64b());
+
+	/* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
+	if (!quiet(2)) {
+		int sz = 0;	/* size of the index data in bytes */
+
+		printf("%sidxs: ", levels[level]);
+		switch (size) {
+		case INDEX_SIZE_8_BIT: {
+			const uint8_t *idx = ptr;
+			for (uint32_t n = 0; n < num_indices; n++)
+				printf(" %u", idx[n]);
+			sz = num_indices;
+			break;
+		}
+		case INDEX_SIZE_16_BIT: {
+			const uint16_t *idx = ptr;
+			for (uint32_t n = 0; n < num_indices; n++)
+				printf(" %u", idx[n]);
+			sz = num_indices * 2;
+			break;
+		}
+		case INDEX_SIZE_32_BIT: {
+			const uint32_t *idx = ptr;
+			for (uint32_t n = 0; n < num_indices; n++)
+				printf(" %u", idx[n]);
+			sz = num_indices * 4;
+			break;
+		}
+		default:
+			break;
+		}
+		printf("\n");
+		dump_hex(ptr, sz / 4, level+1);
+	}
+
+	/* don't bother dumping registers for the dummy draw_indx's.. */
+	if (num_indices > 0)
+		dump_register_summary(level);
+}
+
+/* Handle a draw with the primitive type in dwords[0] bits 0..4 and the
+ * index count in dwords[2]: run the query hook, print the mode and, for a
+ * non-zero index count, the register summary.
+ */
+static void
+cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t prim_type = dwords[0] & 0x1f;
+	const uint32_t num_indices = dwords[2];
+	const char *primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
+
+	do_query(primtype, num_indices);
+	print_mode(level);
+
+	/* don't bother dumping registers for the dummy draw_indx's.. */
+	if (num_indices == 0)
+		return;
+
+	dump_register_summary(level);
+}
+
+/* Handle an indexed indirect draw: run the query hook, print the mode,
+ * dump 0x10 dwords at each of the two buffer addresses carried by the
+ * packet, and print the register summary.
+ *
+ * With 64 bit addressing the addresses sit in dwords[1..2] and
+ * dwords[4..5] (the upper dword masked to 17 bits); otherwise in dwords[1]
+ * and dwords[3].
+ */
+static void
+cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t prim_type = dwords[0] & 0x1f;
+	uint64_t first, second;
+
+	do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
+	print_mode(level);
+
+	if (is_64b()) {
+		first  = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
+		second = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
+	} else {
+		first  = dwords[1];
+		second = dwords[3];
+	}
+
+	dump_gpuaddr_size(first, level, 0x10, 2);
+	dump_gpuaddr_size(second, level, 0x10, 2);
+
+	dump_register_summary(level);
+}
+
+/* Handle a non-indexed indirect draw: run the query hook, print the mode,
+ * dump 0x10 dwords at the 64 bit address held in dwords[1..2] (upper dword
+ * masked to 17 bits), and print the register summary.
+ */
+static void
+cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t prim_type = dwords[0] & 0x1f;
+	const uint64_t addr_hi = (uint64_t)dwords[2] & 0x1ffff;
+	const uint64_t addr = (addr_hi << 32) | dwords[1];
+
+	do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
+	print_mode(level);
+
+	dump_gpuaddr_size(addr, level, 0x10, 2);
+
+	dump_register_summary(level);
+}
+
+/* Treat a compute dispatch like a draw labelled "COMPUTE" with an index
+ * count of one, then print the register summary.
+ */
+static void
+cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	static const char label[] = "COMPUTE";
+
+	do_query(label, 1);
+	dump_register_summary(level);
+}
+
+/* Print the payload of a NOP packet as text.
+ *
+ * Only done when not quiet(3) and options->decode_markers is set.  Bytes
+ * are printed up to the first NUL (or the end of the payload); bytes that
+ * are not ascii are dropped.  A newline always follows.
+ */
+static void
+cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const char *cur = (void *)dwords;
+	const char *end = cur + 4 * sizedwords;
+
+	if (quiet(3))
+		return;
+
+	// blob doesn't use CP_NOP for string_marker but it does
+	// use it for things that end up looking like, but aren't
+	// ascii chars:
+	if (!options->decode_markers)
+		return;
+
+	for (; cur != end && *cur != '\0'; cur++) {
+		if (isascii(*cur))
+			printf("%c", *cur);
+	}
+	printf("\n");
+}
+
+/* Follow an indirect buffer (IB) reference and decode the commands in it.
+ *
+ * Packet layout: with 64 bit addressing dwords[0..1] are the low/high
+ * halves of the IB address and dwords[2] its size in dwords; otherwise
+ * dwords[0] is the address and dwords[1] the size.
+ *
+ * The IB is skipped when it has already been dumped ('once' and
+ * 'query-compare' modes).  A message goes to stderr when the address cannot
+ * be mapped.
+ */
+static void
+cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	/* traverse indirect buffers */
+	uint64_t ibaddr;
+	uint32_t ibsize;
+	uint32_t *ptr = NULL;
+
+	if (is_64b()) {
+		/* a5xx+.. low then high 32b of gpu addr, then size: */
+		ibaddr = dwords[0];
+		ibaddr |= ((uint64_t)dwords[1]) << 32;
+		ibsize = dwords[2];
+	} else {
+		ibaddr = dwords[0];
+		ibsize = dwords[1];
+	}
+
+	if (!quiet(3)) {
+		if (is_64b()) {
+			/* PRIx64 rather than %lx: uint64_t is not 'long' everywhere */
+			printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
+		} else {
+			printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
+		}
+		printf("%sibsize:%08x\n", levels[level], ibsize);
+	}
+
+	if (options->once && has_dumped(ibaddr, enable_mask))
+		return;
+
+	/* 'query-compare' mode implies 'once' mode, although we need only to
+	 * process the cmdstream for *any* enable_mask mode, since we are
+	 * comparing binning vs draw reg values at the same time, ie. it is
+	 * not useful to process the same draw in both binning and draw pass.
+	 */
+	if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
+		return;
+
+	/* map gpuaddr back to hostptr: */
+	ptr = hostptr(ibaddr);
+
+	if (ptr) {
+		/* If the GPU hung within the target IB, the trigger point will be
+		 * just after the current CP_INDIRECT_BUFFER. Because the IB is
+		 * executed but never returns. Account for this by checking if
+		 * the IB returned:
+		 */
+		highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
+
+		/* decode the target one IB level deeper: */
+		ib++;
+		ibs[ib].base = ibaddr;
+		ibs[ib].size = ibsize;
+
+		dump_commands(ptr, ibsize, level);
+		ib--;
+	} else {
+		fprintf(stderr, "could not find: %016" PRIx64 " (%u)\n", ibaddr, ibsize);
+	}
+}
+
+/* A wait-for-idle has been seen: register writes that follow no longer
+ * trigger the "NEEDS WFI" warning until the next draw sets the flag again.
+ */
+static void
+cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	/* the packet payload is not inspected */
+	needs_wfi = false;
+}
+
+/* Print a memory write packet (skipped when quiet(2)).
+ *
+ * With 64 bit addressing dwords[0..1] hold the destination address and the
+ * rest is the data, which is hexdumped and, if it starts with what looks
+ * like a type4/type7 packet header, also decoded as commands.  Otherwise
+ * dwords[0] is the address and the data is dumped as floats.
+ */
+static void
+cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	if (quiet(2))
+		return;
+
+	if (is_64b()) {
+		uint64_t addr = dwords[0] | (((uint64_t)dwords[1]) << 32);
+		/* PRIx64 rather than %lx: uint64_t is not 'long' everywhere */
+		printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], addr);
+		dump_hex(&dwords[2], sizedwords-2, level+1);
+
+		if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
+			dump_commands(&dwords[2], sizedwords-2, level+1);
+	} else {
+		uint32_t addr = dwords[0];
+		printf("%sgpuaddr:%08x\n", levels[level], addr);
+		dump_float((float *)&dwords[1], sizedwords-1, level+1);
+	}
+}
+
+/* Apply a read-modify-write to the tracked register named by the low 16
+ * bits of dwords[0]: new = (old & dwords[1]) | dwords[2].  Warns when a
+ * WFI is outstanding.
+ */
+static void
+cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t reg = dwords[0] & 0xffff;
+	const uint32_t and_mask = dwords[1];
+	const uint32_t or_mask = dwords[2];
+
+	printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(reg, 1), and_mask, or_mask);
+
+	if (needs_wfi)
+		printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(reg, 1), and_mask, or_mask);
+
+	reg_set(reg, (reg_val(reg) & and_mask) | or_mask);
+}
+
+/* Print a register<->memory transfer packet.
+ *
+ * dwords[0]: base register in the low 16 bits, dword count in bits 19..28.
+ * dwords[1..2]: low/high halves of the memory address.
+ *
+ * Unless quiet(2), the address is printed and, when it maps to a known
+ * buffer, 'count' dwords from it are hexdumped.
+ */
+static void
+cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	uint32_t val = dwords[0] & 0xffff;
+	printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
+
+	if (quiet(2))
+		return;
+
+	uint64_t addr = dwords[1] | (((uint64_t)dwords[2]) << 32);
+	/* PRIx64 rather than %lx: uint64_t is not 'long' everywhere */
+	printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], addr);
+	void *ptr = hostptr(addr);
+	if (ptr) {
+		uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
+		dump_hex(ptr, cnt, level + 1);
+	}
+}
+
+/* Per-group draw state as recorded by CP_SET_DRAW_STATE; see
+ * cp_set_draw_state() for how the fields are unpacked from the packet.
+ */
+struct draw_state {
+ uint16_t enable_mask; /* bits 20..23 of the group's header dword */
+ uint16_t flags; /* FLAG_x bits, bits 16..19 of the header dword */
+ uint32_t count; /* dwords at 'addr'; load_group() skips groups with count == 0 */
+ uint64_t addr; /* gpuaddr that load_group() decodes as cmdstream */
+};
+
+/* one slot per group_id (a 5 bit field, so 32 possible groups): */
+struct draw_state state[32];
+
+/* values for draw_state::flags: */
+#define FLAG_DIRTY 0x1
+#define FLAG_DISABLE 0x2
+#define FLAG_DISABLE_ALL_GROUPS 0x4
+#define FLAG_LOAD_IMMED 0x8
+
+/* payload of the most recent CP_SET_MODE, see cp_set_mode(): */
+static int draw_mode;
+
+/* Reset the draw-state slot for 'group_id' back to all zeros. */
+static void
+disable_group(unsigned group_id)
+{
+	memset(&state[group_id], 0, sizeof(state[group_id]));
+}
+
+/* Reset every draw-state slot, one group at a time. */
+static void
+disable_all_groups(void)
+{
+	unsigned group_id = 0;
+
+	while (group_id < ARRAY_SIZE(state)) {
+		disable_group(group_id);
+		group_id++;
+	}
+}
+
+/* Decode the cmdstream referenced by draw-state group 'group_id'.
+ *
+ * Groups with a zero count are ignored. For gpu_id >= 600 the group is
+ * also skipped when its enable_mask has no bit in common with the current
+ * global enable_mask. Otherwise the group's buffer is (optionally) hex
+ * dumped and then decoded one IB level deeper.
+ */
+static void
+load_group(unsigned group_id, int level)
+{
+	struct draw_state *ds = &state[group_id];
+
+	if (!ds->count)
+		return;
+
+	printl(2, "%sgroup_id: %u\n", levels[level], group_id);
+	printl(2, "%scount: %d\n", levels[level], ds->count);
+	/* PRIx64: uint64_t is not 'unsigned long long' on every ABI */
+	printl(2, "%saddr: %016"PRIx64"\n", levels[level], ds->addr);
+	printl(2, "%sflags: %x\n", levels[level], ds->flags);
+
+	if (options->gpu_id >= 600) {
+		printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
+
+		if (!(ds->enable_mask & enable_mask)) {
+			printl(2, "%s\tskipped!\n\n", levels[level]);
+			return;
+		}
+	}
+
+	void *ptr = hostptr(ds->addr);
+	if (ptr) {
+		if (!quiet(2))
+			dump_hex(ptr, ds->count, level+1);
+
+		/* the group's contents are decoded as a nested IB: */
+		ib++;
+		dump_commands(ptr, ds->count, level+1);
+		ib--;
+	}
+}
+
+/* Decode every recorded draw-state group, then (unless in query-compare
+ * mode) forget them all.
+ */
+static void
+load_all_groups(int level)
+{
+	/* Re-entrancy guard: decoding a group must never end up back here,
+	 * and if it does bad things happen, so bail out loudly instead.
+	 */
+	static bool busy = false;
+
+	if (busy) {
+		printf("ERROR: nothing in draw state should trigger recursively loading groups!\n");
+		return;
+	}
+
+	busy = true;
+	unsigned group_id = 0;
+	while (group_id < ARRAY_SIZE(state)) {
+		load_group(group_id, level);
+		group_id++;
+	}
+	busy = false;
+
+	/* in 'query-compare' mode, the groups are kept around until the
+	 * query has had a chance to be processed:
+	 */
+	if (options->query_compare)
+		return;
+
+	disable_all_groups();
+}
+
+/* Handler for CP_SET_DRAW_STATE: update the table of draw-state groups.
+ *
+ * The payload is a sequence of entries, each a header dword followed by a
+ * 64b (two dwords) or 32b (one dword) address depending on is_64b(). The
+ * header packs count (bits 0..15), flags (16..19), enable_mask (20..23)
+ * and group_id (24..28).
+ *
+ * FLAG_DISABLE_ALL_GROUPS / FLAG_DISABLE clear all groups / this group.
+ * Otherwise the group is (re)recorded, and with FLAG_LOAD_IMMED it is
+ * decoded immediately and then cleared again.
+ */
+static void
+cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t entry_dwords = is_64b() ? 3 : 2;
+	uint32_t i;
+
+	for (i = 0; i < sizedwords; ) {
+		struct draw_state *ds;
+		uint64_t addr;
+
+		/* don't read past the end of a truncated packet: */
+		if ((sizedwords - i) < entry_dwords) {
+			printf("truncated CP_SET_DRAW_STATE: %u trailing dwords\n",
+					sizedwords - i);
+			break;
+		}
+
+		uint32_t count = dwords[i] & 0xffff;
+		uint32_t group_id = (dwords[i] >> 24) & 0x1f;
+		uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
+		uint32_t flags = (dwords[i] >> 16) & 0xf;
+
+		/* checked before group_id is used to index state[]: */
+		assert(group_id < ARRAY_SIZE(state));
+
+		addr = dwords[i + 1];
+		if (entry_dwords == 3)
+			addr |= ((uint64_t)dwords[i + 2]) << 32;
+		i += entry_dwords;
+
+		if (flags & FLAG_DISABLE_ALL_GROUPS) {
+			disable_all_groups();
+			continue;
+		}
+
+		if (flags & FLAG_DISABLE) {
+			disable_group(group_id);
+			continue;
+		}
+
+		disable_group(group_id);
+
+		ds = &state[group_id];
+
+		ds->enable_mask = enable_mask;
+		ds->flags = flags;
+		ds->count = count;
+		ds->addr = addr;
+
+		if (flags & FLAG_LOAD_IMMED) {
+			load_group(group_id, level);
+			disable_group(group_id);
+		}
+	}
+}
+
+/* Handler for CP_SET_MODE: remember the first payload dword in draw_mode. */
+static void
+cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+ draw_mode = dwords[0];
+}
+
+/* execute compute shader: handler for CP_EXEC_CS. Runs the register
+ * query under the "compute" label and dumps the register summary; the
+ * packet payload itself is not inspected here.
+ */
+static void
+cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+ do_query("compute", 0);
+ dump_register_summary(level);
+}
+
+/* Handler for CP_EXEC_CS_INDIRECT: like cp_exec_cs(), but first prints and
+ * dumps the buffer the packet points at.
+ *
+ * The address is dwords[1], extended on 64b GPUs with the low 17 bits of
+ * dwords[2] as the upper half.
+ */
+static void
+cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	uint64_t addr;
+
+	if (is_64b()) {
+		addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
+	} else {
+		addr = dwords[1];
+	}
+
+	/* PRIx64: uint64_t is not 'unsigned long long' on every ABI */
+	printl(3, "%saddr: %016"PRIx64"\n", levels[level], addr);
+	dump_gpuaddr_size(addr, level, 0x10, 2);
+
+	do_query("compute", 0);
+	dump_register_summary(level);
+}
+
+/* Handler for CP_SET_MARKER: look up the render mode named by the low 4
+ * bits of dwords[0] and update the global enable_mask to match
+ * (binning / gmem / bypass). Other modes leave enable_mask untouched.
+ */
+static void
+cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	render_mode = rnn_enumname(rnn, "a6xx_render_mode", dwords[0] & 0xf);
+
+	/* NOTE(review): assumes rnn_enumname() can return NULL for a value
+	 * without a known enum name - confirm. Guarding keeps strcmp() from
+	 * being handed a NULL pointer in that case.
+	 */
+	if (!render_mode)
+		return;
+
+	if (!strcmp(render_mode, "RM6_BINNING")) {
+		enable_mask = MODE_BINNING;
+	} else if (!strcmp(render_mode, "RM6_GMEM")) {
+		enable_mask = MODE_GMEM;
+	} else if (!strcmp(render_mode, "RM6_BYPASS")) {
+		enable_mask = MODE_BYPASS;
+	}
+}
+
+/* Handler for CP_SET_RENDER_MODE (a5xx+, 64b only; both are asserted).
+ *
+ * Records the render mode name from dwords[0]. Longer forms of the packet
+ * additionally carry a first address (dumped via dump_gpuaddr()) and, in
+ * the 8 dword form, a length plus a second address whose contents are
+ * decoded as cmdstream unless quiet(2) is set.
+ */
+static void
+cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	uint64_t addr;
+	uint32_t *ptr, len;
+
+	assert(is_64b());
+
+	/* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
+	 * not sure if this can come in different sizes.
+	 *
+	 * First ptr doesn't seem to be cmdstream, second one does.
+	 *
+	 * Comment from downstream kernel:
+	 *
+	 * SRM -- set render mode (ex binning, direct render etc)
+	 * SRM is set by UMD usually at start of IB to tell CP the type of
+	 * preemption.
+	 * KMD needs to set SRM to NULL to indicate CP that rendering is
+	 * done by IB.
+	 * ------------------------------------------------------------------
+	 *
+	 * Seems to always be one of these two:
+	 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000
+	 * 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000
+	 *
+	 */
+
+	assert(options->gpu_id >= 500);
+
+	render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
+
+	if (sizedwords == 1)
+		return;
+
+	addr = dwords[1];
+	addr |= ((uint64_t)dwords[2]) << 32;
+
+	mode = dwords[3];
+
+	dump_gpuaddr(addr, level+1);
+
+	if (sizedwords == 5)
+		return;
+
+	assert(sizedwords == 8);
+
+	len = dwords[5];
+	addr = dwords[6];
+	addr |= ((uint64_t)dwords[7]) << 32;
+
+	/* PRIx64: uint64_t is not 'unsigned long' on every ABI */
+	printl(3, "%saddr: 0x%016"PRIx64"\n", levels[level], addr);
+	printl(3, "%slen: 0x%x\n", levels[level], len);
+
+	ptr = hostptr(addr);
+
+	if (ptr) {
+		if (!quiet(2)) {
+			ib++;
+			dump_commands(ptr, len, level+1);
+			ib--;
+			dump_hex(ptr, len, level+1);
+		}
+	}
+}
+
+/* Handler for CP_COMPUTE_CHECKPOINT (a5xx+, 64b only, exactly 8 payload
+ * dwords; all asserted).
+ *
+ * dwords[5..6] hold a gpuaddr and dwords[7] a length; unless quiet(2) is
+ * set the referenced buffer is decoded as cmdstream and hex dumped.
+ */
+static void
+cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	uint64_t addr;
+	uint32_t *ptr, len;
+
+	assert(is_64b());
+	assert(options->gpu_id >= 500);
+
+	assert(sizedwords == 8);
+
+	addr = dwords[5];
+	addr |= ((uint64_t)dwords[6]) << 32;
+	len = dwords[7];
+
+	/* PRIx64: uint64_t is not 'unsigned long' on every ABI */
+	printl(3, "%saddr: 0x%016"PRIx64"\n", levels[level], addr);
+	printl(3, "%slen: 0x%x\n", levels[level], len);
+
+	ptr = hostptr(addr);
+
+	if (ptr) {
+		if (!quiet(2)) {
+			ib++;
+			dump_commands(ptr, len, level+1);
+			ib--;
+			dump_hex(ptr, len, level+1);
+		}
+	}
+}
+
+/* Handler for CP_BLIT: run the register query labelled with the blit
+ * command's enum name, then print the current mode and register summary.
+ */
+static void
+cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const char *blit_name = rnn_enumname(rnn, "cp_blit_cmd", dwords[0]);
+
+	do_query(blit_name, 0);
+	print_mode(level);
+	dump_register_summary(level);
+}
+
+/* Handler for CP_CONTEXT_REG_BUNCH: the payload is a list of
+ * (register offset, value) pairs; each is dumped and applied to the
+ * tracked register state. Summary mode is suspended while doing so and
+ * restored afterwards.
+ */
+static void
+cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	uint32_t i;
+
+	/* NOTE: seems to write same reg multiple times.. not sure if different parts of
+	 * these are triggered by the FLUSH_SO_n events?? (if that is what they actually
+	 * are?)
+	 */
+	bool saved_summary = summary;
+	summary = false;
+
+	/* only consume complete pairs, so an odd sizedwords cannot make us
+	 * read one dword past the end of the packet:
+	 */
+	for (i = 0; (i + 1) < sizedwords; i += 2) {
+		dump_register(dwords[i+0], dwords[i+1], level+1);
+		reg_set(dwords[i+0], dwords[i+1]);
+	}
+
+	summary = saved_summary;
+}
+
+/* Handler for CP_REG_WRITE: dwords[1] (low 16 bits) is the register
+ * offset and dwords[2] the value; the write is dumped and then applied to
+ * the tracked register state.
+ */
+static void
+cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	const uint32_t offset = dwords[1] & 0xffff;
+	const uint32_t value = dwords[2];
+
+	dump_register(offset, value, level+1);
+	reg_set(offset, value);
+}
+
+/* Handler for CP_SET_CTXSWITCH_IB: dwords[0..1] form a 64b gpuaddr and the
+ * low 16 bits of dwords[2] a size in dwords. The address is printed and,
+ * if it maps to a known buffer, its contents are decoded as cmdstream.
+ */
+static void
+cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	uint64_t addr;
+	uint32_t size = dwords[2] & 0xffff;
+	void *ptr;
+
+	addr = dwords[0] | ((uint64_t)dwords[1] << 32);
+
+	/* PRIx64: uint64_t is not 'unsigned long' on every ABI */
+	printf("addr=%"PRIx64"\n", addr);
+	ptr = hostptr(addr);
+	if (ptr) {
+		dump_commands(ptr, size, level+1);
+	}
+}
+
+/* Handler for CP_SKIP_IB2_ENABLE_GLOBAL: latch the first payload dword
+ * into skip_ib2_enable_global.
+ */
+static void
+cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+ skip_ib2_enable_global = dwords[0];
+}
+
+/* Handler for CP_SKIP_IB2_ENABLE_LOCAL: latch the first payload dword
+ * into skip_ib2_enable_local.
+ */
+static void
+cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+ skip_ib2_enable_local = dwords[0];
+}
+
+/* Table entry helper: CP(FOO, handler) names the packet "CP_FOO"; any
+ * extra arguments initialize the trailing 'options' member.
+ */
+#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
+
+/* Packet handlers, looked up by packet name in get_type3_op(). Despite
+ * the name the table is used for both type3 and type7 packets (see
+ * dump_commands()).
+ */
+static const struct type3_op {
+ const char *name;
+ /* called with the payload, ie. excluding the packet header dword: */
+ void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
+ struct {
+ /* decode all draw-state groups before running the handler: */
+ bool load_all_groups;
+ } options;
+} type3_op[] = {
+ CP(NOP, cp_nop),
+ CP(INDIRECT_BUFFER, cp_indirect),
+ CP(INDIRECT_BUFFER_PFD, cp_indirect),
+ CP(WAIT_FOR_IDLE, cp_wfi),
+ CP(REG_RMW, cp_rmw),
+ CP(REG_TO_MEM, cp_reg_mem),
+ CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
+ CP(MEM_WRITE, cp_mem_write),
+ CP(EVENT_WRITE, cp_event_write),
+ CP(RUN_OPENCL, cp_run_cl),
+ CP(DRAW_INDX, cp_draw_indx, {.load_all_groups=true}),
+ CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups=true}),
+ CP(SET_CONSTANT, cp_set_const),
+ CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
+ CP(WIDE_REG_WRITE, cp_wide_reg_write),
+
+ /* for a3xx */
+ CP(LOAD_STATE, cp_load_state),
+ CP(SET_BIN, cp_set_bin),
+
+ /* for a4xx */
+ CP(LOAD_STATE4, cp_load_state),
+ CP(SET_DRAW_STATE, cp_set_draw_state),
+ CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups=true}),
+ CP(EXEC_CS, cp_exec_cs, {.load_all_groups=true}),
+ CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups=true}),
+
+ /* for a5xx */
+ CP(SET_RENDER_MODE, cp_set_render_mode),
+ CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
+ CP(BLIT, cp_blit),
+ CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
+ CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups=true}),
+ CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups=true}),
+ CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
+ CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
+
+ /* for a6xx */
+ CP(LOAD_STATE6_GEOM, cp_load_state),
+ CP(LOAD_STATE6_FRAG, cp_load_state),
+ CP(LOAD_STATE6, cp_load_state),
+ CP(SET_MODE, cp_set_mode),
+ CP(SET_MARKER, cp_set_marker),
+ CP(REG_WRITE, cp_reg_write),
+
+ CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
+};
+
+/* Do-nothing handler, used by get_type3_op() for packets that have no
+ * entry in the type3_op table.
+ */
+static void
+noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+}
+
+/* Map a packet opcode to its handler entry.
+ *
+ * The opcode is first translated to a packet name; the table is then
+ * searched by name. Opcodes without a name, or names without a table
+ * entry, get a dummy entry whose handler does nothing, so the result is
+ * never NULL.
+ */
+static const struct type3_op *
+get_type3_op(unsigned opc)
+{
+	static const struct type3_op dummy_op = {
+		.fxn = noop_fxn,
+	};
+	const struct type3_op *found = &dummy_op;
+	const char *pkt = pktname(opc);
+
+	if (pkt) {
+		for (unsigned idx = 0; idx < ARRAY_SIZE(type3_op); idx++) {
+			if (strcmp(pkt, type3_op[idx].name) == 0) {
+				found = &type3_op[idx];
+				break;
+			}
+		}
+	}
+
+	return found;
+}
+
+/* Decode 'sizedwords' dwords of cmdstream starting at 'dwords'.
+ *
+ * Walks the buffer packet by packet: type0/type4 packets are decoded as
+ * register writes, type3/type7 packets are dispatched through the
+ * type3_op table (after loading draw-state groups when the handler asks
+ * for it), and type2 packets are single-dword nops. 'level' is the
+ * indentation level used for output.
+ *
+ * On an unrecognized header the decoder gives up for gpu_id < 500; on
+ * newer GPUs it scans forward to the next dword that looks like a valid
+ * type4/type7 header and resumes there. A NULL buffer is reported and
+ * ignored.
+ */
+void
+dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+	int dwords_left = sizedwords;
+	uint32_t count = 0; /* dword count including packet header */
+	uint32_t val;
+
+//	assert(dwords);
+	if (!dwords) {
+		printf("NULL cmd buffer!\n");
+		return;
+	}
+
+	draws[ib] = 0;
+
+	while (dwords_left > 0) {
+
+		current_draw_count = draw_count;
+
+		/* hack, this looks like a -1 underflow, in some versions
+		 * when it tries to write zero registers via pkt0
+		 */
+//		if ((dwords[0] >> 16) == 0xffff)
+//			goto skip;
+
+		if (pkt_is_type0(dwords[0])) {
+			printl(3, "t0");
+			count = type0_pkt_size(dwords[0]) + 1;
+			val = type0_pkt_offset(dwords[0]);
+			assert(val < regcnt());
+			printl(3, "%swrite %s%s (%04x)\n", levels[level+1], regname(val, 1),
+					(dwords[0] & 0x8000) ? " (same register)" : "", val);
+			dump_registers(val, dwords+1, count-1, level+2);
+			if (!quiet(3))
+				dump_hex(dwords, count, level+1);
+		} else if (pkt_is_type4(dwords[0])) {
+			/* basically the same(ish) as type0 prior to a5xx */
+			printl(3, "t4");
+			count = type4_pkt_size(dwords[0]) + 1;
+			val = type4_pkt_offset(dwords[0]);
+			assert(val < regcnt());
+			printl(3, "%swrite %s (%04x)\n", levels[level+1], regname(val, 1), val);
+			dump_registers(val, dwords+1, count-1, level+2);
+			if (!quiet(3))
+				dump_hex(dwords, count, level+1);
+#if 0
+		} else if (pkt_is_type1(dwords[0])) {
+			printl(3, "t1");
+			count = 3;
+			val = dwords[0] & 0xfff;
+			printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
+			dump_registers(val, dwords+1, 1, level+2);
+			val = (dwords[0] >> 12) & 0xfff;
+			printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
+			dump_registers(val, dwords+2, 1, level+2);
+			if (!quiet(3))
+				dump_hex(dwords, count, level+1);
+		} else if (pkt_is_type2(dwords[0])) {
+			printl(3, "t2");
+			printf("%sNOP\n", levels[level+1]);
+			count = 1;
+			if (!quiet(3))
+				dump_hex(dwords, count, level+1);
+#endif
+		} else if (pkt_is_type3(dwords[0])) {
+			count = type3_pkt_size(dwords[0]) + 1;
+			val = cp_type3_opcode(dwords[0]);
+			const struct type3_op *op = get_type3_op(val);
+			if (op->options.load_all_groups)
+				load_all_groups(level+1);
+			printl(3, "t3");
+			const char *name = pktname(val);
+			if (!quiet(2)) {
+				printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
+						rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
+						val, count, (dwords[0] & 0x1) ? " (predicated)" : "");
+			}
+			if (name)
+				dump_domain(dwords+1, count-1, level+2, name);
+			op->fxn(dwords+1, count-1, level+1);
+			if (!quiet(2))
+				dump_hex(dwords, count, level+1);
+		} else if (pkt_is_type7(dwords[0])) {
+			count = type7_pkt_size(dwords[0]) + 1;
+			val = cp_type7_opcode(dwords[0]);
+			const struct type3_op *op = get_type3_op(val);
+			if (op->options.load_all_groups)
+				load_all_groups(level+1);
+			printl(3, "t7");
+			const char *name = pktname(val);
+			if (!quiet(2)) {
+				printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
+						rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
+						val, count);
+			}
+			if (name) {
+				/* special hack for two packets that decode the same way
+				 * on a6xx:
+				 */
+				if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
+						!strcmp(name, "CP_LOAD_STATE6_GEOM"))
+					name = "CP_LOAD_STATE6";
+				dump_domain(dwords+1, count-1, level+2, name);
+			}
+			op->fxn(dwords+1, count-1, level+1);
+			if (!quiet(2))
+				dump_hex(dwords, count, level+1);
+		} else if (pkt_is_type2(dwords[0])) {
+			printl(3, "t2");
+			printl(3, "%snop\n", levels[level+1]);
+			/* a nop is just its header; without this 'count' would be
+			 * stale (or zero, looping forever on a leading nop):
+			 */
+			count = 1;
+		} else {
+			/* for 5xx+ we can do a passable job of looking for start of next valid packet: */
+			if (options->gpu_id >= 500) {
+				while (dwords_left > 0) {
+					if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
+						break;
+					printf("bad type! %08x\n", dwords[0]);
+					dwords++;
+					dwords_left--;
+				}
+				/* 'dwords' already points at the next candidate
+				 * packet, so don't skip over it below:
+				 */
+				count = 0;
+			} else {
+				printf("bad type! %08x\n", dwords[0]);
+				return;
+			}
+		}
+
+		dwords += count;
+		dwords_left -= count;
+
+	}
+
+	if (dwords_left < 0)
+		printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
+}
--- /dev/null
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CFFDEC_H__
+#define __CFFDEC_H__
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/* Selects when the queried registers get dumped in query mode; stored in
+ * cffdec_options::query_mode.
+ */
+enum query_mode {
+ /* default mode, dump all queried regs on each draw: */
+ QUERY_ALL = 0,
+
+ /* only dump if any of the queried regs were written
+ * since last draw:
+ */
+ QUERY_WRITTEN,
+
+ /* only dump if any of the queried regs changed since
+ * last draw:
+ */
+ QUERY_DELTA,
+};
+
+/* Decoder configuration, handed to cffdec_init() by the front-end. */
+struct cffdec_options {
+ unsigned gpu_id; /* numeric GPU id, eg. compared against 500 / 600 by the decoder */
+ int draw_filter; /* decode only this draw; -1 when no filter is set */
+ int color; /* non-zero for colorized output */
+ int dump_shaders; /* dump each shader to a raw file */
+ int summary; /* only show register values on draws, not individual writes */
+ int allregs; /* show all registers on each draw, not just the written ones */
+ int dump_textures; /* dump texture contents (if possible) */
+ int decode_markers; /* NOTE(review): not set by any code visible here - confirm meaning */
+ char *script; /* lua script to run to analyze state, or NULL */
+
+ int query_compare; /* binning vs SYSMEM/GMEM compare mode */
+ int query_mode; /* enum query_mode */
+ char **querystrs; /* registers to query, by name or numeric offset */
+ int nquery; /* number of entries in querystrs */
+
+ /* In "once" mode, only decode a cmdstream buffer once (per draw
+ * mode, in the case of a6xx+ where a single cmdstream buffer can
+ * be used for both binning and draw pass), rather than each time
+ * encountered (ie. once per tile/bin in GMEM draw passes)
+ */
+ int once;
+
+ /* for crashdec, where we know CP_IBx_REM_SIZE, we can use this
+ * to highlight the cmdstream not parsed yet, to make it easier
+ * to see how far along the CP is.
+ */
+ struct {
+ uint64_t base;
+ uint32_t rem;
+ } ibs[4];
+};
+
+/*
+ * Decoder interface used by the front-ends: printf-style output gated by a
+ * level argument, packet/register name lookup, access to the tracked
+ * register values, and the cmdstream decode entry point.
+ */
+void printl(int lvl, const char *fmt, ...);
+const char * pktname(unsigned opc);
+uint32_t regbase(const char *name);
+const char * regname(uint32_t regbase, int color);
+bool reg_written(uint32_t regbase);
+uint32_t reg_lastval(uint32_t regbase);
+uint32_t reg_val(uint32_t regbase);
+void reg_set(uint32_t regbase, uint32_t val);
+void reset_regs(void);
+void cffdec_init(const struct cffdec_options *options);
+void dump_register_val(uint32_t regbase, uint32_t dword, int level);
+/* decode 'sizedwords' dwords of cmdstream at 'dwords', indented by 'level': */
+void dump_commands(uint32_t *dwords, uint32_t sizedwords, int level);
+
+/*
+ * Helpers for packet parsing:
+ */
+
+
+/* packet type, as encoded in the top bits of the header dword: */
+#define CP_TYPE0_PKT 0x00000000
+#define CP_TYPE2_PKT 0x80000000
+#define CP_TYPE3_PKT 0xc0000000
+#define CP_TYPE4_PKT 0x40000000
+#define CP_TYPE7_PKT 0x70000000
+
+/* type0: top two bits clear; size (stored minus one) in bits 16..29 and
+ * register offset in bits 0..14:
+ */
+#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT)
+#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)
+#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF)
+
+/* type2: matched only when the whole dword equals CP_TYPE2_PKT: */
+#define pkt_is_type2(pkt) ((pkt) == CP_TYPE2_PKT)
+
+/*
+ * Odd parity bit for a packet header field: the eight nibbles of 'val'
+ * are xor'd together and the result indexes a 16 entry lookup table
+ * (0x9669) whose bit is set when that nibble has an even number of bits
+ * set, ie. when a parity bit is needed to make the total odd.
+ *
+ * Uses plain 'unsigned int' rather than the non-standard 'uint' typedef
+ * so the header does not depend on <sys/types.h> extensions.
+ */
+static inline unsigned int pm4_calc_odd_parity_bit(unsigned int val)
+{
+	return (0x9669 >> (0xf & ((val) ^
+		((val) >> 4) ^ ((val) >> 8) ^ ((val) >> 12) ^
+		((val) >> 16) ^ ((val) >> 20) ^ ((val) >> 24) ^
+		((val) >> 28)))) & 1;
+}
+
+/*
+ * Check both for the type3 opcode and make sure that the reserved bits [1:7]
+ * and 15 are 0
+ */
+#define pkt_is_type3(pkt) \
+	((((pkt) & 0xC0000000) == CP_TYPE3_PKT) && \
+	 (((pkt) & 0x80FE) == 0))
+
+/* type3: opcode in bits 8..15, size (stored minus one) in bits 16..29: */
+#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF)
+#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)
+
+/* type4: top nibble is 0x4, and the parity bits (bit 27 for the register
+ * offset, bit 7 for the size) must match the fields they protect:
+ */
+#define pkt_is_type4(pkt) \
+ ((((pkt) & 0xF0000000) == CP_TYPE4_PKT) && \
+ ((((pkt) >> 27) & 0x1) == \
+ pm4_calc_odd_parity_bit(type4_pkt_offset(pkt))) \
+ && ((((pkt) >> 7) & 0x1) == \
+ pm4_calc_odd_parity_bit(type4_pkt_size(pkt))))
+
+/* type4: register offset in bits 8..26, size in bits 0..6: */
+#define type4_pkt_offset(pkt) (((pkt) >> 8) & 0x7FFFF)
+#define type4_pkt_size(pkt) ((pkt) & 0x7F)
+
+/* type7: top nibble is 0x7, bits 24..27 are zero, and the parity bits
+ * (bit 23 for the opcode, bit 15 for the size) must match:
+ */
+#define pkt_is_type7(pkt) \
+ ((((pkt) & 0xF0000000) == CP_TYPE7_PKT) && \
+ (((pkt) & 0x0F000000) == 0) && \
+ ((((pkt) >> 23) & 0x1) == \
+ pm4_calc_odd_parity_bit(cp_type7_opcode(pkt))) \
+ && ((((pkt) >> 15) & 0x1) == \
+ pm4_calc_odd_parity_bit(type7_pkt_size(pkt))))
+
+/* type7: opcode in bits 16..22, size in bits 0..13: */
+#define cp_type7_opcode(pkt) (((pkt) >> 16) & 0x7F)
+#define type7_pkt_size(pkt) ((pkt) & 0x3FFF)
+
+#endif /* __CFFDEC_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <err.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <string.h>
+#include <assert.h>
+#include <signal.h>
+#include <errno.h>
+
+#include "redump.h"
+#include "disasm.h"
+#include "script.h"
+#include "io.h"
+#include "rnnutil.h"
+#include "pager.h"
+#include "buffers.h"
+#include "cffdec.h"
+
+/* Decoder options, filled in from the command line; gpu_id is a default
+ * that gets replaced by the first RD_GPU_ID section found in the file.
+ */
+static struct cffdec_options options = {
+ .gpu_id = 220,
+};
+
+static bool needs_wfi = false; /* cleared at the start of every section in handle_file() */
+static bool is_blob = false; /* set once an RD_CMD section has been seen */
+static int show_comp = false; /* --show-compositor: don't skip fdperf/chrome/surfaceflinger/X cmdstream */
+static int interactive; /* output goes through the pager; defaults to isatty(stdout) */
+static int vertices; /* printed after each decoded submit */
+
+/* Decode one capture file ("-" for stdin); see the definition below. */
+static int handle_file(const char *filename, int start, int end, int draw);
+
+/* Print the command line help to stderr and exit with status 2.
+ * 'name' is the program name shown in the usage line. Does not return.
+ */
+static void print_usage(const char *name)
+{
+	fprintf(stderr, "Usage:\n\n"
+			"\t%s [OPTIONS]... FILE...\n\n"
+			"Options:\n"
+			"\t-v, --verbose - more verbose disassembly\n"
+			"\t--dump-shaders - dump each shader to a raw file\n"
+			"\t--no-color - disable colorized output (default for non-console\n"
+			"\t output)\n"
+			"\t--color - enable colorized output (default for tty output)\n"
+			"\t--no-pager - disable pager (default for non-console output)\n"
+			"\t--pager - enable pager (default for tty output)\n"
+			"\t-s, --summary - don't show individual register writes, but just\n"
+			"\t register values on draws\n"
+			"\t-a, --allregs - show all registers (including ones not written\n"
+			"\t since previous draw) on each draw\n"
+			"\t-S, --start=N - start decoding from frame N\n"
+			"\t-E, --end=N - stop decoding after frame N\n"
+			"\t-F, --frame=N - decode only frame N\n"
+			"\t-D, --draw=N - decode only draw N\n"
+			"\t--textures - dump texture contents (if possible)\n"
+			"\t-L, --script=LUA - run specified lua script to analyze state\n"
+			"\t-q, --query=REG - query mode, dump only specified query registers on\n"
+			"\t each draw; multiple --query/-q args can be given to\n"
+			"\t dump multiple registers; register can be specified\n"
+			"\t either by name or numeric offset\n"
+			"\t--query-all - in query mode, show all queried regs on each draw\n"
+			"\t (default query mode)\n"
+			"\t--query-written - in query mode, show queried regs on draws if any of\n"
+			"\t them have been written since previous draw\n"
+			"\t--query-delta - in query mode, show queried regs on draws if any of\n"
+			"\t them have changed since previous draw\n"
+			"\t--query-compare - dump registers for BINNING vs GMEM/BYPASS per draw;\n"
+			"\t only applicable for regs set via SDS group (a6xx+),\n"
+			"\t implies --once, can be combined with --query-all,\n"
+			"\t --query-written, or --query-delta\n"
+			"\t--once - decode cmdstream only once (per draw mode); if same\n"
+			"\t cmdstream is executed for each tile, this will decode\n"
+			"\t it only for the first tile and skip the remainder,\n"
+			"\t which can be useful when looking at state that does\n"
+			"\t not change per tile\n"
+			"\t--not-once - decode cmdstream for each IB (default)\n"
+			"\t-h, --help - show this message\n"
+			, name);
+	exit(2);
+}
+
+/* Long option table for getopt_long(). Entries with a non-NULL flag
+ * pointer store their value directly (getopt_long() then returns 0); the
+ * rest map onto their short option character.
+ */
+static const struct option opts[] = {
+	/* Long opts that simply set a flag (no corresponding short alias): */
+	{ "dump-shaders", no_argument, &options.dump_shaders, 1 },
+	{ "no-color", no_argument, &options.color, 0 },
+	{ "color", no_argument, &options.color, 1 },
+	{ "no-pager", no_argument, &interactive, 0 },
+	{ "pager", no_argument, &interactive, 1 },
+	{ "textures", no_argument, &options.dump_textures, 1 },
+	{ "show-compositor", no_argument, &show_comp, 1 },
+	{ "query-all", no_argument, &options.query_mode, QUERY_ALL },
+	{ "query-written", no_argument, &options.query_mode, QUERY_WRITTEN },
+	{ "query-delta", no_argument, &options.query_mode, QUERY_DELTA },
+	{ "query-compare", no_argument, &options.query_compare, 1 },
+	{ "once", no_argument, &options.once, 1 },
+	{ "not-once", no_argument, &options.once, 0 },
+
+	/* Long opts with short alias: */
+	{ "verbose", no_argument, 0, 'v' },
+	{ "summary", no_argument, 0, 's' },
+	{ "allregs", no_argument, 0, 'a' },
+	{ "start", required_argument, 0, 'S' },
+	{ "end", required_argument, 0, 'E' },
+	{ "frame", required_argument, 0, 'F' },
+	{ "draw", required_argument, 0, 'D' },
+	{ "script", required_argument, 0, 'L' },
+	{ "query", required_argument, 0, 'q' },
+	{ "help", no_argument, 0, 'h' },
+
+	/* getopt_long() requires the table to end with an all-zero entry: */
+	{ NULL, 0, NULL, 0 },
+};
+
+/* Entry point: parse the command line, then decode each FILE argument in
+ * turn via handle_file().
+ *
+ * Exits through print_usage() (status 2) on bad options, when query-only
+ * options are given without any --query register, or when no file could
+ * be read. Otherwise returns the result of the last handle_file() call.
+ */
+int main(int argc, char **argv)
+{
+	int ret = -1;
+	int start = 0, end = 0x7ffffff, draw = -1;
+	int c;
+
+	interactive = isatty(STDOUT_FILENO);
+
+	options.color = interactive;
+
+	while ((c = getopt_long(argc, argv, "vsaS:E:F:D:L:q:h", opts, NULL)) != -1) {
+		switch (c) {
+		case 0:
+			/* option that set a flag, nothing to do */
+			break;
+		case 'v':
+			disasm_set_debug(PRINT_RAW | EXPAND_REPEAT | PRINT_VERBOSE);
+			break;
+		case 's':
+			options.summary = true;
+			break;
+		case 'a':
+			options.allregs = true;
+			break;
+		case 'S':
+			start = atoi(optarg);
+			break;
+		case 'E':
+			end = atoi(optarg);
+			break;
+		case 'F':
+			start = end = atoi(optarg);
+			break;
+		case 'D':
+			draw = atoi(optarg);
+			break;
+		case 'L':
+			options.script = optarg;
+			if (script_load(options.script)) {
+				errx(-1, "error loading %s\n", options.script);
+			}
+			break;
+		case 'q': {
+			/* grow through a temporary so the existing list is not
+			 * lost (and NULL not dereferenced) if realloc() fails:
+			 */
+			char **querystrs = realloc(options.querystrs,
+					(options.nquery + 1) * sizeof(*options.querystrs));
+			if (!querystrs)
+				errx(-1, "out of memory");
+			options.querystrs = querystrs;
+			options.querystrs[options.nquery] = optarg;
+			options.nquery++;
+			interactive = 0;
+			break;
+		}
+		case 'h':
+		default:
+			print_usage(argv[0]);
+		}
+	}
+
+	/* reject inconsistent options up front, rather than after all the
+	 * files have already been decoded:
+	 */
+	if ((options.query_mode || options.query_compare) && !options.nquery) {
+		fprintf(stderr, "query options only valid in query mode!\n");
+		print_usage(argv[0]);
+	}
+
+	if (interactive) {
+		pager_open();
+	}
+
+	while (optind < argc) {
+		ret = handle_file(argv[optind], start, end, draw);
+		if (ret) {
+			fprintf(stderr, "error reading: %s\n", argv[optind]);
+			fprintf(stderr, "continuing..\n");
+		}
+		optind++;
+	}
+
+	if (ret)
+		print_usage(argv[0]);
+
+	script_finish();
+
+	if (interactive) {
+		pager_close();
+	}
+
+	return ret;
+}
+
+/* Unpack an address section: buf[0] is the low 32b of the gpuaddr and
+ * buf[1] the length. Sections larger than 8 bytes additionally carry the
+ * upper 32b of the address in buf[2]. 'sz' is the section size in bytes.
+ */
+static void parse_addr(uint32_t *buf, int sz, unsigned int *len, uint64_t *gpuaddr)
+{
+	uint64_t addr = buf[0];
+
+	if (sz > 8)
+		addr |= ((uint64_t)(buf[2])) << 32;
+
+	*len = buf[1];
+	*gpuaddr = addr;
+}
+
+/* Decode one capture file ("-" reads stdin).
+ *
+ * The file is a sequence of sections, each an 8 byte (type, size) header
+ * followed by 'size' bytes of payload. Buffer contents are registered
+ * with the buffer tracker, and each cmdstream section whose submit index
+ * lies in [start, end] is decoded. 'draw' becomes the draw filter.
+ *
+ * Returns -1 if the file cannot be opened and 0 otherwise; a truncated or
+ * malformed file is reported as "corrupt file" but still returns 0.
+ */
+static int handle_file(const char *filename, int start, int end, int draw)
+{
+	enum rd_sect_type type = RD_NONE;
+	void *buf = NULL;
+	struct io *io;
+	int submit = 0, got_gpu_id = 0;
+	int sz, ret = 0;
+	bool needs_reset = false;
+	bool skip = false;
+
+	options.draw_filter = draw;
+
+	cffdec_init(&options);
+
+	printf("Reading %s...\n", filename);
+
+	script_start_cmdstream(filename);
+
+	if (!strcmp(filename, "-"))
+		io = io_openfd(0);
+	else
+		io = io_open(filename);
+
+	if (!io) {
+		fprintf(stderr, "could not open: %s\n", filename);
+		return -1;
+	}
+
+	struct {
+		unsigned int len;
+		uint64_t gpuaddr;
+	} gpuaddr = {0};
+
+	while (true) {
+		uint32_t arr[2];
+
+		ret = io_readn(io, arr, 8);
+		if (ret <= 0)
+			goto end;
+
+		/* skip over padding between sections: */
+		while ((arr[0] == 0xffffffff) && (arr[1] == 0xffffffff)) {
+			ret = io_readn(io, arr, 8);
+			if (ret <= 0)
+				goto end;
+		}
+
+		type = arr[0];
+		sz = arr[1];
+
+		if (sz < 0) {
+			ret = -1;
+			goto end;
+		}
+
+		free(buf);
+
+		needs_wfi = false;
+
+		/* one extra byte so string sections are always terminated;
+		 * computed in size_t so sz == INT_MAX cannot overflow:
+		 */
+		buf = malloc((size_t)sz + 1);
+		if (!buf) {
+			ret = -1;
+			goto end;
+		}
+		((char *)buf)[sz] = '\0';
+		ret = io_readn(io, buf, sz);
+		if (ret < 0)
+			goto end;
+
+		switch(type) {
+		case RD_TEST:
+			printl(1, "test: %s\n", (char *)buf);
+			break;
+		case RD_CMD:
+			is_blob = true;
+			printl(2, "cmd: %s\n", (char *)buf);
+			skip = false;
+			if (!show_comp) {
+				skip |= (strstr(buf, "fdperf") == buf);
+				skip |= (strstr(buf, "chrome") == buf);
+				skip |= (strstr(buf, "surfaceflinger") == buf);
+				skip |= ((char *)buf)[0] == 'X';
+			}
+			break;
+		case RD_VERT_SHADER:
+			printl(2, "vertex shader:\n%s\n", (char *)buf);
+			break;
+		case RD_FRAG_SHADER:
+			printl(2, "fragment shader:\n%s\n", (char *)buf);
+			break;
+		case RD_GPUADDR:
+			if (needs_reset) {
+				reset_buffers();
+				needs_reset = false;
+			}
+			/* parse_addr() reads at least two dwords: */
+			if (sz < 8) {
+				ret = -1;
+				goto end;
+			}
+			parse_addr(buf, sz, &gpuaddr.len, &gpuaddr.gpuaddr);
+			break;
+		case RD_BUFFER_CONTENTS:
+			/* ownership of buf moves to the buffer tracker: */
+			add_buffer(gpuaddr.gpuaddr, gpuaddr.len, buf);
+			buf = NULL;
+			break;
+		case RD_CMDSTREAM_ADDR:
+			if ((start <= submit) && (submit <= end)) {
+				unsigned int sizedwords;
+				uint64_t gpuaddr;
+				/* parse_addr() reads at least two dwords: */
+				if (sz < 8) {
+					ret = -1;
+					goto end;
+				}
+				parse_addr(buf, sz, &sizedwords, &gpuaddr);
+				printl(2, "############################################################\n");
+				printl(2, "cmdstream: %d dwords\n", sizedwords);
+				if (!skip) {
+					script_start_submit();
+					dump_commands(hostptr(gpuaddr), sizedwords, 0);
+					script_end_submit();
+				}
+				printl(2, "############################################################\n");
+				printl(2, "vertices: %d\n", vertices);
+			}
+			needs_reset = true;
+			submit++;
+			break;
+		case RD_GPU_ID:
+			if (!got_gpu_id) {
+				if (sz < (int)sizeof(unsigned int)) {
+					ret = -1;
+					goto end;
+				}
+				options.gpu_id = *((unsigned int *)buf);
+				printl(2, "gpu_id: %d\n", options.gpu_id);
+				cffdec_init(&options);
+				got_gpu_id = 1;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+end:
+	script_end_cmdstream();
+
+	/* the last section's payload is still ours unless it was handed off: */
+	free(buf);
+
+	io_close(io);
+	fflush(stdout);
+
+	if (ret < 0) {
+		printf("corrupt file\n");
+	}
+	return 0;
+}
--- /dev/null
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Decoder for devcoredump traces from drm/msm. In case of a gpu crash/hang,
+ * the coredump should be found in:
+ *
+ * /sys/class/devcoredump/devcd<n>/data
+ *
+ * The crashdump will hang around for 5min, it can be cleared by writing to
+ * the file, ie:
+ *
+ * echo 1 > /sys/class/devcoredump/devcd<n>/data
+ *
+ * (the driver won't log any new crashdumps until the previous one is cleared
+ * or times out after 5min)
+ */
+
+
+#include <assert.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "buffers.h"
+#include "cffdec.h"
+#include "disasm.h"
+#include "pager.h"
+#include "rnnutil.h"
+#include "util.h"
+#include "instr-a3xx.h"
+
+
+static FILE *in; /* stream the crashdump is read from, one line at a time (see popline()) */
+static bool verbose;
+
+/* NOTE(review): presumably register databases for the GMU / control /
+ * pipe register blocks - their users are not visible here, confirm.
+ */
+static struct rnn *rnn_gmu;
+static struct rnn *rnn_control;
+static struct rnn *rnn_pipe;
+
+/* decoder options; gpu_id is read by the is_a6xx()/is_a5xx()/is_64b() helpers */
+static struct cffdec_options options = {
+ .draw_filter = -1,
+};
+
+/* true when the configured gpu_id is in the 6xx range: */
+static inline bool
+is_a6xx(void)
+{
+	const unsigned id = options.gpu_id;
+
+	return (id >= 600) && (id < 700);
+}
+
+/* true when the configured gpu_id is in the 5xx range: */
+static inline bool
+is_a5xx(void)
+{
+	const unsigned id = options.gpu_id;
+
+	return (id >= 500) && (id < 600);
+}
+
+/* true for gpu_id 500 and up, where addresses are 64b: */
+static inline bool
+is_64b(void)
+{
+	return !(options.gpu_id < 500);
+}
+
+/*
+ * Helpers to read register values:
+ */
+
+/* Read a register pair that forms one 64b value on 64b GPUs (ie. a5xx+).
+ * 'name' must resolve to a non-zero register offset (asserted). On 32b
+ * GPUs only the named register is read.
+ */
+static uint64_t
+regval64(const char *name)
+{
+	const unsigned lo_reg = regbase(name);
+	assert(lo_reg);
+
+	uint64_t lo = reg_val(lo_reg);
+	if (!is_64b())
+		return lo;
+
+	/* the upper half lives in the next register: */
+	uint64_t hi = reg_val(lo_reg + 1);
+	return lo | (hi << 32);
+}
+
+/* Read the current value of the register called 'name', which must
+ * resolve to a non-zero register offset (asserted).
+ */
+static uint32_t
+regval(const char *name)
+{
+	const unsigned offset = regbase(name);
+	assert(offset);
+
+	uint32_t value = reg_val(offset);
+	return value;
+}
+
+/*
+ * Line reading and string helpers:
+ */
+
+static char *lastline;   /* line most recently read from 'in' */
+static char *pushedline; /* line handed back by pushline(), if any */
+
+/* Return the next input line, including its trailing newline.
+ *
+ * A line handed back with pushline() is returned first. Otherwise the
+ * previous line is freed and a new one is read; the returned pointer is
+ * therefore only valid until the next read. The process exits with
+ * status 0 once the input is exhausted.
+ */
+static const char *
+popline(void)
+{
+	if (pushedline) {
+		char *pending = pushedline;
+		pushedline = NULL;
+		return pending;
+	}
+
+	free(lastline);
+
+	char *fresh = NULL;
+	size_t cap = 0;
+	if (getline(&fresh, &cap, in) < 0)
+		exit(0);
+
+	lastline = fresh;
+	return fresh;
+}
+
+/*
+ * Push the most recently read line back, so that the next popline()
+ * returns it again.
+ */
+static void
+pushline(void)
+{
+	/* only a single line of push-back is supported */
+	assert(pushedline == NULL);
+	pushedline = lastline;
+}
+
+/*
+ * Read the next input line and decode it as ascii85 data.
+ *
+ * Returns a newly allocated, zero-initialized buffer of sizedwords dwords
+ * (at least one dword is always allocated) which the caller owns.  Each
+ * 'z' decodes to a zero dword, every other group of up to five characters
+ * decodes to one dword.  Data beyond sizedwords dwords is dropped with a
+ * warning instead of being written past the end of the buffer.
+ */
+static uint32_t *
+popline_ascii85(uint32_t sizedwords)
+{
+	const char *line = popline();
+
+	/* At this point we expect the ascii85 data to be indented *some*
+	 * amount, and to terminate at the end of the line.  So just eat
+	 * up the leading whitespace.
+	 */
+	assert(*line == ' ');
+	while (*line == ' ')
+		line++;
+
+	/* calloc() checks the count * size multiplication for overflow: */
+	uint32_t *buf = calloc(sizedwords ? sizedwords : 1, sizeof(*buf));
+	if (!buf) {
+		fprintf(stderr, "out of memory decoding ascii85 data\n");
+		exit(1);
+	}
+
+	uint32_t idx = 0;
+
+	/* Stop at the end of the line, and also at the end of the string in
+	 * case the last line of the input is missing its newline:
+	 */
+	while ((*line != '\n') && (*line != '\0')) {
+		/* The encoded length comes from the dump and is not trusted;
+		 * never write more dwords than were allocated:
+		 */
+		if (idx >= sizedwords) {
+			fprintf(stderr, "ascii85 data exceeds %u dwords, truncating\n",
+					sizedwords);
+			break;
+		}
+
+		if (*line == 'z') {
+			buf[idx++] = 0;
+			line++;
+			continue;
+		}
+
+		uint32_t accum = 0;
+		for (int i = 0; (i < 5) && (*line != '\n') && (*line != '\0'); i++) {
+			accum *= 85;
+			accum += *line - '!';
+			line++;
+		}
+
+		buf[idx++] = accum;
+	}
+
+	return buf;
+}
+
+/* Return true when the string line begins with the prefix start. */
+static bool
+startswith(const char *line, const char *start)
+{
+	return strncmp(line, start, strlen(start)) == 0;
+}
+
+/*
+ * sscanf() wrapper which requires every conversion in fmt to succeed.
+ *
+ * The expected number of assignments is derived from fmt: each '%'
+ * conversion counts once, except for the literal "%%" and assignment
+ * suppressed "%*..." conversions, which store nothing.  (Formats using
+ * "%n" or a '%' inside a scanset are not accounted for.)  If the number of
+ * items actually converted differs, an error is printed to stderr and the
+ * process exits with status 1.
+ */
+static void
+parseline(const char *line, const char *fmt, ...)
+{
+	int n = 0;
+
+	/* scan fmt string to extract expected # of conversions: */
+	for (const char *p = fmt; *p; p++) {
+		if (*p != '%')
+			continue;
+
+		/* look at (and then skip over) the char following the '%': */
+		p++;
+		if (*p == '\0')
+			break;
+		if ((*p == '%') || (*p == '*'))
+			continue;
+
+		n++;
+	}
+
+	va_list ap;
+	va_start(ap, fmt);
+	int converted = vsscanf(line, fmt, ap);
+	va_end(ap);
+
+	if (converted != n) {
+		fprintf(stderr, "parse error scanning: '%s'\n", fmt);
+		exit(1);
+	}
+}
+
+/*
+ * Iterate over the lines of the current section, binding each line to
+ * _line.  A line which does not start with a space marks the start of the
+ * next section: it is pushed back for the caller and the loop ends.  The
+ * statement following the macro becomes the 'else' branch, ie. it is the
+ * per-line loop body.  Note that popline() exits the process at end of
+ * input rather than returning NULL.
+ */
+#define foreach_line_in_section(_line) \
+	for (const char *_line = popline(); _line; _line = popline()) \
+		/* check for start of next section */ \
+		if (_line[0] != ' ') { \
+			pushline(); \
+			break; \
+		} else
+
+/*
+ * Custom assert() handler for the disassembler.  Disassembly is attempted
+ * on data which might not be valid instructions, so when a recovery point
+ * has been armed (see TRY()) a failed assertion jumps back to it instead
+ * of aborting the whole decode.
+ */
+
+static bool jmp_env_valid;
+static jmp_buf jmp_env;
+
+void
+ir3_assert_handler(const char *expr, const char *file, int line,
+		const char *func)
+{
+	printf("%s:%u: %s: Assertion `%s' failed.\n", file, line, func, expr);
+
+	/* without an armed recovery point there is nowhere to return to: */
+	if (!jmp_env_valid)
+		abort();
+
+	longjmp(jmp_env, 1);
+}
+
+/*
+ * Evaluate x with the assert recovery point armed.  If
+ * ir3_assert_handler() fires while x runs, it longjmp()s back to the
+ * setjmp() below, which then returns non-zero so that the remainder of x
+ * is skipped.  Either way jmp_env_valid is cleared afterwards.  TRY()
+ * must not be nested (asserted on entry).
+ */
+#define TRY(x) do { \
+		assert(!jmp_env_valid); \
+		if (setjmp(jmp_env) == 0) { \
+			jmp_env_valid = true; \
+			x; \
+		} \
+		jmp_env_valid = false; \
+	} while (0)
+
+/*
+ * Decode ringbuffer section:
+ */
+
+/*
+ * State recorded for each ringbuffer in the dump, indexed by its "id:".
+ * size is in bytes (the data is decoded as size / 4 dwords into buf).
+ */
+static struct {
+	uint64_t iova;
+	uint32_t rptr;
+	uint32_t wptr;
+	uint32_t size;
+	uint32_t *buf;
+} ringbuffers[5];
+
+/*
+ * Parse the "ringbuffer:" section, recording each ring's address,
+ * read/write pointers, size and contents in ringbuffers[], and
+ * registering the contents with add_buffer().  Every line other than the
+ * ascii85 data marker is echoed to stdout.  An out-of-range ring id is a
+ * fatal error, since it would otherwise index outside of ringbuffers[].
+ */
+static void
+decode_ringbuffer(void)
+{
+	unsigned id = 0;
+
+	foreach_line_in_section (line) {
+		if (startswith(line, " - id:")) {
+			parseline(line, " - id: %u", &id);
+			/* checked unconditionally, not just in debug builds: */
+			if (id >= ARRAY_SIZE(ringbuffers)) {
+				fprintf(stderr, "invalid ringbuffer id: %u\n", id);
+				exit(1);
+			}
+		} else if (startswith(line, " iova:")) {
+			parseline(line, " iova: %"SCNx64, &ringbuffers[id].iova);
+		} else if (startswith(line, " rptr:")) {
+			parseline(line, " rptr: %"SCNu32, &ringbuffers[id].rptr);
+		} else if (startswith(line, " wptr:")) {
+			parseline(line, " wptr: %"SCNu32, &ringbuffers[id].wptr);
+		} else if (startswith(line, " size:")) {
+			parseline(line, " size: %"SCNu32, &ringbuffers[id].size);
+		} else if (startswith(line, " data: !!ascii85 |")) {
+			ringbuffers[id].buf = popline_ascii85(ringbuffers[id].size / 4);
+			add_buffer(ringbuffers[id].iova, ringbuffers[id].size, ringbuffers[id].buf);
+			continue;
+		}
+
+		printf("%s", line);
+	}
+}
+
+/*
+ * Return whether pkt looks like the first dword of a packet.  Only
+ * meaningful on a5xx and later, where type4/type7 headers can be
+ * recognized; on older GPUs everything is accepted.
+ */
+static bool
+valid_header(uint32_t pkt)
+{
+	/* TODO maybe we can check validish looking pkt3 opc or pkt0
+	 * register offset.. the cmds sent by kernel are usually
+	 * fairly limited (other than initialization) which confines
+	 * the search space a bit..
+	 */
+	if (options.gpu_id < 500)
+		return true;
+
+	return pkt_is_type4(pkt) || pkt_is_type7(pkt);
+}
+
+/*
+ * Decode the cmdstream the GPU was executing at the time of the crash.
+ *
+ * Uses the CP register values recorded by decode_registers() to locate
+ * the active ringbuffer (matching CP_RB_BASE against the rings recorded
+ * by decode_ringbuffer()) and the current IB1/IB2 positions, then hands
+ * the unread part of the ring to dump_commands().  Register state is
+ * reset before decoding.  Rings without usable data are skipped.
+ */
+static void
+dump_cmdstream(void)
+{
+	uint64_t rb_base = regval64("CP_RB_BASE");
+
+	printf("got rb_base=%"PRIx64"\n", rb_base);
+
+	options.ibs[1].base = regval64("CP_IB1_BASE");
+	options.ibs[1].rem = regval("CP_IB1_REM_SIZE");
+	options.ibs[2].base = regval64("CP_IB2_BASE");
+	options.ibs[2].rem = regval("CP_IB2_REM_SIZE");
+
+	/* Adjust remaining size to account for cmdstream slurped into ROQ
+	 * but not yet consumed by SQE
+	 *
+	 * TODO add support for earlier GPUs once we tease out the needed
+	 * registers.. see crashit.c in msmtest for hints.
+	 *
+	 * TODO it would be nice to be able to extract out register bitfields
+	 * by name rather than hard-coding this.
+	 */
+	if (is_a6xx()) {
+		options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16;
+		options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16;
+	}
+
+	printf("IB1: %"PRIx64", %u\n", options.ibs[1].base, options.ibs[1].rem);
+	printf("IB2: %"PRIx64", %u\n", options.ibs[2].base, options.ibs[2].rem);
+
+	/* now that we've got the regvals we want, reset register state
+	 * so we aren't seeing values from decode_registers();
+	 */
+	reset_regs();
+
+	for (unsigned id = 0; id < ARRAY_SIZE(ringbuffers); id++) {
+		if (ringbuffers[id].iova != rb_base)
+			continue;
+		if (!ringbuffers[id].size)
+			continue;
+
+		printf("found ring!\n");
+
+		/* a size was recorded but no data line followed it: */
+		if (!ringbuffers[id].buf) {
+			fprintf(stderr, "ringbuffer %u has no data, skipping\n", id);
+			continue;
+		}
+
+		/* The kernel level ringbuffer (RB) wraps around, which
+		 * cffdec doesn't really deal with.. so figure out how
+		 * many dwords are unread
+		 */
+		unsigned ringszdw = ringbuffers[id].size >> 2;  /* in dwords */
+
+		/* less than one dword: the modulo math below would divide
+		 * by zero
+		 */
+		if (!ringszdw)
+			continue;
+
+/* helper macro to deal with modulo size math: */
+#define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
+
+		/* The rptr will (most likely) have moved past the IB to
+		 * userspace cmdstream, so back up a bit, and then advance
+		 * until we find a valid start of a packet.. this is going
+		 * to be less reliable on a4xx and before (pkt0/pkt3),
+		 * compared to pkt4/pkt7 with parity bits
+		 */
+		const int lookback = 12;
+		unsigned rptr = mod_add(ringbuffers[id].rptr, -lookback);
+
+		for (int idx = 0; idx < lookback; idx++) {
+			if (valid_header(ringbuffers[id].buf[rptr]))
+				break;
+			rptr = mod_add(rptr, 1);
+		}
+
+		unsigned cmdszdw = mod_add(ringbuffers[id].wptr, -rptr);
+
+		printf("got cmdszdw=%u\n", cmdszdw);
+
+		/* always allocate at least one dword, so that a NULL return
+		 * unambiguously means out of memory
+		 */
+		uint32_t *buf = calloc(cmdszdw ? cmdszdw : 1, sizeof(*buf));
+		if (!buf) {
+			fprintf(stderr, "out of memory unwrapping ringbuffer\n");
+			exit(1);
+		}
+
+		/* linearize the (possibly wrapped) unread part of the ring: */
+		for (unsigned idx = 0; idx < cmdszdw; idx++) {
+			unsigned p = mod_add(rptr, idx);
+			buf[idx] = ringbuffers[id].buf[p];
+		}
+
+#undef mod_add
+
+		dump_commands(buf, cmdszdw, 0);
+		free(buf);
+	}
+}
+
+/*
+ * Decode 'bos' (buffers) section:
+ */
+
+/*
+ * Parse the "bos:" section: for each buffer remember its iova and size,
+ * and once its ascii85 data has been decoded register the contents with
+ * add_buffer().  All lines other than the data marker are echoed.
+ */
+static void
+decode_bos(void)
+{
+	uint64_t iova = 0;
+	uint32_t size = 0;
+
+	foreach_line_in_section (line) {
+		if (startswith(line, " data: !!ascii85 |")) {
+			/* size is in bytes, the decoder wants dwords: */
+			uint32_t *contents = popline_ascii85(size / 4);
+
+			if (verbose)
+				dump_hex_ascii(contents, size, 1);
+
+			add_buffer(iova, size, contents);
+
+			continue;
+		}
+
+		if (startswith(line, " - iova:"))
+			parseline(line, " - iova: %"PRIx64, &iova);
+		else if (startswith(line, " size:"))
+			parseline(line, " size: %u", &size);
+
+		printf("%s", line);
+	}
+}
+
+/*
+ * Decode registers section:
+ */
+
+/*
+ * Print one register value using the given register database: decoded
+ * when type information is available, as raw hex under the register's
+ * name otherwise, or as "<offset>: value" for unknown registers.
+ */
+static void
+dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
+{
+	struct rnndecaddrinfo *reginfo = rnn_reginfo(rnn, offset);
+
+	if (!reginfo) {
+		printf("<%04x>: %08x\n", offset, value);
+		return;
+	}
+
+	if (!reginfo->typeinfo) {
+		printf("%s: %08x\n", reginfo->name, value);
+		return;
+	}
+
+	/* NOTE(review): ownership of the decoded string is not visible
+	 * here; confirm whether it needs to be freed.
+	 */
+	char *text = rnndec_decodeval(rnn->vc, reginfo->typeinfo, value);
+	printf("%s: %s\n", reginfo->name, text);
+}
+
+/*
+ * Parse the "registers-gmu:" section, printing each value raw and then
+ * decoded via the GMU register database.  Offsets in the dump are
+ * divided by 4 before lookup.
+ */
+static void
+decode_gmu_registers(void)
+{
+	foreach_line_in_section (line) {
+		uint32_t byte_offset;
+		uint32_t regvalue;
+
+		parseline(line, " - { offset: %x, value: %x }", &byte_offset, &regvalue);
+
+		printf("\t%08x\t", regvalue);
+		dump_register(rnn_gmu, byte_offset / 4, regvalue);
+	}
+}
+
+/*
+ * Parse the "registers:" section.  Each value is recorded with reg_set()
+ * (so that it can be looked up by name afterwards) and printed both raw
+ * and decoded.  Offsets in the dump are divided by 4 before use.
+ */
+static void
+decode_registers(void)
+{
+	foreach_line_in_section (line) {
+		uint32_t byte_offset;
+		uint32_t regvalue;
+
+		parseline(line, " - { offset: %x, value: %x }", &byte_offset, &regvalue);
+
+		const uint32_t reg = byte_offset / 4;
+
+		reg_set(reg, regvalue);
+		printf("\t%08x", regvalue);
+		dump_register_val(reg, regvalue, 0);
+	}
+}
+
+/*
+ * Similar to the registers section, but for banked context regs: the
+ * cluster-name and context header lines are echoed, every other line is
+ * printed as a raw plus decoded register value (without being recorded).
+ */
+static void
+decode_clusters(void)
+{
+	foreach_line_in_section (line) {
+		const bool is_header =
+			startswith(line, " - cluster-name:") ||
+			startswith(line, " - context:");
+
+		if (is_header) {
+			printf("%s", line);
+			continue;
+		}
+
+		uint32_t byte_offset;
+		uint32_t regvalue;
+
+		parseline(line, " - { offset: %x, value: %x }", &byte_offset, &regvalue);
+
+		printf("\t%08x", regvalue);
+		dump_register_val(byte_offset / 4, regvalue, 0);
+	}
+}
+
+/*
+ * Decode indexed-registers.. these aren't like normal registers, but a
+ * sort of FIFO where successive reads pop out associated debug state.
+ */
+
+/*
+ * Print the CP_SEQ_STAT block: the first dword is shown as the PC, the
+ * following 32 dwords as registers $01..$20 in two columns.  On a6xx, if
+ * the first of those registers holds a type7 packet header, the name of
+ * the packet is printed as well.  Reads 33 dwords from stat.
+ */
+static void
+dump_cp_seq_stat(uint32_t *stat)
+{
+	const uint32_t *regs = &stat[1];
+
+	printf("\t PC: %04x\n", stat[0]);
+
+	/* Not sure if the non-type7 case can happen: */
+	if (is_a6xx() && valid_header(regs[0]) && pkt_is_type7(regs[0])) {
+		const char *name = pktname(cp_type7_opcode(regs[0]));
+		if (name)
+			printf("\tPKT: %s\n", name);
+	}
+
+	for (int lo = 1; lo <= 16; lo++) {
+		const int hi = lo + 16;
+
+		printf("\t$%02x: %08x\t\t$%02x: %08x\n",
+				lo, regs[lo - 1], hi, regs[hi - 1]);
+	}
+}
+
+/*
+ * Decode the 0x80 scratch control registers in regs, if a control
+ * register database was loaded for this GPU.
+ */
+static void
+dump_control_regs(uint32_t *regs)
+{
+	if (!rnn_control)
+		return;
+
+	/* Control regs 0x100-0x17f are a scratch space to be used by the
+	 * firmware however it wants, unlike lower regs which involve some
+	 * fixed-function units. Therefore only these registers get dumped
+	 * directly.
+	 */
+	const uint32_t scratch_base = 0x100;
+
+	for (uint32_t reg = scratch_base; reg < scratch_base + 0x80; reg++) {
+		const uint32_t value = regs[reg - scratch_base];
+
+		printf("\t%08x\t", value);
+		dump_register(rnn_control, reg, value);
+	}
+}
+
+/*
+ * Dump the CP_UCODE_DBG_DATA block, which is treated as six sections
+ * spaced 0x1000 dwords apart: five are hexdumped (the jumptable only in
+ * verbose mode), the last one is decoded as scratch control registers.
+ */
+static void
+dump_cp_ucode_dbg(uint32_t *dbg)
+{
+	/* Notes on the data:
+	 * There seems to be a section every 4096 DWORD's. The sections aren't
+	 * all the same size, so the rest of the 4096 DWORD's are filled with
+	 * mirrors of the actual data.
+	 */
+	static const struct {
+		const char *title;
+		unsigned dwords;
+	} hexdumps[5] = {
+		/* Contains scattered data from a630_sqe.fw: */
+		{ "\tSQE instruction cache:\n", 0x400 },
+		{ "\tUnknown 1:\n", 0x80 },
+		{ "\tUnknown 2:\n", 0x200 },
+		{ "\tUnknown 3:\n", 0x80 },
+		{ "\tSQE packet jumptable contents:\n", 0x80 },
+	};
+
+	for (int i = 0; i < 5; i++) {
+		/* Don't bother printing the jumptable normally */
+		if ((i == 4) && !verbose)
+			continue;
+
+		printf("%s", hexdumps[i].title);
+		dump_hex_ascii(dbg + i * 0x1000, 4 * hexdumps[i].dwords, 1);
+	}
+
+	printf("\tSQE scratch control regs:\n");
+	dump_control_regs(dbg + 5 * 0x1000);
+}
+
+/*
+ * Print one queued register write from the CP mem pool.
+ *
+ * reg/data are the register offset and the value being written.  When
+ * pipe is set, reg is looked up in the pipe register database, otherwise
+ * it is a normal register written for the given context.  Pipe registers
+ * which are unknown (or for which no database is loaded, which is only
+ * done for a6xx) are printed raw instead of dereferencing a NULL lookup
+ * result.
+ */
+static void
+dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context, bool pipe)
+{
+	if (!pipe) {
+		printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
+		dump_register_val(reg, data, 2);
+		return;
+	}
+
+	struct rnndecaddrinfo *info = rnn_pipe ? rnn_reginfo(rnn_pipe, reg) : NULL;
+
+	if (!info) {
+		printf("\t\twrite <%02x> (%02x) pipe\n", reg, reg);
+		printf("\t\t\t%08x\n", data);
+		return;
+	}
+
+	printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
+
+	/* registers that ignore their payload */
+	if (info->typeinfo && !strcmp(info->typeinfo->name, "void"))
+		return;
+
+	printf("\t\t\t");
+	dump_register(rnn_pipe, reg, data);
+}
+
+/*
+ * Decode one 128-bit mem pool chunk (four dwords at chunk).  A chunk is
+ * interpreted as up to two register writes, each with an enable bit, a
+ * 32b payload, an 18b register offset, a pipe flag and a context bit;
+ * each enabled write is printed via dump_mem_pool_reg_write().
+ */
+static void
+dump_mem_pool_chunk(const uint32_t *chunk)
+{
+	/* The field widths add up to exactly 128 bits.  NOTE(review): the
+	 * decode relies on the compiler laying these packed bitfields out
+	 * in declaration order starting from the least significant bit --
+	 * confirm for the target ABI.
+	 */
+	struct __attribute__((packed)) {
+		bool reg0_enabled : 1;
+		bool reg1_enabled : 1;
+		uint32_t data0 : 32;
+		uint32_t data1 : 32;
+		uint32_t reg0 : 18;
+		uint32_t reg1 : 18;
+		bool reg0_pipe : 1;
+		bool reg1_pipe : 1;
+		uint32_t reg0_context : 1;
+		uint32_t reg1_context : 1;
+		uint32_t padding : 22;
+	} fields;
+
+	/* copy rather than cast, to avoid alignment/aliasing problems: */
+	memcpy(&fields, chunk, 4 * sizeof(uint32_t));
+
+	if (fields.reg0_enabled) {
+		dump_mem_pool_reg_write(fields.reg0, fields.data0, fields.reg0_context, fields.reg0_pipe);
+	}
+
+	if (fields.reg1_enabled) {
+		dump_mem_pool_reg_write(fields.reg1, fields.data1, fields.reg1_context, fields.reg1_pipe);
+	}
+}
+
+/*
+ * Decode the CP mem pool dump: hexdump the "data2" area, then for each
+ * of the six cluster queues walk the linked list of blocks from the read
+ * pointer to the write pointer, decoding every chunk in between, and
+ * finally compare the number of chunks walked against the queue size
+ * recorded in the dump.  Exits with an error if a block index outside of
+ * the pool is encountered.
+ */
+static void
+dump_cp_mem_pool(uint32_t *mempool)
+{
+	/* The mem pool is a shared pool of memory used for storing in-flight
+	 * register writes. There are 6 different queues, one for each
+	 * cluster. Writing to $data (or for some special registers, $addr)
+	 * pushes data onto the appropriate queue, and each queue is pulled
+	 * from by the appropriate cluster. The queues are thus written to
+	 * in-order, but may be read out-of-order.
+	 *
+	 * The queues are conceptually divided into 128-bit "chunks", and the
+	 * read and write pointers are in units of chunks. These chunks are
+	 * organized internally into 8-chunk "blocks", and memory is allocated
+	 * dynamically in terms of blocks. Each queue is represented as a
+	 * singly-linked list of blocks, as well as 3-bit start/end chunk
+	 * pointers that point within the first/last block. The next pointers
+	 * are located in a separate array, rather than inline.
+	 */
+
+	/* TODO: The firmware CP_MEM_POOL save/restore routines do something
+	 * like:
+	 *
+	 * cread $02, [ $00 + 0 ]
+	 * and $02, $02, 0x118
+	 * ...
+	 * brne $02, 0, #label
+	 * mov $03, 0x2000
+	 * mov $03, 0x1000
+	 * label:
+	 * ...
+	 *
+	 * I think that control register 0 is the GPU version, and some
+	 * versions have a smaller mem pool. It seems some models have a mem
+	 * pool that's half the size, and a bunch of offsets are shifted
+	 * accordingly. Unfortunately the kernel driver's dumping code doesn't
+	 * seem to take this into account, even the downstream android driver,
+	 * and we don't know which versions 0x8, 0x10, or 0x100 correspond
+	 * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
+	 */
+	bool small_mem_pool = false;
+
+	/* The array of next pointers for each block. */
+	const uint32_t *next_pointers = small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
+
+	/* Maximum number of blocks in the pool, also the size of the pointers
+	 * array.
+	 */
+	const int num_blocks = small_mem_pool ? 0x30 : 0x80;
+
+	/* Number of queues */
+	const unsigned num_queues = 6;
+
+	/* Unfortunately the per-queue state is a little more complicated than
+	 * a simple pair of begin/end pointers. Instead of a single beginning
+	 * block, there are *two*, with the property that either the two are
+	 * equal or the second is the "next" of the first. Similarly there are
+	 * two end blocks. Thus the queue either looks like this:
+	 *
+	 * A -> B -> ... -> C -> D
+	 *
+	 * Or like this, or some combination:
+	 *
+	 * A/B -> ... -> C/D
+	 *
+	 * However, there's only one beginning/end chunk offset. Now the
+	 * question is, which of A or B is the actual start? I.e. is the chunk
+	 * offset an offset inside A or B? It depends. I'll show a typical read
+	 * cycle, starting here (read pointer marked with a *) with a chunk
+	 * offset of 0:
+	 *
+	 * A B
+	 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+	 * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
+	 *
+	 * Once the pointer advances far enough, the hardware decides to free
+	 * A, after which the read-side state looks like:
+	 *
+	 * (free) A/B
+	 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+	 * |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
+	 *
+	 * Then after advancing the pointer a bit more, the hardware fetches
+	 * the "next" pointer for A and stores it in B:
+	 *
+	 * (free) A B
+	 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+	 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
+	 *
+	 * Then the read pointer advances into B, at which point we've come
+	 * back to the first state having advanced a whole block:
+	 *
+	 * (free) A B
+	 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+	 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
+	 *
+	 *
+	 * There is a similar cycle for the write pointer. Now, the question
+	 * is, how do we know which state we're in? We need to know this to
+	 * know whether the pointer (*) is in A or B if they're different. It
+	 * seems like there should be some bit somewhere describing this, but
+	 * after lots of experimentation I've come up empty-handed. For now we
+	 * assume that if the pointer is in the first half, then we're in
+	 * either the first or second state and use B, and otherwise we're in
+	 * the second or third state and use A. So far I haven't seen anything
+	 * that violates this assumption.
+	 */
+
+	/* Layout of the per-queue state area, copied out of the dump below.
+	 * Each group is padded out to a multiple of 8 dwords.
+	 */
+	struct {
+		uint32_t unk0;
+		uint32_t padding0[7]; /* Mirrors of unk0 */
+
+		struct {
+			uint32_t chunk : 3;
+			uint32_t first_block : 32 - 3;
+		} writer[6];
+		uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
+
+		uint32_t unk1;
+		uint32_t padding2[7]; /* Mirrors of unk1 */
+
+		uint32_t writer_second_block[6];
+		uint32_t padding3[2];
+
+		uint32_t unk2[6];
+		uint32_t padding4[2];
+
+		struct {
+			uint32_t chunk : 3;
+			uint32_t first_block : 32 - 3;
+		} reader[6];
+		uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
+
+		uint32_t unk3;
+		uint32_t padding6[7]; /* Mirrors of unk3 */
+
+		uint32_t reader_second_block[6];
+		uint32_t padding7[2];
+
+		uint32_t block_count[6];
+		uint32_t padding[2];
+
+		uint32_t unk4;
+		uint32_t padding9[7]; /* Mirrors of unk4 */
+	} data1;
+
+	const uint32_t *data1_ptr = small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
+	memcpy(&data1, data1_ptr, sizeof(data1));
+
+	/* Based on the kernel, the first dword is the mem pool size (in
+	 * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
+	 */
+	const uint32_t *data2_ptr = small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
+	const int data2_size = 0x60;
+
+	/* This seems to be the size of each queue in chunks. */
+	const uint32_t *queue_sizes = &data2_ptr[0x18];
+
+	printf("\tdata2:\n");
+	dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
+
+	/* These seem to be some kind of counter of allocated/deallocated blocks */
+	if (verbose) {
+		printf("\tunk0: %x\n", data1.unk0);
+		printf("\tunk1: %x\n", data1.unk1);
+		printf("\tunk3: %x\n", data1.unk3);
+		printf("\tunk4: %x\n\n", data1.unk4);
+	}
+
+	for (int queue = 0; queue < num_queues; queue++) {
+		const char *cluster_names[6] = {
+			"FE", "SP_VS", "PC_VS", "GRAS", "SP_PS", "PS"
+		};
+		printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
+
+		if (verbose) {
+			printf("\t\twriter_first_block: 0x%x\n", data1.writer[queue].first_block);
+			printf("\t\twriter_second_block: 0x%x\n", data1.writer_second_block[queue]);
+			printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
+			printf("\t\treader_first_block: 0x%x\n", data1.reader[queue].first_block);
+			printf("\t\treader_second_block: 0x%x\n", data1.reader_second_block[queue]);
+			printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
+			printf("\t\tblock_count: %d\n", data1.block_count[queue]);
+			printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
+			printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
+		}
+
+		/* Pick the block the chunk offset refers to, following the
+		 * "which half is the pointer in" heuristic described above:
+		 */
+		uint32_t cur_chunk = data1.reader[queue].chunk;
+		uint32_t cur_block = cur_chunk > 3 ?
+			data1.reader[queue].first_block :
+			data1.reader_second_block[queue];
+		uint32_t last_chunk = data1.writer[queue].chunk;
+		uint32_t last_block = last_chunk > 3 ?
+			data1.writer[queue].first_block :
+			data1.writer_second_block[queue];
+
+		if (verbose)
+			printf("\tblock %x\n", cur_block);
+		if (cur_block >= num_blocks) {
+			fprintf(stderr, "block %x too large\n", cur_block);
+			exit(1);
+		}
+		unsigned calculated_queue_size = 0;
+		/* NOTE(review): nothing bounds this walk if the next pointers
+		 * form a cycle which never reaches last_block -- confirm
+		 * whether that can happen with real dumps.
+		 */
+		while (cur_block != last_block || cur_chunk != last_chunk) {
+			calculated_queue_size++;
+			/* each block is 0x20 dwords: 8 chunks of 4 dwords */
+			uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
+
+			dump_mem_pool_chunk(chunk_ptr);
+
+			/* NOTE(review): the printed offset uses "cur_chunk + 4"
+			 * while the chunk is fetched from "cur_chunk * 4" --
+			 * looks like a typo, confirm which is intended.
+			 */
+			printf("\t%05x: %08x %08x %08x %08x\n",
+					4 * (cur_block * 0x20 + cur_chunk + 4),
+					chunk_ptr[0], chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
+
+			cur_chunk++;
+			if (cur_chunk == 8) {
+				/* end of block, follow the list to the next one: */
+				cur_block = next_pointers[cur_block];
+				if (verbose)
+					printf("\tblock %x\n", cur_block);
+				if (cur_block >= num_blocks) {
+					fprintf(stderr, "block %x too large\n", cur_block);
+					exit(1);
+				}
+				cur_chunk = 0;
+			}
+		}
+		if (calculated_queue_size != queue_sizes[queue]) {
+			printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n", calculated_queue_size);
+		}
+		printf("\n");
+	}
+}
+
+/*
+ * Parse the "indexed-registers:" section.  Each entry gives a name, a
+ * size in dwords and ascii85 encoded data.  Blocks with a dedicated
+ * decoder (CP_SEQ_STAT, CP_UCODE_DBG_DATA, CP_MEMPOOL) are decoded, and a
+ * hexdump is printed for the interesting blocks (or for all of them in
+ * verbose mode).  Lines other than the data marker are echoed.
+ */
+static void
+decode_indexed_registers(void)
+{
+	char *name = NULL;
+	uint32_t sizedwords = 0;
+
+	foreach_line_in_section (line) {
+		if (startswith(line, " - regs-name:")) {
+			free(name);
+			name = NULL;
+			parseline(line, " - regs-name: %ms", &name);
+		} else if (startswith(line, " dwords:")) {
+			parseline(line, " dwords: %u", &sizedwords);
+		} else if (startswith(line, " data: !!ascii85 |")) {
+			uint32_t *buf = popline_ascii85(sizedwords);
+
+			/* data without a preceding regs-name line matches
+			 * none of the names below, rather than passing NULL
+			 * to strcmp():
+			 */
+			const char *regs = name ? name : "";
+
+			/* some of the sections are pretty large, and are (at least
+			 * so far) not useful, so skip them if not in verbose mode:
+			 */
+			bool dump = verbose ||
+				!strcmp(regs, "CP_SEQ_STAT") ||
+				!strcmp(regs, "CP_DRAW_STATE") ||
+				!strcmp(regs, "CP_ROQ") ||
+				0;
+
+			if (!strcmp(regs, "CP_SEQ_STAT"))
+				dump_cp_seq_stat(buf);
+
+			if (!strcmp(regs, "CP_UCODE_DBG_DATA"))
+				dump_cp_ucode_dbg(buf);
+
+			/* note that name was typo'd in earlier kernels: */
+			if (!strcmp(regs, "CP_MEMPOOL") || !strcmp(regs, "CP_MEMPOOOL"))
+				dump_cp_mem_pool(buf);
+
+			if (dump)
+				dump_hex_ascii(buf, 4 * sizedwords, 1);
+			free(buf);
+
+			continue;
+		}
+
+		printf("%s", line);
+	}
+
+	/* the last name parsed is still owned by us: */
+	free(name);
+}
+
+/*
+ * Decode shader-blocks:
+ */
+
+/*
+ * Parse the "shader-blocks:" section.  Each entry gives a type, a size
+ * (used as a dword count) and ascii85 encoded data.  Instruction memory
+ * blocks are disassembled (recovering from disassembler asserts) and
+ * hexdumped; other blocks are only hexdumped in verbose mode.  Lines
+ * other than the data marker are echoed.
+ */
+static void
+decode_shader_blocks(void)
+{
+	char *type = NULL;
+	uint32_t sizedwords = 0;
+
+	foreach_line_in_section (line) {
+		if (startswith(line, " - type:")) {
+			free(type);
+			type = NULL;
+			parseline(line, " - type: %ms", &type);
+		} else if (startswith(line, " size:")) {
+			parseline(line, " size: %u", &sizedwords);
+		} else if (startswith(line, " data: !!ascii85 |")) {
+			uint32_t *buf = popline_ascii85(sizedwords);
+
+			/* data without a preceding type line is treated as an
+			 * unknown block type, rather than passing NULL to
+			 * strcmp():
+			 */
+			const char *kind = type ? type : "";
+
+			bool is_instr_mem =
+				!strcmp(kind, "A6XX_SP_INST_DATA") ||
+				!strcmp(kind, "A6XX_HLSQ_INST_RAM");
+
+			/* some of the sections are pretty large, and are (at least
+			 * so far) not useful, so skip them if not in verbose mode:
+			 */
+			bool dump = verbose || is_instr_mem;
+
+			if (is_instr_mem) {
+				/* TODO this section actually contains multiple shaders
+				 * (or parts of shaders?), so perhaps we should search
+				 * for ends of shaders and decode each?
+				 */
+				TRY(disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id));
+			}
+
+			if (dump)
+				dump_hex_ascii(buf, 4 * sizedwords, 1);
+
+			free(buf);
+
+			continue;
+		}
+
+		printf("%s", line);
+	}
+
+	free(type);
+}
+
+/*
+ * Decode debugbus section:
+ */
+
+/*
+ * Parse the "debugbus:" section.  The data of each block is decoded, but
+ * only hexdumped in verbose mode.  Lines other than the data marker are
+ * echoed.
+ */
+static void
+decode_debugbus(void)
+{
+	char *block = NULL;
+	uint32_t sizedwords = 0;
+
+	foreach_line_in_section (line) {
+		if (startswith(line, " - debugbus-block:")) {
+			free(block);
+			block = NULL;
+			parseline(line, " - debugbus-block: %ms", &block);
+		} else if (startswith(line, " count:")) {
+			parseline(line, " count: %u", &sizedwords);
+		} else if (startswith(line, " data: !!ascii85 |")) {
+			uint32_t *buf = popline_ascii85(sizedwords);
+
+			/* some of the sections are pretty large, and are (at least
+			 * so far) not useful, so skip them if not in verbose mode:
+			 */
+			bool dump = verbose ||
+				0;
+
+			if (dump)
+				dump_hex_ascii(buf, 4 * sizedwords, 1);
+
+			free(buf);
+
+			continue;
+		}
+
+		printf("%s", line);
+	}
+
+	/* the last block name parsed is still owned by us: */
+	free(block);
+}
+
+/*
+ * Main crashdump decode loop:
+ */
+
+/*
+ * Top level decode loop: echo each line, and dispatch to the section
+ * decoder matching the section header.  The "revision:" line provides
+ * the GPU id, which is needed to initialize cffdec and to pick the
+ * register databases to load.
+ */
+static void
+decode(void)
+{
+	for (;;) {
+		const char *line = popline();
+		if (!line)
+			break;
+
+		printf("%s", line);
+
+		if (startswith(line, "revision:")) {
+			parseline(line, "revision: %u", &options.gpu_id);
+			printf("Got gpu_id=%u\n", options.gpu_id);
+
+			cffdec_init(&options);
+
+			if (is_a6xx()) {
+				rnn_gmu = rnn_new(!options.color);
+				rnn_load_file(rnn_gmu, "adreno/a6xx_gmu.xml", "A6XX");
+				rnn_control = rnn_new(!options.color);
+				rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", "A6XX_CONTROL_REG");
+				rnn_pipe = rnn_new(!options.color);
+				rnn_load_file(rnn_pipe, "adreno/adreno_pipe_regs.xml", "A6XX_PIPE_REG");
+			} else if (is_a5xx()) {
+				rnn_control = rnn_new(!options.color);
+				rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", "A5XX_CONTROL_REG");
+			} else {
+				rnn_control = NULL;
+			}
+			continue;
+		}
+
+		if (startswith(line, "bos:")) {
+			decode_bos();
+			continue;
+		}
+
+		if (startswith(line, "ringbuffer:")) {
+			decode_ringbuffer();
+			continue;
+		}
+
+		if (startswith(line, "registers:")) {
+			decode_registers();
+
+			/* after we've recorded buffer contents, and CP register values,
+			 * we can take a stab at decoding the cmdstream:
+			 */
+			dump_cmdstream();
+			continue;
+		}
+
+		if (startswith(line, "registers-gmu:")) {
+			decode_gmu_registers();
+			continue;
+		}
+
+		if (startswith(line, "indexed-registers:")) {
+			decode_indexed_registers();
+			continue;
+		}
+
+		if (startswith(line, "shader-blocks:")) {
+			decode_shader_blocks();
+			continue;
+		}
+
+		if (startswith(line, "clusters:")) {
+			decode_clusters();
+			continue;
+		}
+
+		if (startswith(line, "debugbus:"))
+			decode_debugbus();
+	}
+}
+
+/*
+ * Usage and argument parsing:
+ */
+
+/* Print the command line help to stderr and exit with status 2. */
+static void
+usage(void)
+{
+	static const char help[] =
+		"Usage:\n\n"
+		"\tcrashdec [-achmsv] [-f FILE]\n\n"
+		"Options:\n"
+		"\t-a, --allregs - show all registers (including ones not written since\n"
+		"\t previous draw) at each draw\n"
+		"\t-c, --color - use colors\n"
+		"\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
+		"\t-h, --help - this usage message\n"
+		"\t-m, --markers - try to decode CP_NOP string markers\n"
+		"\t-s, --summary - don't show individual register writes, but just show\n"
+		"\t register values on draws\n"
+		"\t-v, --verbose - dump more verbose output, including contents of\n"
+		"\t less interesting buffers\n"
+		"\n";
+
+	fputs(help, stderr);
+	exit(2);
+}
+
+/* Long options, each mapped onto its single character equivalent. */
+static const struct option opts[] = {
+	{ .name = "allregs", .has_arg = no_argument,       .flag = NULL, .val = 'a' },
+	{ .name = "color",   .has_arg = no_argument,       .flag = NULL, .val = 'c' },
+	{ .name = "file",    .has_arg = required_argument, .flag = NULL, .val = 'f' },
+	{ .name = "help",    .has_arg = no_argument,       .flag = NULL, .val = 'h' },
+	{ .name = "markers", .has_arg = no_argument,       .flag = NULL, .val = 'm' },
+	{ .name = "summary", .has_arg = no_argument,       .flag = NULL, .val = 's' },
+	{ .name = "verbose", .has_arg = no_argument,       .flag = NULL, .val = 'v' },
+	/* all-zero terminator: */
+	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
+};
+
+/* Whether stdout is a terminal, in which case output goes through a pager. */
+static bool interactive;
+
+/* Flush pending output, and close the pager if one was opened. */
+static void
+cleanup(void)
+{
+	fflush(stdout);
+
+	if (!interactive)
+		return;
+
+	pager_close();
+}
+
+/*
+ * Entry point: parse the command line, set up the pager when writing to
+ * a terminal, and decode the crashdump from the selected input.
+ */
+int
+main(int argc, char **argv)
+{
+	int c;
+
+	interactive = isatty(STDOUT_FILENO);
+	options.color = interactive;
+
+	/* default to read from stdin: */
+	in = stdin;
+
+	while ((c = getopt_long(argc, argv, "acf:hmsv", opts, NULL)) != -1) {
+		switch (c) {
+		case 'a':
+			options.allregs = true;
+			break;
+		case 'c':
+			options.color = true;
+			break;
+		case 'f':
+			in = fopen(optarg, "r");
+			/* fail early with a useful message, rather than
+			 * crashing on the first read from a NULL stream:
+			 */
+			if (!in) {
+				perror(optarg);
+				exit(1);
+			}
+			break;
+		case 'm':
+			options.decode_markers = true;
+			break;
+		case 's':
+			options.summary = true;
+			break;
+		case 'v':
+			verbose = true;
+			break;
+		case 'h':
+		default:
+			usage();
+		}
+	}
+
+	if (interactive) {
+		pager_open();
+	}
+
+	atexit(cleanup);
+
+	/* Note that popline() exits the process at end of input, so in
+	 * practice cleanup() runs via atexit() rather than the call below.
+	 */
+	decode();
+	cleanup();
+
+	return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "disasm.h"
+#include "instr-a2xx.h"
+#include "rnnutil.h"
+
+/*
+ * Indentation prefix per nesting level: N tabs for levels 0 through 9,
+ * and an "x" marker for the remaining (deeper) levels.
+ */
+static const char *levels[] = {
+	"", "\t", "\t\t", "\t\t\t", "\t\t\t\t",
+	"\t\t\t\t\t", "\t\t\t\t\t\t", "\t\t\t\t\t\t\t",
+	"\t\t\t\t\t\t\t\t", "\t\t\t\t\t\t\t\t\t",
+	"x", "x", "x", "x", "x", "x",
+};
+
+/* Debug flags; PRINT_RAW makes the disassembler also print raw dwords. */
+enum debug_t debug;
+
+/* Register database, used here to look up enum value names. */
+static struct rnn *rnn;
+
+/*
+ * ALU instructions:
+ */
+
+/*
+ * Characters used to print swizzle/write-mask channels, indexed by
+ * channel selector.
+ */
+static const char chan_names[] = {
+	'x', 'y', 'z', 'w',
+	/* these only apply to FETCH dst's: */
+	'0', '1', '?', '_',
+};
+
+/*
+ * Print an ALU source operand: optional '-' (negate) and '|' (abs)
+ * decorations, 'R' or 'C' depending on type followed by the register
+ * number, and the swizzle if it is non-zero.  Each 2-bit swizzle field
+ * is an offset relative to the channel's own position.
+ */
+static void print_srcreg(uint32_t num, uint32_t type,
+		uint32_t swiz, uint32_t negate, uint32_t abs)
+{
+	const char *sign = negate ? "-" : "";
+	const char *bar = abs ? "|" : "";
+
+	printf("%s%s%c%u", sign, bar, type ? 'R' : 'C', num);
+
+	if (swiz) {
+		printf(".");
+		for (uint32_t chan = 0; chan < 4; chan++, swiz >>= 2)
+			printf("%c", chan_names[(swiz + chan) & 0x3]);
+	}
+
+	printf("%s", bar);
+}
+
+/*
+ * Print an ALU destination: "export" or "R" followed by the register
+ * number, plus the write mask unless all four channels are written
+ * (masked-out channels are shown as '_').
+ */
+static void print_dstreg(uint32_t num, uint32_t mask, uint32_t dst_exp)
+{
+	printf("%s%u", dst_exp ? "export" : "R", num);
+
+	if (mask == 0xf)
+		return;
+
+	printf(".");
+	for (int chan = 0; chan < 4; chan++) {
+		uint32_t written = (mask >> chan) & 0x1;
+		printf("%c", written ? chan_names[chan] : '_');
+	}
+}
+
+/*
+ * For exports with a well known meaning, append a comment naming the
+ * GL built-in which export register num corresponds to for the given
+ * shader type.  Prints nothing for other exports.
+ */
+static void print_export_comment(uint32_t num, enum shader_t type)
+{
+	const char *builtin = NULL;
+
+	if (type == SHADER_VERTEX) {
+		if (num == 62)
+			builtin = "gl_Position";
+		else if (num == 63)
+			builtin = "gl_PointSize";
+	} else if (type == SHADER_FRAGMENT) {
+		if (num == 0)
+			builtin = "gl_FragColor";
+	}
+
+	/* if we had a symbol table here, we could look
+	 * up the name of the varying..
+	 */
+	if (!builtin)
+		return;
+
+	printf("\t; %s", builtin);
+}
+
+/*
+ * ALU opcode tables, indexed by opcode: the number of source operands
+ * and the printable name (the stringified opcode identifier).  Opcodes
+ * without an INSTR() entry are left zero-initialized, ie. with a NULL
+ * name.  The opcode constants themselves are defined outside of this
+ * file.
+ */
+struct {
+	uint32_t num_srcs;
+	const char *name;
+} vector_instructions[0x20] = {
+#define INSTR(opc, num_srcs) [opc] = { num_srcs, #opc }
+	INSTR(ADDv, 2),
+	INSTR(MULv, 2),
+	INSTR(MAXv, 2),
+	INSTR(MINv, 2),
+	INSTR(SETEv, 2),
+	INSTR(SETGTv, 2),
+	INSTR(SETGTEv, 2),
+	INSTR(SETNEv, 2),
+	INSTR(FRACv, 1),
+	INSTR(TRUNCv, 1),
+	INSTR(FLOORv, 1),
+	INSTR(MULADDv, 3),
+	INSTR(CNDEv, 3),
+	INSTR(CNDGTEv, 3),
+	INSTR(CNDGTv, 3),
+	INSTR(DOT4v, 2),
+	INSTR(DOT3v, 2),
+	INSTR(DOT2ADDv, 3), // ???
+	INSTR(CUBEv, 2),
+	INSTR(MAX4v, 1),
+	INSTR(PRED_SETE_PUSHv, 2),
+	INSTR(PRED_SETNE_PUSHv, 2),
+	INSTR(PRED_SETGT_PUSHv, 2),
+	INSTR(PRED_SETGTE_PUSHv, 2),
+	INSTR(KILLEv, 2),
+	INSTR(KILLGTv, 2),
+	INSTR(KILLGTEv, 2),
+	INSTR(KILLNEv, 2),
+	INSTR(DSTv, 2),
+	INSTR(MOVAv, 1),
+}, scalar_instructions[0x40] = {
+	/* every scalar op is listed with a single source operand */
+	INSTR(ADDs, 1),
+	INSTR(ADD_PREVs, 1),
+	INSTR(MULs, 1),
+	INSTR(MUL_PREVs, 1),
+	INSTR(MUL_PREV2s, 1),
+	INSTR(MAXs, 1),
+	INSTR(MINs, 1),
+	INSTR(SETEs, 1),
+	INSTR(SETGTs, 1),
+	INSTR(SETGTEs, 1),
+	INSTR(SETNEs, 1),
+	INSTR(FRACs, 1),
+	INSTR(TRUNCs, 1),
+	INSTR(FLOORs, 1),
+	INSTR(EXP_IEEE, 1),
+	INSTR(LOG_CLAMP, 1),
+	INSTR(LOG_IEEE, 1),
+	INSTR(RECIP_CLAMP, 1),
+	INSTR(RECIP_FF, 1),
+	INSTR(RECIP_IEEE, 1),
+	INSTR(RECIPSQ_CLAMP, 1),
+	INSTR(RECIPSQ_FF, 1),
+	INSTR(RECIPSQ_IEEE, 1),
+	INSTR(MOVAs, 1),
+	INSTR(MOVA_FLOORs, 1),
+	INSTR(SUBs, 1),
+	INSTR(SUB_PREVs, 1),
+	INSTR(PRED_SETEs, 1),
+	INSTR(PRED_SETNEs, 1),
+	INSTR(PRED_SETGTs, 1),
+	INSTR(PRED_SETGTEs, 1),
+	INSTR(PRED_SET_INVs, 1),
+	INSTR(PRED_SET_POPs, 1),
+	INSTR(PRED_SET_CLRs, 1),
+	INSTR(PRED_SET_RESTOREs, 1),
+	INSTR(KILLEs, 1),
+	INSTR(KILLGTs, 1),
+	INSTR(KILLGTEs, 1),
+	INSTR(KILLNEs, 1),
+	INSTR(KILLONEs, 1),
+	INSTR(SQRT_IEEE, 1),
+	INSTR(MUL_CONST_0, 1),
+	INSTR(MUL_CONST_1, 1),
+	INSTR(ADD_CONST_0, 1),
+	INSTR(ADD_CONST_1, 1),
+	INSTR(SUB_CONST_0, 1),
+	INSTR(SUB_CONST_1, 1),
+	INSTR(SIN, 1),
+	INSTR(COS, 1),
+	INSTR(RETAIN_PREV, 1),
+#undef INSTR
+};
+
+/*
+ * Disassemble one ALU instruction (three dwords at dwords) to stdout.
+ *
+ * alu_off is the instruction offset, printed together with the raw
+ * dwords when PRINT_RAW is set; level selects the indentation; sync adds
+ * the "(S)" marker; type is used to annotate well known exports.  The
+ * vector op is always printed, the co-issued scalar op only if it writes
+ * something or the vector op writes nothing.  Opcodes missing from the
+ * name tables are printed as OP(n).  Always returns 0.
+ */
+static int disasm_alu(uint32_t *dwords, uint32_t alu_off,
+		int level, int sync, enum shader_t type)
+{
+	instr_alu_t *alu = (instr_alu_t *)dwords;
+
+	printf("%s", levels[level]);
+	if (debug & PRINT_RAW) {
+		printf("%02x: %08x %08x %08x\t", alu_off,
+				dwords[0], dwords[1], dwords[2]);
+	}
+
+	printf(" %sALU:\t", sync ? "(S)" : " ");
+
+	/* not every opcode has a table entry; passing a NULL name to
+	 * printf("%s") would be undefined behavior:
+	 */
+	if (vector_instructions[alu->vector_opc].name) {
+		printf("%s", vector_instructions[alu->vector_opc].name);
+	} else {
+		printf("OP(%u)", alu->vector_opc);
+	}
+
+	if (alu->pred_select & 0x2) {
+		/* seems to work similar to conditional execution in ARM instruction
+		 * set, so let's use a similar syntax for now:
+		 */
+		printf("%s", (alu->pred_select & 0x1) ? "EQ" : "NE");
+	}
+
+	printf("\t");
+
+	print_dstreg(alu->vector_dest, alu->vector_write_mask, alu->export_data);
+	printf(" = ");
+	/* the third source (if any) is printed first: */
+	if (vector_instructions[alu->vector_opc].num_srcs == 3) {
+		print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz,
+				alu->src3_reg_negate, alu->src3_reg_abs);
+		printf(", ");
+	}
+	print_srcreg(alu->src1_reg, alu->src1_sel, alu->src1_swiz,
+			alu->src1_reg_negate, alu->src1_reg_abs);
+	if (vector_instructions[alu->vector_opc].num_srcs > 1) {
+		printf(", ");
+		print_srcreg(alu->src2_reg, alu->src2_sel, alu->src2_swiz,
+				alu->src2_reg_negate, alu->src2_reg_abs);
+	}
+
+	if (alu->vector_clamp)
+		printf(" CLAMP");
+
+	if (alu->export_data)
+		print_export_comment(alu->vector_dest, type);
+
+	printf("\n");
+
+	if (alu->scalar_write_mask || !alu->vector_write_mask) {
+		/* 2nd optional scalar op: */
+
+		printf("%s", levels[level]);
+		if (debug & PRINT_RAW)
+			printf(" \t");
+
+		if (scalar_instructions[alu->scalar_opc].name) {
+			printf("\t \t%s\t", scalar_instructions[alu->scalar_opc].name);
+		} else {
+			printf("\t \tOP(%u)\t", alu->scalar_opc);
+		}
+
+		print_dstreg(alu->scalar_dest, alu->scalar_write_mask, alu->export_data);
+		printf(" = ");
+		print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz,
+				alu->src3_reg_negate, alu->src3_reg_abs);
+		// TODO ADD/MUL must have another src?!?
+		if (alu->scalar_clamp)
+			printf(" CLAMP");
+		if (alu->export_data)
+			print_export_comment(alu->scalar_dest, type);
+		printf("\n");
+	}
+
+	return 0;
+}
+
+
+/*
+ * FETCH instructions:
+ */
+
+/*
+ * Print a fetch destination as "\tR<n>." followed by four channel
+ * characters.  Each channel selector takes three bits of dst_swiz, lowest
+ * bits first, and is translated through chan_names[].
+ */
+static void print_fetch_dst(uint32_t dst_reg, uint32_t dst_swiz)
+{
+	unsigned shift;
+
+	printf("\tR%u.", dst_reg);
+	for (shift = 0; shift < 4 * 3; shift += 3)
+		printf("%c", chan_names[(dst_swiz >> shift) & 0x7]);
+}
+
+/*
+ * Print the operands and attributes of a vertex fetch: optional predicate
+ * suffix, destination, source register/channel, surface format name (looked
+ * up through rnn), signedness, normalization, stride, offset and the fetch
+ * constant index.
+ */
+static void print_fetch_vtx(instr_fetch_t *fetch)
+{
+	instr_fetch_vtx_t *vtx = &fetch->vtx;
+	const char *fmt;
+
+	/* seems to work similar to conditional execution in ARM instruction
+	 * set, so let's use a similar syntax for now:
+	 */
+	if (vtx->pred_select) {
+		if (vtx->pred_condition)
+			printf("EQ");
+		else
+			printf("NE");
+	}
+
+	print_fetch_dst(vtx->dst_reg, vtx->dst_swiz);
+	printf(" = R%u.", vtx->src_reg);
+	printf("%c", chan_names[vtx->src_swiz & 0x3]);
+
+	/* fall back to the raw value when the format has no known name */
+	fmt = rnn_enumname(rnn, "a2xx_sq_surfaceformat", vtx->format);
+	if (!fmt)
+		printf(" TYPE(0x%x)", vtx->format);
+	else
+		printf(" %s", fmt);
+
+	if (vtx->format_comp_all)
+		printf(" %s", "SIGNED");
+	else
+		printf(" %s", "UNSIGNED");
+
+	if (!vtx->num_format_all)
+		printf(" NORMALIZED");
+
+	printf(" STRIDE(%u)", vtx->stride);
+
+	if (vtx->offset)
+		printf(" OFFSET(%u)", vtx->offset);
+
+	printf(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel);
+
+	if (0) {
+		// XXX
+		printf(" src_reg_am=%u", vtx->src_reg_am);
+		printf(" dst_reg_am=%u", vtx->dst_reg_am);
+		printf(" num_format_all=%u", vtx->num_format_all);
+		printf(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all);
+		printf(" exp_adjust_all=%u", vtx->exp_adjust_all);
+	}
+}
+
+static void print_fetch_tex(instr_fetch_t *fetch)
+{
+ static const char *filter[] = {
+ [TEX_FILTER_POINT] = "POINT",
+ [TEX_FILTER_LINEAR] = "LINEAR",
+ [TEX_FILTER_BASEMAP] = "BASEMAP",
+ };
+ static const char *aniso_filter[] = {
+ [ANISO_FILTER_DISABLED] = "DISABLED",
+ [ANISO_FILTER_MAX_1_1] = "MAX_1_1",
+ [ANISO_FILTER_MAX_2_1] = "MAX_2_1",
+ [ANISO_FILTER_MAX_4_1] = "MAX_4_1",
+ [ANISO_FILTER_MAX_8_1] = "MAX_8_1",
+ [ANISO_FILTER_MAX_16_1] = "MAX_16_1",
+ };
+ static const char *arbitrary_filter[] = {
+ [ARBITRARY_FILTER_2X4_SYM] = "2x4_SYM",
+ [ARBITRARY_FILTER_2X4_ASYM] = "2x4_ASYM",
+ [ARBITRARY_FILTER_4X2_SYM] = "4x2_SYM",
+ [ARBITRARY_FILTER_4X2_ASYM] = "4x2_ASYM",
+ [ARBITRARY_FILTER_4X4_SYM] = "4x4_SYM",
+ [ARBITRARY_FILTER_4X4_ASYM] = "4x4_ASYM",
+ };
+ static const char *sample_loc[] = {
+ [SAMPLE_CENTROID] = "CENTROID",
+ [SAMPLE_CENTER] = "CENTER",
+ };
+ instr_fetch_tex_t *tex = &fetch->tex;
+ uint32_t src_swiz = tex->src_swiz;
+ int i;
+
+ if (tex->pred_select) {
+ /* seems to work similar to conditional execution in ARM instruction
+ * set, so let's use a similar syntax for now:
+ */
+ printf(tex->pred_condition ? "EQ" : "NE");
+ }
+
+ print_fetch_dst(tex->dst_reg, tex->dst_swiz);
+ printf(" = R%u.", tex->src_reg);
+ for (i = 0; i < 3; i++) {
+ printf("%c", chan_names[src_swiz & 0x3]);
+ src_swiz >>= 2;
+ }
+ printf(" CONST(%u)", tex->const_idx);
+ if (tex->fetch_valid_only)
+ printf(" VALID_ONLY");
+ if (tex->tx_coord_denorm)
+ printf(" DENORM");
+ if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST)
+ printf(" MAG(%s)", filter[tex->mag_filter]);
+ if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST)
+ printf(" MIN(%s)", filter[tex->min_filter]);
+ if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST)
+ printf(" MIP(%s)", filter[tex->mip_filter]);
+ if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST)
+ printf(" ANISO(%s)", aniso_filter[tex->aniso_filter]);
+ if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST)
+ printf(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]);
+ if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST)
+ printf(" VOL_MAG(%s)", filter[tex->vol_mag_filter]);
+ if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST)
+ printf(" VOL_MIN(%s)", filter[tex->vol_min_filter]);
+ if (!tex->use_comp_lod) {
+ printf(" LOD(%u)", tex->use_comp_lod);
+ printf(" LOD_BIAS(%u)", tex->lod_bias);
+ }
+ if (tex->use_reg_lod) {
+ printf(" REG_LOD(%u)", tex->use_reg_lod);
+ }
+ if (tex->use_reg_gradients)
+ printf(" USE_REG_GRADIENTS");
+ printf(" LOCATION(%s)", sample_loc[tex->sample_location]);
+ if (tex->offset_x || tex->offset_y || tex->offset_z)
+ printf(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z);
+}
+
+/*
+ * Dispatch table for FETCH instructions, indexed by opcode.  Each entry
+ * holds the name printed by disasm_fetch() and the routine that prints the
+ * instruction's operands.  Any index not listed below is left
+ * zero-initialized, i.e. has a NULL name and a NULL handler.
+ */
+struct {
+ const char *name;
+ void (*fxn)(instr_fetch_t *cf);
+} fetch_instructions[] = {
+#define INSTR(opc, name, fxn) [opc] = { name, fxn }
+ INSTR(VTX_FETCH, "VERTEX", print_fetch_vtx),
+ INSTR(TEX_FETCH, "SAMPLE", print_fetch_tex),
+ INSTR(TEX_GET_BORDER_COLOR_FRAC, "?", print_fetch_tex),
+ INSTR(TEX_GET_COMP_TEX_LOD, "?", print_fetch_tex),
+ INSTR(TEX_GET_GRADIENTS, "?", print_fetch_tex),
+ INSTR(TEX_GET_WEIGHTS, "?", print_fetch_tex),
+ INSTR(TEX_SET_TEX_LOD, "SET_TEX_LOD", print_fetch_tex),
+ INSTR(TEX_SET_GRADIENTS_H, "?", print_fetch_tex),
+ INSTR(TEX_SET_GRADIENTS_V, "?", print_fetch_tex),
+ INSTR(TEX_RESERVED_4, "?", print_fetch_tex),
+#undef INSTR
+};
+
+/*
+ * Disassemble one FETCH instruction and print it to stdout.
+ *
+ * dwords:  the three dwords making up the instruction
+ * alu_off: instruction offset, only printed in PRINT_RAW mode
+ * level:   index into levels[] selecting the indentation prefix
+ * sync:    non-zero to tag the line with "(S)"
+ *
+ * fetch_instructions[] is sparse, so an opcode with no handler (or one past
+ * the end of the table) is printed as OP(n) instead of calling through a
+ * NULL function pointer.
+ *
+ * Always returns 0.
+ */
+static int disasm_fetch(uint32_t *dwords, uint32_t alu_off, int level, int sync)
+{
+	instr_fetch_t *fetch = (instr_fetch_t *)dwords;
+	unsigned opc = fetch->opc;
+
+	printf("%s", levels[level]);
+	if (debug & PRINT_RAW) {
+		printf("%02x: %08x %08x %08x\t", alu_off,
+				dwords[0], dwords[1], dwords[2]);
+	}
+
+	printf(" %sFETCH:\t", sync ? "(S)" : " ");
+	if ((opc < sizeof(fetch_instructions) / sizeof(fetch_instructions[0])) &&
+			fetch_instructions[opc].fxn) {
+		printf("%s", fetch_instructions[opc].name);
+		fetch_instructions[opc].fxn(fetch);
+	} else {
+		printf("OP(%u)", opc);
+	}
+	printf("\n");
+
+	return 0;
+}
+
+/*
+ * CF instructions:
+ */
+
+/* Return 1 when the CF instruction is any of the EXEC variants, else 0. */
+static int cf_exec(instr_cf_t *cf)
+{
+	switch (cf->opc) {
+	case EXEC:
+	case EXEC_END:
+	case COND_EXEC:
+	case COND_EXEC_END:
+	case COND_PRED_EXEC:
+	case COND_PRED_EXEC_END:
+	case COND_EXEC_PRED_CLEAN:
+	case COND_EXEC_PRED_CLEAN_END:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Return 1 when the CF instruction is a conditional EXEC variant, else 0. */
+static int cf_cond_exec(instr_cf_t *cf)
+{
+	switch (cf->opc) {
+	case COND_EXEC:
+	case COND_EXEC_END:
+	case COND_PRED_EXEC:
+	case COND_PRED_EXEC_END:
+	case COND_EXEC_PRED_CLEAN:
+	case COND_EXEC_PRED_CLEAN_END:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Operand printer for CF instructions that have nothing to print. */
+static void print_cf_nop(instr_cf_t *cf)
+{
+}
+
+/*
+ * Print the fields of an EXEC-family CF instruction: address and count
+ * always, the remaining fields only when set.  COND() is printed for the
+ * conditional variants only.
+ */
+static void print_cf_exec(instr_cf_t *cf)
+{
+	const int is_absolute = (cf->exec.address_mode == ABSOLUTE_ADDR);
+
+	printf(" ADDR(0x%x) CNT(0x%x)", cf->exec.address, cf->exec.count);
+
+	if (cf->exec.yeild)
+		fputs(" YIELD", stdout);
+
+	if (cf->exec.vc)
+		printf(" VC(0x%x)", cf->exec.vc);
+
+	if (cf->exec.bool_addr)
+		printf(" BOOL_ADDR(0x%x)", cf->exec.bool_addr);
+
+	if (is_absolute)
+		fputs(" ABSOLUTE_ADDR", stdout);
+
+	if (cf_cond_exec(cf))
+		printf(" COND(%d)", cf->exec.condition);
+}
+
+/* Print the address, loop id and addressing mode of a LOOP CF instruction. */
+static void print_cf_loop(instr_cf_t *cf)
+{
+	const int is_absolute = (cf->loop.address_mode == ABSOLUTE_ADDR);
+
+	printf(" ADDR(0x%x) LOOP_ID(%d)", cf->loop.address, cf->loop.loop_id);
+	if (is_absolute)
+		fputs(" ABSOLUTE_ADDR", stdout);
+}
+
+/*
+ * Print the fields of a jump/call/return CF instruction: address and
+ * direction always, the remaining fields only when set.
+ */
+static void print_cf_jmp_call(instr_cf_t *cf)
+{
+	const int is_absolute = (cf->jmp_call.address_mode == ABSOLUTE_ADDR);
+
+	printf(" ADDR(0x%x) DIR(%d)", cf->jmp_call.address, cf->jmp_call.direction);
+
+	if (cf->jmp_call.force_call)
+		fputs(" FORCE_CALL", stdout);
+
+	if (cf->jmp_call.predicated_jmp)
+		printf(" COND(%d)", cf->jmp_call.condition);
+
+	if (cf->jmp_call.bool_addr)
+		printf(" BOOL_ADDR(0x%x)", cf->jmp_call.bool_addr);
+
+	if (is_absolute)
+		fputs(" ABSOLUTE_ADDR", stdout);
+}
+
+/*
+ * Print the fields of an ALLOC CF instruction: the selected buffer by name,
+ * the size, and the NO_SERIAL / ALLOC_MODE flags when set.
+ */
+static void print_cf_alloc(instr_cf_t *cf)
+{
+	static const char *bufname[] = {
+		[SQ_NO_ALLOC] = "NO ALLOC",
+		[SQ_POSITION] = "POSITION",
+		[SQ_PARAMETER_PIXEL] = "PARAM/PIXEL",
+		[SQ_MEMORY] = "MEMORY",
+	};
+	const char *selected = bufname[cf->alloc.buffer_select];
+
+	printf(" %s SIZE(0x%x)", selected, cf->alloc.size);
+
+	if (cf->alloc.no_serial)
+		fputs(" NO_SERIAL", stdout);
+
+	if (cf->alloc.alloc_mode) // ???
+		fputs(" ALLOC_MODE", stdout);
+}
+
+/*
+ * Dispatch table for CF (control-flow) instructions, indexed by opcode.
+ * The name is the stringified opcode identifier; fxn prints the
+ * instruction's operands.  Used by print_cf().
+ */
+struct {
+ const char *name;
+ void (*fxn)(instr_cf_t *cf);
+} cf_instructions[] = {
+#define INSTR(opc, fxn) [opc] = { #opc, fxn }
+ INSTR(NOP, print_cf_nop),
+ INSTR(EXEC, print_cf_exec),
+ INSTR(EXEC_END, print_cf_exec),
+ INSTR(COND_EXEC, print_cf_exec),
+ INSTR(COND_EXEC_END, print_cf_exec),
+ INSTR(COND_PRED_EXEC, print_cf_exec),
+ INSTR(COND_PRED_EXEC_END, print_cf_exec),
+ INSTR(LOOP_START, print_cf_loop),
+ INSTR(LOOP_END, print_cf_loop),
+ INSTR(COND_CALL, print_cf_jmp_call),
+ INSTR(RETURN, print_cf_jmp_call),
+ INSTR(COND_JMP, print_cf_jmp_call),
+ INSTR(ALLOC, print_cf_alloc),
+ INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec),
+ INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec),
+ INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ??
+#undef INSTR
+};
+
+/*
+ * Print one CF instruction on its own line: indentation, the raw encoding
+ * as three 16-bit words when PRINT_RAW is set, the opcode name, and the
+ * operands via the opcode's handler in cf_instructions[].
+ */
+static void print_cf(instr_cf_t *cf, int level)
+{
+	const unsigned opc = cf->opc;
+
+	printf("%s", levels[level]);
+
+	if (debug & PRINT_RAW) {
+		uint16_t *raw = (uint16_t *)cf;
+
+		printf(" %04x %04x %04x \t", raw[0], raw[1], raw[2]);
+	}
+
+	printf("%s", cf_instructions[opc].name);
+	cf_instructions[opc].fxn(cf);
+	printf("\n");
+}
+
+/*
+ * The adreno shader microcode consists of two parts:
+ * 1) A CF (control-flow) program, at the header of the compiled shader,
+ * which refers to ALU/FETCH instructions that follow it by address.
+ * 2) ALU and FETCH instructions
+ */
+
+/*
+ * Disassemble an a2xx shader and print it to stdout.
+ *
+ * dwords:     start of the shader (CF program followed by ALU/FETCH
+ *             instructions)
+ * sizedwords: size of the shader buffer in dwords
+ * level:      index into levels[] selecting the indentation prefix
+ * type:       shader type, forwarded to the ALU disassembler
+ *
+ * The number of CF instructions is derived from the first EXEC found: its
+ * address is where the ALU/FETCH instructions start, and two CF
+ * instructions fit in each three-dword instruction slot.  All reads are
+ * bounded by sizedwords; CF entries or ALU/FETCH instructions that would
+ * lie beyond the buffer are not decoded.
+ *
+ * Returns 0 on success, or -1 when the buffer is empty or contains no EXEC
+ * instruction (nothing is printed in that case).
+ */
+int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type)
+{
+	instr_cf_t *cfs = (instr_cf_t *)dwords;
+	int idx, max_idx = -1, num_cfs;
+
+	if (!rnn) {
+		rnn = rnn_new(1);
+		rnn_load(rnn, "a2xx");
+	}
+
+	if (!dwords || (sizedwords <= 0))
+		return -1;
+
+	/* how many whole CF instructions fit in the buffer: */
+	num_cfs = (int)((sizedwords * sizeof(uint32_t)) / sizeof(instr_cf_t));
+
+	for (idx = 0; idx < num_cfs; idx++) {
+		instr_cf_t *cf = &cfs[idx];
+		if (cf_exec(cf)) {
+			max_idx = 2 * cf->exec.address;
+			break;
+		}
+	}
+
+	if (max_idx < 0)
+		return -1;
+
+	if (max_idx > num_cfs)
+		max_idx = num_cfs;
+
+	for (idx = 0; idx < max_idx; idx++) {
+		instr_cf_t *cf = &cfs[idx];
+
+		print_cf(cf, level);
+
+		if (cf_exec(cf)) {
+			/* two bits per instruction: bit0 selects FETCH vs ALU,
+			 * bit1 is the sync flag
+			 */
+			uint32_t sequence = cf->exec.serialize;
+			uint32_t i;
+			for (i = 0; i < cf->exec.count; i++) {
+				uint32_t alu_off = (cf->exec.address + i);
+
+				/* each instruction is 3 dwords; stop at end of buffer */
+				if ((((uint64_t)alu_off + 1) * 3) > (uint64_t)sizedwords)
+					break;
+
+				if (sequence & 0x1) {
+					disasm_fetch(dwords + alu_off * 3, alu_off, level, sequence & 0x2);
+				} else {
+					disasm_alu(dwords + alu_off * 3, alu_off, level, sequence & 0x2, type);
+				}
+				sequence >>= 2;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Set the global debug flags (e.g. PRINT_RAW) consulted by the disassembler. */
+void disasm_set_debug(enum debug_t d)
+{
+ debug = d;
+}
--- /dev/null
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include "disasm.h"
+#include "instr-a3xx.h"
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+extern enum debug_t debug;
+
+/*
+ * Indentation prefixes, indexed by nesting level: level N yields N tabs for
+ * levels 0-9.  Levels 10-15 map to the placeholder "x".
+ */
+static const char *levels[] = {
+ "",
+ "\t",
+ "\t\t",
+ "\t\t\t",
+ "\t\t\t\t",
+ "\t\t\t\t\t",
+ "\t\t\t\t\t\t",
+ "\t\t\t\t\t\t\t",
+ "\t\t\t\t\t\t\t\t",
+ "\t\t\t\t\t\t\t\t\t",
+ "x",
+ "x",
+ "x",
+ "x",
+ "x",
+ "x",
+};
+
+/* Register component letters, indexed by component number. */
+static const char *component = "xyzw";
+
+/* Printable suffix for each TYPE_* value, used in mov/cov and cat5 output. */
+static const char *type[] = {
+ [TYPE_F16] = "f16",
+ [TYPE_F32] = "f32",
+ [TYPE_U16] = "u16",
+ [TYPE_U32] = "u32",
+ [TYPE_S16] = "s16",
+ [TYPE_S32] = "s32",
+ [TYPE_U8] = "u8",
+ [TYPE_S8] = "s8",
+};
+
+
+/* Number of scalar register slots that can be tracked in a regmask_t. */
+#define MAX_REG 4096
+
+/*
+ * Bitmask with one bit per scalar register index (see regidx()), kept
+ * separately for full and half registers.
+ */
+typedef struct {
+ uint8_t full[MAX_REG/8];
+ uint8_t half[MAX_REG/8];
+} regmask_t;
+
+/* State carried through the disassembly of one shader. */
+struct disasm_ctx {
+ FILE *out; /* stream all output is written to */
+ int level; /* index into levels[] for indentation */
+ unsigned gpu_id; /* passed to instr_opc() when decoding opcodes */
+
+ struct shader_stats *stats; /* counters updated/reported during disassembly */
+
+ /* we have to process the dst register after src to avoid tripping up
+ * the read-before-write detection
+ */
+ unsigned last_dst;
+ bool last_dst_full;
+ bool last_dst_valid;
+
+ /* current instruction repeat flag: */
+ unsigned repeat;
+ /* current instruction repeat indx/offset (for --expand): */
+ unsigned repeatidx;
+
+ /* tracking for register usage */
+ struct {
+ regmask_t used;
+ regmask_t used_merged;
+ regmask_t rbw; /* read before write */
+ regmask_t war; /* write after read */
+ regmask_t cnst; /* used consts */
+ } regs;
+};
+
+/*
+ * Symbolic names printed by print_reg() for small immediate values of full
+ * float sources, indexed by the immediate value.
+ */
+static const char *float_imms[] = {
+ "0.0",
+ "0.5",
+ "1.0",
+ "2.0",
+ "e",
+ "pi",
+ "1/pi",
+ "1/log2(e)",
+ "log2(e)",
+ "1/log2(10)",
+ "log2(10)",
+ "4.0",
+};
+
+/*
+ * Print one register operand to ctx->out.
+ *
+ * Modifier prefixes "(absneg)"/"(neg)"/"(abs)" and "(r)" come first, then
+ * exactly one of: an immediate, an a0.x-relative reference, the address
+ * register, the predicate register, or a plain "[h]{r|c}<num>.<comp>"
+ * register.  Half registers get an "h" prefix; const registers use 'c'
+ * instead of 'r'.
+ */
+static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full,
+		bool is_float, bool r,
+		bool c, bool im, bool neg, bool abs, bool addr_rel)
+{
+	FILE *out = ctx->out;
+	const char *half = full ? "" : "h";
+	const char file = c ? 'c' : 'r';
+
+	// XXX I prefer - and || for neg/abs, but preserving format used
+	// by libllvm-a3xx for easy diffing..
+
+	if (neg && abs)
+		fprintf(out, "(absneg)");
+	else if (neg)
+		fprintf(out, "(neg)");
+	else if (abs)
+		fprintf(out, "(abs)");
+
+	if (r)
+		fprintf(out, "(r)");
+
+	if (im) {
+		if (is_float && full && reg.iim_val < ARRAY_SIZE(float_imms))
+			fprintf(out, "(%s)", float_imms[reg.iim_val]);
+		else
+			fprintf(out, "%d", reg.iim_val);
+		return;
+	}
+
+	if (addr_rel) {
+		/* I would just use %+d but trying to make it diff'able with
+		 * libllvm-a3xx...
+		 */
+		if (reg.iim_val < 0)
+			fprintf(out, "%s%c<a0.x - %d>", half, file, -reg.iim_val);
+		else if (reg.iim_val > 0)
+			fprintf(out, "%s%c<a0.x + %d>", half, file, reg.iim_val);
+		else
+			fprintf(out, "%s%c<a0.x>", half, file);
+		return;
+	}
+
+	if (!c && (reg.num == REG_A0)) {
+		/* This matches libllvm output, the second (scalar) address register
+		 * seems to be called a1.x instead of a0.y.
+		 */
+		fprintf(out, "a%d.x", reg.comp);
+		return;
+	}
+
+	if (!c && (reg.num == REG_P0)) {
+		fprintf(out, "p0.%c", component[reg.comp]);
+		return;
+	}
+
+	fprintf(out, "%s%c%d.%c", half, file, reg.num, component[reg.comp]);
+}
+
+/* Tracking for registers used, read-before-write (input), and
+ * write-after-read (output.. but not 100%)..
+ */
+
+/*
+ * Write val into the bit for register index num, in either the full or the
+ * half bank of regmask.  num must be below MAX_REG.
+ */
+static void regmask_set(regmask_t *regmask, unsigned num, bool full, unsigned val)
+{
+	const unsigned byte = num / 8;
+	const unsigned bit = num % 8;
+	uint8_t *bank;
+
+	ir3_assert(num < MAX_REG);
+
+	bank = full ? regmask->full : regmask->half;
+	bank[byte] = (bank[byte] & ~(1 << bit)) | (val << bit);
+}
+
+/*
+ * Return the bit (0 or 1) for register index num from either the full or
+ * the half bank of regmask.  num must be below MAX_REG.
+ */
+static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full)
+{
+	const unsigned byte = num / 8;
+	const unsigned bit = num % 8;
+	const uint8_t *bank;
+
+	ir3_assert(num < MAX_REG);
+
+	bank = full ? regmask->full : regmask->half;
+	return (bank[byte] >> bit) & 0x1;
+}
+
+/* Flatten a register (number + component) into a scalar index: 4*num + comp. */
+static unsigned regidx(reg_t reg)
+{
+	unsigned idx = 4 * reg.num;
+
+	idx += reg.comp;
+	return idx;
+}
+
+/* Inverse of regidx(): split a scalar index into component and number. */
+static reg_t idxreg(unsigned idx)
+{
+	reg_t reg = {
+		.comp = idx & 0x3,
+		.num = idx >> 2,
+	};
+
+	return reg;
+}
+
+/*
+ * Print one run of consecutive register indices as " N" or " N-M".
+ * first == MAX_REG means no run has been started yet; nothing is printed.
+ */
+static void print_sequence(struct disasm_ctx *ctx, int first, int last)
+{
+	if (first == MAX_REG)
+		return;
+
+	if (first == last)
+		fprintf(ctx->out, " %d", first);
+	else
+		fprintf(ctx->out, " %d-%d", first, last);
+}
+
+/*
+ * Print the registers set in one bank (full or half) of regmask as a list
+ * of ranges, followed by " (cnt=N, max=M)".
+ *
+ * cnt counts every set bit.  max is the highest set index below 48*4, so
+ * indices at or above that do not raise it.
+ *
+ * Returns max.
+ *
+ * The range printer is a file-scope helper rather than a GCC nested
+ * function so the file builds with compilers lacking that extension.
+ */
+static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full)
+{
+	int num, max = 0, cnt = 0;
+	int first, last;
+
+	first = last = MAX_REG;
+
+	for (num = 0; num < MAX_REG; num++) {
+		if (regmask_get(regmask, num, full)) {
+			/* a gap ends the current run; flush it and start a new one */
+			if (num != (last + 1)) {
+				print_sequence(ctx, first, last);
+				first = num;
+			}
+			last = num;
+			if (num < (48*4))
+				max = num;
+			cnt++;
+		}
+	}
+
+	print_sequence(ctx, first, last);
+
+	fprintf(ctx->out, " (cnt=%d, max=%d)", cnt, max);
+
+	return max;
+}
+
+/*
+ * Print the register usage summary collected in ctx->regs (used, input,
+ * const and estimated output registers, for half and full banks) followed
+ * by the shaderdb-style instruction statistics from ctx->stats.
+ */
+static void print_reg_stats(struct disasm_ctx *ctx)
+{
+	const char *indent = levels[ctx->level];
+	FILE *out = ctx->out;
+	int max_full, max_half;
+
+	fprintf(out, "%sRegister Stats:\n", indent);
+
+	fprintf(out, "%s- used (half):", indent);
+	max_half = print_regs(ctx, &ctx->regs.used, false);
+	fprintf(out, "\n");
+
+	fprintf(out, "%s- used (full):", indent);
+	max_full = print_regs(ctx, &ctx->regs.used, true);
+	fprintf(out, "\n");
+
+	fprintf(out, "%s- used (merged):", indent);
+	print_regs(ctx, &ctx->regs.used_merged, false);
+	fprintf(out, "\n");
+
+	fprintf(out, "%s- input (half):", indent);
+	print_regs(ctx, &ctx->regs.rbw, false);
+	fprintf(out, "\n");
+
+	fprintf(out, "%s- input (full):", indent);
+	print_regs(ctx, &ctx->regs.rbw, true);
+	fprintf(out, "\n");
+
+	fprintf(out, "%s- const (half):", indent);
+	print_regs(ctx, &ctx->regs.cnst, false);
+	fprintf(out, "\n");
+
+	fprintf(out, "%s- const (full):", indent);
+	print_regs(ctx, &ctx->regs.cnst, true);
+	fprintf(out, "\n");
+
+	fprintf(out, "%s- output (half):", indent);
+	print_regs(ctx, &ctx->regs.war, false);
+	fprintf(out, " (estimated)\n");
+
+	fprintf(out, "%s- output (full):", indent);
+	print_regs(ctx, &ctx->regs.war, true);
+	fprintf(out, " (estimated)\n");
+
+	/* convert to vec4, which is the granularity that registers are
+	 * assigned to shader:
+	 */
+	max_full = (max_full + 3) / 4;
+	max_half = (max_half + 3) / 4;
+
+	// Note this count of instructions includes rptN, which matches
+	// up to how mesa prints this:
+	fprintf(out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, "
+			"(%d instlen), %d half, %d full\n",
+			indent, ctx->stats->instructions, ctx->stats->nops,
+			ctx->stats->instructions - ctx->stats->nops, ctx->stats->instlen,
+			max_half, max_full);
+	fprintf(out, "%s- shaderdb: %d (ss), %d (sy)\n", indent,
+			ctx->stats->ss, ctx->stats->sy);
+}
+
+/*
+ * Commit the destination recorded by print_reg_dst() into the usage masks.
+ * Marks last_dst .. last_dst + repeat as written (war) and used, mirrors
+ * them into used_merged (a full register covers two merged slots), and
+ * then clears last_dst_valid.  Does nothing if no destination is pending.
+ */
+static void process_reg_dst(struct disasm_ctx *ctx)
+{
+	unsigned n;
+
+	if (ctx->last_dst_valid) {
+		for (n = 0; n <= ctx->repeat; n++) {
+			const unsigned dst = ctx->last_dst + n;
+
+			regmask_set(&ctx->regs.war, dst, ctx->last_dst_full, 1);
+			regmask_set(&ctx->regs.used, dst, ctx->last_dst_full, 1);
+
+			if (!ctx->last_dst_full) {
+				regmask_set(&ctx->regs.used_merged, dst, false, 1);
+			} else {
+				regmask_set(&ctx->regs.used_merged, (dst*2)+0, false, 1);
+				regmask_set(&ctx->regs.used_merged, (dst*2)+1, false, 1);
+			}
+		}
+
+		ctx->last_dst_valid = false;
+	}
+}
+
+/*
+ * Print a destination register and remember it in ctx (last_dst*) so that
+ * process_reg_dst() can account for it later.  Relative destinations and
+ * register numbers 61/62 are not recorded.  The printed register is offset
+ * by ctx->repeatidx.
+ */
+static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel)
+{
+	/* presumably the special registers a0.c and p0.c don't count.. */
+	const bool special = (reg.num == 61) || (reg.num == 62);
+	reg_t shown;
+
+	if (!addr_rel && !special) {
+		ctx->last_dst = regidx(reg);
+		ctx->last_dst_full = full;
+		ctx->last_dst_valid = true;
+	}
+
+	shown = idxreg(regidx(reg) + ctx->repeatidx);
+	print_reg(ctx, shown, full, false, false, false, false, false, false, addr_rel);
+}
+
+/*
+ * Account for a source operand in the register-usage masks and print it.
+ *
+ * Plain GPR sources (not relative, const, immediate, or register number
+ * 61/62) are marked read-before-write if not yet used, cleared from the
+ * write-after-read mask, and marked used.  Const sources are recorded in
+ * the cnst mask and may raise stats->constlen.  With the (r) flag the
+ * tracking covers repeat+1 consecutive registers, otherwise only the first.
+ *
+ * For (r) sources the printed register is offset by ctx->repeatidx.
+ */
+static void print_reg_src(struct disasm_ctx *ctx, reg_t reg, bool full, bool f, bool r,
+ bool c, bool im, bool neg, bool abs, bool addr_rel)
+{
+ /* presumably the special registers a0.c and p0.c don't count.. */
+ if (!(addr_rel || c || im || (reg.num == 61) || (reg.num == 62))) {
+ int i, num = regidx(reg);
+ for (i = 0; i <= ctx->repeat; i++) {
+ unsigned src = num + i;
+
+ /* a read of a register nothing has touched yet is a shader input */
+ if (!regmask_get(&ctx->regs.used, src, full))
+ regmask_set(&ctx->regs.rbw, src, full, 1);
+
+ regmask_set(&ctx->regs.war, src, full, 0);
+ regmask_set(&ctx->regs.used, src, full, 1);
+
+ /* a full register occupies two slots in the merged mask */
+ if (full) {
+ regmask_set(&ctx->regs.used_merged, (src*2)+0, false, 1);
+ regmask_set(&ctx->regs.used_merged, (src*2)+1, false, 1);
+ } else {
+ regmask_set(&ctx->regs.used_merged, src, false, 1);
+ }
+
+ /* without (r) only the first register is read */
+ if (!r)
+ break;
+ }
+ } else if (c) {
+ int i, num = regidx(reg);
+ for (i = 0; i <= ctx->repeat; i++) {
+ unsigned src = num + i;
+
+ regmask_set(&ctx->regs.cnst, src, full, 1);
+
+ if (!r)
+ break;
+ }
+
+ /* const length in vec4 units; note this adds repeat regardless of (r) */
+ unsigned max = (num + ctx->repeat + 1 + 3) / 4;
+ if (max > ctx->stats->constlen)
+ ctx->stats->constlen = max;
+ }
+
+ if (r)
+ reg = idxreg(regidx(reg) + ctx->repeatidx);
+
+ print_reg(ctx, reg, full, f, r, c, im, neg, abs, addr_rel);
+}
+
+/* TODO switch to using reginfo struct everywhere, since more readable
+ * than passing a bunch of bools to print_reg_src
+ */
+
+/*
+ * Description of one source operand, consumed by print_src().  The flags
+ * map one-to-one onto the bool parameters of print_reg_src().
+ */
+struct reginfo {
+ reg_t reg;
+ bool full; /* full (vs half) register */
+ bool r; /* (r) repeat flag */
+ bool c; /* const register */
+ bool f; /* src reg is interpreted as float, used for printing immediates */
+ bool im; /* immediate value */
+ bool neg;
+ bool abs;
+ bool addr_rel; /* a0.x-relative addressing */
+};
+
+/*
+ * Print (and account for) a source operand described by a reginfo.
+ *
+ * The register is forwarded unmodified: print_reg_src() tracks usage from
+ * the base register and applies ctx->repeatidx itself for (r) sources, so
+ * adding the offset here as well would apply it twice.
+ */
+static void print_src(struct disasm_ctx *ctx, struct reginfo *info)
+{
+	print_reg_src(ctx, info->reg, info->full, info->f, info->r, info->c, info->im,
+			info->neg, info->abs, info->addr_rel);
+}
+
+//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info)
+//{
+// print_reg_dst(ctx, info->reg, info->full, info->addr_rel);
+//}
+
+/*
+ * Print the operands of a category 0 (flow control) instruction.
+ *
+ * kill/predt/predf print their predicate component; branches print the
+ * branch-type suffix, an optional index, up to two predicate sources and
+ * the immediate; jump/call/bkt/getone/shps print only the immediate.
+ *
+ * The branch type comes straight from the instruction encoding, so it is
+ * range-checked before indexing brinfo[]; an unknown type is printed as
+ * "?N?" followed by the immediate.
+ */
+static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr)
+{
+	static const struct {
+		const char *suffix;
+		int nsrc;
+		bool idx;
+	} brinfo[7] = {
+		[BRANCH_PLAIN] = { "r", 1, false },
+		[BRANCH_OR] = { "rao", 2, false },
+		[BRANCH_AND] = { "raa", 2, false },
+		[BRANCH_CONST] = { "rac", 0, true },
+		[BRANCH_ANY] = { "any", 1, false },
+		[BRANCH_ALL] = { "all", 1, false },
+		[BRANCH_X] = { "rax", 0, false },
+	};
+	instr_cat0_t *cat0 = &instr->cat0;
+
+	switch (instr_opc(instr, ctx->gpu_id)) {
+	case OPC_KILL:
+	case OPC_PREDT:
+	case OPC_PREDF:
+		fprintf(ctx->out, " %sp0.%c", cat0->inv0 ? "!" : "",
+				component[cat0->comp0]);
+		break;
+	case OPC_B:
+		if ((cat0->brtype >= ARRAY_SIZE(brinfo)) ||
+				!brinfo[cat0->brtype].suffix) {
+			/* unknown branch type: don't index past the table */
+			fprintf(ctx->out, "?%u?", (unsigned)cat0->brtype);
+			fprintf(ctx->out, " #%d", cat0->a3xx.immed);
+			break;
+		}
+		fprintf(ctx->out, "%s", brinfo[cat0->brtype].suffix);
+		if (brinfo[cat0->brtype].idx) {
+			fprintf(ctx->out, ".%u", cat0->idx);
+		}
+		if (brinfo[cat0->brtype].nsrc >= 1) {
+			fprintf(ctx->out, " %sp0.%c,", cat0->inv0 ? "!" : "",
+					component[cat0->comp0]);
+		}
+		if (brinfo[cat0->brtype].nsrc >= 2) {
+			fprintf(ctx->out, " %sp0.%c,", cat0->inv1 ? "!" : "",
+					component[cat0->comp1]);
+		}
+		fprintf(ctx->out, " #%d", cat0->a3xx.immed);
+		break;
+	case OPC_JUMP:
+	case OPC_CALL:
+	case OPC_BKT:
+	case OPC_GETONE:
+	case OPC_SHPS:
+		fprintf(ctx->out, " #%d", cat0->a3xx.immed);
+		break;
+	}
+
+	if ((debug & PRINT_VERBOSE) && (cat0->dummy3|cat0->dummy4))
+		fprintf(ctx->out, "\t{0: %x,%x}", cat0->dummy3, cat0->dummy4);
+}
+
+/*
+ * Print a category 1 (move/convert) instruction: the mnemonic (mova, mov or
+ * cov with source/destination type suffixes), optional flags, the
+ * destination, and the source as an immediate, an a0.x-relative reference,
+ * or a plain register.
+ */
+static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr)
+{
+ instr_cat1_t *cat1 = &instr->cat1;
+
+ if (cat1->ul)
+ fprintf(ctx->out, "(ul)");
+
+ /* same type on both sides is a move, otherwise a conversion */
+ if (cat1->src_type == cat1->dst_type) {
+ if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) {
+ /* special case (mnemonic?): */
+ fprintf(ctx->out, "mova");
+ } else {
+ fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
+ }
+ } else {
+ fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
+ }
+
+ fprintf(ctx->out, " ");
+
+ if (cat1->even)
+ fprintf(ctx->out, "(even)");
+
+ if (cat1->pos_inf)
+ fprintf(ctx->out, "(pos_infinity)");
+
+ print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32,
+ cat1->dst_rel);
+
+ fprintf(ctx->out, ", ");
+
+ /* ugg, have to special case this.. vs print_reg().. */
+ if (cat1->src_im) {
+ /* the immediate is formatted according to the source type */
+ if (type_float(cat1->src_type))
+ fprintf(ctx->out, "(%f)", cat1->fim_val);
+ else if (type_uint(cat1->src_type))
+ fprintf(ctx->out, "0x%08x", cat1->uim_val);
+ else
+ fprintf(ctx->out, "%d", cat1->iim_val);
+ } else if (cat1->src_rel && !cat1->src_c) {
+ /* I would just use %+d but trying to make it diff'able with
+ * libllvm-a3xx...
+ */
+ char type = cat1->src_rel_c ? 'c' : 'r';
+ const char *full = (type_size(cat1->src_type) == 32) ? "" : "h";
+ if (cat1->off < 0)
+ fprintf(ctx->out, "%s%c<a0.x - %d>", full, type, -cat1->off);
+ else if (cat1->off > 0)
+ fprintf(ctx->out, "%s%c<a0.x + %d>", full, type, cat1->off);
+ else
+ fprintf(ctx->out, "%s%c<a0.x>", full, type);
+ } else {
+ /* src_im is known to be zero here (handled by the first branch) */
+ struct reginfo src = {
+ .reg = (reg_t)cat1->src,
+ .full = type_size(cat1->src_type) == 32,
+ .r = cat1->src_r,
+ .c = cat1->src_c,
+ .im = cat1->src_im,
+ };
+ print_src(ctx, &src);
+ }
+
+ if ((debug & PRINT_VERBOSE) && (cat1->must_be_0))
+ fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0);
+}
+
+/*
+ * Print the operands of a category 2 (ALU) instruction: a condition suffix
+ * for the cmps/cmpv opcodes, the optional "(ei)" flag, the destination,
+ * src1, and src2 unless the opcode takes only one source.
+ *
+ * Each source is decoded from one of three overlapping encodings: const,
+ * a0.x-relative, or plain register.
+ *
+ * The condition code comes straight from the instruction encoding, so it is
+ * range-checked before indexing cond[]; a value past the table is printed
+ * as "?N?", matching the existing "?6?" placeholder.
+ */
+static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr)
+{
+	instr_cat2_t *cat2 = &instr->cat2;
+	int opc = _OPC(2, cat2->opc);
+	static const char *cond[] = {
+		"lt",
+		"le",
+		"gt",
+		"ge",
+		"eq",
+		"ne",
+		"?6?",
+	};
+
+	switch (opc) {
+	case OPC_CMPS_F:
+	case OPC_CMPS_U:
+	case OPC_CMPS_S:
+	case OPC_CMPV_F:
+	case OPC_CMPV_U:
+	case OPC_CMPV_S:
+		if (cat2->cond < ARRAY_SIZE(cond))
+			fprintf(ctx->out, ".%s", cond[cat2->cond]);
+		else
+			fprintf(ctx->out, ".?%u?", (unsigned)cat2->cond);
+		break;
+	}
+
+	fprintf(ctx->out, " ");
+	if (cat2->ei)
+		fprintf(ctx->out, "(ei)");
+	print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false);
+	fprintf(ctx->out, ", ");
+
+	struct reginfo src1 = {
+		.full = cat2->full,
+		/* the per-source (r) flag only counts when the instruction repeats */
+		.r = cat2->repeat ? cat2->src1_r : 0,
+		.f = is_cat2_float(opc),
+		.im = cat2->src1_im,
+		.abs = cat2->src1_abs,
+		.neg = cat2->src1_neg,
+	};
+
+	if (cat2->c1.src1_c) {
+		src1.reg = (reg_t)(cat2->c1.src1);
+		src1.c = true;
+	} else if (cat2->rel1.src1_rel) {
+		src1.reg = (reg_t)(cat2->rel1.src1);
+		src1.c = cat2->rel1.src1_c;
+		src1.addr_rel = true;
+	} else {
+		src1.reg = (reg_t)(cat2->src1);
+	}
+	print_src(ctx, &src1);
+
+	struct reginfo src2 = {
+		.r = cat2->repeat ? cat2->src2_r : 0,
+		.full = cat2->full,
+		.f = is_cat2_float(opc),
+		.abs = cat2->src2_abs,
+		.neg = cat2->src2_neg,
+		.im = cat2->src2_im,
+	};
+	switch (opc) {
+	case OPC_ABSNEG_F:
+	case OPC_ABSNEG_S:
+	case OPC_CLZ_B:
+	case OPC_CLZ_S:
+	case OPC_SIGN_F:
+	case OPC_FLOOR_F:
+	case OPC_CEIL_F:
+	case OPC_RNDNE_F:
+	case OPC_RNDAZ_F:
+	case OPC_TRUNC_F:
+	case OPC_NOT_B:
+	case OPC_BFREV_B:
+	case OPC_SETRM:
+	case OPC_CBITS_B:
+		/* these only have one src reg */
+		break;
+	default:
+		fprintf(ctx->out, ", ");
+		if (cat2->c2.src2_c) {
+			src2.reg = (reg_t)(cat2->c2.src2);
+			src2.c = true;
+		} else if (cat2->rel2.src2_rel) {
+			src2.reg = (reg_t)(cat2->rel2.src2);
+			src2.c = cat2->rel2.src2_c;
+			src2.addr_rel = true;
+		} else {
+			src2.reg = (reg_t)(cat2->src2);
+		}
+		print_src(ctx, &src2);
+		break;
+	}
+}
+
+/*
+ * Print the operands of a category 3 (three-source ALU) instruction: the
+ * destination followed by src1, src2 and src3.  src1 and src3 are decoded
+ * from one of three overlapping encodings (const, a0.x-relative, or plain
+ * register); src2 is a plain or const register.
+ */
+static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr)
+{
+ instr_cat3_t *cat3 = &instr->cat3;
+ bool full = instr_cat3_full(cat3);
+
+ fprintf(ctx->out, " ");
+ print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false);
+ fprintf(ctx->out, ", ");
+
+ struct reginfo src1 = {
+ /* the per-source (r) flag only counts when the instruction repeats */
+ .r = cat3->repeat ? cat3->src1_r : 0,
+ .full = full,
+ .neg = cat3->src1_neg,
+ };
+ if (cat3->c1.src1_c) {
+ src1.reg = (reg_t)(cat3->c1.src1);
+ src1.c = true;
+ } else if (cat3->rel1.src1_rel) {
+ src1.reg = (reg_t)(cat3->rel1.src1);
+ src1.c = cat3->rel1.src1_c;
+ src1.addr_rel = true;
+ } else {
+ src1.reg = (reg_t)(cat3->src1);
+ }
+ print_src(ctx, &src1);
+
+ fprintf(ctx->out, ", ");
+ struct reginfo src2 = {
+ .reg = (reg_t)cat3->src2,
+ .full = full,
+ .r = cat3->repeat ? cat3->src2_r : 0,
+ .c = cat3->src2_c,
+ .neg = cat3->src2_neg,
+ };
+ print_src(ctx, &src2);
+
+ fprintf(ctx->out, ", ");
+ struct reginfo src3 = {
+ /* NOTE(review): unlike src1/src2 this is not gated on cat3->repeat -- confirm intended */
+ .r = cat3->src3_r,
+ .full = full,
+ .neg = cat3->src3_neg,
+ };
+ if (cat3->c2.src3_c) {
+ src3.reg = (reg_t)(cat3->c2.src3);
+ src3.c = true;
+ } else if (cat3->rel2.src3_rel) {
+ src3.reg = (reg_t)(cat3->rel2.src3);
+ src3.c = cat3->rel2.src3_c;
+ src3.addr_rel = true;
+ } else {
+ src3.reg = (reg_t)(cat3->src3);
+ }
+ print_src(ctx, &src3);
+}
+
+/*
+ * Print the operands of a category 4 (single-source) instruction: the
+ * destination and one source, which is decoded from the const,
+ * a0.x-relative, or plain register encoding.  In verbose mode any non-zero
+ * dummy bits are appended.
+ */
+static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr)
+{
+	instr_cat4_t *cat4 = &instr->cat4;
+	struct reginfo src = {
+		.full = cat4->full,
+		.r = cat4->src_r,
+		.im = cat4->src_im,
+		.abs = cat4->src_abs,
+		.neg = cat4->src_neg,
+	};
+
+	/* pick the register field matching the source encoding */
+	if (cat4->c.src_c) {
+		src.c = true;
+		src.reg = (reg_t)(cat4->c.src);
+	} else if (cat4->rel.src_rel) {
+		src.addr_rel = true;
+		src.c = cat4->rel.src_c;
+		src.reg = (reg_t)(cat4->rel.src);
+	} else {
+		src.reg = (reg_t)(cat4->src);
+	}
+
+	fprintf(ctx->out, " ");
+	print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false);
+	fprintf(ctx->out, ", ");
+	print_src(ctx, &src);
+
+	if (debug & PRINT_VERBOSE) {
+		if (cat4->dummy1|cat4->dummy2)
+			fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2);
+	}
+}
+
+/*
+ * Print the operands of a category 5 (texture sample/query) instruction:
+ * flag suffixes (.3d/.a/.o/.p/.s/.s2en/.uniform/.baseN), the type and
+ * write mask, the destination, and whichever of src1, src2, sampler and
+ * texture the opcode uses according to the info[] table.
+ *
+ * For the s2en/bindless encoding, desc_features[] says whether the
+ * descriptor is indirect (taken from a register, printed last), bindless,
+ * uniform, and whether a1.x is used.
+ *
+ * The opcode comes straight from the instruction encoding, so it is
+ * range-checked before indexing info[]; an opcode past the end of the
+ * table is treated as taking no sources, sampler or texture.
+ */
+static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
+{
+	static const struct {
+		bool src1, src2, samp, tex;
+	} info[0x1f] = {
+		[opc_op(OPC_ISAM)] = { true, false, true, true, },
+		[opc_op(OPC_ISAML)] = { true, true, true, true, },
+		[opc_op(OPC_ISAMM)] = { true, false, true, true, },
+		[opc_op(OPC_SAM)] = { true, false, true, true, },
+		[opc_op(OPC_SAMB)] = { true, true, true, true, },
+		[opc_op(OPC_SAML)] = { true, true, true, true, },
+		[opc_op(OPC_SAMGQ)] = { true, false, true, true, },
+		[opc_op(OPC_GETLOD)] = { true, false, true, true, },
+		[opc_op(OPC_CONV)] = { true, true, true, true, },
+		[opc_op(OPC_CONVM)] = { true, true, true, true, },
+		[opc_op(OPC_GETSIZE)] = { true, false, false, true, },
+		[opc_op(OPC_GETBUF)] = { false, false, false, true, },
+		[opc_op(OPC_GETPOS)] = { true, false, false, true, },
+		[opc_op(OPC_GETINFO)] = { false, false, false, true, },
+		[opc_op(OPC_DSX)] = { true, false, false, false, },
+		[opc_op(OPC_DSY)] = { true, false, false, false, },
+		[opc_op(OPC_GATHER4R)] = { true, false, true, true, },
+		[opc_op(OPC_GATHER4G)] = { true, false, true, true, },
+		[opc_op(OPC_GATHER4B)] = { true, false, true, true, },
+		[opc_op(OPC_GATHER4A)] = { true, false, true, true, },
+		[opc_op(OPC_SAMGP0)] = { true, false, true, true, },
+		[opc_op(OPC_SAMGP1)] = { true, false, true, true, },
+		[opc_op(OPC_SAMGP2)] = { true, false, true, true, },
+		[opc_op(OPC_SAMGP3)] = { true, false, true, true, },
+		[opc_op(OPC_DSXPP_1)] = { true, false, false, false, },
+		[opc_op(OPC_DSYPP_1)] = { true, false, false, false, },
+		[opc_op(OPC_RGETPOS)] = { true, false, false, false, },
+		[opc_op(OPC_RGETINFO)] = { false, false, false, false, },
+	};
+
+	static const struct {
+		bool indirect;
+		bool bindless;
+		bool use_a1;
+		bool uniform;
+	} desc_features[8] = {
+		[CAT5_NONUNIFORM] = { .indirect = true, },
+		[CAT5_UNIFORM] = { .indirect = true, .uniform = true, },
+		[CAT5_BINDLESS_IMM] = { .bindless = true, },
+		[CAT5_BINDLESS_UNIFORM] = {
+			.bindless = true,
+			.indirect = true,
+			.uniform = true,
+		},
+		[CAT5_BINDLESS_NONUNIFORM] = {
+			.bindless = true,
+			.indirect = true,
+		},
+		[CAT5_BINDLESS_A1_IMM] = {
+			.bindless = true,
+			.use_a1 = true,
+		},
+		[CAT5_BINDLESS_A1_UNIFORM] = {
+			.bindless = true,
+			.indirect = true,
+			.uniform = true,
+			.use_a1 = true,
+		},
+		[CAT5_BINDLESS_A1_NONUNIFORM] = {
+			.bindless = true,
+			.indirect = true,
+			.use_a1 = true,
+		},
+	};
+
+	instr_cat5_t *cat5 = &instr->cat5;
+	int i;
+
+	/* operand layout for this opcode; all false for an opcode outside info[] */
+	bool has_src1 = false, has_src2 = false, has_samp = false, has_tex = false;
+
+	if (cat5->opc < ARRAY_SIZE(info)) {
+		has_src1 = info[cat5->opc].src1;
+		has_src2 = info[cat5->opc].src2;
+		has_samp = info[cat5->opc].samp;
+		has_tex = info[cat5->opc].tex;
+	}
+
+	bool desc_indirect =
+		cat5->is_s2en_bindless &&
+		desc_features[cat5->s2en_bindless.desc_mode].indirect;
+	bool bindless =
+		cat5->is_s2en_bindless &&
+		desc_features[cat5->s2en_bindless.desc_mode].bindless;
+	bool use_a1 =
+		cat5->is_s2en_bindless &&
+		desc_features[cat5->s2en_bindless.desc_mode].use_a1;
+	bool uniform =
+		cat5->is_s2en_bindless &&
+		desc_features[cat5->s2en_bindless.desc_mode].uniform;
+
+	if (cat5->is_3d) fprintf(ctx->out, ".3d");
+	if (cat5->is_a) fprintf(ctx->out, ".a");
+	if (cat5->is_o) fprintf(ctx->out, ".o");
+	if (cat5->is_p) fprintf(ctx->out, ".p");
+	if (cat5->is_s) fprintf(ctx->out, ".s");
+	if (desc_indirect) fprintf(ctx->out, ".s2en");
+	if (uniform) fprintf(ctx->out, ".uniform");
+
+	if (bindless) {
+		unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo;
+		fprintf(ctx->out, ".base%d", base);
+	}
+
+	fprintf(ctx->out, " ");
+
+	switch (_OPC(5, cat5->opc)) {
+	case OPC_DSXPP_1:
+	case OPC_DSYPP_1:
+		break;
+	default:
+		fprintf(ctx->out, "(%s)", type[cat5->type]);
+		break;
+	}
+
+	fprintf(ctx->out, "(");
+	for (i = 0; i < 4; i++)
+		if (cat5->wrmask & (1 << i))
+			fprintf(ctx->out, "%c", "xyzw"[i]);
+	fprintf(ctx->out, ")");
+
+	print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false);
+
+	if (has_src1) {
+		fprintf(ctx->out, ", ");
+		struct reginfo src = { .reg = (reg_t)(cat5->src1), .full = cat5->full };
+		print_src(ctx, &src);
+	}
+
+	if (cat5->is_o || has_src2) {
+		fprintf(ctx->out, ", ");
+		struct reginfo src = { .reg = (reg_t)(cat5->src2), .full = cat5->full };
+		print_src(ctx, &src);
+	}
+	if (cat5->is_s2en_bindless) {
+		/* immediate descriptor: src3 holds the sampler/texture indices */
+		if (!desc_indirect) {
+			if (has_samp) {
+				if (use_a1)
+					fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3);
+				else
+					fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf);
+			}
+
+			if (has_tex && !use_a1) {
+				fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4);
+			}
+		}
+	} else {
+		if (has_samp)
+			fprintf(ctx->out, ", s#%d", cat5->norm.samp);
+		if (has_tex)
+			fprintf(ctx->out, ", t#%d", cat5->norm.tex);
+	}
+
+	/* indirect descriptor: src3 is a register */
+	if (desc_indirect) {
+		fprintf(ctx->out, ", ");
+		struct reginfo src = { .reg = (reg_t)(cat5->s2en_bindless.src3), .full = bindless };
+		print_src(ctx, &src);
+	}
+
+	if (use_a1)
+		fprintf(ctx->out, ", a1.x");
+
+	if (debug & PRINT_VERBOSE) {
+		if (cat5->is_s2en_bindless) {
+			if (cat5->s2en_bindless.dummy1)
+				fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1);
+		} else {
+			if (cat5->norm.dummy1)
+				fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1);
+		}
+	}
+}
+
+/*
+ * Print the modifiers and operands of a category 6 instruction that uses
+ * the legacy encoding (instr->cat6).  print_instr_cat6() dispatches here
+ * when is_cat6_legacy() reports true.  All output is appended to ctx->out;
+ * the mnemonic itself is written by the caller before this runs.
+ *
+ * Several opcode families use their own operand layout (the stgb/ldgb
+ * views of the instruction); each of those is printed and returned from
+ * early, and only the remaining opcodes reach the generic dst/src code at
+ * the bottom.
+ */
+static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr)
+{
+ instr_cat6_t *cat6 = &instr->cat6;
+ char sd = 0, ss = 0; /* dst/src address space */
+ bool nodst = false;
+ struct reginfo dst, src1, src2;
+ int src1off = 0, dstoff = 0;
+
+ memset(&dst, 0, sizeof(dst));
+ memset(&src1, 0, sizeof(src1));
+ memset(&src2, 0, sizeof(src2));
+
+ /* Step 1: choose the .full flag of each operand from the opcode.  By
+ * default only dst follows the instruction type (32-bit type => full)
+ * and both sources are full.
+ */
+ switch (_OPC(6, cat6->opc)) {
+ case OPC_RESINFO:
+ case OPC_RESFMT:
+ dst.full = type_size(cat6->type) == 32;
+ src1.full = type_size(cat6->type) == 32;
+ src2.full = type_size(cat6->type) == 32;
+ break;
+ case OPC_L2G:
+ case OPC_G2L:
+ dst.full = true;
+ src1.full = true;
+ src2.full = true;
+ break;
+ case OPC_STG:
+ case OPC_STL:
+ case OPC_STP:
+ case OPC_STLW:
+ case OPC_STIB:
+ dst.full = type_size(cat6->type) == 32;
+ src1.full = type_size(cat6->type) == 32;
+ src2.full = type_size(cat6->type) == 32;
+ break;
+ default:
+ dst.full = type_size(cat6->type) == 32;
+ src1.full = true;
+ src2.full = true;
+ break;
+ }
+
+ /* Step 2: print the ".suffix" modifiers that follow the mnemonic.  The
+ * atomics also pick their address space (ss) here from the 'g' bit.
+ */
+ switch (_OPC(6, cat6->opc)) {
+ case OPC_PREFETCH:
+ break;
+ case OPC_RESINFO:
+ fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
+ break;
+ case OPC_LDGB:
+ fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
+ fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
+ fprintf(ctx->out, ".%s", type[cat6->type]);
+ fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
+ break;
+ case OPC_STGB:
+ case OPC_STIB:
+ fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped");
+ fprintf(ctx->out, ".%dd", cat6->stgb.d + 1);
+ fprintf(ctx->out, ".%s", type[cat6->type]);
+ fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1);
+ break;
+ case OPC_ATOMIC_ADD:
+ case OPC_ATOMIC_SUB:
+ case OPC_ATOMIC_XCHG:
+ case OPC_ATOMIC_INC:
+ case OPC_ATOMIC_DEC:
+ case OPC_ATOMIC_CMPXCHG:
+ case OPC_ATOMIC_MIN:
+ case OPC_ATOMIC_MAX:
+ case OPC_ATOMIC_AND:
+ case OPC_ATOMIC_OR:
+ case OPC_ATOMIC_XOR:
+ ss = cat6->g ? 'g' : 'l';
+ fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
+ fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
+ fprintf(ctx->out, ".%s", type[cat6->type]);
+ fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
+ fprintf(ctx->out, ".%c", ss);
+ break;
+ default:
+ dst.im = cat6->g && !cat6->dst_off;
+ fprintf(ctx->out, ".%s", type[cat6->type]);
+ break;
+ }
+ fprintf(ctx->out, " ");
+
+ /* Step 3: pick the address-space letter(s) that wrap dst (sd) and src
+ * (ss) as "x[...]" in the generic path below.  Opcodes not listed keep
+ * whatever was set above (0 unless it is an atomic).
+ */
+ switch (_OPC(6, cat6->opc)) {
+ case OPC_STG:
+ sd = 'g';
+ break;
+ case OPC_STP:
+ sd = 'p';
+ break;
+ case OPC_STL:
+ case OPC_STLW:
+ sd = 'l';
+ break;
+
+ case OPC_LDG:
+ case OPC_LDC:
+ ss = 'g';
+ break;
+ case OPC_LDP:
+ ss = 'p';
+ break;
+ case OPC_LDL:
+ case OPC_LDLW:
+ case OPC_LDLV:
+ ss = 'l';
+ break;
+
+ case OPC_L2G:
+ ss = 'l';
+ sd = 'g';
+ break;
+
+ case OPC_G2L:
+ ss = 'g';
+ sd = 'l';
+ break;
+
+ case OPC_PREFETCH:
+ ss = 'g';
+ nodst = true;
+ break;
+ }
+
+ /* stgb/stib: "g[ssbo], src1, src2, src3" taken from the stgb view */
+ if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) {
+ struct reginfo src3;
+
+ memset(&src3, 0, sizeof(src3));
+
+ src1.reg = (reg_t)(cat6->stgb.src1);
+ src2.reg = (reg_t)(cat6->stgb.src2);
+ src2.im = cat6->stgb.src2_im;
+ src3.reg = (reg_t)(cat6->stgb.src3);
+ src3.im = cat6->stgb.src3_im;
+ src3.full = true;
+
+ fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo);
+ print_src(ctx, &src1);
+ fprintf(ctx->out, ", ");
+ print_src(ctx, &src2);
+ fprintf(ctx->out, ", ");
+ print_src(ctx, &src3);
+
+ if (debug & PRINT_VERBOSE)
+ fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3);
+
+ return;
+ }
+
+ /* atomics: operands come from the ldgb view; the printed form depends
+ * on the address space chosen in step 2.
+ */
+ if (is_atomic(_OPC(6, cat6->opc))) {
+
+ src1.reg = (reg_t)(cat6->ldgb.src1);
+ src1.im = cat6->ldgb.src1_im;
+ src2.reg = (reg_t)(cat6->ldgb.src2);
+ src2.im = cat6->ldgb.src2_im;
+ dst.reg = (reg_t)(cat6->ldgb.dst);
+
+ print_src(ctx, &dst);
+ fprintf(ctx->out, ", ");
+ if (ss == 'g') {
+ struct reginfo src3;
+ memset(&src3, 0, sizeof(src3));
+
+ src3.reg = (reg_t)(cat6->ldgb.src3);
+ src3.full = true;
+
+ /* For images, the ".typed" variant is used and src2 is
+ * the ivecN coordinates, ie ivec2 for 2d.
+ *
+ * For SSBOs, the ".untyped" variant is used and src2 is
+ * a simple dword offset.. src3 appears to be
+ * uvec2(offset * 4, 0). Not sure the point of that.
+ */
+
+ fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
+ print_src(ctx, &src1); /* value */
+ fprintf(ctx->out, ", ");
+ print_src(ctx, &src2); /* offset/coords */
+ fprintf(ctx->out, ", ");
+ print_src(ctx, &src3); /* 64b byte offset.. */
+
+ if (debug & PRINT_VERBOSE) {
+ fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0,
+ cat6->ldgb.pad3, cat6->ldgb.mustbe0);
+ }
+ } else { /* ss == 'l' */
+ fprintf(ctx->out, "l[");
+ print_src(ctx, &src1); /* simple byte offset */
+ fprintf(ctx->out, "], ");
+ print_src(ctx, &src2); /* value */
+
+ if (debug & PRINT_VERBOSE) {
+ fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)",
+ cat6->ldgb.src3, cat6->ldgb.pad0,
+ cat6->ldgb.pad3, cat6->ldgb.mustbe0);
+ }
+ }
+
+ return;
+ } else if (_OPC(6, cat6->opc) == OPC_RESINFO) {
+ /* resinfo: "dst, g[ssbo]" */
+ dst.reg = (reg_t)(cat6->ldgb.dst);
+
+ print_src(ctx, &dst);
+ fprintf(ctx->out, ", ");
+ fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo);
+
+ return;
+ } else if (_OPC(6, cat6->opc) == OPC_LDGB) {
+ /* ldgb: "dst, g[ssbo], src1, src2" */
+
+ src1.reg = (reg_t)(cat6->ldgb.src1);
+ src1.im = cat6->ldgb.src1_im;
+ src2.reg = (reg_t)(cat6->ldgb.src2);
+ src2.im = cat6->ldgb.src2_im;
+ dst.reg = (reg_t)(cat6->ldgb.dst);
+
+ print_src(ctx, &dst);
+ fprintf(ctx->out, ", ");
+ fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
+ print_src(ctx, &src1);
+ fprintf(ctx->out, ", ");
+ print_src(ctx, &src2);
+
+ if (debug & PRINT_VERBOSE)
+ fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0);
+
+ return;
+ } else if (_OPC(6, cat6->opc) == OPC_LDG && cat6->a.src1_im && cat6->a.src2_im) {
+ /* ldg with both src*_im bits set is printed as
+ * "dst, g[src1+off], src2", with src1 and the 'off' field both
+ * shown as registers.
+ */
+ struct reginfo src3;
+
+ memset(&src3, 0, sizeof(src3));
+ src1.reg = (reg_t)(cat6->a.src1);
+ src2.reg = (reg_t)(cat6->a.src2);
+ src2.im = cat6->a.src2_im;
+ src3.reg = (reg_t)(cat6->a.off);
+ src3.full = true;
+ dst.reg = (reg_t)(cat6->d.dst);
+
+ print_src(ctx, &dst);
+ fprintf(ctx->out, ", g[");
+ print_src(ctx, &src1);
+ fprintf(ctx->out, "+");
+ print_src(ctx, &src3);
+ fprintf(ctx->out, "], ");
+ print_src(ctx, &src2);
+
+ return;
+ }
+ /* Generic path: dst_off / src_off select which view of the
+ * instruction (c vs d for dst, a vs b for src) holds the operands.
+ */
+ if (cat6->dst_off) {
+ dst.reg = (reg_t)(cat6->c.dst);
+ dstoff = cat6->c.off;
+ } else {
+ dst.reg = (reg_t)(cat6->d.dst);
+ }
+
+ if (cat6->src_off) {
+ src1.reg = (reg_t)(cat6->a.src1);
+ src1.im = cat6->a.src1_im;
+ src2.reg = (reg_t)(cat6->a.src2);
+ src2.im = cat6->a.src2_im;
+ src1off = cat6->a.off;
+ } else {
+ src1.reg = (reg_t)(cat6->b.src1);
+ src1.im = cat6->b.src1_im;
+ src2.reg = (reg_t)(cat6->b.src2);
+ src2.im = cat6->b.src2_im;
+ }
+
+ if (!nodst) {
+ if (sd)
+ fprintf(ctx->out, "%c[", sd);
+ /* note: dst might actually be a src (ie. address to store to) */
+ print_src(ctx, &dst);
+ /* with the 'g' bit the offset field is shown as a register
+ * ("+reg"), otherwise as a signed immediate
+ */
+ if (cat6->dst_off && cat6->g) {
+ struct reginfo dstoff_reg = {0};
+ dstoff_reg.reg = (reg_t) cat6->c.off;
+ dstoff_reg.full = true;
+ fprintf(ctx->out, "+");
+ print_src(ctx, &dstoff_reg);
+ } else if (dstoff)
+ fprintf(ctx->out, "%+d", dstoff);
+ if (sd)
+ fprintf(ctx->out, "]");
+ fprintf(ctx->out, ", ");
+ }
+
+ if (ss)
+ fprintf(ctx->out, "%c[", ss);
+
+ /* can have a larger than normal immed, so hack: */
+ if (src1.im) {
+ fprintf(ctx->out, "%u", src1.reg.dummy13);
+ } else {
+ print_src(ctx, &src1);
+ }
+
+ /* NOTE(review): in the src_off && g case src2 is printed directly
+ * after src1 with no separator, and is printed again by the final
+ * switch below -- confirm this is the intended syntax.
+ */
+ if (cat6->src_off && cat6->g)
+ print_src(ctx, &src2);
+ else if (src1off)
+ fprintf(ctx->out, "%+d", src1off);
+ if (ss)
+ fprintf(ctx->out, "]");
+
+ /* every opcode reaching this point except resfmt gets a trailing
+ * ", src2" (resinfo already returned above)
+ */
+ switch (_OPC(6, cat6->opc)) {
+ case OPC_RESINFO:
+ case OPC_RESFMT:
+ break;
+ default:
+ fprintf(ctx->out, ", ");
+ print_src(ctx, &src2);
+ break;
+ }
+}
+
+/*
+ * Print the modifiers and operands of a category 6 instruction that uses
+ * the newer encoding (instr->cat6_a6xx).  print_instr_cat6() dispatches
+ * here when is_cat6_legacy() reports false.  All output is appended to
+ * ctx->out; the mnemonic itself is written by the caller.
+ *
+ * Printed form:
+ *   .<typed|untyped>.<N>d.<type>.<size>.<mode>[.base<N>] src2, src1, ssbo
+ * except for ldc, which prints ".offset<d>" in place of the
+ * typed/dimension/type modifiers.
+ */
+static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
+{
+	instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
+	struct reginfo src1, src2, ssbo;
+	bool uses_type = _OPC(6, cat6->opc) != OPC_LDC;
+
+	/* Per descriptor-mode properties, indexed by cat6->desc_mode.  Slots
+	 * without a designated initializer are zero-filled, i.e. their name
+	 * is NULL.
+	 */
+	static const struct {
+		bool indirect;
+		bool bindless;
+		const char *name;
+	} desc_features[8] = {
+		[CAT6_IMM] = {
+			.name = "imm"
+		},
+		[CAT6_UNIFORM] = {
+			.indirect = true,
+			.name = "uniform"
+		},
+		[CAT6_NONUNIFORM] = {
+			.indirect = true,
+			.name = "nonuniform"
+		},
+		[CAT6_BINDLESS_IMM] = {
+			.bindless = true,
+			.name = "imm"
+		},
+		[CAT6_BINDLESS_UNIFORM] = {
+			.bindless = true,
+			.indirect = true,
+			.name = "uniform"
+		},
+		[CAT6_BINDLESS_NONUNIFORM] = {
+			.bindless = true,
+			.indirect = true,
+			.name = "nonuniform"
+		},
+	};
+
+	bool indirect_ssbo = desc_features[cat6->desc_mode].indirect;
+	bool bindless = desc_features[cat6->desc_mode].bindless;
+	bool type_full = cat6->type != TYPE_U16;
+	const char *mode_name = desc_features[cat6->desc_mode].name;
+
+	/* An unlisted desc_mode has no name.  Passing NULL to "%s" is
+	 * undefined behaviour, so substitute the text glibc happens to print
+	 * for it; output stays the same there and becomes well defined
+	 * everywhere else.
+	 */
+	if (!mode_name)
+		mode_name = "(null)";
+
+	memset(&src1, 0, sizeof(src1));
+	memset(&src2, 0, sizeof(src2));
+	memset(&ssbo, 0, sizeof(ssbo));
+
+	if (uses_type) {
+		fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
+		fprintf(ctx->out, ".%dd", cat6->d + 1);
+		fprintf(ctx->out, ".%s", type[cat6->type]);
+	} else {
+		fprintf(ctx->out, ".offset%d", cat6->d);
+	}
+	fprintf(ctx->out, ".%u", cat6->type_size + 1);
+
+	fprintf(ctx->out, ".%s", mode_name);
+	if (bindless)
+		fprintf(ctx->out, ".base%d", cat6->base);
+	fprintf(ctx->out, " ");
+
+	/* src2 is printed first; it is half-width only for TYPE_U16 */
+	src2.reg = (reg_t)(cat6->src2);
+	src2.full = type_full;
+	print_src(ctx, &src2);
+	fprintf(ctx->out, ", ");
+
+	src1.reg = (reg_t)(cat6->src1);
+	src1.full = true; // XXX
+	print_src(ctx, &src1);
+	fprintf(ctx->out, ", ");
+
+	/* the ssbo operand is an immediate unless the mode is indirect */
+	ssbo.reg = (reg_t)(cat6->ssbo);
+	ssbo.im = !indirect_ssbo;
+	ssbo.full = true;
+	print_src(ctx, &ssbo);
+
+	if (debug & PRINT_VERBOSE) {
+		fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)",
+				cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5);
+	}
+}
+
+/*
+ * Category 6 entry point: pick the legacy or the newer operand printer
+ * according to is_cat6_legacy(), and in verbose mode append a tag naming
+ * the encoding that was used.
+ */
+static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr)
+{
+	const bool legacy = is_cat6_legacy(instr, ctx->gpu_id);
+
+	if (legacy)
+		print_instr_cat6_a3xx(ctx, instr);
+	else
+		print_instr_cat6_a6xx(ctx, instr);
+
+	if (debug & PRINT_VERBOSE)
+		fprintf(ctx->out, "%s", legacy ? " LEGACY" : " NEW");
+}
+/*
+ * Print the flag suffixes of a category 7 instruction: ".g" and ".l" for
+ * any opcode, followed by ".r" and ".w" for fence only.
+ */
+static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr)
+{
+	instr_cat7_t *cat7 = &instr->cat7;
+	const bool is_fence = _OPC(7, cat7->opc) == OPC_FENCE;
+
+	if (cat7->g)
+		fprintf(ctx->out, ".g");
+	if (cat7->l)
+		fprintf(ctx->out, ".l");
+
+	/* the r/w bits are only reported for fence */
+	if (!is_fence)
+		return;
+
+	if (cat7->r)
+		fprintf(ctx->out, ".r");
+	if (cat7->w)
+		fprintf(ctx->out, ".w");
+}
+
+/* size of largest OPC field of all the instruction categories.  GETINFO()
+ * shifts the category left by this many bits to form the opcs[] index, and
+ * opcs[] is sized 1 << (3 + NOPC_BITS) to match.
+ */
+#define NOPC_BITS 6
+
+/*
+ * Opcode table: mnemonic and operand printer for each known instruction.
+ *
+ * Each OPC() entry is placed at index [opc] and looked up through
+ * GETINFO(), which indexes with (opc_cat << NOPC_BITS) | opcode; the OPC_*
+ * constants are therefore presumably already combined category+opcode
+ * values (TODO confirm against their definition).  Slots with no entry are
+ * zero-initialized, so their name is NULL; print_single_instr() reports
+ * those as "unknown(cat,opc)".
+ */
+static const struct opc_info {
+ uint16_t cat;
+ uint16_t opc;
+ const char *name;
+ void (*print)(struct disasm_ctx *ctx, instr_t *instr);
+} opcs[1 << (3+NOPC_BITS)] = {
+/* #name stringifies the mnemonic; print_instr_cat##cat selects the printer */
+#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat }
+ /* category 0: */
+ OPC(0, OPC_NOP, nop),
+ OPC(0, OPC_B, b),
+ OPC(0, OPC_JUMP, jump),
+ OPC(0, OPC_CALL, call),
+ OPC(0, OPC_RET, ret),
+ OPC(0, OPC_KILL, kill),
+ OPC(0, OPC_END, end),
+ OPC(0, OPC_EMIT, emit),
+ OPC(0, OPC_CUT, cut),
+ OPC(0, OPC_CHMASK, chmask),
+ OPC(0, OPC_CHSH, chsh),
+ OPC(0, OPC_FLOW_REV, flow_rev),
+ OPC(0, OPC_PREDT, predt),
+ OPC(0, OPC_PREDF, predf),
+ OPC(0, OPC_PREDE, prede),
+ OPC(0, OPC_BKT, bkt),
+ OPC(0, OPC_STKS, stks),
+ OPC(0, OPC_STKR, stkr),
+ OPC(0, OPC_XSET, xset),
+ OPC(0, OPC_XCLR, xclr),
+ OPC(0, OPC_GETONE, getone),
+ OPC(0, OPC_DBG, dbg),
+ OPC(0, OPC_SHPS, shps),
+ OPC(0, OPC_SHPE, shpe),
+
+ /* category 1: */
+ /* empty mnemonic: the name stringifies to "", so the cat1 printer is
+ * presumably responsible for the whole text (TODO confirm)
+ */
+ OPC(1, OPC_MOV, ),
+
+ /* category 2: */
+ OPC(2, OPC_ADD_F, add.f),
+ OPC(2, OPC_MIN_F, min.f),
+ OPC(2, OPC_MAX_F, max.f),
+ OPC(2, OPC_MUL_F, mul.f),
+ OPC(2, OPC_SIGN_F, sign.f),
+ OPC(2, OPC_CMPS_F, cmps.f),
+ OPC(2, OPC_ABSNEG_F, absneg.f),
+ OPC(2, OPC_CMPV_F, cmpv.f),
+ OPC(2, OPC_FLOOR_F, floor.f),
+ OPC(2, OPC_CEIL_F, ceil.f),
+ OPC(2, OPC_RNDNE_F, rndne.f),
+ OPC(2, OPC_RNDAZ_F, rndaz.f),
+ OPC(2, OPC_TRUNC_F, trunc.f),
+ OPC(2, OPC_ADD_U, add.u),
+ OPC(2, OPC_ADD_S, add.s),
+ OPC(2, OPC_SUB_U, sub.u),
+ OPC(2, OPC_SUB_S, sub.s),
+ OPC(2, OPC_CMPS_U, cmps.u),
+ OPC(2, OPC_CMPS_S, cmps.s),
+ OPC(2, OPC_MIN_U, min.u),
+ OPC(2, OPC_MIN_S, min.s),
+ OPC(2, OPC_MAX_U, max.u),
+ OPC(2, OPC_MAX_S, max.s),
+ OPC(2, OPC_ABSNEG_S, absneg.s),
+ OPC(2, OPC_AND_B, and.b),
+ OPC(2, OPC_OR_B, or.b),
+ OPC(2, OPC_NOT_B, not.b),
+ OPC(2, OPC_XOR_B, xor.b),
+ OPC(2, OPC_CMPV_U, cmpv.u),
+ OPC(2, OPC_CMPV_S, cmpv.s),
+ OPC(2, OPC_MUL_U24, mul.u24),
+ OPC(2, OPC_MUL_S24, mul.s24),
+ OPC(2, OPC_MULL_U, mull.u),
+ OPC(2, OPC_BFREV_B, bfrev.b),
+ OPC(2, OPC_CLZ_S, clz.s),
+ OPC(2, OPC_CLZ_B, clz.b),
+ OPC(2, OPC_SHL_B, shl.b),
+ OPC(2, OPC_SHR_B, shr.b),
+ OPC(2, OPC_ASHR_B, ashr.b),
+ OPC(2, OPC_BARY_F, bary.f),
+ OPC(2, OPC_MGEN_B, mgen.b),
+ OPC(2, OPC_GETBIT_B, getbit.b),
+ OPC(2, OPC_SETRM, setrm),
+ OPC(2, OPC_CBITS_B, cbits.b),
+ OPC(2, OPC_SHB, shb),
+ OPC(2, OPC_MSAD, msad),
+
+ /* category 3: */
+ OPC(3, OPC_MAD_U16, mad.u16),
+ OPC(3, OPC_MADSH_U16, madsh.u16),
+ OPC(3, OPC_MAD_S16, mad.s16),
+ OPC(3, OPC_MADSH_M16, madsh.m16),
+ OPC(3, OPC_MAD_U24, mad.u24),
+ OPC(3, OPC_MAD_S24, mad.s24),
+ OPC(3, OPC_MAD_F16, mad.f16),
+ OPC(3, OPC_MAD_F32, mad.f32),
+ OPC(3, OPC_SEL_B16, sel.b16),
+ OPC(3, OPC_SEL_B32, sel.b32),
+ OPC(3, OPC_SEL_S16, sel.s16),
+ OPC(3, OPC_SEL_S32, sel.s32),
+ OPC(3, OPC_SEL_F16, sel.f16),
+ OPC(3, OPC_SEL_F32, sel.f32),
+ OPC(3, OPC_SAD_S16, sad.s16),
+ OPC(3, OPC_SAD_S32, sad.s32),
+
+ /* category 4: */
+ OPC(4, OPC_RCP, rcp),
+ OPC(4, OPC_RSQ, rsq),
+ OPC(4, OPC_LOG2, log2),
+ OPC(4, OPC_EXP2, exp2),
+ OPC(4, OPC_SIN, sin),
+ OPC(4, OPC_COS, cos),
+ OPC(4, OPC_SQRT, sqrt),
+ OPC(4, OPC_HRSQ, hrsq),
+ OPC(4, OPC_HLOG2, hlog2),
+ OPC(4, OPC_HEXP2, hexp2),
+
+ /* category 5: */
+ OPC(5, OPC_ISAM, isam),
+ OPC(5, OPC_ISAML, isaml),
+ OPC(5, OPC_ISAMM, isamm),
+ OPC(5, OPC_SAM, sam),
+ OPC(5, OPC_SAMB, samb),
+ OPC(5, OPC_SAML, saml),
+ OPC(5, OPC_SAMGQ, samgq),
+ OPC(5, OPC_GETLOD, getlod),
+ OPC(5, OPC_CONV, conv),
+ OPC(5, OPC_CONVM, convm),
+ OPC(5, OPC_GETSIZE, getsize),
+ OPC(5, OPC_GETBUF, getbuf),
+ OPC(5, OPC_GETPOS, getpos),
+ OPC(5, OPC_GETINFO, getinfo),
+ OPC(5, OPC_DSX, dsx),
+ OPC(5, OPC_DSY, dsy),
+ OPC(5, OPC_GATHER4R, gather4r),
+ OPC(5, OPC_GATHER4G, gather4g),
+ OPC(5, OPC_GATHER4B, gather4b),
+ OPC(5, OPC_GATHER4A, gather4a),
+ OPC(5, OPC_SAMGP0, samgp0),
+ OPC(5, OPC_SAMGP1, samgp1),
+ OPC(5, OPC_SAMGP2, samgp2),
+ OPC(5, OPC_SAMGP3, samgp3),
+ OPC(5, OPC_DSXPP_1, dsxpp.1),
+ OPC(5, OPC_DSYPP_1, dsypp.1),
+ OPC(5, OPC_RGETPOS, rgetpos),
+ OPC(5, OPC_RGETINFO, rgetinfo),
+
+
+ /* category 6: */
+ OPC(6, OPC_LDG, ldg),
+ OPC(6, OPC_LDL, ldl),
+ OPC(6, OPC_LDP, ldp),
+ OPC(6, OPC_STG, stg),
+ OPC(6, OPC_STL, stl),
+ OPC(6, OPC_STP, stp),
+ OPC(6, OPC_LDIB, ldib),
+ OPC(6, OPC_G2L, g2l),
+ OPC(6, OPC_L2G, l2g),
+ OPC(6, OPC_PREFETCH, prefetch),
+ OPC(6, OPC_LDLW, ldlw),
+ OPC(6, OPC_STLW, stlw),
+ OPC(6, OPC_RESFMT, resfmt),
+ OPC(6, OPC_RESINFO, resinfo),
+ OPC(6, OPC_ATOMIC_ADD, atomic.add),
+ OPC(6, OPC_ATOMIC_SUB, atomic.sub),
+ OPC(6, OPC_ATOMIC_XCHG, atomic.xchg),
+ OPC(6, OPC_ATOMIC_INC, atomic.inc),
+ OPC(6, OPC_ATOMIC_DEC, atomic.dec),
+ OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
+ OPC(6, OPC_ATOMIC_MIN, atomic.min),
+ OPC(6, OPC_ATOMIC_MAX, atomic.max),
+ OPC(6, OPC_ATOMIC_AND, atomic.and),
+ OPC(6, OPC_ATOMIC_OR, atomic.or),
+ OPC(6, OPC_ATOMIC_XOR, atomic.xor),
+ OPC(6, OPC_LDGB, ldgb),
+ OPC(6, OPC_STGB, stgb),
+ OPC(6, OPC_STIB, stib),
+ OPC(6, OPC_LDC, ldc),
+ OPC(6, OPC_LDLV, ldlv),
+
+ /* category 7: */
+ OPC(7, OPC_BAR, bar),
+ OPC(7, OPC_FENCE, fence),
+
+
+#undef OPC
+};
+
+/* Look up the opcs[] entry for an instruction.  Note that this macro
+ * relies on a variable named 'ctx' being in scope at the call site (for
+ * ctx->gpu_id) and evaluates 'instr' more than once.
+ */
+#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))
+
+/*
+ * Print the mnemonic and operands of one instruction to ctx->out.
+ *
+ * Known opcodes (opcs[] entry with a non-NULL name) print their name and
+ * then run the entry's printer.  Anything else is reported as
+ * "unknown(cat,opc)" and handed to the generic printer of its category so
+ * the operands are still shown.
+ */
+static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr)
+{
+	/* resolve the table entry once; GETINFO() needs 'ctx' in scope */
+	const struct opc_info *info = GETINFO(instr);
+	uint32_t opc = instr_opc(instr, ctx->gpu_id);
+
+	if (info->name) {
+		fprintf(ctx->out, "%s", info->name);
+		info->print(ctx, instr);
+		return;
+	}
+
+	/* opc is unsigned, so it is printed with %u rather than %d */
+	fprintf(ctx->out, "unknown(%d,%u)", instr->opc_cat, opc);
+
+	switch (instr->opc_cat) {
+	case 0: print_instr_cat0(ctx, instr); break;
+	case 1: print_instr_cat1(ctx, instr); break;
+	case 2: print_instr_cat2(ctx, instr); break;
+	case 3: print_instr_cat3(ctx, instr); break;
+	case 4: print_instr_cat4(ctx, instr); break;
+	case 5: print_instr_cat5(ctx, instr); break;
+	case 6: print_instr_cat6(ctx, instr); break;
+	case 7: print_instr_cat7(ctx, instr); break;
+	}
+}
+
+/*
+ * Disassemble one instruction (the two dwords at dwords[0] and dwords[1])
+ * and print it as a single line on ctx->out, updating ctx->stats along
+ * the way.
+ *
+ * 'n' is the instruction index shown in the line prefix.  Returns true
+ * when the instruction is a category 0 END or CHSH.
+ */
+static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
+{
+ instr_t *instr = (instr_t *)dwords;
+ uint32_t opc = instr_opc(instr, ctx->gpu_id);
+ unsigned nop = 0;
+ /* running instruction count before this instruction; printed in the
+ * prefix and advanced for every expanded nop/repeat line below
+ */
+ unsigned cycles = ctx->stats->instructions;
+
+ fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
+ instr->opc_cat, n, cycles++, dwords[1], dwords[0]);
+
+#if 0
+ /* print unknown bits: */
+ if (debug & PRINT_RAW)
+ fprintf(ctx->out, "[%08xx_%08xx] ", dwords[1] & 0x001ff800, dwords[0] & 0x00000000);
+
+ if (debug & PRINT_VERBOSE)
+ fprintf(ctx->out, "%d,%02d ", instr->opc_cat, opc);
+#endif
+
+ /* NOTE: order flags are printed is a bit fugly.. but for now I
+ * try to match the order in llvm-a3xx disassembler for easy
+ * diff'ing..
+ */
+
+ /* 'instructions' counts every repeat, 'instlen' counts encoded
+ * instructions only
+ */
+ ctx->repeat = instr_repeat(instr);
+ ctx->stats->instructions += 1 + ctx->repeat;
+ ctx->stats->instlen++;
+
+ if (instr->sync) {
+ fprintf(ctx->out, "(sy)");
+ ctx->stats->sy++;
+ }
+ /* (ss) is only reported and counted for categories 0-4 and 7 */
+ if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) {
+ fprintf(ctx->out, "(ss)");
+ ctx->stats->ss++;
+ }
+ if (instr->jmp_tgt)
+ fprintf(ctx->out, "(jp)");
+ if ((instr->opc_cat == 0) && instr->cat0.eq)
+ fprintf(ctx->out, "(eq)");
+ if (instr_sat(instr))
+ fprintf(ctx->out, "(sat)");
+ /* Without a repeat count, the src1_r/src2_r bits of a cat2/cat3
+ * instruction are read as a nop count instead:
+ * nop = src2_r * 2 + src1_r.
+ */
+ if (ctx->repeat)
+ fprintf(ctx->out, "(rpt%d)", ctx->repeat);
+ else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r))
+ nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r;
+ else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r))
+ nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r;
+ /* those nops count both as instructions and as nops */
+ ctx->stats->instructions += nop;
+ ctx->stats->nops += nop;
+ /* NOTE(review): opc is compared without checking opc_cat here --
+ * confirm OPC_NOP cannot alias an opcode of another category.
+ */
+ if (opc == OPC_NOP)
+ ctx->stats->nops += 1 + ctx->repeat;
+ if (nop)
+ fprintf(ctx->out, "(nop%d) ", nop);
+
+ if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4)))
+ fprintf(ctx->out, "(ul)");
+
+ print_single_instr(ctx, instr);
+ fprintf(ctx->out, "\n");
+
+ process_reg_dst(ctx);
+
+ /* With EXPAND_REPEAT, categories 0-4 additionally get one "nop" line
+ * per counted nop and one re-printed copy of the instruction per
+ * repeat, with ctx->repeatidx set to 1..repeat while printing.
+ */
+ if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) {
+ int i;
+ for (i = 0; i < nop; i++) {
+ fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ",
+ levels[ctx->level], instr->opc_cat, n, cycles++);
+ fprintf(ctx->out, "nop\n");
+ }
+ for (i = 0; i < ctx->repeat; i++) {
+ ctx->repeatidx = i + 1;
+ fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ",
+ levels[ctx->level], instr->opc_cat, n, cycles++);
+
+ print_single_instr(ctx, instr);
+ fprintf(ctx->out, "\n");
+ }
+ ctx->repeatidx = 0;
+ }
+
+ return (instr->opc_cat == 0) &&
+ ((opc == OPC_END) || (opc == OPC_CHSH));
+}
+
+/*
+ * Convenience wrapper around disasm_a3xx_stat() for callers that do not
+ * need the statistics: they are collected into a local and dropped.
+ * Returns whatever disasm_a3xx_stat() returns.
+ */
+int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id)
+{
+	struct shader_stats discarded;
+	int ret;
+
+	ret = disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &discarded);
+	return ret;
+}
+
+/*
+ * Disassemble a shader to 'out' and collect statistics.
+ *
+ * dwords/sizedwords: the instruction stream and its length in dwords;
+ *                    each instruction occupies two dwords.
+ * level:             index into levels[] used for the line prefix.
+ * gpu_id:            passed on to the per-instruction decoding helpers.
+ * stats:             must be non-NULL; zeroed here, then filled in while
+ *                    printing.
+ *
+ * Once an END/CHSH instruction has been seen, disassembly stops after
+ * more than three consecutive all-zero instructions.  Always returns 0.
+ */
+int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
+		unsigned gpu_id, struct shader_stats *stats)
+{
+	struct disasm_ctx ctx;
+	int i;
+	int nop_count = 0;
+	bool has_end = false;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.out = out;
+	ctx.level = level;
+	ctx.gpu_id = gpu_id;
+	ctx.stats = stats;
+	memset(ctx.stats, 0, sizeof(*ctx.stats));
+
+	/* Every iteration reads dwords[i] and dwords[i + 1], so require both
+	 * to be inside the buffer: with an odd sizedwords the trailing dword
+	 * is ignored instead of reading one element past the end.
+	 */
+	for (i = 0; i + 1 < sizedwords; i += 2) {
+		has_end |= print_instr(&ctx, &dwords[i], i/2);
+		if (!has_end)
+			continue;
+		/* after the end marker, count all-zero instructions in a row */
+		if (dwords[i] == 0 && dwords[i + 1] == 0)
+			nop_count++;
+		else
+			nop_count = 0;
+		if (nop_count > 3)
+			break;
+	}
+
+	print_reg_stats(&ctx);
+
+	return 0;
+}
--- /dev/null
+/*
+ * Copyright © 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef DISASM_H_
+#define DISASM_H_
+
+#include <stdio.h>
+
+/* Shader stage selector; taken as the 'type' argument of disasm_a2xx(). */
+enum shader_t {
+ SHADER_VERTEX,
+ SHADER_TCS,
+ SHADER_TES,
+ SHADER_GEOM,
+ SHADER_FRAGMENT,
+ SHADER_COMPUTE,
+};
+
+/* bitmask of debug flags */
+enum debug_t {
+ PRINT_RAW = 0x1, /* dump raw hexdump */
+ PRINT_VERBOSE = 0x2, /* also print pad/unknown fields and encoding tags */
+ EXPAND_REPEAT = 0x4, /* print one extra line per (nopN)/(rptN) iteration */
+};
+
+/* Counters filled in by disasm_a3xx_stat(), which zeroes them first. */
+struct shader_stats {
+ /* instructions counts rptN (and nopN) expansions, and instlen does not */
+ int instructions, instlen;
+ /* (nopN) counts plus nop instructions, including their repeats */
+ int nops;
+ /* number of instructions printed with the (ss) / (sy) flag */
+ int ss, sy;
+ /* NOTE(review): presumably the constant-register footprint; where it
+ * is set is not covered by this comment -- confirm
+ */
+ int constlen;
+};
+
+/* a2xx shader disassembler entry point */
+int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type);
+/* Disassemble an a3xx+ shader to 'out'; same as disasm_a3xx_stat() with
+ * the statistics discarded.
+ */
+int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
+/* Disassemble an a3xx+ shader to 'out' and fill in '*stats' (must be
+ * non-NULL; it is zeroed first).
+ */
+int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
+ unsigned gpu_id, struct shader_stats *stats);
+/* presumably replaces the debug_t flag mask consulted by the
+ * disassemblers -- TODO confirm against the definition
+ */
+void disasm_set_debug(enum debug_t debug);
+
+#endif /* DISASM_H_ */
--- /dev/null
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INSTR_A2XX_H_
+#define INSTR_A2XX_H_
+
+/* applied to the bit-field structs/unions below so the compiler inserts no
+ * padding (GCC/Clang attribute syntax)
+ */
+#define PACKED __attribute__((__packed__))
+
+
+/*
+ * ALU instructions:
+ */
+
+/* Scalar ALU opcodes; stored in the 6-bit scalar_opc field of instr_alu_t.
+ * Value 41 is not assigned.
+ */
+typedef enum {
+ ADDs = 0,
+ ADD_PREVs = 1,
+ MULs = 2,
+ MUL_PREVs = 3,
+ MUL_PREV2s = 4,
+ MAXs = 5,
+ MINs = 6,
+ SETEs = 7,
+ SETGTs = 8,
+ SETGTEs = 9,
+ SETNEs = 10,
+ FRACs = 11,
+ TRUNCs = 12,
+ FLOORs = 13,
+ EXP_IEEE = 14,
+ LOG_CLAMP = 15,
+ LOG_IEEE = 16,
+ RECIP_CLAMP = 17,
+ RECIP_FF = 18,
+ RECIP_IEEE = 19,
+ RECIPSQ_CLAMP = 20,
+ RECIPSQ_FF = 21,
+ RECIPSQ_IEEE = 22,
+ MOVAs = 23,
+ MOVA_FLOORs = 24,
+ SUBs = 25,
+ SUB_PREVs = 26,
+ PRED_SETEs = 27,
+ PRED_SETNEs = 28,
+ PRED_SETGTs = 29,
+ PRED_SETGTEs = 30,
+ PRED_SET_INVs = 31,
+ PRED_SET_POPs = 32,
+ PRED_SET_CLRs = 33,
+ PRED_SET_RESTOREs = 34,
+ KILLEs = 35,
+ KILLGTs = 36,
+ KILLGTEs = 37,
+ KILLNEs = 38,
+ KILLONEs = 39,
+ SQRT_IEEE = 40,
+ MUL_CONST_0 = 42,
+ MUL_CONST_1 = 43,
+ ADD_CONST_0 = 44,
+ ADD_CONST_1 = 45,
+ SUB_CONST_0 = 46,
+ SUB_CONST_1 = 47,
+ SIN = 48,
+ COS = 49,
+ RETAIN_PREV = 50,
+} instr_scalar_opc_t;
+
+/* Vector ALU opcodes; stored in the 5-bit vector_opc field of instr_alu_t. */
+typedef enum {
+ ADDv = 0,
+ MULv = 1,
+ MAXv = 2,
+ MINv = 3,
+ SETEv = 4,
+ SETGTv = 5,
+ SETGTEv = 6,
+ SETNEv = 7,
+ FRACv = 8,
+ TRUNCv = 9,
+ FLOORv = 10,
+ MULADDv = 11,
+ CNDEv = 12,
+ CNDGTEv = 13,
+ CNDGTv = 14,
+ DOT4v = 15,
+ DOT3v = 16,
+ DOT2ADDv = 17,
+ CUBEv = 18,
+ MAX4v = 19,
+ PRED_SETE_PUSHv = 20,
+ PRED_SETNE_PUSHv = 21,
+ PRED_SETGT_PUSHv = 22,
+ PRED_SETGTE_PUSHv = 23,
+ KILLEv = 24,
+ KILLGTv = 25,
+ KILLGTEv = 26,
+ KILLNEv = 27,
+ DSTv = 28,
+ MOVAv = 29,
+} instr_vector_opc_t;
+
+/* Bit layout of one ALU instruction: three dwords (3 x 32 bits) carrying a
+ * vector opcode and a scalar opcode together with their destinations,
+ * write masks and up to three source operands.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint8_t vector_dest : 6;
+ uint8_t vector_dest_rel : 1;
+ uint8_t low_precision_16b_fp : 1;
+ uint8_t scalar_dest : 6;
+ uint8_t scalar_dest_rel : 1;
+ uint8_t export_data : 1;
+ uint8_t vector_write_mask : 4;
+ uint8_t scalar_write_mask : 4;
+ uint8_t vector_clamp : 1;
+ uint8_t scalar_clamp : 1;
+ instr_scalar_opc_t scalar_opc : 6;
+ /* dword1: */
+ uint8_t src3_swiz : 8;
+ uint8_t src2_swiz : 8;
+ uint8_t src1_swiz : 8;
+ uint8_t src3_reg_negate : 1;
+ uint8_t src2_reg_negate : 1;
+ uint8_t src1_reg_negate : 1;
+ uint8_t pred_select : 2;
+ uint8_t relative_addr : 1;
+ uint8_t const_1_rel_abs : 1;
+ uint8_t const_0_rel_abs : 1;
+ /* dword2: */
+ uint8_t src3_reg : 6;
+ uint8_t src3_reg_select : 1;
+ uint8_t src3_reg_abs : 1;
+ uint8_t src2_reg : 6;
+ uint8_t src2_reg_select : 1;
+ uint8_t src2_reg_abs : 1;
+ uint8_t src1_reg : 6;
+ uint8_t src1_reg_select : 1;
+ uint8_t src1_reg_abs : 1;
+ instr_vector_opc_t vector_opc : 5;
+ uint8_t src3_sel : 1;
+ uint8_t src2_sel : 1;
+ uint8_t src1_sel : 1;
+} instr_alu_t;
+
+
+
+/*
+ * CF instructions:
+ */
+
+/* Control-flow opcodes; stored in the 4-bit opc field that ends every
+ * instr_cf_*_t layout.
+ */
+typedef enum {
+ NOP = 0,
+ EXEC = 1,
+ EXEC_END = 2,
+ COND_EXEC = 3,
+ COND_EXEC_END = 4,
+ COND_PRED_EXEC = 5,
+ COND_PRED_EXEC_END = 6,
+ LOOP_START = 7,
+ LOOP_END = 8,
+ COND_CALL = 9,
+ RETURN = 10,
+ COND_JMP = 11,
+ ALLOC = 12,
+ COND_EXEC_PRED_CLEAN = 13,
+ COND_EXEC_PRED_CLEAN_END = 14,
+ MARK_VS_FETCH_DONE = 15,
+} instr_cf_opc_t;
+
+/* Value of the 1-bit address_mode field of the CF exec/loop/jmp_call
+ * layouts.
+ */
+typedef enum {
+ RELATIVE_ADDR = 0,
+ ABSOLUTE_ADDR = 1,
+} instr_addr_mode_t;
+
+/* Value of the 2-bit buffer_select field of instr_cf_alloc_t. */
+typedef enum {
+ SQ_NO_ALLOC = 0,
+ SQ_POSITION = 1,
+ SQ_PARAMETER_PIXEL = 2,
+ SQ_MEMORY = 3,
+} instr_alloc_type_t;
+
+/* 48-bit layout of a CF instruction in its "exec" form (one of the
+ * members of instr_cf_t).
+ */
+typedef struct PACKED {
+ uint16_t address : 9;
+ uint8_t reserved0 : 3;
+ uint8_t count : 3;
+ uint8_t yeild : 1; /* (sic) field name kept as spelled */
+ uint16_t serialize : 12;
+ uint8_t vc : 6; /* vertex cache? */
+ uint8_t bool_addr : 8;
+ uint8_t condition : 1;
+ instr_addr_mode_t address_mode : 1;
+ instr_cf_opc_t opc : 4;
+} instr_cf_exec_t;
+
+/* 48-bit layout of a CF instruction in its "loop" form (one of the
+ * members of instr_cf_t).
+ */
+typedef struct PACKED {
+ uint16_t address : 10;
+ uint8_t reserved0 : 6;
+ uint8_t loop_id : 5;
+ uint32_t reserved1 : 22;
+ instr_addr_mode_t address_mode : 1;
+ instr_cf_opc_t opc : 4;
+} instr_cf_loop_t;
+
+/* 48-bit layout of a CF instruction in its "jmp_call" form (one of the
+ * members of instr_cf_t).
+ */
+typedef struct PACKED {
+ uint16_t address : 10;
+ uint8_t reserved0 : 3;
+ uint8_t force_call : 1;
+ uint8_t predicated_jmp : 1;
+ uint32_t reserved1 : 18;
+ uint8_t direction : 1;
+ uint8_t bool_addr : 8;
+ uint8_t condition : 1;
+ instr_addr_mode_t address_mode : 1;
+ instr_cf_opc_t opc : 4;
+} instr_cf_jmp_call_t;
+
+/* 48-bit layout of a CF instruction in its "alloc" form (one of the
+ * members of instr_cf_t).
+ */
+typedef struct PACKED {
+ uint8_t size : 4;
+ uint64_t reserved0 : 36;
+ uint8_t no_serial : 1;
+ instr_alloc_type_t buffer_select : 2;
+ uint8_t alloc_mode : 1;
+ instr_cf_opc_t opc : 4;
+} instr_cf_alloc_t;
+
+/* One 48-bit CF instruction viewed through any of its layouts.  The
+ * anonymous struct gives access to 'opc' (which follows 44 payload bits in
+ * every layout) without first knowing which layout applies.
+ */
+typedef union PACKED {
+ instr_cf_exec_t exec;
+ instr_cf_loop_t loop;
+ instr_cf_jmp_call_t jmp_call;
+ instr_cf_alloc_t alloc;
+ struct PACKED {
+ uint64_t dummy : 44;
+ instr_cf_opc_t opc : 4;
+ };
+} instr_cf_t;
+
+
+
+/*
+ * FETCH instructions:
+ */
+
+/* Fetch opcodes; stored in the 5-bit opc field that starts every
+ * instr_fetch_*_t layout.  The numbering is sparse.
+ */
+typedef enum {
+ VTX_FETCH = 0,
+ TEX_FETCH = 1,
+ TEX_GET_BORDER_COLOR_FRAC = 16,
+ TEX_GET_COMP_TEX_LOD = 17,
+ TEX_GET_GRADIENTS = 18,
+ TEX_GET_WEIGHTS = 19,
+ TEX_SET_TEX_LOD = 24,
+ TEX_SET_GRADIENTS_H = 25,
+ TEX_SET_GRADIENTS_V = 26,
+ TEX_RESERVED_4 = 27,
+} instr_fetch_opc_t;
+
+/* Value of the 2-bit mag/min/mip and vol_mag/vol_min filter fields of
+ * instr_fetch_tex_t.
+ */
+typedef enum {
+ TEX_FILTER_POINT = 0,
+ TEX_FILTER_LINEAR = 1,
+ TEX_FILTER_BASEMAP = 2, /* only applicable for mip-filter */
+ TEX_FILTER_USE_FETCH_CONST = 3,
+} instr_tex_filter_t;
+
+/* Value of the 3-bit aniso_filter field of instr_fetch_tex_t; value 6 is
+ * not assigned.
+ */
+typedef enum {
+ ANISO_FILTER_DISABLED = 0,
+ ANISO_FILTER_MAX_1_1 = 1,
+ ANISO_FILTER_MAX_2_1 = 2,
+ ANISO_FILTER_MAX_4_1 = 3,
+ ANISO_FILTER_MAX_8_1 = 4,
+ ANISO_FILTER_MAX_16_1 = 5,
+ ANISO_FILTER_USE_FETCH_CONST = 7,
+} instr_aniso_filter_t;
+
+/* Value of the 3-bit arbitrary_filter field of instr_fetch_tex_t; value 6
+ * is not assigned.
+ */
+typedef enum {
+ ARBITRARY_FILTER_2X4_SYM = 0,
+ ARBITRARY_FILTER_2X4_ASYM = 1,
+ ARBITRARY_FILTER_4X2_SYM = 2,
+ ARBITRARY_FILTER_4X2_ASYM = 3,
+ ARBITRARY_FILTER_4X4_SYM = 4,
+ ARBITRARY_FILTER_4X4_ASYM = 5,
+ ARBITRARY_FILTER_USE_FETCH_CONST = 7,
+} instr_arbitrary_filter_t;
+
+/* Value of the 1-bit sample_location field of instr_fetch_tex_t. */
+typedef enum {
+ SAMPLE_CENTROID = 0,
+ SAMPLE_CENTER = 1,
+} instr_sample_loc_t;
+
+/* Surface format code held in the 6-bit 'format' field of
+ * instr_fetch_vtx_t; a plain unsigned, no enumerators are defined here.
+ */
+typedef unsigned instr_surf_fmt_t;
+
+/* Bit layout of a texture fetch instruction: three dwords (3 x 32 bits). */
+typedef struct PACKED {
+ /* dword0: */
+ instr_fetch_opc_t opc : 5;
+ uint8_t src_reg : 6;
+ uint8_t src_reg_am : 1;
+ uint8_t dst_reg : 6;
+ uint8_t dst_reg_am : 1;
+ uint8_t fetch_valid_only : 1;
+ uint8_t const_idx : 5;
+ uint8_t tx_coord_denorm : 1;
+ uint8_t src_swiz : 6;
+ /* dword1: */
+ uint16_t dst_swiz : 12;
+ instr_tex_filter_t mag_filter : 2;
+ instr_tex_filter_t min_filter : 2;
+ instr_tex_filter_t mip_filter : 2;
+ instr_aniso_filter_t aniso_filter : 3;
+ instr_arbitrary_filter_t arbitrary_filter : 3;
+ instr_tex_filter_t vol_mag_filter : 2;
+ instr_tex_filter_t vol_min_filter : 2;
+ uint8_t use_comp_lod : 1;
+ uint8_t use_reg_lod : 2;
+ uint8_t pred_select : 1;
+ /* dword2: */
+ uint8_t use_reg_gradients : 1;
+ instr_sample_loc_t sample_location : 1;
+ uint8_t lod_bias : 7;
+ uint8_t unused : 7;
+ uint8_t offset_x : 5;
+ uint8_t offset_y : 5;
+ uint8_t offset_z : 5;
+ uint8_t pred_condition : 1;
+} instr_fetch_tex_t;
+
+/* Bit layout of a vertex fetch instruction: three dwords (3 x 32 bits).
+ * The first 20 bits (opc, src/dst register and their _am bits) have the
+ * same widths and order as in instr_fetch_tex_t.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ instr_fetch_opc_t opc : 5;
+ uint8_t src_reg : 6;
+ uint8_t src_reg_am : 1;
+ uint8_t dst_reg : 6;
+ uint8_t dst_reg_am : 1;
+ uint8_t must_be_one : 1;
+ uint8_t const_index : 5;
+ uint8_t const_index_sel : 2;
+ uint8_t reserved0 : 3;
+ uint8_t src_swiz : 2;
+ /* dword1: */
+ uint16_t dst_swiz : 12;
+ uint8_t format_comp_all : 1; /* '1' for signed, '0' for unsigned? */
+ uint8_t num_format_all : 1; /* '0' for normalized, '1' for unnormalized */
+ uint8_t signed_rf_mode_all : 1;
+ uint8_t reserved1 : 1;
+ instr_surf_fmt_t format : 6;
+ uint8_t reserved2 : 1;
+ uint8_t exp_adjust_all : 7;
+ uint8_t reserved3 : 1;
+ uint8_t pred_select : 1;
+ /* dword2: */
+ uint8_t stride : 8;
+ /* possibly offset and reserved4 are swapped on a200? */
+ uint8_t offset : 8;
+ uint8_t reserved4 : 8;
+ uint8_t reserved5 : 7;
+ uint8_t pred_condition : 1;
+} instr_fetch_vtx_t;
+
+/* A fetch instruction viewed either as a texture fetch (tex), as a vertex
+ * fetch (vtx), or through the anonymous struct, which exposes only the
+ * 5-bit opc field that both variants declare first; the remaining 91 bits
+ * are covered by dummy fields.
+ */
+typedef union PACKED {
+ instr_fetch_tex_t tex;
+ instr_fetch_vtx_t vtx;
+ struct PACKED {
+ /* dword0: */
+ instr_fetch_opc_t opc : 5;
+ uint32_t dummy0 : 27;
+ /* dword1: */
+ uint32_t dummy1 : 32;
+ /* dword2: */
+ uint32_t dummy2 : 32;
+ };
+} instr_fetch_t;
+
+#endif /* INSTR_H_ */
--- /dev/null
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INSTR_A3XX_H_
+#define INSTR_A3XX_H_
+
+#define PACKED __attribute__((__packed__))
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/* Optional hook invoked by ir3_assert() when a check fails.  Declared
+ * weak, so a program need not define it; ir3_assert() tests the symbol
+ * for null before calling it.  Declared noreturn: a handler must not
+ * return to the failing code.
+ */
+void ir3_assert_handler(const char *expr, const char *file, int line,
+ const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));
+
+/* A wrapper for assert() that allows overriding handling of a failed
+ * assert. This is needed for tools like crashdec which may want to
+ * attempt to disassemble memory that might not actually be valid
+ * instructions.
+ *
+ * Note that when no handler is linked in, expr is evaluated a second
+ * time by the fallback assert(), so it should be free of side effects;
+ * if NDEBUG is defined as well, a failed check is silently ignored.
+ */
+#define ir3_assert(expr) do { \
+ if (!(expr)) { \
+ if (ir3_assert_handler) { \
+ ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
+ } \
+ assert(expr); \
+ } \
+ } while (0)
+
+/* size of largest OPC field of all the instruction categories: */
+#define NOPC_BITS 6
+
+/* Pack an instruction category and a per-category opcode into a single
+ * opc_t value: the category is placed above the low NOPC_BITS bits.
+ * Both arguments are parenthesized so that an argument containing an
+ * operator of lower precedence than '|' (e.g. a ?: expression) still
+ * expands to the intended value.
+ */
+#define _OPC(cat, opc) (((cat) << NOPC_BITS) | (opc))
+
+/* All known opcodes.  Each value is built with _OPC(category, opcode), so
+ * the instruction category and the per-category opcode can be recovered
+ * from a single opc_t value.  Gaps in the numbering are marked where the
+ * intermediate encodings are known to be invalid.
+ */
+typedef enum {
+ /* category 0: */
+ OPC_NOP = _OPC(0, 0),
+ OPC_B = _OPC(0, 1),
+ OPC_JUMP = _OPC(0, 2),
+ OPC_CALL = _OPC(0, 3),
+ OPC_RET = _OPC(0, 4),
+ OPC_KILL = _OPC(0, 5),
+ OPC_END = _OPC(0, 6),
+ OPC_EMIT = _OPC(0, 7),
+ OPC_CUT = _OPC(0, 8),
+ OPC_CHMASK = _OPC(0, 9),
+ OPC_CHSH = _OPC(0, 10),
+ OPC_FLOW_REV = _OPC(0, 11),
+
+ OPC_BKT = _OPC(0, 16),
+ OPC_STKS = _OPC(0, 17),
+ OPC_STKR = _OPC(0, 18),
+ OPC_XSET = _OPC(0, 19),
+ OPC_XCLR = _OPC(0, 20),
+ OPC_GETONE = _OPC(0, 21),
+ OPC_DBG = _OPC(0, 22),
+ OPC_SHPS = _OPC(0, 23), /* shader prologue start */
+ OPC_SHPE = _OPC(0, 24), /* shader prologue end */
+
+ OPC_PREDT = _OPC(0, 29), /* predicated true */
+ OPC_PREDF = _OPC(0, 30), /* predicated false */
+ OPC_PREDE = _OPC(0, 31), /* predicated end */
+
+ /* category 1: */
+ OPC_MOV = _OPC(1, 0),
+
+ /* category 2: */
+ OPC_ADD_F = _OPC(2, 0),
+ OPC_MIN_F = _OPC(2, 1),
+ OPC_MAX_F = _OPC(2, 2),
+ OPC_MUL_F = _OPC(2, 3),
+ OPC_SIGN_F = _OPC(2, 4),
+ OPC_CMPS_F = _OPC(2, 5),
+ OPC_ABSNEG_F = _OPC(2, 6),
+ OPC_CMPV_F = _OPC(2, 7),
+ /* 8 - invalid */
+ OPC_FLOOR_F = _OPC(2, 9),
+ OPC_CEIL_F = _OPC(2, 10),
+ OPC_RNDNE_F = _OPC(2, 11),
+ OPC_RNDAZ_F = _OPC(2, 12),
+ OPC_TRUNC_F = _OPC(2, 13),
+ /* 14-15 - invalid */
+ OPC_ADD_U = _OPC(2, 16),
+ OPC_ADD_S = _OPC(2, 17),
+ OPC_SUB_U = _OPC(2, 18),
+ OPC_SUB_S = _OPC(2, 19),
+ OPC_CMPS_U = _OPC(2, 20),
+ OPC_CMPS_S = _OPC(2, 21),
+ OPC_MIN_U = _OPC(2, 22),
+ OPC_MIN_S = _OPC(2, 23),
+ OPC_MAX_U = _OPC(2, 24),
+ OPC_MAX_S = _OPC(2, 25),
+ OPC_ABSNEG_S = _OPC(2, 26),
+ /* 27 - invalid */
+ OPC_AND_B = _OPC(2, 28),
+ OPC_OR_B = _OPC(2, 29),
+ OPC_NOT_B = _OPC(2, 30),
+ OPC_XOR_B = _OPC(2, 31),
+ /* 32 - invalid */
+ OPC_CMPV_U = _OPC(2, 33),
+ OPC_CMPV_S = _OPC(2, 34),
+ /* 35-47 - invalid */
+ OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
+ OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
+ OPC_MULL_U = _OPC(2, 50),
+ OPC_BFREV_B = _OPC(2, 51),
+ OPC_CLZ_S = _OPC(2, 52),
+ OPC_CLZ_B = _OPC(2, 53),
+ OPC_SHL_B = _OPC(2, 54),
+ OPC_SHR_B = _OPC(2, 55),
+ OPC_ASHR_B = _OPC(2, 56),
+ OPC_BARY_F = _OPC(2, 57),
+ OPC_MGEN_B = _OPC(2, 58),
+ OPC_GETBIT_B = _OPC(2, 59),
+ OPC_SETRM = _OPC(2, 60),
+ OPC_CBITS_B = _OPC(2, 61),
+ OPC_SHB = _OPC(2, 62),
+ OPC_MSAD = _OPC(2, 63),
+
+ /* category 3: */
+ OPC_MAD_U16 = _OPC(3, 0),
+ OPC_MADSH_U16 = _OPC(3, 1),
+ OPC_MAD_S16 = _OPC(3, 2),
+ OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
+ OPC_MAD_U24 = _OPC(3, 4),
+ OPC_MAD_S24 = _OPC(3, 5),
+ OPC_MAD_F16 = _OPC(3, 6),
+ OPC_MAD_F32 = _OPC(3, 7),
+ OPC_SEL_B16 = _OPC(3, 8),
+ OPC_SEL_B32 = _OPC(3, 9),
+ OPC_SEL_S16 = _OPC(3, 10),
+ OPC_SEL_S32 = _OPC(3, 11),
+ OPC_SEL_F16 = _OPC(3, 12),
+ OPC_SEL_F32 = _OPC(3, 13),
+ OPC_SAD_S16 = _OPC(3, 14),
+ OPC_SAD_S32 = _OPC(3, 15),
+
+ /* category 4: */
+ OPC_RCP = _OPC(4, 0),
+ OPC_RSQ = _OPC(4, 1),
+ OPC_LOG2 = _OPC(4, 2),
+ OPC_EXP2 = _OPC(4, 3),
+ OPC_SIN = _OPC(4, 4),
+ OPC_COS = _OPC(4, 5),
+ OPC_SQRT = _OPC(4, 6),
+ /* NOTE that these are 8+opc from their highp equivs, so it's possible
+ * that the high order bit in the opc field has been repurposed for
+ * half-precision use? But note that other ops (rcp/sin/cos/sqrt)
+ * still use the same opc as highp
+ */
+ OPC_HRSQ = _OPC(4, 9),
+ OPC_HLOG2 = _OPC(4, 10),
+ OPC_HEXP2 = _OPC(4, 11),
+
+ /* category 5: */
+ OPC_ISAM = _OPC(5, 0),
+ OPC_ISAML = _OPC(5, 1),
+ OPC_ISAMM = _OPC(5, 2),
+ OPC_SAM = _OPC(5, 3),
+ OPC_SAMB = _OPC(5, 4),
+ OPC_SAML = _OPC(5, 5),
+ OPC_SAMGQ = _OPC(5, 6),
+ OPC_GETLOD = _OPC(5, 7),
+ OPC_CONV = _OPC(5, 8),
+ OPC_CONVM = _OPC(5, 9),
+ OPC_GETSIZE = _OPC(5, 10),
+ OPC_GETBUF = _OPC(5, 11),
+ OPC_GETPOS = _OPC(5, 12),
+ OPC_GETINFO = _OPC(5, 13),
+ OPC_DSX = _OPC(5, 14),
+ OPC_DSY = _OPC(5, 15),
+ OPC_GATHER4R = _OPC(5, 16),
+ OPC_GATHER4G = _OPC(5, 17),
+ OPC_GATHER4B = _OPC(5, 18),
+ OPC_GATHER4A = _OPC(5, 19),
+ OPC_SAMGP0 = _OPC(5, 20),
+ OPC_SAMGP1 = _OPC(5, 21),
+ OPC_SAMGP2 = _OPC(5, 22),
+ OPC_SAMGP3 = _OPC(5, 23),
+ OPC_DSXPP_1 = _OPC(5, 24),
+ OPC_DSYPP_1 = _OPC(5, 25),
+ OPC_RGETPOS = _OPC(5, 26),
+ OPC_RGETINFO = _OPC(5, 27),
+
+ /* category 6: */
+ OPC_LDG = _OPC(6, 0), /* load-global */
+ OPC_LDL = _OPC(6, 1),
+ OPC_LDP = _OPC(6, 2),
+ OPC_STG = _OPC(6, 3), /* store-global */
+ OPC_STL = _OPC(6, 4),
+ OPC_STP = _OPC(6, 5),
+ OPC_LDIB = _OPC(6, 6),
+ OPC_G2L = _OPC(6, 7),
+ OPC_L2G = _OPC(6, 8),
+ OPC_PREFETCH = _OPC(6, 9),
+ OPC_LDLW = _OPC(6, 10),
+ OPC_STLW = _OPC(6, 11),
+ OPC_RESFMT = _OPC(6, 14),
+ OPC_RESINFO = _OPC(6, 15),
+ OPC_ATOMIC_ADD = _OPC(6, 16),
+ OPC_ATOMIC_SUB = _OPC(6, 17),
+ OPC_ATOMIC_XCHG = _OPC(6, 18),
+ OPC_ATOMIC_INC = _OPC(6, 19),
+ OPC_ATOMIC_DEC = _OPC(6, 20),
+ OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
+ OPC_ATOMIC_MIN = _OPC(6, 22),
+ OPC_ATOMIC_MAX = _OPC(6, 23),
+ OPC_ATOMIC_AND = _OPC(6, 24),
+ OPC_ATOMIC_OR = _OPC(6, 25),
+ OPC_ATOMIC_XOR = _OPC(6, 26),
+ OPC_LDGB = _OPC(6, 27),
+ OPC_STGB = _OPC(6, 28),
+ OPC_STIB = _OPC(6, 29),
+ OPC_LDC = _OPC(6, 30),
+ OPC_LDLV = _OPC(6, 31),
+
+ /* category 7: */
+ OPC_BAR = _OPC(7, 0),
+ OPC_FENCE = _OPC(7, 1),
+} opc_t;
+
+/* Inverse of _OPC(): split an opc_t value back into its instruction
+ * category (the bits above the low NOPC_BITS bits) and its per-category
+ * opcode (the low NOPC_BITS bits).
+ */
+#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
+#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
+
+/* Element data type codes: float, unsigned and signed integer variants.
+ * The bit width of each code is reported by type_size().
+ */
+typedef enum {
+ TYPE_F16 = 0,
+ TYPE_F32 = 1,
+ TYPE_U16 = 2,
+ TYPE_U32 = 3,
+ TYPE_S16 = 4,
+ TYPE_S32 = 5,
+ TYPE_U8 = 6,
+ TYPE_S8 = 7, // XXX I assume?
+} type_t;
+
+/* Width in bits of one element of the given type: 32, 16 or 8.  An
+ * unrecognized value trips ir3_assert(); if execution continues past
+ * that, 0 is returned.
+ */
+static inline uint32_t type_size(type_t type)
+{
+	uint32_t bits;
+
+	switch (type) {
+	case TYPE_U8:
+	case TYPE_S8:
+		bits = 8;
+		break;
+	case TYPE_F16:
+	case TYPE_U16:
+	case TYPE_S16:
+		bits = 16;
+		break;
+	case TYPE_F32:
+	case TYPE_U32:
+	case TYPE_S32:
+		bits = 32;
+		break;
+	default:
+		ir3_assert(0);  /* invalid type */
+		bits = 0;
+		break;
+	}
+
+	return bits;
+}
+
+/* Non-zero if type is one of the floating point types (F16 or F32). */
+static inline int type_float(type_t type)
+{
+	switch (type) {
+	case TYPE_F16:
+	case TYPE_F32:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Non-zero if type is one of the unsigned integer types (U8, U16 or U32). */
+static inline int type_uint(type_t type)
+{
+	switch (type) {
+	case TYPE_U8:
+	case TYPE_U16:
+	case TYPE_U32:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Non-zero if type is one of the signed integer types (S8, S16 or S32). */
+static inline int type_sint(type_t type)
+{
+	switch (type) {
+	case TYPE_S8:
+	case TYPE_S16:
+	case TYPE_S32:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* A register operand.  All members overlay the same storage: either a
+ * register number plus 2-bit component, or a signed 11-bit immediate.
+ * The dummyN/idummyN members are unsigned/signed N-bit views of the same
+ * storage.
+ */
+typedef union PACKED {
+ /* normal gpr or const src register: */
+ struct PACKED {
+ uint32_t comp : 2;
+ uint32_t num : 10;
+ };
+ /* for immediate val: */
+ int32_t iim_val : 11;
+ /* to make compiler happy: */
+ uint32_t dummy32;
+ uint32_t dummy10 : 10;
+ int32_t idummy10 : 10;
+ uint32_t dummy11 : 11;
+ uint32_t dummy12 : 12;
+ uint32_t dummy13 : 13;
+ uint32_t dummy8 : 8;
+ int32_t idummy13 : 13;
+ int32_t idummy8 : 8;
+} reg_t;
+
+/* special registers: */
+#define REG_A0 61 /* address register */
+#define REG_P0 62 /* predicate register */
+
+/* Non-zero if reg's register number is one of the special registers
+ * above (the component part of reg is not looked at).
+ */
+static inline int reg_special(reg_t reg)
+{
+	switch (reg.num) {
+	case REG_A0:
+	case REG_P0:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Branch type codes, as stored in the 3-bit brtype field of
+ * instr_cat0_t.  The comment on each value is the matching mnemonic.
+ */
+typedef enum {
+ BRANCH_PLAIN = 0, /* br */
+ BRANCH_OR = 1, /* brao */
+ BRANCH_AND = 2, /* braa */
+ BRANCH_CONST = 3, /* brac */
+ BRANCH_ANY = 4, /* bany */
+ BRANCH_ALL = 5, /* ball */
+ BRANCH_X = 6, /* brax ??? */
+} brtype_t;
+
+/* Encoding of a category 0 instruction (two dwords).  dword0 holds a
+ * signed immediate whose width depends on the generation: 16 bits
+ * (a3xx), 20 bits (a4xx) or 32 bits (a5xx).  The opcode is split across
+ * two fields: opc holds the low 4 bits and opc_hi supplies bit 4.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ struct PACKED {
+ int16_t immed : 16;
+ uint32_t dummy1 : 16;
+ } a3xx;
+ struct PACKED {
+ int32_t immed : 20;
+ uint32_t dummy1 : 12;
+ } a4xx;
+ struct PACKED {
+ int32_t immed : 32;
+ } a5xx;
+ };
+
+ /* dword1: */
+ uint32_t idx : 5; /* brac.N index */
+ uint32_t brtype : 3; /* branch type, see brtype_t */
+ uint32_t repeat : 3;
+ uint32_t dummy3 : 1;
+ uint32_t ss : 1;
+ uint32_t inv1 : 1;
+ uint32_t comp1 : 2;
+ uint32_t eq : 1;
+ uint32_t opc_hi : 1; /* at least one bit */
+ uint32_t dummy4 : 2;
+ uint32_t inv0 : 1;
+ uint32_t comp0 : 2; /* component for first src */
+ uint32_t opc : 4;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat0_t;
+
+/* Encoding of a category 1 instruction (two dwords).  dword0 is the
+ * source operand and is a union of three views: a plain register, an
+ * address-relative source, or a 32-bit immediate (signed, unsigned or
+ * float).  There is no opcode field in this layout.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ /* for normal src register: */
+ struct PACKED {
+ uint32_t src : 11;
+ /* at least low bit of pad must be zero or it will
+ * look like a address relative src
+ */
+ uint32_t pad : 21;
+ };
+ /* for address relative: */
+ struct PACKED {
+ int32_t off : 10;
+ uint32_t src_rel_c : 1;
+ uint32_t src_rel : 1;
+ uint32_t unknown : 20;
+ };
+ /* for immediate: */
+ int32_t iim_val;
+ uint32_t uim_val;
+ float fim_val;
+ };
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 3;
+ uint32_t src_r : 1;
+ uint32_t ss : 1;
+ uint32_t ul : 1;
+ uint32_t dst_type : 3;
+ uint32_t dst_rel : 1;
+ uint32_t src_type : 3;
+ uint32_t src_c : 1;
+ uint32_t src_im : 1;
+ uint32_t even : 1;
+ uint32_t pos_inf : 1;
+ uint32_t must_be_0 : 2;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat1_t;
+
+/* Encoding of a category 2 instruction (two dwords).  dword0 holds two
+ * 16-bit source operands; each is a union of three views of the same
+ * bits: the plain form (anonymous struct), the relative form (rel1/rel2)
+ * and the const form (c1/c2).
+ */
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ struct PACKED {
+ uint32_t src1 : 11;
+ uint32_t must_be_zero1: 2;
+ uint32_t src1_im : 1; /* immediate */
+ uint32_t src1_neg : 1; /* negate */
+ uint32_t src1_abs : 1; /* absolute value */
+ };
+ struct PACKED {
+ uint32_t src1 : 10;
+ uint32_t src1_c : 1; /* relative-const */
+ uint32_t src1_rel : 1; /* relative address */
+ uint32_t must_be_zero : 1;
+ uint32_t dummy : 3;
+ } rel1;
+ struct PACKED {
+ uint32_t src1 : 12;
+ uint32_t src1_c : 1; /* const */
+ uint32_t dummy : 3;
+ } c1;
+ };
+
+ union PACKED {
+ struct PACKED {
+ uint32_t src2 : 11;
+ uint32_t must_be_zero2: 2;
+ uint32_t src2_im : 1; /* immediate */
+ uint32_t src2_neg : 1; /* negate */
+ uint32_t src2_abs : 1; /* absolute value */
+ };
+ struct PACKED {
+ uint32_t src2 : 10;
+ uint32_t src2_c : 1; /* relative-const */
+ uint32_t src2_rel : 1; /* relative address */
+ uint32_t must_be_zero : 1;
+ uint32_t dummy : 3;
+ } rel2;
+ struct PACKED {
+ uint32_t src2 : 12;
+ uint32_t src2_c : 1; /* const */
+ uint32_t dummy : 3;
+ } c2;
+ };
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 2;
+ uint32_t sat : 1;
+ uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
+ uint32_t ss : 1;
+ uint32_t ul : 1; /* dunno */
+ uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
+ uint32_t ei : 1;
+ uint32_t cond : 3;
+ uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
+ uint32_t full : 1; /* not half */
+ uint32_t opc : 6;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat2_t;
+
+/* Encoding of a category 3 (three-source) instruction (two dwords).
+ * dword0 holds src1 and src3 as two 16-bit operands, each a union of a
+ * plain, a relative (rel1/rel2) and a const (c1/c2) view; the 8-bit src2
+ * lives in dword1.  Note that some src2 modifier bits (src2_c, src2_r,
+ * src2_neg) are stored alongside src1/src3 in dword0.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ struct PACKED {
+ uint32_t src1 : 11;
+ uint32_t must_be_zero1: 2;
+ uint32_t src2_c : 1;
+ uint32_t src1_neg : 1;
+ uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
+ };
+ struct PACKED {
+ uint32_t src1 : 10;
+ uint32_t src1_c : 1;
+ uint32_t src1_rel : 1;
+ uint32_t must_be_zero : 1;
+ uint32_t dummy : 3;
+ } rel1;
+ struct PACKED {
+ uint32_t src1 : 12;
+ uint32_t src1_c : 1;
+ uint32_t dummy : 3;
+ } c1;
+ };
+
+ union PACKED {
+ struct PACKED {
+ uint32_t src3 : 11;
+ uint32_t must_be_zero2: 2;
+ uint32_t src3_r : 1;
+ uint32_t src2_neg : 1;
+ uint32_t src3_neg : 1;
+ };
+ struct PACKED {
+ uint32_t src3 : 10;
+ uint32_t src3_c : 1;
+ uint32_t src3_rel : 1;
+ uint32_t must_be_zero : 1;
+ uint32_t dummy : 3;
+ } rel2;
+ struct PACKED {
+ uint32_t src3 : 12;
+ uint32_t src3_c : 1;
+ uint32_t dummy : 3;
+ } c2;
+ };
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 2;
+ uint32_t sat : 1;
+ uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
+ uint32_t ss : 1;
+ uint32_t ul : 1;
+ uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
+ uint32_t src2 : 8;
+ uint32_t opc : 4;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat3_t;
+
+/* Decide, from the opcode alone, whether a cat3 instruction is "full"
+ * (as opposed to half): false for the 16-bit opcodes listed below and
+ * for OPC_SAD_S32, true for every other cat3 opcode.
+ */
+static inline bool instr_cat3_full(instr_cat3_t *cat3)
+{
+	bool half;
+
+	switch (_OPC(3, cat3->opc)) {
+	case OPC_MAD_F16:
+	case OPC_MAD_U16:
+	case OPC_MAD_S16:
+	case OPC_SEL_B16:
+	case OPC_SEL_S16:
+	case OPC_SEL_F16:
+	case OPC_SAD_S16:
+	case OPC_SAD_S32:  // really??
+		half = true;
+		break;
+	default:
+		half = false;
+		break;
+	}
+
+	return !half;
+}
+
+/* Encoding of a category 4 (single-source) instruction (two dwords).
+ * The low 16 bits of dword0 hold the source operand as a union of a
+ * plain, a relative (rel) and a const (c) view; the upper 16 bits appear
+ * to be ignored.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ struct PACKED {
+ uint32_t src : 11;
+ uint32_t must_be_zero1: 2;
+ uint32_t src_im : 1; /* immediate */
+ uint32_t src_neg : 1; /* negate */
+ uint32_t src_abs : 1; /* absolute value */
+ };
+ struct PACKED {
+ uint32_t src : 10;
+ uint32_t src_c : 1; /* relative-const */
+ uint32_t src_rel : 1; /* relative address */
+ uint32_t must_be_zero : 1;
+ uint32_t dummy : 3;
+ } rel;
+ struct PACKED {
+ uint32_t src : 12;
+ uint32_t src_c : 1; /* const */
+ uint32_t dummy : 3;
+ } c;
+ };
+ uint32_t dummy1 : 16; /* seem to be ignored */
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 2;
+ uint32_t sat : 1;
+ uint32_t src_r : 1;
+ uint32_t ss : 1;
+ uint32_t ul : 1;
+ uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
+ uint32_t dummy2 : 5; /* seem to be ignored */
+ uint32_t full : 1; /* not half */
+ uint32_t opc : 6;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat4_t;
+
+/* With is_s2en_bindless = 1 (see instr_cat5_t), this determines whether
+ * bindless is enabled and if so, how to get the (base, index) pair for
+ * both sampler and texture.  There is a single base embedded in the
+ * instruction, which is always used for the texture.
+ *
+ * The value is stored in the 3-bit desc_mode field of the s2en_bindless
+ * view of instr_cat5_t.
+ */
+typedef enum {
+ /* Use traditional GL binding model, get texture and sampler index
+ * from src3 which is not presumed to be uniform. This is
+ * backwards-compatible with earlier generations, where this field was
+ * always 0 and nonuniform-indexed sampling always worked.
+ */
+ CAT5_NONUNIFORM = 0,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the sampler
+ * and texture index come from src3 which is presumed to be uniform.
+ */
+ CAT5_BINDLESS_A1_UNIFORM = 1,
+
+ /* The texture and sampler share the same base, and the sampler and
+ * texture index come from src3 which is *not* presumed to be uniform.
+ */
+ CAT5_BINDLESS_NONUNIFORM = 2,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the sampler
+ * and texture index come from src3 which is *not* presumed to be
+ * uniform.
+ */
+ CAT5_BINDLESS_A1_NONUNIFORM = 3,
+
+ /* Use traditional GL binding model, get texture and sampler index
+ * from src3 which is presumed to be uniform.
+ */
+ CAT5_UNIFORM = 4,
+
+ /* The texture and sampler share the same base, and the sampler and
+ * texture index come from src3 which is presumed to be uniform.
+ */
+ CAT5_BINDLESS_UNIFORM = 5,
+
+ /* The texture and sampler share the same base, get sampler index from low
+ * 4 bits of src3 and texture index from high 4 bits.
+ */
+ CAT5_BINDLESS_IMM = 6,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the texture
+ * index comes from the next 8 bits of a1.x. The sampler index is an
+ * immediate in src3.
+ */
+ CAT5_BINDLESS_A1_IMM = 7,
+} cat5_desc_mode_t;
+
+/* Encoding of a category 5 instruction (two dwords).  dword0 has three
+ * overlapping views that share the full/src1/src2 prefix: 'norm' with
+ * immediate samp/tex indices, 's2en_bindless' with a third source
+ * register plus descriptor mode, and an anonymous view exposing only the
+ * shared prefix.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ /* normal case: */
+ struct PACKED {
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t src2 : 8;
+ uint32_t dummy1 : 4; /* seem to be ignored */
+ uint32_t samp : 4;
+ uint32_t tex : 7;
+ } norm;
+ /* s2en case: */
+ struct PACKED {
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t src2 : 8;
+ uint32_t dummy1 : 2;
+ uint32_t base_hi : 2;
+ uint32_t src3 : 8;
+ uint32_t desc_mode : 3;
+ } s2en_bindless;
+ /* same in either case: */
+ // XXX I think, confirm this
+ struct PACKED {
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t src2 : 8;
+ uint32_t pad : 15;
+ };
+ };
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t wrmask : 4; /* write-mask */
+ uint32_t type : 3;
+ uint32_t base_lo : 1; /* used with bindless */
+ uint32_t is_3d : 1;
+
+ uint32_t is_a : 1;
+ uint32_t is_s : 1;
+ uint32_t is_s2en_bindless : 1;
+ uint32_t is_o : 1;
+ uint32_t is_p : 1;
+
+ uint32_t opc : 5;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat5_t;
+
+/* dword0 encoding for src_off: [src1 + off], src2:
+ *
+ * The first bit (mustbe1) occupies the same position as src_off in the
+ * generic instr_cat6_t view; dword1 is left undecoded here.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t mustbe1 : 1;
+ int32_t off : 13;
+ uint32_t src1 : 8;
+ uint32_t src1_im : 1;
+ uint32_t src2_im : 1;
+ uint32_t src2 : 8;
+
+ /* dword1: */
+ uint32_t dword1;
+} instr_cat6a_t;
+
+/* dword0 encoding for !src_off: [src1], src2
+ *
+ * The first bit (mustbe0) occupies the same position as src_off in the
+ * generic instr_cat6_t view; dword1 is left undecoded here.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t mustbe0 : 1;
+ uint32_t src1 : 13;
+ uint32_t ignore0 : 8;
+ uint32_t src1_im : 1;
+ uint32_t src2_im : 1;
+ uint32_t src2 : 8;
+
+ /* dword1: */
+ uint32_t dword1;
+} instr_cat6b_t;
+
+/* dword1 encoding for dst_off:
+ *
+ * The mustbe1 bit occupies the same position as dst_off in the generic
+ * instr_cat6_t view; dword0 is left undecoded here.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t dword0;
+
+ /* note: there is some weird stuff going on where sometimes
+ * cat6->a.off is involved.. but that seems like a bug in
+ * the blob, since it is used even if !cat6->src_off
+ * It would make sense for there to be some more bits to
+ * bring us to 11 bits worth of offset, but not sure..
+ */
+ int32_t off : 8;
+ uint32_t mustbe1 : 1;
+ uint32_t dst : 8;
+ uint32_t pad1 : 15;
+} instr_cat6c_t;
+
+/* dword1 encoding for !dst_off:
+ *
+ * The mustbe0 bit occupies the same position as dst_off in the generic
+ * instr_cat6_t view; dword0 is left undecoded here.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t dword0;
+
+ uint32_t dst : 8;
+ uint32_t mustbe0 : 1;
+ uint32_t idx : 8;
+ uint32_t pad0 : 15;
+} instr_cat6d_t;
+
+/* ldgb and atomics..
+ *
+ * ldgb: pad0=0, pad3=1
+ * atomic .g: pad0=1, pad3=1
+ * .l: pad0=1, pad3=0
+ *
+ * The trailing pad2/pad4 fields cover bits that the generic instr_cat6_t
+ * view decodes as type and opc/jmp_tgt/sync/opc_cat.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t pad0 : 1;
+ uint32_t src3 : 8;
+ uint32_t d : 2;
+ uint32_t typed : 1;
+ uint32_t type_size : 2;
+ uint32_t src1 : 8;
+ uint32_t src1_im : 1;
+ uint32_t src2_im : 1;
+ uint32_t src2 : 8;
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t mustbe0 : 1;
+ uint32_t src_ssbo : 8;
+ uint32_t pad2 : 3; // type
+ uint32_t g : 1;
+ uint32_t pad3 : 1;
+ uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat
+} instr_cat6ldgb_t;
+
+/* stgb, pad0=0, pad3=2
+ *
+ * The trailing pad2/pad4 fields cover bits that the generic instr_cat6_t
+ * view decodes as type and opc/jmp_tgt/sync/opc_cat.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t mustbe1 : 1; // ???
+ uint32_t src1 : 8;
+ uint32_t d : 2;
+ uint32_t typed : 1;
+ uint32_t type_size : 2;
+ uint32_t pad0 : 9;
+ uint32_t src2_im : 1;
+ uint32_t src2 : 8;
+
+ /* dword1: */
+ uint32_t src3 : 8;
+ uint32_t src3_im : 1;
+ uint32_t dst_ssbo : 8;
+ uint32_t pad2 : 3; // type
+ uint32_t pad3 : 2;
+ uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat
+} instr_cat6stgb_t;
+
+/* A (legacy-encoding) category 6 instruction.  The a/b/c/d/ldgb/stgb
+ * members are alternative decodings of the same two dwords; the
+ * anonymous struct exposes the fields needed to pick between them
+ * (src_off, dst_off) along with type, opc and the common trailer bits.
+ */
+typedef union PACKED {
+ instr_cat6a_t a;
+ instr_cat6b_t b;
+ instr_cat6c_t c;
+ instr_cat6d_t d;
+ instr_cat6ldgb_t ldgb;
+ instr_cat6stgb_t stgb;
+ struct PACKED {
+ /* dword0: */
+ uint32_t src_off : 1;
+ uint32_t pad1 : 31;
+
+ /* dword1: */
+ uint32_t pad2 : 8;
+ uint32_t dst_off : 1;
+ uint32_t pad3 : 8;
+ uint32_t type : 3;
+ uint32_t g : 1; /* or in some cases it means dst immed */
+ uint32_t pad4 : 1;
+ uint32_t opc : 5;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+ };
+} instr_cat6_t;
+
+/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
+ * Values 3 and 7 are not assigned here.
+ * NOTE(review): presumably this is what the 3-bit desc_mode field of
+ * instr_cat6_a6xx_t holds - confirm against the users of that field.
+ */
+typedef enum {
+ /* Use old GL binding model with an immediate index. */
+ CAT6_IMM = 0,
+
+ CAT6_UNIFORM = 1,
+
+ CAT6_NONUNIFORM = 2,
+
+ /* Use the bindless model, with an immediate index.
+ */
+ CAT6_BINDLESS_IMM = 4,
+
+ /* Use the bindless model, with a uniform register index.
+ */
+ CAT6_BINDLESS_UNIFORM = 5,
+
+ /* Use the bindless model, with a register index that isn't guaranteed
+ * to be uniform. This presumably checks if the indices are equal and
+ * splits up the load/store, because it works the way you would
+ * expect.
+ */
+ CAT6_BINDLESS_NONUNIFORM = 6,
+} cat6_desc_mode_t;
+
+/**
+ * New (a6xx) encoding of category 6 instructions.
+ *
+ * For atomic ops (which return a value):
+ *
+ * pad1=1, pad3=c, pad5=3
+ * src1 - vecN offset/coords
+ * src2.x - is actually dest register
+ * src2.y - is 'data' except for cmpxchg where src2.y is 'compare'
+ * and src2.z is 'data'
+ *
+ * For stib (which does not return a value):
+ * pad1=0, pad3=c, pad5=2
+ * src1 - vecN offset/coords
+ * src2 - value to store
+ *
+ * For ldib:
+ * pad1=1, pad3=c, pad5=2
+ * src1 - vecN offset/coords
+ *
+ * for ldc (load from UBO using descriptor):
+ * pad1=0, pad3=8, pad5=2
+ *
+ * pad2 and pad5 are only observed to be 0.
+ * NOTE(review): the line above contradicts the pad5=2/3 values listed for
+ * every case; presumably pad4 (marked "mustbe0 ??" below) was meant -
+ * confirm.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t pad1 : 1;
+ uint32_t base : 3;
+ uint32_t pad2 : 2;
+ uint32_t desc_mode : 3;
+ uint32_t d : 2;
+ uint32_t typed : 1;
+ uint32_t type_size : 2;
+ uint32_t opc : 5;
+ uint32_t pad3 : 5;
+ uint32_t src1 : 8; /* coordinate/offset */
+
+ /* dword1: */
+ uint32_t src2 : 8; /* or the dst for load instructions */
+ uint32_t pad4 : 1; //mustbe0 ??
+ uint32_t ssbo : 8; /* ssbo/image binding point */
+ uint32_t type : 3;
+ uint32_t pad5 : 7;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat6_a6xx_t;
+
+/* Encoding of a category 7 instruction (OPC_BAR / OPC_FENCE in opc_t).
+ * dword0 is entirely padding; all decoded fields are in dword1.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t pad1 : 32;
+
+ /* dword1: */
+ uint32_t pad2 : 12;
+ uint32_t ss : 1; /* maybe in the encoding, but blob only uses (sy) */
+ uint32_t pad3 : 6;
+ uint32_t w : 1; /* write */
+ uint32_t r : 1; /* read */
+ uint32_t l : 1; /* local */
+ uint32_t g : 1; /* global */
+ uint32_t opc : 4; /* presumed, but only a couple known OPCs */
+ uint32_t jmp_tgt : 1; /* (jp) */
+ uint32_t sync : 1; /* (sy) */
+ uint32_t opc_cat : 3;
+} instr_cat7_t;
+
+/* One 64-bit instruction, viewed through whichever per-category layout
+ * applies.  The anonymous struct exposes the fields found at the same
+ * position in several layouts; in particular opc_cat (the last 3 bits of
+ * dword1) selects which catN member is the valid decoding.
+ */
+typedef union PACKED {
+ instr_cat0_t cat0;
+ instr_cat1_t cat1;
+ instr_cat2_t cat2;
+ instr_cat3_t cat3;
+ instr_cat4_t cat4;
+ instr_cat5_t cat5;
+ instr_cat6_t cat6;
+ instr_cat6_a6xx_t cat6_a6xx;
+ instr_cat7_t cat7;
+ struct PACKED {
+ /* dword0: */
+ uint32_t pad1 : 32;
+
+ /* dword1: */
+ uint32_t pad2 : 12;
+ uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */
+ uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
+ uint32_t pad3 : 13;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+
+ };
+} instr_t;
+
+/* Fetch the repeat field of an instruction.  Only categories 0-4 carry
+ * one; for every other category the result is 0.
+ */
+static inline uint32_t instr_repeat(instr_t *instr)
+{
+	uint32_t rpt = 0;
+
+	switch (instr->opc_cat) {
+	case 0:
+		rpt = instr->cat0.repeat;
+		break;
+	case 1:
+		rpt = instr->cat1.repeat;
+		break;
+	case 2:
+		rpt = instr->cat2.repeat;
+		break;
+	case 3:
+		rpt = instr->cat3.repeat;
+		break;
+	case 4:
+		rpt = instr->cat4.repeat;
+		break;
+	default:
+		break;
+	}
+
+	return rpt;
+}
+
+/* Fetch the sat bit of an instruction.  Only categories 2-4 carry one;
+ * for every other category the result is false.
+ */
+static inline bool instr_sat(instr_t *instr)
+{
+	const uint32_t cat = instr->opc_cat;
+
+	if (cat == 2)
+		return instr->cat2.sat;
+	if (cat == 3)
+		return instr->cat3.sat;
+	if (cat == 4)
+		return instr->cat4.sat;
+
+	return false;
+}
+
+/* Tell whether a cat6 instruction uses the legacy encoding (instr_cat6_t)
+ * rather than the a6xx one (instr_cat6_a6xx_t).  Returns true for legacy.
+ *
+ * We can probably drop the gpu_id arg, but keeping it for now so we can
+ * assert if we see something we think should be new encoding on an older
+ * gpu.
+ */
+static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
+{
+ instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
+
+ /* At least one of these two bits is pad in all the possible
+ * "legacy" cat6 encodings, and an analysis of all the pre-a6xx
+ * cmdstream traces I have indicates that the pad bit is zero
+ * in all cases. So we can use this to detect new encoding:
+ */
+ if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
+ /* sanity checks only; gpu_id is not used for the decision itself */
+ ir3_assert(gpu_id >= 600);
+ ir3_assert(instr->cat6.opc == 0);
+ return false;
+ }
+
+ return true;
+}
+
+/* Extract the per-category opcode of an instruction.  gpu_id is only
+ * passed through to is_cat6_legacy(), which decides where the cat6
+ * opcode field lives.
+ */
+static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
+{
+	switch (instr->opc_cat) {
+	case 0:
+		/* cat0 splits its opcode: low 4 bits in opc, bit 4 in opc_hi */
+		return instr->cat0.opc | (instr->cat0.opc_hi << 4);
+	case 2:
+		return instr->cat2.opc;
+	case 3:
+		return instr->cat3.opc;
+	case 4:
+		return instr->cat4.opc;
+	case 5:
+		return instr->cat5.opc;
+	case 6:
+		if (is_cat6_legacy(instr, gpu_id))
+			return instr->cat6.opc;
+		return instr->cat6_a6xx.opc;
+	case 7:
+		return instr->cat7.opc;
+	case 1:  /* cat1 has no opcode field */
+	default:
+		return 0;
+	}
+}
+
+/* True for the cat3 MAD_* opcodes (the MADSH_* ones are not included). */
+static inline bool is_mad(opc_t opc)
+{
+	return (opc == OPC_MAD_U16) ||
+		(opc == OPC_MAD_S16) ||
+		(opc == OPC_MAD_U24) ||
+		(opc == OPC_MAD_S24) ||
+		(opc == OPC_MAD_F16) ||
+		(opc == OPC_MAD_F32);
+}
+
+/* True for the two cat3 MADSH_* opcodes. */
+static inline bool is_madsh(opc_t opc)
+{
+	return (opc == OPC_MADSH_U16) || (opc == OPC_MADSH_M16);
+}
+
+/* True for any of the cat6 OPC_ATOMIC_* opcodes. */
+static inline bool is_atomic(opc_t opc)
+{
+	bool atomic = false;
+
+	switch (opc) {
+	case OPC_ATOMIC_ADD:
+	case OPC_ATOMIC_SUB:
+	case OPC_ATOMIC_XCHG:
+	case OPC_ATOMIC_INC:
+	case OPC_ATOMIC_DEC:
+	case OPC_ATOMIC_CMPXCHG:
+	case OPC_ATOMIC_MIN:
+	case OPC_ATOMIC_MAX:
+	case OPC_ATOMIC_AND:
+	case OPC_ATOMIC_OR:
+	case OPC_ATOMIC_XOR:
+		atomic = true;
+		break;
+	default:
+		break;
+	}
+
+	return atomic;
+}
+
+/* True for the cat6 opcodes classed as SSBO accesses here: resfmt,
+ * resinfo, ldgb, stgb and stib.
+ */
+static inline bool is_ssbo(opc_t opc)
+{
+	return (opc == OPC_RESFMT) ||
+		(opc == OPC_RESINFO) ||
+		(opc == OPC_LDGB) ||
+		(opc == OPC_STGB) ||
+		(opc == OPC_STIB);
+}
+
+/* True for the three cat5 ISAM* opcodes (isam, isaml, isamm). */
+static inline bool is_isam(opc_t opc)
+{
+	return (opc == OPC_ISAM) || (opc == OPC_ISAML) || (opc == OPC_ISAMM);
+}
+
+
+/* True for the cat2 opcodes carrying the _F suffix, with the exception
+ * of OPC_BARY_F which is not in the list.
+ */
+static inline bool is_cat2_float(opc_t opc)
+{
+	bool is_float = false;
+
+	switch (opc) {
+	case OPC_ADD_F:
+	case OPC_MIN_F:
+	case OPC_MAX_F:
+	case OPC_MUL_F:
+	case OPC_SIGN_F:
+	case OPC_CMPS_F:
+	case OPC_ABSNEG_F:
+	case OPC_CMPV_F:
+	case OPC_FLOOR_F:
+	case OPC_CEIL_F:
+	case OPC_RNDNE_F:
+	case OPC_RNDAZ_F:
+	case OPC_TRUNC_F:
+		is_float = true;
+		break;
+	default:
+		break;
+	}
+
+	return is_float;
+}
+
+/* True for the cat3 opcodes with an F16/F32 suffix (mad and sel). */
+static inline bool is_cat3_float(opc_t opc)
+{
+	return (opc == OPC_MAD_F16) ||
+		(opc == OPC_MAD_F32) ||
+		(opc == OPC_SEL_F16) ||
+		(opc == OPC_SEL_F32);
+}
+
+/* Disassembler entry point; its definition is not part of this header.
+ * NOTE(review): presumably dwords/sizedwords give the instruction buffer
+ * and its length in 32-bit words, out is the stream the listing is
+ * written to, and gpu_id selects generation-specific decoding - confirm
+ * against the implementation.
+ */
+int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
+
+#endif /* INSTR_A3XX_H_ */
--- /dev/null
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <archive.h>
+#include <archive_entry.h>
+
+#include "io.h"
+
+/* State of one open input: the libarchive reader, the header entry
+ * obtained when it was opened, and a running count of bytes handed out
+ * by io_readn().
+ */
+struct io {
+ struct archive *a;
+ struct archive_entry *entry;
+ unsigned offset;
+};
+
+/* Failure path shared by the constructors: print libarchive's message
+ * for the last error on io to stderr, then destroy io.  The pointer must
+ * not be used by the caller afterwards.
+ */
+static void io_error(struct io *io)
+{
+	const char *msg = archive_error_string(io->a);
+
+	fprintf(stderr, "%s\n", msg);
+	io_close(io);
+}
+
+/* Allocate an io object and set up its libarchive reader with gzip and
+ * pass-through filters and with all formats plus raw enabled.
+ *
+ * Returns the new object, or NULL on failure.  If one of the
+ * archive_read_support_*() calls fails, the libarchive error is printed
+ * to stderr via io_error(), which also releases the object.
+ */
+static struct io * io_new(void)
+{
+	struct io *io = calloc(1, sizeof(*io));
+	int ret;
+
+	if (!io)
+		return NULL;
+
+	io->a = archive_read_new();
+	if (!io->a) {
+		/* No reader was created, so there is no error string to
+		 * report and nothing but the io itself to release:
+		 */
+		free(io);
+		return NULL;
+	}
+
+	ret = archive_read_support_filter_gzip(io->a);
+	if (ret != ARCHIVE_OK) {
+		io_error(io);
+		return NULL;
+	}
+
+	ret = archive_read_support_filter_none(io->a);
+	if (ret != ARCHIVE_OK) {
+		io_error(io);
+		return NULL;
+	}
+
+	ret = archive_read_support_format_all(io->a);
+	if (ret != ARCHIVE_OK) {
+		io_error(io);
+		return NULL;
+	}
+
+	ret = archive_read_support_format_raw(io->a);
+	if (ret != ARCHIVE_OK) {
+		io_error(io);
+		return NULL;
+	}
+
+	return io;
+}
+
+/* Open the named file for reading and position the reader on its first
+ * header.  Returns NULL on any failure; when libarchive reports the
+ * failure, its message is printed to stderr and the partially built
+ * object is released by io_error().
+ */
+struct io * io_open(const char *filename)
+{
+	struct io *reader = io_new();
+
+	if (reader == NULL)
+		return NULL;
+
+	/* second call only runs when the open succeeded: */
+	if ((archive_read_open_filename(reader->a, filename, 10240) != ARCHIVE_OK) ||
+	    (archive_read_next_header(reader->a, &reader->entry) != ARCHIVE_OK)) {
+		io_error(reader);
+		return NULL;
+	}
+
+	return reader;
+}
+
+/* Same as io_open(), but reads from an already-open file descriptor.
+ * Returns NULL on any failure; when libarchive reports the failure, its
+ * message is printed to stderr and the partially built object is
+ * released by io_error().
+ */
+struct io * io_openfd(int fd)
+{
+	struct io *reader = io_new();
+
+	if (reader == NULL)
+		return NULL;
+
+	/* second call only runs when the open succeeded: */
+	if ((archive_read_open_fd(reader->a, fd, 10240) != ARCHIVE_OK) ||
+	    (archive_read_next_header(reader->a, &reader->entry) != ARCHIVE_OK)) {
+		io_error(reader);
+		return NULL;
+	}
+
+	return reader;
+}
+
+/* Release the libarchive reader owned by io and the io object itself.
+ * Like free(), passing NULL is allowed and does nothing.
+ */
+void io_close(struct io *io)
+{
+	if (!io)
+		return;
+
+	archive_read_free(io->a);
+	free(io);
+}
+
+/* Total number of bytes io_readn() has delivered from this io so far.
+ * The counter is a plain unsigned, so it wraps on very large inputs.
+ */
+unsigned io_offset(struct io *io)
+{
+ return io->offset;
+}
+
+#include <assert.h>
+/* Read up to nbytes bytes into buf, looping over short reads until the
+ * request is satisfied or the data runs out.
+ *
+ * Returns the number of bytes stored in buf (less than nbytes only at
+ * end of data).  If libarchive reports an error, its message is printed
+ * to stderr and the negative libarchive code is returned instead; bytes
+ * read before the error still count towards io_offset().
+ */
+int io_readn(struct io *io, void *buf, int nbytes)
+{
+	char *dst = buf;
+	int remaining = nbytes;
+	int total = 0;
+
+	while (remaining > 0) {
+		int got = archive_read_data(io->a, dst, remaining);
+
+		if (got < 0) {
+			fprintf(stderr, "%s\n", archive_error_string(io->a));
+			return got;
+		}
+
+		/* zero means end of data */
+		if (got == 0)
+			break;
+
+		dst += got;
+		remaining -= got;
+		total += got;
+		io->offset += got;
+	}
+
+	return total;
+}
--- /dev/null
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef IO_H_
+#define IO_H_
+
+/* Simple API to abstract reading from file which might be compressed.
+ * Maybe someday I'll add writing..
+ */
+
+struct io;
+
+/* Open the named file for reading; NULL is returned on failure. */
+struct io * io_open(const char *filename);
+/* Same, but reading from an already-open file descriptor. */
+struct io * io_openfd(int fd);
+/* Free the reader and the io object. */
+void io_close(struct io *io);
+/* Running count of bytes consumed so far through io_readn(). */
+unsigned io_offset(struct io *io);
+/* Read up to nbytes into buf; returns the number of bytes read (possibly
+ * fewer than requested), or a negative value if a read fails.
+ */
+int io_readn(struct io *io, void *buf, int nbytes);
+
+
+/*
+ * Return non-zero when path ends with ext (for example ".rd"), zero
+ * otherwise.
+ *
+ * A path shorter than ext can never match; testing for that first avoids
+ * computing a pointer that lies before the start of path.
+ */
+static inline int
+check_extension(const char *path, const char *ext)
+{
+	size_t path_len = strlen(path);
+	size_t ext_len = strlen(ext);
+
+	if (path_len < ext_len)
+		return 0;
+
+	return strcmp(path + (path_len - ext_len), ext) == 0;
+}
+
+#endif /* IO_H_ */
--- /dev/null
+# Copyright © 2020 Google, Inc
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Lua is optional: probe the versioned package names first and fall back to
+# the unversioned one.  dep_lua is left "not found" when none is available.
+dep_lua = dependency('lua53', required: false)
+if not dep_lua.found()
+ dep_lua = dependency('lua52', required: false)
+endif
+if not dep_lua.found()
+ dep_lua = dependency('lua', required: false)
+endif
+
+# libarchive is optional as well; targets that need it are guarded by
+# dep_libarchive.found() further down.
+dep_libarchive = dependency('libarchive', required: false)
+
+# Shared cmdstream decoding:
+# Static helper library linked into the tools defined below.  It is not
+# built by default, only when something that links it is built.
+libfreedreno_cffdec = static_library(
+ 'freedreno_cffdec',
+ [
+ 'buffers.c',
+ 'buffers.h',
+ 'cffdec.c',
+ 'cffdec.h',
+ 'disasm-a2xx.c',
+ 'disasm-a3xx.c',
+ 'disasm.h',
+ 'instr-a2xx.h',
+ 'instr-a3xx.h',
+ 'pager.c',
+ 'pager.h',
+ 'rnnutil.c',
+ 'rnnutil.h',
+ 'util.h',
+ ],
+ include_directories: [
+ inc_freedreno_rnn,
+ ],
+ c_args : [ no_override_init_args ],
+ gnu_symbol_visibility: 'hidden',
+ dependencies: [],
+ link_with: libfreedreno_rnn,
+ build_by_default: false,
+)
+
+# libarchive-backed file reader (io.c).  Only defined when libarchive was
+# found, so every user of libfreedreno_io must sit behind the same check.
+# The target name carries no "lib" prefix because meson adds one itself,
+# matching 'freedreno_cffdec' above.
+if dep_libarchive.found()
+  libfreedreno_io = static_library(
+    'freedreno_io',
+    [
+      'io.c',
+      'io.h',
+    ],
+    include_directories: [],
+    c_args : [no_override_init_args],
+    gnu_symbol_visibility: 'hidden',
+    dependencies: [
+      dep_libarchive,
+    ],
+    build_by_default: false,
+  )
+endif
+
+# cffdump needs both optional dependencies: lua is listed as a dependency
+# and libfreedreno_io only exists when libarchive was found.  It is built
+# and installed only when 'freedreno' is in with_tools.
+if dep_lua.found() and dep_libarchive.found()
+ cffdump = executable(
+ 'cffdump',
+ [
+ 'cffdump.c',
+ 'script.c',
+ 'script.h'
+ ],
+ include_directories: [
+ inc_freedreno_rnn,
+ ],
+ c_args : [no_override_init_args],
+ gnu_symbol_visibility: 'hidden',
+ dependencies: [
+ dep_lua,
+ ],
+ link_with: [
+ libfreedreno_cffdec,
+ libfreedreno_io,
+ ],
+ build_by_default: with_tools.contains('freedreno'),
+ install : with_tools.contains('freedreno'),
+ )
+endif
+
+# crashdec links only the shared decode library, so it is defined
+# unconditionally; it is built and installed only when 'freedreno' is in
+# with_tools.
+crashdec = executable(
+ 'crashdec',
+ 'crashdec.c',
+ include_directories: [
+ inc_freedreno_rnn,
+ ],
+ gnu_symbol_visibility: 'hidden',
+ dependencies: [],
+ link_with: [
+ libfreedreno_cffdec,
+ ],
+ build_by_default: with_tools.contains('freedreno'),
+ install : with_tools.contains('freedreno'),
+)
+
+# The pgmdump tools link libfreedreno_io and therefore need libarchive.
+# They are built alongside the other freedreno tools but never installed.
+if dep_libarchive.found()
+ pgmdump = executable(
+ 'pgmdump',
+ 'pgmdump.c',
+ include_directories: [],
+ gnu_symbol_visibility: 'hidden',
+ dependencies: [],
+ link_with: [
+ libfreedreno_cffdec,
+ libfreedreno_io,
+ ],
+ build_by_default: with_tools.contains('freedreno'),
+ install: false,
+ )
+ pgmdump2 = executable(
+ 'pgmdump2',
+ 'pgmdump2.c',
+ include_directories: [],
+ gnu_symbol_visibility: 'hidden',
+ dependencies: [],
+ link_with: [
+ libfreedreno_cffdec,
+ libfreedreno_io,
+ ],
+ build_by_default: with_tools.contains('freedreno'),
+ install: false,
+ )
+endif
--- /dev/null
+/*
+ * Copyright (c) 2018 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "pager.h"
+
+static pid_t pager_pid;
+
+
+/*
+ * SIGCHLD handler: the pager is gone, so there is nobody left to read our
+ * output and the process terminates with status 0.
+ *
+ * Only async-signal-safe functions may be called from a signal handler.
+ * exit() is not one of them (it runs atexit handlers and flushes stdio,
+ * either of which may have been interrupted by this very signal), so the
+ * process is ended with _exit() instead.
+ */
+static void
+pager_death(int n)
+{
+	(void)n;
+	_exit(0);
+}
+
+/*
+ * Fallback used by the pager child when "less" cannot be executed: copy
+ * everything arriving on stdin to stdout unchanged, so the output is still
+ * shown, just without paging.  Returns at end of input or on an
+ * unrecoverable read/write error.
+ */
+static void
+pager_passthrough(void)
+{
+	char chunk[4096];
+
+	for (;;) {
+		ssize_t got = read(STDIN_FILENO, chunk, sizeof(chunk));
+		char *p = chunk;
+
+		if ((got < 0) && (errno == EINTR))
+			continue;
+		if (got <= 0)
+			return;
+
+		while (got > 0) {
+			ssize_t put = write(STDOUT_FILENO, p, got);
+
+			if (put < 0) {
+				if (errno == EINTR)
+					continue;
+				return;
+			}
+			p += put;
+			got -= put;
+		}
+	}
+}
+
+/*
+ * Start "less" as a child process and redirect this process' stdout into
+ * it through a pipe.  Exits the process if the pipe or the fork fails.
+ *
+ * A SIGCHLD handler is installed in the parent so that it terminates as
+ * soon as the pager goes away.
+ */
+void
+pager_open(void)
+{
+	int fd[2];
+
+	if (pipe(fd) < 0) {
+		fprintf(stderr, "Failed to create pager pipe: %m\n");
+		exit(-1);
+	}
+
+	pager_pid = fork();
+	if (pager_pid < 0) {
+		fprintf(stderr, "Failed to fork pager: %m\n");
+		exit(-1);
+	}
+
+	if (pager_pid == 0) {
+		/* child: the read end of the pipe becomes stdin */
+		dup2(fd[0], STDIN_FILENO);
+		close(fd[0]);
+		close(fd[1]);
+
+		setenv("LESS", "FRSMKX", 1);
+
+		/* the terminating null pointer of a variadic exec call must
+		 * have pointer type
+		 */
+		execlp("less", "less", (char *)NULL);
+
+		/* Only reached when exec failed.  The child must never return
+		 * into the caller's code (that would run the whole program a
+		 * second time), so act as a trivial pager and leave.
+		 */
+		pager_passthrough();
+		_exit(0);
+	} else {
+		/* we want to kill the parent process when pager exits: */
+		signal(SIGCHLD, pager_death);
+		dup2(fd[1], STDOUT_FILENO);
+		close(fd[0]);
+		close(fd[1]);
+	}
+}
+
+/*
+ * Finish paging: flush and close our end of the pipe so the pager sees
+ * end-of-file, then wait for the pager process to exit.
+ *
+ * Returns 0 once the pager has been reaped, or a negative errno value if
+ * waiting fails for a reason other than being interrupted.
+ */
+int
+pager_close(void)
+{
+	siginfo_t status;
+
+	/* stdout feeds a pipe; anything still held in the stdio buffer would
+	 * be lost if the descriptor were closed underneath it
+	 */
+	fflush(stdout);
+	close(STDOUT_FILENO);
+
+	for (;;) {
+		memset(&status, 0, sizeof(status));
+
+		if (waitid(P_PID, pager_pid, &status, WEXITED) == 0)
+			return 0;
+
+		/* interrupted by a signal: just wait again */
+		if (errno != EINTR)
+			return -errno;
+	}
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __PAGER_H__
+#define __PAGER_H__
+
+/* Fork "less" and redirect this process' stdout into it through a pipe.
+ * Exits the process if the pipe or the fork cannot be created.
+ */
+void pager_open(void);
+/* Close stdout and wait for the pager to exit; returns 0 on success or a
+ * negative errno value if waiting fails.
+ */
+int pager_close(void);
+
+#endif /* __PAGER_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "redump.h"
+#include "disasm.h"
+#include "io.h"
+
+#define ASCII_XOR 0xff
+#include "util.h"
+
+/*
+ * Layout of the first section of a program binary; dump_program() reads it
+ * with next_sect() and prints the named fields.  Fields called unknownN
+ * have not been identified.
+ */
+struct pgm_header {
+ uint32_t size;
+ uint32_t unknown1;
+ uint32_t unknown2;
+ uint32_t revision; /* selects the format variations handled while dumping */
+ uint32_t unknown4;
+ uint32_t unknown5;
+ uint32_t unknown6;
+ uint32_t unknown7;
+ uint32_t unknown8;
+ uint32_t num_attribs; /* number of attribute sections to parse */
+ uint32_t num_uniforms; /* number of uniform sections to parse */
+ uint32_t num_samplers; /* number of sampler sections to parse */
+ uint32_t num_varyings; /* number of varying sections to parse */
+ uint32_t num_uniformblocks; /* number of uniform block sections to parse */
+};
+
+/*
+ * Header section in front of each a2xx vertex shader.  dump_shaders_a2xx()
+ * reads (unknown1 - 1) constant sections before the shader section and
+ * unknown9 further sections after it.
+ */
+struct vs_header {
+ uint32_t unknown1; /* seems to be # of sections up to and including shader */
+ uint32_t unknown2; /* seems to be low byte or so of SQ_PROGRAM_CNTL */
+ uint32_t unknown3;
+ uint32_t unknown4;
+ uint32_t unknown5;
+ uint32_t unknown6;
+ uint32_t unknown7;
+ uint32_t unknown8;
+ uint32_t unknown9; /* seems to be # of sections following shader */
+};
+
+/*
+ * Header section in front of the a2xx fragment shader.
+ */
+struct fs_header {
+ uint32_t unknown1; /* dump_shaders_a2xx() reads (unknown1 - 1) constant sections after this header */
+};
+/*
+ // Covers a lot of type_info
+ // varying, attribute, uniform, sampler
+ type_info & 0xFF
+ if ((type_info >> 8) == 0x8b) // vector
+ 0x50 = vec2
+ 0x51 = vec3
+ 0x52 = vec4
+ 0x53 = ivec2
+ 0x54 = ivec3
+ 0x55 = ivec4
+ 0x56 = bool // Why is this in vector?
+ 0x57 = bvec2
+ 0x58 = bvec3
+ 0x59 = bvec4
+ 0x5a = mat2
+ 0x5b = mat3
+ 0x5c = mat4
+ 0x5a = mat2x2 // Same as mat2
+ 0x65 = mat2x3
+ 0x66 = mat2x4
+ 0x67 = mat3x2
+ 0x5b = mat3x3 // Same as mat3
+ 0x68 = mat3x4
+ 0x69 = mat4x2
+ 0x6a = mat4x3
+ 0x5c = mat4x4 // same as mat4
+ 0x5e = sampler2D
+ 0x5f = sampler3D
+ 0x60 = samplerCube // XXX: Doesn't work
+ 0x62 = sampler2DShadow
+ 0xc6 = uvec2
+ 0xc7 = uvec3
+ 0xc8 = uvec4
+ else if ((type_info >> 8) == 0x8d) // GLES3 samplers
+ 0xC1 = sampler2DArray
+ 0xC4 = sampler2DArrayShadow
+ 0xC5 = samplerCubeShadow
+ 0xCA = isampler2D
+ 0xCB = isampler3D
+ 0xCC = isamplerCube
+ 0xD2 = usampler2D
+ 0xD3 = usampler3D
+ 0xD4 = usamplerCube
+ 0xD7 = isampler2DArray
+ 0xD7 = usampler2DArray // Is the same as isampler2DArray?
+ else // 0x14 = single
+ 0x04 = int
+ 0x05 = uint
+ 0x06 = float
+*/
+/*
+ * One attribute section.  The name follows the seven fixed dwords;
+ * dump_program() passes it through clean_ascii() before printing.
+ */
+struct attribute {
+ uint32_t type_info;
+ uint32_t reg; /* seems to be the register the fetch instruction loads to */
+ uint32_t const_idx; /* the CONST() indx value for sampler */
+ uint32_t unknown2;
+ uint32_t unknown3;
+ uint32_t unknown4;
+ uint32_t unknown5;
+ char name[];
+};
+
+/*
+ * One uniform section.  Two layouts exist that differ in where the name
+ * starts; is_uniform_v2() chooses the v2 layout when v2.unknown10 is zero.
+ */
+struct uniform {
+ uint32_t type_info;
+ uint32_t unknown2;
+ uint32_t unknown3;
+ uint32_t unknown4;
+ uint32_t const_base; /* const base register (for uniforms that take more than one const reg, ie. matrices) */
+ uint32_t unknown6;
+ uint32_t const_reg; /* the const register holding the value */
+ uint32_t unknown7;
+ uint32_t unknown8;
+ uint32_t unknown9;
+ union {
+ /* older layout: name directly follows the fixed fields */
+ struct {
+ char name[1];
+ } v1;
+ /* newer layout: three more dwords precede the name */
+ struct {
+ uint32_t unknown10;
+ uint32_t unknown11;
+ uint32_t unknown12;
+ char name[];
+ } v2;
+ };
+};
+
+/*
+ * One member of a uniform block; the name follows fourteen fixed dwords.
+ * dump_program() reads the member list of every block twice.
+ */
+struct uniformblockmember {
+ uint32_t type_info;
+ uint32_t is_array;
+ uint32_t array_size; /* elements in the array */
+ uint32_t unknown2; /* Same as array_size */
+ uint32_t unknown3; /* Seems to be a offset within UBO in vertex (by components) */
+ uint32_t unknown4;
+ uint32_t unknown5; /* Seems to be a offset within UBO in fragment (by vec4) */
+ uint32_t unknown6;
+ uint32_t unknown7;
+ uint32_t unknown8;
+ uint32_t unknown9; /* UBO block index? */
+ uint32_t unknown10;
+ uint32_t unknown11;
+ uint32_t unknown12;
+ char name[];
+};
+
+/*
+ * Header section of one uniform block; the name follows ten fixed dwords.
+ */
+struct uniformblock
+{
+ uint32_t type_info;
+ uint32_t unknown1;
+ uint32_t unknown2;
+ uint32_t unknown3;
+ uint32_t unknown4;
+ uint32_t num_members; /* number of member sections dump_program() reads for this block */
+ uint32_t num_members2;
+ uint32_t unknown5;
+ uint32_t unknown6;
+ uint32_t unknown7;
+ char name[];
+};
+
+
+/*
+ * One sampler section; the name follows eight fixed dwords.
+ */
+struct sampler {
+ uint32_t type_info;
+ uint32_t is_array;
+ uint32_t array_size; /* elements in the array */
+ uint32_t unknown4; /* same as array_size */
+ uint32_t unknown5;
+ uint32_t unknown6;
+ uint32_t const_idx; /* the CONST() indx value for the sampler */
+ uint32_t unknown7;
+ char name[];
+};
+
+/*
+ * One varying section; the name follows four fixed dwords.
+ */
+struct varying {
+ uint32_t type_info;
+ uint32_t unknown2;
+ uint32_t unknown3;
+ uint32_t reg; /* the register holding the value (on entry to the shader) */
+ char name[];
+};
+
+/*
+ * Output section, read by dump_program() when the header revision is 7 or
+ * later; the name follows eight fixed dwords.
+ */
+struct output {
+ uint32_t type_info;
+ uint32_t unknown2;
+ uint32_t unknown3;
+ uint32_t unknown4;
+ uint32_t unknown5;
+ uint32_t unknown6;
+ uint32_t unknown7;
+ uint32_t unknown8;
+ char name[];
+};
+
+/*
+ * Compiler-generated constant section.
+ */
+struct constant {
+ uint32_t unknown1;
+ uint32_t unknown2; /* dump_short_summary() only prints constants where this is 0 */
+ uint32_t unknown3;
+ uint32_t const_idx; /* printed as the C<n> register number */
+ float val[]; /* dump_constant() prints the first four values */
+};
+
+/*
+ * Decoder state for one program binary: the not yet consumed input plus
+ * the sections that have been parsed out of it so far.
+ */
+struct state {
+	char *buf;                      /* next unconsumed byte of the input */
+	int sz;                         /* number of bytes left at buf */
+	struct pgm_header *hdr;
+	struct attribute *attribs[32];  /* don't really know the upper limit.. */
+	struct uniform *uniforms[32];
+	struct sampler *samplers[32];
+	struct varying *varyings[32];
+	struct {
+		struct uniformblock *header;
+		struct uniformblockmember **members; /* GL ES 3.0 spec mandates minimum 16K support. a3xx supports 65K */
+	} uniformblocks[24];            /* Maximum a330 supports */
+	/* Room for the single output section.  This has to be a real
+	 * one-element array: a zero-length array has no element [0] to
+	 * store into.
+	 */
+	struct output *outputs[1];      /* I guess only one?? */
+};
+
+static const char *infile;
+static int full_dump = 1;
+static int dump_shaders = 0;
+static int gpu_id;
+
+/*
+ * Locate the next section terminator in buf.
+ *
+ * Sections end with the little-endian 32-bit marker 0xba5eba11, which may
+ * sit at any byte offset.  Returns a pointer to the first byte of the
+ * first marker found within the sz bytes at buf, or NULL if there is none
+ * (including when sz is smaller than the marker itself).
+ */
+static char *find_sect_end(char *buf, int sz)
+{
+	const uint8_t *bytes = (const uint8_t *)buf;
+	int i;
+
+	/* (sz - i) >= 4 keeps all four reads below inside the buffer and
+	 * never forms a pointer outside of it, even for tiny sz
+	 */
+	for (i = 0; (sz - i) >= 4; i++) {
+		/* widen before shifting: a promoted int must not be shifted
+		 * into its sign bit
+		 */
+		uint32_t d = ((uint32_t)bytes[i + 0] << 0) |
+			     ((uint32_t)bytes[i + 1] << 8) |
+			     ((uint32_t)bytes[i + 2] << 16) |
+			     ((uint32_t)bytes[i + 3] << 24);
+
+		/* someone at QC likes baseball */
+		if (d == 0xba5eba11)
+			return buf + i;
+	}
+
+	return NULL;
+}
+
+/*
+ * Split the next section off the front of the input.
+ *
+ * On success a heap copy of the section (without its terminating marker)
+ * is returned, *sect_size is set to its length in bytes, and state->buf /
+ * state->sz are advanced past the section and its marker.  The caller
+ * owns the copy and releases it with free().
+ *
+ * Returns NULL, leaving the state untouched, when no further terminator
+ * is found or when the copy cannot be allocated.  *sect_size is not
+ * written when no terminator is found.
+ */
+static void *next_sect(struct state *state, int *sect_size)
+{
+	char *end = find_sect_end(state->buf, state->sz);
+	size_t alloc_size;
+	void *sect;
+
+	if (!end)
+		return NULL;
+
+	*sect_size = end - state->buf;
+
+	/* copy the section to keep things nicely 32b aligned.  The copy is
+	 * zero-filled so the alignment padding has a defined value, and it
+	 * is never zero bytes long so an empty section still yields a
+	 * non-NULL pointer
+	 */
+	alloc_size = ALIGN(*sect_size, 4);
+	if (alloc_size == 0)
+		alloc_size = 4;
+
+	sect = calloc(1, alloc_size);
+	if (!sect) {
+		fprintf(stderr, "out of memory copying section (size %d)\n", *sect_size);
+		return NULL;
+	}
+	memcpy(sect, state->buf, *sect_size);
+
+	state->sz -= *sect_size + 4;
+	state->buf = end + 4;
+
+	return sect;
+}
+
+/*
+ * Check whether bits 8..15 of type_info hold one of the known type
+ * classes.  Returns 1 if so, 0 otherwise.
+ */
+static int valid_type(uint32_t type_info)
+{
+	const uint32_t type_class = (type_info >> 8) & 0xff;
+
+	if (type_class == 0x8b)		/* vector */
+		return 1;
+	if (type_class == 0x8d)		/* GLES3 samplers */
+		return 1;
+	if (type_class == 0x14)		/* float */
+		return 1;
+
+	return 0;
+}
+
+#if 0
+/* Compiled out: returns 1 only for a type_info of exactly 0x128. */
+static int valid_uniformblock(uint32_t type_info)
+{
+	return (type_info == 0x128) ? 1 : 0;
+}
+#endif
+
+/* Print one attribute as "R<reg>, CONST(<idx>): <name>". */
+static void dump_attribute(struct attribute *attrib)
+{
+	const char *name = attrib->name;
+
+	printf("\tR%d, CONST(%d): %s\n", attrib->reg, attrib->const_idx, name);
+}
+
+/*
+ * Tell which of the two uniform layouts a section uses: returns 1 (v2)
+ * when the dword at v2.unknown10 is zero, 0 (v1) otherwise.
+ */
+static inline int is_uniform_v2(struct uniform *uniform)
+{
+	/* TODO maybe this should be based on revision #? */
+	return uniform->v2.unknown10 == 0;
+}
+
+/*
+ * Print one uniform with its constant register.  When const_reg is all
+ * ones the base register is printed instead, followed by a '+'.
+ */
+static void dump_uniform(struct uniform *uniform)
+{
+	char *name;
+
+	if (is_uniform_v2(uniform))
+		name = uniform->v2.name;
+	else
+		name = uniform->v1.name;
+
+	if (uniform->const_reg != (uint32_t)-1) {
+		printf("\tC%d: %s\n", uniform->const_reg, name);
+		return;
+	}
+
+	printf("\tC%d+: %s\n", uniform->const_base, name);
+}
+
+/* Print one sampler as "CONST(<idx>): <name>". */
+static void dump_sampler(struct sampler *sampler)
+{
+	const char *name = sampler->name;
+
+	printf("\tCONST(%d): %s\n", sampler->const_idx, name);
+}
+
+/* Print one varying as "R<reg>: <name>". */
+static void dump_varying(struct varying *varying)
+{
+	const char *name = varying->name;
+
+	printf("\tR%d: %s\n", varying->reg, name);
+}
+
+/* Print a uniform block's name followed by its member count. */
+static void dump_uniformblock(struct uniformblock *uniformblock)
+{
+	const char *name = uniformblock->name;
+
+	printf("\tUniform Block: %s(%d)\n", name, uniformblock->num_members);
+}
+
+/* Print the name of one uniform block member. */
+static void dump_uniformblockmember(struct uniformblockmember *member)
+{
+	const char *name = member->name;
+
+	printf("Uniform Block member: %s\n", name);
+}
+
+/* Print the name of an output; its register is not known, hence "R?". */
+static void dump_output(struct output *output)
+{
+	const char *name = output->name;
+
+	printf("\tR?: %s\n", name);
+}
+
+/* Print a constant's register index and its first four float values. */
+static void dump_constant(struct constant *constant)
+{
+	const float *v = constant->val;
+
+	printf("\tC%d: %f, %f, %f, %f\n", constant->const_idx,
+			v[0], v[1], v[2], v[3]);
+}
+
+/* dump attr/uniform/sampler/varying/const summary: */
+static void dump_short_summary(struct state *state, int nconsts,
+ struct constant **constants)
+{
+ int i;
+
+ /* dump attr/uniform/sampler/varying/const summary: */
+ for (i = 0; i < state->hdr->num_varyings; i++) {
+ dump_varying(state->varyings[i]);
+ }
+ for (i = 0; i < state->hdr->num_attribs; i++) {
+ dump_attribute(state->attribs[i]);
+ }
+ for (i = 0; i < state->hdr->num_uniforms; i++) {
+ dump_uniform(state->uniforms[i]);
+ }
+ for (i = 0; i < state->hdr->num_samplers; i++) {
+ dump_sampler(state->samplers[i]);
+ }
+ for (i = 0; i < nconsts - 1; i++) {
+ if (constants[i]->unknown2 == 0) {
+ dump_constant(constants[i]);
+ }
+ }
+ printf("\n");
+}
+
+/*
+ * Write the raw shader dwords to "<infile minus its last 3 chars>-<n>.<ext>".
+ * Does nothing unless shader dumping was enabled (dump_shaders).
+ *
+ * Failures are reported on stderr and otherwise ignored, so that a failed
+ * dump never stops the decode.
+ */
+static void dump_raw_shader(uint32_t *dwords, uint32_t sizedwords, int n, char *ext)
+{
+	static char filename[256];
+	const size_t nbytes = (size_t)sizedwords * 4;
+	int fd, len;
+
+	if (!dump_shaders)
+		return;
+
+	/* bounded formatting: infile comes from the command line and may be
+	 * longer than the buffer
+	 */
+	len = snprintf(filename, sizeof(filename), "%.*s-%d.%s",
+			(int)strlen(infile)-3, infile, n, ext);
+	if ((len < 0) || ((size_t)len >= sizeof(filename))) {
+		fprintf(stderr, "shader dump file name too long for: %s\n", infile);
+		return;
+	}
+
+	fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
+	if (fd < 0) {
+		perror(filename);
+		return;
+	}
+
+	if (write(fd, dwords, nbytes) != (ssize_t)nbytes)
+		perror(filename);
+
+	/* one descriptor per dumped shader would otherwise stay open */
+	close(fd);
+}
+
+/*
+ * Dump one a2xx shader whose header section has already been read.
+ *
+ *   hdr/hdr_size  the header section; ownership passes to this function
+ *   idx, stage    used for labels only, e.g. stage "VS" and idx 0 -> "VS0"
+ *   nconsts       number of constant sections in front of the shader
+ *   ntrailing     number of sections following the shader
+ *   type, ext     shader type for the disassembler, suffix for raw dumps
+ *
+ * Returns 0 on success, -1 when the input ends early or announces more
+ * constants than can be tracked.
+ */
+static int dump_shader_a2xx(struct state *state, void *hdr, int hdr_size,
+		int idx, const char *stage, int nconsts, int ntrailing,
+		enum shader_t type, char *ext)
+{
+	struct constant *constants[32] = {0};
+	const int max_consts = sizeof(constants) / sizeof(constants[0]);
+	uint8_t *ptr = NULL;
+	int j, sect_size = 0, level = 0, ret = -1;
+
+	printf("\n");
+
+	if (full_dump) {
+		printf("#######################################################\n");
+		printf("######## %s%d HEADER: (size %d)\n", stage, idx, hdr_size);
+		dump_hex(hdr, hdr_size);
+	}
+
+	/* the count is derived from an unsigned header field minus one */
+	if (nconsts < 0)
+		nconsts = 0;
+	if (nconsts > max_consts) {
+		fprintf(stderr, "%s%d: too many constant sections (%d)\n", stage, idx, nconsts);
+		nconsts = 0;
+		goto out;
+	}
+
+	for (j = 0; j < nconsts; j++) {
+		constants[j] = next_sect(state, &sect_size);
+		if (!constants[j]) {
+			fprintf(stderr, "%s%d: input ends inside constants\n", stage, idx);
+			goto out;
+		}
+		if (full_dump) {
+			printf("######## %s%d CONST: (size=%d)\n", stage, idx, sect_size);
+			dump_constant(constants[j]);
+			dump_hex((char *)constants[j], sect_size);
+		}
+	}
+
+	ptr = next_sect(state, &sect_size);
+	if (!ptr) {
+		fprintf(stderr, "%s%d: input ends before shader\n", stage, idx);
+		goto out;
+	}
+
+	printf("######## %s%d SHADER: (size=%d)\n", stage, idx, sect_size);
+	if (full_dump) {
+		dump_hex(ptr, sect_size);
+		level = 1;
+	} else {
+		dump_short_summary(state, nconsts, constants);
+	}
+
+	/* the instructions start 32 bytes into the section */
+	if (sect_size >= 32) {
+		disasm_a2xx((uint32_t *)(ptr + 32), (sect_size - 32) / 4, level+1, type);
+		dump_raw_shader((uint32_t *)(ptr + 32), (sect_size - 32) / 4, idx, ext);
+	} else {
+		fprintf(stderr, "%s%d: shader section too small (%d)\n", stage, idx, sect_size);
+	}
+
+	for (j = 0; j < ntrailing; j++) {
+		uint8_t *extra = next_sect(state, &sect_size);
+
+		if (!extra) {
+			fprintf(stderr, "%s%d: input ends inside trailing sections\n", stage, idx);
+			goto out;
+		}
+		if (full_dump) {
+			printf("######## %s%d CONST?: (size=%d)\n", stage, idx, sect_size);
+			dump_hex(extra, sect_size);
+		}
+		free(extra);
+	}
+
+	ret = 0;
+
+out:
+	free(ptr);
+	for (j = 0; j < nconsts; j++)
+		free(constants[j]);
+	free(hdr);
+
+	return ret;
+}
+
+/*
+ * Dump the a2xx shaders of a program: three vertex shaders followed by one
+ * fragment shader.  Stops at the first shader that cannot be read.
+ */
+static void dump_shaders_a2xx(struct state *state)
+{
+	int i, sect_size = 0;
+
+	/* dump vertex shaders: */
+	for (i = 0; i < 3; i++) {
+		struct vs_header *vs_hdr = next_sect(state, &sect_size);
+
+		if (!vs_hdr || (sect_size < (int)sizeof(*vs_hdr))) {
+			fprintf(stderr, "VS%d: missing or truncated header\n", i);
+			free(vs_hdr);
+			return;
+		}
+
+		if (dump_shader_a2xx(state, vs_hdr, sect_size, i, "VS",
+				(int)vs_hdr->unknown1 - 1, (int)vs_hdr->unknown9,
+				SHADER_VERTEX, "vo") < 0)
+			return;
+	}
+
+	/* dump fragment shaders: */
+	for (i = 0; i < 1; i++) {
+		struct fs_header *fs_hdr = next_sect(state, &sect_size);
+
+		if (!fs_hdr || (sect_size < (int)sizeof(*fs_hdr))) {
+			fprintf(stderr, "FS%d: missing or truncated header\n", i);
+			free(fs_hdr);
+			return;
+		}
+
+		if (dump_shader_a2xx(state, fs_hdr, sect_size, i, "FS",
+				(int)fs_hdr->unknown1 - 1, 0,
+				SHADER_FRAGMENT, "fo") < 0)
+			return;
+	}
+}
+
+/*
+ * Dump one a3xx shader.
+ *
+ * seems like there are two cases, either:
+ *  1) n byte header,
+ *  2) zero or more 32 (or 44) byte compiler const sections
+ *  3) followed by shader instructions
+ * or, if there are no compiler consts, this can be
+ * all smashed in one large ("compact") section
+ *
+ *   idx, stage  used for labels only, e.g. stage "VS" and idx 0 -> "VS0"
+ *   n           header size in bytes for this stage and revision
+ *   is_vertex   selects how far into a separate instruction section the
+ *               instructions start
+ *   ext         file name suffix for raw shader dumps
+ *
+ * Returns 0 on success, -1 when the input ends early or holds more
+ * constant sections than can be tracked.
+ */
+static int dump_shader_a3xx(struct state *state, int idx, const char *stage,
+		int n, int is_vertex, char *ext)
+{
+	struct constant *constants[32];
+	int const_sizes[32];
+	const int max_consts = sizeof(constants) / sizeof(constants[0]);
+	int instrs_size = 0, hdr_size = 0, sect_size = 0;
+	int nconsts = 0, level = 0, compact = 0;
+	uint8_t *hdr;
+	uint8_t *instrs_sect = NULL;	/* separately allocated instruction section */
+	uint8_t *instrs = NULL;
+	int j, ret = -1;
+
+	hdr = next_sect(state, &hdr_size);
+	if (!hdr) {
+		fprintf(stderr, "%s%d: missing header\n", stage, idx);
+		return -1;
+	}
+	printf("hdr_size=%d\n", hdr_size);
+
+	if (hdr_size > n) {
+		instrs = &hdr[n];
+		instrs_size = hdr_size - n;
+		hdr_size = n;
+		compact = 1;
+	} else {
+		while (1) {
+			void *ptr = next_sect(state, &sect_size);
+
+			if (!ptr) {
+				fprintf(stderr, "%s%d: input ends before shader\n", stage, idx);
+				goto out;
+			}
+
+			if ((sect_size != 32) && (sect_size != 44)) {
+				/* end of constants: */
+				instrs_sect = ptr;
+				instrs = ptr;
+				instrs_size = sect_size;
+				break;
+			}
+
+			if (nconsts == max_consts) {
+				fprintf(stderr, "%s%d: too many constant sections\n", stage, idx);
+				free(ptr);
+				goto out;
+			}
+
+			dump_hex_ascii(ptr, sect_size, 0);
+			/* remember the real section size for the dump below */
+			const_sizes[nconsts] = sect_size;
+			constants[nconsts++] = ptr;
+		}
+	}
+
+	printf("\n");
+
+	if (full_dump) {
+		printf("#######################################################\n");
+		printf("######## %s%d HEADER: (size %d)\n", stage, idx, hdr_size);
+		dump_hex((void *)hdr, hdr_size);
+		for (j = 0; j < nconsts; j++) {
+			printf("######## %s%d CONST: (size=%d)\n", stage, idx, const_sizes[j]);
+			dump_constant(constants[j]);
+			dump_hex((char *)constants[j], const_sizes[j]);
+		}
+	}
+
+	printf("######## %s%d SHADER: (size=%d)\n", stage, idx, instrs_size);
+	if (full_dump) {
+		dump_hex(instrs, instrs_size);
+		level = 1;
+	} else {
+		dump_short_summary(state, nconsts, constants);
+	}
+
+	if (!compact) {
+		if (is_vertex) {
+			/* NOTE(review): rounding the size up lets the
+			 * disassembler read a few bytes past the section;
+			 * kept as it was - confirm this is intended
+			 */
+			if (state->hdr->revision >= 7) {
+				instrs += ALIGN(instrs_size, 8) - instrs_size;
+				instrs_size = ALIGN(instrs_size, 8);
+			}
+			instrs += 32;
+			instrs_size -= 32;
+		} else if (state->hdr->revision >= 7) {
+			instrs += 44;
+			instrs_size -= 44;
+		} else {
+			instrs += 32;
+			instrs_size -= 32;
+		}
+	}
+
+	if (instrs_size >= 0) {
+		/* NOTE(review): the vertex path used to pass SHADER_VERTEX as
+		 * the fourth argument while the fragment path passed stdout;
+		 * both now pass the output stream - confirm against the
+		 * disasm_a3xx() prototype
+		 */
+		disasm_a3xx((uint32_t *)instrs, instrs_size / 4, level+1, stdout, gpu_id);
+		dump_raw_shader((uint32_t *)instrs, instrs_size / 4, idx, ext);
+	} else {
+		fprintf(stderr, "%s%d: shader section too small\n", stage, idx);
+	}
+
+	ret = 0;
+
+out:
+	for (j = 0; j < nconsts; j++)
+		free(constants[j]);
+	free(instrs_sect);
+	free(hdr);
+
+	return ret;
+}
+
+/*
+ * Dump the a3xx shaders of a program: two vertex shaders followed by one
+ * fragment shader.  Stops at the first shader that cannot be read.
+ */
+static void dump_shaders_a3xx(struct state *state)
+{
+	const uint32_t revision = state->hdr->revision;
+	int i, n;
+
+	/* dump vertex shaders: */
+	if (revision >= 0xb)
+		n = 160;
+	else if (revision >= 7)
+		n = 156;
+	else
+		n = 152;
+
+	for (i = 0; i < 2; i++) {
+		if (dump_shader_a3xx(state, i, "VS", n, 1, "vo3") < 0)
+			return;
+	}
+
+	/* dump fragment shaders: */
+
+	/* two cases, similar to vertex shader, but magic # is 200
+	 * (or 208 for newer?)..
+	 */
+	if (revision >= 0xb)
+		n = 256;
+	else if (revision >= 8)
+		n = 208;
+	else if (revision == 7)
+		n = 204;
+	else
+		n = 200;
+
+	for (i = 0; i < 1; i++) {
+		if (dump_shader_a3xx(state, i, "FS", n, 0, "fo3") < 0)
+			return;
+	}
+}
+
+/*
+ * Fetch the next section whose leading type_info dword is accepted by
+ * valid_type().
+ *
+ * hmm, for a3xx (or maybe just newer driver version), we have some
+ * extra sections that don't seem useful, so skip these: every rejected
+ * section is hex-dumped and released.  Returns NULL when the input runs
+ * out; otherwise the caller owns the returned section.
+ */
+static void *next_typed_sect(struct state *state, int *sect_size)
+{
+	uint32_t *sect;
+
+	while ((sect = next_sect(state, sect_size)) != NULL) {
+		/* a section too short to hold type_info cannot be valid */
+		if ((*sect_size >= (int)sizeof(*sect)) && valid_type(sect[0]))
+			break;
+
+		dump_hex_ascii(sect, *sect_size, 0);
+		free(sect);
+	}
+
+	return sect;
+}
+
+/*
+ * Decode and print one program binary: the header, the attribute /
+ * uniform / sampler / varying / output / uniform block tables, the
+ * shaders, and (in full dump mode) the shader source and any sections
+ * left over.
+ *
+ * Parsed sections are stored in state.  Parsing stops quietly when the
+ * input runs out; whatever was read up to that point is still printed.
+ * The uniform block member lists are released again before returning.
+ */
+static void dump_program(struct state *state)
+{
+	const uint32_t max_attribs = sizeof(state->attribs) / sizeof(state->attribs[0]);
+	const uint32_t max_uniforms = sizeof(state->uniforms) / sizeof(state->uniforms[0]);
+	const uint32_t max_samplers = sizeof(state->samplers) / sizeof(state->samplers[0]);
+	const uint32_t max_varyings = sizeof(state->varyings) / sizeof(state->varyings[0]);
+	const uint32_t max_blocks = sizeof(state->uniformblocks) / sizeof(state->uniformblocks[0]);
+	struct pgm_header *hdr;
+	struct output *output = NULL;
+	uint32_t i, member, nblocks = 0;
+	int sect_size = 0;
+	uint8_t *ptr;
+
+	state->hdr = hdr = next_sect(state, &sect_size);
+	if (!hdr) {
+		fprintf(stderr, "no program header found\n");
+		return;
+	}
+
+	printf("######## HEADER: (size %d)\n", sect_size);
+	printf("\tsize: %d\n", hdr->size);
+	printf("\trevision: %d\n", hdr->revision);
+	printf("\tattributes: %d\n", hdr->num_attribs);
+	printf("\tuniforms: %d\n", hdr->num_uniforms);
+	printf("\tsamplers: %d\n", hdr->num_samplers);
+	printf("\tvaryings: %d\n", hdr->num_varyings);
+	printf("\tuniform blocks: %d\n", hdr->num_uniformblocks);
+	if (full_dump)
+		dump_hex((void *)hdr, sect_size);
+	printf("\n");
+
+	/* the tables in state are fixed size; refuse counts that do not fit
+	 * rather than writing past them
+	 */
+	if ((hdr->num_attribs > max_attribs) ||
+	    (hdr->num_uniforms > max_uniforms) ||
+	    (hdr->num_samplers > max_samplers) ||
+	    (hdr->num_varyings > max_varyings) ||
+	    (hdr->num_uniformblocks > max_blocks)) {
+		fprintf(stderr, "header counts exceed what this tool can track\n");
+		return;
+	}
+
+	/* there seems to be two 0xba5eba11's at the end of the header, possibly
+	 * with some other stuff between them:
+	 */
+	ptr = next_sect(state, &sect_size);
+	if (ptr && full_dump)
+		dump_hex_ascii(ptr, sect_size, 0);
+	free(ptr);
+
+	for (i = 0; (i < hdr->num_attribs) && (state->sz > 0); i++) {
+		state->attribs[i] = next_typed_sect(state, &sect_size);
+		if (!state->attribs[i])
+			break;
+
+		clean_ascii(state->attribs[i]->name, sect_size - 28);
+		if (full_dump) {
+			printf("######## ATTRIBUTE: (size %d)\n", sect_size);
+			dump_attribute(state->attribs[i]);
+			dump_hex((char *)state->attribs[i], sect_size);
+		}
+	}
+
+	for (i = 0; (i < hdr->num_uniforms) && (state->sz > 0); i++) {
+		state->uniforms[i] = next_typed_sect(state, &sect_size);
+		if (!state->uniforms[i])
+			break;
+
+		if (is_uniform_v2(state->uniforms[i])) {
+			clean_ascii(state->uniforms[i]->v2.name, sect_size - 53);
+		} else {
+			clean_ascii(state->uniforms[i]->v1.name, sect_size - 41);
+		}
+
+		if (full_dump) {
+			printf("######## UNIFORM: (size %d)\n", sect_size);
+			dump_uniform(state->uniforms[i]);
+			dump_hex((char *)state->uniforms[i], sect_size);
+		}
+	}
+
+	for (i = 0; (i < hdr->num_samplers) && (state->sz > 0); i++) {
+		state->samplers[i] = next_typed_sect(state, &sect_size);
+		if (!state->samplers[i])
+			break;
+
+		clean_ascii(state->samplers[i]->name, sect_size - 33);
+		if (full_dump) {
+			printf("######## SAMPLER: (size %d)\n", sect_size);
+			dump_sampler(state->samplers[i]);
+			dump_hex((char *)state->samplers[i], sect_size);
+		}
+	}
+
+	// These sections show up after all of the other sampler sections
+	// Loops through them all since we don't deal with them
+	if (hdr->revision >= 7) {
+		for (i = 0; (i < hdr->num_samplers) && (state->sz > 0); i++) {
+			ptr = next_sect(state, &sect_size);
+			if (!ptr)
+				break;
+			dump_hex_ascii(ptr, sect_size, 0);
+			free(ptr);
+		}
+	}
+
+	for (i = 0; (i < hdr->num_varyings) && (state->sz > 0); i++) {
+		state->varyings[i] = next_typed_sect(state, &sect_size);
+		if (!state->varyings[i])
+			break;
+
+		clean_ascii(state->varyings[i]->name, sect_size - 16);
+		if (full_dump) {
+			printf("######## VARYING: (size %d)\n", sect_size);
+			dump_varying(state->varyings[i]);
+			dump_hex((char *)state->varyings[i], sect_size);
+		}
+	}
+
+	/* show up again for revision >= 14?? */
+	if (hdr->revision >= 14) {
+		for (i = 0; (i < hdr->num_varyings) && (state->sz > 0); i++) {
+			ptr = next_sect(state, &sect_size);
+			if (!ptr)
+				break;
+			dump_hex_ascii(ptr, sect_size, 0);
+			free(ptr);
+		}
+	}
+
+	/* not sure exactly which revision started this, but seems at least
+	 * rev7 and rev8 implicitly include a new section for gl_FragColor:
+	 */
+	if (hdr->revision >= 7) {
+		/* I guess only one?  The section is only needed for the dump
+		 * right here, so it is held in a local and freed on exit.
+		 */
+		output = next_sect(state, &sect_size);
+		if (output) {
+			clean_ascii(output->name, sect_size - 32);
+			if (full_dump) {
+				printf("######## OUTPUT: (size %d)\n", sect_size);
+				dump_output(output);
+				dump_hex((char *)output, sect_size);
+			}
+		}
+	}
+
+	for (i = 0; (i < hdr->num_uniformblocks) && (state->sz > 0); i++) {
+		struct uniformblock *blk = next_sect(state, &sect_size);
+		struct uniformblockmember **members;
+		int pass;
+
+		if (!blk)
+			break;
+
+		state->uniformblocks[i].header = blk;
+		state->uniformblocks[i].members = NULL;
+		nblocks = i + 1;
+
+		clean_ascii(blk->name, sect_size - 40);
+		if (full_dump) {
+			printf("######## UNIFORM BLOCK: (size %d)\n", sect_size);
+			dump_uniformblock(blk);
+			dump_hex((char *)blk, sect_size);
+		}
+
+		/*
+		 * OpenGL ES 3.0 spec mandates a minimum amount of 16K members supported
+		 * a330 supports a minimum of 65K
+		 */
+		members = calloc(blk->num_members ? blk->num_members : 1, sizeof(*members));
+		if (!members) {
+			fprintf(stderr, "out of memory for %d uniform block members\n", blk->num_members);
+			break;
+		}
+		state->uniformblocks[i].members = members;
+
+		/*
+		 * Qualcomm saves the UBO members twice for each UBO
+		 * Don't ask me why
+		 *
+		 * The second copy replaces the first one in the table.
+		 */
+		for (pass = 0; pass < 2; pass++) {
+			for (member = 0; (member < blk->num_members) && (state->sz > 0); member++) {
+				struct uniformblockmember *m = next_sect(state, &sect_size);
+
+				if (!m)
+					break;
+
+				free(members[member]);
+				members[member] = m;
+
+				clean_ascii(m->name, sect_size - 56);
+				if (full_dump) {
+					printf("######## UNIFORM BLOCK MEMBER%s: (size %d)\n",
+							pass ? "2" : "", sect_size);
+					dump_uniformblockmember(m);
+					dump_hex((char *)m, sect_size);
+				}
+			}
+		}
+	}
+
+	if (gpu_id >= 300) {
+		dump_shaders_a3xx(state);
+	} else {
+		dump_shaders_a2xx(state);
+	}
+
+	if (full_dump) {
+		/* dump ascii version of shader program: */
+		ptr = next_sect(state, &sect_size);
+		if (ptr) {
+			printf("\n#######################################################\n");
+			printf("######## SHADER SRC: (size=%d)\n", sect_size);
+			dump_ascii(ptr, sect_size);
+			free(ptr);
+		}
+
+		/* dump remaining sections (there shouldn't be any): */
+		while (state->sz > 0) {
+			ptr = next_sect(state, &sect_size);
+			/* no terminator left: the rest is not a section, and
+			 * the state no longer advances
+			 */
+			if (!ptr)
+				break;
+			printf("######## section (size=%d)\n", sect_size);
+			printf("as hex:\n");
+			dump_hex(ptr, sect_size);
+			printf("as float:\n");
+			dump_float(ptr, sect_size);
+			printf("as ascii:\n");
+			dump_ascii(ptr, sect_size);
+			free(ptr);
+		}
+	}
+
+	/* cleanup the uniform buffer members we allocated, for every block
+	 * that was parsed and on every path out of the function
+	 */
+	for (i = 0; i < nblocks; i++) {
+		struct uniformblockmember **members = state->uniformblocks[i].members;
+
+		if (!members)
+			continue;
+
+		for (member = 0; member < state->uniformblocks[i].header->num_members; member++)
+			free(members[member]);
+		free(members);
+		state->uniformblocks[i].members = NULL;
+	}
+	free(output);
+}
+
+/*
+ * pgmdump entry point.
+ *
+ * Consumes the leading "--" options, opens the single remaining argument
+ * as the input file and then handles one of three input flavours:
+ *
+ *  - --raw: a 4 byte size followed by a program binary, which is handed
+ *    to dump_program()
+ *  - anything that is not .rd/.rd.gz: a bare shader binary (.vo/.fo are
+ *    disassembled as a2xx, .vo3/.fo3/.co3 as a3xx+)
+ *  - .rd/.rd.gz: a stream of (type, size, payload) sections
+ *
+ * Returns 0 on success (or the disassembler's return value for a bare
+ * shader binary) and -1 on usage, I/O, allocation or format errors.
+ */
+int main(int argc, char **argv)
+{
+	enum rd_sect_type type = RD_NONE;
+	enum debug_t debug = 0;
+	void *buf = NULL;
+	int sz;
+	int status = 0;
+	struct io *io;
+	int raw_program = 0;
+
+	/* lame argument parsing: */
+
+	while (1) {
+		if ((argc > 1) && !strcmp(argv[1], "--verbose")) {
+			debug |= PRINT_RAW | PRINT_VERBOSE;
+			argv++;
+			argc--;
+			continue;
+		}
+		if ((argc > 1) && !strcmp(argv[1], "--expand")) {
+			debug |= EXPAND_REPEAT;
+			argv++;
+			argc--;
+			continue;
+		}
+		if ((argc > 1) && !strcmp(argv[1], "--short")) {
+			/* only short dump, original shader, symbol table, and disassembly */
+			full_dump = 0;
+			argv++;
+			argc--;
+			continue;
+		}
+		if ((argc > 1) && !strcmp(argv[1], "--dump-shaders")) {
+			dump_shaders = 1;
+			argv++;
+			argc--;
+			continue;
+		}
+		if ((argc > 1) && !strcmp(argv[1], "--raw")) {
+			raw_program = 1;
+			argv++;
+			argc--;
+			continue;
+		}
+		if ((argc > 1) && !strcmp(argv[1], "--gpu300")) {
+			gpu_id = 320;
+			argv++;
+			argc--;
+			continue;
+		}
+		break;
+	}
+
+	if (argc != 2) {
+		fprintf(stderr, "usage: pgmdump [--verbose] [--expand] [--short] [--dump-shaders] [--raw] [--gpu300] testlog.rd\n");
+		return -1;
+	}
+
+	disasm_set_debug(debug);
+
+	infile = argv[1];
+
+	io = io_open(infile);
+	if (!io) {
+		fprintf(stderr, "could not open: %s\n", infile);
+		return -1;
+	}
+
+	if (raw_program)
+	{
+		/* the size comes straight from the file, don't trust it: */
+		if ((io_readn(io, &sz, 4) <= 0) || (sz < 0)) {
+			fprintf(stderr, "could not read program size: %s\n", infile);
+			return -1;
+		}
+
+		/* The buffer holds the 4 byte size word followed by sz bytes of
+		 * payload, so it needs sz + 4 bytes.. plus 3 bytes of slack:
+		 *
+		 * note: allow hex dumps to go a bit past the end of the buffer..
+		 * might see some garbage, but better than missing the last few bytes..
+		 */
+		buf = calloc(1, (size_t)sz + 4 + 3);
+		if (!buf) {
+			fprintf(stderr, "out of memory\n");
+			return -1;
+		}
+		io_readn(io, buf + 4, sz);
+		(*(int*)buf) = sz;
+
+		struct state state = {
+				.buf = buf,
+				.sz = sz,
+		};
+		printf("############################################################\n");
+		printf("program:\n");
+		dump_program(&state);
+		printf("############################################################\n");
+		return 0;
+	}
+
+	/* figure out what sort of input we are dealing with: */
+	if (!(check_extension(infile, ".rd") || check_extension(infile, ".rd.gz"))) {
+		/* ~0 means "no a2xx shader stage selected", ie. a3xx+ input: */
+		enum shader_t shader = ~0;
+		int ret;
+		if (check_extension(infile, ".vo")) {
+			shader = SHADER_VERTEX;
+		} else if (check_extension(infile, ".fo")) {
+			shader = SHADER_FRAGMENT;
+		} else if (check_extension(infile, ".vo3")) {
+		} else if (check_extension(infile, ".fo3")) {
+		} else if (check_extension(infile, ".co3")) {
+		} else {
+			fprintf(stderr, "invalid input file: %s\n", infile);
+			return -1;
+		}
+		buf = calloc(1, 100 * 1024);
+		if (!buf) {
+			fprintf(stderr, "out of memory\n");
+			return -1;
+		}
+		ret = io_readn(io, buf, 100 * 1024);
+		if (ret < 0) {
+			fprintf(stderr, "error: %m");
+			return -1;
+		}
+		if (shader != ~0) {
+			return disasm_a2xx(buf, ret/4, 0, shader);
+		} else {
+			/* disassembly does not depend on shader stage on a3xx+: */
+			return disasm_a3xx(buf, ret/4, 0, stdout, gpu_id);
+		}
+	}
+
+	while ((io_readn(io, &type, sizeof(type)) > 0) && (io_readn(io, &sz, 4) > 0)) {
+		free(buf);
+		buf = NULL;
+
+		/* the size comes straight from the file, don't trust it: */
+		if (sz < 0) {
+			fprintf(stderr, "invalid section size: %d\n", sz);
+			status = -1;
+			break;
+		}
+
+		/* note: allow hex dumps to go a bit past the end of the buffer..
+		 * might see some garbage, but better than missing the last few bytes..
+		 * (the zeroed slack also NUL terminates the text sections)
+		 */
+		buf = calloc(1, (size_t)sz + 3);
+		if (!buf) {
+			fprintf(stderr, "out of memory\n");
+			status = -1;
+			break;
+		}
+		io_readn(io, buf, sz);
+
+		switch(type) {
+		case RD_TEST:
+			if (full_dump)
+				printf("test: %s\n", (char *)buf);
+			break;
+		case RD_VERT_SHADER:
+			printf("vertex shader:\n%s\n", (char *)buf);
+			break;
+		case RD_FRAG_SHADER:
+			printf("fragment shader:\n%s\n", (char *)buf);
+			break;
+		case RD_PROGRAM: {
+			struct state state = {
+					.buf = buf,
+					.sz = sz,
+			};
+			printf("############################################################\n");
+			printf("program:\n");
+			dump_program(&state);
+			printf("############################################################\n");
+			break;
+		}
+		case RD_GPU_ID:
+			/* a short section does not contain a full gpu id: */
+			if (sz >= (int)sizeof(unsigned int)) {
+				gpu_id = *((unsigned int *)buf);
+				printf("gpu_id: %d\n", gpu_id);
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	free(buf);
+	io_close(io);
+
+	return status;
+}
+
--- /dev/null
+/*
+ * Copyright (c) 2018 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Decoder for "new" GL_OES_get_program_binary format.
+ *
+ * Overall structure is:
+ *
+ * - header at top, contains, amongst other things, offsets of
+ * per shader stage sections.
+ * - per shader stage section (shader_info) starts with a header,
+ * followed by a variable-length list of descriptors. Each
+ * descriptor has a type/count/size plus offset from the start
+ * of shader_info section where the data is found
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "redump.h"
+#include "disasm.h"
+#include "io.h"
+#include "util.h"
+
+/* path of the input file, set by main() from the last command line argument */
+const char *infile;
+/* --full: also dump the whole program as raw dwords and print RD_TEST sections */
+static int dump_full = 0;
+/* --dump-offsets: prefix decoded fields with their offset into the program buffer */
+static int dump_offsets = 0;
+/* gpu generation used for disassembly; replaced when an RD_GPU_ID section is read */
+static int gpu_id = 320;
+static int shaderdb = 0; /* output shaderdb style traces to stderr */
+
+/* Decoder state that is threaded through all of the decode_*() helpers. */
+struct state {
+ /* start of the program binary being decoded */
+ char *buf;
+ /* size of the program binary in bytes */
+ int sz;
+ /* current nesting level, used for indenting the output via tab() */
+ int lvl;
+
+ /* current shader_info section, some offsets calculated relative to
+ * this, rather than relative to start of buffer.
+ */
+ void *shader;
+
+ /* size of each entry within a shader_descriptor_blk: */
+ int desc_size;
+
+ /* label of the stage being decoded ("FRAG", "VERT" or "BVERT"), set
+ * by decode_header() and used in the shaderdb output
+ */
+ const char *shader_type;
+ /* register counts remembered by decode_shader_config() for the
+ * shaderdb output
+ */
+ int full_regs;
+ int half_regs;
+};
+
+/*
+ * Field decoding helpers.
+ *
+ * All of the macros below (other than PACKED) expand to code that refers
+ * to a variable named 'state' (a struct state pointer), so they can only
+ * be used inside functions that have one in scope.
+ */
+
+#define PACKED __attribute__((__packed__))
+
+/* if --dump-offsets was given, print the offset of 'field' within state->buf */
+#define OFF(field) do { \
+ if (dump_offsets) \
+ printf("%08x: ", (uint32_t)((char *)&field - state->buf));\
+ } while (0)
+
+/* decode field as hex */
+#define X(s, field) do { \
+ OFF(s->field); \
+ printf("%s%12s:\t0x%x\n", tab(state->lvl), #field, s->field); \
+ } while (0)
+
+/* decode field as digit */
+#define D(s, field) do { \
+ OFF(s->field); \
+ printf("%s%12s:\t%u\n", tab(state->lvl), #field, s->field); \
+ } while (0)
+
+/* decode field as float/hex */
+#define F(s, field) do { \
+ OFF(s->field); \
+ printf("%s%12s:\t%f (0x%0x)\n", tab(state->lvl), #field, \
+ d2f(s->field), s->field); \
+ } while (0)
+
+/* decode field as register: (type is 'r' or 'c') */
+/* the low two bits select the component, the remaining bits the register number */
+#define R(s, field, type) do { \
+ OFF(s->field); \
+ printf("%s%12s:\t%c%u.%c\n", tab(state->lvl), #field, type, \
+ (s->field >> 2), "xyzw"[s->field & 0x3]); \
+ } while (0)
+
+/* decode inline string (presumably null terminated?) */
+#define S(s, field) do { \
+ OFF(s->field); \
+ printf("%s%12s:\t%s\n", tab(state->lvl), #field, s->field); \
+ } while (0)
+
+/* decode string-table string */
+/* not implemented yet: expands to the undefined identifier TODO */
+#define T(s, field) TODO
+
+/* decode field as unknown */
+/* 'start' and 'end' are the hex offsets of the first and last dword of the
+ * member named unk_<start>_<end>; the dword count is derived from them
+ */
+#define U(s, start, end) \
+ dump_unknown(state, s->unk_ ## start ## _ ## end, 0x ## start, (4 + 0x ## end - 0x ## start) / 4)
+
+/* decode field as offset to other section */
+/* prints the offset, asserts that it lies inside the buffer, then calls
+ * decode_<type>() on that location with the indent level raised by one
+ */
+#define O(s, field, type) do { \
+ X(s, field); \
+ assert(s->field < state->sz); \
+ void *_p = &state->buf[s->field]; \
+ state->lvl++; \
+ decode_ ## type (state, _p); \
+ state->lvl--; \
+ } while (0)
+
+/* forward declarations: decode_header() reaches decode_shader_info() through
+ * the O() macro before the struct and the function are defined below
+ */
+struct shader_info;
+static void decode_shader_info(struct state *state, struct shader_info *info);
+
+/*
+ * Dump 'n' dwords starting at 'buf' whose meaning is not (yet) known.
+ *
+ * Each dword gets one output line showing its label offset ('start' plus
+ * the byte offset into the dump), the value as hex, as four ascii chars
+ * and as float.  If the value also looks like the offset of a string
+ * within the program buffer, that string is appended.
+ */
+static void dump_unknown(struct state *state, void *buf, unsigned start, unsigned n)
+{
+	uint32_t *words = buf;
+	uint8_t *bytes = buf;
+
+	for (unsigned idx = 0; idx < n; idx++, bytes += 4) {
+		uint32_t val = words[idx];
+
+		if (dump_offsets)
+			printf("%08x:", (uint32_t)((char *)&words[idx] - state->buf));
+
+		printf("%s %04x:\t%08x\t|", tab(state->lvl), start + idx * 4, val);
+
+		/* the same four bytes again, as printable characters: */
+		for (unsigned k = 0; k < 4; k++) {
+			uint8_t ch = bytes[k];
+			putchar((isascii(ch) && !iscntrl(ch)) ? ch : '.');
+		}
+
+		printf("|\t%f", d2f(val));
+
+		/* TODO maybe scan for first non-null and non-ascii char starting from
+		 * end of shader binary to (roughly) establish the start of the string
+		 * table.. that would be a bit better filter for deciding if something
+		 * might be a pointer into the string table.  Also, the previous char
+		 * to what it points to should probably be null.
+		 */
+		if (val < state->sz) {
+			char *str = &state->buf[val];
+
+			if (isascii(str[0]) && (strlen(str) > 2) && isascii(str[1]))
+				printf("\t<== %s", str);
+		}
+
+		putchar('\n');
+	}
+}
+
+/*
+ * Layout of the start of the program binary.  Members named
+ * unk_<start>_<end> are not understood yet; the name gives the hex byte
+ * offsets of the first and last dword that the member covers.
+ */
+struct PACKED header {
+ uint32_t version; /* I guess, always b10bcace ? */
+ uint32_t unk_0004_0014[5];
+ uint32_t size;
+ uint32_t size2; /* just to be sure? */
+ uint32_t unk_0020_0020[1];
+ uint32_t chksum; /* I guess? Small changes seem to result in big diffs here */
+ uint32_t unk_0028_0050[11];
+ uint32_t fs_info; /* offset of FS shader_info section */
+ uint32_t unk_0058_0090[15];
+ uint32_t vs_info; /* offset of VS shader_info section */
+ uint32_t unk_0098_00b0[7];
+ uint32_t vs_info2; /* offset of VS shader_info section (again?) */
+ uint32_t unk_00b8_0110[23];
+ uint32_t bs_info; /* offset of binning shader_info section */
+};
+
+/*
+ * Decode the program header in field order, descending into the
+ * fragment, vertex and binning shader_info sections that it points to,
+ * and finally dump whatever lies between the end of the header and the
+ * start of the fragment shader_info section as unknown dwords.
+ */
+static void decode_header(struct state *state, struct header *hdr)
+{
+	X(hdr, version);
+	U(hdr, 0004, 0014);
+	X(hdr, size);
+	X(hdr, size2);
+	U(hdr, 0020, 0020);
+	X(hdr, chksum);
+	U(hdr, 0028, 0050);
+	state->shader_type = "FRAG";
+	O(hdr, fs_info, shader_info);
+	U(hdr, 0058, 0090);
+	state->shader_type = "VERT";
+	O(hdr, vs_info, shader_info);
+	U(hdr, 0098, 00b0);
+	assert(hdr->vs_info == hdr->vs_info2);  /* not sure what it means if these ever differ */
+	X(hdr, vs_info2);
+	U(hdr, 00b8, 0110);
+	state->shader_type = "BVERT";
+	O(hdr, bs_info, shader_info);
+
+	/* not sure how much of the rest of contents before start of fs_info
+	 * is the header, vs other things.. just dump it all as unknown for
+	 * now.  Skip this if fs_info does not lie past the header, otherwise
+	 * the unsigned subtraction wraps around to a huge dword count:
+	 */
+	if (hdr->fs_info > sizeof(*hdr)) {
+		dump_unknown(state, (void *)hdr + sizeof(*hdr),
+				sizeof(*hdr), (hdr->fs_info - sizeof(*hdr)) / 4);
+	}
+}
+
+/* record of an ENTRY_POINT descriptor block */
+struct PACKED shader_entry_point {
+ /* entry point name, ie. "main" of TBD length, followed by unknown */
+ char name[8];
+};
+
+/* Print the name of a shader entry point record. */
+static void decode_shader_entry_point(struct state *state,
+ struct shader_entry_point *e)
+{
+ /* printed with %s, so this relies on a NUL being found in (or after) name */
+ S(e, name);
+}
+
+/* known leading part of a SHADER_CONFIG record; the record itself may be
+ * longer, see decode_shader_config()
+ */
+struct PACKED shader_config {
+ uint32_t unk_0000_0008[3];
+ uint32_t full_regs;
+ uint32_t half_regs;
+};
+
+/*
+ * Decode a SHADER_CONFIG record and remember its register counts in
+ * 'state' for the shaderdb output.  Anything in the record beyond the
+ * known fields is dumped as unknown dwords; the record size is taken
+ * from state->desc_size.
+ */
+static void decode_shader_config(struct state *state, struct shader_config *cfg)
+{
+	U(cfg, 0000, 0008);
+	D(cfg, full_regs);
+	D(cfg, half_regs);
+
+	state->full_regs = cfg->full_regs;
+	state->half_regs = cfg->half_regs;
+
+	/* dump rest of unknown (size differs btwn versions).  Nothing to dump
+	 * if the record is not larger than the known part; without this check
+	 * the unsigned subtraction wraps around to a huge dword count:
+	 */
+	if (state->desc_size > (int)sizeof(*cfg)) {
+		dump_unknown(state, (void *)cfg + sizeof(*cfg), sizeof(*cfg),
+				(state->desc_size - sizeof(*cfg))/4);
+	}
+}
+
+/* record used for the SHADER_INPUT, SHADER_OUTPUT and INTERNAL descriptor blocks */
+struct PACKED shader_io_block {
+ /* name of TBD length followed by unknown.. 42 dwords total */
+ char name[20];
+ uint32_t unk_0014_00a4[37];
+};
+
+/* Print the name of an input/output record followed by its unknown dwords. */
+static void decode_shader_io_block(struct state *state,
+ struct shader_io_block *io)
+{
+ S(io, name);
+ U(io, 0014, 00a4);
+}
+
+/* record of a CONSTANTS descriptor block */
+struct PACKED shader_constant_block {
+ uint32_t value; /* decoded as float and as hex */
+ uint32_t unk_0004_000c[3];
+ uint32_t regid; /* decoded as a 'c' register: component in the low two bits */
+ uint32_t unk_0014_0024[5];
+};
+
+/* Print a constant record: its value, the const register it is assigned
+ * to, and the unknown dwords in between and after.
+ */
+static void decode_shader_constant_block(struct state *state,
+ struct shader_constant_block *c)
+{
+ F(c, value);
+ U(c, 0004, 000c);
+ R(c, regid, 'c');
+ U(c, 0014, 0024);
+}
+
+/*
+ * Values of shader_descriptor_block::type, with the record type found
+ * in the block where it is known.  (This is a named enum rather than an
+ * anonymous one followed by an identifier, which would define an unused
+ * global variable instead of naming the type.)
+ */
+enum shader_info_block_type {
+	ENTRY_POINT   = 0,    /* shader_entry_point */
+	SHADER_CONFIG = 1,    /* XXX placeholder name */
+	SHADER_INPUT  = 2,    /* shader_io_block */
+	SHADER_OUTPUT = 3,    /* shader_io_block */
+	CONSTANTS     = 6,    /* shader_constant_block */
+	INTERNAL      = 8,    /* internal input, like bary.f coord */
+	SHADER        = 10,
+};
+
+/* Refers to location of some type of records, with an offset relative to
+ * start of shader_info block.
+ */
+/* one entry of the descriptor list of a shader_info section */
+struct PACKED shader_descriptor_block {
+ uint32_t type; /* block type */
+ uint32_t offset; /* offset (relative to start of shader_info block) */
+ uint32_t size; /* size in bytes */
+ uint32_t count; /* number of records */
+ uint32_t unk_0010_0010[1];
+};
+
+/*
+ * Decode one descriptor and the records that it refers to.
+ *
+ * The records are found at blk->offset relative to the current
+ * shader_info section (state->shader).  blk->size is divided evenly over
+ * blk->count records and each record is decoded according to blk->type;
+ * unknown types are dumped as raw dwords.  SHADER blocks are handed to
+ * the disassembler in one go and, with --shaderdb, a stats line for the
+ * shader is written to stderr.
+ */
+static void decode_shader_descriptor_block(struct state *state,
+		struct shader_descriptor_block *blk)
+{
+	D(blk, type);
+	X(blk, offset);
+	D(blk, size);
+	D(blk, count);
+	U(blk, 0010, 0010);
+
+	/* offset relative to current shader block: */
+	void *ptr = state->shader + blk->offset;
+
+	/* an empty block has no per-record size, and must not be divided by
+	 * its (zero) record count:
+	 */
+	if (blk->count == 0) {
+		assert(blk->size == 0);
+		state->desc_size = 0;
+	} else {
+		assert((blk->size % blk->count) == 0);
+		state->desc_size = blk->size / blk->count;
+	}
+
+	state->lvl++;
+	for (unsigned i = 0; i < blk->count; i++) {
+		switch (blk->type) {
+		case ENTRY_POINT:
+			printf("%sentry point %u:\n", tab(state->lvl-1), i);
+			decode_shader_entry_point(state, ptr);
+			break;
+		case SHADER_CONFIG:
+			printf("%sconfig %u:\n", tab(state->lvl-1), i);
+			decode_shader_config(state, ptr);
+			break;
+		case SHADER_INPUT:
+			printf("%sinput %u:\n", tab(state->lvl-1), i);
+			decode_shader_io_block(state, ptr);
+			break;
+		case SHADER_OUTPUT:
+			printf("%soutput %u:\n", tab(state->lvl-1), i);
+			decode_shader_io_block(state, ptr);
+			break;
+		case INTERNAL:
+			printf("%sinternal input %u:\n", tab(state->lvl-1), i);
+			decode_shader_io_block(state, ptr);
+			break;
+		case CONSTANTS:
+			printf("%sconstant %u:\n", tab(state->lvl-1), i);
+			decode_shader_constant_block(state, ptr);
+			break;
+		case SHADER: {
+			struct shader_stats stats;
+			printf("%sshader %u:\n", tab(state->lvl-1), i);
+			disasm_a3xx_stat(ptr, blk->size/4, state->lvl, stdout, gpu_id, &stats);
+			if (shaderdb) {
+				unsigned dwords = 2 * stats.instlen;
+
+				if (gpu_id >= 400) {
+					dwords = ALIGN(dwords, 16 * 2);
+				} else {
+					dwords = ALIGN(dwords, 4 * 2);
+				}
+
+				unsigned half_regs = state->half_regs;
+				unsigned full_regs = state->full_regs;
+
+				/* On a6xx w/ merged/conflicting half and full regs, the
+				 * full_regs footprint will be max of full_regs and half
+				 * of half_regs.. we only care about which value is higher.
+				 */
+				if (gpu_id >= 600) {
+					/* footprint of half_regs in units of full_regs: */
+					unsigned half_full = (half_regs + 1) / 2;
+					if (half_full > full_regs)
+						full_regs = half_full;
+					half_regs = 0;
+				}
+
+				fprintf(stderr,
+					"%s shader: %u inst, %u nops, %u non-nops, %u dwords, "
+					"%u half, %u full, %u constlen, "
+					"%u (ss), %u (sy), %d max_sun, %d loops\n",
+					state->shader_type, stats.instructions,
+					stats.nops, stats.instructions - stats.nops,
+					dwords, half_regs, full_regs,
+					stats.constlen, stats.ss, stats.sy,
+					0, 0);  /* max_sun or loops not possible */
+			}
+			/* this is a special case in a way, blk->count is # of
+			 * instructions but disasm_a3xx() decodes all instructions,
+			 * so just bail.
+			 */
+			i = blk->count;
+			break;
+		}
+		default:
+			dump_unknown(state, ptr, 0, state->desc_size/4);
+			break;
+		}
+		ptr += state->desc_size;
+	}
+	state->lvl--;
+}
+
+/* there looks like one of these per shader, followed by "main" and
+ * some more info, and then the shader itself.
+ */
+/* header at the start of a per-stage shader_info section */
+struct PACKED shader_info {
+ uint32_t unk_0000_0010[5];
+ uint32_t desc_off; /* offset to first descriptor block */
+ uint32_t num_blocks; /* number of descriptor blocks found at desc_off */
+};
+
+/*
+ * Decode one shader_info section: its header, the unknown dwords between
+ * the header and the descriptor list, and then each descriptor in turn.
+ * Also makes 'info' the current shader section (state->shader), which the
+ * descriptor offsets are relative to.
+ */
+static void decode_shader_info(struct state *state, struct shader_info *info)
+{
+	assert((info->desc_off % 4) == 0);
+
+	U(info, 0000, 0010);
+	X(info, desc_off);
+	D(info, num_blocks);
+
+	/* Only dump the gap if the descriptors really start past the header,
+	 * otherwise the unsigned subtraction wraps around to a huge dword
+	 * count:
+	 */
+	if (info->desc_off > sizeof(*info))
+		dump_unknown(state, &info[1], 0, (info->desc_off - sizeof(*info))/4);
+
+	state->shader = info;
+
+	struct shader_descriptor_block *blocks = ((void *)info) + info->desc_off;
+	for (unsigned i = 0; i < info->num_blocks; i++) {
+		printf("%sdescriptor %u:\n", tab(state->lvl), i);
+		state->lvl++;
+		decode_shader_descriptor_block(state, &blocks[i]);
+		state->lvl--;
+	}
+}
+
+/*
+ * Decode a whole program binary.  With --full the complete buffer is
+ * first dumped as unknown dwords, then the structured decode starts from
+ * the header at the beginning of the buffer.
+ */
+static void dump_program(struct state *state)
+{
+	if (dump_full) {
+		unsigned ndwords = state->sz / 4;
+		dump_unknown(state, state->buf, 0, ndwords);
+	}
+
+	decode_header(state, (void *)state->buf);
+}
+
+/*
+ * pgmdump2 entry point.
+ *
+ * Consumes the leading "--" options, opens the single remaining argument
+ * as the input file and then handles one of three input flavours:
+ *
+ *  - --raw: a 4 byte size followed by a program binary, which is handed
+ *    to dump_program()
+ *  - anything that is not .rd/.rd.gz: a bare shader binary, which is
+ *    disassembled directly
+ *  - .rd/.rd.gz: a stream of (type, size, payload) sections
+ *
+ * Returns 0 on success (or the disassembler's return value for a bare
+ * shader binary) and -1 on usage, I/O, allocation or format errors.
+ */
+int main(int argc, char **argv)
+{
+	enum rd_sect_type type = RD_NONE;
+	enum debug_t debug = 0;
+	void *buf = NULL;
+	int sz;
+	int status = 0;
+	struct io *io;
+	int raw_program = 0;
+
+	/* lame argument parsing: */
+
+	while (1) {
+		if ((argc > 1) && !strcmp(argv[1], "--verbose")) {
+			debug |= PRINT_RAW | PRINT_VERBOSE;
+			argv++;
+			argc--;
+			continue;
+		}
+		if ((argc > 1) && !strcmp(argv[1], "--expand")) {
+			debug |= EXPAND_REPEAT;
+			argv++;
+			argc--;
+			continue;
+		}
+		if ((argc > 1) && !strcmp(argv[1], "--full")) {
+			/* also dump the whole program as raw dwords, and print
+			 * the test name sections
+			 */
+			dump_full = 1;
+			argv++;
+			argc--;
+			continue;
+		}
+		if ((argc > 1) && !strcmp(argv[1], "--dump-offsets")) {
+			dump_offsets = 1;
+			argv++;
+			argc--;
+			continue;
+		}
+		if ((argc > 1) && !strcmp(argv[1], "--raw")) {
+			raw_program = 1;
+			argv++;
+			argc--;
+			continue;
+		}
+		if ((argc > 1) && !strcmp(argv[1], "--shaderdb")) {
+			shaderdb = 1;
+			argv++;
+			argc--;
+			continue;
+		}
+		break;
+	}
+
+	if (argc != 2) {
+		fprintf(stderr, "usage: pgmdump2 [--verbose] [--expand] [--full] [--dump-offsets] [--raw] [--shaderdb] testlog.rd\n");
+		return -1;
+	}
+
+	disasm_set_debug(debug);
+
+	infile = argv[1];
+
+	io = io_open(infile);
+	if (!io) {
+		fprintf(stderr, "could not open: %s\n", infile);
+		return -1;
+	}
+
+	if (raw_program)
+	{
+		/* the size comes straight from the file, don't trust it: */
+		if ((io_readn(io, &sz, 4) <= 0) || (sz < 0)) {
+			fprintf(stderr, "could not read program size: %s\n", infile);
+			return -1;
+		}
+
+		/* The buffer holds the 4 byte size word followed by sz bytes of
+		 * payload, so it needs sz + 4 bytes.. plus 3 bytes of slack:
+		 *
+		 * note: allow hex dumps to go a bit past the end of the buffer..
+		 * might see some garbage, but better than missing the last few bytes..
+		 */
+		buf = calloc(1, (size_t)sz + 4 + 3);
+		if (!buf) {
+			fprintf(stderr, "out of memory\n");
+			return -1;
+		}
+		io_readn(io, buf + 4, sz);
+		(*(int*)buf) = sz;
+
+		struct state state = {
+				.buf = buf,
+				.sz = sz,
+		};
+		printf("############################################################\n");
+		printf("program:\n");
+		dump_program(&state);
+		printf("############################################################\n");
+		return 0;
+	}
+
+	/* figure out what sort of input we are dealing with: */
+	if (!(check_extension(infile, ".rd") || check_extension(infile, ".rd.gz"))) {
+		int ret;
+		buf = calloc(1, 100 * 1024);
+		if (!buf) {
+			fprintf(stderr, "out of memory\n");
+			return -1;
+		}
+		ret = io_readn(io, buf, 100 * 1024);
+		if (ret < 0) {
+			fprintf(stderr, "error: %m");
+			return -1;
+		}
+		return disasm_a3xx(buf, ret/4, 0, stdout, gpu_id);
+	}
+
+	while ((io_readn(io, &type, sizeof(type)) > 0) && (io_readn(io, &sz, 4) > 0)) {
+		free(buf);
+		buf = NULL;
+
+		/* the size comes straight from the file, don't trust it: */
+		if (sz < 0) {
+			fprintf(stderr, "invalid section size: %d\n", sz);
+			status = -1;
+			break;
+		}
+
+		/* note: allow hex dumps to go a bit past the end of the buffer..
+		 * might see some garbage, but better than missing the last few bytes..
+		 * (the zeroed slack also NUL terminates the text sections)
+		 */
+		buf = calloc(1, (size_t)sz + 3);
+		if (!buf) {
+			fprintf(stderr, "out of memory\n");
+			status = -1;
+			break;
+		}
+		io_readn(io, buf, sz);
+
+		switch(type) {
+		case RD_TEST:
+			if (dump_full)
+				printf("test: %s\n", (char *)buf);
+			break;
+		case RD_VERT_SHADER:
+			printf("vertex shader:\n%s\n", (char *)buf);
+			break;
+		case RD_FRAG_SHADER:
+			printf("fragment shader:\n%s\n", (char *)buf);
+			break;
+		case RD_PROGRAM: {
+			struct state state = {
+					.buf = buf,
+					.sz = sz,
+			};
+			printf("############################################################\n");
+			printf("program:\n");
+			dump_program(&state);
+			printf("############################################################\n");
+			break;
+		}
+		case RD_GPU_ID:
+			/* a short section does not contain a full gpu id: */
+			if (sz >= (int)sizeof(unsigned int)) {
+				gpu_id = *((unsigned int *)buf);
+				printf("gpu_id: %d\n", gpu_id);
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	free(buf);
+	io_close(io);
+
+	return status;
+}
--- /dev/null
+/*
+ * Copyright © 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef REDUMP_H_
+#define REDUMP_H_
+
+/* Section types of an .rd file.  The values are implicit, so the order of
+ * the enumerators defines the numbering; the dump tools read the type
+ * straight from the file into a variable of this enum type.
+ */
+enum rd_sect_type {
+ RD_NONE,
+ RD_TEST, /* ascii text */
+ RD_CMD, /* ascii text */
+ RD_GPUADDR, /* u32 gpuaddr, u32 size */
+ RD_CONTEXT, /* raw dump */
+ RD_CMDSTREAM, /* raw dump */
+ RD_CMDSTREAM_ADDR, /* gpu addr of cmdstream */
+ RD_PARAM, /* u32 param_type, u32 param_val, u32 bitlen */
+ RD_FLUSH, /* empty, clear previous params */
+ RD_PROGRAM, /* shader program, raw dump */
+ RD_VERT_SHADER,
+ RD_FRAG_SHADER,
+ RD_BUFFER_CONTENTS,
+ RD_GPU_ID,
+};
+
+/* RD_PARAM types (the param_type word of an RD_PARAM section): */
+enum rd_param_type {
+ RD_PARAM_SURFACE_WIDTH,
+ RD_PARAM_SURFACE_HEIGHT,
+ RD_PARAM_SURFACE_PITCH,
+ RD_PARAM_COLOR,
+ RD_PARAM_BLIT_X,
+ RD_PARAM_BLIT_Y,
+ RD_PARAM_BLIT_WIDTH,
+ RD_PARAM_BLIT_HEIGHT,
+ RD_PARAM_BLIT_X2, /* BLIT_X + BLIT_WIDTH */
+ RD_PARAM_BLIT_Y2, /* BLIT_Y + BLIT_HEIGHT (presumably; this comment used to say BLIT_WIDTH, verify) */
+};
+
+/* Entry points for writing .rd sections.  They are declared weak, so a
+ * program that is linked without an implementation sees them as NULL.
+ */
+void rd_start(const char *name, const char *fmt, ...) __attribute__((weak));
+void rd_end(void) __attribute__((weak));
+void rd_write_section(enum rd_sect_type type, const void *buf, int sz) __attribute__((weak));
+
+/* for code that should run with and without libwrap, use the following
+ * macros which check if the fxns are present before calling
+ */
+#define RD_START(n,f,...) do { if (rd_start) rd_start(n,f,##__VA_ARGS__); } while (0)
+#define RD_END() do { if (rd_end) rd_end(); } while (0)
+#define RD_WRITE_SECTION(t,b,s) do { if (rd_write_section) rd_write_section(t,b,s); } while (0)
+
+/* number of elements of a true array (not of a pointer) */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+/* round v up to a multiple of a; the mask trick requires a to be a power
+ * of two.  Any earlier definition of ALIGN is replaced.
+ */
+#undef ALIGN
+#define ALIGN(v,a) (((v) + (a) - 1) & ~((a) - 1))
+
+/* note: these evaluate their arguments more than once, so avoid passing
+ * expressions with side effects
+ */
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+
+#endif /* REDUMP_H_ */
--- /dev/null
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+#include "rnnutil.h"
+
+/*
+ * Pick the domain to use for decoding 'regbase': the first domain if
+ * rnndec_checkaddr() accepts the address there, otherwise the second.
+ */
+static struct rnndomain *finddom(struct rnn *rnn, uint32_t regbase)
+{
+	return rnndec_checkaddr(rnn->vc, rnn->dom[0], regbase, 0)
+			? rnn->dom[0]
+			: rnn->dom[1];
+}
+
+/*
+ * Initialize an already allocated 'rnn': creates the database and the
+ * decode contexts.  A context without colors is always created; unless
+ * 'nocolor' is set a second, colored context is created for rnn->vc,
+ * otherwise rnn->vc shares the colorless one.
+ */
+void _rnn_init(struct rnn *rnn, int nocolor)
+{
+	struct rnndeccontext *plain;
+
+	rnn_init();
+	rnn->db = rnn_newdb();
+
+	plain = rnndec_newcontext(rnn->db);
+	plain->colors = &envy_null_colors;
+	rnn->vc_nocolor = plain;
+
+	if (!nocolor) {
+		struct rnndeccontext *colored = rnndec_newcontext(rnn->db);
+
+		colored->colors = &envy_def_colors;
+		rnn->vc = colored;
+	} else {
+		rnn->vc = plain;
+	}
+}
+
+/*
+ * Allocate and initialize a new rnn instance.  Returns NULL if the
+ * allocation fails.
+ */
+struct rnn *rnn_new(int nocolor)
+{
+	struct rnn *rnn = calloc(1, sizeof(*rnn));
+
+	if (rnn)
+		_rnn_init(rnn, nocolor);
+
+	return rnn;
+}
+
+/*
+ * Parse the register database 'file' and look up the domains used for
+ * decoding.
+ *
+ * dom[0] is the domain named 'domain'.  For A2XX and A3XX the second
+ * domain is "AXXX", for everything else it is the same as dom[0].  The
+ * domain name is also recorded as the variant and as the "chip" variable
+ * of the decode context(s).
+ *
+ * A missing domain is only reported on stderr, it is not fatal here.
+ */
+static void init(struct rnn *rnn, char *file, char *domain)
+{
+	/* prepare rnn stuff for lookup */
+	rnn_parsefile(rnn->db, file);
+	rnn_prepdb(rnn->db);
+	rnn->dom[0] = rnn_finddomain(rnn->db, domain);
+	if ((strcmp(domain, "A2XX") == 0) || (strcmp(domain, "A3XX") == 0)) {
+		rnn->dom[1] = rnn_finddomain(rnn->db, "AXXX");
+	} else {
+		rnn->dom[1] = rnn->dom[0];
+	}
+	/* Report a missing primary domain regardless of the fallback domain.
+	 * Testing "!dom[0] && dom[1]" could never trigger for chips where
+	 * dom[1] is just an alias of dom[0]:
+	 */
+	if (!rnn->dom[0]) {
+		fprintf(stderr, "Could not find domain %s in %s\n", domain, file);
+	}
+	rnn->variant = domain;
+
+	rnndec_varadd(rnn->vc, "chip", domain);
+	if (rnn->vc != rnn->vc_nocolor)
+		rnndec_varadd(rnn->vc_nocolor, "chip", domain);
+}
+
+/* Load the register database from an explicitly given file and domain
+ * name; see init() for what gets set up.  The 'domain' pointer is kept
+ * in rnn->variant, so the string must outlive 'rnn'.
+ */
+void rnn_load_file(struct rnn *rnn, char *file, char *domain)
+{
+ init(rnn, file, domain);
+}
+
+/*
+ * Load the register database that matches 'gpuname'.  The generation is
+ * recognized by a substring match ("a2" .. "a6", tried in that order);
+ * if nothing matches, nothing is loaded.
+ */
+void rnn_load(struct rnn *rnn, const char *gpuname)
+{
+	static const struct {
+		const char *tag;    /* substring to look for in gpuname */
+		char *file;
+		char *domain;
+	} variants[] = {
+		{ "a2", "adreno/a2xx.xml", "A2XX" },
+		{ "a3", "adreno/a3xx.xml", "A3XX" },
+		{ "a4", "adreno/a4xx.xml", "A4XX" },
+		{ "a5", "adreno/a5xx.xml", "A5XX" },
+		{ "a6", "adreno/a6xx.xml", "A6XX" },
+	};
+	const unsigned nvariants = sizeof(variants) / sizeof(variants[0]);
+
+	for (unsigned v = 0; v < nvariants; v++) {
+		if (strstr(gpuname, variants[v].tag)) {
+			/* first match wins */
+			init(rnn, variants[v].file, variants[v].domain);
+			return;
+		}
+	}
+}
+
+/*
+ * Look up the offset of the register called 'name'.  The first domain is
+ * tried first; if that yields zero the result of the lookup in the second
+ * domain is returned instead.
+ */
+uint32_t rnn_regbase(struct rnn *rnn, const char *name)
+{
+	struct rnndeccontext *ctx = rnn->vc_nocolor;
+	uint32_t base = rnndec_decodereg(ctx, rnn->dom[0], name);
+
+	return base ? base : rnndec_decodereg(ctx, rnn->dom[1], name);
+}
+
+/*
+ * Look up the name of the register at 'regbase'.
+ *
+ * 'color' selects between the context stored in rnn->vc (non-zero) and
+ * the colorless one.
+ *
+ * Returns NULL if the address cannot be decoded.  Otherwise the name is
+ * returned in a static buffer, which is overwritten by the next call (so
+ * this is not reentrant); names that do not fit are truncated.
+ */
+const char *rnn_regname(struct rnn *rnn, uint32_t regbase, int color)
+{
+	static char buf[128];
+	struct rnndecaddrinfo *info;
+
+	info = rnndec_decodeaddr(color ? rnn->vc : rnn->vc_nocolor,
+			finddom(rnn, regbase), regbase, 0);
+	if (!info)
+		return NULL;
+
+	/* bounded copy: the decoded name has no known upper limit, and an
+	 * unbounded strcpy() would write past the end of buf for long names
+	 */
+	snprintf(buf, sizeof(buf), "%s", info->name);
+	free(info->name);
+	free(info);
+
+	return buf;
+}
+
+/* Decode the register at 'regbase' using rnn->vc and the domain picked by
+ * finddom().  The result of rnndec_decodeaddr() is returned as is;
+ * rnn_regname() frees both info->name and info, so presumably the caller
+ * owns the returned object in the same way (confirm against rnndec).
+ */
+struct rnndecaddrinfo *rnn_reginfo(struct rnn *rnn, uint32_t regbase)
+{
+ return rnndec_decodeaddr(rnn->vc, finddom(rnn, regbase), regbase, 0);
+}
+
+/*
+ * Look up the name of value 'val' of the enum called 'name'.  Only enum
+ * entries with a valid value that also match the current variant are
+ * considered.  Returns NULL if the enum or the value is not found.
+ */
+const char *rnn_enumname(struct rnn *rnn, const char *name, uint32_t val)
+{
+	struct rnndeccontext *ctx = rnn->vc;
+	struct rnnenum *en = rnn_findenum(ctx->db, name);
+
+	if (!en)
+		return NULL;
+
+	for (int idx = 0; idx < en->valsnum; idx++) {
+		struct rnnvalue *cand = en->vals[idx];
+
+		if (!cand->valvalid)
+			continue;
+		if (cand->value != val)
+			continue;
+		if (rnndec_varmatch(ctx, &cand->varinfo))
+			return cand->name;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find the direct sub-element of 'domain' whose name equals 'name'.
+ * Returns NULL if there is none.
+ */
+static struct rnndelem *regelem(struct rnndomain *domain, const char *name)
+{
+	for (int idx = 0; idx < domain->subelemsnum; idx++) {
+		struct rnndelem *cand = domain->subelems[idx];
+
+		if (strcmp(cand->name, name) == 0)
+			return cand;
+	}
+
+	return NULL;
+}
+
+/* Lookup rnndelem by name, trying the first domain and then the second.
+ * Returns NULL if neither has an element of that name.
+ */
+struct rnndelem *rnn_regelem(struct rnn *rnn, const char *name)
+{
+	for (int d = 0; d < 2; d++) {
+		struct rnndelem *found = regelem(rnn->dom[d], name);
+
+		if (found)
+			return found;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find the direct sub-element of 'domain' that is located at exactly
+ * 'offset'.  Returns NULL if there is none.
+ */
+static struct rnndelem *regoff(struct rnndomain *domain, uint32_t offset)
+{
+	for (int idx = 0; idx < domain->subelemsnum; idx++) {
+		struct rnndelem *cand = domain->subelems[idx];
+
+		if (cand->offset == offset)
+			return cand;
+	}
+
+	return NULL;
+}
+
+/* Lookup rnndelem by offset, trying the first domain and then the second.
+ * Returns NULL if neither has an element at that offset.
+ */
+struct rnndelem *rnn_regoff(struct rnn *rnn, uint32_t offset)
+{
+	for (int d = 0; d < 2; d++) {
+		struct rnndelem *found = regoff(rnn->dom[d], offset);
+
+		if (found)
+			return found;
+	}
+
+	return NULL;
+}
+
+/*
+ * Store the raw register value in 'val' and report how it should be
+ * interpreted.  For the supported types the type from 'info' is handed
+ * back; fixed point (not implemented yet) and all other types yield
+ * RNN_TTYPE_INVALID.  'rnn' is not used.
+ */
+enum rnnttype rnn_decodelem(struct rnn *rnn, struct rnntypeinfo *info,
+		uint32_t regval, union rnndecval *val)
+{
+	val->u = regval;
+
+	switch (info->type) {
+	case RNN_TTYPE_FIXED:
+	case RNN_TTYPE_UFIXED:
+		/* TODO */
+		break;
+	case RNN_TTYPE_INLINE_ENUM:
+	case RNN_TTYPE_ENUM:
+	case RNN_TTYPE_HEX:
+	case RNN_TTYPE_INT:
+	case RNN_TTYPE_UINT:
+	case RNN_TTYPE_FLOAT:
+	case RNN_TTYPE_BOOLEAN:
+		return info->type;
+	default:
+		break;
+	}
+
+	return RNN_TTYPE_INVALID;
+}
--- /dev/null
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef RNNUTIL_H_
+#define RNNUTIL_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+#include "rnn.h"
+#include "rnndec.h"
+
+/* A loaded register database together with the contexts and domains used
+ * to decode registers with it.
+ */
+struct rnn {
+ struct rnndb *db;
+ /* decode contexts; vc is the same object as vc_nocolor when colors are disabled */
+ struct rnndeccontext *vc, *vc_nocolor;
+ /* domains searched in order; dom[1] may be the same as dom[0] */
+ struct rnndomain *dom[2];
+ /* domain name that the database was loaded for (not copied) */
+ const char *variant;
+};
+
+/* a 32 bit register value, viewable as unsigned, signed or float */
+union rnndecval {
+ uint32_t u;
+ int32_t i;
+ float f;
+};
+
+/* setup: rnn_new() allocates and initializes, _rnn_init() initializes an
+ * existing struct; rnn_load()/rnn_load_file() then load a register database
+ */
+void _rnn_init(struct rnn *rnn, int nocolor);
+struct rnn *rnn_new(int nocolor);
+void rnn_load_file(struct rnn *rnn, char *file, char *domain);
+void rnn_load(struct rnn *rnn, const char *gpuname);
+/* register lookup by name or by offset */
+uint32_t rnn_regbase(struct rnn *rnn, const char *name);
+/* note: returns a pointer to a static buffer, or NULL */
+const char *rnn_regname(struct rnn *rnn, uint32_t regbase, int color);
+struct rnndecaddrinfo *rnn_reginfo(struct rnn *rnn, uint32_t regbase);
+const char *rnn_enumname(struct rnn *rnn, const char *name, uint32_t val);
+
+/* element lookup and value decoding */
+struct rnndelem *rnn_regelem(struct rnn *rnn, const char *name);
+struct rnndelem *rnn_regoff(struct rnn *rnn, uint32_t offset);
+enum rnnttype rnn_decodelem(struct rnn *rnn, struct rnntypeinfo *info,
+ uint32_t regval, union rnndecval *val);
+
+#endif /* RNNUTIL_H_ */
--- /dev/null
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#define _GNU_SOURCE
+#define LUA_COMPAT_APIINTCASTS
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <lua.h>
+#include <lauxlib.h>
+#include <lualib.h>
+#include <assert.h>
+
+#include "script.h"
+#include "cffdec.h"
+#include "rnnutil.h"
+
+static lua_State *L;
+
+/* Debug trace helper.  As checked in, the "#if 0" branch is disabled and
+ * DBG() expands to an empty statement.  Switching the condition to 1 makes
+ * every DBG() call print its message prefixed with the calling function
+ * name and line number.
+ */
+#if 0
+#define DBG(fmt, ...) \
+ do { printf(" ** %s:%d ** "fmt "\n", \
+ __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
+#else
+#define DBG(fmt, ...) do {} while (0)
+#endif
+
+/* An rnn based decoder, which can either be decoding current register
+ * values, or domain based decoding of a pm4 packet.
+ *
+ * 'base' must remain the first member: to_rnndec() recovers the rnndec
+ * by casting a 'struct rnn *' that points at it.
+ */
+struct rnndec {
+ struct rnn base;
+
+ /* for pm4 packet decoding: */
+ /* number of entries in 'dwords'; rnn_val() treats zero as "no packet,
+  * read the current register state instead"
+  */
+ uint32_t sizedwords;
+ /* packet payload indexed by dword offset (only read when sizedwords != 0) */
+ uint32_t *dwords;
+};
+
+/* Downcast a generic rnn handle to the rnndec wrapping it.  Only valid for
+ * pointers that refer to the 'base' member of a struct rnndec.
+ */
+static inline struct rnndec *to_rnndec(struct rnn *rnn)
+{
+	struct rnndec *dec = (struct rnndec *)rnn;
+
+	return dec;
+}
+
+/* Fetch the 32-bit value at 'regbase'.
+ *
+ * With no packet attached (sizedwords == 0) the value comes from the
+ * current register state via reg_val(); otherwise 'regbase' indexes the
+ * packet dwords.  An out-of-range packet index yields (uint32_t)-1.
+ */
+static uint32_t rnn_val(struct rnn *rnn, uint32_t regbase)
+{
+	struct rnndec *dec = to_rnndec(rnn);
+
+	if (dec->sizedwords == 0)
+		return reg_val(regbase);
+
+	if (regbase >= dec->sizedwords) {
+		// XXX throw an error
+		return -1;
+	}
+
+	return dec->dwords[regbase];
+}
+
+/* does not return */
+static void error(const char *fmt)
+{
+ fprintf(stderr, fmt, lua_tostring(L, -1));
+ exit(1);
+}
+
+/*
+ * An enum type that can be used as string or number:
+ */
+
+struct rnndenum {
+ /* symbolic name of the value, or NULL when pushenum() found no valid
+  * enumerant matching 'val'
+  */
+ const char *str;
+ /* the raw numeric value */
+ int val;
+};
+
+/* __tostring metamethod for enum objects: pushes the symbolic name when one
+ * is known, otherwise the numeric value formatted as unsigned decimal.
+ * Returns 1 (one Lua result).
+ */
+static int l_meta_rnn_enum_tostring(lua_State *L)
+{
+	struct rnndenum *e = lua_touserdata(L, 1);
+
+	if (e->str) {
+		lua_pushstring(L, e->str);
+	} else {
+		char buf[32];
+
+		/* bounded write; the cast makes the argument match "%u" */
+		snprintf(buf, sizeof(buf), "%u", (unsigned)e->val);
+		lua_pushstring(L, buf);
+	}
+	return 1;
+}
+
+/* so, this doesn't actually seem to be implemented yet, but hopefully
+ * some day lua comes to its senses
+ */
+/* "__tonumber" handler for enum objects: pushes the numeric value as a Lua
+ * integer and returns 1.
+ */
+static int l_meta_rnn_enum_tonumber(lua_State *L)
+{
+	const struct rnndenum *self = lua_touserdata(L, 1);
+	lua_Integer n = self->val;
+
+	lua_pushinteger(L, n);
+	return 1;
+}
+
+/* metamethods installed on the "rnnmetaenum" metatable by pushenum() */
+static const struct luaL_Reg l_meta_rnn_enum[] = {
+	{ .name = "__tostring", .func = l_meta_rnn_enum_tostring },
+	{ .name = "__tonumber", .func = l_meta_rnn_enum_tonumber },
+	{ .name = NULL,         .func = NULL }, /* sentinel */
+};
+
+/* Push a new enum userdata for 'val' onto the Lua stack.  The name is taken
+ * from the first valid enumerant in 'info' whose value equals 'val'; when
+ * none matches the object carries only the number.  The "rnnmetaenum"
+ * metatable is (re)populated and attached to the new object.
+ */
+static void pushenum(struct lua_State *L, int val, struct rnnenum *info)
+{
+	struct rnndenum *obj = lua_newuserdata(L, sizeof(struct rnndenum));
+	int idx = 0;
+
+	obj->str = NULL;
+	obj->val = val;
+
+	while (idx < info->valsnum) {
+		if (info->vals[idx]->valvalid && (info->vals[idx]->value == val)) {
+			obj->str = info->vals[idx]->name;
+			break;
+		}
+		idx++;
+	}
+
+	/* fill in the metatable, then drop it so the userdata is on top again */
+	luaL_newmetatable(L, "rnnmetaenum");
+	luaL_setfuncs(L, l_meta_rnn_enum, 0);
+	lua_pop(L, 1);
+
+	luaL_setmetatable(L, "rnnmetaenum");
+}
+
+/* Expose rnn decode to script environment as "rnn" library:
+ */
+
+/* Userdata payload shared by the array, struct and register proxy objects:
+ * it remembers which database element the proxy refers to and the offset
+ * accumulated so far (e.g. after applying an array index).
+ */
+struct rnndoff {
+ struct rnn *rnn;
+ struct rnndelem *elem;
+ uint64_t offset;
+};
+
+/* Allocate a struct rnndoff as Lua userdata (left on top of the stack) and
+ * fill it in; the caller attaches the appropriate metatable.
+ */
+static void push_rnndoff(lua_State *L, struct rnn *rnn,
+		struct rnndelem *elem, uint64_t offset)
+{
+	struct rnndoff *ref = lua_newuserdata(L, sizeof(struct rnndoff));
+
+	ref->offset = offset;
+	ref->elem = elem;
+	ref->rnn = rnn;
+}
+
+/* Forward declarations: l_rnn_etype() dispatches to these, and they are
+ * defined further down next to their metatables.
+ */
+static int l_rnn_etype_array(lua_State *L, struct rnn *rnn,
+ struct rnndelem *elem, uint64_t offset);
+static int l_rnn_etype_reg(lua_State *L, struct rnn *rnn,
+ struct rnndelem *elem, uint64_t offset);
+
+/* Decode 'regval' according to 'info' and push the result as a native Lua
+ * value (enum object, integer, unsigned, number or boolean).
+ *
+ * Returns the number of values pushed: 1 on success, 0 when the decoded
+ * type is invalid or not one of the handled kinds.
+ */
+static int pushdecval(struct lua_State *L, struct rnn *rnn,
+		uint32_t regval, struct rnntypeinfo *info)
+{
+	union rnndecval val;
+	enum rnnttype type = rnn_decodelem(rnn, info, regval, &val);
+	int npushed = 1;
+
+	switch (type) {
+	case RNN_TTYPE_ENUM:
+	case RNN_TTYPE_INLINE_ENUM:
+		pushenum(L, val.i, info->eenum);
+		break;
+	case RNN_TTYPE_INT:
+		lua_pushinteger(L, val.i);
+		break;
+	case RNN_TTYPE_UINT:
+	case RNN_TTYPE_HEX:
+		lua_pushunsigned(L, val.u);
+		break;
+	case RNN_TTYPE_FLOAT:
+		lua_pushnumber(L, val.f);
+		break;
+	case RNN_TTYPE_BOOLEAN:
+		lua_pushboolean(L, val.u);
+		break;
+	case RNN_TTYPE_INVALID:
+	default:
+		npushed = 0;
+		break;
+	}
+
+	return npushed;
+}
+
+/* Push the Lua representation of database element 'elem' at 'offset'.
+ *
+ * Arrays become array proxy objects.  For registers the value is read and
+ * decoded; if pushdecval() pushes nothing, a register proxy object is
+ * pushed instead so the script can index it further.  Any other element
+ * type is reported on stdout and yields no result.
+ *
+ * Returns the number of values pushed.
+ */
+static int l_rnn_etype(lua_State *L, struct rnn *rnn,
+		struct rnndelem *elem, uint64_t offset)
+{
+	DBG("elem=%p (%d), offset=%lu", elem, elem->type, offset);
+
+	if (elem->type == RNN_ETYPE_ARRAY)
+		return l_rnn_etype_array(L, rnn, elem, offset);
+
+	if (elem->type != RNN_ETYPE_REG) {
+		/* hmm.. */
+		printf("unhandled type: %d\n", elem->type);
+		return 0;
+	}
+
+	/* if a register has no bitfields, just return
+	 * the raw value:
+	 */
+	uint32_t regval = rnn_val(rnn, offset);
+	regval <<= elem->typeinfo.shr;
+
+	int npushed = pushdecval(L, rnn, regval, &elem->typeinfo);
+	if (npushed)
+		return npushed;
+
+	return l_rnn_etype_reg(L, rnn, elem, offset);
+}
+
+/*
+ * Struct Object:
+ * To implement stuff like 'RB_MRT[n].CONTROL' we need a struct-object
+ * to represent the current array index (ie. 'RB_MRT[n]')
+ */
+
+/* __index metamethod for struct proxies (e.g. the 'RB_MRT[n]' part of
+ * 'RB_MRT[n].CONTROL'): looks the key up among the element's sub-elements
+ * and pushes the matching one, relative to the accumulated offset.
+ *
+ * Returns the number of values pushed; 0 (nil to the script) when the key
+ * is not a string/number or names no sub-element.
+ */
+static int l_rnn_struct_meta_index(lua_State *L)
+{
+	struct rnndoff *rnndoff = lua_touserdata(L, 1);
+	const char *name = lua_tostring(L, 2);
+	struct rnndelem *elem = rnndoff->elem;
+	int i;
+
+	/* lua_tostring() yields NULL for keys that are neither strings nor
+	 * numbers; strcmp() must not see that
+	 */
+	if (!name)
+		return 0;
+
+	for (i = 0; i < elem->subelemsnum; i++) {
+		struct rnndelem *subelem = elem->subelems[i];
+		if (!strcmp(name, subelem->name)) {
+			return l_rnn_etype(L, rnndoff->rnn, subelem,
+					rnndoff->offset + subelem->offset);
+		}
+	}
+
+	return 0;
+}
+
+/* metamethods installed on the "rnnmetastruct" metatable */
+static const struct luaL_Reg l_meta_rnn_struct[] = {
+	{ .name = "__index", .func = l_rnn_struct_meta_index },
+	{ .name = NULL,      .func = NULL }, /* sentinel */
+};
+
+/* Push a struct proxy object for 'elem' at 'offset' and give it the
+ * "rnnmetastruct" metatable.  Returns 1.
+ */
+static int l_rnn_etype_struct(lua_State *L, struct rnn *rnn,
+		struct rnndelem *elem, uint64_t offset)
+{
+	const char *const mt_name = "rnnmetastruct";
+
+	push_rnndoff(L, rnn, elem, offset);
+
+	/* populate the metatable, then pop it so the proxy is on top again */
+	luaL_newmetatable(L, mt_name);
+	luaL_setfuncs(L, l_meta_rnn_struct, 0);
+	lua_pop(L, 1);
+
+	luaL_setmetatable(L, mt_name);
+
+	return 1;
+}
+
+/*
+ * Array Object:
+ */
+
+/* __index metamethod for array proxies: applies index 'idx' (scaled by the
+ * element stride) to the accumulated offset and pushes what lives there.
+ * Returns the number of values pushed.
+ */
+static int l_rnn_array_meta_index(lua_State *L)
+{
+	struct rnndoff *rnndoff = lua_touserdata(L, 1);
+	int idx = lua_tointeger(L, 2);
+	struct rnndelem *elem = rnndoff->elem;
+	uint64_t offset = rnndoff->offset + (elem->stride * idx);
+
+	DBG("rnndoff=%p, idx=%d, numsubelems=%d",
+			rnndoff, idx, rnndoff->elem->subelemsnum);
+
+	/* if just a single sub-element, it is directly a register,
+	 * otherwise we need to accumulate the array index while
+	 * we wait for the register name within the array..
+	 */
+	if (elem->subelemsnum == 1)
+		return l_rnn_etype(L, rnndoff->rnn, elem->subelems[0], offset);
+
+	return l_rnn_etype_struct(L, rnndoff->rnn, elem, offset);
+}
+
+/* metamethods installed on the "rnnmetaarray" metatable */
+static const struct luaL_Reg l_meta_rnn_array[] = {
+	{ .name = "__index", .func = l_rnn_array_meta_index },
+	{ .name = NULL,      .func = NULL }, /* sentinel */
+};
+
+/* Push an array proxy object for 'elem' at 'offset' and give it the
+ * "rnnmetaarray" metatable.  Returns 1.
+ */
+static int l_rnn_etype_array(lua_State *L, struct rnn *rnn,
+		struct rnndelem *elem, uint64_t offset)
+{
+	const char *const mt_name = "rnnmetaarray";
+
+	push_rnndoff(L, rnn, elem, offset);
+
+	/* populate the metatable, then pop it so the proxy is on top again */
+	luaL_newmetatable(L, mt_name);
+	luaL_setfuncs(L, l_meta_rnn_array, 0);
+	lua_pop(L, 1);
+
+	luaL_setmetatable(L, mt_name);
+
+	return 1;
+}
+
+/*
+ * Register element:
+ */
+
+/* __index metamethod for register proxies: 'reg.FIELD' looks FIELD up among
+ * the register's bitfields (from a named or inline bitset), extracts it
+ * from the current register value and pushes the decoded result.
+ *
+ * Returns the number of values pushed; 0 when the key is not a
+ * string/number, the register is not a bitset, or no bitfield matches.
+ */
+static int l_rnn_reg_meta_index(lua_State *L)
+{
+	struct rnndoff *rnndoff = lua_touserdata(L, 1);
+	const char *name = lua_tostring(L, 2);
+	struct rnndelem *elem = rnndoff->elem;
+	struct rnntypeinfo *info = &elem->typeinfo;
+	struct rnnbitfield **bitfields;
+	int bitfieldsnum;
+	int i;
+
+	/* lua_tostring() yields NULL for keys that are neither strings nor
+	 * numbers; neither strcmp() nor printf("%s") may see that
+	 */
+	if (!name)
+		return 0;
+
+	switch (info->type) {
+	case RNN_TTYPE_BITSET:
+		bitfields = info->ebitset->bitfields;
+		bitfieldsnum = info->ebitset->bitfieldsnum;
+		break;
+	case RNN_TTYPE_INLINE_BITSET:
+		bitfields = info->bitfields;
+		bitfieldsnum = info->bitfieldsnum;
+		break;
+	default:
+		printf("invalid register type: %d\n", info->type);
+		return 0;
+	}
+
+	for (i = 0; i < bitfieldsnum; i++) {
+		struct rnnbitfield *bf = bitfields[i];
+		if (!strcmp(name, bf->name)) {
+			uint32_t regval = rnn_val(rnndoff->rnn, rnndoff->offset);
+
+			/* isolate the field, move it down to bit 0, then apply
+			 * the field's 'shr' as a left shift
+			 */
+			regval &= typeinfo_mask(&bf->typeinfo);
+			regval >>= bf->typeinfo.low;
+			regval <<= bf->typeinfo.shr;
+
+			DBG("name=%s, info=%p, subelemsnum=%d, type=%d, regval=%x",
+					name, info, rnndoff->elem->subelemsnum,
+					bf->typeinfo.type, regval);
+
+			return pushdecval(L, rnndoff->rnn, regval, &bf->typeinfo);
+		}
+	}
+
+	printf("invalid member: %s\n", name);
+	return 0;
+}
+
+/* __tostring metamethod for register proxies: pushes the decoded text of
+ * the register's current value, or the raw value as 8 hex digits when no
+ * type information is available.  Returns 1.
+ *
+ * If formatting fails, nil is pushed (lua_pushstring() maps NULL to nil).
+ */
+static int l_rnn_reg_meta_tostring(lua_State *L)
+{
+	struct rnndoff *rnndoff = lua_touserdata(L, 1);
+	uint32_t regval = rnn_val(rnndoff->rnn, rnndoff->offset);
+	struct rnndecaddrinfo *info = rnn_reginfo(rnndoff->rnn, rnndoff->offset);
+	char *decoded = NULL;
+
+	if (info && info->typeinfo) {
+		decoded = rnndec_decodeval(rnndoff->rnn->vc,
+				info->typeinfo, regval);
+	} else if (asprintf(&decoded, "%08x", regval) < 0) {
+		/* on failure asprintf() leaves the pointer unspecified */
+		decoded = NULL;
+	}
+
+	lua_pushstring(L, decoded);
+	free(decoded);
+	if (info) {
+		free(info->name);
+		free(info);
+	}
+	return 1;
+}
+
+/* "__tonumber" handler for register proxies: pushes the current register
+ * value, left-shifted by the element's 'shr', as a Lua number.  Returns 1.
+ */
+static int l_rnn_reg_meta_tonumber(lua_State *L)
+{
+	const struct rnndoff *ref = lua_touserdata(L, 1);
+	uint32_t value = rnn_val(ref->rnn, ref->offset);
+
+	value <<= ref->elem->typeinfo.shr;
+	lua_pushnumber(L, value);
+
+	return 1;
+}
+
+/* metamethods installed on the "rnnmetareg" metatable */
+static const struct luaL_Reg l_meta_rnn_reg[] = {
+	{ .name = "__index",    .func = l_rnn_reg_meta_index },
+	{ .name = "__tostring", .func = l_rnn_reg_meta_tostring },
+	{ .name = "__tonumber", .func = l_rnn_reg_meta_tonumber },
+	{ .name = NULL,         .func = NULL }, /* sentinel */
+};
+
+/* Push a register proxy object for 'elem' at 'offset' and give it the
+ * "rnnmetareg" metatable.  Returns 1.
+ */
+static int l_rnn_etype_reg(lua_State *L, struct rnn *rnn,
+		struct rnndelem *elem, uint64_t offset)
+{
+	const char *const mt_name = "rnnmetareg";
+
+	push_rnndoff(L, rnn, elem, offset);
+
+	/* populate the metatable, then pop it so the proxy is on top again */
+	luaL_newmetatable(L, mt_name);
+	luaL_setfuncs(L, l_meta_rnn_reg, 0);
+	lua_pop(L, 1);
+
+	luaL_setmetatable(L, mt_name);
+
+	return 1;
+}
+
+/*
+ *
+ */
+
+/* __index metamethod for the rnn object returned by rnn.init(): resolves
+ * 'r.NAME' through rnn_regelem() and pushes the element found at its own
+ * offset.  Returns 0 (nil to the script) when the name is unknown.
+ */
+static int l_rnn_meta_index(lua_State *L)
+{
+	struct rnn *rnn = lua_touserdata(L, 1);
+	const char *key = lua_tostring(L, 2);
+	struct rnndelem *found = rnn_regelem(rnn, key);
+
+	if (found == NULL)
+		return 0;
+
+	return l_rnn_etype(L, rnn, found, found->offset);
+}
+
+/* __gc metamethod for the rnn object.  Currently a no-op: nothing is
+ * released when the object is collected.
+ */
+static int l_rnn_meta_gc(lua_State *L)
+{
+	// TODO
+	//struct rnn *rnn = lua_touserdata(L, 1);
+	//rnn_deinit(rnn);
+	(void)L;
+	return 0;
+}
+
+/* metamethods installed on the "rnnmeta" metatable */
+static const struct luaL_Reg l_meta_rnn[] = {
+	{ .name = "__index", .func = l_rnn_meta_index },
+	{ .name = "__gc",    .func = l_rnn_meta_gc },
+	{ .name = NULL,      .func = NULL }, /* sentinel */
+};
+
+/* rnn.init(gpuname): create an rnn object for register-state decoding.
+ *
+ * Allocates an rnndec as userdata, initialises and loads the database for
+ * 'gpuname', and attaches the "rnnmeta" metatable.  The packet fields are
+ * cleared (sizedwords == 0, dwords == NULL) so rnn_val() reads the current
+ * register state.  Returns 1 (the new object).
+ */
+static int l_rnn_init(lua_State *L)
+{
+	const char *gpuname = lua_tostring(L, 1);
+	struct rnndec *rnndec = lua_newuserdata(L, sizeof(*rnndec));
+
+	_rnn_init(&rnndec->base, 0);
+	rnn_load(&rnndec->base, gpuname);
+	rnndec->sizedwords = 0;
+	/* userdata memory is not zeroed; don't leave a dangling pointer */
+	rnndec->dwords = NULL;
+
+	luaL_newmetatable(L, "rnnmeta");
+	luaL_setfuncs(L, l_meta_rnn, 0);
+	lua_pop(L, 1);
+
+	luaL_setmetatable(L, "rnnmeta");
+
+	return 1;
+}
+
+/* rnn.enumname(r, name, val): pushes whatever rnn_enumname() returns for
+ * value 'val' of enum 'name'.  Returns 1.
+ */
+static int l_rnn_enumname(lua_State *L)
+{
+	struct rnn *rnn = lua_touserdata(L, 1);
+	const char *enum_name = lua_tostring(L, 2);
+	uint32_t value = (uint32_t)lua_tonumber(L, 3);
+	const char *result = rnn_enumname(rnn, enum_name, value);
+
+	lua_pushstring(L, result);
+	return 1;
+}
+
+/* rnn.regname(r, regbase): pushes the name rnn_regname() reports for the
+ * register at 'regbase'.  Returns 1.
+ */
+static int l_rnn_regname(lua_State *L)
+{
+	struct rnn *rnn = lua_touserdata(L, 1);
+	lua_Number arg = lua_tonumber(L, 2);
+	uint32_t reg = (uint32_t)arg;
+
+	lua_pushstring(L, rnn_regname(rnn, reg, 1));
+	return 1;
+}
+
+/* rnn.regval(r, regbase, regval): pushes the decoded text of 'regval' as a
+ * value of the register at 'regbase', or the raw value as 8 hex digits
+ * when no type information is available.  Returns 1.
+ *
+ * If formatting fails, nil is pushed (lua_pushstring() maps NULL to nil).
+ */
+static int l_rnn_regval(lua_State *L)
+{
+	struct rnn *rnn = lua_touserdata(L, 1);
+	uint32_t regbase = (uint32_t)lua_tonumber(L, 2);
+	uint32_t regval = (uint32_t)lua_tonumber(L, 3);
+	struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
+	char *decoded = NULL;
+
+	if (info && info->typeinfo) {
+		decoded = rnndec_decodeval(rnn->vc, info->typeinfo, regval);
+	} else if (asprintf(&decoded, "%08x", regval) < 0) {
+		/* on failure asprintf() leaves the pointer unspecified */
+		decoded = NULL;
+	}
+
+	lua_pushstring(L, decoded);
+	free(decoded);
+	if (info) {
+		free(info->name);
+		free(info);
+	}
+	return 1;
+}
+
+/* functions exported to scripts as the global "rnn" table */
+static const struct luaL_Reg l_rnn[] = {
+	{ .name = "init",     .func = l_rnn_init },
+	{ .name = "enumname", .func = l_rnn_enumname },
+	{ .name = "regname",  .func = l_rnn_regname },
+	{ .name = "regval",   .func = l_rnn_regval },
+	{ .name = NULL,       .func = NULL }, /* sentinel */
+};
+
+
+
+/* Expose the register state to script environment as a "regs" library:
+ */
+
+/* regs.written(regbase): pushes the result of reg_written() as a number.
+ * Returns 1.
+ */
+static int l_reg_written(lua_State *L)
+{
+	lua_Number arg = lua_tonumber(L, 1);
+	uint32_t reg = (uint32_t)arg;
+
+	lua_pushnumber(L, reg_written(reg));
+	return 1;
+}
+
+/* regs.lastval(regbase): pushes the result of reg_lastval() as a number.
+ * Returns 1.
+ */
+static int l_reg_lastval(lua_State *L)
+{
+	lua_Number arg = lua_tonumber(L, 1);
+	uint32_t reg = (uint32_t)arg;
+
+	lua_pushnumber(L, reg_lastval(reg));
+	return 1;
+}
+
+/* regs.val(regbase): pushes the result of reg_val() as a number.
+ * Returns 1.
+ */
+static int l_reg_val(lua_State *L)
+{
+	lua_Number arg = lua_tonumber(L, 1);
+	uint32_t reg = (uint32_t)arg;
+
+	lua_pushnumber(L, reg_val(reg));
+	return 1;
+}
+
+/* functions exported to scripts as the global "regs" table */
+static const struct luaL_Reg l_regs[] = {
+	{ .name = "written", .func = l_reg_written },
+	{ .name = "lastval", .func = l_reg_lastval },
+	{ .name = "val",     .func = l_reg_val },
+	{ .name = NULL,      .func = NULL }, /* sentinel */
+};
+
+/* Expose API to lookup snapshot buffers:
+ */
+
+/* Prototypes declared locally instead of coming from an included header.
+ * NOTE(review): presumably implemented by the gpu buffer tracking helper;
+ * confirm and consider including its header instead.
+ */
+uint64_t gpubaseaddr(uint64_t gpuaddr);
+unsigned hostlen(uint64_t gpuaddr);
+
+/* bos.base(addr): given an address, pushes the base address of its buffer
+ * (as reported by gpubaseaddr()).  Returns 1.
+ */
+static int l_bo_base(lua_State *L)
+{
+	lua_Number arg = lua_tonumber(L, 1);
+	uint64_t gpuaddr = (uint64_t)arg;
+
+	lua_pushnumber(L, gpubaseaddr(gpuaddr));
+	return 1;
+}
+
+/* bos.size(addr): given an address, pushes the remaining size of its
+ * buffer (as reported by hostlen()).  Returns 1.
+ */
+static int l_bo_size(lua_State *L)
+{
+	lua_Number arg = lua_tonumber(L, 1);
+	uint64_t gpuaddr = (uint64_t)arg;
+
+	lua_pushnumber(L, hostlen(gpuaddr));
+	return 1;
+}
+
+/* functions exported to scripts as the global "bos" table */
+static const struct luaL_Reg l_bos[] = {
+	{ .name = "base", .func = l_bo_base },
+	{ .name = "size", .func = l_bo_size },
+	{ .name = NULL,   .func = NULL }, /* sentinel */
+};
+
+/* Register the functions in 'reg' as fields of a fresh table and publish
+ * that table to scripts as the global named 'lib'.
+ */
+static void openlib(const char *lib, const luaL_Reg *reg)
+{
+	lua_newtable(L);               /* stack: tbl */
+	luaL_setfuncs(L, reg, 0);      /* tbl[name] = func for each entry */
+	lua_setglobal(L, lib);         /* _G[lib] = tbl; stack: (empty) */
+}
+
+/* Called once at startup: creates the Lua state, opens the standard
+ * libraries plus the "bos", "regs" and "rnn" tables, then loads and runs
+ * the script in 'file'.
+ *
+ * Any load or runtime error is fatal (reported through error(), which
+ * exits).  Always returns 0 otherwise.  Must not be called twice.
+ */
+int script_load(const char *file)
+{
+	assert(!L);
+
+	L = luaL_newstate();
+	luaL_openlibs(L);
+	openlib("bos", l_bos);
+	openlib("regs", l_regs);
+	openlib("rnn", l_rnn);
+
+	/* compile the chunk */
+	if (luaL_loadfile(L, file))
+		error("%s\n");
+
+	/* run its top level so the script's handlers get defined */
+	if (lua_pcall(L, 0, LUA_MULTRET, 0))
+		error("%s\n");
+
+	return 0;
+}
+
+
+/* Called at the start of each cmdstream file: invokes the script's
+ * start_cmdstream(name) handler if it defines one.  Does nothing when no
+ * script is loaded; a failing handler is fatal.
+ */
+void script_start_cmdstream(const char *name)
+{
+	if (L == NULL)
+		return;
+
+	lua_getglobal(L, "start_cmdstream");
+
+	if (lua_isfunction(L, -1)) {
+		lua_pushstring(L, name);
+
+		/* do the call (1 arguments, 0 result) */
+		if (lua_pcall(L, 1, 0, 0) != 0)
+			error("error running function `f': %s\n");
+	} else {
+		/* if no handler just ignore it: */
+		lua_pop(L, 1);
+	}
+}
+
+/* Called at each DRAW_INDX: invokes the script's draw(primtype, nindx)
+ * handler, if it defines one, to process the current state.  Does nothing
+ * when no script is loaded; a failing handler is fatal.
+ */
+void script_draw(const char *primtype, uint32_t nindx)
+{
+	if (L == NULL)
+		return;
+
+	lua_getglobal(L, "draw");
+
+	if (lua_isfunction(L, -1)) {
+		lua_pushstring(L, primtype);
+		lua_pushnumber(L, nindx);
+
+		/* do the call (2 arguments, 0 result) */
+		if (lua_pcall(L, 2, 0, 0) != 0)
+			error("error running function `f': %s\n");
+	} else {
+		/* if no handler just ignore it: */
+		lua_pop(L, 1);
+	}
+}
+
+
+/* __index metamethod for packet objects: 'pkt[n]' resolves dword offset n
+ * through rnn_regoff() and pushes the element found there.  Returns 0 (nil
+ * to the script) when nothing is defined at that offset.
+ */
+static int l_rnn_meta_dom_index(lua_State *L)
+{
+	struct rnn *rnn = lua_touserdata(L, 1);
+	uint32_t dword = (uint32_t)lua_tonumber(L, 2);
+
+	/* TODO might be nicer if the arg isn't a number, to search the domain
+	 * for matching bitfields.. so that the script could do something like
+	 * 'pkt.WIDTH' instead of 'pkt[1].WIDTH', ie. not have to remember the
+	 * offset of the dword containing the bitfield..
+	 */
+
+	struct rnndelem *found = rnn_regoff(rnn, dword);
+
+	if (found == NULL)
+		return 0;
+
+	return l_rnn_etype(L, rnn, found, found->offset);
+}
+
+/*
+ * A wrapper object for rnndomain based decoding of an array of dwords
+ * (ie. for pm4 packet decoding). Mostly re-uses the register-value
+ * decoding for the individual dwords and bitfields.
+ */
+
+/* __gc metamethod for packet objects.  Currently a no-op: nothing is
+ * released when the object is collected.
+ */
+static int l_rnn_meta_dom_gc(lua_State *L)
+{
+	// TODO
+	//struct rnn *rnn = lua_touserdata(L, 1);
+	//rnn_deinit(rnn);
+	(void)L;
+	return 0;
+}
+
+/* metamethods installed on the "rnnmetadom" metatable */
+static const struct luaL_Reg l_meta_rnn_dom[] = {
+	{ .name = "__index", .func = l_rnn_meta_dom_index },
+	{ .name = "__gc",    .func = l_rnn_meta_dom_gc },
+	{ .name = NULL,      .func = NULL }, /* sentinel */
+};
+
+/* Called for general pm4 packet decoding, such as texture/sampler state.
+ *
+ * Looks up a script global named after the domain (dom->name); if it is a
+ * function it is called as handler(pkt, sizedwords), where 'pkt' is a
+ * packet object wrapping a copy of '*rnn' restricted to 'dom' and reading
+ * its values from 'dwords'.  Does nothing when no script is loaded or no
+ * handler exists; a failing handler is fatal.
+ */
+void script_packet(uint32_t *dwords, uint32_t sizedwords,
+		struct rnn *rnn, struct rnndomain *dom)
+{
+	if (L == NULL)
+		return;
+
+	lua_getglobal(L, dom->name);
+
+	/* if no handler for the packet, just ignore it: */
+	if (!lua_isfunction(L, -1)) {
+		lua_pop(L, 1);
+		return;
+	}
+
+	/* first argument: the packet object */
+	struct rnndec *pkt = lua_newuserdata(L, sizeof(struct rnndec));
+
+	pkt->base = *rnn;
+	pkt->base.dom[0] = dom;
+	pkt->base.dom[1] = NULL;
+	pkt->dwords = dwords;
+	pkt->sizedwords = sizedwords;
+
+	luaL_newmetatable(L, "rnnmetadom");
+	luaL_setfuncs(L, l_meta_rnn_dom, 0);
+	lua_pop(L, 1);
+
+	luaL_setmetatable(L, "rnnmetadom");
+
+	/* second argument: the packet size in dwords */
+	lua_pushnumber(L, sizedwords);
+
+	if (lua_pcall(L, 2, 0, 0) != 0)
+		error("error running function `f': %s\n");
+}
+
+/* Helper to call the script function 'name', which takes no arguments and
+ * returns nothing.  Does nothing when no script is loaded or the global is
+ * not a function; a failing handler is fatal.
+ */
+static void simple_call(const char *name)
+{
+	if (L == NULL)
+		return;
+
+	lua_getglobal(L, name);
+
+	if (lua_isfunction(L, -1)) {
+		/* do the call (0 arguments, 0 result) */
+		if (lua_pcall(L, 0, 0, 0) != 0)
+			error("error running function `f': %s\n");
+	} else {
+		/* if no handler just ignore it: */
+		lua_pop(L, 1);
+	}
+}
+
+/* Called at the end of each cmdstream file: runs the script's optional
+ * end_cmdstream() handler.
+ */
+void script_end_cmdstream(void)
+{
+	static const char handler[] = "end_cmdstream";
+
+	simple_call(handler);
+}
+
+/* Called at the start of a submit/issueibcmds: runs the script's optional
+ * start_submit() handler.
+ */
+void script_start_submit(void)
+{
+	static const char handler[] = "start_submit";
+
+	simple_call(handler);
+}
+
+/* Called at the end of a submit/issueibcmds: runs the script's optional
+ * end_submit() handler.
+ */
+void script_end_submit(void)
+{
+	static const char handler[] = "end_submit";
+
+	simple_call(handler);
+}
+
+/* Called after the last cmdstream file: runs the script's optional
+ * finish() handler, then closes the Lua state and clears the global
+ * handle.  Does nothing when no script is loaded.
+ */
+void script_finish(void)
+{
+	if (L != NULL) {
+		simple_call("finish");
+
+		lua_close(L);
+		L = NULL;
+	}
+}
--- /dev/null
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef SCRIPT_H_
+#define SCRIPT_H_
+
+#include <stdint.h>
+
+
+// XXX make script support optional
+/* Defined unconditionally for now, so the #else branch below is never used. */
+#define ENABLE_SCRIPTING 1
+
+#ifdef ENABLE_SCRIPTING
+
+/* called at start to load the script: */
+int script_load(const char *file);
+
+/* called at start of each cmdstream file: */
+void script_start_cmdstream(const char *name);
+
+/* called at each DRAW_INDX, calls script drawidx fxn to process
+ * the current state
+ */
+/* NOTE(review): presumably declared weak so that users of this header can
+ * link without the script implementation -- confirm.
+ */
+__attribute__((weak))
+void script_draw(const char *primtype, uint32_t nindx);
+
+struct rnn;
+struct rnndomain;
+/* called for general pm4 packet decoding: calls the script function named
+ * after the domain with a packet object and the packet size in dwords
+ */
+__attribute__((weak))
+void script_packet(uint32_t *dwords, uint32_t sizedwords,
+ struct rnn *rnn, struct rnndomain *dom);
+
+/* maybe at some point it is interesting to add additional script
+ * hooks for CP_EVENT_WRITE, etc?
+ */
+
+/* called at end of each cmdstream file: */
+void script_end_cmdstream(void);
+
+/* called at start and end of submit/issueibcmds: */
+void script_start_submit(void);
+void script_end_submit(void);
+
+/* called after last cmdstream file: */
+void script_finish(void);
+
+#else
+// TODO no-op stubs..
+#endif
+
+
+#endif /* SCRIPT_H_ */
--- /dev/null
+-- A script that compares a set of equivalent cmdstream captures from
+-- various generations, looking for equivalencies between registers.
+--
+-- This would be run across a group of similar tests for various
+-- generations, for example:
+--
+-- cffdump --script scripts/analyze.lua a320/quad-flat-*.rd a420/quad-flat-*.rd
+--
+-- This is done by comparing unique register values. Ie. for each
+-- generation, find the set of registers that have different values
+-- between equivalent draw calls.
+
+-- posix.basename()/posix.dirname() are used by start_cmdstream() to derive
+-- the test and gpu names from the capture path:
+local posix = require "posix"
+
+io.write("Analyzing Data...\n")
+
+-- results - table structure:
+-- * [gpuname] - gpu
+-- * tests
+-- * [testname] - current test
+-- * draws
+-- * [1..n] - the draws
+-- * primtype - the primitive type
+-- * regs - table of values for draw
+-- * [regbase] - regval
+-- * regvals - table of unique values across all draws
+-- * [regbase]
+-- * [regval] - list of test names
+-- * [1..n] - testname "." didx
+local results = {}
+
+-- State for the cmdstream file currently being processed; set by
+-- start_cmdstream() and cleared again by end_cmdstream():
+local test = nil
+local gpuname = nil
+local testname = nil
+
+
+-- Count every key in tbl, including non-sequence (sparse/0-based/string)
+-- keys that the '#' operator does not account for.
+function tblsz(tbl)
+  local count = 0
+  local key = next(tbl)
+  while key ~= nil do
+    count = count + 1
+    key = next(tbl, key)
+  end
+  return count
+end
+
+
+-- Called at the start of each capture file.  The test name is the file's
+-- basename and the gpu name is the name of the directory containing it.
+-- Creates the per-gpu entry on first use and starts a fresh test record.
+function start_cmdstream(name)
+  testname = posix.basename(name)
+  gpuname = posix.basename(posix.dirname(name))
+  --io.write("START: gpuname=" .. gpuname .. ", testname=" .. testname .. "\n");
+
+  if results[gpuname] == nil then
+    results[gpuname] = {tests = {}, regvals = {}}
+  end
+
+  test = {draws = {}}
+  results[gpuname].tests[testname] = test
+end
+
+-- Called for every draw.  Records the values of all written registers for
+-- this draw, and adds the draw's name (testname "." index) to the per-gpu
+-- list of draws seen for each (register, value) pair.
+function draw(primtype, nindx)
+  -- RECTLIST is only used internally.. we want to ignore it for
+  -- now, although it could potentially be interesting to track
+  -- these separately (separating clear/restore/resolve) just to
+  -- figure out which registers are used for which..
+  if primtype == "DI_PT_RECTLIST" then
+    return
+  end
+
+  -- draws are numbered from 0 in the order they are seen:
+  local didx = tblsz(test["draws"])
+  local regtbl = {}
+  test["draws"][didx] = {primtype = primtype, regs = regtbl}
+
+  local gpu_regvals = results[gpuname]["regvals"]
+  local drawname = testname .. "." .. didx
+
+  -- populate current regs.  For now just consider ones that have
+  -- been written.. maybe we need to make that configurable in
+  -- case it filters out too many registers.
+  for regbase = 0, 0xffff do
+    if regs.written(regbase) ~= 0 then
+      local regval = regs.val(regbase)
+
+      -- track reg vals per draw:
+      regtbl[regbase] = regval
+
+      -- also track which reg vals appear in which tests:
+      local by_value = gpu_regvals[regbase]
+      if by_value == nil then
+        by_value = {}
+        gpu_regvals[regbase] = by_value
+      end
+      local names = by_value[regval]
+      if names == nil then
+        names = {}
+        by_value[regval] = names
+      end
+      names[#names + 1] = drawname
+    end
+  end
+
+  -- TODO maybe we want to whitelist a few well known regs, for the
+  -- convenience of the code that runs at the end to analyze the data?
+  -- TODO also would be useful to somehow capture CP_SET_BIN..
+end
+
+-- Called at the end of each capture file: forget the per-file state.
+function end_cmdstream()
+  test, gpuname, testname = nil, nil, nil
+end
+
+-- Debug helper: list every test recorded for a gpu along with its draws.
+-- The draws table is indexed from 0 (see draw()), so the '#' operator would
+-- not count it reliably; tblsz() is used for the count instead.
+function print_draws(gpuname, gpu)
+  io.write(" " .. gpuname .. "\n")
+  for testname,test in pairs(gpu["tests"]) do
+    io.write(" " .. testname .. ", draws=" .. tblsz(test["draws"]) .. "\n")
+    for didx,draw in pairs(test["draws"]) do
+      io.write(" " .. didx .. ": " .. draw["primtype"] .. "\n")
+    end
+  end
+end
+
+-- sort and concat a list of draw names to form a key which can be
+-- compared to other drawlists to check for equality.  Sorting is done on a
+-- copy, so the caller's list is left untouched, and makes the key
+-- independent of the order in which the draws were recorded.  Returns nil
+-- for an empty list.
+-- TODO maybe we instead want a scheme that allows for some fuzzyness
+-- in the matching??
+function drawlistname(drawlist)
+  local names = {}
+  for _,draw in pairs(drawlist) do
+    names[#names + 1] = draw
+  end
+  if #names == 0 then
+    return nil
+  end
+  table.sort(names)
+  return table.concat(names, ":")
+end
+
+-- cache of rnn databases, keyed by gpu name, loaded on first use:
+local rnntbl = {}
+
+-- For every gpu, print each (register, value) pair whose drawlist key
+-- equals 'name', with the register name and decoded value.
+function dumpmatches(name)
+  for gpuname,gpu in pairs(results) do
+    local r = rnntbl[gpuname]
+    if r == nil then
+      -- the newline belongs after the gpu name, not before it
+      io.write("loading rnn database: " .. gpuname .. "\n")
+      r = rnn.init(gpuname)
+      rnntbl[gpuname] = r
+    end
+    for regbase,regvals in pairs(gpu["regvals"]) do
+      for regval,drawlist in pairs(regvals) do
+        local name2 = drawlistname(drawlist)
+        if name == name2 then
+          io.write(string.format(" %s:%s:\t%08x %s\n",
+            gpuname, rnn.regname(r, regbase),
+            regval, rnn.regval(r, regbase, regval)))
+        end
+      end
+    end
+  end
+end
+
+-- Called after the last capture: for every distinct drawlist key found in
+-- any gpu's data, print the key once followed by all matching registers
+-- across all gpus.
+function finish()
+  -- drawlistnames that we've already dumped:
+  local seen = {}
+
+  for _,gpu in pairs(results) do
+    -- print_draws(gpuname, gpu)
+    for _,regvals in pairs(gpu["regvals"]) do
+      for _,drawlist in pairs(regvals) do
+        local key = drawlistname(drawlist)
+        if seen[key] == nil then
+          io.write("\n" .. key .. ":\n")
+          dumpmatches(key)
+          seen[key] = 1
+        end
+      end
+    end
+  end
+end
+
--- /dev/null
+-- Parse cmdstream dump and analyse blits and batches
+
+--local posix = require "posix"
+
+-- C-style printf: format the arguments and write the result to the default
+-- output; returns whatever io.write() returns.
+function printf(fmt, ...)
+  local text = string.format(fmt, ...)
+  return io.write(text)
+end
+
+-- Debug trace hook; disabled.  Uncomment the body to forward to printf().
+function dbg(fmt, ...)
+  --printf(fmt, ...)
+end
+
+printf("Analyzing Data...\n")
+
+-- rnn database handle used for all register lookups in this script; the
+-- gpu name is hard-coded:
+local r = rnn.init("a630")
+
+-- Each submit, all draws will target the same N MRTs:
+local mrts = {}
+local allmrts = {} -- includes historical render targets
+
+-- Record a render target, keyed by its base address, both in the current
+-- batch (mrts) and in the history of all targets seen (allmrts).
+function push_mrt(fmt, w, h, samples, base, flag, gmem)
+  dbg("MRT: %s %ux%u 0x%x\n", fmt, w, h, base)
+
+  local mrt = {
+    format = fmt,
+    w = w,
+    h = h,
+    samples = samples,
+    base = base,
+    flag = flag,
+    gmem = gmem,
+  }
+
+  mrts[base] = mrt
+  allmrts[base] = mrt
+end
+
+-- And each draw will read from M sources/textures:
+local sources = {}
+
+-- Record a source/texture of the current batch, keyed by its base address.
+function push_source(fmt, w, h, samples, base, flag)
+  dbg("SRC: %s %ux%u 0x%x\n", fmt, w, h, base)
+
+  sources[base] = {
+    format = fmt,
+    w = w,
+    h = h,
+    samples = samples,
+    base = base,
+    flag = flag,
+  }
+end
+
+-- Per-batch state.  Everything except binw/binh/nbins and nullbatch is
+-- re-initialised by reset(); nullbatch is set by start_submit().
+local binw
+local binh
+local nbins
+local blits = 0
+local draws = 0
+local drawmode
+-- tables used as sets, keyed by gmem base (or by buffer address for blits):
+local cleared
+local restored
+local resolved
+local nullbatch
+local depthtest
+local depthwrite
+local stenciltest
+local stencilwrite
+
+-- Called at the start of each capture file: just announce it.
+function start_cmdstream(name)
+  local banner = string.format("Parsing %s\n", name)
+  io.write(banner)
+end
+
+-- Clear all per-batch state so a new batch/blit can be accumulated.
+-- (allmrts, binw/binh/nbins and nullbatch are deliberately left alone.)
+function reset()
+  dbg("reset\n")
+  mrts = {}
+  sources = {}
+  draws = 0
+  blits = 0
+  cleared = {}
+  restored = {}
+  resolved = {}
+  depthtest = false
+  depthwrite = false
+  stenciltest = false
+  stencilwrite = false
+  -- use the nil keyword; 'Nil' was an undefined global that merely
+  -- happened to evaluate to nil
+  drawmode = nil
+end
+
+-- Called at the start of each submit: start from clean state and assume
+-- the submit is a null batch until a draw or blit event proves otherwise.
+function start_submit()
+  dbg("start_submit\n")
+  reset()
+  nullbatch = true
+end
+
+-- Print a summary of the batch (or blit) accumulated so far, then reset the
+-- per-batch state.  When nothing was drawn or blitted, only "NULL BATCH!"
+-- is printed (if the submit saw no draw/blit event at all) and the state is
+-- left as is.
+function finish()
+  dbg("finish\n")
+
+  printf("\n")
+
+  -- TODO we get false-positives for 'NULL BATCH!' because we don't have
+  -- a really good way to differentiate between submits and cmds.  Ie.
+  -- with growable cmdstream, and a large # of tiles, IB1 can get split
+  -- across multiple buffers.  Since we ignore GMEM draws for window-
+  -- offset != 0,0, the later cmds will appear as null batches
+  if draws == 0 and blits == 0 then
+    if nullbatch then
+      printf("NULL BATCH!\n");
+    end
+    return
+  end
+
+  -- prints one line listing whichever of the two flags are set:
+  local function flags_line(test_on, test_label, write_on, write_label)
+    if not (test_on or write_on) then
+      return
+    end
+    printf(" ")
+    if test_on then
+      printf(test_label)
+    end
+    if write_on then
+      printf(write_label)
+    end
+    printf("\n")
+  end
+
+  local gmem_mode = (drawmode == "RM6_GMEM")
+
+  if draws > 0 then
+    printf("Batch:\n")
+    printf("-------\n")
+    printf(" # of draws: %u\n", draws)
+    printf(" mode: %s\n", drawmode)
+    if gmem_mode then
+      printf(" bin size: %ux%u (%u bins)\n", binw, binh, nbins)
+    end
+    flags_line(depthtest, "DEPTHTEST ", depthwrite, "DEPTHWRITE")
+    flags_line(stenciltest, "STENCILTEST ", stencilwrite, "STENCILWRITE")
+  else
+    printf("Blit:\n")
+    printf("-----\n")
+  end
+
+  for base,mrt in pairs(mrts) do
+    printf(" MRT[0x%x:0x%x]:\t%ux%u\t\t%s (%s)", base, mrt.flag, mrt.w, mrt.h, mrt.format, mrt.samples)
+    if gmem_mode then
+      -- in GMEM mode the clear/restore/resolve sets are keyed by gmem base
+      if cleared[mrt.gmem] then
+        printf("\tCLEARED")
+      end
+      if restored[mrt.gmem] then
+        printf("\tRESTORED")
+      end
+      if resolved[mrt.gmem] then
+        printf("\tRESOLVED")
+      end
+    elseif cleared[mrt.base] then
+      printf("\tCLEARED")
+    end
+    printf("\n")
+  end
+
+  function print_source(source)
+    printf(" SRC[0x%x:0x%x]:\t%ux%u\t\t%s (%s)\n", source.base, source.flag, source.w, source.h, source.format, source.samples)
+  end
+
+  for base,source in pairs(sources) do
+    -- only show sources that have been previously rendered to, other
+    -- textures are less interesting.  Possibly this should be an
+    -- option somehow
+    local interesting = draws < 10
+      or allmrts[base]
+      or draws == 0
+      or (source.flag and allmrts[source.flag])
+    if interesting then
+      print_source(source)
+    end
+  end
+
+  reset()
+end
+
+-- Called at the end of each submit: flush whatever batch is pending.
+function end_submit()
+  dbg("end_submit\n")
+  return finish()
+end
+
+-- Track the current mode:
+local mode = ""
+
+-- Packet handler: remember the marker carried in the first dword.
+function CP_SET_MARKER(pkt, size)
+  local marker = pkt[0].MARKER
+  mode = marker
+  dbg("mode: %s\n", mode)
+end
+
+-- Packet handler: only BLIT events are of interest.  In RM6_RESOLVE mode
+-- the blit marks the gmem base as resolved; in RM6_GMEM mode it is either a
+-- clear or a restore, and the destination is also recorded as an MRT.
+function CP_EVENT_WRITE(pkt, size)
+  if tostring(pkt[0].EVENT) ~= "BLIT" then
+    return
+  end
+  nullbatch = false
+
+  local m = tostring(mode)
+
+  if m == "RM6_RESOLVE" then
+    resolved[r.RB_BLIT_BASE_GMEM] = 1
+    return
+  end
+
+  if m ~= "RM6_GMEM" then
+    printf("I am confused!!!\n")
+    return
+  end
+
+  -- combine the two 32-bit halves of an address:
+  local function addr64(lo, hi)
+    return lo | (hi << 32)
+  end
+
+  -- either clear or restore:
+  if r.RB_BLIT_INFO.CLEAR_MASK == 0 then
+    restored[r.RB_BLIT_BASE_GMEM] = 1
+  else
+    cleared[r.RB_BLIT_BASE_GMEM] = 1
+  end
+
+  -- push_mrt() because we could have GMEM
+  -- passes with only a clear and no draws:
+  local flag = 0
+  local sysmem = 0;
+
+  -- try to match up the GMEM addr with the MRT/DEPTH state,
+  -- to avoid relying on RB_BLIT_DST also getting written:
+  for n = 0,r.RB_FS_OUTPUT_CNTL1.MRT-1 do
+    if r.RB_MRT[n].BASE_GMEM == r.RB_BLIT_BASE_GMEM then
+      sysmem = addr64(r.RB_MRT[n].BASE_LO, r.RB_MRT[n].BASE_HI)
+      flag = addr64(r.RB_MRT_FLAG_BUFFER[n].ADDR_LO, r.RB_MRT_FLAG_BUFFER[n].ADDR_HI)
+      break
+    end
+  end
+
+  if sysmem == 0 and r.RB_BLIT_BASE_GMEM == r.RB_DEPTH_BUFFER_BASE_GMEM then
+    sysmem = addr64(r.RB_DEPTH_BUFFER_BASE_LO, r.RB_DEPTH_BUFFER_BASE_HI)
+    flag = addr64(r.RB_DEPTH_FLAG_BUFFER_BASE_LO, r.RB_DEPTH_FLAG_BUFFER_BASE_HI)
+  end
+
+  --NOTE this can get confused by previous blits:
+  --if sysmem == 0 then
+  --  -- fallback:
+  --  sysmem = r.RB_BLIT_DST_LO | (r.RB_BLIT_DST_HI << 32)
+  --  flag = r.RB_BLIT_FLAG_DST_LO | (r.RB_BLIT_FLAG_DST_HI << 32)
+  --end
+
+  if not r.RB_BLIT_DST_INFO.FLAGS then
+    flag = 0
+  end
+
+  -- TODO maybe just emit RB_BLIT_DST_LO/HI for clears.. otherwise
+  -- we get confused by stale values in registers.. not sure
+  -- if this is a problem w/ blob
+  push_mrt(r.RB_BLIT_DST_INFO.COLOR_FORMAT,
+    r.RB_BLIT_SCISSOR_BR.X + 1,
+    r.RB_BLIT_SCISSOR_BR.Y + 1,
+    r.RB_BLIT_DST_INFO.SAMPLES,
+    sysmem,
+    flag,
+    r.RB_BLIT_BASE_GMEM)
+end
+
+-- Packet handler for texture state: record the texture as a source of the
+-- current batch, using format/size/samples and the base and flag addresses
+-- assembled from their LO/HI dwords.
+function A6XX_TEX_CONST(pkt, size)
+  local fmt = pkt[0].FMT
+  local width = pkt[1].WIDTH
+  local height = pkt[1].HEIGHT
+  local samples = pkt[0].SAMPLES
+  local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32)
+  local flag = pkt[7].FLAG_LO | (pkt[8].FLAG_HI << 32)
+
+  push_source(fmt, width, height, samples, base, flag)
+end
+
+-- Handle a 2D blit.  Blits that look like resolves are only noted as such;
+-- anything else flushes the pending batch (if it has draws) and is reported
+-- as its own "BLIT" entry, with the destination recorded as an MRT and
+-- either marked as cleared (solid color) or paired with its source.
+function handle_blit()
+  -- blob sometimes uses CP_BLIT for resolves, so filter those out:
+  -- TODO it would be nice to not hard-code GMEM addr:
+  -- TODO I guess the src can be an offset from GMEM addr..
+  if r.SP_PS_2D_SRC_LO == 0x100000 and not r.RB_2D_BLIT_CNTL.SOLID_COLOR then
+    resolved[0] = 1
+    return
+  end
+
+  if draws > 0 then
+    finish()
+  end
+  reset()
+  drawmode = "BLIT"
+
+  -- combine the two 32-bit halves of an address:
+  local function addr64(lo, hi)
+    return lo | (hi << 32)
+  end
+
+  -- This kinda assumes that we are doing full img blits, which is maybe
+  -- Not completely legit.  We could perhaps instead just track pitch and
+  -- size/pitch??  Or maybe the size doesn't matter much
+  push_mrt(r.RB_2D_DST_INFO.COLOR_FORMAT,
+    r.GRAS_2D_DST_BR.X + 1,
+    r.GRAS_2D_DST_BR.Y + 1,
+    "MSAA_ONE",
+    addr64(r.RB_2D_DST_LO, r.RB_2D_DST_HI),
+    addr64(r.RB_2D_DST_FLAGS_LO, r.RB_2D_DST_FLAGS_HI),
+    -1)
+
+  if r.RB_2D_BLIT_CNTL.SOLID_COLOR then
+    dbg("CLEAR=%x\n", addr64(r.RB_2D_DST_LO, r.RB_2D_DST_HI))
+    cleared[addr64(r.RB_2D_DST_LO, r.RB_2D_DST_HI)] = 1
+  else
+    push_source(r.SP_2D_SRC_FORMAT.COLOR_FORMAT,
+      r.GRAS_2D_SRC_BR_X.X + 1,
+      r.GRAS_2D_SRC_BR_Y.Y + 1,
+      "MSAA_ONE",
+      addr64(r.SP_PS_2D_SRC_LO, r.SP_PS_2D_SRC_HI),
+      addr64(r.SP_PS_2D_SRC_FLAGS_LO, r.SP_PS_2D_SRC_FLAGS_HI))
+  end
+
+  blits = blits + 1
+  finish()
+end
+
+-- Returns true when going from curmode to newmode stays within the same
+-- render pass: only BINNING->GMEM and GMEM->RESOLVE qualify.
+function valid_transition(curmode, newmode)
+  local same_pass_successor = {
+    RM6_BINNING = "RM6_GMEM",
+    RM6_GMEM = "RM6_RESOLVE",
+  }
+  local expected = same_pass_successor[curmode]
+  if expected == nil then
+    return false
+  end
+  return expected == newmode
+end
+
+-- Per-draw callback: folds the current register state into the statistics
+-- of the render pass being tracked.
+--   primtype: draw/event name string supplied by the caller
+--   nindx:    not used by this function
+-- "BLIT_OP_SCALE" is handed to handle_blit(); "EVENT:BLIT" is ignored.
+function draw(primtype, nindx)
+  dbg("draw: %s (%s)\n", primtype, mode)
+  nullbatch = false
+  if primtype == "BLIT_OP_SCALE" then
+    handle_blit()
+    return
+  elseif primtype == "EVENT:BLIT" then
+    return
+  end
+
+  local m = tostring(mode)
+
+  -- detect changes in drawmode which indicate a different
+  -- pass.. BINNING->GMEM means same pass, but other
+  -- transitions mean different pass:
+  if drawmode and m ~= drawmode then
+    dbg("%s -> %s transition\n", drawmode, m)
+    if not valid_transition(drawmode, m) then
+      dbg("invalid transition, new render pass!\n")
+      finish()
+      reset()
+    end
+  end
+
+  if m ~= "RM6_GMEM" and m ~= "RM6_BYPASS" then
+    if m == "RM6_BINNING" then
+      drawmode = m
+      return
+    end
+    -- "EVENT:BLIT" has already returned at the top of this function, so
+    -- there is no separate RM6_RESOLVE case left to handle here.
+    printf("unknown MODE %s for primtype %s\n", m, primtype)
+    return
+  end
+
+  -- Only count the first tile for GMEM mode to avoid counting
+  -- each draw for each tile
+  if m == "RM6_GMEM" then
+    if r.RB_WINDOW_OFFSET.X ~= 0 or r.RB_WINDOW_OFFSET.Y ~= 0 then
+      return
+    end
+  end
+
+  drawmode = m
+
+  -- per-MRT component enables, indexed 0..7 to match RB_MRT[n]
+  local render_components = {}
+  render_components[0] = r.RB_RENDER_COMPONENTS.RT0
+  render_components[1] = r.RB_RENDER_COMPONENTS.RT1
+  render_components[2] = r.RB_RENDER_COMPONENTS.RT2
+  render_components[3] = r.RB_RENDER_COMPONENTS.RT3
+  render_components[4] = r.RB_RENDER_COMPONENTS.RT4
+  render_components[5] = r.RB_RENDER_COMPONENTS.RT5
+  render_components[6] = r.RB_RENDER_COMPONENTS.RT6
+  render_components[7] = r.RB_RENDER_COMPONENTS.RT7
+  for n = 0,r.RB_FS_OUTPUT_CNTL1.MRT-1 do
+    -- skip render targets with no components enabled
+    if render_components[n] ~= 0 then
+      push_mrt(r.RB_MRT[n].BUF_INFO.COLOR_FORMAT,
+        r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
+        r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
+        r.RB_MSAA_CNTL.SAMPLES,
+        r.RB_MRT[n].BASE_LO | (r.RB_MRT[n].BASE_HI << 32),
+        r.RB_MRT_FLAG_BUFFER[n].ADDR_LO | (r.RB_MRT_FLAG_BUFFER[n].ADDR_HI << 32),
+        r.RB_MRT[n].BASE_GMEM)
+    end
+  end
+
+  local depthbase = r.RB_DEPTH_BUFFER_BASE_LO |
+    (r.RB_DEPTH_BUFFER_BASE_HI << 32)
+
+  if depthbase ~= 0 then
+    push_mrt(r.RB_DEPTH_BUFFER_INFO.DEPTH_FORMAT,
+      r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
+      r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
+      r.RB_MSAA_CNTL.SAMPLES,
+      depthbase,
+      r.RB_DEPTH_FLAG_BUFFER_BASE_LO | (r.RB_DEPTH_FLAG_BUFFER_BASE_HI << 32),
+      r.RB_DEPTH_BUFFER_BASE_GMEM)
+  end
+
+  if r.RB_DEPTH_CNTL.Z_WRITE_ENABLE then
+    depthwrite = true
+  end
+
+  if r.RB_DEPTH_CNTL.Z_ENABLE then
+    depthtest = true
+  end
+
+  -- clearly 0 != false.. :-/
+  if r.RB_STENCILWRMASK.WRMASK ~= 0 then
+    stencilwrite = true
+  end
+
+  if r.RB_STENCIL_CONTROL.STENCIL_ENABLE then
+    stenciltest = true
+  end
+
+  -- TODO should also check for stencil buffer for z32+s8 case
+
+  if m == "RM6_GMEM" then
+    binw = r.VSC_BIN_SIZE.WIDTH
+    binh = r.VSC_BIN_SIZE.HEIGHT
+    nbins = r.VSC_BIN_COUNT.NX * r.VSC_BIN_COUNT.NY
+  end
+
+  draws = draws + 1
+end
+
--- /dev/null
+-- Parse cmdstream dump and check for common errors
+-- 1) Check for overflowing HLSQ_xS_CNTL.CONSTLEN
+-- 2) Check for constant uploads that overwrite each other. The
+-- range checking is reset on each draw, since it is a valid
+-- use-case to do partial constant upload. But if we see two
+-- CP_LOAD_STATE* that overwrite the same range of constants
+-- within the same draw, that is almost certainly unintentional.
+--
+-- TODO add more checks
+-- TODO maybe some parts could be shared across
+-- different generations
+
+--local posix = require "posix"
+
+-- C-style printf: formats `fmt` with the remaining arguments and writes the
+-- result with io.write, returning whatever io.write returns.
+function printf(fmt, ...)
+  local text = string.format(fmt, ...)
+  return io.write(text)
+end
+
+-- Debug trace helper.  Tracing is currently disabled: the body is commented
+-- out, so calling this does nothing.  Uncomment the printf to enable it.
+function dbg(fmt, ...)
+ --printf(fmt, ...)
+end
+
+-- shader stage names, in the order they are iterated by reset_constranges()
+stages = {
+  "SB6_VS_SHADER", "SB6_HS_SHADER", "SB6_DS_SHADER",
+  "SB6_GS_SHADER", "SB6_FS_SHADER", "SB6_CS_SHADER",
+}
+
+-- maps shader stage to HLSQ_xS_CNTL register name:
+cntl_regs = {
+  SB6_VS_SHADER = "HLSQ_VS_CNTL",
+  SB6_HS_SHADER = "HLSQ_HS_CNTL",
+  SB6_DS_SHADER = "HLSQ_DS_CNTL",
+  SB6_GS_SHADER = "HLSQ_GS_CNTL",
+  SB6_FS_SHADER = "HLSQ_FS_CNTL",
+  SB6_CS_SHADER = "HLSQ_CS_CNTL",
+}
+
+-- Constant ranges uploaded so far:
+-- constranges[stagename] -> table of offsets that have been uploaded
+constranges = {}
+
+-- Give every stage listed in `stages` a fresh, empty range table.
+function reset_constranges()
+  for idx = 1, #stages do
+    constranges[stages[idx]] = {}
+  end
+end
+
+-- start out with empty ranges for every stage
+reset_constranges()
+
+-- banner printed once when the script is loaded
+printf("Checking cmdstream...\n")
+
+-- handle returned by rnn.init for "a630"; CP_LOAD_STATE6() below reads
+-- register fields (e.g. CONSTLEN) through it
+local r = rnn.init("a630")
+
+-- Per-draw callback.  Neither argument is used.
+function draw(primtype, nindx)
+  -- partial constant uploads across draws are valid, so the record of
+  -- uploaded ranges starts over at each draw
+  reset_constranges()
+  printf("draw!\n")
+end
+
+-- CP_LOAD_STATE6 packet callback.  For constant uploads (ST6_CONSTANTS),
+-- checks that DST_OFF + NUM_UNIT does not exceed the CONSTLEN field of the
+-- stage's HLSQ_xS_CNTL register and prints an ERROR line when it does.
+--   pkt:  decoded packet; only pkt[0] is read
+--   size: not used
+function CP_LOAD_STATE6(pkt, size)
+  if tostring(pkt[0].STATE_TYPE) ~= "ST6_CONSTANTS" then
+    return
+  end
+  dbg("got CP_LOAD_STATE6\n")
+
+  -- locals, so repeated calls do not create or clobber globals
+  local stage = tostring(pkt[0].STATE_BLOCK)
+  local cntl_reg = cntl_regs[stage]
+  if not cntl_reg then
+    -- state block is not one of the stages in cntl_regs, so there is no
+    -- CONSTLEN to compare against; report it instead of indexing r[nil]
+    printf("WARNING: no CNTL register known for state block %s\n", stage)
+    return
+  end
+
+  local max = pkt[0].DST_OFF + pkt[0].NUM_UNIT
+  local constlen = r[cntl_reg].CONSTLEN
+  dbg("looking for %s.. max=%d vs %d\n", cntl_reg, max, constlen)
+  if max > constlen then
+    printf("ERROR: invalid max constant offset for stage %s: %d vs %d\n", stage, max, constlen)
+  end
+end
--- /dev/null
+-- printed once when the script is loaded
+io.write("HELLO WORLD\n")
+
+-- global handle returned by rnn.init for "a630"; draw() below reads
+-- register fields through it
+r = rnn.init("a630")
+
+-- Callback: prints a START line containing `name`.
+function start_cmdstream(name)
+  local line = "START: " .. name .. "\n"
+  io.write(line)
+end
+
+-- Per-draw callback: prints the draw arguments, two register fields read
+-- through `r`, and the written/lastval/val results for register 0x2280.
+function draw(primtype, nindx)
+  local header = "DRAW: " .. primtype .. ", " .. nindx .. "\n"
+  io.write(header)
+
+  -- io.write("GRAS_CL_VPORT_XOFFSET: " .. r.GRAS_CL_VPORT_XOFFSET .. "\n")
+  local rop = "RB_MRT[0].CONTROL.ROP_CODE: " .. r.RB_MRT[0].CONTROL.ROP_CODE .. "\n"
+  io.write(rop)
+
+  local compmask = "SP_VS_OUT[0].A_COMPMASK: " .. r.SP_VS_OUT[0].A_COMPMASK .. "\n"
+  io.write(compmask)
+
+  --io.write("RB_DEPTH_CONTROL.Z_ENABLE: " .. tostring(r.RB_DEPTH_CONTROL.Z_ENABLE) .. "\n")
+  local reginfo = "0x2280: written=" .. regs.written(0x2280) .. ", lastval=" .. regs.lastval(0x2280) .. ", val=" .. regs.val(0x2280) .. "\n"
+  io.write(reginfo)
+end
+
+-- Texture-state callback: prints `size` and the WIDTH/HEIGHT fields of
+-- pkt[1], framed by blank lines.
+function A6XX_TEX_CONST(pkt, size)
+  local sizeline = "\n-------- " .. size .. "\n"
+  io.write(sizeline)
+
+  local dimline = "-------- w=" .. pkt[1].WIDTH .. ", h=" .. pkt[1].HEIGHT .. "\n"
+  io.write(dimline)
+
+  io.write("\n")
+end
+
+-- Callback: prints an END line.
+function end_cmdstream()
+  local line = "END\n"
+  io.write(line)
+end
+
+-- Callback: prints a FINISH line.
+function finish()
+  local line = "FINISH\n"
+  io.write(line)
+end
+
--- /dev/null
+-- Parse logs from test-quad-textured-3d.c to extract layer/level
+-- offsets
+--
+-- We figure out the offsets from blits, but there may be some
+-- unrelated blits. So just save all of them until we find the
+-- texture state for the 3d texture. This gives us the base
+-- address, and the miplevel #0 width/height/depth. Then work
+-- backwards from there finding the blits to the same dst buffer
+-- and deducing the miplevel from the minified dimensions
+
+local posix = require "posix"
+
+-- banner printed once when the script is loaded
+io.write("Analyzing Data...\n")
+
+-- blits recorded by draw() for the current cmdstream, stored at indices
+-- 0..nallblits-1 (0-based); both are reset by start_cmdstream()
+local allblits = {}
+local nallblits = 0
+-- handle returned by rnn.init for "a630"; draw() reads register fields
+-- through it
+local r = rnn.init("a630")
+
+-- Shift `val` right by `lvls` bits, never returning less than 1.
+function minify(val, lvls)
+  local shifted = val >> lvls
+  if shifted >= 1 then
+    return shifted
+  end
+  return 1
+end
+
+-- C-style printf: formats `fmt` with the remaining arguments and writes the
+-- result with io.write, returning whatever io.write returns.
+function printf(fmt, ...)
+  local text = string.format(fmt, ...)
+  return io.write(text)
+end
+
+-- Callback: announces the cmdstream `name` and discards the blits recorded
+-- so far.
+function start_cmdstream(name)
+  local line = "Parsing " .. name .. "\n"
+  io.write(line)
+  allblits, nallblits = {}, 0
+end
+
+-- Per-draw callback: records every "BLIT_OP_SCALE" whose destination
+-- top-left is 0,0 into allblits.  `nindx` is not used.
+function draw(primtype, nindx)
+  if primtype ~= "BLIT_OP_SCALE" then
+    return
+  end
+
+  -- Just in case, filter out anything that isn't starting at 0,0
+  local at_origin = (r.GRAS_2D_DST_TL.X == 0) and (r.GRAS_2D_DST_TL.Y == 0)
+  if not at_origin then
+    return
+  end
+
+  local width = r.GRAS_2D_DST_BR.X + 1
+  local height = r.GRAS_2D_DST_BR.Y + 1
+  local pitch = r.RB_2D_DST_SIZE.PITCH
+  local addr = r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32)
+  --printf("Found blit: 0x%x (0x%x)\n", addr, bos.base(addr))
+
+  allblits[nallblits] = {
+    width = width,
+    height = height,
+    pitch = pitch,
+    addr = addr,
+    base = bos.base(addr),
+    endaddr = 0, -- filled in later
+  }
+  nallblits = nallblits + 1
+end
+
+-- Texture-state callback.  For texture state with DEPTH > 1, selects the
+-- recorded blits that target the same buffer at or above the texture base
+-- address and prints level/layer/size/pitch/offset for each of them.
+-- `size` is not used.
+function A6XX_TEX_CONST(pkt, size)
+  local depth0 = pkt[5].DEPTH
+  -- texture state w/ DEPTH <= 1 isn't the 3d tex state we are looking for
+  if depth0 <= 1 then
+    return
+  end
+
+  local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32)
+  local width0 = pkt[1].WIDTH
+  local height0 = pkt[1].HEIGHT
+
+  printf("Found texture state: %ux%ux%u (MIN_LAYERSZ=0x%x)\n",
+    width0, height0, depth0, pkt[3].MIN_LAYERSZ)
+
+  -- Note that in some case the texture has some extra page or so
+  -- at the beginning:
+  local basebase = bos.base(base)
+  printf("base: 0x%x (0x%x)\n", base, basebase)
+
+  -- Pick out the associated blits, in recorded order (no sorting by dst
+  -- address is done).  Each selected blit's endaddr becomes the addr of the
+  -- next selected one; the last keeps endaddr == 0.
+  local blits = {}
+  local nblits = 0
+  local prev = nil
+  for n = 0, nallblits - 1 do
+    local cand = allblits[n]
+    --printf("blit addr: 0x%x (0x%x)\n", cand.addr, cand.base)
+    if cand.base == basebase and cand.addr >= base then
+      if prev then
+        prev.endaddr = cand.addr
+      end
+      blits[nblits] = cand
+      nblits = nblits + 1
+      prev = cand
+    end
+  end
+
+  -- now go thru the relevant blits and print out interesting details;
+  -- a change in blit dimensions marks the start of the next mipmap level
+  local level = 0
+  local layer = 0
+  local w = width0
+  local h = height0
+  for n = 0, nblits - 1 do
+    local blit = blits[n]
+    --printf("%u: %ux%u, addr=%x\n", n, blit.width, blit.height, blit.addr)
+    local size_changed = (w ~= blit.width) or (h ~= blit.height)
+    if size_changed then
+      level = level + 1
+      layer = 0
+
+      -- the new size is expected to be the previous one minified once
+      local expect_w = minify(w, 1)
+      local expect_h = minify(h, 1)
+      if blit.width ~= expect_w or blit.height ~= expect_h then
+        printf("I am confused! %ux%u vs %ux%u\n", blit.width, blit.height, minify(w, 1), minify(h, 1))
+        printf("addr=%x\n", blit.addr)
+        --return
+      end
+
+      w = blit.width
+      h = blit.height
+    end
+
+    printf("level=%u, layer=%u, sz=%ux%u, pitch=%u, offset=0x%x, addr=%x",
+      level, layer, w, h, blit.pitch, blit.addr - base, blit.addr)
+    if blit.endaddr ~= 0 then
+      local layersz = blit.endaddr - blit.addr
+      local alignedheight = layersz / blit.pitch
+      printf(", layersz=0x%x, alignedheight=%f", layersz, alignedheight)
+    end
+    printf("\n")
+
+    layer = layer + 1
+  end
+  printf("\n\n")
+end
+
--- /dev/null
+-- Parse logs from https://github.com/freedreno/freedreno/
+-- test-texturator.c to generate a src/freedreno/fdl/fd5_layout_test.c
+-- block. We figure out the offsets from blits, but there may be some
+-- unrelated blits. So just save all of them until we find the
+-- texture state. This gives us the base address, and the miplevel #0
+-- width/height/depth. Then work backwards from there finding the
+-- blits to the same dst buffer and deducing the miplevel from the
+-- minified dimensions
+
+local posix = require "posix"
+
+-- banner printed once when the script is loaded
+io.write("Analyzing Data...\n")
+
+-- handle returned by rnn.init for "a530"; the packet callbacks read
+-- register fields through it
+local r = rnn.init("a530")
+-- set to 1 by A5XX_TEX_CONST() once the first texture state has been
+-- handled, so that later texture state is ignored
+local found_tex = 0
+
+-- blits recorded by CP_EVENT_WRITE()/CP_BLIT(), stored at indices
+-- 0..nallblits-1 (0-based); both are reset by start_cmdstream()
+local allblits = {}
+local nallblits = 0
+
+-- Among the recorded blits whose base, width and height all match the
+-- arguments, return the one with the lowest addr, or nil if none match.
+function get_first_blit(base, width, height)
+  local best = nil
+
+  for idx = 0, nallblits - 1 do
+    local cand = allblits[idx]
+    local matches = cand.base == base and cand.width == width and cand.height == height
+    if matches and (best == nil or cand.addr < best.addr) then
+      best = cand
+    end
+  end
+
+  return best
+end
+
+-- Shift `val` right by `lvls` bits, never returning less than 1.
+function minify(val, lvls)
+  local shifted = val >> lvls
+  if shifted >= 1 then
+    return shifted
+  end
+  return 1
+end
+
+-- C-style printf: formats `fmt` with the remaining arguments and writes the
+-- result with io.write, returning whatever io.write returns.
+function printf(fmt, ...)
+  local text = string.format(fmt, ...)
+  return io.write(text)
+end
+
+-- Callback: announces the cmdstream `name` and discards the blits recorded
+-- so far.
+function start_cmdstream(name)
+  local line = "Parsing " .. name .. "\n"
+  io.write(line)
+  allblits, nallblits = {}, 0
+end
+
+-- Record texture upload blits done through CP_EVENT_WRITE.  Only packets
+-- whose EVENT is "BLIT" are recorded; the blit parameters come from the
+-- current register state.  `size` is not used.
+function CP_EVENT_WRITE(pkt, size)
+  local event = tostring(pkt[0].EVENT)
+  if event ~= "BLIT" then
+    return
+  end
+
+  local width = r.RB_RESOLVE_CNTL_2.X + 1
+  local height = r.RB_RESOLVE_CNTL_2.Y + 1
+  local pitch = r.RB_BLIT_DST_PITCH
+  local addr = r.RB_BLIT_DST_LO | (r.RB_BLIT_DST_HI << 32)
+  local base = bos.base(addr)
+  local ubwc_addr = r.RB_BLIT_FLAG_DST_LO | (r.RB_BLIT_FLAG_DST_HI << 32)
+  local ubwc_base = bos.base(ubwc_addr)
+  local ubwc_pitch = r.RB_BLIT_FLAG_DST_PITCH
+
+  local blit = {
+    width = width,
+    height = height,
+    pitch = pitch,
+    addr = addr,
+    base = base,
+    ubwc_addr = ubwc_addr,
+    ubwc_base = ubwc_base,
+    ubwc_pitch = ubwc_pitch,
+    endaddr = 0, -- filled in later
+  }
+  printf("Found event blit: 0x%x (0x%x) %dx%d UBWC 0x%x (0x%x) tiled %s\n", addr, base, width, height, ubwc_addr, ubwc_base, r.RB_RESOLVE_CNTL_3.TILED)
+
+  allblits[nallblits] = blit
+  nallblits = nallblits + 1
+end
+
+-- CP_BLIT packet callback: records the blit unless its SRC_X1/SRC_Y1 are
+-- non-zero.  Width/height come from the packet, the destination address,
+-- pitch and UBWC flag buffer from the current register state.  `size` is
+-- not used.
+function CP_BLIT(pkt, size)
+  -- Just in case, filter out anything that isn't starting at 0,0
+  local at_origin = (pkt[1].SRC_X1 == 0) and (pkt[1].SRC_Y1 == 0)
+  if not at_origin then
+    return
+  end
+
+  local width = pkt[2].SRC_X2 + 1
+  local height = pkt[2].SRC_Y2 + 1
+  local pitch = r.RB_2D_DST_SIZE.PITCH
+  local addr = r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32)
+  local base = bos.base(addr)
+  local ubwc_addr = r.RB_2D_DST_FLAGS_LO | (r.RB_2D_DST_FLAGS_HI << 32)
+  local ubwc_base = bos.base(ubwc_addr)
+  local ubwc_pitch = r.RB_2D_DST_FLAGS_PITCH
+
+  local blit = {
+    width = width,
+    height = height,
+    pitch = pitch,
+    addr = addr,
+    base = base,
+    ubwc_addr = ubwc_addr,
+    ubwc_base = ubwc_base,
+    ubwc_pitch = ubwc_pitch,
+    endaddr = 0, -- filled in later
+  }
+  printf("Found cp blit: 0x%x (0x%x) %dx%d UBWC 0x%x (0x%x) %s\n", addr, base, width, height, ubwc_addr, ubwc_base, r.RB_2D_DST_INFO.TILE_MODE)
+
+  allblits[nallblits] = blit
+  nallblits = nallblits + 1
+end
+
+-- Texture-state callback.  For the first texture state seen (later ones
+-- are ignored via found_tex), prints a C initializer block describing the
+-- texture layout: format, tile mode, UBWC flag, level-0 dimensions and, per
+-- mip level, the offset/pitch of the lowest-addressed recorded blit with
+-- matching dimensions.  `size` is not used.
+function A5XX_TEX_CONST(pkt, size)
+  if (found_tex ~= 0) then
+    return
+  end
+  found_tex = 1
+
+  local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32)
+  -- UBWC base on a5xx seems to be at the start of each miplevel, followed by pixels
+  -- somewhere past that.
+  local ubwc_base = base
+  local width0 = pkt[1].WIDTH
+  local height0 = pkt[1].HEIGHT
+  local depth0 = pkt[5].DEPTH
+  local is_3d = (tostring(pkt[2].TYPE) == "A5XX_TEX_3D")
+
+  printf("Found texture state:\n %ux%ux%u (%s, %s, UBWC=%s)\n",
+    width0, height0, depth0, pkt[0].FMT, pkt[0].TILE_MODE, tostring(pkt[3].FLAG))
+
+  -- Note that in some case the texture has some extra page or so
+  -- at the beginning:
+  local basebase = bos.base(base)
+  printf("base: 0x%x (0x%x)\n", base, basebase)
+  printf("ubwcbase: 0x%x (0x%x)\n", ubwc_base, bos.base(ubwc_base))
+
+  -- Print one "{ .offset, .pitch }" entry per mip level that has a recorded
+  -- blit, walking levels until both dimensions have minified to 1.
+  -- addrkey/pitchkey name the blit fields to print; offsets are relative
+  -- to `origin`.
+  local function print_slices(addrkey, pitchkey, origin)
+    local level = 0
+    repeat
+      local w = minify(width0, level)
+      local h = minify(height0, level)
+      local blit = get_first_blit(basebase, w, h)
+      if blit then
+        printf(" { .offset = %d, .pitch = %u },\n",
+          blit[addrkey] - origin,
+          blit[pitchkey])
+      end
+      level = level + 1
+    until w == 1 and h == 1
+  end
+
+  printf(" {\n")
+  printf(" .format = %s,\n", pkt[0].FMT)
+  if is_3d then
+    printf(" .is_3d = true,\n")
+  end
+
+  printf(" .layout = {\n")
+  printf(" .tile_mode = %s,\n", pkt[0].TILE_MODE)
+  printf(" .ubwc = %s,\n", tostring(pkt[3].FLAG))
+
+  if is_3d then
+    printf(" .width0 = %d, .height0 = %d, .depth0 = %d,\n", width0, height0, depth0)
+  else
+    printf(" .width0 = %d, .height0 = %d,\n", width0, height0)
+  end
+
+  printf(" .slices = {\n")
+  print_slices("addr", "pitch", base)
+  printf(" },\n")
+
+  if pkt[3].FLAG then
+    printf(" .ubwc_slices = {\n")
+    print_slices("ubwc_addr", "ubwc_pitch", ubwc_base)
+    printf(" },\n")
+  end
+
+  printf(" },\n")
+  printf(" },\n")
+  printf("\n\n")
+end
+
--- /dev/null
+-- Parse logs from https://github.com/freedreno/freedreno/
+-- test-texturator.c to generate a src/freedreno/fdl/fd6_layout_test.c
+-- block. We figure out the offsets from blits, but there may be some
+-- unrelated blits. So just save all of them until we find the
+-- texture state. This gives us the base address, and the miplevel #0
+-- width/height/depth. Then work backwards from there finding the
+-- blits to the same dst buffer and deducing the miplevel from the
+-- minified dimensions
+
+local posix = require "posix"
+
+-- banner printed once when the script is loaded
+io.write("Analyzing Data...\n")
+
+-- handle returned by rnn.init for "a630"; draw() reads register fields
+-- through it
+local r = rnn.init("a630")
+-- set to 1 by A6XX_TEX_CONST() once the first texture state has been
+-- handled, so that later texture state is ignored
+local found_tex = 0
+
+-- blits recorded by draw(), stored at indices 0..nallblits-1 (0-based);
+-- both are reset by start_cmdstream()
+local allblits = {}
+local nallblits = 0
+
+-- Among the recorded blits whose base, width and height all match the
+-- arguments, return the one with the lowest addr, or nil if none match.
+function get_first_blit(base, width, height)
+  local best = nil
+
+  for idx = 0, nallblits - 1 do
+    local cand = allblits[idx]
+    local matches = cand.base == base and cand.width == width and cand.height == height
+    if matches and (best == nil or cand.addr < best.addr) then
+      best = cand
+    end
+  end
+
+  return best
+end
+
+-- Shift `val` right by `lvls` bits, never returning less than 1.
+function minify(val, lvls)
+  local shifted = val >> lvls
+  if shifted >= 1 then
+    return shifted
+  end
+  return 1
+end
+
+-- C-style printf: formats `fmt` with the remaining arguments and writes the
+-- result with io.write, returning whatever io.write returns.
+function printf(fmt, ...)
+  local text = string.format(fmt, ...)
+  return io.write(text)
+end
+
+-- Callback: announces the cmdstream `name` and discards the blits recorded
+-- so far.
+function start_cmdstream(name)
+  local line = "Parsing " .. name .. "\n"
+  io.write(line)
+  allblits, nallblits = {}, 0
+end
+
+-- Per-draw callback: records every "BLIT_OP_SCALE" whose destination
+-- top-left is 0,0 into allblits, including the address, base and pitch of
+-- its UBWC flag buffer.  `nindx` is not used.
+function draw(primtype, nindx)
+  if primtype ~= "BLIT_OP_SCALE" then
+    return
+  end
+
+  -- Just in case, filter out anything that isn't starting
+  -- at 0,0
+  if r.GRAS_2D_DST_TL.X ~= 0 or r.GRAS_2D_DST_TL.Y ~= 0 then
+    return
+  end
+
+  local blit = {}
+
+  blit.width = r.GRAS_2D_DST_BR.X + 1
+  blit.height = r.GRAS_2D_DST_BR.Y + 1
+  blit.pitch = r.RB_2D_DST_SIZE.PITCH
+  blit.addr = r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32)
+  blit.base = bos.base(blit.addr)
+  blit.ubwc_addr = r.RB_2D_DST_FLAGS_LO | (r.RB_2D_DST_FLAGS_HI << 32)
+  -- look up the flag buffer's own address (this used to read the
+  -- misspelled, and therefore nil, field "uwbc_addr")
+  blit.ubwc_base = bos.base(blit.ubwc_addr)
+  blit.ubwc_pitch = r.RB_2D_DST_FLAGS_PITCH.PITCH
+  blit.endaddr = 0 -- filled in later
+  printf("Found blit: 0x%x (0x%x) %dx%d UBWC 0x%x (0x%x)\n", blit.addr, blit.base, blit.width, blit.height, blit.ubwc_addr, blit.ubwc_base)
+
+  allblits[nallblits] = blit
+  nallblits = nallblits + 1
+end
+
+-- Texture-state callback.  For the first texture state seen (later ones
+-- are ignored via found_tex), prints a C initializer block describing the
+-- texture layout: format, tile mode, UBWC flag, level-0 dimensions and, per
+-- mip level, the offset/pitch of the lowest-addressed recorded blit with
+-- matching dimensions.  `size` is not used.
+function A6XX_TEX_CONST(pkt, size)
+  if (found_tex ~= 0) then
+    return
+  end
+  found_tex = 1
+
+  local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32)
+  local ubwc_base = pkt[7].FLAG_LO | (pkt[8].FLAG_HI << 32)
+  local width0 = pkt[1].WIDTH
+  local height0 = pkt[1].HEIGHT
+  local depth0 = pkt[5].DEPTH
+  local is_3d = (tostring(pkt[2].TYPE) == "A6XX_TEX_3D")
+
+  printf("Found texture state:\n %ux%ux%u (%s, %s, MIN_LAYERSZ=0x%x, TILE_ALL=%s, UBWC=%s FLAG_LOG2=%ux%u)\n",
+    width0, height0, depth0, pkt[0].FMT, pkt[0].TILE_MODE, pkt[3].MIN_LAYERSZ, tostring(pkt[3].TILE_ALL), tostring(pkt[3].FLAG), pkt[10].FLAG_BUFFER_LOGW, pkt[10].FLAG_BUFFER_LOGH)
+
+  -- Note that in some case the texture has some extra page or so
+  -- at the beginning:
+  local basebase = bos.base(base)
+  printf("base: 0x%x (0x%x)\n", base, basebase)
+  printf("ubwcbase: 0x%x (0x%x)\n", ubwc_base, bos.base(ubwc_base))
+
+  -- Print one "{ .offset, .pitch }" entry per mip level that has a recorded
+  -- blit, walking levels until both dimensions have minified to 1.
+  -- addrkey/pitchkey name the blit fields to print; offsets are relative
+  -- to `origin`.
+  local function print_slices(addrkey, pitchkey, origin)
+    local level = 0
+    repeat
+      local w = minify(width0, level)
+      local h = minify(height0, level)
+      local blit = get_first_blit(basebase, w, h)
+      if blit then
+        printf(" { .offset = %d, .pitch = %u },\n",
+          blit[addrkey] - origin,
+          blit[pitchkey])
+      end
+      level = level + 1
+    until w == 1 and h == 1
+  end
+
+  printf(" {\n")
+  printf(" .format = %s,\n", pkt[0].FMT)
+  if is_3d then
+    printf(" .is_3d = true,\n")
+  end
+
+  printf(" .layout = {\n")
+  printf(" .tile_mode = %s,\n", pkt[0].TILE_MODE)
+  printf(" .ubwc = %s,\n", tostring(pkt[3].FLAG))
+
+  if is_3d then
+    -- NOTE(review): the field is emitted as ".depth"; confirm that this is
+    -- the member name the generated test's struct actually uses
+    printf(" .width0 = %d, .height0 = %d, .depth = %d,\n", width0, height0, depth0)
+  else
+    printf(" .width0 = %d, .height0 = %d,\n", width0, height0)
+  end
+
+  printf(" .slices = {\n")
+  print_slices("addr", "pitch", base)
+  printf(" },\n")
+
+  if pkt[3].FLAG then
+    printf(" .ubwc_slices = {\n")
+    print_slices("ubwc_addr", "ubwc_pitch", ubwc_base)
+    printf(" },\n")
+  end
+
+  printf(" },\n")
+  printf(" },\n")
+  printf("\n\n")
+end
+
--- /dev/null
+/*
+ * Copyright (c) 2012-2018 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __UTIL_H__
+#define __UTIL_H__
+
+#include <ctype.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/* old-style program binary XOR'd ascii w/ 0xff */
+#ifndef ASCII_XOR
+# define ASCII_XOR 0
+#endif
+
+/*
+ * Return a string of `lvl` tab characters.  The result points into a
+ * static string of eight tabs; `lvl` is clamped to 0..8 so the returned
+ * pointer always stays inside that string.
+ */
+static inline const char *tab(int lvl)
+{
+	static const char tabs[] = "\t\t\t\t\t\t\t\t";
+	/* sizeof counts the terminating NUL, so this is the number of tabs */
+	const int max_lvl = (int)sizeof(tabs) - 1;
+
+	if (lvl < 0) {
+		lvl = 0;
+	} else if (lvl > max_lvl) {
+		lvl = max_lvl;
+	}
+
+	return &tabs[max_lvl - lvl];
+}
+
+/* reinterpret the bits of a dword as a float (no numeric conversion) */
+static inline float d2f(uint32_t d)
+{
+	union {
+		uint32_t bits;
+		float value;
+	} pun;
+
+	pun.bits = d;
+	return pun.value;
+}
+
+/*
+ * Print `sz` bytes starting at `buf` as 32-bit hex words, each assembled
+ * least-significant byte first, eight words per tab-indented line.  If
+ * `sz` is not a multiple of 4, the final word is built from the remaining
+ * bytes only (missing high bytes are zero); nothing past `sz` is read.
+ */
+static inline void dump_hex(const void *buf, int sz)
+{
+	const uint8_t *ptr = (const uint8_t *)buf;
+	int remaining = sz;
+	int i = 0;
+
+	while (remaining > 0) {
+		uint32_t d = 0;
+
+		printf("%s", (i % 8) ? " " : "\t");
+
+		/*
+		 * Widen each byte before shifting: a uint8_t promotes to int,
+		 * and shifting a value >= 0x80 left by 24 overflows int.
+		 */
+		for (int shift = 0; (shift < 32) && (remaining > 0); shift += 8) {
+			d |= (uint32_t)*(ptr++) << shift;
+			remaining--;
+		}
+
+		printf("%08x", d);
+
+		if ((i % 8) == 7) {
+			printf("\n");
+		}
+
+		i++;
+	}
+
+	/* terminate a partially filled last line */
+	if (i % 8) {
+		printf("\n");
+	}
+}
+
+/*
+ * Print the complete 32-bit words in the `sz` bytes at `buf` as floats
+ * (via d2f), each word assembled least-significant byte first, eight
+ * values per tab-indented line.  Trailing bytes that do not make up a
+ * whole word are ignored.
+ */
+static inline void
+dump_float(const void *buf, int sz)
+{
+	const uint8_t *ptr = (const uint8_t *)buf;
+	int i = 0;
+
+	/* count down whole words instead of forming a pointer at buf + sz - 3 */
+	for (int remaining = sz; remaining >= 4; remaining -= 4) {
+		uint32_t d = 0;
+
+		printf("%s", (i % 8) ? " " : "\t");
+
+		/*
+		 * Widen each byte before shifting: a uint8_t promotes to int,
+		 * and shifting a value >= 0x80 left by 24 overflows int.
+		 */
+		d |= (uint32_t)*(ptr++) << 0;
+		d |= (uint32_t)*(ptr++) << 8;
+		d |= (uint32_t)*(ptr++) << 16;
+		d |= (uint32_t)*(ptr++) << 24;
+
+		printf("%8f", d2f(d));
+
+		if ((i % 8) == 7) {
+			printf("\n");
+		}
+
+		i++;
+	}
+
+	/* terminate a partially filled last line */
+	if (i % 8) {
+		printf("\n");
+	}
+}
+
+/*
+ * True for 7-bit characters that can be printed verbatim: anything that is
+ * not a control character, plus tab.  The 7-bit test is spelled out rather
+ * than using isascii(), which is not part of ISO C.  The argument is
+ * evaluated more than once, so it must not have side effects.
+ */
+#define is_ok_ascii(c) \
+	((((c) & ~0x7f) == 0) && (((c) == '\t') || !iscntrl(c)))
+
+/* XOR each of the `sz` bytes at `buf` with ASCII_XOR, in place */
+static inline void
+clean_ascii(char *buf, int sz)
+{
+	uint8_t *bytes = (uint8_t *)buf;
+
+	for (int n = 0; n < sz; n++) {
+		bytes[n] ^= ASCII_XOR;
+	}
+}
+
+/*
+ * Print the `sz` bytes at `buf` as tab-indented text, after XORing each
+ * byte with ASCII_XOR.  A newline starts a new indented line, a NUL byte
+ * prints a separator line, and any other byte rejected by is_ok_ascii()
+ * is shown as '?'.
+ */
+static inline void
+dump_ascii(const void *buf, int sz)
+{
+	const uint8_t *bytes = (const uint8_t *)buf;
+
+	printf("\t");
+	for (int n = 0; n < sz; n++) {
+		uint8_t c = bytes[n] ^ ASCII_XOR;
+
+		switch (c) {
+		case '\n':
+			printf("\n\t");
+			break;
+		case '\0':
+			printf("\n\t-----------------------------------\n\t");
+			break;
+		default:
+			if (is_ok_ascii(c)) {
+				printf("%c", c);
+			} else {
+				printf("?");
+			}
+			break;
+		}
+	}
+	printf("\n");
+}
+
+/*
+ * Print a combined hex/ascii dump of the `sz` bytes at `buf`, indented by
+ * tab(level): a separator line, a byte-count line, then rows of up to four
+ * 32-bit words (assembled least-significant byte first), each row prefixed
+ * with its byte offset and followed by the same bytes as characters
+ * (XORed with ASCII_XOR; non-printable bytes shown as '.').  If `sz` is
+ * not a multiple of 4 the final word is built from the remaining bytes
+ * only; nothing past `sz` is read.
+ */
+static inline void
+dump_hex_ascii(const void *buf, int sz, int level)
+{
+	const uint8_t *ptr = (const uint8_t *)buf;
+	const uint8_t *end = ptr + sz;
+	const uint8_t *ascii = ptr;
+	int i = 0;
+
+	printf("%s-----------------------------------------------\n", tab(level));
+	printf("%s%d (0x%x) bytes\n", tab(level), sz, sz);
+
+	while (ptr < end) {
+		uint32_t d = 0;
+
+		if (i % 4) {
+			printf(" ");
+		} else {
+			printf("%s%06x: ", tab(level), (uint32_t)(ptr - (const uint8_t *)buf));
+		}
+
+		/*
+		 * Widen each byte before shifting: a uint8_t promotes to int,
+		 * and shifting a value >= 0x80 left by 24 overflows int.
+		 */
+		for (int shift = 0; (shift < 32) && (ptr < end); shift += 8) {
+			d |= (uint32_t)*(ptr++) << shift;
+		}
+
+		printf("%08x", d);
+
+		if ((i % 4) == 3) {
+			printf("\t|");
+			/* a full row is 16 bytes; stop early if the last word was partial */
+			for (int j = 0; (j < 16) && (ascii < end); j++) {
+				uint8_t c = *(ascii++);
+				c ^= ASCII_XOR;
+				printf("%c", ((c < 0x80) && !iscntrl(c)) ? c : '.');
+			}
+			printf("|\n");
+		}
+
+		i++;
+	}
+
+	/* finish a row that holds fewer than four words */
+	if (i % 4) {
+		for (int j = 4 - (i % 4); j > 0; j--) {
+			printf(" ");
+		}
+		printf("\t|");
+		while (ascii < end) {
+			uint8_t c = *(ascii++);
+			c ^= ASCII_XOR;
+			printf("%c", ((c < 0x80) && !iscntrl(c)) ? c : '.');
+		}
+		printf("|\n");
+	}
+}
+
+#endif /* __UTIL_H__ */
# SOFTWARE.
inc_freedreno = include_directories(['.', './registers'])
+inc_freedreno_rnn = include_directories('rnn')
subdir('common')
subdir('registers')
# Everything that depends on rnn requires (indirectly) libxml2:
if dep_libxml2.found()
subdir('rnn')
+ subdir('decode')
endif
if with_tools.contains('drm-shim')