d0b269578adff26fe95b6a506f755db417e4f5e8
[mesa.git] src/freedreno/decode/cffdec.c
1 /*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <err.h>
27 #include <inttypes.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 #include <stdarg.h>
32 #include <stdbool.h>
33 #include <unistd.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/wait.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <assert.h>
40 #include <signal.h>
41 #include <errno.h>
42
43 #include "redump.h"
44 #include "disasm.h"
45 #include "script.h"
46 #include "rnnutil.h"
47 #include "buffers.h"
48 #include "cffdec.h"
49
50 /* ************************************************************************* */
51 /* originally based on kernel recovery dump code: */
52
53 static const struct cffdec_options *options;
54
55 static bool needs_wfi = false;
56 static bool summary = false;
57 static bool in_summary = false;
58 static int vertices;
59
60 static inline unsigned regcnt(void)
61 {
62 if (options->gpu_id >= 500)
63 return 0xffff;
64 else
65 return 0x7fff;
66 }
67
68 static int is_64b(void)
69 {
70 return options->gpu_id >= 500;
71 }
72
73
74 static int draws[3];
75 static struct {
76 uint64_t base;
77 uint32_t size; /* in dwords */
78 /* Generally cmdstream consists of multiple IB calls to different
79 * buffers, which are themselves often re-used for each tile. The
80 * triggered flag serves two purposes to help make it more clear
81 * what part of the cmdstream is before vs after the GPU hang:
82 *
83 * 1) if in IB2 we are past the point within the IB2 buffer where
84 * the GPU hung, but IB1 is not past the point within its
85 * buffer where the GPU had hung, then we know the GPU hang
86 * happens on a future use of that IB2 buffer.
87 *
88 * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
89 * hung, but we've already passed the trigger point at the same
90 * IB level, we know that we are past the point where the GPU
91 * had hung.
92 *
93 * So this is a one-way switch, false->true. And a higher #'d
94 * IB level isn't considered triggered unless the lower #'d IB
95 * level is.
96 */
97 bool triggered;
98 } ibs[4];
99 static int ib;
100
101 static int draw_count;
102 static int current_draw_count;
103
104 /* query mode.. to handle symbolic register name queries, we need to
105 * defer parsing the query string until after gpu_id is known and the
106 * rnn db is loaded:
107 */
108 static int *queryvals;
109
110 static bool
111 quiet(int lvl)
112 {
113 if ((options->draw_filter != -1) && (options->draw_filter != current_draw_count))
114 return true;
115 if ((lvl >= 3) && (summary || options->querystrs || options->script))
116 return true;
117 if ((lvl >= 2) && (options->querystrs || options->script))
118 return true;
119 return false;
120 }
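/* Rough summary of how the levels behave (as far as I can tell from
 * quiet() above):
 *
 *   printl(2, ..)  - dropped when a script or --query is active
 *   printl(3, ..)  - additionally dropped in summary mode
 *   any level      - dropped while a draw filter is set and the
 *                    current draw doesn't match it
 */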
121
122 void
123 printl(int lvl, const char *fmt, ...)
124 {
125 va_list args;
126 if (quiet(lvl))
127 return;
128 va_start(args, fmt);
129 vprintf(fmt, args);
130 va_end(args);
131 }
132
133 static const char *levels[] = {
134 "\t",
135 "\t\t",
136 "\t\t\t",
137 "\t\t\t\t",
138 "\t\t\t\t\t",
139 "\t\t\t\t\t\t",
140 "\t\t\t\t\t\t\t",
141 "\t\t\t\t\t\t\t\t",
142 "\t\t\t\t\t\t\t\t\t",
143 "x",
144 "x",
145 "x",
146 "x",
147 "x",
148 "x",
149 };
150
151 enum state_src_t {
152 STATE_SRC_DIRECT,
153 STATE_SRC_INDIRECT,
154 STATE_SRC_BINDLESS,
155 };
156
157 /* SDS (CP_SET_DRAW_STATE) helpers: */
158 static void load_all_groups(int level);
159 static void disable_all_groups(void);
160
161 static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level);
162 static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
163
164 static bool
165 highlight_gpuaddr(uint64_t gpuaddr)
166 {
167 if (!options->color)
168 return false;
169
170 if (!options->ibs[ib].base)
171 return false;
172
173 if ((ib > 0) && options->ibs[ib-1].base && !ibs[ib-1].triggered)
174 return false;
175
176 if (ibs[ib].triggered)
177 return true;
178
179 if (options->ibs[ib].base != ibs[ib].base)
180 return false;
181
182 uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
183 uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
184
185 bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
186
187 ibs[ib].triggered |= triggered;
188
189 if (triggered)
190 printf("ESTIMATED CRASH LOCATION!\n");
191
192 return triggered;
193 }
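/* Worked example (illustrative, assuming options->ibs[ib].rem counts the
 * dwords the CP had not yet consumed when the hang was detected): for an
 * IB of size 0x100 dwords with rem = 0x40, the highlighted window is
 *
 *   start = base + 4 * (0x100 - 0x40) = base + 0x300
 *   end   = base + 4 * 0x100          = base + 0x400
 *
 * i.e. everything from the estimated crash point to the end of that IB.
 */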
194
195 static void
196 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
197 {
198 int i, j;
199 int lastzero = 1;
200
201 if (quiet(2))
202 return;
203
204 for (i = 0; i < sizedwords; i += 8) {
205 int zero = 1;
206
207 /* always show first row: */
208 if (i == 0)
209 zero = 0;
210
211 for (j = 0; (j < 8) && (i+j < sizedwords) && zero; j++)
212 if (dwords[i+j])
213 zero = 0;
214
215 if (zero && !lastzero)
216 printf("*\n");
217
218 lastzero = zero;
219
220 if (zero)
221 continue;
222
223 uint64_t addr = gpuaddr(&dwords[i]);
224 bool highlight = highlight_gpuaddr(addr);
225
226 if (highlight)
227 printf("\x1b[0;1;31m");
228
229 if (is_64b()) {
230 printf("%016lx:%s", addr, levels[level]);
231 } else {
232 printf("%08x:%s", (uint32_t)addr, levels[level]);
233 }
234
235 if (highlight)
236 printf("\x1b[0m");
237
238 printf("%04x:", i * 4);
239
240 for (j = 0; (j < 8) && (i+j < sizedwords); j++) {
241 printf(" %08x", dwords[i+j]);
242 }
243
244 printf("\n");
245 }
246 }
247
248 static void
249 dump_float(float *dwords, uint32_t sizedwords, int level)
250 {
251 int i;
252 for (i = 0; i < sizedwords; i++) {
253 if ((i % 8) == 0) {
254 if (is_64b()) {
255 printf("%016lx:%s", gpuaddr(dwords), levels[level]);
256 } else {
257 printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
258 }
259 } else {
260 printf(" ");
261 }
262 printf("%8f", *(dwords++));
263 if ((i % 8) == 7)
264 printf("\n");
265 }
266 if (i % 8)
267 printf("\n");
268 }
269
270 /* I believe the surface format is low bits:
271 #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
272 comments in sys2gmem_tex_const indicate that address is [31:12], but
273 looks like at least some of the bits above the format have different meaning..
274 */
275 static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr,
276 uint32_t *flags, uint32_t mask)
277 {
278 assert(!is_64b()); /* this is only used on a2xx */
279 *gpuaddr = dword & ~mask;
280 *flags = dword & mask;
281 }
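/* For example (made-up value): a dword of 0x40001006 with mask 0xfff
 * splits into gpuaddr 0x40001000 and flags 0x006, which is how e.g.
 * dump_a2xx_tex_const() pulls the surface format out of the low flag bits.
 */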
282
283 static uint32_t type0_reg_vals[0xffff + 1];
284 static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals)/8]; /* written since last draw */
285 static uint8_t type0_reg_written[sizeof(type0_reg_vals)/8];
286 static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
287
288 static bool reg_rewritten(uint32_t regbase)
289 {
290 return !!(type0_reg_rewritten[regbase/8] & (1 << (regbase % 8)));
291 }
292
293 bool reg_written(uint32_t regbase)
294 {
295 return !!(type0_reg_written[regbase/8] & (1 << (regbase % 8)));
296 }
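/* Illustrative: the written/rewritten arrays are plain bitmaps, one bit per
 * register dword, e.g. regbase 0x2281 lands in byte 0x2281/8 = 0x450,
 * bit 0x2281%8 = 1:
 *
 *   type0_reg_written[0x450] & (1 << 1)
 */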
297
298 static void clear_rewritten(void)
299 {
300 memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
301 }
302
303 static void clear_written(void)
304 {
305 memset(type0_reg_written, 0, sizeof(type0_reg_written));
306 clear_rewritten();
307 }
308
309 uint32_t reg_lastval(uint32_t regbase)
310 {
311 return lastvals[regbase];
312 }
313
314 static void
315 clear_lastvals(void)
316 {
317 memset(lastvals, 0, sizeof(lastvals));
318 }
319
320 uint32_t
321 reg_val(uint32_t regbase)
322 {
323 return type0_reg_vals[regbase];
324 }
325
326 void
327 reg_set(uint32_t regbase, uint32_t val)
328 {
329 assert(regbase < regcnt());
330 type0_reg_vals[regbase] = val;
331 type0_reg_written[regbase/8] |= (1 << (regbase % 8));
332 type0_reg_rewritten[regbase/8] |= (1 << (regbase % 8));
333 }
334
335 static void
336 reg_dump_scratch(const char *name, uint32_t dword, int level)
337 {
338 unsigned r;
339
340 if (quiet(3))
341 return;
342
343 r = regbase("CP_SCRATCH[0].REG");
344
345 // if not, try old a2xx/a3xx version:
346 if (!r)
347 r = regbase("CP_SCRATCH_REG0");
348
349 if (!r)
350 return;
351
352 printf("%s:%u,%u,%u,%u\n", levels[level],
353 reg_val(r + 4), reg_val(r + 5),
354 reg_val(r + 6), reg_val(r + 7));
355 }
356
357 static void
358 dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
359 {
360 void *buf;
361
362 if (quiet(quietlvl))
363 return;
364
365 buf = hostptr(gpuaddr);
366 if (buf) {
367 dump_hex(buf, sizedwords, level+1);
368 }
369 }
370
371 static void
372 dump_gpuaddr(uint64_t gpuaddr, int level)
373 {
374 dump_gpuaddr_size(gpuaddr, level, 64, 3);
375 }
376
377 static void
378 reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
379 {
380 dump_gpuaddr(dword, level);
381 }
382
383 uint32_t gpuaddr_lo;
384 static void
385 reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
386 {
387 gpuaddr_lo = dword;
388 }
389
390 static void
391 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
392 {
393 dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
394 }
395
396
397 static void
398 dump_shader(const char *ext, void *buf, int bufsz)
399 {
400 if (options->dump_shaders) {
401 static int n = 0;
402 char filename[16]; /* "0000.vo3" plus NUL needs 9 bytes */
403 int fd;
404 sprintf(filename, "%04d.%s", n++, ext);
405 fd = open(filename, O_WRONLY| O_TRUNC | O_CREAT, 0644);
406 write(fd, buf, bufsz);
407 close(fd);
408 }
409 }
410
411 static void
412 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
413 {
414 void *buf;
415
416 gpuaddr &= 0xfffffffffffffff0;
417
418 if (quiet(3))
419 return;
420
421 buf = hostptr(gpuaddr);
422 if (buf) {
423 uint32_t sizedwords = hostlen(gpuaddr) / 4;
424 const char *ext;
425
426 dump_hex(buf, min(64, sizedwords), level+1);
427 disasm_a3xx(buf, sizedwords, level+2, stdout, options->gpu_id);
428
429 /* this is a bit of an ugly way to do it, but oh well.. */
430 if (strstr(name, "SP_VS_OBJ")) {
431 ext = "vo3";
432 } else if (strstr(name, "SP_FS_OBJ")) {
433 ext = "fo3";
434 } else if (strstr(name, "SP_GS_OBJ")) {
435 ext = "go3";
436 } else if (strstr(name, "SP_CS_OBJ")) {
437 ext = "co3";
438 } else {
439 ext = NULL;
440 }
441
442 if (ext)
443 dump_shader(ext, buf, sizedwords * 4);
444 }
445 }
446
447 static void
448 reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
449 {
450 disasm_gpuaddr(name, dword, level);
451 }
452
453 static void
454 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
455 {
456 disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
457 }
458
459 /* Find the value of the TEX_COUNT register that corresponds to the named
460 * TEX_SAMP/TEX_CONST reg.
461 *
462 * Note: this kinda assumes an equal # of samplers and textures, but I'm
463 * not really sure there is a much better option. I suppose on a6xx we
464 * could instead decode the bitfields in SP_xS_CONFIG.
465 */
466 static int
467 get_tex_count(const char *name)
468 {
469 char count_reg[strlen(name) + 5];
470 char *p;
471
472 p = strstr(name, "CONST");
473 if (!p)
474 p = strstr(name, "SAMP");
475 if (!p)
476 return 0;
477
478 int n = p - name;
479 strncpy(count_reg, name, n);
480 strcpy(count_reg + n, "COUNT");
481
482 return reg_val(regbase(count_reg));
483 }
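/* For example, a name like "SP_FS_TEX_CONST_HI" (wired up in the a6xx reg
 * table below) copies the prefix up to "CONST" and appends "COUNT",
 * yielding "SP_FS_TEX_COUNT", whose current value is then used as num_unit.
 */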
484
485 static void
486 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
487 {
488 if (!in_summary)
489 return;
490
491 int num_unit = get_tex_count(name);
492 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
493 void *buf = hostptr(gpuaddr);
494
495 if (!buf)
496 return;
497
498 dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level+1);
499 }
500
501 static void
502 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
503 {
504 if (!in_summary)
505 return;
506
507 int num_unit = get_tex_count(name);
508 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
509 void *buf = hostptr(gpuaddr);
510
511 if (!buf)
512 return;
513
514 dump_tex_const(buf, num_unit, level+1);
515 }
516
517 /*
518 * Registers with special handling (rnndec_decode() handles rest):
519 */
520 #define REG(x, fxn) { #x, fxn }
521 static struct {
522 const char *regname;
523 void (*fxn)(const char *name, uint32_t dword, int level);
524 uint32_t regbase;
525 } reg_a2xx[] = {
526 REG(CP_SCRATCH_REG0, reg_dump_scratch),
527 REG(CP_SCRATCH_REG1, reg_dump_scratch),
528 REG(CP_SCRATCH_REG2, reg_dump_scratch),
529 REG(CP_SCRATCH_REG3, reg_dump_scratch),
530 REG(CP_SCRATCH_REG4, reg_dump_scratch),
531 REG(CP_SCRATCH_REG5, reg_dump_scratch),
532 REG(CP_SCRATCH_REG6, reg_dump_scratch),
533 REG(CP_SCRATCH_REG7, reg_dump_scratch),
534 {NULL},
535 }, reg_a3xx[] = {
536 REG(CP_SCRATCH_REG0, reg_dump_scratch),
537 REG(CP_SCRATCH_REG1, reg_dump_scratch),
538 REG(CP_SCRATCH_REG2, reg_dump_scratch),
539 REG(CP_SCRATCH_REG3, reg_dump_scratch),
540 REG(CP_SCRATCH_REG4, reg_dump_scratch),
541 REG(CP_SCRATCH_REG5, reg_dump_scratch),
542 REG(CP_SCRATCH_REG6, reg_dump_scratch),
543 REG(CP_SCRATCH_REG7, reg_dump_scratch),
544 REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
545 REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
546 REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
547 REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
548 REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
549 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
550 {NULL},
551 }, reg_a4xx[] = {
552 REG(CP_SCRATCH[0].REG, reg_dump_scratch),
553 REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
554 REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
555 REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
556 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
557 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
558 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
559 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
560 REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
561 REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
562 REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
563 REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
564 REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
565 REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
566 REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
567 REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
568 REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
569 REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
570 REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
571 REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
572 REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
573 REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
574 REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
575 REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
576 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
577 {NULL},
578 }, reg_a5xx[] = {
579 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
580 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
581 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
582 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
583 REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
584 REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
585 REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
586 REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
587 REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
588 REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
589 REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
590 REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
591 REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
592 REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
593 REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
594 REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
595 REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
596 REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
597 REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
598 REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
599 REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
600 REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
601 REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
602 REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
603 REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
604 REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
605 REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
606 REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
607 REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
608 REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
609 REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
610 REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
611 REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
612 REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
613 REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
614 REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
615 REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
616 REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
617 REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
618 REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
619 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
620 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
621 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
622 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
623 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
624 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
625 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
626 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
627 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
628 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
629 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
630 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
631 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
632 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
633 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
634 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
635 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
636 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
637 // REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
638 // REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
639 // REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
640 // REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
641 // REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
642 // REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
643 // REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
644 // REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
645 // REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
646 // REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
647
648 // REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
649 // REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
650 // REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
651 // REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
652 // REG(RB_2D_DST_LO, reg_gpuaddr_lo),
653 // REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
654 // REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
655 // REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
656
657 {NULL},
658 }, reg_a6xx[] = {
659 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
660 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
661 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
662 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
663
664 REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
665 REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
666 REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
667 REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
668 REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
669 REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
670 REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
671 REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
672 REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
673 REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
674 REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
675 REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
676
677 REG(SP_VS_TEX_CONST_LO, reg_gpuaddr_lo),
678 REG(SP_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
679 REG(SP_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
680 REG(SP_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
681 REG(SP_HS_TEX_CONST_LO, reg_gpuaddr_lo),
682 REG(SP_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
683 REG(SP_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
684 REG(SP_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
685 REG(SP_DS_TEX_CONST_LO, reg_gpuaddr_lo),
686 REG(SP_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
687 REG(SP_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
688 REG(SP_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
689 REG(SP_GS_TEX_CONST_LO, reg_gpuaddr_lo),
690 REG(SP_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
691 REG(SP_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
692 REG(SP_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
693 REG(SP_FS_TEX_CONST_LO, reg_gpuaddr_lo),
694 REG(SP_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
695 REG(SP_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
696 REG(SP_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
697 REG(SP_CS_TEX_CONST_LO, reg_gpuaddr_lo),
698 REG(SP_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
699 REG(SP_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
700 REG(SP_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
701
702 {NULL},
703 }, *type0_reg;
704
705 static struct rnn *rnn;
706
707 static void
708 init_rnn(const char *gpuname)
709 {
710 rnn = rnn_new(!options->color);
711
712 rnn_load(rnn, gpuname);
713
714 if (options->querystrs) {
715 int i;
716 queryvals = calloc(options->nquery, sizeof(queryvals[0]));
717
718 for (i = 0; i < options->nquery; i++) {
719 int val = strtol(options->querystrs[i], NULL, 0);
720
721 if (val == 0)
722 val = regbase(options->querystrs[i]);
723
724 queryvals[i] = val;
725 printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
726 }
727 }
728
729 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
730 type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
731 if (!type0_reg[idx].regbase) {
732 printf("invalid register name: %s\n", type0_reg[idx].regname);
733 exit(1);
734 }
735 }
736 }
737
738 void
739 reset_regs(void)
740 {
741 clear_written();
742 clear_lastvals();
743 memset(&ibs, 0, sizeof(ibs));
744 }
745
746 void
747 cffdec_init(const struct cffdec_options *_options)
748 {
749 options = _options;
750 summary = options->summary;
751
752 /* in case we're decoding multiple files: */
753 free(queryvals);
754 reset_regs();
755 draw_count = 0;
756
757 /* TODO we need an API to free/cleanup any previous rnn */
758
759 switch (options->gpu_id) {
760 case 200 ... 299:
761 type0_reg = reg_a2xx;
762 init_rnn("a2xx");
763 break;
764 case 300 ... 399:
765 type0_reg = reg_a3xx;
766 init_rnn("a3xx");
767 break;
768 case 400 ... 499:
769 type0_reg = reg_a4xx;
770 init_rnn("a4xx");
771 break;
772 case 500 ... 599:
773 type0_reg = reg_a5xx;
774 init_rnn("a5xx");
775 break;
776 case 600 ... 699:
777 type0_reg = reg_a6xx;
778 init_rnn("a6xx");
779 break;
780 default:
781 errx(-1, "unsupported gpu");
782 }
783 }
784
785 const char *
786 pktname(unsigned opc)
787 {
788 return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
789 }
790
791 const char *
792 regname(uint32_t regbase, int color)
793 {
794 return rnn_regname(rnn, regbase, color);
795 }
796
797 uint32_t
798 regbase(const char *name)
799 {
800 return rnn_regbase(rnn, name);
801 }
802
803 static int
804 endswith(uint32_t regbase, const char *suffix)
805 {
806 const char *name = regname(regbase, 0);
807 const char *s = strstr(name, suffix);
808 if (!s)
809 return 0;
810 return (s - strlen(name) + strlen(suffix)) == name;
811 }
812
813 void
814 dump_register_val(uint32_t regbase, uint32_t dword, int level)
815 {
816 struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
817
818 if (info && info->typeinfo) {
819 uint64_t gpuaddr = 0;
820 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
821 printf("%s%s: %s", levels[level], info->name, decoded);
822
823 /* Try to figure out if we are looking at a gpuaddr.. this
824 * might be useful for other gens too, but at least a5xx has
825 * the _HI/_LO suffix we can look for. Maybe a better approach
826 * would be some special annotation in the xml..
827 */
828 if (options->gpu_id >= 500) {
829 if (endswith(regbase, "_HI") && endswith(regbase-1, "_LO")) {
830 gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase-1);
831 } else if (endswith(regbase, "_LO") && endswith(regbase+1, "_HI")) {
832 gpuaddr = (((uint64_t)reg_val(regbase+1)) << 32) | dword;
833 }
834 }
835
836 if (gpuaddr && hostptr(gpuaddr)) {
837 printf("\t\tbase=%lx, offset=%lu, size=%u",
838 gpubaseaddr(gpuaddr),
839 gpuaddr - gpubaseaddr(gpuaddr),
840 hostlen(gpubaseaddr(gpuaddr)));
841 }
842
843 printf("\n");
844
845 free(decoded);
846 } else if (info) {
847 printf("%s%s: %08x\n", levels[level], info->name, dword);
848 } else {
849 printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
850 }
851
852 if (info) {
853 free(info->name);
854 free(info);
855 }
856 }
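/* Sketch of the _LO/_HI pairing above (made-up values): if
 * SP_VS_OBJ_START_LO was last set to 0x01000000 and we now decode
 * SP_VS_OBJ_START_HI with dword 0x5, the reconstructed address is
 *
 *   gpuaddr = ((uint64_t)0x5 << 32) | reg_val(regbase - 1)
 *           = 0x501000000
 *
 * which is then looked up via hostptr() to print base/offset/size.
 */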
857
858 static void
859 dump_register(uint32_t regbase, uint32_t dword, int level)
860 {
861 if (!quiet(3)) {
862 dump_register_val(regbase, dword, level);
863 }
864
865 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
866 if (type0_reg[idx].regbase == regbase) {
867 type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
868 break;
869 }
870 }
871 }
872
873 static bool
874 is_banked_reg(uint32_t regbase)
875 {
876 return (0x2000 <= regbase) && (regbase < 0x2400);
877 }
878
879 static void
880 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords, int level)
881 {
882 while (sizedwords--) {
883 int last_summary = summary;
884
885 /* access to non-banked registers needs a WFI:
886 * TODO banked register range for a2xx??
887 */
888 if (needs_wfi && !is_banked_reg(regbase))
889 printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
890
891 reg_set(regbase, *dwords);
892 dump_register(regbase, *dwords, level);
893 regbase++;
894 dwords++;
895 summary = last_summary;
896 }
897 }
898
899 static void
900 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level,
901 const char *name)
902 {
903 struct rnndomain *dom;
904 int i;
905
906 dom = rnn_finddomain(rnn->db, name);
907
908 if (!dom)
909 return;
910
911 if (script_packet)
912 script_packet(dwords, sizedwords, rnn, dom);
913
914 if (quiet(2))
915 return;
916
917 for (i = 0; i < sizedwords; i++) {
918 struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
919 char *decoded;
920 if (!(info && info->typeinfo))
921 break;
922 uint64_t value = dwords[i];
923 if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
924 value |= (uint64_t) dwords[i + 1] << 32;
925 i++; /* skip the next dword since we're printing it now */
926 }
927 decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
928 /* Unlike the register printing path, we don't print the name
929 * of the register, so if it doesn't contain other named
930 * things (i.e. it isn't a bitset) then print the register
931 * name as if it's a bitset with a single entry. This avoids
932 * having to create a dummy register with a single entry to
933 * get a name in the decoding.
934 */
935 if (info->typeinfo->type == RNN_TTYPE_BITSET ||
936 info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
937 printf("%s%s\n", levels[level], decoded);
938 } else {
939 printf("%s{ %s%s%s = %s }\n", levels[level],
940 rnn->vc->colors->rname, info->name,
941 rnn->vc->colors->reset, decoded);
942 }
943 free(decoded);
944 free(info->name);
945 free(info);
946 }
947 }
948
949
950 static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
951 static unsigned mode;
952 static const char *render_mode;
953 static enum {
954 MODE_BINNING = 0x1,
955 MODE_GMEM = 0x2,
956 MODE_BYPASS = 0x4,
957 MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
958 } enable_mask = MODE_ALL;
959 static bool skip_ib2_enable_global;
960 static bool skip_ib2_enable_local;
961
962 static void
963 print_mode(int level)
964 {
965 if ((options->gpu_id >= 500) && !quiet(2)) {
966 printf("%smode: %s\n", levels[level], render_mode);
967 printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global, skip_ib2_enable_local);
968 }
969 }
970
971 static bool
972 skip_query(void)
973 {
974 switch (options->query_mode) {
975 case QUERY_ALL:
976 /* never skip: */
977 return false;
978 case QUERY_WRITTEN:
979 for (int i = 0; i < options->nquery; i++) {
980 uint32_t regbase = queryvals[i];
981 if (!reg_written(regbase)) {
982 continue;
983 }
984 if (reg_rewritten(regbase)) {
985 return false;
986 }
987 }
988 return true;
989 case QUERY_DELTA:
990 for (int i = 0; i < options->nquery; i++) {
991 uint32_t regbase = queryvals[i];
992 if (!reg_written(regbase)) {
993 continue;
994 }
995 uint32_t lastval = reg_val(regbase);
996 if (lastval != lastvals[regbase]) {
997 return false;
998 }
999 }
1000 return true;
1001 }
1002 return true;
1003 }
1004
1005 static void
1006 __do_query(const char *primtype, uint32_t num_indices)
1007 {
1008 int n = 0;
1009
1010 if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
1011 uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1012 uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1013
1014 bin_x1 = scissor_tl & 0xffff;
1015 bin_y1 = scissor_tl >> 16;
1016 bin_x2 = scissor_br & 0xffff;
1017 bin_y2 = scissor_br >> 16;
1018 }
1019
1020 for (int i = 0; i < options->nquery; i++) {
1021 uint32_t regbase = queryvals[i];
1022 if (reg_written(regbase)) {
1023 uint32_t lastval = reg_val(regbase);
1024 printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype,
1025 bin_x1, bin_y1, bin_x2, bin_y2, num_indices);
1026 if (options->gpu_id >= 500)
1027 printf("%s:", render_mode);
1028 printf("\t%08x", lastval);
1029 if (lastval != lastvals[regbase]) {
1030 printf("!");
1031 } else {
1032 printf(" ");
1033 }
1034 if (reg_rewritten(regbase)) {
1035 printf("+");
1036 } else {
1037 printf(" ");
1038 }
1039 dump_register_val(regbase, lastval, 0);
1040 n++;
1041 }
1042 }
1043
1044 if (n > 1)
1045 printf("\n");
1046 }
1047
1048 static void
1049 do_query_compare(const char *primtype, uint32_t num_indices)
1050 {
1051 unsigned saved_enable_mask = enable_mask;
1052 const char *saved_render_mode = render_mode;
1053
1054 /* in 'query-compare' mode, we want to see if the register is written
1055 * or changed in any mode:
1056 *
1057 * (NOTE: this could cause false-positive for 'query-delta' if the reg
1058 * is written with different values in binning vs sysmem/gmem mode, as
1059 * we don't track previous values per-mode, but I think we can live with
1060 * that)
1061 */
1062 enable_mask = MODE_ALL;
1063
1064 clear_rewritten();
1065 load_all_groups(0);
1066
1067 if (!skip_query()) {
1068 /* dump binning pass values: */
1069 enable_mask = MODE_BINNING;
1070 render_mode = "BINNING";
1071 clear_rewritten();
1072 load_all_groups(0);
1073 __do_query(primtype, num_indices);
1074
1075 /* dump draw pass values: */
1076 enable_mask = MODE_GMEM | MODE_BYPASS;
1077 render_mode = "DRAW";
1078 clear_rewritten();
1079 load_all_groups(0);
1080 __do_query(primtype, num_indices);
1081
1082 printf("\n");
1083 }
1084
1085 enable_mask = saved_enable_mask;
1086 render_mode = saved_render_mode;
1087
1088 disable_all_groups();
1089 }
1090
1091 /* well, actually query and script..
1092 * NOTE: call this before dump_register_summary()
1093 */
1094 static void
1095 do_query(const char *primtype, uint32_t num_indices)
1096 {
1097 if (script_draw)
1098 script_draw(primtype, num_indices);
1099
1100 if (options->query_compare) {
1101 do_query_compare(primtype, num_indices);
1102 return;
1103 }
1104
1105 if (skip_query())
1106 return;
1107
1108 __do_query(primtype, num_indices);
1109 }
1110
1111 static void
1112 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1113 {
1114 uint32_t start = dwords[1] >> 16;
1115 uint32_t size = dwords[1] & 0xffff;
1116 const char *type = NULL, *ext = NULL;
1117 enum shader_t disasm_type;
1118
1119 switch (dwords[0]) {
1120 case 0:
1121 type = "vertex";
1122 ext = "vo";
1123 disasm_type = SHADER_VERTEX;
1124 break;
1125 case 1:
1126 type = "fragment";
1127 ext = "fo";
1128 disasm_type = SHADER_FRAGMENT;
1129 break;
1130 default:
1131 type = "<unknown>";
1132 disasm_type = 0;
1133 break;
1134 }
1135
1136 printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start, size);
1137 disasm_a2xx(dwords + 2, sizedwords - 2, level+2, disasm_type);
1138
1139 /* dump raw shader: */
1140 if (ext)
1141 dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1142 }
1143
1144 static void
1145 cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
1146 {
1147 uint32_t reg = dwords[0] & 0xffff;
1148 int i;
1149 for (i = 1; i < sizedwords; i++) {
1150 dump_register(reg, dwords[i], level+1);
1151 reg_set(reg, dwords[i]);
1152 reg++;
1153 }
1154 }
1155
1156 enum state_t {
1157 TEX_SAMP = 1,
1158 TEX_CONST,
1159 TEX_MIPADDR, /* a3xx only */
1160 SHADER_PROG,
1161 SHADER_CONST,
1162
1163 // image/ssbo state:
1164 SSBO_0,
1165 SSBO_1,
1166 SSBO_2,
1167
1168 UBO,
1169
1170 // unknown things, just do hexdumps:
1171 UNKNOWN_DWORDS,
1172 UNKNOWN_2DWORDS,
1173 UNKNOWN_4DWORDS,
1174 };
1175
1176 enum adreno_state_block {
1177 SB_VERT_TEX = 0,
1178 SB_VERT_MIPADDR = 1,
1179 SB_FRAG_TEX = 2,
1180 SB_FRAG_MIPADDR = 3,
1181 SB_VERT_SHADER = 4,
1182 SB_GEOM_SHADER = 5,
1183 SB_FRAG_SHADER = 6,
1184 SB_COMPUTE_SHADER = 7,
1185 };
1186
1187 /* TODO there is probably a clever way to let rnndec parse things so
1188 * we don't have to care about packet format differences across gens
1189 */
1190
1191 static void
1192 a3xx_get_state_type(uint32_t *dwords, enum shader_t *stage, enum state_t *state,
1193 enum state_src_t *src)
1194 {
1195 unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1196 unsigned state_type = dwords[1] & 0x3;
1197 static const struct {
1198 enum shader_t stage;
1199 enum state_t state;
1200 } lookup[0xf][0x3] = {
1201 [SB_VERT_TEX][0] = { SHADER_VERTEX, TEX_SAMP },
1202 [SB_VERT_TEX][1] = { SHADER_VERTEX, TEX_CONST },
1203 [SB_FRAG_TEX][0] = { SHADER_FRAGMENT, TEX_SAMP },
1204 [SB_FRAG_TEX][1] = { SHADER_FRAGMENT, TEX_CONST },
1205 [SB_VERT_SHADER][0] = { SHADER_VERTEX, SHADER_PROG },
1206 [SB_VERT_SHADER][1] = { SHADER_VERTEX, SHADER_CONST },
1207 [SB_FRAG_SHADER][0] = { SHADER_FRAGMENT, SHADER_PROG },
1208 [SB_FRAG_SHADER][1] = { SHADER_FRAGMENT, SHADER_CONST },
1209 };
1210
1211 *stage = lookup[state_block_id][state_type].stage;
1212 *state = lookup[state_block_id][state_type].state;
1213 unsigned state_src = (dwords[0] >> 16) & 0x7;
1214 if (state_src == 0 /* SS_DIRECT */)
1215 *src = STATE_SRC_DIRECT;
1216 else
1217 *src = STATE_SRC_INDIRECT;
1218 }
1219
1220 static enum state_src_t
1221 _get_state_src(unsigned dword0)
1222 {
1223 switch ((dword0 >> 16) & 0x3) {
1224 case 0: /* SS4_DIRECT / SS6_DIRECT */
1225 return STATE_SRC_DIRECT;
1226 case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1227 return STATE_SRC_INDIRECT;
1228 case 1: /* SS6_BINDLESS */
1229 return STATE_SRC_BINDLESS;
1230 default:
1231 return STATE_SRC_DIRECT;
1232 }
1233 }
1234
1235 static void
1236 _get_state_type(unsigned state_block_id, unsigned state_type,
1237 enum shader_t *stage, enum state_t *state)
1238 {
1239 static const struct {
1240 enum shader_t stage;
1241 enum state_t state;
1242 } lookup[0x10][0x4] = {
1243 // SB4_VS_TEX:
1244 [0x0][0] = { SHADER_VERTEX, TEX_SAMP },
1245 [0x0][1] = { SHADER_VERTEX, TEX_CONST },
1246 [0x0][2] = { SHADER_VERTEX, UBO },
1247 // SB4_HS_TEX:
1248 [0x1][0] = { SHADER_TCS, TEX_SAMP },
1249 [0x1][1] = { SHADER_TCS, TEX_CONST },
1250 [0x1][2] = { SHADER_TCS, UBO },
1251 // SB4_DS_TEX:
1252 [0x2][0] = { SHADER_TES, TEX_SAMP },
1253 [0x2][1] = { SHADER_TES, TEX_CONST },
1254 [0x2][2] = { SHADER_TES, UBO },
1255 // SB4_GS_TEX:
1256 [0x3][0] = { SHADER_GEOM, TEX_SAMP },
1257 [0x3][1] = { SHADER_GEOM, TEX_CONST },
1258 [0x3][2] = { SHADER_GEOM, UBO },
1259 // SB4_FS_TEX:
1260 [0x4][0] = { SHADER_FRAGMENT, TEX_SAMP },
1261 [0x4][1] = { SHADER_FRAGMENT, TEX_CONST },
1262 [0x4][2] = { SHADER_FRAGMENT, UBO },
1263 // SB4_CS_TEX:
1264 [0x5][0] = { SHADER_COMPUTE, TEX_SAMP },
1265 [0x5][1] = { SHADER_COMPUTE, TEX_CONST },
1266 [0x5][2] = { SHADER_COMPUTE, UBO },
1267 // SB4_VS_SHADER:
1268 [0x8][0] = { SHADER_VERTEX, SHADER_PROG },
1269 [0x8][1] = { SHADER_VERTEX, SHADER_CONST },
1270 [0x8][2] = { SHADER_VERTEX, UBO },
1271 // SB4_HS_SHADER
1272 [0x9][0] = { SHADER_TCS, SHADER_PROG },
1273 [0x9][1] = { SHADER_TCS, SHADER_CONST },
1274 [0x9][2] = { SHADER_TCS, UBO },
1275 // SB4_DS_SHADER
1276 [0xa][0] = { SHADER_TES, SHADER_PROG },
1277 [0xa][1] = { SHADER_TES, SHADER_CONST },
1278 [0xa][2] = { SHADER_TES, UBO },
1279 // SB4_GS_SHADER
1280 [0xb][0] = { SHADER_GEOM, SHADER_PROG },
1281 [0xb][1] = { SHADER_GEOM, SHADER_CONST },
1282 [0xb][2] = { SHADER_GEOM, UBO },
1283 // SB4_FS_SHADER:
1284 [0xc][0] = { SHADER_FRAGMENT, SHADER_PROG },
1285 [0xc][1] = { SHADER_FRAGMENT, SHADER_CONST },
1286 [0xc][2] = { SHADER_FRAGMENT, UBO },
1287 // SB4_CS_SHADER:
1288 [0xd][0] = { SHADER_COMPUTE, SHADER_PROG },
1289 [0xd][1] = { SHADER_COMPUTE, SHADER_CONST },
1290 [0xd][2] = { SHADER_COMPUTE, UBO },
1291 [0xd][3] = { SHADER_COMPUTE, SSBO_0 }, /* a6xx location */
1292 // SB4_SSBO (shared across all stages)
1293 [0xe][0] = { 0, SSBO_0 }, /* a5xx (and a4xx?) location */
1294 [0xe][1] = { 0, SSBO_1 },
1295 [0xe][2] = { 0, SSBO_2 },
1296 // SB4_CS_SSBO
1297 [0xf][0] = { SHADER_COMPUTE, SSBO_0 },
1298 [0xf][1] = { SHADER_COMPUTE, SSBO_1 },
1299 [0xf][2] = { SHADER_COMPUTE, SSBO_2 },
1300 // unknown things
1301 /* This looks like combined UBO state for the 3d stages (a5xx and
1302 * before??). I think a6xx has UBO state per shader stage:
1303 */
1304 [0x6][2] = { 0, UBO },
1305 [0x7][1] = { 0, UNKNOWN_2DWORDS },
1306 };
1307
1308 *stage = lookup[state_block_id][state_type].stage;
1309 *state = lookup[state_block_id][state_type].state;
1310 }
1311
1312 static void
1313 a4xx_get_state_type(uint32_t *dwords, enum shader_t *stage, enum state_t *state,
1314 enum state_src_t *src)
1315 {
1316 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1317 unsigned state_type = dwords[1] & 0x3;
1318 _get_state_type(state_block_id, state_type, stage, state);
1319 *src = _get_state_src(dwords[0]);
1320 }
1321
1322 static void
1323 a6xx_get_state_type(uint32_t *dwords, enum shader_t *stage, enum state_t *state,
1324 enum state_src_t *src)
1325 {
1326 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1327 unsigned state_type = (dwords[0] >> 14) & 0x3;
1328 _get_state_type(state_block_id, state_type, stage, state);
1329 *src = _get_state_src(dwords[0]);
1330 }
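/* Illustrative decode of an a6xx CP_LOAD_STATE6 header dword, using the
 * field positions above plus num_unit from cp_load_state() below (the
 * value itself is made up):
 *
 *   dword0 = 0x00720000
 *     num_unit       = (dword0 >> 22) & 0x1ff = 1
 *     state_block_id = (dword0 >> 18) & 0xf   = 0xc  -> SHADER_FRAGMENT
 *     state_src      = (dword0 >> 16) & 0x3   = 2    -> STATE_SRC_INDIRECT
 *     state_type     = (dword0 >> 14) & 0x3   = 0    -> SHADER_PROG
 */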
1331
1332 static void
1333 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1334 {
1335 for (int i = 0; i < num_unit; i++) {
1336 /* work-around to reduce noise for opencl blob which always
1337 * writes the max # regardless of # of textures used
1338 */
1339 if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1340 break;
1341
1342 if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1343 dump_domain(texsamp, 2, level+2, "A3XX_TEX_SAMP");
1344 dump_hex(texsamp, 2, level+1);
1345 texsamp += 2;
1346 } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1347 dump_domain(texsamp, 2, level+2, "A4XX_TEX_SAMP");
1348 dump_hex(texsamp, 2, level+1);
1349 texsamp += 2;
1350 } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1351 dump_domain(texsamp, 4, level+2, "A5XX_TEX_SAMP");
1352 dump_hex(texsamp, 4, level+1);
1353 texsamp += 4;
1354 } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1355 dump_domain(texsamp, 4, level+2, "A6XX_TEX_SAMP");
1356 dump_hex(texsamp, 4, level+1);
1357 texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1358 }
1359 }
1360 }
1361
1362 static void
1363 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1364 {
1365 for (int i = 0; i < num_unit; i++) {
1366 /* work-around to reduce noise for opencl blob which always
1367 * writes the max # regardless of # of textures used
1368 */
1369 if ((num_unit == 16) &&
1370 (texconst[0] == 0) && (texconst[1] == 0) &&
1371 (texconst[2] == 0) && (texconst[3] == 0))
1372 break;
1373
1374 if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1375 dump_domain(texconst, 4, level+2, "A3XX_TEX_CONST");
1376 dump_hex(texconst, 4, level+1);
1377 texconst += 4;
1378 } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1379 dump_domain(texconst, 8, level+2, "A4XX_TEX_CONST");
1380 if (options->dump_textures) {
1381 uint32_t addr = texconst[4] & ~0x1f;
1382 dump_gpuaddr(addr, level-2);
1383 }
1384 dump_hex(texconst, 8, level+1);
1385 texconst += 8;
1386 } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1387 dump_domain(texconst, 12, level+2, "A5XX_TEX_CONST");
1388 if (options->dump_textures) {
1389 uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1390 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1391 }
1392 dump_hex(texconst, 12, level+1);
1393 texconst += 12;
1394 } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1395 dump_domain(texconst, 16, level+2, "A6XX_TEX_CONST");
1396 if (options->dump_textures) {
1397 uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1398 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1399 }
1400 dump_hex(texconst, 16, level+1);
1401 texconst += 16;
1402 }
1403 }
1404 }
1405
1406 static void
1407 cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1408 {
1409 enum shader_t stage;
1410 enum state_t state;
1411 enum state_src_t src;
1412 uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1413 uint64_t ext_src_addr;
1414 void *contents;
1415 int i;
1416
1417 if (quiet(2) && !options->script)
1418 return;
1419
1420 if (options->gpu_id >= 600)
1421 a6xx_get_state_type(dwords, &stage, &state, &src);
1422 else if (options->gpu_id >= 400)
1423 a4xx_get_state_type(dwords, &stage, &state, &src);
1424 else
1425 a3xx_get_state_type(dwords, &stage, &state, &src);
1426
1427 switch (src) {
1428 case STATE_SRC_DIRECT: ext_src_addr = 0; break;
1429 case STATE_SRC_INDIRECT:
1430 if (is_64b()) {
1431 ext_src_addr = dwords[1] & 0xfffffffc;
1432 ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1433 } else {
1434 ext_src_addr = dwords[1] & 0xfffffffc;
1435 }
1436
1437 break;
1438 case STATE_SRC_BINDLESS: {
1439 const unsigned base_reg =
1440 stage == SHADER_COMPUTE ? regbase("HLSQ_CS_BINDLESS_BASE[0]") : regbase("HLSQ_BINDLESS_BASE[0]");
1441
1442 if (is_64b()) {
1443 const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1444 ext_src_addr = reg_val(reg) & 0xfffffffc;
1445 ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1446 } else {
1447 const unsigned reg = base_reg + (dwords[1] >> 28);
1448 ext_src_addr = reg_val(reg) & 0xfffffffc;
1449 }
1450
1451 ext_src_addr += 4 * (dwords[1] & 0xffffff);
1452 break;
1453 }
1454 }
1455
1456 if (ext_src_addr)
1457 contents = hostptr(ext_src_addr);
1458 else
1459 contents = is_64b() ? dwords + 3 : dwords + 2;
1460
1461 if (!contents)
1462 return;
1463
1464 switch (state) {
1465 case SHADER_PROG: {
1466 const char *ext = NULL;
1467
1468 if (quiet(2))
1469 return;
1470
1471 if (options->gpu_id >= 400)
1472 num_unit *= 16;
1473 else if (options->gpu_id >= 300)
1474 num_unit *= 4;
1475
1476 /* shaders:
1477 *
1478 * note: num_unit seems to be # of instruction groups, where
1479 * an instruction group has 4 64bit instructions.
1480 */
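/* e.g. a packet value of num_unit = 2 on a4xx+ is scaled to 2 * 16 = 32
 * instructions, and disasm_a3xx() below is handed 32 * 2 = 64 dwords,
 * each instruction being 64 bits.
 */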
1481 if (stage == SHADER_VERTEX) {
1482 ext = "vo3";
1483 } else if (stage == SHADER_GEOM) {
1484 ext = "go3";
1485 } else if (stage == SHADER_COMPUTE) {
1486 ext = "co3";
1487 } else if (stage == SHADER_FRAGMENT){
1488 ext = "fo3";
1489 }
1490
1491 if (contents)
1492 disasm_a3xx(contents, num_unit * 2, level+2, stdout, options->gpu_id);
1493
1494 /* dump raw shader: */
1495 if (ext)
1496 dump_shader(ext, contents, num_unit * 2 * 4);
1497
1498 break;
1499 }
1500 case SHADER_CONST: {
1501 if (quiet(2))
1502 return;
1503
1504 /* uniforms/consts:
1505 *
1506 * note: num_unit seems to be # of pairs of dwords??
1507 */
1508
1509 if (options->gpu_id >= 400)
1510 num_unit *= 2;
1511
1512 dump_float(contents, num_unit*2, level+1);
1513 dump_hex(contents, num_unit*2, level+1);
1514
1515 break;
1516 }
1517 case TEX_MIPADDR: {
1518 uint32_t *addrs = contents;
1519
1520 if (quiet(2))
1521 return;
1522
1523 /* mipmap consts block just appears to be array of num_unit gpu addr's: */
1524 for (i = 0; i < num_unit; i++) {
1525 void *ptr = hostptr(addrs[i]);
1526 printf("%s%2d: %08x\n", levels[level+1], i, addrs[i]);
1527 if (options->dump_textures) {
1528 printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1529 dump_hex(ptr, hostlen(addrs[i])/4, level+1);
1530 }
1531 }
1532 break;
1533 }
1534 case TEX_SAMP: {
1535 dump_tex_samp(contents, src, num_unit, level);
1536 break;
1537 }
1538 case TEX_CONST: {
1539 dump_tex_const(contents, num_unit, level);
1540 break;
1541 }
1542 case SSBO_0: {
1543 uint32_t *ssboconst = (uint32_t *)contents;
1544
1545 for (i = 0; i < num_unit; i++) {
1546 int sz = 4;
1547 if (400 <= options->gpu_id && options->gpu_id < 500) {
1548 dump_domain(ssboconst, 4, level+2, "A4XX_SSBO_0");
1549 } else if (500 <= options->gpu_id && options->gpu_id < 600) {
1550 dump_domain(ssboconst, 4, level+2, "A5XX_SSBO_0");
1551 } else if (600 <= options->gpu_id && options->gpu_id < 700) {
1552 sz = 16;
1553 dump_domain(ssboconst, 16, level+2, "A6XX_IBO");
1554 }
1555 dump_hex(ssboconst, sz, level+1);
1556 ssboconst += sz;
1557 }
1558 break;
1559 }
1560 case SSBO_1: {
1561 uint32_t *ssboconst = (uint32_t *)contents;
1562
1563 for (i = 0; i < num_unit; i++) {
1564 if (400 <= options->gpu_id && options->gpu_id < 500)
1565 dump_domain(ssboconst, 2, level+2, "A4XX_SSBO_1");
1566 else if (500 <= options->gpu_id && options->gpu_id < 600)
1567 dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_1");
1568 dump_hex(ssboconst, 2, level+1);
1569 ssboconst += 2;
1570 }
1571 break;
1572 }
1573 case SSBO_2: {
1574 uint32_t *ssboconst = (uint32_t *)contents;
1575
1576 for (i = 0; i < num_unit; i++) {
1577 /* TODO a4xx and a5xx might be same: */
1578 if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1579 dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_2");
1580 dump_hex(ssboconst, 2, level+1);
1581 }
1582 if (options->dump_textures) {
1583 uint64_t addr = (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1584 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1585 }
1586 ssboconst += 2;
1587 }
1588 break;
1589 }
1590 case UBO: {
1591 uint32_t *uboconst = (uint32_t *)contents;
1592
1593 for (i = 0; i < num_unit; i++) {
1594 // TODO probably similar on a4xx..
1595 if (500 <= options->gpu_id && options->gpu_id < 600)
1596 dump_domain(uboconst, 2, level+2, "A5XX_UBO");
1597 else if (600 <= options->gpu_id && options->gpu_id < 700)
1598 dump_domain(uboconst, 2, level+2, "A6XX_UBO");
1599 dump_hex(uboconst, 2, level+1);
1600 uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1601 }
1602 break;
1603 }
1604 case UNKNOWN_DWORDS: {
1605 if (quiet(2))
1606 return;
1607 dump_hex(contents, num_unit, level+1);
1608 break;
1609 }
1610 case UNKNOWN_2DWORDS: {
1611 if (quiet(2))
1612 return;
1613 dump_hex(contents, num_unit * 2, level+1);
1614 break;
1615 }
1616 case UNKNOWN_4DWORDS: {
1617 if (quiet(2))
1618 return;
1619 dump_hex(contents, num_unit * 4, level+1);
1620 break;
1621 }
1622 default:
1623 if (quiet(2))
1624 return;
1625 /* hmm.. */
1626 dump_hex(contents, num_unit, level+1);
1627 break;
1628 }
1629 }
1630
1631 static void
1632 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1633 {
1634 bin_x1 = dwords[1] & 0xffff;
1635 bin_y1 = dwords[1] >> 16;
1636 bin_x2 = dwords[2] & 0xffff;
1637 bin_y2 = dwords[2] >> 16;
1638 }
1639
1640 static void
1641 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
1642 {
1643 uint32_t w, h, p;
1644 uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1645 uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1646 static const char *filter[] = {
1647 "point", "bilinear", "bicubic",
1648 };
1649 static const char *clamp[] = {
1650 "wrap", "mirror", "clamp-last-texel",
1651 };
1652 static const char swiznames[] = "xyzw01??";
1653
1654 /* see sys2gmem_tex_const[] in adreno_a2xx.c */
1655
1656 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1657 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1658 */
1659 p = (dwords[0] >> 22) << 5;
1660 clamp_x = (dwords[0] >> 10) & 0x3;
1661 clamp_y = (dwords[0] >> 13) & 0x3;
1662 clamp_z = (dwords[0] >> 16) & 0x3;
1663
1664 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1665 * NearestClamp=1:OGL Mode
1666 */
1667 parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1668
1669 /* Width, Height, EndianSwap=0:None */
1670 w = (dwords[2] & 0x1fff) + 1;
1671 h = ((dwords[2] >> 13) & 0x1fff) + 1;
1672
1673 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1674 * Mip=2:BaseMap
1675 */
1676 mag = (dwords[3] >> 19) & 0x3;
1677 min = (dwords[3] >> 21) & 0x3;
1678 swiz = (dwords[3] >> 1) & 0xfff;
1679
1680 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1681 * Dim3d=0
1682 */
1683 // XXX
1684
1685 /* BorderColor=0:ABGRBlack, ForceBC=0:disable, TriJuice=0, Aniso=0,
1686 * Dim=1:2d, MipPacking=0
1687 */
1688 parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1689
1690 printf("%sset texture const %04x\n", levels[level], val);
1691 printf("%sclamp x/y/z: %s/%s/%s\n", levels[level+1],
1692 clamp[clamp_x], clamp[clamp_y], clamp[clamp_z]);
1693 printf("%sfilter min/mag: %s/%s\n", levels[level+1], filter[min], filter[mag]);
1694 printf("%sswizzle: %c%c%c%c\n", levels[level+1],
1695 swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1696 swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1697 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1698 levels[level+1], gpuaddr, flags, w, h, p,
1699 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1700 printf("%smipaddr=%08x (flags=%03x)\n", levels[level+1],
1701 mip_gpuaddr, mip_flags);
1702 }
1703
1704 static void
1705 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
1706 {
1707 int i;
1708 printf("%sset shader const %04x\n", levels[level], val);
1709 for (i = 0; i < sizedwords; ) {
1710 uint32_t gpuaddr, flags;
1711 parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1712 void *addr = hostptr(gpuaddr);
1713 if (addr) {
1714 const char * fmt =
1715 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1716 uint32_t size = dwords[i++];
1717 printf("%saddr=%08x, size=%d, format=%s\n", levels[level+1],
1718 gpuaddr, size, fmt);
1719 // TODO maybe dump these as bytes instead of dwords?
1720 size = (size + 3) / 4; // for now convert to dwords
1721 dump_hex(addr, min(size, 64), level + 1);
1722 if (size > min(size, 64))
1723 printf("%s\t\t...\n", levels[level+1]);
1724 dump_float(addr, min(size, 64), level + 1);
1725 if (size > min(size, 64))
1726 printf("%s\t\t...\n", levels[level+1]);
1727 }
1728 }
1729 }
1730
1731 static void
1732 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1733 {
1734 uint32_t val = dwords[0] & 0xffff;
1735 switch((dwords[0] >> 16) & 0xf) {
1736 case 0x0:
1737 dump_float((float *)(dwords+1), sizedwords-1, level+1);
1738 break;
1739 case 0x1:
1740 /* need to figure out how const space is partitioned between
1741 * attributes, textures, etc..
1742 */
1743 if (val < 0x78) {
1744 dump_a2xx_tex_const(dwords+1, sizedwords-1, val, level);
1745 } else {
1746 dump_a2xx_shader_const(dwords+1, sizedwords-1, val, level);
1747 }
1748 break;
1749 case 0x2:
1750 printf("%sset bool const %04x\n", levels[level], val);
1751 break;
1752 case 0x3:
1753 printf("%sset loop const %04x\n", levels[level], val);
1754 break;
1755 case 0x4:
1756 val += 0x2000;
1757 if (dwords[0] & 0x80000000) {
1758 uint32_t srcreg = dwords[1];
1759 uint32_t dstval = dwords[2];
1760
1761 /* TODO: not sure what happens w/ payload != 2.. */
1762 assert(sizedwords == 3);
1763 assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1764
1765 /* note: rnn_regname uses a static buf so we can't do
1766 * two regname() calls for one printf..
1767 */
1768 printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1769 printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1770
1771 dstval += type0_reg_vals[srcreg];
1772
1773 dump_registers(val, &dstval, 1, level+1);
1774 } else {
1775 dump_registers(val, dwords+1, sizedwords-1, level+1);
1776 }
1777 break;
1778 }
1779 }
1780
1781 static void dump_register_summary(int level);
1782
1783 static void
1784 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1785 {
1786 const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1787 printl(2, "%sevent %s\n", levels[level], name);
1788
1789 if (name && (options->gpu_id > 500)) {
1790 char eventname[64];
1791 snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1792 if (!strcmp(name, "BLIT")) {
1793 do_query(eventname, 0);
1794 print_mode(level);
1795 dump_register_summary(level);
1796 }
1797 }
1798 }
1799
1800 static void
1801 dump_register_summary(int level)
1802 {
1803 uint32_t i;
1804 bool saved_summary = summary;
1805 summary = false;
1806
1807 in_summary = true;
1808
1809 /* dump current state of registers: */
1810 printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1811 for (i = 0; i < regcnt(); i++) {
1812 uint32_t regbase = i;
1813 uint32_t lastval = reg_val(regbase);
1814 /* skip registers that haven't been updated since last draw/blit: */
1815 if (!(options->allregs || reg_rewritten(regbase)))
1816 continue;
1817 if (!reg_written(regbase))
1818 continue;
1819 if (lastval != lastvals[regbase]) {
1820 printl(2, "!");
1821 lastvals[regbase] = lastval;
1822 } else {
1823 printl(2, " ");
1824 }
1825 if (reg_rewritten(regbase)) {
1826 printl(2, "+");
1827 } else {
1828 printl(2, " ");
1829 }
1830 printl(2, "\t%08x", lastval);
1831 if (!quiet(2)) {
1832 dump_register(regbase, lastval, level);
1833 }
1834 }
1835
1836 clear_rewritten();
1837
1838 in_summary = false;
1839
1840 draw_count++;
1841 summary = saved_summary;
1842 }
1843
1844 static uint32_t
1845 draw_indx_common(uint32_t *dwords, int level)
1846 {
1847 uint32_t prim_type = dwords[1] & 0x1f;
1848 uint32_t source_select = (dwords[1] >> 6) & 0x3;
1849 uint32_t num_indices = dwords[2];
1850 const char *primtype;
1851
1852 primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1853
1854 do_query(primtype, num_indices);
1855
1856 printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
1857 printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype,
1858 prim_type);
1859 printl(2, "%ssource_select: %s (%d)\n", levels[level],
1860 rnn_enumname(rnn, "pc_di_src_sel", source_select),
1861 source_select);
1862 printl(2, "%snum_indices: %d\n", levels[level], num_indices);
1863
1864 vertices += num_indices;
1865
1866 draws[ib]++;
1867
1868 return num_indices;
1869 }
1870
1871 enum pc_di_index_size {
1872 INDEX_SIZE_IGN = 0,
1873 INDEX_SIZE_16_BIT = 0,
1874 INDEX_SIZE_32_BIT = 1,
1875 INDEX_SIZE_8_BIT = 2,
1876 INDEX_SIZE_INVALID = 0,
1877 };
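/* Note: the draw packets below reassemble this from two bits of dwords[1],
 * ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2), so IGN/INVALID sharing
 * the 16-bit value just means a value of 0 is treated as 16-bit indices.
 */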
1878
1879 static void
1880 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1881 {
1882 uint32_t num_indices = draw_indx_common(dwords, level);
1883
1884 assert(!is_64b());
1885
1886 /* if we have an index buffer, dump that: */
1887 if (sizedwords == 5) {
1888 void *ptr = hostptr(dwords[3]);
1889 printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
1890 printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
1891 if (ptr) {
1892 enum pc_di_index_size size =
1893 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1894 if (!quiet(2)) {
1895 int i;
1896 printf("%sidxs: ", levels[level]);
1897 if (size == INDEX_SIZE_8_BIT) {
1898 uint8_t *idx = ptr;
1899 for (i = 0; i < dwords[4]; i++)
1900 printf(" %u", idx[i]);
1901 } else if (size == INDEX_SIZE_16_BIT) {
1902 uint16_t *idx = ptr;
1903 for (i = 0; i < dwords[4]/2; i++)
1904 printf(" %u", idx[i]);
1905 } else if (size == INDEX_SIZE_32_BIT) {
1906 uint32_t *idx = ptr;
1907 for (i = 0; i < dwords[4]/4; i++)
1908 printf(" %u", idx[i]);
1909 }
1910 printf("\n");
1911 dump_hex(ptr, dwords[4]/4, level+1);
1912 }
1913 }
1914 }
1915
1916 /* don't bother dumping registers for the dummy draw_indx's.. */
1917 if (num_indices > 0)
1918 dump_register_summary(level);
1919
1920 needs_wfi = true;
1921 }
1922
1923 static void
1924 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
1925 {
1926 uint32_t num_indices = draw_indx_common(dwords, level);
1927 enum pc_di_index_size size =
1928 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1929 void *ptr = &dwords[3];
1930 int sz = 0;
1931
1932 assert(!is_64b());
1933
1934 /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1935 if (!quiet(2)) {
1936 int i;
1937 printf("%sidxs: ", levels[level]);
1938 if (size == INDEX_SIZE_8_BIT) {
1939 uint8_t *idx = ptr;
1940 for (i = 0; i < num_indices; i++)
1941 printf(" %u", idx[i]);
1942 sz = num_indices;
1943 } else if (size == INDEX_SIZE_16_BIT) {
1944 uint16_t *idx = ptr;
1945 for (i = 0; i < num_indices; i++)
1946 printf(" %u", idx[i]);
1947 sz = num_indices * 2;
1948 } else if (size == INDEX_SIZE_32_BIT) {
1949 uint32_t *idx = ptr;
1950 for (i = 0; i < num_indices; i++)
1951 printf(" %u", idx[i]);
1952 sz = num_indices * 4;
1953 }
1954 printf("\n");
1955 dump_hex(ptr, sz / 4, level+1);
1956 }
1957
1958 /* don't bother dumping registers for the dummy draw_indx's.. */
1959 if (num_indices > 0)
1960 dump_register_summary(level);
1961 }
1962
1963 static void
1964 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
1965 {
1966 uint32_t num_indices = dwords[2];
1967 uint32_t prim_type = dwords[0] & 0x1f;
1968
1969 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
1970 print_mode(level);
1971
1972 /* don't bother dumping registers for the dummy draw_indx's.. */
1973 if (num_indices > 0)
1974 dump_register_summary(level);
1975 }
1976
1977 static void
1978 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
1979 {
1980 uint32_t prim_type = dwords[0] & 0x1f;
1981 uint64_t addr;
1982
1983 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
1984 print_mode(level);
1985
1986 if (is_64b())
1987 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
1988 else
1989 addr = dwords[1];
1990 dump_gpuaddr_size(addr, level, 0x10, 2);
1991
1992 if (is_64b())
1993 addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
1994 else
1995 addr = dwords[3];
1996 dump_gpuaddr_size(addr, level, 0x10, 2);
1997
1998 dump_register_summary(level);
1999 }
2000
2001 static void
2002 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2003 {
2004 uint32_t prim_type = dwords[0] & 0x1f;
2005 uint64_t addr;
2006
2007 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2008 print_mode(level);
2009
2010 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2011 dump_gpuaddr_size(addr, level, 0x10, 2);
2012
2013 dump_register_summary(level);
2014 }
2015
2016 static void
2017 cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
2018 {
2019 do_query("COMPUTE", 1);
2020 dump_register_summary(level);
2021 }
2022
2023 static void
2024 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2025 {
2026 const char *buf = (void *)dwords;
2027 int i;
2028
2029 if (quiet(3))
2030 return;
2031
2032 // the blob doesn't use CP_NOP for string_marker, but it does
2033 // use it for things that end up looking like, but aren't,
2034 // ascii chars:
2035 if (!options->decode_markers)
2036 return;
2037
2038 for (i = 0; i < 4 * sizedwords; i++) {
2039 if (buf[i] == '\0')
2040 break;
2041 if (isascii(buf[i]))
2042 printf("%c", buf[i]);
2043 }
2044 printf("\n");
2045 }
2046
2047 static void
2048 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2049 {
2050 /* traverse indirect buffers */
2051 uint64_t ibaddr;
2052 uint32_t ibsize;
2053 uint32_t *ptr = NULL;
2054
2055 if (is_64b()) {
2056 /* a5xx+.. high 32b of gpu addr, then size: */
2057 ibaddr = dwords[0];
2058 ibaddr |= ((uint64_t)dwords[1]) << 32;
2059 ibsize = dwords[2];
2060 } else {
2061 ibaddr = dwords[0];
2062 ibsize = dwords[1];
2063 }
2064
2065 if (!quiet(3)) {
2066 if (is_64b()) {
2067 printf("%sibaddr:%016"PRIx64"\n", levels[level], ibaddr);
2068 } else {
2069 printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2070 }
2071 printf("%sibsize:%08x\n", levels[level], ibsize);
2072 }
2073
2074 if (options->once && has_dumped(ibaddr, enable_mask))
2075 return;
2076
2077 /* 'query-compare' mode implies 'once' mode, although we only need to
2078 * process the cmdstream once for *any* enable_mask mode, since we are
2079 * comparing binning vs draw reg values at the same time, i.e. it is
2080 * not useful to process the same draw in both the binning and draw passes.
2081 */
2082 if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2083 return;
2084
2085 /* map gpuaddr back to hostptr: */
2086 ptr = hostptr(ibaddr);
2087
2088 if (ptr) {
2089 /* If the GPU hung within the target IB, the trigger point will be
2090 * just after the current CP_INDIRECT_BUFFER, because the IB is
2091 * executed but never returns. Account for this by checking whether
2092 * the IB returned:
2093 */
2094 highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
2095
2096 ib++;
2097 ibs[ib].base = ibaddr;
2098 ibs[ib].size = ibsize;
2099
2100 dump_commands(ptr, ibsize, level);
2101 ib--;
2102 } else {
2103 fprintf(stderr, "could not find: %016"PRIx64" (%d)\n", ibaddr, ibsize);
2104 }
2105 }
2106
2107 static void
2108 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2109 {
2110 needs_wfi = false;
2111 }
2112
2113 static void
2114 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2115 {
2116 if (quiet(2))
2117 return;
2118
2119 if (is_64b()) {
2120 uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2121 printf("%sgpuaddr:%016"PRIx64"\n", levels[level], gpuaddr);
2122 dump_hex(&dwords[2], sizedwords-2, level+1);
2123
2124 if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2125 dump_commands(&dwords[2], sizedwords-2, level+1);
2126 } else {
2127 uint32_t gpuaddr = dwords[0];
2128 printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2129 dump_float((float *)&dwords[1], sizedwords-1, level+1);
2130 }
2131 }
2132
2133 static void
2134 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2135 {
2136 uint32_t val = dwords[0] & 0xffff;
2137 uint32_t and = dwords[1];
2138 uint32_t or = dwords[2];
2139 printl(3, "%srmw ((%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1), and, or);
2140 if (needs_wfi)
2141 printl(2, "NEEDS WFI: rmw ((%s & 0x%08x) | 0x%08x)\n", regname(val, 1), and, or);
2142 reg_set(val, (reg_val(val) & and) | or);
2143 }
2144
2145 static void
2146 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2147 {
2148 uint32_t val = dwords[0] & 0xffff;
2149 printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2150
2151 if (quiet(2))
2152 return;
2153
2154 uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2155 printf("%sgpuaddr:%016"PRIx64"\n", levels[level], gpuaddr);
2156 void *ptr = hostptr(gpuaddr);
2157 if (ptr) {
2158 uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2159 dump_hex(ptr, cnt, level + 1);
2160 }
2161 }
2162
2163 struct draw_state {
2164 uint16_t enable_mask;
2165 uint16_t flags;
2166 uint32_t count;
2167 uint64_t addr;
2168 };
2169
2170 struct draw_state state[32];
2171
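/* Flags from the per-group header dword of CP_SET_DRAW_STATE (bits
 * 16..19, see cp_set_draw_state() below):
 */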
2172 #define FLAG_DIRTY 0x1
2173 #define FLAG_DISABLE 0x2
2174 #define FLAG_DISABLE_ALL_GROUPS 0x4
2175 #define FLAG_LOAD_IMMED 0x8
2176
2177 static int draw_mode;
2178
2179 static void
2180 disable_group(unsigned group_id)
2181 {
2182 struct draw_state *ds = &state[group_id];
2183 memset(ds, 0, sizeof(*ds));
2184 }
2185
2186 static void
2187 disable_all_groups(void)
2188 {
2189 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2190 disable_group(i);
2191 }
2192
2193 static void
2194 load_group(unsigned group_id, int level)
2195 {
2196 struct draw_state *ds = &state[group_id];
2197
2198 if (!ds->count)
2199 return;
2200
2201 printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2202 printl(2, "%scount: %d\n", levels[level], ds->count);
2203 printl(2, "%saddr: %016"PRIx64"\n", levels[level], ds->addr);
2204 printl(2, "%sflags: %x\n", levels[level], ds->flags);
2205
2206 if (options->gpu_id >= 600) {
2207 printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2208
2209 if (!(ds->enable_mask & enable_mask)) {
2210 printl(2, "%s\tskipped!\n\n", levels[level]);
2211 return;
2212 }
2213 }
2214
2215 void *ptr = hostptr(ds->addr);
2216 if (ptr) {
2217 if (!quiet(2))
2218 dump_hex(ptr, ds->count, level+1);
2219
2220 ib++;
2221 dump_commands(ptr, ds->count, level+1);
2222 ib--;
2223 }
2224 }
2225
2226 static void
2227 load_all_groups(int level)
2228 {
2229 /* sanity check: we should never hit this recursively, and if
2230 * we do, bad things happen:
2231 */
2232 static bool loading_groups = false;
2233 if (loading_groups) {
2234 printf("ERROR: nothing in draw state should trigger recursively loading groups!\n");
2235 return;
2236 }
2237 loading_groups = true;
2238 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2239 load_group(i, level);
2240 loading_groups = false;
2241
2242 /* in 'query-compare' mode, defer disabling all groups until we have a
2243 * chance to process the query:
2244 */
2245 if (!options->query_compare)
2246 disable_all_groups();
2247 }
2248
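/* CP_SET_DRAW_STATE: a sequence of group entries, each a header dword
 * followed by a gpuaddr (one dword, or two on 64b GPUs). The header is
 * unpacked below roughly as:
 *
 *   count       = dwords[i] & 0xffff;        // size of the group, in dwords
 *   flags       = (dwords[i] >> 16) & 0xf;   // FLAG_* above
 *   enable_mask = (dwords[i] >> 20) & 0xf;   // checked against the current mode on a6xx+ (see load_group())
 *   group_id    = (dwords[i] >> 24) & 0x1f;  // index into state[]
 */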
2249 static void
2250 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2251 {
2252 uint32_t i;
2253
2254 for (i = 0; i < sizedwords; ) {
2255 struct draw_state *ds;
2256 uint32_t count = dwords[i] & 0xffff;
2257 uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2258 uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2259 uint32_t flags = (dwords[i] >> 16) & 0xf;
2260 uint64_t addr;
2261
2262 if (is_64b()) {
2263 addr = dwords[i + 1];
2264 addr |= ((uint64_t)dwords[i + 2]) << 32;
2265 i += 3;
2266 } else {
2267 addr = dwords[i + 1];
2268 i += 2;
2269 }
2270
2271 if (flags & FLAG_DISABLE_ALL_GROUPS) {
2272 disable_all_groups();
2273 continue;
2274 }
2275
2276 if (flags & FLAG_DISABLE) {
2277 disable_group(group_id);
2278 continue;
2279 }
2280
2281 assert(group_id < ARRAY_SIZE(state));
2282 disable_group(group_id);
2283
2284 ds = &state[group_id];
2285
2286 ds->enable_mask = enable_mask;
2287 ds->flags = flags;
2288 ds->count = count;
2289 ds->addr = addr;
2290
2291 if (flags & FLAG_LOAD_IMMED) {
2292 load_group(group_id, level);
2293 disable_group(group_id);
2294 }
2295 }
2296 }
2297
2298 static void
2299 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2300 {
2301 draw_mode = dwords[0];
2302 }
2303
2304 /* execute compute shader */
2305 static void
2306 cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
2307 {
2308 do_query("compute", 0);
2309 dump_register_summary(level);
2310 }
2311
2312 static void
2313 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2314 {
2315 uint64_t addr;
2316
2317 if (is_64b()) {
2318 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2319 } else {
2320 addr = dwords[1];
2321 }
2322
2323 printl(3, "%saddr: %016"PRIx64"\n", levels[level], addr);
2324 dump_gpuaddr_size(addr, level, 0x10, 2);
2325
2326 do_query("compute", 0);
2327 dump_register_summary(level);
2328 }
2329
2330 static void
2331 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2332 {
2333 render_mode = rnn_enumname(rnn, "a6xx_render_mode", dwords[0] & 0xf);
2334
2335 if (!strcmp(render_mode, "RM6_BINNING")) {
2336 enable_mask = MODE_BINNING;
2337 } else if (!strcmp(render_mode, "RM6_GMEM")) {
2338 enable_mask = MODE_GMEM;
2339 } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2340 enable_mask = MODE_BYPASS;
2341 }
2342 }
2343
2344 static void
2345 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2346 {
2347 uint64_t addr;
2348 uint32_t *ptr, len;
2349
2350 assert(is_64b());
2351
2352 /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2353 * not sure if this can come in different sizes.
2354 *
2355 * First ptr doesn't seem to be cmdstream, second one does.
2356 *
2357 * Comment from downstream kernel:
2358 *
2359 * SRM -- set render mode (ex binning, direct render etc)
2360 * SRM is set by UMD usually at start of IB to tell CP the type of
2361 * preemption.
2362 * KMD needs to set SRM to NULL to indicate CP that rendering is
2363 * done by IB.
2364 * ------------------------------------------------------------------
2365 *
2366 * Seems to always be one of these two:
2367 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000
2368 * 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000
2369 *
2370 */
2371
2372 assert(options->gpu_id >= 500);
2373
2374 render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2375
2376 if (sizedwords == 1)
2377 return;
2378
2379 addr = dwords[1];
2380 addr |= ((uint64_t)dwords[2]) << 32;
2381
2382 mode = dwords[3];
2383
2384 dump_gpuaddr(addr, level+1);
2385
2386 if (sizedwords == 5)
2387 return;
2388
2389 assert(sizedwords == 8);
2390
2391 len = dwords[5];
2392 addr = dwords[6];
2393 addr |= ((uint64_t)dwords[7]) << 32;
2394
2395 printl(3, "%saddr: 0x%016"PRIx64"\n", levels[level], addr);
2396 printl(3, "%slen: 0x%x\n", levels[level], len);
2397
2398 ptr = hostptr(addr);
2399
2400 if (ptr) {
2401 if (!quiet(2)) {
2402 ib++;
2403 dump_commands(ptr, len, level+1);
2404 ib--;
2405 dump_hex(ptr, len, level+1);
2406 }
2407 }
2408 }
2409
2410 static void
2411 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2412 {
2413 uint64_t addr;
2414 uint32_t *ptr, len;
2415
2416 assert(is_64b());
2417 assert(options->gpu_id >= 500);
2418
2419 assert(sizedwords == 8);
2420
2421 addr = dwords[5];
2422 addr |= ((uint64_t)dwords[6]) << 32;
2423 len = dwords[7];
2424
2425 printl(3, "%saddr: 0x%016"PRIx64"\n", levels[level], addr);
2426 printl(3, "%slen: 0x%x\n", levels[level], len);
2427
2428 ptr = hostptr(addr);
2429
2430 if (ptr) {
2431 if (!quiet(2)) {
2432 ib++;
2433 dump_commands(ptr, len, level+1);
2434 ib--;
2435 dump_hex(ptr, len, level+1);
2436 }
2437 }
2438 }
2439
2440 static void
2441 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2442 {
2443 do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2444 print_mode(level);
2445 dump_register_summary(level);
2446 }
2447
2448 static void
2449 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2450 {
2451 int i;
2452
2453 /* NOTE: this seems to write the same reg multiple times.. not sure if
2454 * different parts of these are triggered by the FLUSH_SO_n events??
2455 * (if that is what they actually are?)
2456 */
2457 bool saved_summary = summary;
2458 summary = false;
2459
2460 for (i = 0; i < sizedwords; i += 2) {
2461 dump_register(dwords[i+0], dwords[i+1], level+1);
2462 reg_set(dwords[i+0], dwords[i+1]);
2463 }
2464
2465 summary = saved_summary;
2466 }
2467
2468 static void
2469 cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2470 {
2471 uint32_t reg = dwords[1] & 0xffff;
2472
2473 dump_register(reg, dwords[2], level+1);
2474 reg_set(reg, dwords[2]);
2475 }
2476
2477 static void
2478 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2479 {
2480 uint64_t addr;
2481 uint32_t size = dwords[2] & 0xffff;
2482 void *ptr;
2483
2484 addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2485
2486 printf("addr=%"PRIx64"\n", addr);
2487 ptr = hostptr(addr);
2488 if (ptr) {
2489 dump_commands(ptr, size, level+1);
2490 }
2491 }
2492
2493 static void
2494 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2495 {
2496 skip_ib2_enable_global = dwords[0];
2497 }
2498
2499 static void
2500 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2501 {
2502 skip_ib2_enable_local = dwords[0];
2503 }
2504
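/* Table of packet-name -> handler mappings. The CP() macro just adds the
 * "CP_" prefix, so e.g. CP(NOP, cp_nop) expands to { "CP_NOP", cp_nop };
 * names are matched against pktname() in get_type3_op() below.
 */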
2505 #define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2506 static const struct type3_op {
2507 const char *name;
2508 void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2509 struct {
2510 bool load_all_groups;
2511 } options;
2512 } type3_op[] = {
2513 CP(NOP, cp_nop),
2514 CP(INDIRECT_BUFFER, cp_indirect),
2515 CP(INDIRECT_BUFFER_PFD, cp_indirect),
2516 CP(WAIT_FOR_IDLE, cp_wfi),
2517 CP(REG_RMW, cp_rmw),
2518 CP(REG_TO_MEM, cp_reg_mem),
2519 CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2520 CP(MEM_WRITE, cp_mem_write),
2521 CP(EVENT_WRITE, cp_event_write),
2522 CP(RUN_OPENCL, cp_run_cl),
2523 CP(DRAW_INDX, cp_draw_indx, {.load_all_groups=true}),
2524 CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups=true}),
2525 CP(SET_CONSTANT, cp_set_const),
2526 CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2527 CP(WIDE_REG_WRITE, cp_wide_reg_write),
2528
2529 /* for a3xx */
2530 CP(LOAD_STATE, cp_load_state),
2531 CP(SET_BIN, cp_set_bin),
2532
2533 /* for a4xx */
2534 CP(LOAD_STATE4, cp_load_state),
2535 CP(SET_DRAW_STATE, cp_set_draw_state),
2536 CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups=true}),
2537 CP(EXEC_CS, cp_exec_cs, {.load_all_groups=true}),
2538 CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups=true}),
2539
2540 /* for a5xx */
2541 CP(SET_RENDER_MODE, cp_set_render_mode),
2542 CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2543 CP(BLIT, cp_blit),
2544 CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2545 CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups=true}),
2546 CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups=true}),
2547 CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
2548 CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
2549
2550 /* for a6xx */
2551 CP(LOAD_STATE6_GEOM, cp_load_state),
2552 CP(LOAD_STATE6_FRAG, cp_load_state),
2553 CP(LOAD_STATE6, cp_load_state),
2554 CP(SET_MODE, cp_set_mode),
2555 CP(SET_MARKER, cp_set_marker),
2556 CP(REG_WRITE, cp_reg_write),
2557
2558 CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
2559 };
2560
2561 static void
2562 noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
2563 {
2564 }
2565
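/* Look up the handler for a type3/type7 opcode by name; unknown or
 * unhandled packets fall back to a dummy no-op handler so that decoding
 * can continue past them.
 */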
2566 static const struct type3_op *
2567 get_type3_op(unsigned opc)
2568 {
2569 static const struct type3_op dummy_op = {
2570 .fxn = noop_fxn,
2571 };
2572 const char *name = pktname(opc);
2573
2574 if (!name)
2575 return &dummy_op;
2576
2577 for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2578 if (!strcmp(name, type3_op[i].name))
2579 return &type3_op[i];
2580
2581 return &dummy_op;
2582 }
2583
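/* Top-level cmdstream decode loop: type0/type4 packets are register
 * writes, type3/type7 packets carry an opcode dispatched through
 * get_type3_op(), type2 is a nop, and anything unrecognized is reported
 * as a bad packet (on a5xx+ we scan forward for the next valid-looking
 * packet header instead of giving up).
 */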
2584 void
2585 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2586 {
2587 int dwords_left = sizedwords;
2588 uint32_t count = 0; /* dword count including packet header */
2589 uint32_t val;
2590
2591 // assert(dwords);
2592 if (!dwords) {
2593 printf("NULL cmd buffer!\n");
2594 return;
2595 }
2596
2597 draws[ib] = 0;
2598
2599 while (dwords_left > 0) {
2600
2601 current_draw_count = draw_count;
2602
2603 /* hack: this looks like a -1 underflow in some versions,
2604 * when it tries to write zero registers via pkt0
2605 */
2606 // if ((dwords[0] >> 16) == 0xffff)
2607 // goto skip;
2608
2609 if (pkt_is_type0(dwords[0])) {
2610 printl(3, "t0");
2611 count = type0_pkt_size(dwords[0]) + 1;
2612 val = type0_pkt_offset(dwords[0]);
2613 assert(val < regcnt());
2614 printl(3, "%swrite %s%s (%04x)\n", levels[level+1], regname(val, 1),
2615 (dwords[0] & 0x8000) ? " (same register)" : "", val);
2616 dump_registers(val, dwords+1, count-1, level+2);
2617 if (!quiet(3))
2618 dump_hex(dwords, count, level+1);
2619 } else if (pkt_is_type4(dwords[0])) {
2620 /* basically the same(ish) as type0 prior to a5xx */
2621 printl(3, "t4");
2622 count = type4_pkt_size(dwords[0]) + 1;
2623 val = type4_pkt_offset(dwords[0]);
2624 assert(val < regcnt());
2625 printl(3, "%swrite %s (%04x)\n", levels[level+1], regname(val, 1), val);
2626 dump_registers(val, dwords+1, count-1, level+2);
2627 if (!quiet(3))
2628 dump_hex(dwords, count, level+1);
2629 #if 0
2630 } else if (pkt_is_type1(dwords[0])) {
2631 printl(3, "t1");
2632 count = 3;
2633 val = dwords[0] & 0xfff;
2634 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2635 dump_registers(val, dwords+1, 1, level+2);
2636 val = (dwords[0] >> 12) & 0xfff;
2637 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2638 dump_registers(val, dwords+2, 1, level+2);
2639 if (!quiet(3))
2640 dump_hex(dwords, count, level+1);
2641 } else if (pkt_is_type2(dwords[0])) {
2642 printl(3, "t2");
2643 printf("%sNOP\n", levels[level+1]);
2644 count = 1;
2645 if (!quiet(3))
2646 dump_hex(dwords, count, level+1);
2647 #endif
2648 } else if (pkt_is_type3(dwords[0])) {
2649 count = type3_pkt_size(dwords[0]) + 1;
2650 val = cp_type3_opcode(dwords[0]);
2651 const struct type3_op *op = get_type3_op(val);
2652 if (op->options.load_all_groups)
2653 load_all_groups(level+1);
2654 printl(3, "t3");
2655 const char *name = pktname(val);
2656 if (!quiet(2)) {
2657 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
2658 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
2659 val, count, (dwords[0] & 0x1) ? " (predicated)" : "");
2660 }
2661 if (name)
2662 dump_domain(dwords+1, count-1, level+2, name);
2663 op->fxn(dwords+1, count-1, level+1);
2664 if (!quiet(2))
2665 dump_hex(dwords, count, level+1);
2666 } else if (pkt_is_type7(dwords[0])) {
2667 count = type7_pkt_size(dwords[0]) + 1;
2668 val = cp_type7_opcode(dwords[0]);
2669 const struct type3_op *op = get_type3_op(val);
2670 if (op->options.load_all_groups)
2671 load_all_groups(level+1);
2672 printl(3, "t7");
2673 const char *name = pktname(val);
2674 if (!quiet(2)) {
2675 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
2676 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
2677 val, count);
2678 }
2679 if (name) {
2680 /* special hack for two packets that decode the same way
2681 * on a6xx:
2682 */
2683 if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
2684 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
2685 name = "CP_LOAD_STATE6";
2686 dump_domain(dwords+1, count-1, level+2, name);
2687 }
2688 op->fxn(dwords+1, count-1, level+1);
2689 if (!quiet(2))
2690 dump_hex(dwords, count, level+1);
2691 } else if (pkt_is_type2(dwords[0])) {
2692 printl(3, "t2");
2693 printl(3, "%snop\n", levels[level+1]);
2694 } else {
2695 /* for 5xx+ we can do a passable job of looking for the start of the next valid packet: */
2696 if (options->gpu_id >= 500) {
2697 while (dwords_left > 0) {
2698 if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
2699 break;
2700 printf("bad type! %08x\n", dwords[0]);
2701 dwords++;
2702 dwords_left--;
2703 }
2704 } else {
2705 printf("bad type! %08x\n", dwords[0]);
2706 return;
2707 }
2708 }
2709
2710 dwords += count;
2711 dwords_left -= count;
2712
2713 }
2714
2715 if (dwords_left < 0)
2716 printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
2717 }