freedreno: drop shader_t
[mesa.git] / src / freedreno / decode / cffdec.c
1 /*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <err.h>
27 #include <inttypes.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 #include <stdarg.h>
32 #include <stdbool.h>
33 #include <unistd.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/wait.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <assert.h>
40 #include <signal.h>
41 #include <errno.h>
42
43 #include "redump.h"
44 #include "disasm.h"
45 #include "script.h"
46 #include "rnnutil.h"
47 #include "buffers.h"
48 #include "cffdec.h"
49
50 /* ************************************************************************* */
51 /* originally based on kernel recovery dump code: */
52
53 static const struct cffdec_options *options;
54
55 static bool needs_wfi = false;
56 static bool summary = false;
57 static bool in_summary = false;
58 static int vertices;
59
60 static inline unsigned regcnt(void)
61 {
62 if (options->gpu_id >= 500)
63 return 0xffff;
64 else
65 return 0x7fff;
66 }
67
68 static int is_64b(void)
69 {
70 return options->gpu_id >= 500;
71 }
72
73
74 static int draws[3];
75 static struct {
76 uint64_t base;
77 uint32_t size; /* in dwords */
78 /* Generally cmdstream consists of multiple IB calls to different
79 * buffers, which are themselves often re-used for each tile. The
80 * triggered flag serves two purposes to help make it more clear
81 * what part of the cmdstream is before vs after the GPU hang:
82 *
83 * 1) if in IB2 we are past the point within the IB2 buffer where
84 * the GPU hung, but IB1 is not past the point within its
85 * buffer where the GPU had hung, then we know the GPU hang
86 * happens on a future use of that IB2 buffer.
87 *
88 * 2) if we are in an IB1 or IB2 buffer that is not the one where the GPU
89 * hung, but we've already passed the trigger point at the same
90 * IB level, we know that we are past the point where the GPU
91 * had hung.
92 *
93 * So this is a one-way switch, false->true. And a higher #'d
94 * IB level isn't considered triggered unless the lower #'d IB
95 * level is.
96 */
97 bool triggered;
98 } ibs[4];
99 static int ib;
100
101 static int draw_count;
102 static int current_draw_count;
103
104 /* query mode.. to handle symbolic register name queries, we need to
105 * defer parsing the query string until after gpu_id is known and the
106 * rnn db is loaded:
107 */
108 static int *queryvals;
109
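/* Output verbosity helper (descriptive note): quiet(lvl) returns true when
 * output at that level should be suppressed.  Level 2 and above is dropped
 * in query or script mode, level 3 and above is additionally dropped in
 * summary mode, and everything outside the selected draw is dropped when a
 * draw filter is set.
 */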
110 static bool
111 quiet(int lvl)
112 {
113 if ((options->draw_filter != -1) && (options->draw_filter != current_draw_count))
114 return true;
115 if ((lvl >= 3) && (summary || options->querystrs || options->script))
116 return true;
117 if ((lvl >= 2) && (options->querystrs || options->script))
118 return true;
119 return false;
120 }
121
122 void
123 printl(int lvl, const char *fmt, ...)
124 {
125 va_list args;
126 if (quiet(lvl))
127 return;
128 va_start(args, fmt);
129 vprintf(fmt, args);
130 va_end(args);
131 }
132
133 static const char *levels[] = {
134 "\t",
135 "\t\t",
136 "\t\t\t",
137 "\t\t\t\t",
138 "\t\t\t\t\t",
139 "\t\t\t\t\t\t",
140 "\t\t\t\t\t\t\t",
141 "\t\t\t\t\t\t\t\t",
142 "\t\t\t\t\t\t\t\t\t",
143 "x",
144 "x",
145 "x",
146 "x",
147 "x",
148 "x",
149 };
150
151 enum state_src_t {
152 STATE_SRC_DIRECT,
153 STATE_SRC_INDIRECT,
154 STATE_SRC_BINDLESS,
155 };
156
157 /* SDS (CP_SET_DRAW_STATE) helpers: */
158 static void load_all_groups(int level);
159 static void disable_all_groups(void);
160
161 static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level);
162 static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
163
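/* Returns true if the given address falls within the estimated crash window
 * of the current IB (so the hexdump can highlight it); see the comment above
 * ibs[] for how the 'triggered' flag propagates across IB levels.
 */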
164 static bool
165 highlight_gpuaddr(uint64_t gpuaddr)
166 {
167 if (!options->color)
168 return false;
169
170 if (!options->ibs[ib].base)
171 return false;
172
173 if ((ib > 0) && options->ibs[ib-1].base && !ibs[ib-1].triggered)
174 return false;
175
176 if (ibs[ib].triggered)
177 return true;
178
179 if (options->ibs[ib].base != ibs[ib].base)
180 return false;
181
182 uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
183 uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
184
185 bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
186
187 ibs[ib].triggered |= triggered;
188
189 if (triggered)
190 printf("ESTIMATED CRASH LOCATION!\n");
191
192 return triggered;
193 }
194
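/* Hexdump helper: each row shows the gpu address, the byte offset into the
 * buffer, and up to 8 dwords; runs of all-zero rows (other than the first
 * row) are collapsed into a single "*" line.
 */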
195 static void
196 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
197 {
198 int i, j;
199 int lastzero = 1;
200
201 if (quiet(2))
202 return;
203
204 for (i = 0; i < sizedwords; i += 8) {
205 int zero = 1;
206
207 /* always show first row: */
208 if (i == 0)
209 zero = 0;
210
211 for (j = 0; (j < 8) && (i+j < sizedwords) && zero; j++)
212 if (dwords[i+j])
213 zero = 0;
214
215 if (zero && !lastzero)
216 printf("*\n");
217
218 lastzero = zero;
219
220 if (zero)
221 continue;
222
223 uint64_t addr = gpuaddr(&dwords[i]);
224 bool highlight = highlight_gpuaddr(addr);
225
226 if (highlight)
227 printf("\x1b[0;1;31m");
228
229 if (is_64b()) {
230 printf("%016lx:%s", addr, levels[level]);
231 } else {
232 printf("%08x:%s", (uint32_t)addr, levels[level]);
233 }
234
235 if (highlight)
236 printf("\x1b[0m");
237
238 printf("%04x:", i * 4);
239
240 for (j = 0; (j < 8) && (i+j < sizedwords); j++) {
241 printf(" %08x", dwords[i+j]);
242 }
243
244 printf("\n");
245 }
246 }
247
248 static void
249 dump_float(float *dwords, uint32_t sizedwords, int level)
250 {
251 int i;
252 for (i = 0; i < sizedwords; i++) {
253 if ((i % 8) == 0) {
254 if (is_64b()) {
255 printf("%016lx:%s", gpuaddr(dwords), levels[level]);
256 } else {
257 printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
258 }
259 } else {
260 printf(" ");
261 }
262 printf("%8f", *(dwords++));
263 if ((i % 8) == 7)
264 printf("\n");
265 }
266 if (i % 8)
267 printf("\n");
268 }
269
270 /* I believe the surface format is in the low bits:
271 #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
272 comments in sys2gmem_tex_const indicate that the address is [31:12], but
273 it looks like at least some of the bits above the format have a different meaning..
274 */
275 static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr,
276 uint32_t *flags, uint32_t mask)
277 {
278 assert(!is_64b()); /* this is only used on a2xx */
279 *gpuaddr = dword & ~mask;
280 *flags = dword & mask;
281 }
282
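/* Shadow of the register state seen so far: type0_reg_vals holds the last
 * value written to each register, with bitmasks tracking which registers
 * have ever been written and which were written since the last draw/blit,
 * so the per-draw summary can show only what changed.
 */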
283 static uint32_t type0_reg_vals[0xffff + 1];
284 static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals)/8]; /* written since last draw */
285 static uint8_t type0_reg_written[sizeof(type0_reg_vals)/8];
286 static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
287
288 static bool reg_rewritten(uint32_t regbase)
289 {
290 return !!(type0_reg_rewritten[regbase/8] & (1 << (regbase % 8)));
291 }
292
293 bool reg_written(uint32_t regbase)
294 {
295 return !!(type0_reg_written[regbase/8] & (1 << (regbase % 8)));
296 }
297
298 static void clear_rewritten(void)
299 {
300 memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
301 }
302
303 static void clear_written(void)
304 {
305 memset(type0_reg_written, 0, sizeof(type0_reg_written));
306 clear_rewritten();
307 }
308
309 uint32_t reg_lastval(uint32_t regbase)
310 {
311 return lastvals[regbase];
312 }
313
314 static void
315 clear_lastvals(void)
316 {
317 memset(lastvals, 0, sizeof(lastvals));
318 }
319
320 uint32_t
321 reg_val(uint32_t regbase)
322 {
323 return type0_reg_vals[regbase];
324 }
325
326 void
327 reg_set(uint32_t regbase, uint32_t val)
328 {
329 assert(regbase < regcnt());
330 type0_reg_vals[regbase] = val;
331 type0_reg_written[regbase/8] |= (1 << (regbase % 8));
332 type0_reg_rewritten[regbase/8] |= (1 << (regbase % 8));
333 }
334
335 static void
336 reg_dump_scratch(const char *name, uint32_t dword, int level)
337 {
338 unsigned r;
339
340 if (quiet(3))
341 return;
342
343 r = regbase("CP_SCRATCH[0].REG");
344
345 // if not found, try the old a2xx/a3xx name:
346 if (!r)
347 r = regbase("CP_SCRATCH_REG0");
348
349 if (!r)
350 return;
351
352 printf("%s:%u,%u,%u,%u\n", levels[level],
353 reg_val(r + 4), reg_val(r + 5),
354 reg_val(r + 6), reg_val(r + 7));
355 }
356
357 static void
358 dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
359 {
360 void *buf;
361
362 if (quiet(quietlvl))
363 return;
364
365 buf = hostptr(gpuaddr);
366 if (buf) {
367 dump_hex(buf, sizedwords, level+1);
368 }
369 }
370
371 static void
372 dump_gpuaddr(uint64_t gpuaddr, int level)
373 {
374 dump_gpuaddr_size(gpuaddr, level, 64, 3);
375 }
376
377 static void
378 reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
379 {
380 dump_gpuaddr(dword, level);
381 }
382
383 uint32_t gpuaddr_lo;
384 static void
385 reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
386 {
387 gpuaddr_lo = dword;
388 }
389
390 static void
391 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
392 {
393 dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
394 }
395
396
397 static void
398 dump_shader(const char *ext, void *buf, int bufsz)
399 {
400 if (options->dump_shaders) {
401 static int n = 0;
402 char filename[16];
403 int fd;
404 snprintf(filename, sizeof(filename), "%04d.%s", n++, ext);
405 fd = open(filename, O_WRONLY| O_TRUNC | O_CREAT, 0644);
406 write(fd, buf, bufsz);
407 close(fd);
408 }
409 }
410
411 static void
412 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
413 {
414 void *buf;
415
416 gpuaddr &= 0xfffffffffffffff0;
417
418 if (quiet(3))
419 return;
420
421 buf = hostptr(gpuaddr);
422 if (buf) {
423 uint32_t sizedwords = hostlen(gpuaddr) / 4;
424 const char *ext;
425
426 dump_hex(buf, min(64, sizedwords), level+1);
427 disasm_a3xx(buf, sizedwords, level+2, stdout, options->gpu_id);
428
429 /* this is a bit of an ugly way, but oh well.. */
430 if (strstr(name, "SP_VS_OBJ")) {
431 ext = "vo3";
432 } else if (strstr(name, "SP_FS_OBJ")) {
433 ext = "fo3";
434 } else if (strstr(name, "SP_GS_OBJ")) {
435 ext = "go3";
436 } else if (strstr(name, "SP_CS_OBJ")) {
437 ext = "co3";
438 } else {
439 ext = NULL;
440 }
441
442 if (ext)
443 dump_shader(ext, buf, sizedwords * 4);
444 }
445 }
446
447 static void
448 reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
449 {
450 disasm_gpuaddr(name, dword, level);
451 }
452
453 static void
454 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
455 {
456 disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
457 }
458
459 /* Find the value of the TEX_COUNT register that corresponds to the named
460 * TEX_SAMP/TEX_CONST reg.
461 *
462 * Note, this kinda assumes an equal # of samplers and textures, but I'm
463 * not really sure if there is a much better option. I suppose on a6xx we
464 * could instead decode the bitfields in SP_xS_CONFIG.
465 */
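/* For example, for "SP_FS_TEX_CONST_HI" this builds "SP_FS_TEX_COUNT" and
 * returns that register's last written value.
 */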
466 static int
467 get_tex_count(const char *name)
468 {
469 char count_reg[strlen(name) + 5];
470 char *p;
471
472 p = strstr(name, "CONST");
473 if (!p)
474 p = strstr(name, "SAMP");
475 if (!p)
476 return 0;
477
478 int n = p - name;
479 strncpy(count_reg, name, n);
480 strcpy(count_reg + n, "COUNT");
481
482 return reg_val(regbase(count_reg));
483 }
484
485 static void
486 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
487 {
488 if (!in_summary)
489 return;
490
491 int num_unit = get_tex_count(name);
492 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
493 void *buf = hostptr(gpuaddr);
494
495 if (!buf)
496 return;
497
498 dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level+1);
499 }
500
501 static void
502 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
503 {
504 if (!in_summary)
505 return;
506
507 int num_unit = get_tex_count(name);
508 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
509 void *buf = hostptr(gpuaddr);
510
511 if (!buf)
512 return;
513
514 dump_tex_const(buf, num_unit, level+1);
515 }
516
517 /*
518 * Registers with special handling (rnndec_decode() handles rest):
519 */
520 #define REG(x, fxn) { #x, fxn }
521 static struct {
522 const char *regname;
523 void (*fxn)(const char *name, uint32_t dword, int level);
524 uint32_t regbase;
525 } reg_a2xx[] = {
526 REG(CP_SCRATCH_REG0, reg_dump_scratch),
527 REG(CP_SCRATCH_REG1, reg_dump_scratch),
528 REG(CP_SCRATCH_REG2, reg_dump_scratch),
529 REG(CP_SCRATCH_REG3, reg_dump_scratch),
530 REG(CP_SCRATCH_REG4, reg_dump_scratch),
531 REG(CP_SCRATCH_REG5, reg_dump_scratch),
532 REG(CP_SCRATCH_REG6, reg_dump_scratch),
533 REG(CP_SCRATCH_REG7, reg_dump_scratch),
534 {NULL},
535 }, reg_a3xx[] = {
536 REG(CP_SCRATCH_REG0, reg_dump_scratch),
537 REG(CP_SCRATCH_REG1, reg_dump_scratch),
538 REG(CP_SCRATCH_REG2, reg_dump_scratch),
539 REG(CP_SCRATCH_REG3, reg_dump_scratch),
540 REG(CP_SCRATCH_REG4, reg_dump_scratch),
541 REG(CP_SCRATCH_REG5, reg_dump_scratch),
542 REG(CP_SCRATCH_REG6, reg_dump_scratch),
543 REG(CP_SCRATCH_REG7, reg_dump_scratch),
544 REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
545 REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
546 REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
547 REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
548 REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
549 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
550 {NULL},
551 }, reg_a4xx[] = {
552 REG(CP_SCRATCH[0].REG, reg_dump_scratch),
553 REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
554 REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
555 REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
556 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
557 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
558 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
559 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
560 REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
561 REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
562 REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
563 REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
564 REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
565 REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
566 REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
567 REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
568 REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
569 REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
570 REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
571 REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
572 REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
573 REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
574 REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
575 REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
576 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
577 {NULL},
578 }, reg_a5xx[] = {
579 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
580 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
581 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
582 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
583 REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
584 REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
585 REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
586 REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
587 REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
588 REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
589 REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
590 REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
591 REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
592 REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
593 REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
594 REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
595 REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
596 REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
597 REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
598 REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
599 REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
600 REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
601 REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
602 REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
603 REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
604 REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
605 REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
606 REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
607 REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
608 REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
609 REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
610 REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
611 REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
612 REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
613 REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
614 REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
615 REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
616 REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
617 REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
618 REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
619 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
620 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
621 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
622 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
623 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
624 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
625 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
626 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
627 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
628 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
629 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
630 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
631 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
632 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
633 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
634 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
635 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
636 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
637 // REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
638 // REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
639 // REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
640 // REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
641 // REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
642 // REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
643 // REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
644 // REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
645 // REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
646 // REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
647
648 // REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
649 // REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
650 // REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
651 // REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
652 // REG(RB_2D_DST_LO, reg_gpuaddr_lo),
653 // REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
654 // REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
655 // REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
656
657 {NULL},
658 }, reg_a6xx[] = {
659 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
660 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
661 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
662 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
663
664 REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
665 REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
666 REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
667 REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
668 REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
669 REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
670 REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
671 REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
672 REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
673 REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
674 REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
675 REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
676
677 REG(SP_VS_TEX_CONST_LO, reg_gpuaddr_lo),
678 REG(SP_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
679 REG(SP_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
680 REG(SP_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
681 REG(SP_HS_TEX_CONST_LO, reg_gpuaddr_lo),
682 REG(SP_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
683 REG(SP_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
684 REG(SP_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
685 REG(SP_DS_TEX_CONST_LO, reg_gpuaddr_lo),
686 REG(SP_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
687 REG(SP_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
688 REG(SP_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
689 REG(SP_GS_TEX_CONST_LO, reg_gpuaddr_lo),
690 REG(SP_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
691 REG(SP_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
692 REG(SP_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
693 REG(SP_FS_TEX_CONST_LO, reg_gpuaddr_lo),
694 REG(SP_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
695 REG(SP_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
696 REG(SP_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
697 REG(SP_CS_TEX_CONST_LO, reg_gpuaddr_lo),
698 REG(SP_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
699 REG(SP_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
700 REG(SP_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
701
702 {NULL},
703 }, *type0_reg;
704
705 static struct rnn *rnn;
706
707 static void
708 init_rnn(const char *gpuname)
709 {
710 rnn = rnn_new(!options->color);
711
712 rnn_load(rnn, gpuname);
713
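/* resolve query strings now that the rnn db is loaded: each one is either
 * a numeric register offset or a symbolic register name resolved via
 * regbase():
 */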
714 if (options->querystrs) {
715 int i;
716 queryvals = calloc(options->nquery, sizeof(queryvals[0]));
717
718 for (i = 0; i < options->nquery; i++) {
719 int val = strtol(options->querystrs[i], NULL, 0);
720
721 if (val == 0)
722 val = regbase(options->querystrs[i]);
723
724 queryvals[i] = val;
725 printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
726 }
727 }
728
729 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
730 type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
731 if (!type0_reg[idx].regbase) {
732 printf("invalid register name: %s\n", type0_reg[idx].regname);
733 exit(1);
734 }
735 }
736 }
737
738 void
739 reset_regs(void)
740 {
741 clear_written();
742 clear_lastvals();
743 memset(&ibs, 0, sizeof(ibs));
744 }
745
746 void
747 cffdec_init(const struct cffdec_options *_options)
748 {
749 options = _options;
750 summary = options->summary;
751
752 /* in case we're decoding multiple files: */
753 free(queryvals);
754 reset_regs();
755 draw_count = 0;
756
757 /* TODO we need an API to free/cleanup any previous rnn */
758
759 switch (options->gpu_id) {
760 case 200 ... 299:
761 type0_reg = reg_a2xx;
762 init_rnn("a2xx");
763 break;
764 case 300 ... 399:
765 type0_reg = reg_a3xx;
766 init_rnn("a3xx");
767 break;
768 case 400 ... 499:
769 type0_reg = reg_a4xx;
770 init_rnn("a4xx");
771 break;
772 case 500 ... 599:
773 type0_reg = reg_a5xx;
774 init_rnn("a5xx");
775 break;
776 case 600 ... 699:
777 type0_reg = reg_a6xx;
778 init_rnn("a6xx");
779 break;
780 default:
781 errx(-1, "unsupported gpu");
782 }
783 }
784
785 const char *
786 pktname(unsigned opc)
787 {
788 return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
789 }
790
791 const char *
792 regname(uint32_t regbase, int color)
793 {
794 return rnn_regname(rnn, regbase, color);
795 }
796
797 uint32_t
798 regbase(const char *name)
799 {
800 return rnn_regbase(rnn, name);
801 }
802
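/* Does the (uncolored) register name end with the given suffix?  Used below
 * to spot _LO/_HI gpuaddr register pairs.
 */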
803 static int
804 endswith(uint32_t regbase, const char *suffix)
805 {
806 const char *name = regname(regbase, 0);
807 const char *s = strstr(name, suffix);
808 if (!s)
809 return 0;
810 return (s - strlen(name) + strlen(suffix)) == name;
811 }
812
813 void
814 dump_register_val(uint32_t regbase, uint32_t dword, int level)
815 {
816 struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
817
818 if (info && info->typeinfo) {
819 uint64_t gpuaddr = 0;
820 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
821 printf("%s%s: %s", levels[level], info->name, decoded);
822
823 /* Try and figure out if we are looking at a gpuaddr.. this
824 * might be useful for other gens too, but at least a5xx has
825 * the _HI/_LO suffix we can look for. Maybe a better approach
826 * would be some special annotation in the xml..
827 */
828 if (options->gpu_id >= 500) {
829 if (endswith(regbase, "_HI") && endswith(regbase-1, "_LO")) {
830 gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase-1);
831 } else if (endswith(regbase, "_LO") && endswith(regbase+1, "_HI")) {
832 gpuaddr = (((uint64_t)reg_val(regbase+1)) << 32) | dword;
833 }
834 }
835
836 if (gpuaddr && hostptr(gpuaddr)) {
837 printf("\t\tbase=%lx, offset=%lu, size=%u",
838 gpubaseaddr(gpuaddr),
839 gpuaddr - gpubaseaddr(gpuaddr),
840 hostlen(gpubaseaddr(gpuaddr)));
841 }
842
843 printf("\n");
844
845 free(decoded);
846 } else if (info) {
847 printf("%s%s: %08x\n", levels[level], info->name, dword);
848 } else {
849 printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
850 }
851
852 if (info) {
853 free(info->name);
854 free(info);
855 }
856 }
857
858 static void
859 dump_register(uint32_t regbase, uint32_t dword, int level)
860 {
861 if (!quiet(3)) {
862 dump_register_val(regbase, dword, level);
863 }
864
865 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
866 if (type0_reg[idx].regbase == regbase) {
867 type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
868 break;
869 }
870 }
871 }
872
873 static bool
874 is_banked_reg(uint32_t regbase)
875 {
876 return (0x2000 <= regbase) && (regbase < 0x2400);
877 }
878
879 static void
880 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords, int level)
881 {
882 while (sizedwords--) {
883 int last_summary = summary;
884
885 /* access to non-banked registers needs a WFI:
886 * TODO banked register range for a2xx??
887 */
888 if (needs_wfi && !is_banked_reg(regbase))
889 printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
890
891 reg_set(regbase, *dwords);
892 dump_register(regbase, *dwords, level);
893 regbase++;
894 dwords++;
895 summary = last_summary;
896 }
897 }
898
899 static void
900 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level,
901 const char *name)
902 {
903 struct rnndomain *dom;
904 int i;
905
906 dom = rnn_finddomain(rnn->db, name);
907
908 if (!dom)
909 return;
910
911 if (script_packet)
912 script_packet(dwords, sizedwords, rnn, dom);
913
914 if (quiet(2))
915 return;
916
917 for (i = 0; i < sizedwords; i++) {
918 struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
919 char *decoded;
920 if (!(info && info->typeinfo))
921 break;
922 uint64_t value = dwords[i];
923 if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
924 value |= (uint64_t) dwords[i + 1] << 32;
925 i++; /* skip the next dword since we're printing it now */
926 }
927 decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
928 /* Unlike the register printing path, we don't print the name
929 * of the register, so if it doesn't contain other named
930 * things (i.e. it isn't a bitset) then print the register
931 * name as if it's a bitset with a single entry. This avoids
932 * having to create a dummy register with a single entry to
933 * get a name in the decoding.
934 */
935 if (info->typeinfo->type == RNN_TTYPE_BITSET ||
936 info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
937 printf("%s%s\n", levels[level], decoded);
938 } else {
939 printf("%s{ %s%s%s = %s }\n", levels[level],
940 rnn->vc->colors->rname, info->name,
941 rnn->vc->colors->reset, decoded);
942 }
943 free(decoded);
944 free(info->name);
945 free(info);
946 }
947 }
948
949
950 static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
951 static unsigned mode;
952 static const char *render_mode;
953 static enum {
954 MODE_BINNING = 0x1,
955 MODE_GMEM = 0x2,
956 MODE_BYPASS = 0x4,
957 MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
958 } enable_mask = MODE_ALL;
959 static bool skip_ib2_enable_global;
960 static bool skip_ib2_enable_local;
961
962 static void
963 print_mode(int level)
964 {
965 if ((options->gpu_id >= 500) && !quiet(2)) {
966 printf("%smode: %s\n", levels[level], render_mode);
967 printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global, skip_ib2_enable_local);
968 }
969 }
970
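/* Decide whether to skip the query output for this draw: in QUERY_WRITTEN
 * mode we skip draws where none of the queried registers were (re)written,
 * and in QUERY_DELTA mode we skip draws where none of them changed value.
 */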
971 static bool
972 skip_query(void)
973 {
974 switch (options->query_mode) {
975 case QUERY_ALL:
976 /* never skip: */
977 return false;
978 case QUERY_WRITTEN:
979 for (int i = 0; i < options->nquery; i++) {
980 uint32_t regbase = queryvals[i];
981 if (!reg_written(regbase)) {
982 continue;
983 }
984 if (reg_rewritten(regbase)) {
985 return false;
986 }
987 }
988 return true;
989 case QUERY_DELTA:
990 for (int i = 0; i < options->nquery; i++) {
991 uint32_t regbase = queryvals[i];
992 if (!reg_written(regbase)) {
993 continue;
994 }
995 uint32_t lastval = reg_val(regbase);
996 if (lastval != lastvals[regbase]) {
997 return false;
998 }
999 }
1000 return true;
1001 }
1002 return true;
1003 }
1004
1005 static void
1006 __do_query(const char *primtype, uint32_t num_indices)
1007 {
1008 int n = 0;
1009
1010 if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
1011 uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1012 uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1013
1014 bin_x1 = scissor_tl & 0xffff;
1015 bin_y1 = scissor_tl >> 16;
1016 bin_x2 = scissor_br & 0xffff;
1017 bin_y2 = scissor_br >> 16;
1018 }
1019
1020 for (int i = 0; i < options->nquery; i++) {
1021 uint32_t regbase = queryvals[i];
1022 if (reg_written(regbase)) {
1023 uint32_t lastval = reg_val(regbase);
1024 printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype,
1025 bin_x1, bin_y1, bin_x2, bin_y2, num_indices);
1026 if (options->gpu_id >= 500)
1027 printf("%s:", render_mode);
1028 printf("\t%08x", lastval);
1029 if (lastval != lastvals[regbase]) {
1030 printf("!");
1031 } else {
1032 printf(" ");
1033 }
1034 if (reg_rewritten(regbase)) {
1035 printf("+");
1036 } else {
1037 printf(" ");
1038 }
1039 dump_register_val(regbase, lastval, 0);
1040 n++;
1041 }
1042 }
1043
1044 if (n > 1)
1045 printf("\n");
1046 }
1047
1048 static void
1049 do_query_compare(const char *primtype, uint32_t num_indices)
1050 {
1051 unsigned saved_enable_mask = enable_mask;
1052 const char *saved_render_mode = render_mode;
1053
1054 /* in 'query-compare' mode, we want to see if the register is written
1055 * or changed in any mode:
1056 *
1057 * (NOTE: this could cause false-positive for 'query-delta' if the reg
1058 * is written with different values in binning vs sysmem/gmem mode, as
1059 * we don't track previous values per-mode, but I think we can live with
1060 * that)
1061 */
1062 enable_mask = MODE_ALL;
1063
1064 clear_rewritten();
1065 load_all_groups(0);
1066
1067 if (!skip_query()) {
1068 /* dump binning pass values: */
1069 enable_mask = MODE_BINNING;
1070 render_mode = "BINNING";
1071 clear_rewritten();
1072 load_all_groups(0);
1073 __do_query(primtype, num_indices);
1074
1075 /* dump draw pass values: */
1076 enable_mask = MODE_GMEM | MODE_BYPASS;
1077 render_mode = "DRAW";
1078 clear_rewritten();
1079 load_all_groups(0);
1080 __do_query(primtype, num_indices);
1081
1082 printf("\n");
1083 }
1084
1085 enable_mask = saved_enable_mask;
1086 render_mode = saved_render_mode;
1087
1088 disable_all_groups();
1089 }
1090
1091 /* well, actually query and script..
1092 * NOTE: call this before dump_register_summary()
1093 */
1094 static void
1095 do_query(const char *primtype, uint32_t num_indices)
1096 {
1097 if (script_draw)
1098 script_draw(primtype, num_indices);
1099
1100 if (options->query_compare) {
1101 do_query_compare(primtype, num_indices);
1102 return;
1103 }
1104
1105 if (skip_query())
1106 return;
1107
1108 __do_query(primtype, num_indices);
1109 }
1110
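/* a2xx immediate shader load: dword0 selects the shader type (0=vertex,
 * 1=fragment), dword1 packs the start offset and size, and the shader
 * instructions follow inline in the packet.
 */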
1111 static void
1112 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1113 {
1114 uint32_t start = dwords[1] >> 16;
1115 uint32_t size = dwords[1] & 0xffff;
1116 const char *type = NULL, *ext = NULL;
1117 gl_shader_stage disasm_type;
1118
1119 switch (dwords[0]) {
1120 case 0:
1121 type = "vertex";
1122 ext = "vo";
1123 disasm_type = MESA_SHADER_VERTEX;
1124 break;
1125 case 1:
1126 type = "fragment";
1127 ext = "fo";
1128 disasm_type = MESA_SHADER_FRAGMENT;
1129 break;
1130 default:
1131 type = "<unknown>";
1132 disasm_type = 0;
1133 break;
1134 }
1135
1136 printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start, size);
1137 disasm_a2xx(dwords + 2, sizedwords - 2, level+2, disasm_type);
1138
1139 /* dump raw shader: */
1140 if (ext)
1141 dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1142 }
1143
1144 static void
1145 cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
1146 {
1147 uint32_t reg = dwords[0] & 0xffff;
1148 int i;
1149 for (i = 1; i < sizedwords; i++) {
1150 dump_register(reg, dwords[i], level+1);
1151 reg_set(reg, dwords[i]);
1152 reg++;
1153 }
1154 }
1155
1156 enum state_t {
1157 TEX_SAMP = 1,
1158 TEX_CONST,
1159 TEX_MIPADDR, /* a3xx only */
1160 SHADER_PROG,
1161 SHADER_CONST,
1162
1163 // image/ssbo state:
1164 SSBO_0,
1165 SSBO_1,
1166 SSBO_2,
1167
1168 UBO,
1169
1170 // unknown things, just hexdump them:
1171 UNKNOWN_DWORDS,
1172 UNKNOWN_2DWORDS,
1173 UNKNOWN_4DWORDS,
1174 };
1175
1176 enum adreno_state_block {
1177 SB_VERT_TEX = 0,
1178 SB_VERT_MIPADDR = 1,
1179 SB_FRAG_TEX = 2,
1180 SB_FRAG_MIPADDR = 3,
1181 SB_VERT_SHADER = 4,
1182 SB_GEOM_SHADER = 5,
1183 SB_FRAG_SHADER = 6,
1184 SB_COMPUTE_SHADER = 7,
1185 };
1186
1187 /* TODO there is probably a clever way to let rnndec parse things so
1188 * we don't have to care about packet format differences across gens
1189 */
1190
1191 static void
1192 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1193 enum state_src_t *src)
1194 {
1195 unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1196 unsigned state_type = dwords[1] & 0x3;
1197 static const struct {
1198 gl_shader_stage stage;
1199 enum state_t state;
1200 } lookup[0xf][0x3] = {
1201 [SB_VERT_TEX][0] = { MESA_SHADER_VERTEX, TEX_SAMP },
1202 [SB_VERT_TEX][1] = { MESA_SHADER_VERTEX, TEX_CONST },
1203 [SB_FRAG_TEX][0] = { MESA_SHADER_FRAGMENT, TEX_SAMP },
1204 [SB_FRAG_TEX][1] = { MESA_SHADER_FRAGMENT, TEX_CONST },
1205 [SB_VERT_SHADER][0] = { MESA_SHADER_VERTEX, SHADER_PROG },
1206 [SB_VERT_SHADER][1] = { MESA_SHADER_VERTEX, SHADER_CONST },
1207 [SB_FRAG_SHADER][0] = { MESA_SHADER_FRAGMENT, SHADER_PROG },
1208 [SB_FRAG_SHADER][1] = { MESA_SHADER_FRAGMENT, SHADER_CONST },
1209 };
1210
1211 *stage = lookup[state_block_id][state_type].stage;
1212 *state = lookup[state_block_id][state_type].state;
1213 unsigned state_src = (dwords[0] >> 16) & 0x7;
1214 if (state_src == 0 /* SS_DIRECT */)
1215 *src = STATE_SRC_DIRECT;
1216 else
1217 *src = STATE_SRC_INDIRECT;
1218 }
1219
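/* Decode the state-source field from CP_LOAD_STATE dword0 bits [17:16]
 * (values per the SS4_/SS6_ enums noted in the cases below).
 */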
1220 static enum state_src_t
1221 _get_state_src(unsigned dword0)
1222 {
1223 switch ((dword0 >> 16) & 0x3) {
1224 case 0: /* SS4_DIRECT / SS6_DIRECT */
1225 return STATE_SRC_DIRECT;
1226 case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1227 return STATE_SRC_INDIRECT;
1228 case 1: /* SS6_BINDLESS */
1229 return STATE_SRC_BINDLESS;
1230 default:
1231 return STATE_SRC_DIRECT;
1232 }
1233 }
1234
1235 static void
1236 _get_state_type(unsigned state_block_id, unsigned state_type,
1237 gl_shader_stage *stage, enum state_t *state)
1238 {
1239 static const struct {
1240 gl_shader_stage stage;
1241 enum state_t state;
1242 } lookup[0x10][0x4] = {
1243 // SB4_VS_TEX:
1244 [0x0][0] = { MESA_SHADER_VERTEX, TEX_SAMP },
1245 [0x0][1] = { MESA_SHADER_VERTEX, TEX_CONST },
1246 [0x0][2] = { MESA_SHADER_VERTEX, UBO },
1247 // SB4_HS_TEX:
1248 [0x1][0] = { MESA_SHADER_TESS_CTRL, TEX_SAMP },
1249 [0x1][1] = { MESA_SHADER_TESS_CTRL, TEX_CONST },
1250 [0x1][2] = { MESA_SHADER_TESS_CTRL, UBO },
1251 // SB4_DS_TEX:
1252 [0x2][0] = { MESA_SHADER_TESS_EVAL, TEX_SAMP },
1253 [0x2][1] = { MESA_SHADER_TESS_EVAL, TEX_CONST },
1254 [0x2][2] = { MESA_SHADER_TESS_EVAL, UBO },
1255 // SB4_GS_TEX:
1256 [0x3][0] = { MESA_SHADER_GEOMETRY, TEX_SAMP },
1257 [0x3][1] = { MESA_SHADER_GEOMETRY, TEX_CONST },
1258 [0x3][2] = { MESA_SHADER_GEOMETRY, UBO },
1259 // SB4_FS_TEX:
1260 [0x4][0] = { MESA_SHADER_FRAGMENT, TEX_SAMP },
1261 [0x4][1] = { MESA_SHADER_FRAGMENT, TEX_CONST },
1262 [0x4][2] = { MESA_SHADER_FRAGMENT, UBO },
1263 // SB4_CS_TEX:
1264 [0x5][0] = { MESA_SHADER_COMPUTE, TEX_SAMP },
1265 [0x5][1] = { MESA_SHADER_COMPUTE, TEX_CONST },
1266 [0x5][2] = { MESA_SHADER_COMPUTE, UBO },
1267 // SB4_VS_SHADER:
1268 [0x8][0] = { MESA_SHADER_VERTEX, SHADER_PROG },
1269 [0x8][1] = { MESA_SHADER_VERTEX, SHADER_CONST },
1270 [0x8][2] = { MESA_SHADER_VERTEX, UBO },
1271 // SB4_HS_SHADER
1272 [0x9][0] = { MESA_SHADER_TESS_CTRL, SHADER_PROG },
1273 [0x9][1] = { MESA_SHADER_TESS_CTRL, SHADER_CONST },
1274 [0x9][2] = { MESA_SHADER_TESS_CTRL, UBO },
1275 // SB4_DS_SHADER
1276 [0xa][0] = { MESA_SHADER_TESS_EVAL, SHADER_PROG },
1277 [0xa][1] = { MESA_SHADER_TESS_EVAL, SHADER_CONST },
1278 [0xa][2] = { MESA_SHADER_TESS_EVAL, UBO },
1279 // SB4_GS_SHADER
1280 [0xb][0] = { MESA_SHADER_GEOMETRY, SHADER_PROG },
1281 [0xb][1] = { MESA_SHADER_GEOMETRY, SHADER_CONST },
1282 [0xb][2] = { MESA_SHADER_GEOMETRY, UBO },
1283 // SB4_FS_SHADER:
1284 [0xc][0] = { MESA_SHADER_FRAGMENT, SHADER_PROG },
1285 [0xc][1] = { MESA_SHADER_FRAGMENT, SHADER_CONST },
1286 [0xc][2] = { MESA_SHADER_FRAGMENT, UBO },
1287 // SB4_CS_SHADER:
1288 [0xd][0] = { MESA_SHADER_COMPUTE, SHADER_PROG },
1289 [0xd][1] = { MESA_SHADER_COMPUTE, SHADER_CONST },
1290 [0xd][2] = { MESA_SHADER_COMPUTE, UBO },
1291 [0xd][3] = { MESA_SHADER_COMPUTE, SSBO_0 }, /* a6xx location */
1292 // SB4_SSBO (shared across all stages)
1293 [0xe][0] = { 0, SSBO_0 }, /* a5xx (and a4xx?) location */
1294 [0xe][1] = { 0, SSBO_1 },
1295 [0xe][2] = { 0, SSBO_2 },
1296 // SB4_CS_SSBO
1297 [0xf][0] = { MESA_SHADER_COMPUTE, SSBO_0 },
1298 [0xf][1] = { MESA_SHADER_COMPUTE, SSBO_1 },
1299 [0xf][2] = { MESA_SHADER_COMPUTE, SSBO_2 },
1300 // unknown things
1301 /* This looks like combined UBO state for the 3d stages (a5xx and
1302 * before??). I think a6xx has UBO state per shader stage:
1303 */
1304 [0x6][2] = { 0, UBO },
1305 [0x7][1] = { 0, UNKNOWN_2DWORDS },
1306 };
1307
1308 *stage = lookup[state_block_id][state_type].stage;
1309 *state = lookup[state_block_id][state_type].state;
1310 }
1311
1312 static void
1313 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1314 enum state_src_t *src)
1315 {
1316 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1317 unsigned state_type = dwords[1] & 0x3;
1318 _get_state_type(state_block_id, state_type, stage, state);
1319 *src = _get_state_src(dwords[0]);
1320 }
1321
1322 static void
1323 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1324 enum state_src_t *src)
1325 {
1326 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1327 unsigned state_type = (dwords[0] >> 14) & 0x3;
1328 _get_state_type(state_block_id, state_type, stage, state);
1329 *src = _get_state_src(dwords[0]);
1330 }
1331
1332 static void
1333 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1334 {
1335 for (int i = 0; i < num_unit; i++) {
1336 /* work-around to reduce noise for opencl blob which always
1337 * writes the max # regardless of # of textures used
1338 */
1339 if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1340 break;
1341
1342 if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1343 dump_domain(texsamp, 2, level+2, "A3XX_TEX_SAMP");
1344 dump_hex(texsamp, 2, level+1);
1345 texsamp += 2;
1346 } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1347 dump_domain(texsamp, 2, level+2, "A4XX_TEX_SAMP");
1348 dump_hex(texsamp, 2, level+1);
1349 texsamp += 2;
1350 } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1351 dump_domain(texsamp, 4, level+2, "A5XX_TEX_SAMP");
1352 dump_hex(texsamp, 4, level+1);
1353 texsamp += 4;
1354 } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1355 dump_domain(texsamp, 4, level+2, "A6XX_TEX_SAMP");
1356 dump_hex(texsamp, 4, level+1);
1357 texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1358 }
1359 }
1360 }
1361
1362 static void
1363 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1364 {
1365 for (int i = 0; i < num_unit; i++) {
1366 /* work-around to reduce noise for opencl blob which always
1367 * writes the max # regardless of # of textures used
1368 */
1369 if ((num_unit == 16) &&
1370 (texconst[0] == 0) && (texconst[1] == 0) &&
1371 (texconst[2] == 0) && (texconst[3] == 0))
1372 break;
1373
1374 if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1375 dump_domain(texconst, 4, level+2, "A3XX_TEX_CONST");
1376 dump_hex(texconst, 4, level+1);
1377 texconst += 4;
1378 } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1379 dump_domain(texconst, 8, level+2, "A4XX_TEX_CONST");
1380 if (options->dump_textures) {
1381 uint32_t addr = texconst[4] & ~0x1f;
1382 dump_gpuaddr(addr, level-2);
1383 }
1384 dump_hex(texconst, 8, level+1);
1385 texconst += 8;
1386 } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1387 dump_domain(texconst, 12, level+2, "A5XX_TEX_CONST");
1388 if (options->dump_textures) {
1389 uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1390 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1391 }
1392 dump_hex(texconst, 12, level+1);
1393 texconst += 12;
1394 } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1395 dump_domain(texconst, 16, level+2, "A6XX_TEX_CONST");
1396 if (options->dump_textures) {
1397 uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1398 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1399 }
1400 dump_hex(texconst, 16, level+1);
1401 texconst += 16;
1402 }
1403 }
1404 }
1405
1406 static void
1407 cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1408 {
1409 gl_shader_stage stage;
1410 enum state_t state;
1411 enum state_src_t src;
1412 uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1413 uint64_t ext_src_addr;
1414 void *contents;
1415 int i;
1416
1417 if (quiet(2) && !options->script)
1418 return;
1419
1420 if (options->gpu_id >= 600)
1421 a6xx_get_state_type(dwords, &stage, &state, &src);
1422 else if (options->gpu_id >= 400)
1423 a4xx_get_state_type(dwords, &stage, &state, &src);
1424 else
1425 a3xx_get_state_type(dwords, &stage, &state, &src);
1426
1427 switch (src) {
1428 case STATE_SRC_DIRECT: ext_src_addr = 0; break;
1429 case STATE_SRC_INDIRECT:
1430 if (is_64b()) {
1431 ext_src_addr = dwords[1] & 0xfffffffc;
1432 ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1433 } else {
1434 ext_src_addr = dwords[1] & 0xfffffffc;
1435 }
1436
1437 break;
1438 case STATE_SRC_BINDLESS: {
1439 const unsigned base_reg =
1440 stage == MESA_SHADER_COMPUTE ?
1441 regbase("HLSQ_CS_BINDLESS_BASE[0]") :
1442 regbase("HLSQ_BINDLESS_BASE[0]");
1443
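/* dwords[1] bits [31:28] select which HLSQ bindless base to use (a lo/hi
 * register pair on 64b GPUs), and bits [23:0] give a dword offset into
 * that descriptor set.
 */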
1444 if (is_64b()) {
1445 const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1446 ext_src_addr = reg_val(reg) & 0xfffffffc;
1447 ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1448 } else {
1449 const unsigned reg = base_reg + (dwords[1] >> 28);
1450 ext_src_addr = reg_val(reg) & 0xfffffffc;
1451 }
1452
1453 ext_src_addr += 4 * (dwords[1] & 0xffffff);
1454 break;
1455 }
1456 }
1457
1458 if (ext_src_addr)
1459 contents = hostptr(ext_src_addr);
1460 else
1461 contents = is_64b() ? dwords + 3 : dwords + 2;
1462
1463 if (!contents)
1464 return;
1465
1466 switch (state) {
1467 case SHADER_PROG: {
1468 const char *ext = NULL;
1469
1470 if (quiet(2))
1471 return;
1472
1473 if (options->gpu_id >= 400)
1474 num_unit *= 16;
1475 else if (options->gpu_id >= 300)
1476 num_unit *= 4;
1477
1478 /* shaders:
1479 *
1480 * note: num_unit seems to be # of instruction groups, where
1481 * an instruction group has 4 64bit instructions.
1482 */
1483 if (stage == MESA_SHADER_VERTEX) {
1484 ext = "vo3";
1485 } else if (stage == MESA_SHADER_GEOMETRY) {
1486 ext = "go3";
1487 } else if (stage == MESA_SHADER_COMPUTE) {
1488 ext = "co3";
1489 } else if (stage == MESA_SHADER_FRAGMENT){
1490 ext = "fo3";
1491 }
1492
1493 if (contents)
1494 disasm_a3xx(contents, num_unit * 2, level+2, stdout, options->gpu_id);
1495
1496 /* dump raw shader: */
1497 if (ext)
1498 dump_shader(ext, contents, num_unit * 2 * 4);
1499
1500 break;
1501 }
1502 case SHADER_CONST: {
1503 if (quiet(2))
1504 return;
1505
1506 /* uniforms/consts:
1507 *
1508 * note: num_unit seems to be # of pairs of dwords??
1509 */
1510
1511 if (options->gpu_id >= 400)
1512 num_unit *= 2;
1513
1514 dump_float(contents, num_unit*2, level+1);
1515 dump_hex(contents, num_unit*2, level+1);
1516
1517 break;
1518 }
1519 case TEX_MIPADDR: {
1520 uint32_t *addrs = contents;
1521
1522 if (quiet(2))
1523 return;
1524
1525 /* mipmap consts block just appears to be an array of num_unit gpu addrs: */
1526 for (i = 0; i < num_unit; i++) {
1527 void *ptr = hostptr(addrs[i]);
1528 printf("%s%2d: %08x\n", levels[level+1], i, addrs[i]);
1529 if (options->dump_textures) {
1530 printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1531 dump_hex(ptr, hostlen(addrs[i])/4, level+1);
1532 }
1533 }
1534 break;
1535 }
1536 case TEX_SAMP: {
1537 dump_tex_samp(contents, src, num_unit, level);
1538 break;
1539 }
1540 case TEX_CONST: {
1541 dump_tex_const(contents, num_unit, level);
1542 break;
1543 }
1544 case SSBO_0: {
1545 uint32_t *ssboconst = (uint32_t *)contents;
1546
1547 for (i = 0; i < num_unit; i++) {
1548 int sz = 4;
1549 if (400 <= options->gpu_id && options->gpu_id < 500) {
1550 dump_domain(ssboconst, 4, level+2, "A4XX_SSBO_0");
1551 } else if (500 <= options->gpu_id && options->gpu_id < 600) {
1552 dump_domain(ssboconst, 4, level+2, "A5XX_SSBO_0");
1553 } else if (600 <= options->gpu_id && options->gpu_id < 700) {
1554 sz = 16;
1555 dump_domain(ssboconst, 16, level+2, "A6XX_IBO");
1556 }
1557 dump_hex(ssboconst, sz, level+1);
1558 ssboconst += sz;
1559 }
1560 break;
1561 }
1562 case SSBO_1: {
1563 uint32_t *ssboconst = (uint32_t *)contents;
1564
1565 for (i = 0; i < num_unit; i++) {
1566 if (400 <= options->gpu_id && options->gpu_id < 500)
1567 dump_domain(ssboconst, 2, level+2, "A4XX_SSBO_1");
1568 else if (500 <= options->gpu_id && options->gpu_id < 600)
1569 dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_1");
1570 dump_hex(ssboconst, 2, level+1);
1571 ssboconst += 2;
1572 }
1573 break;
1574 }
1575 case SSBO_2: {
1576 uint32_t *ssboconst = (uint32_t *)contents;
1577
1578 for (i = 0; i < num_unit; i++) {
1579 /* TODO a4xx and a5xx might be same: */
1580 if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1581 dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_2");
1582 dump_hex(ssboconst, 2, level+1);
1583 }
1584 if (options->dump_textures) {
1585 uint64_t addr = (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1586 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1587 }
1588 ssboconst += 2;
1589 }
1590 break;
1591 }
1592 case UBO: {
1593 uint32_t *uboconst = (uint32_t *)contents;
1594
1595 for (i = 0; i < num_unit; i++) {
1596 // TODO probably similar on a4xx..
1597 if (500 <= options->gpu_id && options->gpu_id < 600)
1598 dump_domain(uboconst, 2, level+2, "A5XX_UBO");
1599 else if (600 <= options->gpu_id && options->gpu_id < 700)
1600 dump_domain(uboconst, 2, level+2, "A6XX_UBO");
1601 dump_hex(uboconst, 2, level+1);
1602 uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1603 }
1604 break;
1605 }
1606 case UNKNOWN_DWORDS: {
1607 if (quiet(2))
1608 return;
1609 dump_hex(contents, num_unit, level+1);
1610 break;
1611 }
1612 case UNKNOWN_2DWORDS: {
1613 if (quiet(2))
1614 return;
1615 dump_hex(contents, num_unit * 2, level+1);
1616 break;
1617 }
1618 case UNKNOWN_4DWORDS: {
1619 if (quiet(2))
1620 return;
1621 dump_hex(contents, num_unit * 4, level+1);
1622 break;
1623 }
1624 default:
1625 if (quiet(2))
1626 return;
1627 /* hmm.. */
1628 dump_hex(contents, num_unit, level+1);
1629 break;
1630 }
1631 }
1632
1633 static void
1634 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1635 {
1636 bin_x1 = dwords[1] & 0xffff;
1637 bin_y1 = dwords[1] >> 16;
1638 bin_x2 = dwords[2] & 0xffff;
1639 bin_y2 = dwords[2] >> 16;
1640 }
1641
1642 static void
1643 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
1644 {
1645 uint32_t w, h, p;
1646 uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1647 uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1648 static const char *filter[] = {
1649 "point", "bilinear", "bicubic",
1650 };
1651 static const char *clamp[] = {
1652 "wrap", "mirror", "clamp-last-texel",
1653 };
1654 static const char swiznames[] = "xyzw01??";
1655
1656 /* see sys2gmem_tex_const[] in adreno_a2xx.c */
1657
1658 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1659 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1660 */
1661 p = (dwords[0] >> 22) << 5;
1662 clamp_x = (dwords[0] >> 10) & 0x3;
1663 clamp_y = (dwords[0] >> 13) & 0x3;
1664 clamp_z = (dwords[0] >> 16) & 0x3;
1665
1666 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1667 * NearestClamp=1:OGL Mode
1668 */
1669 parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1670
1671 /* Width, Height, EndianSwap=0:None */
1672 w = (dwords[2] & 0x1fff) + 1;
1673 h = ((dwords[2] >> 13) & 0x1fff) + 1;
1674
1675 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1676 * Mip=2:BaseMap
1677 */
1678 mag = (dwords[3] >> 19) & 0x3;
1679 min = (dwords[3] >> 21) & 0x3;
1680 swiz = (dwords[3] >> 1) & 0xfff;
1681
1682 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1683 * Dim3d=0
1684 */
1685 // XXX
1686
1687 /* BorderColor=0:ABGRBlack, ForceBC=0:disable, TriJuice=0, Aniso=0,
1688 * Dim=1:2d, MipPacking=0
1689 */
1690 parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1691
1692 printf("%sset texture const %04x\n", levels[level], val);
1693 printf("%sclamp x/y/z: %s/%s/%s\n", levels[level+1],
1694 clamp[clamp_x], clamp[clamp_y], clamp[clamp_z]);
1695 printf("%sfilter min/mag: %s/%s\n", levels[level+1], filter[min], filter[mag]);
1696 printf("%sswizzle: %c%c%c%c\n", levels[level+1],
1697 swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1698 swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1699 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1700 levels[level+1], gpuaddr, flags, w, h, p,
1701 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1702 printf("%smipaddr=%08x (flags=%03x)\n", levels[level+1],
1703 mip_gpuaddr, mip_flags);
1704 }
1705
1706 static void
1707 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
1708 {
1709 int i;
1710 printf("%sset shader const %04x\n", levels[level], val);
1711 for (i = 0; i < sizedwords; ) {
1712 uint32_t gpuaddr, flags;
1713 parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1714 void *addr = hostptr(gpuaddr);
1715 if (addr) {
1716 const char * fmt =
1717 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1718 uint32_t size = dwords[i++];
1719 printf("%saddr=%08x, size=%d, format=%s\n", levels[level+1],
1720 gpuaddr, size, fmt);
1721 // TODO maybe dump these as bytes instead of dwords?
1722 size = (size + 3) / 4; // for now convert to dwords
1723 dump_hex(addr, min(size, 64), level + 1);
1724 if (size > min(size, 64))
1725 printf("%s\t\t...\n", levels[level+1]);
1726 dump_float(addr, min(size, 64), level + 1);
1727 if (size > min(size, 64))
1728 printf("%s\t\t...\n", levels[level+1]);
1729 }
1730 }
1731 }
1732
1733 static void
1734 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1735 {
1736 uint32_t val = dwords[0] & 0xffff;
1737 switch((dwords[0] >> 16) & 0xf) {
1738 case 0x0:
1739 dump_float((float *)(dwords+1), sizedwords-1, level+1);
1740 break;
1741 case 0x1:
1742 /* need to figure out how const space is partitioned between
1743 * attributes, textures, etc..
1744 */
1745 if (val < 0x78) {
1746 dump_a2xx_tex_const(dwords+1, sizedwords-1, val, level);
1747 } else {
1748 dump_a2xx_shader_const(dwords+1, sizedwords-1, val, level);
1749 }
1750 break;
1751 case 0x2:
1752 printf("%sset bool const %04x\n", levels[level], val);
1753 break;
1754 case 0x3:
1755 printf("%sset loop const %04x\n", levels[level], val);
1756 break;
1757 case 0x4:
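/* type 4 const writes are really register writes, offset from 0x2000: */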
1758 val += 0x2000;
1759 if (dwords[0] & 0x80000000) {
1760 uint32_t srcreg = dwords[1];
1761 uint32_t dstval = dwords[2];
1762
1763 /* TODO: not sure what happens w/ payload != 2.. */
1764 assert(sizedwords == 3);
1765 assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1766
1767 /* note: rnn_regname uses a static buf so we can't do
1768 * two regname() calls for one printf..
1769 */
1770 printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1771 printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1772
1773 dstval += type0_reg_vals[srcreg];
1774
1775 dump_registers(val, &dstval, 1, level+1);
1776 } else {
1777 dump_registers(val, dwords+1, sizedwords-1, level+1);
1778 }
1779 break;
1780 }
1781 }
1782
1783 static void dump_register_summary(int level);
1784
1785 static void
1786 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1787 {
1788 const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1789 printl(2, "%sevent %s\n", levels[level], name);
1790
1791 if (name && (options->gpu_id > 500)) {
1792 char eventname[64];
1793 snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1794 if (!strcmp(name, "BLIT")) {
1795 do_query(eventname, 0);
1796 print_mode(level);
1797 dump_register_summary(level);
1798 }
1799 }
1800 }
1801
1802 static void
1803 dump_register_summary(int level)
1804 {
1805 uint32_t i;
1806 bool saved_summary = summary;
1807 summary = false;
1808
1809 in_summary = true;
1810
1811 /* dump current state of registers: */
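/* ('!' marks a register whose value changed since the previous summary,
 * '+' marks one written since the last draw/blit)
 */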
1812 printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1813 for (i = 0; i < regcnt(); i++) {
1814 uint32_t regbase = i;
1815 uint32_t lastval = reg_val(regbase);
1816 /* skip registers that haven't been updated since last draw/blit: */
1817 if (!(options->allregs || reg_rewritten(regbase)))
1818 continue;
1819 if (!reg_written(regbase))
1820 continue;
1821 if (lastval != lastvals[regbase]) {
1822 printl(2, "!");
1823 lastvals[regbase] = lastval;
1824 } else {
1825 printl(2, " ");
1826 }
1827 if (reg_rewritten(regbase)) {
1828 printl(2, "+");
1829 } else {
1830 printl(2, " ");
1831 }
1832 printl(2, "\t%08x", lastval);
1833 if (!quiet(2)) {
1834 dump_register(regbase, lastval, level);
1835 }
1836 }
1837
1838 clear_rewritten();
1839
1840 in_summary = false;
1841
1842 draw_count++;
1843 summary = saved_summary;
1844 }
1845
1846 static uint32_t
1847 draw_indx_common(uint32_t *dwords, int level)
1848 {
1849 uint32_t prim_type = dwords[1] & 0x1f;
1850 uint32_t source_select = (dwords[1] >> 6) & 0x3;
1851 uint32_t num_indices = dwords[2];
1852 const char *primtype;
1853
1854 primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1855
1856 do_query(primtype, num_indices);
1857
1858 printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
1859 printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype,
1860 prim_type);
1861 printl(2, "%ssource_select: %s (%d)\n", levels[level],
1862 rnn_enumname(rnn, "pc_di_src_sel", source_select),
1863 source_select);
1864 printl(2, "%snum_indices: %d\n", levels[level], num_indices);
1865
1866 vertices += num_indices;
1867
1868 draws[ib]++;
1869
1870 return num_indices;
1871 }
1872
1873 enum pc_di_index_size {
1874 INDEX_SIZE_IGN = 0,
1875 INDEX_SIZE_16_BIT = 0,
1876 INDEX_SIZE_32_BIT = 1,
1877 INDEX_SIZE_8_BIT = 2,
1878 INDEX_SIZE_INVALID = 0,
1879 };
1880
1881 static void
1882 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1883 {
1884 uint32_t num_indices = draw_indx_common(dwords, level);
1885
1886 assert(!is_64b());
1887
1888 /* if we have an index buffer, dump that: */
1889 if (sizedwords == 5) {
1890 void *ptr = hostptr(dwords[3]);
1891 printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
1892 printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
1893 if (ptr) {
1894 enum pc_di_index_size size =
1895 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1896 if (!quiet(2)) {
1897 int i;
1898 printf("%sidxs: ", levels[level]);
1899 if (size == INDEX_SIZE_8_BIT) {
1900 uint8_t *idx = ptr;
1901 for (i = 0; i < dwords[4]; i++)
1902 printf(" %u", idx[i]);
1903 } else if (size == INDEX_SIZE_16_BIT) {
1904 uint16_t *idx = ptr;
1905 for (i = 0; i < dwords[4]/2; i++)
1906 printf(" %u", idx[i]);
1907 } else if (size == INDEX_SIZE_32_BIT) {
1908 uint32_t *idx = ptr;
1909 for (i = 0; i < dwords[4]/4; i++)
1910 printf(" %u", idx[i]);
1911 }
1912 printf("\n");
1913 dump_hex(ptr, dwords[4]/4, level+1);
1914 }
1915 }
1916 }
1917
1918 /* don't bother dumping registers for the dummy draw_indx's.. */
1919 if (num_indices > 0)
1920 dump_register_summary(level);
1921
1922 needs_wfi = true;
1923 }
1924
1925 static void
1926 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
1927 {
1928 uint32_t num_indices = draw_indx_common(dwords, level);
1929 enum pc_di_index_size size =
1930 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1931 void *ptr = &dwords[3];
1932 int sz = 0;
1933
1934 assert(!is_64b());
1935
1936 /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1937 if (!quiet(2)) {
1938 int i;
1939 printf("%sidxs: ", levels[level]);
1940 if (size == INDEX_SIZE_8_BIT) {
1941 uint8_t *idx = ptr;
1942 for (i = 0; i < num_indices; i++)
1943 printf(" %u", idx[i]);
1944 sz = num_indices;
1945 } else if (size == INDEX_SIZE_16_BIT) {
1946 uint16_t *idx = ptr;
1947 for (i = 0; i < num_indices; i++)
1948 printf(" %u", idx[i]);
1949 sz = num_indices * 2;
1950 } else if (size == INDEX_SIZE_32_BIT) {
1951 uint32_t *idx = ptr;
1952 for (i = 0; i < num_indices; i++)
1953 printf(" %u", idx[i]);
1954 sz = num_indices * 4;
1955 }
1956 printf("\n");
1957 dump_hex(ptr, sz / 4, level+1);
1958 }
1959
1960 /* don't bother dumping registers for the dummy draw_indx's.. */
1961 if (num_indices > 0)
1962 dump_register_summary(level);
1963 }
1964
1965 static void
1966 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
1967 {
1968 uint32_t num_indices = dwords[2];
1969 uint32_t prim_type = dwords[0] & 0x1f;
1970
1971 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
1972 print_mode(level);
1973
1974 /* don't bother dumping registers for the dummy draw_indx's.. */
1975 if (num_indices > 0)
1976 dump_register_summary(level);
1977 }
1978
1979 static void
1980 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
1981 {
1982 uint32_t prim_type = dwords[0] & 0x1f;
1983 uint64_t addr;
1984
1985 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
1986 print_mode(level);
1987
1988 if (is_64b())
1989 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
1990 else
1991 addr = dwords[1];
1992 dump_gpuaddr_size(addr, level, 0x10, 2);
1993
1994 if (is_64b())
1995 addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
1996 else
1997 addr = dwords[3];
1998 dump_gpuaddr_size(addr, level, 0x10, 2);
1999
2000 dump_register_summary(level);
2001 }
2002
2003 static void
2004 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2005 {
2006 uint32_t prim_type = dwords[0] & 0x1f;
2007 uint64_t addr;
2008
2009 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2010 print_mode(level);
2011
2012 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2013 dump_gpuaddr_size(addr, level, 0x10, 2);
2014
2015 dump_register_summary(level);
2016 }
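/* The indirect draw packets above, and CP_EXEC_CS_INDIRECT further down,
 * assemble a 64b GPU address the same way on a5xx+: the low dword holds the
 * full lower 32 bits and only the low 17 bits of the high dword are treated
 * as address bits.  A hedged sketch of that pattern, with an illustrative
 * helper name of our own:
 */
#if 0
static uint64_t
indirect_draw_gpuaddr(uint32_t lo, uint32_t hi)
{
	return (((uint64_t)hi & 0x1ffff) << 32) | lo;
}
#endif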
2017
2018 static void
2019 cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
2020 {
2021 do_query("COMPUTE", 1);
2022 dump_register_summary(level);
2023 }
2024
2025 static void
2026 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2027 {
2028 const char *buf = (void *)dwords;
2029 int i;
2030
2031 if (quiet(3))
2032 return;
2033
2034 // the blob doesn't use CP_NOP for string_marker, but it does
2035 // use it for payloads that end up looking like (but aren't)
2036 // ascii chars:
2037 if (!options->decode_markers)
2038 return;
2039
2040 for (i = 0; i < 4 * sizedwords; i++) {
2041 if (buf[i] == '\0')
2042 break;
2043 if (isascii(buf[i]))
2044 printf("%c", buf[i]);
2045 }
2046 printf("\n");
2047 }
2048
2049 static void
2050 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2051 {
2052 /* traverse indirect buffers */
2053 uint64_t ibaddr;
2054 uint32_t ibsize;
2055 uint32_t *ptr = NULL;
2056
2057 if (is_64b()) {
2058 /* a5xx+.. high 32b of gpu addr, then size: */
2059 ibaddr = dwords[0];
2060 ibaddr |= ((uint64_t)dwords[1]) << 32;
2061 ibsize = dwords[2];
2062 } else {
2063 ibaddr = dwords[0];
2064 ibsize = dwords[1];
2065 }
2066
2067 if (!quiet(3)) {
2068 if (is_64b()) {
2069 printf("%sibaddr:%016lx\n", levels[level], ibaddr);
2070 } else {
2071 printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2072 }
2073 printf("%sibsize:%08x\n", levels[level], ibsize);
2074 }
2075
2076 if (options->once && has_dumped(ibaddr, enable_mask))
2077 return;
2078
2079 /* 'query-compare' mode implies 'once' mode, although we only need to
2080 * process the cmdstream once for *any* enable_mask mode, since we are
2081 * comparing binning vs draw reg values at the same time, ie. there is
2082 * no point in processing the same draw in both binning and draw passes.
2083 */
2084 if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2085 return;
2086
2087 /* map gpuaddr back to hostptr: */
2088 ptr = hostptr(ibaddr);
2089
2090 if (ptr) {
2091 /* If the GPU hung within the target IB, the trigger point will be
2092 * just after the current CP_INDIRECT_BUFFER, because the IB is
2093 * executed but never returns. Account for this by checking whether
2094 * execution made it back past this point:
2095 */
2096 highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
2097
2098 ib++;
2099 ibs[ib].base = ibaddr;
2100 ibs[ib].size = ibsize;
2101
2102 dump_commands(ptr, ibsize, level);
2103 ib--;
2104 } else {
2105 fprintf(stderr, "could not find: %016"PRIx64" (%d)\n", ibaddr, ibsize);
2106 }
2107 }
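/* Note that IB nesting is tracked by the ib++/ib-- bracket around the
 * recursive dump_commands() call above, so draws[] and the per-IB trigger
 * bookkeeping always refer to the IB level currently being decoded.
 */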
2108
2109 static void
2110 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2111 {
2112 needs_wfi = false;
2113 }
2114
2115 static void
2116 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2117 {
2118 if (quiet(2))
2119 return;
2120
2121 if (is_64b()) {
2122 uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2123 printf("%sgpuaddr:%016lx\n", levels[level], gpuaddr);
2124 dump_hex(&dwords[2], sizedwords-2, level+1);
2125
2126 if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2127 dump_commands(&dwords[2], sizedwords-2, level+1);
2128 } else {
2129 uint32_t gpuaddr = dwords[0];
2130 printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2131 dump_float((float *)&dwords[1], sizedwords-1, level+1);
2132 }
2133 }
2134
2135 static void
2136 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2137 {
2138 uint32_t val = dwords[0] & 0xffff;
2139 uint32_t and = dwords[1];
2140 uint32_t or = dwords[2];
2141 printl(3, "%srmw (%s & 0x%08x) | 0x%08x\n", levels[level], regname(val, 1), and, or);
2142 if (needs_wfi)
2143 printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x\n", regname(val, 1), and, or);
2144 reg_set(val, (reg_val(val) & and) | or);
2145 }
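/* The update above is a plain shadowed read-modify-write,
 * new = (old & AND) | OR.  For example, with purely illustrative values
 * old = 0x0000f00d, AND = 0xffff0000, OR = 0x00000042:
 *
 *   new = (0x0000f00d & 0xffff0000) | 0x00000042 = 0x00000042
 *
 * which is exactly what the reg_set() call computes.
 */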
2146
2147 static void
2148 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2149 {
2150 uint32_t val = dwords[0] & 0xffff;
2151 printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2152
2153 if (quiet(2))
2154 return;
2155
2156 uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2157 printf("%sgpuaddr:%016lx\n", levels[level], gpuaddr);
2158 void *ptr = hostptr(gpuaddr);
2159 if (ptr) {
2160 uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2161 dump_hex(ptr, cnt, level + 1);
2162 }
2163 }
2164
2165 struct draw_state {
2166 uint16_t enable_mask;
2167 uint16_t flags;
2168 uint32_t count;
2169 uint64_t addr;
2170 };
2171
2172 static struct draw_state state[32];
2173
2174 #define FLAG_DIRTY 0x1
2175 #define FLAG_DISABLE 0x2
2176 #define FLAG_DISABLE_ALL_GROUPS 0x4
2177 #define FLAG_LOAD_IMMED 0x8
2178
2179 static int draw_mode;
2180
2181 static void
2182 disable_group(unsigned group_id)
2183 {
2184 struct draw_state *ds = &state[group_id];
2185 memset(ds, 0, sizeof(*ds));
2186 }
2187
2188 static void
2189 disable_all_groups(void)
2190 {
2191 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2192 disable_group(i);
2193 }
2194
2195 static void
2196 load_group(unsigned group_id, int level)
2197 {
2198 struct draw_state *ds = &state[group_id];
2199
2200 if (!ds->count)
2201 return;
2202
2203 printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2204 printl(2, "%scount: %d\n", levels[level], ds->count);
2205 printl(2, "%saddr: %016"PRIx64"\n", levels[level], ds->addr);
2206 printl(2, "%sflags: %x\n", levels[level], ds->flags);
2207
2208 if (options->gpu_id >= 600) {
2209 printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2210
2211 if (!(ds->enable_mask & enable_mask)) {
2212 printl(2, "%s\tskipped!\n\n", levels[level]);
2213 return;
2214 }
2215 }
2216
2217 void *ptr = hostptr(ds->addr);
2218 if (ptr) {
2219 if (!quiet(2))
2220 dump_hex(ptr, ds->count, level+1);
2221
2222 ib++;
2223 dump_commands(ptr, ds->count, level+1);
2224 ib--;
2225 }
2226 }
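/* On a6xx a group is only replayed when its enable_mask intersects the
 * decoder's current mode (which cp_set_marker() below derives from the
 * RM6_* render mode), e.g. a group recorded for the binning pass only is
 * skipped while decoding in MODE_GMEM.
 */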
2227
2228 static void
2229 load_all_groups(int level)
2230 {
2231 /* sanity check: we should never recurse back into this, and if
2232 * we do bad things happen:
2233 */
2234 static bool loading_groups = false;
2235 if (loading_groups) {
2236 printf("ERROR: nothing in draw state should trigger recursively loading groups!\n");
2237 return;
2238 }
2239 loading_groups = true;
2240 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2241 load_group(i, level);
2242 loading_groups = false;
2243
2244 /* in 'query-compare' mode, defer disabling all groups until we have a
2245 * chance to process the query:
2246 */
2247 if (!options->query_compare)
2248 disable_all_groups();
2249 }
2250
2251 static void
2252 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2253 {
2254 uint32_t i;
2255
2256 for (i = 0; i < sizedwords; ) {
2257 struct draw_state *ds;
2258 uint32_t count = dwords[i] & 0xffff;
2259 uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2260 uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2261 uint32_t flags = (dwords[i] >> 16) & 0xf;
2262 uint64_t addr;
2263
2264 if (is_64b()) {
2265 addr = dwords[i + 1];
2266 addr |= ((uint64_t)dwords[i + 2]) << 32;
2267 i += 3;
2268 } else {
2269 addr = dwords[i + 1];
2270 i += 2;
2271 }
2272
2273 if (flags & FLAG_DISABLE_ALL_GROUPS) {
2274 disable_all_groups();
2275 continue;
2276 }
2277
2278 if (flags & FLAG_DISABLE) {
2279 disable_group(group_id);
2280 continue;
2281 }
2282
2283 assert(group_id < ARRAY_SIZE(state));
2284 disable_group(group_id);
2285
2286 ds = &state[group_id];
2287
2288 ds->enable_mask = enable_mask;
2289 ds->flags = flags;
2290 ds->count = count;
2291 ds->addr = addr;
2292
2293 if (flags & FLAG_LOAD_IMMED) {
2294 load_group(group_id, level);
2295 disable_group(group_id);
2296 }
2297 }
2298 }
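/* A hedged sketch (not part of the decoder) of how the loop above carves up
 * each per-group header dword of CP_SET_DRAW_STATE; the struct and helper
 * names here are illustrative only:
 */
#if 0
struct draw_state_hdr {
	uint32_t count;       /* bits  0..15: size of the group in dwords */
	uint32_t flags;       /* bits 16..19: FLAG_* bits defined above */
	uint32_t enable_mask; /* bits 20..23: which passes the group applies to */
	uint32_t group_id;    /* bits 24..28: state group index */
};

static struct draw_state_hdr
unpack_draw_state_hdr(uint32_t hdr)
{
	return (struct draw_state_hdr) {
		.count       = hdr & 0xffff,
		.flags       = (hdr >> 16) & 0xf,
		.enable_mask = (hdr >> 20) & 0xf,
		.group_id    = (hdr >> 24) & 0x1f,
	};
}
#endif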
2299
2300 static void
2301 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2302 {
2303 draw_mode = dwords[0];
2304 }
2305
2306 /* execute compute shader */
2307 static void
2308 cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
2309 {
2310 do_query("compute", 0);
2311 dump_register_summary(level);
2312 }
2313
2314 static void
2315 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2316 {
2317 uint64_t addr;
2318
2319 if (is_64b()) {
2320 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2321 } else {
2322 addr = dwords[1];
2323 }
2324
2325 printl(3, "%saddr: %016"PRIx64"\n", levels[level], addr);
2326 dump_gpuaddr_size(addr, level, 0x10, 2);
2327
2328 do_query("compute", 0);
2329 dump_register_summary(level);
2330 }
2331
2332 static void
2333 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2334 {
2335 render_mode = rnn_enumname(rnn, "a6xx_render_mode", dwords[0] & 0xf);
2336
2337 if (!strcmp(render_mode, "RM6_BINNING")) {
2338 enable_mask = MODE_BINNING;
2339 } else if (!strcmp(render_mode, "RM6_GMEM")) {
2340 enable_mask = MODE_GMEM;
2341 } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2342 enable_mask = MODE_BYPASS;
2343 }
2344 }
2345
2346 static void
2347 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2348 {
2349 uint64_t addr;
2350 uint32_t *ptr, len;
2351
2352 assert(is_64b());
2353
2354 /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2355 * not sure if this can come in different sizes.
2356 *
2357 * First ptr doesn't seem to be cmdstream, second one does.
2358 *
2359 * Comment from downstream kernel:
2360 *
2361 * SRM -- set render mode (ex binning, direct render etc)
2362 * SRM is set by UMD usually at start of IB to tell CP the type of
2363 * preemption.
2364 * KMD needs to set SRM to NULL to indicate CP that rendering is
2365 * done by IB.
2366 * ------------------------------------------------------------------
2367 *
2368 * Seems to always be one of these two:
2369 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000
2370 * 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000
2371 *
2372 */
2373
2374 assert(options->gpu_id >= 500);
2375
2376 render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2377
2378 if (sizedwords == 1)
2379 return;
2380
2381 addr = dwords[1];
2382 addr |= ((uint64_t)dwords[2]) << 32;
2383
2384 mode = dwords[3];
2385
2386 dump_gpuaddr(addr, level+1);
2387
2388 if (sizedwords == 5)
2389 return;
2390
2391 assert(sizedwords == 8);
2392
2393 len = dwords[5];
2394 addr = dwords[6];
2395 addr |= ((uint64_t)dwords[7]) << 32;
2396
2397 printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2398 printl(3, "%slen: 0x%x\n", levels[level], len);
2399
2400 ptr = hostptr(addr);
2401
2402 if (ptr) {
2403 if (!quiet(2)) {
2404 ib++;
2405 dump_commands(ptr, len, level+1);
2406 ib--;
2407 dump_hex(ptr, len, level+1);
2408 }
2409 }
2410 }
2411
2412 static void
2413 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2414 {
2415 uint64_t addr;
2416 uint32_t *ptr, len;
2417
2418 assert(is_64b());
2419 assert(options->gpu_id >= 500);
2420
2421 assert(sizedwords == 8);
2422
2423 addr = dwords[5];
2424 addr |= ((uint64_t)dwords[6]) << 32;
2425 len = dwords[7];
2426
2427 printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2428 printl(3, "%slen: 0x%x\n", levels[level], len);
2429
2430 ptr = hostptr(addr);
2431
2432 if (ptr) {
2433 if (!quiet(2)) {
2434 ib++;
2435 dump_commands(ptr, len, level+1);
2436 ib--;
2437 dump_hex(ptr, len, level+1);
2438 }
2439 }
2440 }
2441
2442 static void
2443 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2444 {
2445 do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2446 print_mode(level);
2447 dump_register_summary(level);
2448 }
2449
2450 static void
2451 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2452 {
2453 int i;
2454
2455 /* NOTE: seems to write the same reg multiple times.. not sure if the different
2456 * parts of these are triggered by the FLUSH_SO_n events (if that is what they
2457 * actually are)?
2458 */
2459 bool saved_summary = summary;
2460 summary = false;
2461
2462 for (i = 0; i < sizedwords; i += 2) {
2463 dump_register(dwords[i+0], dwords[i+1], level+1);
2464 reg_set(dwords[i+0], dwords[i+1]);
2465 }
2466
2467 summary = saved_summary;
2468 }
2469
2470 static void
2471 cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2472 {
2473 uint32_t reg = dwords[1] & 0xffff;
2474
2475 dump_register(reg, dwords[2], level+1);
2476 reg_set(reg, dwords[2]);
2477 }
2478
2479 static void
2480 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2481 {
2482 uint64_t addr;
2483 uint32_t size = dwords[2] & 0xffff;
2484 void *ptr;
2485
2486 addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2487
2488 printf("addr=%lx\n", addr);
2489 ptr = hostptr(addr);
2490 if (ptr) {
2491 dump_commands(ptr, size, level+1);
2492 }
2493 }
2494
2495 static void
2496 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2497 {
2498 skip_ib2_enable_global = dwords[0];
2499 }
2500
2501 static void
2502 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2503 {
2504 skip_ib2_enable_local = dwords[0];
2505 }
2506
2507 #define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
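/* e.g. CP(NOP, cp_nop) expands to { "CP_NOP", cp_nop }, so the table below
 * can be looked up by packet name in get_type3_op() further down.
 */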
2508 static const struct type3_op {
2509 const char *name;
2510 void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2511 struct {
2512 bool load_all_groups;
2513 } options;
2514 } type3_op[] = {
2515 CP(NOP, cp_nop),
2516 CP(INDIRECT_BUFFER, cp_indirect),
2517 CP(INDIRECT_BUFFER_PFD, cp_indirect),
2518 CP(WAIT_FOR_IDLE, cp_wfi),
2519 CP(REG_RMW, cp_rmw),
2520 CP(REG_TO_MEM, cp_reg_mem),
2521 CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2522 CP(MEM_WRITE, cp_mem_write),
2523 CP(EVENT_WRITE, cp_event_write),
2524 CP(RUN_OPENCL, cp_run_cl),
2525 CP(DRAW_INDX, cp_draw_indx, {.load_all_groups=true}),
2526 CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups=true}),
2527 CP(SET_CONSTANT, cp_set_const),
2528 CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2529 CP(WIDE_REG_WRITE, cp_wide_reg_write),
2530
2531 /* for a3xx */
2532 CP(LOAD_STATE, cp_load_state),
2533 CP(SET_BIN, cp_set_bin),
2534
2535 /* for a4xx */
2536 CP(LOAD_STATE4, cp_load_state),
2537 CP(SET_DRAW_STATE, cp_set_draw_state),
2538 CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups=true}),
2539 CP(EXEC_CS, cp_exec_cs, {.load_all_groups=true}),
2540 CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups=true}),
2541
2542 /* for a5xx */
2543 CP(SET_RENDER_MODE, cp_set_render_mode),
2544 CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2545 CP(BLIT, cp_blit),
2546 CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2547 CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups=true}),
2548 CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups=true}),
2549 CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
2550 CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
2551
2552 /* for a6xx */
2553 CP(LOAD_STATE6_GEOM, cp_load_state),
2554 CP(LOAD_STATE6_FRAG, cp_load_state),
2555 CP(LOAD_STATE6, cp_load_state),
2556 CP(SET_MODE, cp_set_mode),
2557 CP(SET_MARKER, cp_set_marker),
2558 CP(REG_WRITE, cp_reg_write),
2559
2560 CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
2561 };
2562
2563 static void
2564 noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
2565 {
2566 }
2567
2568 static const struct type3_op *
2569 get_type3_op(unsigned opc)
2570 {
2571 static const struct type3_op dummy_op = {
2572 .fxn = noop_fxn,
2573 };
2574 const char *name = pktname(opc);
2575
2576 if (!name)
2577 return &dummy_op;
2578
2579 for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2580 if (!strcmp(name, type3_op[i].name))
2581 return &type3_op[i];
2582
2583 return &dummy_op;
2584 }
2585
2586 void
2587 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2588 {
2589 int dwords_left = sizedwords;
2590 uint32_t count = 0; /* dword count including packet header */
2591 uint32_t val;
2592
2593 // assert(dwords);
2594 if (!dwords) {
2595 printf("NULL cmd buffer!\n");
2596 return;
2597 }
2598
2599 draws[ib] = 0;
2600
2601 while (dwords_left > 0) {
2602
2603 current_draw_count = draw_count;
2604
2605 /* hack: this looks like a -1 underflow that shows up in some
2606 * versions when they try to write zero registers via pkt0
2607 */
2608 // if ((dwords[0] >> 16) == 0xffff)
2609 // goto skip;
2610
2611 if (pkt_is_type0(dwords[0])) {
2612 printl(3, "t0");
2613 count = type0_pkt_size(dwords[0]) + 1;
2614 val = type0_pkt_offset(dwords[0]);
2615 assert(val < regcnt());
2616 printl(3, "%swrite %s%s (%04x)\n", levels[level+1], regname(val, 1),
2617 (dwords[0] & 0x8000) ? " (same register)" : "", val);
2618 dump_registers(val, dwords+1, count-1, level+2);
2619 if (!quiet(3))
2620 dump_hex(dwords, count, level+1);
2621 } else if (pkt_is_type4(dwords[0])) {
2622 /* basically the same(ish) as type0 prior to a5xx */
2623 printl(3, "t4");
2624 count = type4_pkt_size(dwords[0]) + 1;
2625 val = type4_pkt_offset(dwords[0]);
2626 assert(val < regcnt());
2627 printl(3, "%swrite %s (%04x)\n", levels[level+1], regname(val, 1), val);
2628 dump_registers(val, dwords+1, count-1, level+2);
2629 if (!quiet(3))
2630 dump_hex(dwords, count, level+1);
2631 #if 0
2632 } else if (pkt_is_type1(dwords[0])) {
2633 printl(3, "t1");
2634 count = 3;
2635 val = dwords[0] & 0xfff;
2636 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2637 dump_registers(val, dwords+1, 1, level+2);
2638 val = (dwords[0] >> 12) & 0xfff;
2639 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2640 dump_registers(val, dwords+2, 1, level+2);
2641 if (!quiet(3))
2642 dump_hex(dwords, count, level+1);
2643 } else if (pkt_is_type2(dwords[0])) {
2644 printl(3, "t2");
2645 printf("%sNOP\n", levels[level+1]);
2646 count = 1;
2647 if (!quiet(3))
2648 dump_hex(dwords, count, level+1);
2649 #endif
2650 } else if (pkt_is_type3(dwords[0])) {
2651 count = type3_pkt_size(dwords[0]) + 1;
2652 val = cp_type3_opcode(dwords[0]);
2653 const struct type3_op *op = get_type3_op(val);
2654 if (op->options.load_all_groups)
2655 load_all_groups(level+1);
2656 printl(3, "t3");
2657 const char *name = pktname(val);
2658 if (!quiet(2)) {
2659 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
2660 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
2661 val, count, (dwords[0] & 0x1) ? " (predicated)" : "");
2662 }
2663 if (name)
2664 dump_domain(dwords+1, count-1, level+2, name);
2665 op->fxn(dwords+1, count-1, level+1);
2666 if (!quiet(2))
2667 dump_hex(dwords, count, level+1);
2668 } else if (pkt_is_type7(dwords[0])) {
2669 count = type7_pkt_size(dwords[0]) + 1;
2670 val = cp_type7_opcode(dwords[0]);
2671 const struct type3_op *op = get_type3_op(val);
2672 if (op->options.load_all_groups)
2673 load_all_groups(level+1);
2674 printl(3, "t7");
2675 const char *name = pktname(val);
2676 if (!quiet(2)) {
2677 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
2678 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
2679 val, count);
2680 }
2681 if (name) {
2682 /* special hack for two packets that decode the same way
2683 * on a6xx:
2684 */
2685 if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
2686 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
2687 name = "CP_LOAD_STATE6";
2688 dump_domain(dwords+1, count-1, level+2, name);
2689 }
2690 op->fxn(dwords+1, count-1, level+1);
2691 if (!quiet(2))
2692 dump_hex(dwords, count, level+1);
2693 } else if (pkt_is_type2(dwords[0])) {
2694 printl(3, "t2");
2695 printl(3, "%snop\n", levels[level+1]);
2696 } else {
2697 /* for 5xx+ we can do a passable job of looking for start of next valid packet: */
2698 if (options->gpu_id >= 500) {
     count = 0; /* reset so the advance below doesn't skip past the re-synced packet */
2699 while (dwords_left > 0) {
2700 if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
2701 break;
2702 printf("bad type! %08x\n", dwords[0]);
2703 dwords++;
2704 dwords_left--;
2705 }
2706 } else {
2707 printf("bad type! %08x\n", dwords[0]);
2708 return;
2709 }
2710 }
2711
2712 dwords += count;
2713 dwords_left -= count;
2714
2715 }
2716
2717 if (dwords_left < 0)
2718 printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
2719 }
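/* The walk above boils down to: classify the packet header, derive the full
 * packet length (payload + 1 for the header), hand the payload either to the
 * register dumper (type0/type4) or to the opcode table (type3/type7), then
 * advance by the packet length.  A stripped-down, hedged sketch of that
 * skeleton for a5xx+, reusing the helpers this file already uses (the
 * function name is ours and purely illustrative):
 */
#if 0
static void
walk_cmdstream(uint32_t *dwords, uint32_t sizedwords, int level)
{
	int dwords_left = sizedwords;

	while (dwords_left > 0) {
		uint32_t hdr = dwords[0];
		uint32_t count = 1;

		if (pkt_is_type4(hdr)) {
			/* register writes: */
			count = type4_pkt_size(hdr) + 1;
			dump_registers(type4_pkt_offset(hdr), dwords + 1, count - 1, level + 1);
		} else if (pkt_is_type7(hdr)) {
			/* opcode packets, dispatched via the type3_op table: */
			count = type7_pkt_size(hdr) + 1;
			get_type3_op(cp_type7_opcode(hdr))->fxn(dwords + 1, count - 1, level + 1);
		}

		dwords += count;
		dwords_left -= count;
	}
}
#endif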