src/freedreno/decode/cffdec.c
1 /*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <err.h>
27 #include <inttypes.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 #include <stdarg.h>
32 #include <stdbool.h>
33 #include <unistd.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/wait.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <assert.h>
40 #include <signal.h>
41 #include <errno.h>
42
43 #include "redump.h"
44 #include "disasm.h"
45 #include "script.h"
46 #include "rnnutil.h"
47 #include "buffers.h"
48 #include "cffdec.h"
49
50 /* ************************************************************************* */
51 /* originally based on kernel recovery dump code: */
52
53 static const struct cffdec_options *options;
54
55 static bool needs_wfi = false;
56 static bool summary = false;
57 static bool in_summary = false;
58 static int vertices;
59
60 static inline unsigned regcnt(void)
61 {
62 if (options->gpu_id >= 500)
63 return 0xffff;
64 else
65 return 0x7fff;
66 }
67
68 static int is_64b(void)
69 {
70 return options->gpu_id >= 500;
71 }
72
73
74 static int draws[3];
75 static struct {
76 uint64_t base;
77 uint32_t size; /* in dwords */
78 /* Generally cmdstream consists of multiple IB calls to different
79 * buffers, which are themselves often re-used for each tile. The
80 * triggered flag serves two purposes to help make it more clear
 81  * what part of the cmdstream is before vs after the GPU hang:
 82  *
 83  * 1) if in IB2 we are past the point within the IB2 buffer where
 84  * the GPU hung, but IB1 is not past the point within its
 85  * buffer where the GPU had hung, then we know the GPU hang
 86  * happens on a future use of that IB2 buffer.
 87  *
 88  * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
 89  * hung, but we've already passed the trigger point at the same
 90  * IB level, we know that we are past the point where the GPU
 91  * had hung.
92 *
93 * So this is a one way switch, false->true. And a higher #'d
94 * IB level isn't considered triggered unless the lower #'d IB
95 * level is.
96 */
97 bool triggered;
98 } ibs[4];
99 static int ib;
100
101 static int draw_count;
102 static int current_draw_count;
103
104 /* query mode.. to handle symbolic register name queries, we need to
 105  * defer parsing the query string until after gpu_id is known and the
 106  * rnn db is loaded:
107 */
108 static int *queryvals;
109
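/* Should output at this verbosity level be suppressed?  Everything is
 * suppressed while a draw filter is set and we are not on the selected
 * draw; level 3+ is suppressed in summary/query/script modes, and level
 * 2+ in query/script modes.
 */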
110 static bool
111 quiet(int lvl)
112 {
113 if ((options->draw_filter != -1) && (options->draw_filter != current_draw_count))
114 return true;
115 if ((lvl >= 3) && (summary || options->querystrs || options->script))
116 return true;
117 if ((lvl >= 2) && (options->querystrs || options->script))
118 return true;
119 return false;
120 }
121
122 void
123 printl(int lvl, const char *fmt, ...)
124 {
125 va_list args;
126 if (quiet(lvl))
127 return;
128 va_start(args, fmt);
129 vprintf(fmt, args);
130 va_end(args);
131 }
132
133 static const char *levels[] = {
134 "\t",
135 "\t\t",
136 "\t\t\t",
137 "\t\t\t\t",
138 "\t\t\t\t\t",
139 "\t\t\t\t\t\t",
140 "\t\t\t\t\t\t\t",
141 "\t\t\t\t\t\t\t\t",
142 "\t\t\t\t\t\t\t\t\t",
143 "x",
144 "x",
145 "x",
146 "x",
147 "x",
148 "x",
149 };
150
151 enum state_src_t {
152 STATE_SRC_DIRECT,
153 STATE_SRC_INDIRECT,
154 STATE_SRC_BINDLESS,
155 };
156
157 /* SDS (CP_SET_DRAW_STATE) helpers: */
158 static void load_all_groups(int level);
159 static void disable_all_groups(void);
160
161 static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level);
162 static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
163
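/* Should this gpuaddr be highlighted as the estimated crash location?
 * Using the IB base/remaining-dwords info from the crashdump
 * (options->ibs[]), the first address that falls in the unconsumed tail
 * of the hanging IB flips a sticky per-IB-level 'triggered' flag, and
 * everything from there on is highlighted.
 */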
164 static bool
165 highlight_gpuaddr(uint64_t gpuaddr)
166 {
167 if (!options->color)
168 return false;
169
170 if (!options->ibs[ib].base)
171 return false;
172
173 if ((ib > 0) && options->ibs[ib-1].base && !ibs[ib-1].triggered)
174 return false;
175
176 if (ibs[ib].triggered)
177 return true;
178
179 if (options->ibs[ib].base != ibs[ib].base)
180 return false;
181
182 uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
183 uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
184
185 bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
186
187 ibs[ib].triggered |= triggered;
188
189 if (triggered)
190 printf("ESTIMATED CRASH LOCATION!\n");
191
192 return triggered;
193 }
194
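/* Hexdump sizedwords dwords, 8 per row, each row prefixed with its gpu
 * address and dword offset.  Consecutive all-zero rows are collapsed into
 * a single "*" line, and rows at the estimated crash location are
 * highlighted.
 */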
195 static void
196 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
197 {
198 int i, j;
199 int lastzero = 1;
200
201 if (quiet(2))
202 return;
203
204 for (i = 0; i < sizedwords; i += 8) {
205 int zero = 1;
206
207 /* always show first row: */
208 if (i == 0)
209 zero = 0;
210
211 for (j = 0; (j < 8) && (i+j < sizedwords) && zero; j++)
212 if (dwords[i+j])
213 zero = 0;
214
215 if (zero && !lastzero)
216 printf("*\n");
217
218 lastzero = zero;
219
220 if (zero)
221 continue;
222
223 uint64_t addr = gpuaddr(&dwords[i]);
224 bool highlight = highlight_gpuaddr(addr);
225
226 if (highlight)
227 printf("\x1b[0;1;31m");
228
229 if (is_64b()) {
230 printf("%016"PRIx64":%s", addr, levels[level]);
231 } else {
232 printf("%08x:%s", (uint32_t)addr, levels[level]);
233 }
234
235 if (highlight)
236 printf("\x1b[0m");
237
238 printf("%04x:", i * 4);
239
240 for (j = 0; (j < 8) && (i+j < sizedwords); j++) {
241 printf(" %08x", dwords[i+j]);
242 }
243
244 printf("\n");
245 }
246 }
247
248 static void
249 dump_float(float *dwords, uint32_t sizedwords, int level)
250 {
251 int i;
252 for (i = 0; i < sizedwords; i++) {
253 if ((i % 8) == 0) {
254 if (is_64b()) {
255 printf("%016"PRIx64":%s", gpuaddr(dwords), levels[level]);
256 } else {
257 printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
258 }
259 } else {
260 printf(" ");
261 }
262 printf("%8f", *(dwords++));
263 if ((i % 8) == 7)
264 printf("\n");
265 }
266 if (i % 8)
267 printf("\n");
268 }
269
270 /* I believe the surface format is low bits:
271 #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
272 comments in sys2gmem_tex_const indicate that address is [31:12], but
273 looks like at least some of the bits above the format have different meaning..
274 */
275 static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr,
276 uint32_t *flags, uint32_t mask)
277 {
278 assert(!is_64b()); /* this is only used on a2xx */
279 *gpuaddr = dword & ~mask;
280 *flags = dword & mask;
281 }
282
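/* Shadow of the register state seen so far in the cmdstream.  The
 * 'written' bitmap tracks registers written at any point, 'rewritten'
 * tracks writes since the last draw, and lastvals[] remembers values from
 * the previous summary so changes can be flagged.
 */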
283 static uint32_t type0_reg_vals[0xffff + 1];
284 static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals)/8]; /* written since last draw */
285 static uint8_t type0_reg_written[sizeof(type0_reg_vals)/8];
286 static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
287
288 static bool reg_rewritten(uint32_t regbase)
289 {
290 return !!(type0_reg_rewritten[regbase/8] & (1 << (regbase % 8)));
291 }
292
293 bool reg_written(uint32_t regbase)
294 {
295 return !!(type0_reg_written[regbase/8] & (1 << (regbase % 8)));
296 }
297
298 static void clear_rewritten(void)
299 {
300 memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
301 }
302
303 static void clear_written(void)
304 {
305 memset(type0_reg_written, 0, sizeof(type0_reg_written));
306 clear_rewritten();
307 }
308
309 uint32_t reg_lastval(uint32_t regbase)
310 {
311 return lastvals[regbase];
312 }
313
314 static void
315 clear_lastvals(void)
316 {
317 memset(lastvals, 0, sizeof(lastvals));
318 }
319
320 uint32_t
321 reg_val(uint32_t regbase)
322 {
323 return type0_reg_vals[regbase];
324 }
325
326 void
327 reg_set(uint32_t regbase, uint32_t val)
328 {
329 assert(regbase < regcnt());
330 type0_reg_vals[regbase] = val;
331 type0_reg_written[regbase/8] |= (1 << (regbase % 8));
332 type0_reg_rewritten[regbase/8] |= (1 << (regbase % 8));
333 }
334
335 static void
336 reg_dump_scratch(const char *name, uint32_t dword, int level)
337 {
338 unsigned r;
339
340 if (quiet(3))
341 return;
342
343 r = regbase("CP_SCRATCH[0].REG");
344
345 // if not, try old a2xx/a3xx version:
346 if (!r)
347 r = regbase("CP_SCRATCH_REG0");
348
349 if (!r)
350 return;
351
352 printf("%s:%u,%u,%u,%u\n", levels[level],
353 reg_val(r + 4), reg_val(r + 5),
354 reg_val(r + 6), reg_val(r + 7));
355 }
356
357 static void
358 dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
359 {
360 void *buf;
361
362 if (quiet(quietlvl))
363 return;
364
365 buf = hostptr(gpuaddr);
366 if (buf) {
367 dump_hex(buf, sizedwords, level+1);
368 }
369 }
370
371 static void
372 dump_gpuaddr(uint64_t gpuaddr, int level)
373 {
374 dump_gpuaddr_size(gpuaddr, level, 64, 3);
375 }
376
377 static void
378 reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
379 {
380 dump_gpuaddr(dword, level);
381 }
382
383 uint32_t gpuaddr_lo;
384 static void
385 reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
386 {
387 gpuaddr_lo = dword;
388 }
389
390 static void
391 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
392 {
393 dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
394 }
395
396
397 static void
398 dump_shader(const char *ext, void *buf, int bufsz)
399 {
400 if (options->dump_shaders) {
401 static int n = 0;
402                 char filename[16];  /* "%04d.ext" + NUL can need 9 bytes for 3-char exts */
403 int fd;
404 sprintf(filename, "%04d.%s", n++, ext);
405 fd = open(filename, O_WRONLY| O_TRUNC | O_CREAT, 0644);
406 write(fd, buf, bufsz);
407 close(fd);
408 }
409 }
410
411 static void
412 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
413 {
414 void *buf;
415
416 gpuaddr &= 0xfffffffffffffff0;
417
418 if (quiet(3))
419 return;
420
421 buf = hostptr(gpuaddr);
422 if (buf) {
423 uint32_t sizedwords = hostlen(gpuaddr) / 4;
424 const char *ext;
425
426 dump_hex(buf, min(64, sizedwords), level+1);
427 try_disasm_a3xx(buf, sizedwords, level+2, stdout, options->gpu_id);
428
429                 /* this is a bit of an ugly way, but oh well.. */
430 if (strstr(name, "SP_VS_OBJ")) {
431 ext = "vo3";
432 } else if (strstr(name, "SP_FS_OBJ")) {
433 ext = "fo3";
434 } else if (strstr(name, "SP_GS_OBJ")) {
435 ext = "go3";
436 } else if (strstr(name, "SP_CS_OBJ")) {
437 ext = "co3";
438 } else {
439 ext = NULL;
440 }
441
442 if (ext)
443 dump_shader(ext, buf, sizedwords * 4);
444 }
445 }
446
447 static void
448 reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
449 {
450 disasm_gpuaddr(name, dword, level);
451 }
452
453 static void
454 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
455 {
456 disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
457 }
458
459 /* Find the value of the TEX_COUNT register that corresponds to the named
460 * TEX_SAMP/TEX_CONST reg.
461 *
462 * Note, this kinda assumes an equal # of samplers and textures, but not
463 * really sure if there is a much better option. I suppose on a6xx we
464 * could instead decode the bitfields in SP_xS_CONFIG
465 */
466 static int
467 get_tex_count(const char *name)
468 {
469 char count_reg[strlen(name) + 5];
470 char *p;
471
472 p = strstr(name, "CONST");
473 if (!p)
474 p = strstr(name, "SAMP");
475 if (!p)
476 return 0;
477
478 int n = p - name;
479 strncpy(count_reg, name, n);
480 strcpy(count_reg + n, "COUNT");
481
482 return reg_val(regbase(count_reg));
483 }
484
485 static void
486 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
487 {
488 if (!in_summary)
489 return;
490
491 int num_unit = get_tex_count(name);
492 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
493 void *buf = hostptr(gpuaddr);
494
495 if (!buf)
496 return;
497
498 dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level+1);
499 }
500
501 static void
502 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
503 {
504 if (!in_summary)
505 return;
506
507 int num_unit = get_tex_count(name);
508 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
509 void *buf = hostptr(gpuaddr);
510
511 if (!buf)
512 return;
513
514 dump_tex_const(buf, num_unit, level+1);
515 }
516
517 /*
518 * Registers with special handling (rnndec_decode() handles rest):
519 */
520 #define REG(x, fxn) { #x, fxn }
521 static struct {
522 const char *regname;
523 void (*fxn)(const char *name, uint32_t dword, int level);
524 uint32_t regbase;
525 } reg_a2xx[] = {
526 REG(CP_SCRATCH_REG0, reg_dump_scratch),
527 REG(CP_SCRATCH_REG1, reg_dump_scratch),
528 REG(CP_SCRATCH_REG2, reg_dump_scratch),
529 REG(CP_SCRATCH_REG3, reg_dump_scratch),
530 REG(CP_SCRATCH_REG4, reg_dump_scratch),
531 REG(CP_SCRATCH_REG5, reg_dump_scratch),
532 REG(CP_SCRATCH_REG6, reg_dump_scratch),
533 REG(CP_SCRATCH_REG7, reg_dump_scratch),
534 {NULL},
535 }, reg_a3xx[] = {
536 REG(CP_SCRATCH_REG0, reg_dump_scratch),
537 REG(CP_SCRATCH_REG1, reg_dump_scratch),
538 REG(CP_SCRATCH_REG2, reg_dump_scratch),
539 REG(CP_SCRATCH_REG3, reg_dump_scratch),
540 REG(CP_SCRATCH_REG4, reg_dump_scratch),
541 REG(CP_SCRATCH_REG5, reg_dump_scratch),
542 REG(CP_SCRATCH_REG6, reg_dump_scratch),
543 REG(CP_SCRATCH_REG7, reg_dump_scratch),
544 REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
545 REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
546 REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
547 REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
548 REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
549 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
550 {NULL},
551 }, reg_a4xx[] = {
552 REG(CP_SCRATCH[0].REG, reg_dump_scratch),
553 REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
554 REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
555 REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
556 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
557 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
558 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
559 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
560 REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
561 REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
562 REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
563 REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
564 REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
565 REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
566 REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
567 REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
568 REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
569 REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
570 REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
571 REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
572 REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
573 REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
574 REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
575 REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
576 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
577 {NULL},
578 }, reg_a5xx[] = {
579 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
580 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
581 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
582 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
583 REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
584 REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
585 REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
586 REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
587 REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
588 REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
589 REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
590 REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
591 REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
592 REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
593 REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
594 REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
595 REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
596 REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
597 REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
598 REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
599 REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
600 REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
601 REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
602 REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
603 REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
604 REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
605 REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
606 REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
607 REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
608 REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
609 REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
610 REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
611 REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
612 REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
613 REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
614 REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
615 REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
616 REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
617 REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
618 REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
619 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
620 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
621 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
622 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
623 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
624 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
625 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
626 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
627 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
628 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
629 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
630 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
631 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
632 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
633 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
634 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
635 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
636 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
637 // REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
638 // REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
639 // REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
640 // REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
641 // REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
642 // REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
643 // REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
644 // REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
645 // REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
646 // REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
647
648 // REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
649 // REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
650 // REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
651 // REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
652 // REG(RB_2D_DST_LO, reg_gpuaddr_lo),
653 // REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
654 // REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
655 // REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
656
657 {NULL},
658 }, reg_a6xx[] = {
659 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
660 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
661 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
662 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
663
664 REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
665 REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
666 REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
667 REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
668 REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
669 REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
670 REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
671 REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
672 REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
673 REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
674 REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
675 REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
676
677 REG(SP_VS_TEX_CONST_LO, reg_gpuaddr_lo),
678 REG(SP_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
679 REG(SP_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
680 REG(SP_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
681 REG(SP_HS_TEX_CONST_LO, reg_gpuaddr_lo),
682 REG(SP_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
683 REG(SP_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
684 REG(SP_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
685 REG(SP_DS_TEX_CONST_LO, reg_gpuaddr_lo),
686 REG(SP_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
687 REG(SP_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
688 REG(SP_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
689 REG(SP_GS_TEX_CONST_LO, reg_gpuaddr_lo),
690 REG(SP_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
691 REG(SP_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
692 REG(SP_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
693 REG(SP_FS_TEX_CONST_LO, reg_gpuaddr_lo),
694 REG(SP_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
695 REG(SP_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
696 REG(SP_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
697 REG(SP_CS_TEX_CONST_LO, reg_gpuaddr_lo),
698 REG(SP_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
699 REG(SP_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
700 REG(SP_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
701
702 {NULL},
703 }, *type0_reg;
704
705 static struct rnn *rnn;
706
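/* Load the rnn database for the given GPU generation, resolve any query
 * strings (options->querystrs) to register offsets, and look up the
 * regbase for each specially-handled register in type0_reg[].
 */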
707 static void
708 init_rnn(const char *gpuname)
709 {
710 rnn = rnn_new(!options->color);
711
712 rnn_load(rnn, gpuname);
713
714 if (options->querystrs) {
715 int i;
716 queryvals = calloc(options->nquery, sizeof(queryvals[0]));
717
718 for (i = 0; i < options->nquery; i++) {
719 int val = strtol(options->querystrs[i], NULL, 0);
720
721 if (val == 0)
722 val = regbase(options->querystrs[i]);
723
724 queryvals[i] = val;
725 printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
726 }
727 }
728
729 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
730 type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
731 if (!type0_reg[idx].regbase) {
732 printf("invalid register name: %s\n", type0_reg[idx].regname);
733 exit(1);
734 }
735 }
736 }
737
738 void
739 reset_regs(void)
740 {
741 clear_written();
742 clear_lastvals();
743 memset(&ibs, 0, sizeof(ibs));
744 }
745
746 void
747 cffdec_init(const struct cffdec_options *_options)
748 {
749 options = _options;
750 summary = options->summary;
751
752 /* in case we're decoding multiple files: */
753 free(queryvals);
754 reset_regs();
755 draw_count = 0;
756
757 /* TODO we need an API to free/cleanup any previous rnn */
758
759 switch (options->gpu_id) {
760 case 200 ... 299:
761 type0_reg = reg_a2xx;
762 init_rnn("a2xx");
763 break;
764 case 300 ... 399:
765 type0_reg = reg_a3xx;
766 init_rnn("a3xx");
767 break;
768 case 400 ... 499:
769 type0_reg = reg_a4xx;
770 init_rnn("a4xx");
771 break;
772 case 500 ... 599:
773 type0_reg = reg_a5xx;
774 init_rnn("a5xx");
775 break;
776 case 600 ... 699:
777 type0_reg = reg_a6xx;
778 init_rnn("a6xx");
779 break;
780 default:
781 errx(-1, "unsupported gpu");
782 }
783 }
784
785 const char *
786 pktname(unsigned opc)
787 {
788 return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
789 }
790
791 const char *
792 regname(uint32_t regbase, int color)
793 {
794 return rnn_regname(rnn, regbase, color);
795 }
796
797 uint32_t
798 regbase(const char *name)
799 {
800 return rnn_regbase(rnn, name);
801 }
802
803 static int
804 endswith(uint32_t regbase, const char *suffix)
805 {
806 const char *name = regname(regbase, 0);
807 const char *s = strstr(name, suffix);
808 if (!s)
809 return 0;
810 return (s - strlen(name) + strlen(suffix)) == name;
811 }
812
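/* Decode and print a single register value via the rnn database.  On
 * a5xx+ we additionally look for _LO/_HI register pairs and, when the
 * combined gpuaddr is present in the dump, print the containing buffer's
 * base, offset and size.
 */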
813 void
814 dump_register_val(uint32_t regbase, uint32_t dword, int level)
815 {
816 struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
817
818 if (info && info->typeinfo) {
819 uint64_t gpuaddr = 0;
820 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
821 printf("%s%s: %s", levels[level], info->name, decoded);
822
823 /* Try and figure out if we are looking at a gpuaddr.. this
824 * might be useful for other gen's too, but at least a5xx has
825 * the _HI/_LO suffix we can look for. Maybe a better approach
826 * would be some special annotation in the xml..
827 */
828 if (options->gpu_id >= 500) {
829 if (endswith(regbase, "_HI") && endswith(regbase-1, "_LO")) {
830 gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase-1);
831 } else if (endswith(regbase, "_LO") && endswith(regbase+1, "_HI")) {
832 gpuaddr = (((uint64_t)reg_val(regbase+1)) << 32) | dword;
833 }
834 }
835
836 if (gpuaddr && hostptr(gpuaddr)) {
837 printf("\t\tbase=%"PRIx64", offset=%"PRIu64", size=%u",
838 gpubaseaddr(gpuaddr),
839 gpuaddr - gpubaseaddr(gpuaddr),
840 hostlen(gpubaseaddr(gpuaddr)));
841 }
842
843 printf("\n");
844
845 free(decoded);
846 } else if (info) {
847 printf("%s%s: %08x\n", levels[level], info->name, dword);
848 } else {
849 printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
850 }
851
852 if (info) {
853 free(info->name);
854 free(info);
855 }
856 }
857
858 static void
859 dump_register(uint32_t regbase, uint32_t dword, int level)
860 {
861 if (!quiet(3)) {
862 dump_register_val(regbase, dword, level);
863 }
864
865 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
866 if (type0_reg[idx].regbase == regbase) {
867 type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
868 break;
869 }
870 }
871 }
872
873 static bool
874 is_banked_reg(uint32_t regbase)
875 {
876 return (0x2000 <= regbase) && (regbase < 0x2400);
877 }
878
879 static void
880 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords, int level)
881 {
882 while (sizedwords--) {
883 int last_summary = summary;
884
885 /* access to non-banked registers needs a WFI:
886 * TODO banked register range for a2xx??
887 */
888 if (needs_wfi && !is_banked_reg(regbase))
889 printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
890
891 reg_set(regbase, *dwords);
892 dump_register(regbase, *dwords, level);
893 regbase++;
894 dwords++;
895 summary = last_summary;
896 }
897 }
898
899 static void
900 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level,
901 const char *name)
902 {
903 struct rnndomain *dom;
904 int i;
905
906 dom = rnn_finddomain(rnn->db, name);
907
908 if (!dom)
909 return;
910
911 if (script_packet)
912 script_packet(dwords, sizedwords, rnn, dom);
913
914 if (quiet(2))
915 return;
916
917 for (i = 0; i < sizedwords; i++) {
918 struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
919 char *decoded;
920 if (!(info && info->typeinfo))
921 break;
922 uint64_t value = dwords[i];
923 if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
924 value |= (uint64_t) dwords[i + 1] << 32;
925 i++; /* skip the next dword since we're printing it now */
926 }
927 decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
928 /* Unlike the register printing path, we don't print the name
929 * of the register, so if it doesn't contain other named
930 * things (i.e. it isn't a bitset) then print the register
931 * name as if it's a bitset with a single entry. This avoids
932 * having to create a dummy register with a single entry to
933 * get a name in the decoding.
934 */
935 if (info->typeinfo->type == RNN_TTYPE_BITSET ||
936 info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
937 printf("%s%s\n", levels[level], decoded);
938 } else {
939 printf("%s{ %s%s%s = %s }\n", levels[level],
940 rnn->vc->colors->rname, info->name,
941 rnn->vc->colors->reset, decoded);
942 }
943 free(decoded);
944 free(info->name);
945 free(info);
946 }
947 }
948
949
950 static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
951 static unsigned mode;
952 static const char *render_mode;
953 static enum {
954 MODE_BINNING = 0x1,
955 MODE_GMEM = 0x2,
956 MODE_BYPASS = 0x4,
957 MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
958 } enable_mask = MODE_ALL;
959 static bool skip_ib2_enable_global;
960 static bool skip_ib2_enable_local;
961
962 static void
963 print_mode(int level)
964 {
965 if ((options->gpu_id >= 500) && !quiet(2)) {
966 printf("%smode: %s\n", levels[level], render_mode);
967 printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global, skip_ib2_enable_local);
968 }
969 }
970
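/* Should query output be skipped for the current draw?  In QUERY_WRITTEN
 * mode we skip unless one of the queried registers was written since the
 * last draw; in QUERY_DELTA mode we skip unless one of them changed
 * value; QUERY_ALL never skips.
 */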
971 static bool
972 skip_query(void)
973 {
974 switch (options->query_mode) {
975 case QUERY_ALL:
976 /* never skip: */
977 return false;
978 case QUERY_WRITTEN:
979 for (int i = 0; i < options->nquery; i++) {
980 uint32_t regbase = queryvals[i];
981 if (!reg_written(regbase)) {
982 continue;
983 }
984 if (reg_rewritten(regbase)) {
985 return false;
986 }
987 }
988 return true;
989 case QUERY_DELTA:
990 for (int i = 0; i < options->nquery; i++) {
991 uint32_t regbase = queryvals[i];
992 if (!reg_written(regbase)) {
993 continue;
994 }
995 uint32_t lastval = reg_val(regbase);
996 if (lastval != lastvals[regbase]) {
997 return false;
998 }
999 }
1000 return true;
1001 }
1002 return true;
1003 }
1004
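/* Emit one line per queried register that has been written, showing the
 * draw number, primitive type, current window scissor (read back as the
 * bin extents on a5xx/a6xx), index count, and the register value with
 * change ('!') and rewritten ('+') markers.
 */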
1005 static void
1006 __do_query(const char *primtype, uint32_t num_indices)
1007 {
1008 int n = 0;
1009
1010 if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
1011 uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1012 uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1013
1014 bin_x1 = scissor_tl & 0xffff;
1015 bin_y1 = scissor_tl >> 16;
1016 bin_x2 = scissor_br & 0xffff;
1017 bin_y2 = scissor_br >> 16;
1018 }
1019
1020 for (int i = 0; i < options->nquery; i++) {
1021 uint32_t regbase = queryvals[i];
1022 if (reg_written(regbase)) {
1023 uint32_t lastval = reg_val(regbase);
1024 printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype,
1025 bin_x1, bin_y1, bin_x2, bin_y2, num_indices);
1026 if (options->gpu_id >= 500)
1027 printf("%s:", render_mode);
1028 printf("\t%08x", lastval);
1029 if (lastval != lastvals[regbase]) {
1030 printf("!");
1031 } else {
1032 printf(" ");
1033 }
1034 if (reg_rewritten(regbase)) {
1035 printf("+");
1036 } else {
1037 printf(" ");
1038 }
1039 dump_register_val(regbase, lastval, 0);
1040 n++;
1041 }
1042 }
1043
1044 if (n > 1)
1045 printf("\n");
1046 }
1047
1048 static void
1049 do_query_compare(const char *primtype, uint32_t num_indices)
1050 {
1051 unsigned saved_enable_mask = enable_mask;
1052 const char *saved_render_mode = render_mode;
1053
1054         /* in 'query-compare' mode, we want to see if the register is written
1055 * or changed in any mode:
1056 *
1057 * (NOTE: this could cause false-positive for 'query-delta' if the reg
1058 * is written with different values in binning vs sysmem/gmem mode, as
1059 * we don't track previous values per-mode, but I think we can live with
1060 * that)
1061 */
1062 enable_mask = MODE_ALL;
1063
1064 clear_rewritten();
1065 load_all_groups(0);
1066
1067 if (!skip_query()) {
1068 /* dump binning pass values: */
1069 enable_mask = MODE_BINNING;
1070 render_mode = "BINNING";
1071 clear_rewritten();
1072 load_all_groups(0);
1073 __do_query(primtype, num_indices);
1074
1075 /* dump draw pass values: */
1076 enable_mask = MODE_GMEM | MODE_BYPASS;
1077 render_mode = "DRAW";
1078 clear_rewritten();
1079 load_all_groups(0);
1080 __do_query(primtype, num_indices);
1081
1082 printf("\n");
1083 }
1084
1085 enable_mask = saved_enable_mask;
1086 render_mode = saved_render_mode;
1087
1088 disable_all_groups();
1089 }
1090
1091 /* well, actually query and script..
1092 * NOTE: call this before dump_register_summary()
1093 */
1094 static void
1095 do_query(const char *primtype, uint32_t num_indices)
1096 {
1097 if (script_draw)
1098 script_draw(primtype, num_indices);
1099
1100 if (options->query_compare) {
1101 do_query_compare(primtype, num_indices);
1102 return;
1103 }
1104
1105 if (skip_query())
1106 return;
1107
1108 __do_query(primtype, num_indices);
1109 }
1110
1111 static void
1112 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1113 {
1114 uint32_t start = dwords[1] >> 16;
1115 uint32_t size = dwords[1] & 0xffff;
1116 const char *type = NULL, *ext = NULL;
1117 gl_shader_stage disasm_type;
1118
1119 switch (dwords[0]) {
1120 case 0:
1121 type = "vertex";
1122 ext = "vo";
1123 disasm_type = MESA_SHADER_VERTEX;
1124 break;
1125 case 1:
1126 type = "fragment";
1127 ext = "fo";
1128 disasm_type = MESA_SHADER_FRAGMENT;
1129 break;
1130 default:
1131 type = "<unknown>";
1132 disasm_type = 0;
1133 break;
1134 }
1135
1136 printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start, size);
1137 disasm_a2xx(dwords + 2, sizedwords - 2, level+2, disasm_type);
1138
1139 /* dump raw shader: */
1140 if (ext)
1141 dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1142 }
1143
1144 static void
1145 cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
1146 {
1147 uint32_t reg = dwords[0] & 0xffff;
1148 int i;
1149 for (i = 1; i < sizedwords; i++) {
1150 dump_register(reg, dwords[i], level+1);
1151 reg_set(reg, dwords[i]);
1152 reg++;
1153 }
1154 }
1155
1156 enum state_t {
1157 TEX_SAMP = 1,
1158 TEX_CONST,
1159 TEX_MIPADDR, /* a3xx only */
1160 SHADER_PROG,
1161 SHADER_CONST,
1162
1163 // image/ssbo state:
1164 SSBO_0,
1165 SSBO_1,
1166 SSBO_2,
1167
1168 UBO,
1169
1170         // unknown things, just hexdump them:
1171 UNKNOWN_DWORDS,
1172 UNKNOWN_2DWORDS,
1173 UNKNOWN_4DWORDS,
1174 };
1175
1176 enum adreno_state_block {
1177 SB_VERT_TEX = 0,
1178 SB_VERT_MIPADDR = 1,
1179 SB_FRAG_TEX = 2,
1180 SB_FRAG_MIPADDR = 3,
1181 SB_VERT_SHADER = 4,
1182 SB_GEOM_SHADER = 5,
1183 SB_FRAG_SHADER = 6,
1184 SB_COMPUTE_SHADER = 7,
1185 };
1186
1187 /* TODO there is probably a clever way to let rnndec parse things so
1188 * we don't have to care about packet format differences across gens
1189 */
1190
1191 static void
1192 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1193 enum state_src_t *src)
1194 {
1195 unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1196 unsigned state_type = dwords[1] & 0x3;
1197 static const struct {
1198 gl_shader_stage stage;
1199 enum state_t state;
1200         } lookup[0xf][0x4] = {   /* state_type is 2 bits, so 0..3 */
1201 [SB_VERT_TEX][0] = { MESA_SHADER_VERTEX, TEX_SAMP },
1202 [SB_VERT_TEX][1] = { MESA_SHADER_VERTEX, TEX_CONST },
1203 [SB_FRAG_TEX][0] = { MESA_SHADER_FRAGMENT, TEX_SAMP },
1204 [SB_FRAG_TEX][1] = { MESA_SHADER_FRAGMENT, TEX_CONST },
1205 [SB_VERT_SHADER][0] = { MESA_SHADER_VERTEX, SHADER_PROG },
1206 [SB_VERT_SHADER][1] = { MESA_SHADER_VERTEX, SHADER_CONST },
1207 [SB_FRAG_SHADER][0] = { MESA_SHADER_FRAGMENT, SHADER_PROG },
1208 [SB_FRAG_SHADER][1] = { MESA_SHADER_FRAGMENT, SHADER_CONST },
1209 };
1210
1211 *stage = lookup[state_block_id][state_type].stage;
1212 *state = lookup[state_block_id][state_type].state;
1213 unsigned state_src = (dwords[0] >> 16) & 0x7;
1214 if (state_src == 0 /* SS_DIRECT */)
1215 *src = STATE_SRC_DIRECT;
1216 else
1217 *src = STATE_SRC_INDIRECT;
1218 }
1219
1220 static enum state_src_t
1221 _get_state_src(unsigned dword0)
1222 {
1223 switch ((dword0 >> 16) & 0x3) {
1224 case 0: /* SS4_DIRECT / SS6_DIRECT */
1225 return STATE_SRC_DIRECT;
1226 case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1227 return STATE_SRC_INDIRECT;
1228 case 1: /* SS6_BINDLESS */
1229 return STATE_SRC_BINDLESS;
1230 default:
1231 return STATE_SRC_DIRECT;
1232 }
1233 }
1234
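/* Shared a4xx+ mapping from (state block id, state type) to shader stage
 * and state kind.  The a4xx and a6xx variants of the packet encode these
 * fields at different bit positions, but use the same block ids.
 */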
1235 static void
1236 _get_state_type(unsigned state_block_id, unsigned state_type,
1237 gl_shader_stage *stage, enum state_t *state)
1238 {
1239 static const struct {
1240 gl_shader_stage stage;
1241 enum state_t state;
1242 } lookup[0x10][0x4] = {
1243 // SB4_VS_TEX:
1244 [0x0][0] = { MESA_SHADER_VERTEX, TEX_SAMP },
1245 [0x0][1] = { MESA_SHADER_VERTEX, TEX_CONST },
1246 [0x0][2] = { MESA_SHADER_VERTEX, UBO },
1247 // SB4_HS_TEX:
1248 [0x1][0] = { MESA_SHADER_TESS_CTRL, TEX_SAMP },
1249 [0x1][1] = { MESA_SHADER_TESS_CTRL, TEX_CONST },
1250 [0x1][2] = { MESA_SHADER_TESS_CTRL, UBO },
1251 // SB4_DS_TEX:
1252 [0x2][0] = { MESA_SHADER_TESS_EVAL, TEX_SAMP },
1253 [0x2][1] = { MESA_SHADER_TESS_EVAL, TEX_CONST },
1254 [0x2][2] = { MESA_SHADER_TESS_EVAL, UBO },
1255 // SB4_GS_TEX:
1256 [0x3][0] = { MESA_SHADER_GEOMETRY, TEX_SAMP },
1257 [0x3][1] = { MESA_SHADER_GEOMETRY, TEX_CONST },
1258 [0x3][2] = { MESA_SHADER_GEOMETRY, UBO },
1259 // SB4_FS_TEX:
1260 [0x4][0] = { MESA_SHADER_FRAGMENT, TEX_SAMP },
1261 [0x4][1] = { MESA_SHADER_FRAGMENT, TEX_CONST },
1262 [0x4][2] = { MESA_SHADER_FRAGMENT, UBO },
1263 // SB4_CS_TEX:
1264 [0x5][0] = { MESA_SHADER_COMPUTE, TEX_SAMP },
1265 [0x5][1] = { MESA_SHADER_COMPUTE, TEX_CONST },
1266 [0x5][2] = { MESA_SHADER_COMPUTE, UBO },
1267 // SB4_VS_SHADER:
1268 [0x8][0] = { MESA_SHADER_VERTEX, SHADER_PROG },
1269 [0x8][1] = { MESA_SHADER_VERTEX, SHADER_CONST },
1270 [0x8][2] = { MESA_SHADER_VERTEX, UBO },
1271 // SB4_HS_SHADER
1272 [0x9][0] = { MESA_SHADER_TESS_CTRL, SHADER_PROG },
1273 [0x9][1] = { MESA_SHADER_TESS_CTRL, SHADER_CONST },
1274 [0x9][2] = { MESA_SHADER_TESS_CTRL, UBO },
1275 // SB4_DS_SHADER
1276 [0xa][0] = { MESA_SHADER_TESS_EVAL, SHADER_PROG },
1277 [0xa][1] = { MESA_SHADER_TESS_EVAL, SHADER_CONST },
1278 [0xa][2] = { MESA_SHADER_TESS_EVAL, UBO },
1279 // SB4_GS_SHADER
1280 [0xb][0] = { MESA_SHADER_GEOMETRY, SHADER_PROG },
1281 [0xb][1] = { MESA_SHADER_GEOMETRY, SHADER_CONST },
1282 [0xb][2] = { MESA_SHADER_GEOMETRY, UBO },
1283 // SB4_FS_SHADER:
1284 [0xc][0] = { MESA_SHADER_FRAGMENT, SHADER_PROG },
1285 [0xc][1] = { MESA_SHADER_FRAGMENT, SHADER_CONST },
1286 [0xc][2] = { MESA_SHADER_FRAGMENT, UBO },
1287 // SB4_CS_SHADER:
1288 [0xd][0] = { MESA_SHADER_COMPUTE, SHADER_PROG },
1289 [0xd][1] = { MESA_SHADER_COMPUTE, SHADER_CONST },
1290 [0xd][2] = { MESA_SHADER_COMPUTE, UBO },
1291 [0xd][3] = { MESA_SHADER_COMPUTE, SSBO_0 }, /* a6xx location */
1292 // SB4_SSBO (shared across all stages)
1293 [0xe][0] = { 0, SSBO_0 }, /* a5xx (and a4xx?) location */
1294 [0xe][1] = { 0, SSBO_1 },
1295 [0xe][2] = { 0, SSBO_2 },
1296 // SB4_CS_SSBO
1297 [0xf][0] = { MESA_SHADER_COMPUTE, SSBO_0 },
1298 [0xf][1] = { MESA_SHADER_COMPUTE, SSBO_1 },
1299 [0xf][2] = { MESA_SHADER_COMPUTE, SSBO_2 },
1300 // unknown things
1301                 /* This looks like combined UBO state for 3d stages (a5xx and
1302                  * before?).  I think a6xx has UBO state per shader stage:
1303                  */
1304 [0x6][2] = { 0, UBO },
1305 [0x7][1] = { 0, UNKNOWN_2DWORDS },
1306 };
1307
1308 *stage = lookup[state_block_id][state_type].stage;
1309 *state = lookup[state_block_id][state_type].state;
1310 }
1311
1312 static void
1313 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1314 enum state_src_t *src)
1315 {
1316 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1317 unsigned state_type = dwords[1] & 0x3;
1318 _get_state_type(state_block_id, state_type, stage, state);
1319 *src = _get_state_src(dwords[0]);
1320 }
1321
1322 static void
1323 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1324 enum state_src_t *src)
1325 {
1326 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1327 unsigned state_type = (dwords[0] >> 14) & 0x3;
1328 _get_state_type(state_block_id, state_type, stage, state);
1329 *src = _get_state_src(dwords[0]);
1330 }
1331
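/* Dump num_unit sampler descriptors using the per-generation record size:
 * 2 dwords on a3xx/a4xx, 4 dwords on a5xx/a6xx (a6xx bindless descriptors
 * are spaced 16 dwords apart).
 */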
1332 static void
1333 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1334 {
1335 for (int i = 0; i < num_unit; i++) {
1336 /* work-around to reduce noise for opencl blob which always
1337 * writes the max # regardless of # of textures used
1338 */
1339 if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1340 break;
1341
1342 if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1343 dump_domain(texsamp, 2, level+2, "A3XX_TEX_SAMP");
1344 dump_hex(texsamp, 2, level+1);
1345 texsamp += 2;
1346 } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1347 dump_domain(texsamp, 2, level+2, "A4XX_TEX_SAMP");
1348 dump_hex(texsamp, 2, level+1);
1349 texsamp += 2;
1350 } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1351 dump_domain(texsamp, 4, level+2, "A5XX_TEX_SAMP");
1352 dump_hex(texsamp, 4, level+1);
1353 texsamp += 4;
1354 } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1355 dump_domain(texsamp, 4, level+2, "A6XX_TEX_SAMP");
1356 dump_hex(texsamp, 4, level+1);
1357 texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1358 }
1359 }
1360 }
1361
1362 static void
1363 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1364 {
1365 for (int i = 0; i < num_unit; i++) {
1366 /* work-around to reduce noise for opencl blob which always
1367 * writes the max # regardless of # of textures used
1368 */
1369 if ((num_unit == 16) &&
1370 (texconst[0] == 0) && (texconst[1] == 0) &&
1371 (texconst[2] == 0) && (texconst[3] == 0))
1372 break;
1373
1374 if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1375 dump_domain(texconst, 4, level+2, "A3XX_TEX_CONST");
1376 dump_hex(texconst, 4, level+1);
1377 texconst += 4;
1378 } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1379 dump_domain(texconst, 8, level+2, "A4XX_TEX_CONST");
1380 if (options->dump_textures) {
1381 uint32_t addr = texconst[4] & ~0x1f;
1382 dump_gpuaddr(addr, level-2);
1383 }
1384 dump_hex(texconst, 8, level+1);
1385 texconst += 8;
1386 } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1387 dump_domain(texconst, 12, level+2, "A5XX_TEX_CONST");
1388 if (options->dump_textures) {
1389 uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1390 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1391 }
1392 dump_hex(texconst, 12, level+1);
1393 texconst += 12;
1394 } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1395 dump_domain(texconst, 16, level+2, "A6XX_TEX_CONST");
1396 if (options->dump_textures) {
1397 uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1398 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1399 }
1400 dump_hex(texconst, 16, level+1);
1401 texconst += 16;
1402 }
1403 }
1404 }
1405
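/* Decode the CP_LOAD_STATE family of packets: determine the target shader
 * stage and state type, locate the payload (inline in the packet, indirect
 * via gpuaddr, or via a bindless base register), and dump it in the
 * appropriate format.
 */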
1406 static void
1407 cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1408 {
1409 gl_shader_stage stage;
1410 enum state_t state;
1411 enum state_src_t src;
1412 uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1413 uint64_t ext_src_addr;
1414 void *contents;
1415 int i;
1416
1417 if (quiet(2) && !options->script)
1418 return;
1419
1420 if (options->gpu_id >= 600)
1421 a6xx_get_state_type(dwords, &stage, &state, &src);
1422 else if (options->gpu_id >= 400)
1423 a4xx_get_state_type(dwords, &stage, &state, &src);
1424 else
1425 a3xx_get_state_type(dwords, &stage, &state, &src);
1426
1427 switch (src) {
1428 case STATE_SRC_DIRECT: ext_src_addr = 0; break;
1429 case STATE_SRC_INDIRECT:
1430 if (is_64b()) {
1431 ext_src_addr = dwords[1] & 0xfffffffc;
1432 ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1433 } else {
1434 ext_src_addr = dwords[1] & 0xfffffffc;
1435 }
1436
1437 break;
1438 case STATE_SRC_BINDLESS: {
1439 const unsigned base_reg =
1440 stage == MESA_SHADER_COMPUTE ?
1441 regbase("HLSQ_CS_BINDLESS_BASE[0]") :
1442 regbase("HLSQ_BINDLESS_BASE[0]");
1443
1444 if (is_64b()) {
1445 const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1446 ext_src_addr = reg_val(reg) & 0xfffffffc;
1447 ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1448 } else {
1449 const unsigned reg = base_reg + (dwords[1] >> 28);
1450 ext_src_addr = reg_val(reg) & 0xfffffffc;
1451 }
1452
1453 ext_src_addr += 4 * (dwords[1] & 0xffffff);
1454 break;
1455 }
1456 }
1457
1458 if (ext_src_addr)
1459 contents = hostptr(ext_src_addr);
1460 else
1461 contents = is_64b() ? dwords + 3 : dwords + 2;
1462
1463 if (!contents)
1464 return;
1465
1466 switch (state) {
1467 case SHADER_PROG: {
1468 const char *ext = NULL;
1469
1470 if (quiet(2))
1471 return;
1472
1473 if (options->gpu_id >= 400)
1474 num_unit *= 16;
1475 else if (options->gpu_id >= 300)
1476 num_unit *= 4;
1477
1478 /* shaders:
1479 *
1480 * note: num_unit seems to be # of instruction groups, where
1481 * an instruction group has 4 64bit instructions.
1482 */
1483 if (stage == MESA_SHADER_VERTEX) {
1484 ext = "vo3";
1485 } else if (stage == MESA_SHADER_GEOMETRY) {
1486 ext = "go3";
1487 } else if (stage == MESA_SHADER_COMPUTE) {
1488 ext = "co3";
1489 } else if (stage == MESA_SHADER_FRAGMENT){
1490 ext = "fo3";
1491 }
1492
1493 if (contents)
1494 try_disasm_a3xx(contents, num_unit * 2, level+2, stdout, options->gpu_id);
1495
1496 /* dump raw shader: */
1497 if (ext)
1498 dump_shader(ext, contents, num_unit * 2 * 4);
1499
1500 break;
1501 }
1502 case SHADER_CONST: {
1503 if (quiet(2))
1504 return;
1505
1506 /* uniforms/consts:
1507 *
1508 * note: num_unit seems to be # of pairs of dwords??
1509 */
1510
1511 if (options->gpu_id >= 400)
1512 num_unit *= 2;
1513
1514 dump_float(contents, num_unit*2, level+1);
1515 dump_hex(contents, num_unit*2, level+1);
1516
1517 break;
1518 }
1519 case TEX_MIPADDR: {
1520 uint32_t *addrs = contents;
1521
1522 if (quiet(2))
1523 return;
1524
1525 /* mipmap consts block just appears to be array of num_unit gpu addr's: */
1526 for (i = 0; i < num_unit; i++) {
1527 void *ptr = hostptr(addrs[i]);
1528 printf("%s%2d: %08x\n", levels[level+1], i, addrs[i]);
1529 if (options->dump_textures) {
1530 printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1531 dump_hex(ptr, hostlen(addrs[i])/4, level+1);
1532 }
1533 }
1534 break;
1535 }
1536 case TEX_SAMP: {
1537 dump_tex_samp(contents, src, num_unit, level);
1538 break;
1539 }
1540 case TEX_CONST: {
1541 dump_tex_const(contents, num_unit, level);
1542 break;
1543 }
1544 case SSBO_0: {
1545 uint32_t *ssboconst = (uint32_t *)contents;
1546
1547 for (i = 0; i < num_unit; i++) {
1548 int sz = 4;
1549 if (400 <= options->gpu_id && options->gpu_id < 500) {
1550 dump_domain(ssboconst, 4, level+2, "A4XX_SSBO_0");
1551 } else if (500 <= options->gpu_id && options->gpu_id < 600) {
1552 dump_domain(ssboconst, 4, level+2, "A5XX_SSBO_0");
1553 } else if (600 <= options->gpu_id && options->gpu_id < 700) {
1554 sz = 16;
1555 dump_domain(ssboconst, 16, level+2, "A6XX_IBO");
1556 }
1557 dump_hex(ssboconst, sz, level+1);
1558 ssboconst += sz;
1559 }
1560 break;
1561 }
1562 case SSBO_1: {
1563 uint32_t *ssboconst = (uint32_t *)contents;
1564
1565 for (i = 0; i < num_unit; i++) {
1566 if (400 <= options->gpu_id && options->gpu_id < 500)
1567 dump_domain(ssboconst, 2, level+2, "A4XX_SSBO_1");
1568 else if (500 <= options->gpu_id && options->gpu_id < 600)
1569 dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_1");
1570 dump_hex(ssboconst, 2, level+1);
1571 ssboconst += 2;
1572 }
1573 break;
1574 }
1575 case SSBO_2: {
1576 uint32_t *ssboconst = (uint32_t *)contents;
1577
1578 for (i = 0; i < num_unit; i++) {
1579 /* TODO a4xx and a5xx might be same: */
1580 if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1581 dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_2");
1582 dump_hex(ssboconst, 2, level+1);
1583 }
1584 if (options->dump_textures) {
1585 uint64_t addr = (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1586 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1587 }
1588 ssboconst += 2;
1589 }
1590 break;
1591 }
1592 case UBO: {
1593 uint32_t *uboconst = (uint32_t *)contents;
1594
1595 for (i = 0; i < num_unit; i++) {
1596 // TODO probably similar on a4xx..
1597 if (500 <= options->gpu_id && options->gpu_id < 600)
1598 dump_domain(uboconst, 2, level+2, "A5XX_UBO");
1599 else if (600 <= options->gpu_id && options->gpu_id < 700)
1600 dump_domain(uboconst, 2, level+2, "A6XX_UBO");
1601 dump_hex(uboconst, 2, level+1);
1602 uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1603 }
1604 break;
1605 }
1606 case UNKNOWN_DWORDS: {
1607 if (quiet(2))
1608 return;
1609 dump_hex(contents, num_unit, level+1);
1610 break;
1611 }
1612 case UNKNOWN_2DWORDS: {
1613 if (quiet(2))
1614 return;
1615 dump_hex(contents, num_unit * 2, level+1);
1616 break;
1617 }
1618 case UNKNOWN_4DWORDS: {
1619 if (quiet(2))
1620 return;
1621 dump_hex(contents, num_unit * 4, level+1);
1622 break;
1623 }
1624 default:
1625 if (quiet(2))
1626 return;
1627 /* hmm.. */
1628 dump_hex(contents, num_unit, level+1);
1629 break;
1630 }
1631 }
1632
1633 static void
1634 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1635 {
1636 bin_x1 = dwords[1] & 0xffff;
1637 bin_y1 = dwords[1] >> 16;
1638 bin_x2 = dwords[2] & 0xffff;
1639 bin_y2 = dwords[2] >> 16;
1640 }
1641
1642 static void
1643 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
1644 {
1645 uint32_t w, h, p;
1646 uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1647 uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1648 static const char *filter[] = {
1649 "point", "bilinear", "bicubic",
1650 };
1651 static const char *clamp[] = {
1652 "wrap", "mirror", "clamp-last-texel",
1653 };
1654 static const char swiznames[] = "xyzw01??";
1655
1656         /* see sys2gmem_tex_const[] in adreno_a2xx.c */
1657
1658 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1659 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1660 */
1661 p = (dwords[0] >> 22) << 5;
1662 clamp_x = (dwords[0] >> 10) & 0x3;
1663 clamp_y = (dwords[0] >> 13) & 0x3;
1664 clamp_z = (dwords[0] >> 16) & 0x3;
1665
1666 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1667 * NearestClamp=1:OGL Mode
1668 */
1669 parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1670
1671 /* Width, Height, EndianSwap=0:None */
1672 w = (dwords[2] & 0x1fff) + 1;
1673 h = ((dwords[2] >> 13) & 0x1fff) + 1;
1674
1675 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1676 * Mip=2:BaseMap
1677 */
1678 mag = (dwords[3] >> 19) & 0x3;
1679 min = (dwords[3] >> 21) & 0x3;
1680 swiz = (dwords[3] >> 1) & 0xfff;
1681
1682 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1683 * Dim3d=0
1684 */
1685 // XXX
1686
1687         /* BorderColor=0:ABGRBlack, ForceBC=0:disable, TriJuice=0, Aniso=0,
1688 * Dim=1:2d, MipPacking=0
1689 */
1690 parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1691
1692 printf("%sset texture const %04x\n", levels[level], val);
1693 printf("%sclamp x/y/z: %s/%s/%s\n", levels[level+1],
1694 clamp[clamp_x], clamp[clamp_y], clamp[clamp_z]);
1695 printf("%sfilter min/mag: %s/%s\n", levels[level+1], filter[min], filter[mag]);
1696 printf("%sswizzle: %c%c%c%c\n", levels[level+1],
1697 swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1698 swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1699 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1700 levels[level+1], gpuaddr, flags, w, h, p,
1701 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1702 printf("%smipaddr=%08x (flags=%03x)\n", levels[level+1],
1703 mip_gpuaddr, mip_flags);
1704 }
1705
1706 static void
1707 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
1708 {
1709 int i;
1710 printf("%sset shader const %04x\n", levels[level], val);
1711 for (i = 0; i < sizedwords; ) {
1712 uint32_t gpuaddr, flags;
1713 parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1714 void *addr = hostptr(gpuaddr);
1715 if (addr) {
1716 const char * fmt =
1717 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1718 uint32_t size = dwords[i++];
1719 printf("%saddr=%08x, size=%d, format=%s\n", levels[level+1],
1720 gpuaddr, size, fmt);
1721 // TODO maybe dump these as bytes instead of dwords?
1722 size = (size + 3) / 4; // for now convert to dwords
1723 dump_hex(addr, min(size, 64), level + 1);
1724 if (size > min(size, 64))
1725 printf("%s\t\t...\n", levels[level+1]);
1726 dump_float(addr, min(size, 64), level + 1);
1727 if (size > min(size, 64))
1728 printf("%s\t\t...\n", levels[level+1]);
1729 }
1730 }
1731 }
1732
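/* CP_SET_CONSTANT (a2xx): bits 16..19 of the header select the constant
 * type: ALU floats, fetch (texture/shader) consts, bool consts, loop
 * consts, or register writes at an offset of 0x2000.
 */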
1733 static void
1734 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1735 {
1736 uint32_t val = dwords[0] & 0xffff;
1737 switch((dwords[0] >> 16) & 0xf) {
1738 case 0x0:
1739 dump_float((float *)(dwords+1), sizedwords-1, level+1);
1740 break;
1741 case 0x1:
1742 /* need to figure out how const space is partitioned between
1743 * attributes, textures, etc..
1744 */
1745 if (val < 0x78) {
1746 dump_a2xx_tex_const(dwords+1, sizedwords-1, val, level);
1747 } else {
1748 dump_a2xx_shader_const(dwords+1, sizedwords-1, val, level);
1749 }
1750 break;
1751 case 0x2:
1752 printf("%sset bool const %04x\n", levels[level], val);
1753 break;
1754 case 0x3:
1755 printf("%sset loop const %04x\n", levels[level], val);
1756 break;
1757 case 0x4:
1758 val += 0x2000;
1759 if (dwords[0] & 0x80000000) {
1760 uint32_t srcreg = dwords[1];
1761 uint32_t dstval = dwords[2];
1762
1763 /* TODO: not sure what happens w/ payload != 2.. */
1764 assert(sizedwords == 3);
1765 assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1766
1767 /* note: rnn_regname uses a static buf so we can't do
1768 * two regname() calls for one printf..
1769 */
1770 printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1771 printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1772
1773 dstval += type0_reg_vals[srcreg];
1774
1775 dump_registers(val, &dstval, 1, level+1);
1776 } else {
1777 dump_registers(val, dwords+1, sizedwords-1, level+1);
1778 }
1779 break;
1780 }
1781 }
1782
1783 static void dump_register_summary(int level);
1784
1785 static void
1786 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1787 {
1788 const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1789 printl(2, "%sevent %s\n", levels[level], name);
1790
1791 if (name && (options->gpu_id > 500)) {
1792 char eventname[64];
1793 snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1794 if (!strcmp(name, "BLIT")) {
1795 do_query(eventname, 0);
1796 print_mode(level);
1797 dump_register_summary(level);
1798 }
1799 }
1800 }
1801
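/* Print a summary of register state after a draw/blit: by default only
 * registers written since the previous draw (all written registers with
 * options->allregs).  '!' marks a value that changed, '+' one that was
 * (re)written.  Also clears the per-draw rewritten tracking and bumps
 * draw_count.
 */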
1802 static void
1803 dump_register_summary(int level)
1804 {
1805 uint32_t i;
1806 bool saved_summary = summary;
1807 summary = false;
1808
1809 in_summary = true;
1810
1811 /* dump current state of registers: */
1812 printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1813 for (i = 0; i < regcnt(); i++) {
1814 uint32_t regbase = i;
1815 uint32_t lastval = reg_val(regbase);
1816 /* skip registers that haven't been updated since last draw/blit: */
1817 if (!(options->allregs || reg_rewritten(regbase)))
1818 continue;
1819 if (!reg_written(regbase))
1820 continue;
1821 if (lastval != lastvals[regbase]) {
1822 printl(2, "!");
1823 lastvals[regbase] = lastval;
1824 } else {
1825 printl(2, " ");
1826 }
1827 if (reg_rewritten(regbase)) {
1828 printl(2, "+");
1829 } else {
1830 printl(2, " ");
1831 }
1832 printl(2, "\t%08x", lastval);
1833 if (!quiet(2)) {
1834 dump_register(regbase, lastval, level);
1835 }
1836 }
1837
1838 clear_rewritten();
1839
1840 in_summary = false;
1841
1842 draw_count++;
1843 summary = saved_summary;
1844 }
1845
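/* Common handling for the CP_DRAW_INDX* packets: run query/script hooks,
 * print the primitive type, source select and index count, and update the
 * per-IB draw counters.  Returns the number of indices.
 */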
1846 static uint32_t
1847 draw_indx_common(uint32_t *dwords, int level)
1848 {
1849 uint32_t prim_type = dwords[1] & 0x1f;
1850 uint32_t source_select = (dwords[1] >> 6) & 0x3;
1851 uint32_t num_indices = dwords[2];
1852 const char *primtype;
1853
1854 primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1855
1856 do_query(primtype, num_indices);
1857
1858 printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
1859 printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype,
1860 prim_type);
1861 printl(2, "%ssource_select: %s (%d)\n", levels[level],
1862 rnn_enumname(rnn, "pc_di_src_sel", source_select),
1863 source_select);
1864 printl(2, "%snum_indices: %d\n", levels[level], num_indices);
1865
1866 vertices += num_indices;
1867
1868 draws[ib]++;
1869
1870 return num_indices;
1871 }
1872
1873 enum pc_di_index_size {
1874 INDEX_SIZE_IGN = 0,
1875 INDEX_SIZE_16_BIT = 0,
1876 INDEX_SIZE_32_BIT = 1,
1877 INDEX_SIZE_8_BIT = 2,
1878 INDEX_SIZE_INVALID = 0,
1879 };
1880
1881 static void
1882 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1883 {
1884 uint32_t num_indices = draw_indx_common(dwords, level);
1885
1886 assert(!is_64b());
1887
1888 /* if we have an index buffer, dump that: */
1889 if (sizedwords == 5) {
1890 void *ptr = hostptr(dwords[3]);
1891 printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
1892 printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
1893 if (ptr) {
1894 enum pc_di_index_size size =
1895 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1896 if (!quiet(2)) {
1897 int i;
1898 printf("%sidxs: ", levels[level]);
1899 if (size == INDEX_SIZE_8_BIT) {
1900 uint8_t *idx = ptr;
1901 for (i = 0; i < dwords[4]; i++)
1902 printf(" %u", idx[i]);
1903 } else if (size == INDEX_SIZE_16_BIT) {
1904 uint16_t *idx = ptr;
1905 for (i = 0; i < dwords[4]/2; i++)
1906 printf(" %u", idx[i]);
1907 } else if (size == INDEX_SIZE_32_BIT) {
1908 uint32_t *idx = ptr;
1909 for (i = 0; i < dwords[4]/4; i++)
1910 printf(" %u", idx[i]);
1911 }
1912 printf("\n");
1913 dump_hex(ptr, dwords[4]/4, level+1);
1914 }
1915 }
1916 }
1917
1918 /* don't bother dumping registers for the dummy draw_indx's.. */
1919 if (num_indices > 0)
1920 dump_register_summary(level);
1921
1922 needs_wfi = true;
1923 }
1924
1925 static void
1926 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
1927 {
1928 uint32_t num_indices = draw_indx_common(dwords, level);
1929 enum pc_di_index_size size =
1930 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1931 void *ptr = &dwords[3];
1932 int sz = 0;
1933
1934 assert(!is_64b());
1935
1936 /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1937 if (!quiet(2)) {
1938 int i;
1939 printf("%sidxs: ", levels[level]);
1940 if (size == INDEX_SIZE_8_BIT) {
1941 uint8_t *idx = ptr;
1942 for (i = 0; i < num_indices; i++)
1943 printf(" %u", idx[i]);
1944 sz = num_indices;
1945 } else if (size == INDEX_SIZE_16_BIT) {
1946 uint16_t *idx = ptr;
1947 for (i = 0; i < num_indices; i++)
1948 printf(" %u", idx[i]);
1949 sz = num_indices * 2;
1950 } else if (size == INDEX_SIZE_32_BIT) {
1951 uint32_t *idx = ptr;
1952 for (i = 0; i < num_indices; i++)
1953 printf(" %u", idx[i]);
1954 sz = num_indices * 4;
1955 }
1956 printf("\n");
1957 dump_hex(ptr, sz / 4, level+1);
1958 }
1959
1960 /* don't bother dumping registers for the dummy draw_indx's.. */
1961 if (num_indices > 0)
1962 dump_register_summary(level);
1963 }
1964
1965 static void
1966 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
1967 {
1968 uint32_t num_indices = dwords[2];
1969 uint32_t prim_type = dwords[0] & 0x1f;
1970
1971 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
1972 print_mode(level);
1973
1974 /* don't bother dumping registers for the dummy draw_indx's.. */
1975 if (num_indices > 0)
1976 dump_register_summary(level);
1977 }
1978
1979 static void
1980 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
1981 {
1982 uint32_t prim_type = dwords[0] & 0x1f;
1983 uint64_t addr;
1984
1985 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
1986 print_mode(level);
1987
1988 if (is_64b())
1989 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
1990 else
1991 addr = dwords[1];
1992 dump_gpuaddr_size(addr, level, 0x10, 2);
1993
1994 if (is_64b())
1995 addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
1996 else
1997 addr = dwords[3];
1998 dump_gpuaddr_size(addr, level, 0x10, 2);
1999
2000 dump_register_summary(level);
2001 }
2002
2003 static void
2004 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2005 {
2006 uint32_t prim_type = dwords[0] & 0x1f;
2007 uint64_t addr;
2008
2009 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2010 print_mode(level);
2011
2012 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2013 dump_gpuaddr_size(addr, level, 0x10, 2);
2014
2015 dump_register_summary(level);
2016 }
2017
2018 static void
2019 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2020 {
2021 uint32_t prim_type = dwords[0] & 0x1f;
2022 uint32_t count = dwords[2];
2023
2024 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2025 print_mode(level);
2026
2027 struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2028 uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2029 uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2030 uint32_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
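/* The dword offsets of the INDIRECT_COUNT, INDIRECT and STRIDE fields
 * within the packet are looked up from the rnn database above;
 * rnndec_decodereg() appears to return 0 when a field is not present
 * for this generation, which is why the offsets are tested below.
 */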
2031
2032 if (count_dword) {
2033 uint64_t count_addr = ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2034 uint32_t *buf = hostptr(count_addr);
2035
2036 /* Don't print more draws than this if we don't know the indirect
2037 * count. It's possible the user will give ~0 or some other large
2038 * value, expecting the GPU to fill in the draw count, and we don't
2039 * want to print a gazillion draws in that case:
2040 */
2041 const uint32_t max_draw_count = 0x100;
2042
2043 /* Assume the indirect count is garbage if it is 0 or larger than
2044 * this (quite large) value.  Hopefully this catches most cases.
2045 */
2046 const uint32_t max_indirect_draw_count = 0x10000;
2047
2048 if (buf) {
2049 printf("%sindirect count: %u\n", levels[level], *buf);
2050 if (*buf == 0 || *buf > max_indirect_draw_count) {
2051 /* garbage value */
2052 count = min(count, max_draw_count);
2053 } else {
2054 /* not garbage */
2055 count = min(count, *buf);
2056 }
2057 } else {
2058 count = min(count, max_draw_count);
2059 }
2060 }
2061
2062 if (addr_dword && stride_dword) {
2063 uint64_t addr = ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2064 uint32_t stride = dwords[stride_dword];
2065
2066 for (unsigned i = 0; i < count; i++, addr += stride) {
2067 printf("%sdraw %d:\n", levels[level], i);
2068 dump_gpuaddr_size(addr, level, 0x10, 2);
2069 }
2070 }
2071
2072 dump_register_summary(level);
2073 }
2074
2075 static void
2076 cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
2077 {
2078 do_query("COMPUTE", 1);
2079 dump_register_summary(level);
2080 }
2081
2082 static void
2083 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2084 {
2085 const char *buf = (void *)dwords;
2086 int i;
2087
2088 if (quiet(3))
2089 return;
2090
2091 // the blob driver doesn't use CP_NOP for string_marker, but it does
2092 // use it for payloads that end up looking like, but aren't,
2093 // ascii chars:
2094 if (!options->decode_markers)
2095 return;
2096
2097 for (i = 0; i < 4 * sizedwords; i++) {
2098 if (buf[i] == '\0')
2099 break;
2100 if (isascii(buf[i]))
2101 printf("%c", buf[i]);
2102 }
2103 printf("\n");
2104 }
2105
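/*
 * CP_INDIRECT_BUFFER: the packet carries the gpu address and size (in
 * dwords) of a nested command buffer.  We translate the gpu address back
 * to a host pointer, bump the IB level, and recursively decode it.
 */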
2106 static void
2107 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2108 {
2109 /* traverse indirect buffers */
2110 uint64_t ibaddr;
2111 uint32_t ibsize;
2112 uint32_t *ptr = NULL;
2113
2114 if (is_64b()) {
2115 /* a5xx+: low 32b of gpu addr, then high 32b, then size: */
2116 ibaddr = dwords[0];
2117 ibaddr |= ((uint64_t)dwords[1]) << 32;
2118 ibsize = dwords[2];
2119 } else {
2120 ibaddr = dwords[0];
2121 ibsize = dwords[1];
2122 }
2123
2124 if (!quiet(3)) {
2125 if (is_64b()) {
2126 printf("%sibaddr:%016"PRIx64"\n", levels[level], ibaddr);
2127 } else {
2128 printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2129 }
2130 printf("%sibsize:%08x\n", levels[level], ibsize);
2131 }
2132
2133 if (options->once && has_dumped(ibaddr, enable_mask))
2134 return;
2135
2136 /* 'query-compare' mode implies 'once' mode, although we only need to
2137 * process the cmdstream for *any* one enable_mask mode, since we are
2138 * comparing binning vs draw reg values at the same time, ie. it is
2139 * not useful to process the same draw in both the binning and draw passes.
2140 */
2141 if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2142 return;
2143
2144 /* map gpuaddr back to hostptr: */
2145 ptr = hostptr(ibaddr);
2146
2147 if (ptr) {
2148 /* If the GPU hung within the target IB, the trigger point will be
2149 * just after the current CP_INDIRECT_BUFFER, because the IB is
2150 * executed but never returns.  Account for this by checking whether
2151 * the IB returned:
2152 */
2153 highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
2154
2155 ib++;
2156 ibs[ib].base = ibaddr;
2157 ibs[ib].size = ibsize;
2158
2159 dump_commands(ptr, ibsize, level);
2160 ib--;
2161 } else {
2162 fprintf(stderr, "could not find: %016"PRIx64" (%d)\n", ibaddr, ibsize);
2163 }
2164 }
2165
2166 static void
2167 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2168 {
2169 needs_wfi = false;
2170 }
2171
2172 static void
2173 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2174 {
2175 if (quiet(2))
2176 return;
2177
2178 if (is_64b()) {
2179 uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2180 printf("%sgpuaddr:%016"PRIx64"\n", levels[level], gpuaddr);
2181 dump_hex(&dwords[2], sizedwords-2, level+1);
2182
2183 if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2184 dump_commands(&dwords[2], sizedwords-2, level+1);
2185 } else {
2186 uint32_t gpuaddr = dwords[0];
2187 printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2188 dump_float((float *)&dwords[1], sizedwords-1, level+1);
2189 }
2190 }
2191
2192 static void
2193 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2194 {
2195 uint32_t val = dwords[0] & 0xffff;
2196 uint32_t and = dwords[1];
2197 uint32_t or = dwords[2];
2198 printl(3, "%srmw ((%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1), and, or);
2199 if (needs_wfi)
2200 printl(2, "NEEDS WFI: rmw ((%s & 0x%08x) | 0x%08x)\n", regname(val, 1), and, or);
2201 reg_set(val, (reg_val(val) & and) | or);
2202 }
2203
2204 static void
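/*
 * CP_REG_TO_MEM / CP_MEM_TO_REG: as decoded below, the low 16 bits of the
 * first dword hold the base register and bits 19..28 hold the dword count;
 * the next two dwords hold the 64b gpu address.  (These widths are just
 * what this decoder extracts, not necessarily the full hw field layout.)
 */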
2205 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2206 {
2207 uint32_t val = dwords[0] & 0xffff;
2208 printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2209
2210 if (quiet(2))
2211 return;
2212
2213 uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2214 printf("%sgpuaddr:%016"PRIx64"\n", levels[level], gpuaddr);
2215 void *ptr = hostptr(gpuaddr);
2216 if (ptr) {
2217 uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2218 dump_hex(ptr, cnt, level + 1);
2219 }
2220 }
2221
2222 struct draw_state {
2223 uint16_t enable_mask;
2224 uint16_t flags;
2225 uint32_t count;
2226 uint64_t addr;
2227 };
2228
2229 struct draw_state state[32];
2230
2231 #define FLAG_DIRTY 0x1
2232 #define FLAG_DISABLE 0x2
2233 #define FLAG_DISABLE_ALL_GROUPS 0x4
2234 #define FLAG_LOAD_IMMED 0x8
2235
2236 static int draw_mode;
2237
2238 static void
2239 disable_group(unsigned group_id)
2240 {
2241 struct draw_state *ds = &state[group_id];
2242 memset(ds, 0, sizeof(*ds));
2243 }
2244
2245 static void
2246 disable_all_groups(void)
2247 {
2248 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2249 disable_group(i);
2250 }
2251
2252 static void
2253 load_group(unsigned group_id, int level)
2254 {
2255 struct draw_state *ds = &state[group_id];
2256
2257 if (!ds->count)
2258 return;
2259
2260 printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2261 printl(2, "%scount: %d\n", levels[level], ds->count);
2262 printl(2, "%saddr: %016"PRIx64"\n", levels[level], ds->addr);
2263 printl(2, "%sflags: %x\n", levels[level], ds->flags);
2264
2265 if (options->gpu_id >= 600) {
2266 printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2267
2268 if (!(ds->enable_mask & enable_mask)) {
2269 printl(2, "%s\tskipped!\n\n", levels[level]);
2270 return;
2271 }
2272 }
2273
2274 void *ptr = hostptr(ds->addr);
2275 if (ptr) {
2276 if (!quiet(2))
2277 dump_hex(ptr, ds->count, level+1);
2278
2279 ib++;
2280 dump_commands(ptr, ds->count, level+1);
2281 ib--;
2282 }
2283 }
2284
2285 static void
2286 load_all_groups(int level)
2287 {
2288 /* sanity check, we should never hit recursion here, and if
2289 * we do, bad things happen:
2290 */
2291 static bool loading_groups = false;
2292 if (loading_groups) {
2293 printf("ERROR: nothing in draw state should trigger recursively loading groups!\n");
2294 return;
2295 }
2296 loading_groups = true;
2297 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2298 load_group(i, level);
2299 loading_groups = false;
2300
2301 /* in 'query-compare' mode, defer disabling all groups until we have a
2302 * chance to process the query:
2303 */
2304 if (!options->query_compare)
2305 disable_all_groups();
2306 }
2307
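/*
 * CP_SET_DRAW_STATE: each group entry starts with a header dword which is
 * unpacked below as:
 *   bits  0..15  count (dwords of state cmdstream)
 *   bits 16..19  flags (FLAG_* above)
 *   bits 20..23  enable_mask (which passes the group applies to, a6xx)
 *   bits 24..28  group_id
 * followed by the 32b (or 64b on a5xx+) address of the state commands.
 */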
2308 static void
2309 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2310 {
2311 uint32_t i;
2312
2313 for (i = 0; i < sizedwords; ) {
2314 struct draw_state *ds;
2315 uint32_t count = dwords[i] & 0xffff;
2316 uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2317 uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2318 uint32_t flags = (dwords[i] >> 16) & 0xf;
2319 uint64_t addr;
2320
2321 if (is_64b()) {
2322 addr = dwords[i + 1];
2323 addr |= ((uint64_t)dwords[i + 2]) << 32;
2324 i += 3;
2325 } else {
2326 addr = dwords[i + 1];
2327 i += 2;
2328 }
2329
2330 if (flags & FLAG_DISABLE_ALL_GROUPS) {
2331 disable_all_groups();
2332 continue;
2333 }
2334
2335 if (flags & FLAG_DISABLE) {
2336 disable_group(group_id);
2337 continue;
2338 }
2339
2340 assert(group_id < ARRAY_SIZE(state));
2341 disable_group(group_id);
2342
2343 ds = &state[group_id];
2344
2345 ds->enable_mask = enable_mask;
2346 ds->flags = flags;
2347 ds->count = count;
2348 ds->addr = addr;
2349
2350 if (flags & FLAG_LOAD_IMMED) {
2351 load_group(group_id, level);
2352 disable_group(group_id);
2353 }
2354 }
2355 }
2356
2357 static void
2358 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2359 {
2360 draw_mode = dwords[0];
2361 }
2362
2363 /* execute compute shader */
2364 static void
2365 cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
2366 {
2367 do_query("compute", 0);
2368 dump_register_summary(level);
2369 }
2370
2371 static void
2372 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2373 {
2374 uint64_t addr;
2375
2376 if (is_64b()) {
2377 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2378 } else {
2379 addr = dwords[1];
2380 }
2381
2382 printl(3, "%saddr: %016"PRIx64"\n", levels[level], addr);
2383 dump_gpuaddr_size(addr, level, 0x10, 2);
2384
2385 do_query("compute", 0);
2386 dump_register_summary(level);
2387 }
2388
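/*
 * CP_SET_MARKER (a6xx): records which part of the frame the following
 * commands belong to (binning pass, gmem rendering, or sysmem/bypass
 * rendering).  The enable_mask derived here is what load_group() checks
 * a draw state group's enable_mask bits against.
 */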
2389 static void
2390 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2391 {
2392 render_mode = rnn_enumname(rnn, "a6xx_render_mode", dwords[0] & 0xf);
2393
/* guard against values not in the enum, rnn_enumname() can return NULL
 * (as in cp_event_write()):
 */
if (!render_mode)
return;

2394 if (!strcmp(render_mode, "RM6_BINNING")) {
2395 enable_mask = MODE_BINNING;
2396 } else if (!strcmp(render_mode, "RM6_GMEM")) {
2397 enable_mask = MODE_GMEM;
2398 } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2399 enable_mask = MODE_BYPASS;
2400 }
2401 }
2402
2403 static void
2404 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2405 {
2406 uint64_t addr;
2407 uint32_t *ptr, len;
2408
2409 assert(is_64b());
2410
2411 /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2412 * not sure if this can come in different sizes.
2413 *
2414 * First ptr doesn't seem to be cmdstream, second one does.
2415 *
2416 * Comment from downstream kernel:
2417 *
2418 * SRM -- set render mode (ex binning, direct render etc)
2419 * SRM is set by UMD usually at start of IB to tell CP the type of
2420 * preemption.
2421 * KMD needs to set SRM to NULL to indicate CP that rendering is
2422 * done by IB.
2423 * ------------------------------------------------------------------
2424 *
2425 * Seems to always be one of these two:
2426 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000
2427 * 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000
2428 *
2429 */
2430
2431 assert(options->gpu_id >= 500);
2432
2433 render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2434
2435 if (sizedwords == 1)
2436 return;
2437
2438 addr = dwords[1];
2439 addr |= ((uint64_t)dwords[2]) << 32;
2440
2441 mode = dwords[3];
2442
2443 dump_gpuaddr(addr, level+1);
2444
2445 if (sizedwords == 5)
2446 return;
2447
2448 assert(sizedwords == 8);
2449
2450 len = dwords[5];
2451 addr = dwords[6];
2452 addr |= ((uint64_t)dwords[7]) << 32;
2453
2454 printl(3, "%saddr: 0x%016"PRIx64"\n", levels[level], addr);
2455 printl(3, "%slen: 0x%x\n", levels[level], len);
2456
2457 ptr = hostptr(addr);
2458
2459 if (ptr) {
2460 if (!quiet(2)) {
2461 ib++;
2462 dump_commands(ptr, len, level+1);
2463 ib--;
2464 dump_hex(ptr, len, level+1);
2465 }
2466 }
2467 }
2468
2469 static void
2470 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2471 {
2472 uint64_t addr;
2473 uint32_t *ptr, len;
2474
2475 assert(is_64b());
2476 assert(options->gpu_id >= 500);
2477
2478 assert(sizedwords == 8);
2479
2480 addr = dwords[5];
2481 addr |= ((uint64_t)dwords[6]) << 32;
2482 len = dwords[7];
2483
2484 printl(3, "%saddr: 0x%016"PRIx64"\n", levels[level], addr);
2485 printl(3, "%slen: 0x%x\n", levels[level], len);
2486
2487 ptr = hostptr(addr);
2488
2489 if (ptr) {
2490 if (!quiet(2)) {
2491 ib++;
2492 dump_commands(ptr, len, level+1);
2493 ib--;
2494 dump_hex(ptr, len, level+1);
2495 }
2496 }
2497 }
2498
2499 static void
2500 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2501 {
2502 do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2503 print_mode(level);
2504 dump_register_summary(level);
2505 }
2506
2507 static void
2508 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2509 {
2510 int i;
2511
2512 /* NOTE: seems to write same reg multiple times.. not sure if different parts of
2513 * these are triggered by the FLUSH_SO_n events?? (if that is what they actually
2514 * are?)
2515 */
2516 bool saved_summary = summary;
2517 summary = false;
2518
2519 for (i = 0; i < sizedwords; i += 2) {
2520 dump_register(dwords[i+0], dwords[i+1], level+1);
2521 reg_set(dwords[i+0], dwords[i+1]);
2522 }
2523
2524 summary = saved_summary;
2525 }
2526
2527 static void
2528 cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2529 {
2530 uint32_t reg = dwords[1] & 0xffff;
2531
2532 dump_register(reg, dwords[2], level+1);
2533 reg_set(reg, dwords[2]);
2534 }
2535
2536 static void
2537 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2538 {
2539 uint64_t addr;
2540 uint32_t size = dwords[2] & 0xffff;
2541 void *ptr;
2542
2543 addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2544
2545 printf("addr=%"PRIx64"\n", addr);
2546 ptr = hostptr(addr);
2547 if (ptr) {
2548 dump_commands(ptr, size, level+1);
2549 }
2550 }
2551
2552 static void
2553 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2554 {
2555 skip_ib2_enable_global = dwords[0];
2556 }
2557
2558 static void
2559 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2560 {
2561 skip_ib2_enable_local = dwords[0];
2562 }
2563
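/* Table of known type3/type7 packet handlers.  The CP() macro just pastes
 * on the "CP_" prefix, e.g. CP(NOP, cp_nop) expands to { "CP_NOP", cp_nop }:
 */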
2564 #define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2565 static const struct type3_op {
2566 const char *name;
2567 void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2568 struct {
2569 bool load_all_groups;
2570 } options;
2571 } type3_op[] = {
2572 CP(NOP, cp_nop),
2573 CP(INDIRECT_BUFFER, cp_indirect),
2574 CP(INDIRECT_BUFFER_PFD, cp_indirect),
2575 CP(WAIT_FOR_IDLE, cp_wfi),
2576 CP(REG_RMW, cp_rmw),
2577 CP(REG_TO_MEM, cp_reg_mem),
2578 CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2579 CP(MEM_WRITE, cp_mem_write),
2580 CP(EVENT_WRITE, cp_event_write),
2581 CP(RUN_OPENCL, cp_run_cl),
2582 CP(DRAW_INDX, cp_draw_indx, {.load_all_groups=true}),
2583 CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups=true}),
2584 CP(SET_CONSTANT, cp_set_const),
2585 CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2586 CP(WIDE_REG_WRITE, cp_wide_reg_write),
2587
2588 /* for a3xx */
2589 CP(LOAD_STATE, cp_load_state),
2590 CP(SET_BIN, cp_set_bin),
2591
2592 /* for a4xx */
2593 CP(LOAD_STATE4, cp_load_state),
2594 CP(SET_DRAW_STATE, cp_set_draw_state),
2595 CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups=true}),
2596 CP(EXEC_CS, cp_exec_cs, {.load_all_groups=true}),
2597 CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups=true}),
2598
2599 /* for a5xx */
2600 CP(SET_RENDER_MODE, cp_set_render_mode),
2601 CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2602 CP(BLIT, cp_blit),
2603 CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2604 CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups=true}),
2605 CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups=true}),
2606 CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups=true}),
2607 CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
2608 CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
2609
2610 /* for a6xx */
2611 CP(LOAD_STATE6_GEOM, cp_load_state),
2612 CP(LOAD_STATE6_FRAG, cp_load_state),
2613 CP(LOAD_STATE6, cp_load_state),
2614 CP(SET_MODE, cp_set_mode),
2615 CP(SET_MARKER, cp_set_marker),
2616 CP(REG_WRITE, cp_reg_write),
2617
2618 CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
2619 };
2620
2621 static void
2622 noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
2623 {
2624 }
2625
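/* Look up the handler for an opcode: the opcode is first resolved to a
 * name via pktname() (which presumably knows the per-generation numbering),
 * then matched by string against the table above.  Unknown or unhandled
 * packets fall back to noop_fxn.
 */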
2626 static const struct type3_op *
2627 get_type3_op(unsigned opc)
2628 {
2629 static const struct type3_op dummy_op = {
2630 .fxn = noop_fxn,
2631 };
2632 const char *name = pktname(opc);
2633
2634 if (!name)
2635 return &dummy_op;
2636
2637 for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2638 if (!strcmp(name, type3_op[i].name))
2639 return &type3_op[i];
2640
2641 return &dummy_op;
2642 }
2643
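/*
 * Main decode loop: walk the buffer packet by packet.  type0/type4
 * packets are register writes, type3 (a2xx-a4xx) and type7 (a5xx+)
 * packets are opcodes dispatched via the table above, and type2 packets
 * are nops.  Anything else is treated as corruption; on a5xx+ we scan
 * forward for the next plausible packet header rather than giving up.
 */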
2644 void
2645 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2646 {
2647 int dwords_left = sizedwords;
2648 uint32_t count = 0; /* dword count including packet header */
2649 uint32_t val;
2650
2651 // assert(dwords);
2652 if (!dwords) {
2653 printf("NULL cmd buffer!\n");
2654 return;
2655 }
2656
2657 draws[ib] = 0;
2658
2659 while (dwords_left > 0) {
2660
2661 current_draw_count = draw_count;
2662
2663 /* hack, this looks like a -1 underflow, in some versions
2664 * when it tries to write zero registers via pkt0
2665 */
2666 // if ((dwords[0] >> 16) == 0xffff)
2667 // goto skip;
2668
2669 if (pkt_is_type0(dwords[0])) {
2670 printl(3, "t0");
2671 count = type0_pkt_size(dwords[0]) + 1;
2672 val = type0_pkt_offset(dwords[0]);
2673 assert(val < regcnt());
2674 printl(3, "%swrite %s%s (%04x)\n", levels[level+1], regname(val, 1),
2675 (dwords[0] & 0x8000) ? " (same register)" : "", val);
2676 dump_registers(val, dwords+1, count-1, level+2);
2677 if (!quiet(3))
2678 dump_hex(dwords, count, level+1);
2679 } else if (pkt_is_type4(dwords[0])) {
2680 /* basically the same(ish) as type0 prior to a5xx */
2681 printl(3, "t4");
2682 count = type4_pkt_size(dwords[0]) + 1;
2683 val = type4_pkt_offset(dwords[0]);
2684 assert(val < regcnt());
2685 printl(3, "%swrite %s (%04x)\n", levels[level+1], regname(val, 1), val);
2686 dump_registers(val, dwords+1, count-1, level+2);
2687 if (!quiet(3))
2688 dump_hex(dwords, count, level+1);
2689 #if 0
2690 } else if (pkt_is_type1(dwords[0])) {
2691 printl(3, "t1");
2692 count = 3;
2693 val = dwords[0] & 0xfff;
2694 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2695 dump_registers(val, dwords+1, 1, level+2);
2696 val = (dwords[0] >> 12) & 0xfff;
2697 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2698 dump_registers(val, dwords+2, 1, level+2);
2699 if (!quiet(3))
2700 dump_hex(dwords, count, level+1);
2701 } else if (pkt_is_type2(dwords[0])) {
2702 printl(3, "t2");
2703 printf("%sNOP\n", levels[level+1]);
2704 count = 1;
2705 if (!quiet(3))
2706 dump_hex(dwords, count, level+1);
2707 #endif
2708 } else if (pkt_is_type3(dwords[0])) {
2709 count = type3_pkt_size(dwords[0]) + 1;
2710 val = cp_type3_opcode(dwords[0]);
2711 const struct type3_op *op = get_type3_op(val);
2712 if (op->options.load_all_groups)
2713 load_all_groups(level+1);
2714 printl(3, "t3");
2715 const char *name = pktname(val);
2716 if (!quiet(2)) {
2717 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
2718 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
2719 val, count, (dwords[0] & 0x1) ? " (predicated)" : "");
2720 }
2721 if (name)
2722 dump_domain(dwords+1, count-1, level+2, name);
2723 op->fxn(dwords+1, count-1, level+1);
2724 if (!quiet(2))
2725 dump_hex(dwords, count, level+1);
2726 } else if (pkt_is_type7(dwords[0])) {
2727 count = type7_pkt_size(dwords[0]) + 1;
2728 val = cp_type7_opcode(dwords[0]);
2729 const struct type3_op *op = get_type3_op(val);
2730 if (op->options.load_all_groups)
2731 load_all_groups(level+1);
2732 printl(3, "t7");
2733 const char *name = pktname(val);
2734 if (!quiet(2)) {
2735 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
2736 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
2737 val, count);
2738 }
2739 if (name) {
2740 /* special hack for two packets that decode the same way
2741 * on a6xx:
2742 */
2743 if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
2744 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
2745 name = "CP_LOAD_STATE6";
2746 dump_domain(dwords+1, count-1, level+2, name);
2747 }
2748 op->fxn(dwords+1, count-1, level+1);
2749 if (!quiet(2))
2750 dump_hex(dwords, count, level+1);
2751 } else if (pkt_is_type2(dwords[0])) {
2752 printl(3, "t2");
2753 printl(3, "%snop\n", levels[level+1]);
2754 } else {
2755 /* for 5xx+ we can do a passable job of looking for start of next valid packet: */
2756 if (options->gpu_id >= 500) {
2757 while (dwords_left > 0) {
2758 if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
2759 break;
2760 printf("bad type! %08x\n", dwords[0]);
2761 dwords++;
2762 dwords_left--;
2763 }
2764 } else {
2765 printf("bad type! %08x\n", dwords[0]);
2766 return;
2767 }
2768 }
2769
2770 dwords += count;
2771 dwords_left -= count;
2772
2773 }
2774
2775 if (dwords_left < 0)
2776 printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
2777 }