2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <sys/types.h>
50 /* ************************************************************************* */
51 /* originally based on kernel recovery dump code: */
53 static const struct cffdec_options
*options
;
55 static bool needs_wfi
= false;
56 static bool summary
= false;
57 static bool in_summary
= false;
60 static inline unsigned regcnt(void)
62 if (options
->gpu_id
>= 500)
68 static int is_64b(void)
70 return options
->gpu_id
>= 500;
77 uint32_t size
; /* in dwords */
78 /* Generally cmdstream consists of multiple IB calls to different
79 * buffers, which are themselves often re-used for each tile. The
80 * triggered flag serves two purposes to help make it more clear
81 * what part of the cmdstream is before vs after the the GPU hang:
83 * 1) if in IB2 we are passed the point within the IB2 buffer where
84 * the GPU hung, but IB1 is not passed the point within its
85 * buffer where the GPU had hung, then we know the GPU hang
86 * happens on a future use of that IB2 buffer.
88 * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
89 * hung, but we've already passed the trigger point at the same
90 * IB level, we know that we are passed the point where the GPU
93 * So this is a one way switch, false->true. And a higher #'d
94 * IB level isn't considered triggered unless the lower #'d IB
101 static int draw_count
;
102 static int current_draw_count
;
104 /* query mode.. to handle symbolic register name queries, we need to
105 * defer parsing query string until after gpu_id is know and rnn db
108 static int *queryvals
;
113 if ((options
->draw_filter
!= -1) && (options
->draw_filter
!= current_draw_count
))
115 if ((lvl
>= 3) && (summary
|| options
->querystrs
|| options
->script
))
117 if ((lvl
>= 2) && (options
->querystrs
|| options
->script
))
123 printl(int lvl
, const char *fmt
, ...)
133 static const char *levels
[] = {
142 "\t\t\t\t\t\t\t\t\t",
157 /* SDS (CP_SET_DRAW_STATE) helpers: */
158 static void load_all_groups(int level
);
159 static void disable_all_groups(void);
161 static void dump_tex_samp(uint32_t *texsamp
, enum state_src_t src
, int num_unit
, int level
);
162 static void dump_tex_const(uint32_t *texsamp
, int num_unit
, int level
);
165 highlight_gpuaddr(uint64_t gpuaddr
)
170 if (!options
->ibs
[ib
].base
)
173 if ((ib
> 0) && options
->ibs
[ib
-1].base
&& !ibs
[ib
-1].triggered
)
176 if (ibs
[ib
].triggered
)
179 if (options
->ibs
[ib
].base
!= ibs
[ib
].base
)
182 uint64_t start
= ibs
[ib
].base
+ 4 * (ibs
[ib
].size
- options
->ibs
[ib
].rem
);
183 uint64_t end
= ibs
[ib
].base
+ 4 * ibs
[ib
].size
;
185 bool triggered
= (start
<= gpuaddr
) && (gpuaddr
<= end
);
187 ibs
[ib
].triggered
|= triggered
;
190 printf("ESTIMATED CRASH LOCATION!\n");
196 dump_hex(uint32_t *dwords
, uint32_t sizedwords
, int level
)
204 for (i
= 0; i
< sizedwords
; i
+= 8) {
207 /* always show first row: */
211 for (j
= 0; (j
< 8) && (i
+j
< sizedwords
) && zero
; j
++)
215 if (zero
&& !lastzero
)
223 uint64_t addr
= gpuaddr(&dwords
[i
]);
224 bool highlight
= highlight_gpuaddr(addr
);
227 printf("\x1b[0;1;31m");
230 printf("%016"PRIx64
":%s", addr
, levels
[level
]);
232 printf("%08x:%s", (uint32_t)addr
, levels
[level
]);
238 printf("%04x:", i
* 4);
240 for (j
= 0; (j
< 8) && (i
+j
< sizedwords
); j
++) {
241 printf(" %08x", dwords
[i
+j
]);
249 dump_float(float *dwords
, uint32_t sizedwords
, int level
)
252 for (i
= 0; i
< sizedwords
; i
++) {
255 printf("%016"PRIx64
":%s", gpuaddr(dwords
), levels
[level
]);
257 printf("%08x:%s", (uint32_t)gpuaddr(dwords
), levels
[level
]);
262 printf("%8f", *(dwords
++));
270 /* I believe the surface format is low bits:
271 #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
272 comments in sys2gmem_tex_const indicate that address is [31:12], but
273 looks like at least some of the bits above the format have different meaning..
/* Split a packed dword into address and flag bits: the bits covered
 * by 'mask' are returned in *flags, the remainder in *gpuaddr.
 * Only used on a2xx (32b addresses).
 */
static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr,
		uint32_t *flags, uint32_t mask)
{
	assert(!is_64b()); /* this is only used on a2xx */
	*flags = dword & mask;
	*gpuaddr = dword & ~mask;
}
283 static uint32_t type0_reg_vals
[0xffff + 1];
284 static uint8_t type0_reg_rewritten
[sizeof(type0_reg_vals
)/8]; /* written since last draw */
285 static uint8_t type0_reg_written
[sizeof(type0_reg_vals
)/8];
286 static uint32_t lastvals
[ARRAY_SIZE(type0_reg_vals
)];
288 static bool reg_rewritten(uint32_t regbase
)
290 return !!(type0_reg_rewritten
[regbase
/8] & (1 << (regbase
% 8)));
293 bool reg_written(uint32_t regbase
)
295 return !!(type0_reg_written
[regbase
/8] & (1 << (regbase
% 8)));
298 static void clear_rewritten(void)
300 memset(type0_reg_rewritten
, 0, sizeof(type0_reg_rewritten
));
303 static void clear_written(void)
305 memset(type0_reg_written
, 0, sizeof(type0_reg_written
));
309 uint32_t reg_lastval(uint32_t regbase
)
311 return lastvals
[regbase
];
317 memset(lastvals
, 0, sizeof(lastvals
));
321 reg_val(uint32_t regbase
)
323 return type0_reg_vals
[regbase
];
327 reg_set(uint32_t regbase
, uint32_t val
)
329 assert(regbase
< regcnt());
330 type0_reg_vals
[regbase
] = val
;
331 type0_reg_written
[regbase
/8] |= (1 << (regbase
% 8));
332 type0_reg_rewritten
[regbase
/8] |= (1 << (regbase
% 8));
336 reg_dump_scratch(const char *name
, uint32_t dword
, int level
)
343 r
= regbase("CP_SCRATCH[0].REG");
345 // if not, try old a2xx/a3xx version:
347 r
= regbase("CP_SCRATCH_REG0");
352 printf("%s:%u,%u,%u,%u\n", levels
[level
],
353 reg_val(r
+ 4), reg_val(r
+ 5),
354 reg_val(r
+ 6), reg_val(r
+ 7));
358 dump_gpuaddr_size(uint64_t gpuaddr
, int level
, int sizedwords
, int quietlvl
)
365 buf
= hostptr(gpuaddr
);
367 dump_hex(buf
, sizedwords
, level
+1);
/* Convenience wrapper: dump a 64-dword window at gpuaddr (quiet
 * level 3).
 */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
	dump_gpuaddr_size(gpuaddr, level, 64, 3);
}
/* Register handler: treat the written dword as a gpu address and
 * dump the memory it points at.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
	dump_gpuaddr(dword, level);
}
385 reg_gpuaddr_lo(const char *name
, uint32_t dword
, int level
)
391 reg_dump_gpuaddr_hi(const char *name
, uint32_t dword
, int level
)
393 dump_gpuaddr(gpuaddr_lo
| (((uint64_t)dword
) << 32), level
);
398 dump_shader(const char *ext
, void *buf
, int bufsz
)
400 if (options
->dump_shaders
) {
404 sprintf(filename
, "%04d.%s", n
++, ext
);
405 fd
= open(filename
, O_WRONLY
| O_TRUNC
| O_CREAT
, 0644);
406 write(fd
, buf
, bufsz
);
412 disasm_gpuaddr(const char *name
, uint64_t gpuaddr
, int level
)
416 gpuaddr
&= 0xfffffffffffffff0;
421 buf
= hostptr(gpuaddr
);
423 uint32_t sizedwords
= hostlen(gpuaddr
) / 4;
426 dump_hex(buf
, min(64, sizedwords
), level
+1);
427 try_disasm_a3xx(buf
, sizedwords
, level
+2, stdout
, options
->gpu_id
);
429 /* this is a bit ugly way, but oh well.. */
430 if (strstr(name
, "SP_VS_OBJ")) {
432 } else if (strstr(name
, "SP_FS_OBJ")) {
434 } else if (strstr(name
, "SP_GS_OBJ")) {
436 } else if (strstr(name
, "SP_CS_OBJ")) {
443 dump_shader(ext
, buf
, sizedwords
* 4);
/* Register handler: treat the written dword as a shader address and
 * disassemble it.
 */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
	disasm_gpuaddr(name, dword, level);
}
454 reg_disasm_gpuaddr_hi(const char *name
, uint32_t dword
, int level
)
456 disasm_gpuaddr(name
, gpuaddr_lo
| (((uint64_t)dword
) << 32), level
);
459 /* Find the value of the TEX_COUNT register that corresponds to the named
460 * TEX_SAMP/TEX_CONST reg.
462 * Note, this kinda assumes an equal # of samplers and textures, but not
463 * really sure if there is a much better option. I suppose on a6xx we
464 * could instead decode the bitfields in SP_xS_CONFIG
467 get_tex_count(const char *name
)
469 char count_reg
[strlen(name
) + 5];
472 p
= strstr(name
, "CONST");
474 p
= strstr(name
, "SAMP");
479 strncpy(count_reg
, name
, n
);
480 strcpy(count_reg
+ n
, "COUNT");
482 return reg_val(regbase(count_reg
));
486 reg_dump_tex_samp_hi(const char *name
, uint32_t dword
, int level
)
491 int num_unit
= get_tex_count(name
);
492 uint64_t gpuaddr
= gpuaddr_lo
| (((uint64_t)dword
) << 32);
493 void *buf
= hostptr(gpuaddr
);
498 dump_tex_samp(buf
, STATE_SRC_DIRECT
, num_unit
, level
+1);
502 reg_dump_tex_const_hi(const char *name
, uint32_t dword
, int level
)
507 int num_unit
= get_tex_count(name
);
508 uint64_t gpuaddr
= gpuaddr_lo
| (((uint64_t)dword
) << 32);
509 void *buf
= hostptr(gpuaddr
);
514 dump_tex_const(buf
, num_unit
, level
+1);
518 * Registers with special handling (rnndec_decode() handles rest):
520 #define REG(x, fxn) { #x, fxn }
523 void (*fxn
)(const char *name
, uint32_t dword
, int level
);
526 REG(CP_SCRATCH_REG0
, reg_dump_scratch
),
527 REG(CP_SCRATCH_REG1
, reg_dump_scratch
),
528 REG(CP_SCRATCH_REG2
, reg_dump_scratch
),
529 REG(CP_SCRATCH_REG3
, reg_dump_scratch
),
530 REG(CP_SCRATCH_REG4
, reg_dump_scratch
),
531 REG(CP_SCRATCH_REG5
, reg_dump_scratch
),
532 REG(CP_SCRATCH_REG6
, reg_dump_scratch
),
533 REG(CP_SCRATCH_REG7
, reg_dump_scratch
),
536 REG(CP_SCRATCH_REG0
, reg_dump_scratch
),
537 REG(CP_SCRATCH_REG1
, reg_dump_scratch
),
538 REG(CP_SCRATCH_REG2
, reg_dump_scratch
),
539 REG(CP_SCRATCH_REG3
, reg_dump_scratch
),
540 REG(CP_SCRATCH_REG4
, reg_dump_scratch
),
541 REG(CP_SCRATCH_REG5
, reg_dump_scratch
),
542 REG(CP_SCRATCH_REG6
, reg_dump_scratch
),
543 REG(CP_SCRATCH_REG7
, reg_dump_scratch
),
544 REG(VSC_SIZE_ADDRESS
, reg_dump_gpuaddr
),
545 REG(SP_VS_PVT_MEM_ADDR_REG
, reg_dump_gpuaddr
),
546 REG(SP_FS_PVT_MEM_ADDR_REG
, reg_dump_gpuaddr
),
547 REG(SP_VS_OBJ_START_REG
, reg_disasm_gpuaddr
),
548 REG(SP_FS_OBJ_START_REG
, reg_disasm_gpuaddr
),
549 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
552 REG(CP_SCRATCH
[0].REG
, reg_dump_scratch
),
553 REG(CP_SCRATCH
[0x1].REG
, reg_dump_scratch
),
554 REG(CP_SCRATCH
[0x2].REG
, reg_dump_scratch
),
555 REG(CP_SCRATCH
[0x3].REG
, reg_dump_scratch
),
556 REG(CP_SCRATCH
[0x4].REG
, reg_dump_scratch
),
557 REG(CP_SCRATCH
[0x5].REG
, reg_dump_scratch
),
558 REG(CP_SCRATCH
[0x6].REG
, reg_dump_scratch
),
559 REG(CP_SCRATCH
[0x7].REG
, reg_dump_scratch
),
560 REG(SP_VS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
561 REG(SP_FS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
562 REG(SP_GS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
563 REG(SP_HS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
564 REG(SP_DS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
565 REG(SP_CS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
566 REG(SP_VS_OBJ_START
, reg_disasm_gpuaddr
),
567 REG(SP_FS_OBJ_START
, reg_disasm_gpuaddr
),
568 REG(SP_GS_OBJ_START
, reg_disasm_gpuaddr
),
569 REG(SP_HS_OBJ_START
, reg_disasm_gpuaddr
),
570 REG(SP_DS_OBJ_START
, reg_disasm_gpuaddr
),
571 REG(SP_CS_OBJ_START
, reg_disasm_gpuaddr
),
572 REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
573 REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
574 REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
575 REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
576 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
579 REG(CP_SCRATCH
[0x4].REG
, reg_dump_scratch
),
580 REG(CP_SCRATCH
[0x5].REG
, reg_dump_scratch
),
581 REG(CP_SCRATCH
[0x6].REG
, reg_dump_scratch
),
582 REG(CP_SCRATCH
[0x7].REG
, reg_dump_scratch
),
583 REG(SP_VS_OBJ_START_LO
, reg_gpuaddr_lo
),
584 REG(SP_VS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
585 REG(SP_HS_OBJ_START_LO
, reg_gpuaddr_lo
),
586 REG(SP_HS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
587 REG(SP_DS_OBJ_START_LO
, reg_gpuaddr_lo
),
588 REG(SP_DS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
589 REG(SP_GS_OBJ_START_LO
, reg_gpuaddr_lo
),
590 REG(SP_GS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
591 REG(SP_FS_OBJ_START_LO
, reg_gpuaddr_lo
),
592 REG(SP_FS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
593 REG(SP_CS_OBJ_START_LO
, reg_gpuaddr_lo
),
594 REG(SP_CS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
595 REG(TPL1_VS_TEX_CONST_LO
, reg_gpuaddr_lo
),
596 REG(TPL1_VS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
597 REG(TPL1_VS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
598 REG(TPL1_VS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
599 REG(TPL1_HS_TEX_CONST_LO
, reg_gpuaddr_lo
),
600 REG(TPL1_HS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
601 REG(TPL1_HS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
602 REG(TPL1_HS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
603 REG(TPL1_DS_TEX_CONST_LO
, reg_gpuaddr_lo
),
604 REG(TPL1_DS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
605 REG(TPL1_DS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
606 REG(TPL1_DS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
607 REG(TPL1_GS_TEX_CONST_LO
, reg_gpuaddr_lo
),
608 REG(TPL1_GS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
609 REG(TPL1_GS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
610 REG(TPL1_GS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
611 REG(TPL1_FS_TEX_CONST_LO
, reg_gpuaddr_lo
),
612 REG(TPL1_FS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
613 REG(TPL1_FS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
614 REG(TPL1_FS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
615 REG(TPL1_CS_TEX_CONST_LO
, reg_gpuaddr_lo
),
616 REG(TPL1_CS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
617 REG(TPL1_CS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
618 REG(TPL1_CS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
619 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO
, reg_gpuaddr_lo
),
620 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI
, reg_dump_gpuaddr_hi
),
621 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
622 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
623 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
624 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
625 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
626 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
627 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
628 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
629 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
630 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
631 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
632 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
633 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
634 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
635 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
636 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
637 // REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
638 // REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
639 // REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
640 // REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
641 // REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
642 // REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
643 // REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
644 // REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
645 // REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
646 // REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
648 // REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
649 // REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
650 // REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
651 // REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
652 // REG(RB_2D_DST_LO, reg_gpuaddr_lo),
653 // REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
654 // REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
655 // REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
659 REG(CP_SCRATCH
[0x4].REG
, reg_dump_scratch
),
660 REG(CP_SCRATCH
[0x5].REG
, reg_dump_scratch
),
661 REG(CP_SCRATCH
[0x6].REG
, reg_dump_scratch
),
662 REG(CP_SCRATCH
[0x7].REG
, reg_dump_scratch
),
664 REG(SP_VS_OBJ_START_LO
, reg_gpuaddr_lo
),
665 REG(SP_VS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
666 REG(SP_HS_OBJ_START_LO
, reg_gpuaddr_lo
),
667 REG(SP_HS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
668 REG(SP_DS_OBJ_START_LO
, reg_gpuaddr_lo
),
669 REG(SP_DS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
670 REG(SP_GS_OBJ_START_LO
, reg_gpuaddr_lo
),
671 REG(SP_GS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
672 REG(SP_FS_OBJ_START_LO
, reg_gpuaddr_lo
),
673 REG(SP_FS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
674 REG(SP_CS_OBJ_START_LO
, reg_gpuaddr_lo
),
675 REG(SP_CS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
677 REG(SP_VS_TEX_CONST_LO
, reg_gpuaddr_lo
),
678 REG(SP_VS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
679 REG(SP_VS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
680 REG(SP_VS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
681 REG(SP_HS_TEX_CONST_LO
, reg_gpuaddr_lo
),
682 REG(SP_HS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
683 REG(SP_HS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
684 REG(SP_HS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
685 REG(SP_DS_TEX_CONST_LO
, reg_gpuaddr_lo
),
686 REG(SP_DS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
687 REG(SP_DS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
688 REG(SP_DS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
689 REG(SP_GS_TEX_CONST_LO
, reg_gpuaddr_lo
),
690 REG(SP_GS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
691 REG(SP_GS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
692 REG(SP_GS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
693 REG(SP_FS_TEX_CONST_LO
, reg_gpuaddr_lo
),
694 REG(SP_FS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
695 REG(SP_FS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
696 REG(SP_FS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
697 REG(SP_CS_TEX_CONST_LO
, reg_gpuaddr_lo
),
698 REG(SP_CS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
699 REG(SP_CS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
700 REG(SP_CS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
705 static struct rnn
*rnn
;
708 init_rnn(const char *gpuname
)
710 rnn
= rnn_new(!options
->color
);
712 rnn_load(rnn
, gpuname
);
714 if (options
->querystrs
) {
716 queryvals
= calloc(options
->nquery
, sizeof(queryvals
[0]));
718 for (i
= 0; i
< options
->nquery
; i
++) {
719 int val
= strtol(options
->querystrs
[i
], NULL
, 0);
722 val
= regbase(options
->querystrs
[i
]);
725 printf("querystr: %s -> 0x%x\n", options
->querystrs
[i
], queryvals
[i
]);
729 for (unsigned idx
= 0; type0_reg
[idx
].regname
; idx
++) {
730 type0_reg
[idx
].regbase
= regbase(type0_reg
[idx
].regname
);
731 if (!type0_reg
[idx
].regbase
) {
732 printf("invalid register name: %s\n", type0_reg
[idx
].regname
);
743 memset(&ibs
, 0, sizeof(ibs
));
747 cffdec_init(const struct cffdec_options
*_options
)
750 summary
= options
->summary
;
752 /* in case we're decoding multiple files: */
757 /* TODO we need an API to free/cleanup any previous rnn */
759 switch (options
->gpu_id
) {
761 type0_reg
= reg_a2xx
;
765 type0_reg
= reg_a3xx
;
769 type0_reg
= reg_a4xx
;
773 type0_reg
= reg_a5xx
;
777 type0_reg
= reg_a6xx
;
781 errx(-1, "unsupported gpu");
786 pktname(unsigned opc
)
788 return rnn_enumname(rnn
, "adreno_pm4_type3_packets", opc
);
792 regname(uint32_t regbase
, int color
)
794 return rnn_regname(rnn
, regbase
, color
);
798 regbase(const char *name
)
800 return rnn_regbase(rnn
, name
);
804 endswith(uint32_t regbase
, const char *suffix
)
806 const char *name
= regname(regbase
, 0);
807 const char *s
= strstr(name
, suffix
);
810 return (s
- strlen(name
) + strlen(suffix
)) == name
;
814 dump_register_val(uint32_t regbase
, uint32_t dword
, int level
)
816 struct rnndecaddrinfo
*info
= rnn_reginfo(rnn
, regbase
);
818 if (info
&& info
->typeinfo
) {
819 uint64_t gpuaddr
= 0;
820 char *decoded
= rnndec_decodeval(rnn
->vc
, info
->typeinfo
, dword
);
821 printf("%s%s: %s", levels
[level
], info
->name
, decoded
);
823 /* Try and figure out if we are looking at a gpuaddr.. this
824 * might be useful for other gen's too, but at least a5xx has
825 * the _HI/_LO suffix we can look for. Maybe a better approach
826 * would be some special annotation in the xml..
828 if (options
->gpu_id
>= 500) {
829 if (endswith(regbase
, "_HI") && endswith(regbase
-1, "_LO")) {
830 gpuaddr
= (((uint64_t)dword
) << 32) | reg_val(regbase
-1);
831 } else if (endswith(regbase
, "_LO") && endswith(regbase
+1, "_HI")) {
832 gpuaddr
= (((uint64_t)reg_val(regbase
+1)) << 32) | dword
;
836 if (gpuaddr
&& hostptr(gpuaddr
)) {
837 printf("\t\tbase=%"PRIx64
", offset=%"PRIu64
", size=%u",
838 gpubaseaddr(gpuaddr
),
839 gpuaddr
- gpubaseaddr(gpuaddr
),
840 hostlen(gpubaseaddr(gpuaddr
)));
847 printf("%s%s: %08x\n", levels
[level
], info
->name
, dword
);
849 printf("%s<%04x>: %08x\n", levels
[level
], regbase
, dword
);
859 dump_register(uint32_t regbase
, uint32_t dword
, int level
)
862 dump_register_val(regbase
, dword
, level
);
865 for (unsigned idx
= 0; type0_reg
[idx
].regname
; idx
++) {
866 if (type0_reg
[idx
].regbase
== regbase
) {
867 type0_reg
[idx
].fxn(type0_reg
[idx
].regname
, dword
, level
);
/* Banked (context) registers are the 0x2000..0x23ff range; writes
 * outside this range need a WFI first (see dump_registers).
 */
static bool
is_banked_reg(uint32_t regbase)
{
	return (regbase >= 0x2000) && (regbase < 0x2400);
}
880 dump_registers(uint32_t regbase
, uint32_t *dwords
, uint32_t sizedwords
, int level
)
882 while (sizedwords
--) {
883 int last_summary
= summary
;
885 /* access to non-banked registers needs a WFI:
886 * TODO banked register range for a2xx??
888 if (needs_wfi
&& !is_banked_reg(regbase
))
889 printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase
, 1), regbase
);
891 reg_set(regbase
, *dwords
);
892 dump_register(regbase
, *dwords
, level
);
895 summary
= last_summary
;
900 dump_domain(uint32_t *dwords
, uint32_t sizedwords
, int level
,
903 struct rnndomain
*dom
;
906 dom
= rnn_finddomain(rnn
->db
, name
);
912 script_packet(dwords
, sizedwords
, rnn
, dom
);
917 for (i
= 0; i
< sizedwords
; i
++) {
918 struct rnndecaddrinfo
*info
= rnndec_decodeaddr(rnn
->vc
, dom
, i
, 0);
920 if (!(info
&& info
->typeinfo
))
922 uint64_t value
= dwords
[i
];
923 if (info
->typeinfo
->high
>= 32 && i
< sizedwords
- 1) {
924 value
|= (uint64_t) dwords
[i
+ 1] << 32;
925 i
++; /* skip the next dword since we're printing it now */
927 decoded
= rnndec_decodeval(rnn
->vc
, info
->typeinfo
, value
);
928 /* Unlike the register printing path, we don't print the name
929 * of the register, so if it doesn't contain other named
930 * things (i.e. it isn't a bitset) then print the register
931 * name as if it's a bitset with a single entry. This avoids
932 * having to create a dummy register with a single entry to
933 * get a name in the decoding.
935 if (info
->typeinfo
->type
== RNN_TTYPE_BITSET
||
936 info
->typeinfo
->type
== RNN_TTYPE_INLINE_BITSET
) {
937 printf("%s%s\n", levels
[level
], decoded
);
939 printf("%s{ %s%s%s = %s }\n", levels
[level
],
940 rnn
->vc
->colors
->rname
, info
->name
,
941 rnn
->vc
->colors
->reset
, decoded
);
950 static uint32_t bin_x1
, bin_x2
, bin_y1
, bin_y2
;
951 static unsigned mode
;
952 static const char *render_mode
;
957 MODE_ALL
= MODE_BINNING
| MODE_GMEM
| MODE_BYPASS
,
958 } enable_mask
= MODE_ALL
;
959 static bool skip_ib2_enable_global
;
960 static bool skip_ib2_enable_local
;
963 print_mode(int level
)
965 if ((options
->gpu_id
>= 500) && !quiet(2)) {
966 printf("%smode: %s\n", levels
[level
], render_mode
);
967 printf("%sskip_ib2: g=%d, l=%d\n", levels
[level
], skip_ib2_enable_global
, skip_ib2_enable_local
);
974 switch (options
->query_mode
) {
979 for (int i
= 0; i
< options
->nquery
; i
++) {
980 uint32_t regbase
= queryvals
[i
];
981 if (!reg_written(regbase
)) {
984 if (reg_rewritten(regbase
)) {
990 for (int i
= 0; i
< options
->nquery
; i
++) {
991 uint32_t regbase
= queryvals
[i
];
992 if (!reg_written(regbase
)) {
995 uint32_t lastval
= reg_val(regbase
);
996 if (lastval
!= lastvals
[regbase
]) {
1006 __do_query(const char *primtype
, uint32_t num_indices
)
1010 if ((500 <= options
->gpu_id
) && (options
->gpu_id
< 700)) {
1011 uint32_t scissor_tl
= reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1012 uint32_t scissor_br
= reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1014 bin_x1
= scissor_tl
& 0xffff;
1015 bin_y1
= scissor_tl
>> 16;
1016 bin_x2
= scissor_br
& 0xffff;
1017 bin_y2
= scissor_br
>> 16;
1020 for (int i
= 0; i
< options
->nquery
; i
++) {
1021 uint32_t regbase
= queryvals
[i
];
1022 if (reg_written(regbase
)) {
1023 uint32_t lastval
= reg_val(regbase
);
1024 printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count
, primtype
,
1025 bin_x1
, bin_y1
, bin_x2
, bin_y2
, num_indices
);
1026 if (options
->gpu_id
>= 500)
1027 printf("%s:", render_mode
);
1028 printf("\t%08x", lastval
);
1029 if (lastval
!= lastvals
[regbase
]) {
1034 if (reg_rewritten(regbase
)) {
1039 dump_register_val(regbase
, lastval
, 0);
1049 do_query_compare(const char *primtype
, uint32_t num_indices
)
1051 unsigned saved_enable_mask
= enable_mask
;
1052 const char *saved_render_mode
= render_mode
;
1054 /* in 'query-compare' mode, we want to see if the register is writtten
1055 * or changed in any mode:
1057 * (NOTE: this could cause false-positive for 'query-delta' if the reg
1058 * is written with different values in binning vs sysmem/gmem mode, as
1059 * we don't track previous values per-mode, but I think we can live with
1062 enable_mask
= MODE_ALL
;
1067 if (!skip_query()) {
1068 /* dump binning pass values: */
1069 enable_mask
= MODE_BINNING
;
1070 render_mode
= "BINNING";
1073 __do_query(primtype
, num_indices
);
1075 /* dump draw pass values: */
1076 enable_mask
= MODE_GMEM
| MODE_BYPASS
;
1077 render_mode
= "DRAW";
1080 __do_query(primtype
, num_indices
);
1085 enable_mask
= saved_enable_mask
;
1086 render_mode
= saved_render_mode
;
1088 disable_all_groups();
1091 /* well, actually query and script..
1092 * NOTE: call this before dump_register_summary()
1095 do_query(const char *primtype
, uint32_t num_indices
)
1098 script_draw(primtype
, num_indices
);
1100 if (options
->query_compare
) {
1101 do_query_compare(primtype
, num_indices
);
1108 __do_query(primtype
, num_indices
);
1112 cp_im_loadi(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1114 uint32_t start
= dwords
[1] >> 16;
1115 uint32_t size
= dwords
[1] & 0xffff;
1116 const char *type
= NULL
, *ext
= NULL
;
1117 gl_shader_stage disasm_type
;
1119 switch (dwords
[0]) {
1123 disasm_type
= MESA_SHADER_VERTEX
;
1128 disasm_type
= MESA_SHADER_FRAGMENT
;
1136 printf("%s%s shader, start=%04x, size=%04x\n", levels
[level
], type
, start
, size
);
1137 disasm_a2xx(dwords
+ 2, sizedwords
- 2, level
+2, disasm_type
);
1139 /* dump raw shader: */
1141 dump_shader(ext
, dwords
+ 2, (sizedwords
- 2) * 4);
1145 cp_wide_reg_write(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1147 uint32_t reg
= dwords
[0] & 0xffff;
1149 for (i
= 1; i
< sizedwords
; i
++) {
1150 dump_register(reg
, dwords
[i
], level
+1);
1151 reg_set(reg
, dwords
[i
]);
1159 TEX_MIPADDR
, /* a3xx only */
1163 // image/ssbo state:
1170 // unknown things, just to hexdumps:
1176 enum adreno_state_block
{
1178 SB_VERT_MIPADDR
= 1,
1180 SB_FRAG_MIPADDR
= 3,
1184 SB_COMPUTE_SHADER
= 7,
1187 /* TODO there is probably a clever way to let rnndec parse things so
1188 * we don't have to care about packet format differences across gens
1192 a3xx_get_state_type(uint32_t *dwords
, gl_shader_stage
*stage
, enum state_t
*state
,
1193 enum state_src_t
*src
)
1195 unsigned state_block_id
= (dwords
[0] >> 19) & 0x7;
1196 unsigned state_type
= dwords
[1] & 0x3;
1197 static const struct {
1198 gl_shader_stage stage
;
1200 } lookup
[0xf][0x3] = {
1201 [SB_VERT_TEX
][0] = { MESA_SHADER_VERTEX
, TEX_SAMP
},
1202 [SB_VERT_TEX
][1] = { MESA_SHADER_VERTEX
, TEX_CONST
},
1203 [SB_FRAG_TEX
][0] = { MESA_SHADER_FRAGMENT
, TEX_SAMP
},
1204 [SB_FRAG_TEX
][1] = { MESA_SHADER_FRAGMENT
, TEX_CONST
},
1205 [SB_VERT_SHADER
][0] = { MESA_SHADER_VERTEX
, SHADER_PROG
},
1206 [SB_VERT_SHADER
][1] = { MESA_SHADER_VERTEX
, SHADER_CONST
},
1207 [SB_FRAG_SHADER
][0] = { MESA_SHADER_FRAGMENT
, SHADER_PROG
},
1208 [SB_FRAG_SHADER
][1] = { MESA_SHADER_FRAGMENT
, SHADER_CONST
},
1211 *stage
= lookup
[state_block_id
][state_type
].stage
;
1212 *state
= lookup
[state_block_id
][state_type
].state
;
1213 unsigned state_src
= (dwords
[0] >> 16) & 0x7;
1214 if (state_src
== 0 /* SS_DIRECT */)
1215 *src
= STATE_SRC_DIRECT
;
1217 *src
= STATE_SRC_INDIRECT
;
1220 static enum state_src_t
1221 _get_state_src(unsigned dword0
)
1223 switch ((dword0
>> 16) & 0x3) {
1224 case 0: /* SS4_DIRECT / SS6_DIRECT */
1225 return STATE_SRC_DIRECT
;
1226 case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1227 return STATE_SRC_INDIRECT
;
1228 case 1: /* SS6_BINDLESS */
1229 return STATE_SRC_BINDLESS
;
1231 return STATE_SRC_DIRECT
;
1236 _get_state_type(unsigned state_block_id
, unsigned state_type
,
1237 gl_shader_stage
*stage
, enum state_t
*state
)
1239 static const struct {
1240 gl_shader_stage stage
;
1242 } lookup
[0x10][0x4] = {
1244 [0x0][0] = { MESA_SHADER_VERTEX
, TEX_SAMP
},
1245 [0x0][1] = { MESA_SHADER_VERTEX
, TEX_CONST
},
1246 [0x0][2] = { MESA_SHADER_VERTEX
, UBO
},
1248 [0x1][0] = { MESA_SHADER_TESS_CTRL
, TEX_SAMP
},
1249 [0x1][1] = { MESA_SHADER_TESS_CTRL
, TEX_CONST
},
1250 [0x1][2] = { MESA_SHADER_TESS_CTRL
, UBO
},
1252 [0x2][0] = { MESA_SHADER_TESS_EVAL
, TEX_SAMP
},
1253 [0x2][1] = { MESA_SHADER_TESS_EVAL
, TEX_CONST
},
1254 [0x2][2] = { MESA_SHADER_TESS_EVAL
, UBO
},
1256 [0x3][0] = { MESA_SHADER_GEOMETRY
, TEX_SAMP
},
1257 [0x3][1] = { MESA_SHADER_GEOMETRY
, TEX_CONST
},
1258 [0x3][2] = { MESA_SHADER_GEOMETRY
, UBO
},
1260 [0x4][0] = { MESA_SHADER_FRAGMENT
, TEX_SAMP
},
1261 [0x4][1] = { MESA_SHADER_FRAGMENT
, TEX_CONST
},
1262 [0x4][2] = { MESA_SHADER_FRAGMENT
, UBO
},
1264 [0x5][0] = { MESA_SHADER_COMPUTE
, TEX_SAMP
},
1265 [0x5][1] = { MESA_SHADER_COMPUTE
, TEX_CONST
},
1266 [0x5][2] = { MESA_SHADER_COMPUTE
, UBO
},
1268 [0x8][0] = { MESA_SHADER_VERTEX
, SHADER_PROG
},
1269 [0x8][1] = { MESA_SHADER_VERTEX
, SHADER_CONST
},
1270 [0x8][2] = { MESA_SHADER_VERTEX
, UBO
},
1272 [0x9][0] = { MESA_SHADER_TESS_CTRL
, SHADER_PROG
},
1273 [0x9][1] = { MESA_SHADER_TESS_CTRL
, SHADER_CONST
},
1274 [0x9][2] = { MESA_SHADER_TESS_CTRL
, UBO
},
1276 [0xa][0] = { MESA_SHADER_TESS_EVAL
, SHADER_PROG
},
1277 [0xa][1] = { MESA_SHADER_TESS_EVAL
, SHADER_CONST
},
1278 [0xa][2] = { MESA_SHADER_TESS_EVAL
, UBO
},
1280 [0xb][0] = { MESA_SHADER_GEOMETRY
, SHADER_PROG
},
1281 [0xb][1] = { MESA_SHADER_GEOMETRY
, SHADER_CONST
},
1282 [0xb][2] = { MESA_SHADER_GEOMETRY
, UBO
},
1284 [0xc][0] = { MESA_SHADER_FRAGMENT
, SHADER_PROG
},
1285 [0xc][1] = { MESA_SHADER_FRAGMENT
, SHADER_CONST
},
1286 [0xc][2] = { MESA_SHADER_FRAGMENT
, UBO
},
1288 [0xd][0] = { MESA_SHADER_COMPUTE
, SHADER_PROG
},
1289 [0xd][1] = { MESA_SHADER_COMPUTE
, SHADER_CONST
},
1290 [0xd][2] = { MESA_SHADER_COMPUTE
, UBO
},
1291 [0xd][3] = { MESA_SHADER_COMPUTE
, SSBO_0
}, /* a6xx location */
1292 // SB4_SSBO (shared across all stages)
1293 [0xe][0] = { 0, SSBO_0
}, /* a5xx (and a4xx?) location */
1294 [0xe][1] = { 0, SSBO_1
},
1295 [0xe][2] = { 0, SSBO_2
},
1297 [0xf][0] = { MESA_SHADER_COMPUTE
, SSBO_0
},
1298 [0xf][1] = { MESA_SHADER_COMPUTE
, SSBO_1
},
1299 [0xf][2] = { MESA_SHADER_COMPUTE
, SSBO_2
},
1301 /* This looks like combined UBO state for 3d stages (a5xx and
1302 * before?? I think a6xx has UBO state per shader stage:
1304 [0x6][2] = { 0, UBO
},
1305 [0x7][1] = { 0, UNKNOWN_2DWORDS
},
1308 *stage
= lookup
[state_block_id
][state_type
].stage
;
1309 *state
= lookup
[state_block_id
][state_type
].state
;
1313 a4xx_get_state_type(uint32_t *dwords
, gl_shader_stage
*stage
, enum state_t
*state
,
1314 enum state_src_t
*src
)
1316 unsigned state_block_id
= (dwords
[0] >> 18) & 0xf;
1317 unsigned state_type
= dwords
[1] & 0x3;
1318 _get_state_type(state_block_id
, state_type
, stage
, state
);
1319 *src
= _get_state_src(dwords
[0]);
1323 a6xx_get_state_type(uint32_t *dwords
, gl_shader_stage
*stage
, enum state_t
*state
,
1324 enum state_src_t
*src
)
1326 unsigned state_block_id
= (dwords
[0] >> 18) & 0xf;
1327 unsigned state_type
= (dwords
[0] >> 14) & 0x3;
1328 _get_state_type(state_block_id
, state_type
, stage
, state
);
1329 *src
= _get_state_src(dwords
[0]);
1333 dump_tex_samp(uint32_t *texsamp
, enum state_src_t src
, int num_unit
, int level
)
1335 for (int i
= 0; i
< num_unit
; i
++) {
1336 /* work-around to reduce noise for opencl blob which always
1337 * writes the max # regardless of # of textures used
1339 if ((num_unit
== 16) && (texsamp
[0] == 0) && (texsamp
[1] == 0))
1342 if ((300 <= options
->gpu_id
) && (options
->gpu_id
< 400)) {
1343 dump_domain(texsamp
, 2, level
+2, "A3XX_TEX_SAMP");
1344 dump_hex(texsamp
, 2, level
+1);
1346 } else if ((400 <= options
->gpu_id
) && (options
->gpu_id
< 500)) {
1347 dump_domain(texsamp
, 2, level
+2, "A4XX_TEX_SAMP");
1348 dump_hex(texsamp
, 2, level
+1);
1350 } else if ((500 <= options
->gpu_id
) && (options
->gpu_id
< 600)) {
1351 dump_domain(texsamp
, 4, level
+2, "A5XX_TEX_SAMP");
1352 dump_hex(texsamp
, 4, level
+1);
1354 } else if ((600 <= options
->gpu_id
) && (options
->gpu_id
< 700)) {
1355 dump_domain(texsamp
, 4, level
+2, "A6XX_TEX_SAMP");
1356 dump_hex(texsamp
, 4, level
+1);
1357 texsamp
+= src
== STATE_SRC_BINDLESS
? 16 : 4;
1363 dump_tex_const(uint32_t *texconst
, int num_unit
, int level
)
1365 for (int i
= 0; i
< num_unit
; i
++) {
1366 /* work-around to reduce noise for opencl blob which always
1367 * writes the max # regardless of # of textures used
1369 if ((num_unit
== 16) &&
1370 (texconst
[0] == 0) && (texconst
[1] == 0) &&
1371 (texconst
[2] == 0) && (texconst
[3] == 0))
1374 if ((300 <= options
->gpu_id
) && (options
->gpu_id
< 400)) {
1375 dump_domain(texconst
, 4, level
+2, "A3XX_TEX_CONST");
1376 dump_hex(texconst
, 4, level
+1);
1378 } else if ((400 <= options
->gpu_id
) && (options
->gpu_id
< 500)) {
1379 dump_domain(texconst
, 8, level
+2, "A4XX_TEX_CONST");
1380 if (options
->dump_textures
) {
1381 uint32_t addr
= texconst
[4] & ~0x1f;
1382 dump_gpuaddr(addr
, level
-2);
1384 dump_hex(texconst
, 8, level
+1);
1386 } else if ((500 <= options
->gpu_id
) && (options
->gpu_id
< 600)) {
1387 dump_domain(texconst
, 12, level
+2, "A5XX_TEX_CONST");
1388 if (options
->dump_textures
) {
1389 uint64_t addr
= (((uint64_t)texconst
[5] & 0x1ffff) << 32) | texconst
[4];
1390 dump_gpuaddr_size(addr
, level
-2, hostlen(addr
) / 4, 3);
1392 dump_hex(texconst
, 12, level
+1);
1394 } else if ((600 <= options
->gpu_id
) && (options
->gpu_id
< 700)) {
1395 dump_domain(texconst
, 16, level
+2, "A6XX_TEX_CONST");
1396 if (options
->dump_textures
) {
1397 uint64_t addr
= (((uint64_t)texconst
[5] & 0x1ffff) << 32) | texconst
[4];
1398 dump_gpuaddr_size(addr
, level
-2, hostlen(addr
) / 4, 3);
1400 dump_hex(texconst
, 16, level
+1);
1407 cp_load_state(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1409 gl_shader_stage stage
;
1411 enum state_src_t src
;
1412 uint32_t num_unit
= (dwords
[0] >> 22) & 0x1ff;
1413 uint64_t ext_src_addr
;
1417 if (quiet(2) && !options
->script
)
1420 if (options
->gpu_id
>= 600)
1421 a6xx_get_state_type(dwords
, &stage
, &state
, &src
);
1422 else if (options
->gpu_id
>= 400)
1423 a4xx_get_state_type(dwords
, &stage
, &state
, &src
);
1425 a3xx_get_state_type(dwords
, &stage
, &state
, &src
);
1428 case STATE_SRC_DIRECT
: ext_src_addr
= 0; break;
1429 case STATE_SRC_INDIRECT
:
1431 ext_src_addr
= dwords
[1] & 0xfffffffc;
1432 ext_src_addr
|= ((uint64_t)dwords
[2]) << 32;
1434 ext_src_addr
= dwords
[1] & 0xfffffffc;
1438 case STATE_SRC_BINDLESS
: {
1439 const unsigned base_reg
=
1440 stage
== MESA_SHADER_COMPUTE
?
1441 regbase("HLSQ_CS_BINDLESS_BASE[0]") :
1442 regbase("HLSQ_BINDLESS_BASE[0]");
1445 const unsigned reg
= base_reg
+ (dwords
[1] >> 28) * 2;
1446 ext_src_addr
= reg_val(reg
) & 0xfffffffc;
1447 ext_src_addr
|= ((uint64_t)reg_val(reg
+ 1)) << 32;
1449 const unsigned reg
= base_reg
+ (dwords
[1] >> 28);
1450 ext_src_addr
= reg_val(reg
) & 0xfffffffc;
1453 ext_src_addr
+= 4 * (dwords
[1] & 0xffffff);
1459 contents
= hostptr(ext_src_addr
);
1461 contents
= is_64b() ? dwords
+ 3 : dwords
+ 2;
1468 const char *ext
= NULL
;
1473 if (options
->gpu_id
>= 400)
1475 else if (options
->gpu_id
>= 300)
1480 * note: num_unit seems to be # of instruction groups, where
1481 * an instruction group has 4 64bit instructions.
1483 if (stage
== MESA_SHADER_VERTEX
) {
1485 } else if (stage
== MESA_SHADER_GEOMETRY
) {
1487 } else if (stage
== MESA_SHADER_COMPUTE
) {
1489 } else if (stage
== MESA_SHADER_FRAGMENT
){
1494 try_disasm_a3xx(contents
, num_unit
* 2, level
+2, stdout
, options
->gpu_id
);
1496 /* dump raw shader: */
1498 dump_shader(ext
, contents
, num_unit
* 2 * 4);
1502 case SHADER_CONST
: {
1508 * note: num_unit seems to be # of pairs of dwords??
1511 if (options
->gpu_id
>= 400)
1514 dump_float(contents
, num_unit
*2, level
+1);
1515 dump_hex(contents
, num_unit
*2, level
+1);
1520 uint32_t *addrs
= contents
;
1525 /* mipmap consts block just appears to be array of num_unit gpu addr's: */
1526 for (i
= 0; i
< num_unit
; i
++) {
1527 void *ptr
= hostptr(addrs
[i
]);
1528 printf("%s%2d: %08x\n", levels
[level
+1], i
, addrs
[i
]);
1529 if (options
->dump_textures
) {
1530 printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs
[i
]));
1531 dump_hex(ptr
, hostlen(addrs
[i
])/4, level
+1);
1537 dump_tex_samp(contents
, src
, num_unit
, level
);
1541 dump_tex_const(contents
, num_unit
, level
);
1545 uint32_t *ssboconst
= (uint32_t *)contents
;
1547 for (i
= 0; i
< num_unit
; i
++) {
1549 if (400 <= options
->gpu_id
&& options
->gpu_id
< 500) {
1550 dump_domain(ssboconst
, 4, level
+2, "A4XX_SSBO_0");
1551 } else if (500 <= options
->gpu_id
&& options
->gpu_id
< 600) {
1552 dump_domain(ssboconst
, 4, level
+2, "A5XX_SSBO_0");
1553 } else if (600 <= options
->gpu_id
&& options
->gpu_id
< 700) {
1555 dump_domain(ssboconst
, 16, level
+2, "A6XX_IBO");
1557 dump_hex(ssboconst
, sz
, level
+1);
1563 uint32_t *ssboconst
= (uint32_t *)contents
;
1565 for (i
= 0; i
< num_unit
; i
++) {
1566 if (400 <= options
->gpu_id
&& options
->gpu_id
< 500)
1567 dump_domain(ssboconst
, 2, level
+2, "A4XX_SSBO_1");
1568 else if (500 <= options
->gpu_id
&& options
->gpu_id
< 600)
1569 dump_domain(ssboconst
, 2, level
+2, "A5XX_SSBO_1");
1570 dump_hex(ssboconst
, 2, level
+1);
1576 uint32_t *ssboconst
= (uint32_t *)contents
;
1578 for (i
= 0; i
< num_unit
; i
++) {
1579 /* TODO a4xx and a5xx might be same: */
1580 if ((500 <= options
->gpu_id
) && (options
->gpu_id
< 600)) {
1581 dump_domain(ssboconst
, 2, level
+2, "A5XX_SSBO_2");
1582 dump_hex(ssboconst
, 2, level
+1);
1584 if (options
->dump_textures
) {
1585 uint64_t addr
= (((uint64_t)ssboconst
[1] & 0x1ffff) << 32) | ssboconst
[0];
1586 dump_gpuaddr_size(addr
, level
-2, hostlen(addr
) / 4, 3);
1593 uint32_t *uboconst
= (uint32_t *)contents
;
1595 for (i
= 0; i
< num_unit
; i
++) {
1596 // TODO probably similar on a4xx..
1597 if (500 <= options
->gpu_id
&& options
->gpu_id
< 600)
1598 dump_domain(uboconst
, 2, level
+2, "A5XX_UBO");
1599 else if (600 <= options
->gpu_id
&& options
->gpu_id
< 700)
1600 dump_domain(uboconst
, 2, level
+2, "A6XX_UBO");
1601 dump_hex(uboconst
, 2, level
+1);
1602 uboconst
+= src
== STATE_SRC_BINDLESS
? 16 : 2;
1606 case UNKNOWN_DWORDS
: {
1609 dump_hex(contents
, num_unit
, level
+1);
1612 case UNKNOWN_2DWORDS
: {
1615 dump_hex(contents
, num_unit
* 2, level
+1);
1618 case UNKNOWN_4DWORDS
: {
1621 dump_hex(contents
, num_unit
* 4, level
+1);
1628 dump_hex(contents
, num_unit
, level
+1);
1634 cp_set_bin(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1636 bin_x1
= dwords
[1] & 0xffff;
1637 bin_y1
= dwords
[1] >> 16;
1638 bin_x2
= dwords
[2] & 0xffff;
1639 bin_y2
= dwords
[2] >> 16;
1643 dump_a2xx_tex_const(uint32_t *dwords
, uint32_t sizedwords
, uint32_t val
, int level
)
1646 uint32_t gpuaddr
, flags
, mip_gpuaddr
, mip_flags
;
1647 uint32_t min
, mag
, swiz
, clamp_x
, clamp_y
, clamp_z
;
1648 static const char *filter
[] = {
1649 "point", "bilinear", "bicubic",
1651 static const char *clamp
[] = {
1652 "wrap", "mirror", "clamp-last-texel",
1654 static const char swiznames
[] = "xyzw01??";
1656 /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1658 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1659 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1661 p
= (dwords
[0] >> 22) << 5;
1662 clamp_x
= (dwords
[0] >> 10) & 0x3;
1663 clamp_y
= (dwords
[0] >> 13) & 0x3;
1664 clamp_z
= (dwords
[0] >> 16) & 0x3;
1666 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1667 * NearestClamp=1:OGL Mode
1669 parse_dword_addr(dwords
[1], &gpuaddr
, &flags
, 0xfff);
1671 /* Width, Height, EndianSwap=0:None */
1672 w
= (dwords
[2] & 0x1fff) + 1;
1673 h
= ((dwords
[2] >> 13) & 0x1fff) + 1;
1675 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1678 mag
= (dwords
[3] >> 19) & 0x3;
1679 min
= (dwords
[3] >> 21) & 0x3;
1680 swiz
= (dwords
[3] >> 1) & 0xfff;
1682 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1687 /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1688 * Dim=1:2d, MipPacking=0
1690 parse_dword_addr(dwords
[5], &mip_gpuaddr
, &mip_flags
, 0xfff);
1692 printf("%sset texture const %04x\n", levels
[level
], val
);
1693 printf("%sclamp x/y/z: %s/%s/%s\n", levels
[level
+1],
1694 clamp
[clamp_x
], clamp
[clamp_y
], clamp
[clamp_z
]);
1695 printf("%sfilter min/mag: %s/%s\n", levels
[level
+1], filter
[min
], filter
[mag
]);
1696 printf("%sswizzle: %c%c%c%c\n", levels
[level
+1],
1697 swiznames
[(swiz
>> 0) & 0x7], swiznames
[(swiz
>> 3) & 0x7],
1698 swiznames
[(swiz
>> 6) & 0x7], swiznames
[(swiz
>> 9) & 0x7]);
1699 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1700 levels
[level
+1], gpuaddr
, flags
, w
, h
, p
,
1701 rnn_enumname(rnn
, "a2xx_sq_surfaceformat", flags
& 0xf));
1702 printf("%smipaddr=%08x (flags=%03x)\n", levels
[level
+1],
1703 mip_gpuaddr
, mip_flags
);
1707 dump_a2xx_shader_const(uint32_t *dwords
, uint32_t sizedwords
, uint32_t val
, int level
)
1710 printf("%sset shader const %04x\n", levels
[level
], val
);
1711 for (i
= 0; i
< sizedwords
; ) {
1712 uint32_t gpuaddr
, flags
;
1713 parse_dword_addr(dwords
[i
++], &gpuaddr
, &flags
, 0xf);
1714 void *addr
= hostptr(gpuaddr
);
1717 rnn_enumname(rnn
, "a2xx_sq_surfaceformat", flags
& 0xf);
1718 uint32_t size
= dwords
[i
++];
1719 printf("%saddr=%08x, size=%d, format=%s\n", levels
[level
+1],
1720 gpuaddr
, size
, fmt
);
1721 // TODO maybe dump these as bytes instead of dwords?
1722 size
= (size
+ 3) / 4; // for now convert to dwords
1723 dump_hex(addr
, min(size
, 64), level
+ 1);
1724 if (size
> min(size
, 64))
1725 printf("%s\t\t...\n", levels
[level
+1]);
1726 dump_float(addr
, min(size
, 64), level
+ 1);
1727 if (size
> min(size
, 64))
1728 printf("%s\t\t...\n", levels
[level
+1]);
1734 cp_set_const(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1736 uint32_t val
= dwords
[0] & 0xffff;
1737 switch((dwords
[0] >> 16) & 0xf) {
1739 dump_float((float *)(dwords
+1), sizedwords
-1, level
+1);
1742 /* need to figure out how const space is partitioned between
1743 * attributes, textures, etc..
1746 dump_a2xx_tex_const(dwords
+1, sizedwords
-1, val
, level
);
1748 dump_a2xx_shader_const(dwords
+1, sizedwords
-1, val
, level
);
1752 printf("%sset bool const %04x\n", levels
[level
], val
);
1755 printf("%sset loop const %04x\n", levels
[level
], val
);
1759 if (dwords
[0] & 0x80000000) {
1760 uint32_t srcreg
= dwords
[1];
1761 uint32_t dstval
= dwords
[2];
1763 /* TODO: not sure what happens w/ payload != 2.. */
1764 assert(sizedwords
== 3);
1765 assert(srcreg
< ARRAY_SIZE(type0_reg_vals
));
1767 /* note: rnn_regname uses a static buf so we can't do
1768 * two regname() calls for one printf..
1770 printf("%s%s = %08x + ", levels
[level
], regname(val
, 1), dstval
);
1771 printf("%s (%08x)\n", regname(srcreg
, 1), type0_reg_vals
[srcreg
]);
1773 dstval
+= type0_reg_vals
[srcreg
];
1775 dump_registers(val
, &dstval
, 1, level
+1);
1777 dump_registers(val
, dwords
+1, sizedwords
-1, level
+1);
1783 static void dump_register_summary(int level
);
1786 cp_event_write(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1788 const char *name
= rnn_enumname(rnn
, "vgt_event_type", dwords
[0]);
1789 printl(2, "%sevent %s\n", levels
[level
], name
);
1791 if (name
&& (options
->gpu_id
> 500)) {
1793 snprintf(eventname
, sizeof(eventname
), "EVENT:%s", name
);
1794 if (!strcmp(name
, "BLIT")) {
1795 do_query(eventname
, 0);
1797 dump_register_summary(level
);
1803 dump_register_summary(int level
)
1806 bool saved_summary
= summary
;
1811 /* dump current state of registers: */
1812 printl(2, "%sdraw[%i] register values\n", levels
[level
], draw_count
);
1813 for (i
= 0; i
< regcnt(); i
++) {
1814 uint32_t regbase
= i
;
1815 uint32_t lastval
= reg_val(regbase
);
1816 /* skip registers that haven't been updated since last draw/blit: */
1817 if (!(options
->allregs
|| reg_rewritten(regbase
)))
1819 if (!reg_written(regbase
))
1821 if (lastval
!= lastvals
[regbase
]) {
1823 lastvals
[regbase
] = lastval
;
1827 if (reg_rewritten(regbase
)) {
1832 printl(2, "\t%08x", lastval
);
1834 dump_register(regbase
, lastval
, level
);
1843 summary
= saved_summary
;
1847 draw_indx_common(uint32_t *dwords
, int level
)
1849 uint32_t prim_type
= dwords
[1] & 0x1f;
1850 uint32_t source_select
= (dwords
[1] >> 6) & 0x3;
1851 uint32_t num_indices
= dwords
[2];
1852 const char *primtype
;
1854 primtype
= rnn_enumname(rnn
, "pc_di_primtype", prim_type
);
1856 do_query(primtype
, num_indices
);
1858 printl(2, "%sdraw: %d\n", levels
[level
], draws
[ib
]);
1859 printl(2, "%sprim_type: %s (%d)\n", levels
[level
], primtype
,
1861 printl(2, "%ssource_select: %s (%d)\n", levels
[level
],
1862 rnn_enumname(rnn
, "pc_di_src_sel", source_select
),
1864 printl(2, "%snum_indices: %d\n", levels
[level
], num_indices
);
1866 vertices
+= num_indices
;
1873 enum pc_di_index_size
{
1875 INDEX_SIZE_16_BIT
= 0,
1876 INDEX_SIZE_32_BIT
= 1,
1877 INDEX_SIZE_8_BIT
= 2,
1878 INDEX_SIZE_INVALID
= 0,
1882 cp_draw_indx(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1884 uint32_t num_indices
= draw_indx_common(dwords
, level
);
1888 /* if we have an index buffer, dump that: */
1889 if (sizedwords
== 5) {
1890 void *ptr
= hostptr(dwords
[3]);
1891 printl(2, "%sgpuaddr: %08x\n", levels
[level
], dwords
[3]);
1892 printl(2, "%sidx_size: %d\n", levels
[level
], dwords
[4]);
1894 enum pc_di_index_size size
=
1895 ((dwords
[1] >> 11) & 1) | ((dwords
[1] >> 12) & 2);
1898 printf("%sidxs: ", levels
[level
]);
1899 if (size
== INDEX_SIZE_8_BIT
) {
1901 for (i
= 0; i
< dwords
[4]; i
++)
1902 printf(" %u", idx
[i
]);
1903 } else if (size
== INDEX_SIZE_16_BIT
) {
1904 uint16_t *idx
= ptr
;
1905 for (i
= 0; i
< dwords
[4]/2; i
++)
1906 printf(" %u", idx
[i
]);
1907 } else if (size
== INDEX_SIZE_32_BIT
) {
1908 uint32_t *idx
= ptr
;
1909 for (i
= 0; i
< dwords
[4]/4; i
++)
1910 printf(" %u", idx
[i
]);
1913 dump_hex(ptr
, dwords
[4]/4, level
+1);
1918 /* don't bother dumping registers for the dummy draw_indx's.. */
1919 if (num_indices
> 0)
1920 dump_register_summary(level
);
1926 cp_draw_indx_2(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1928 uint32_t num_indices
= draw_indx_common(dwords
, level
);
1929 enum pc_di_index_size size
=
1930 ((dwords
[1] >> 11) & 1) | ((dwords
[1] >> 12) & 2);
1931 void *ptr
= &dwords
[3];
1936 /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1939 printf("%sidxs: ", levels
[level
]);
1940 if (size
== INDEX_SIZE_8_BIT
) {
1942 for (i
= 0; i
< num_indices
; i
++)
1943 printf(" %u", idx
[i
]);
1945 } else if (size
== INDEX_SIZE_16_BIT
) {
1946 uint16_t *idx
= ptr
;
1947 for (i
= 0; i
< num_indices
; i
++)
1948 printf(" %u", idx
[i
]);
1949 sz
= num_indices
* 2;
1950 } else if (size
== INDEX_SIZE_32_BIT
) {
1951 uint32_t *idx
= ptr
;
1952 for (i
= 0; i
< num_indices
; i
++)
1953 printf(" %u", idx
[i
]);
1954 sz
= num_indices
* 4;
1957 dump_hex(ptr
, sz
/ 4, level
+1);
1960 /* don't bother dumping registers for the dummy draw_indx's.. */
1961 if (num_indices
> 0)
1962 dump_register_summary(level
);
1966 cp_draw_indx_offset(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1968 uint32_t num_indices
= dwords
[2];
1969 uint32_t prim_type
= dwords
[0] & 0x1f;
1971 do_query(rnn_enumname(rnn
, "pc_di_primtype", prim_type
), num_indices
);
1974 /* don't bother dumping registers for the dummy draw_indx's.. */
1975 if (num_indices
> 0)
1976 dump_register_summary(level
);
1980 cp_draw_indx_indirect(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1982 uint32_t prim_type
= dwords
[0] & 0x1f;
1985 do_query(rnn_enumname(rnn
, "pc_di_primtype", prim_type
), 0);
1989 addr
= (((uint64_t)dwords
[2] & 0x1ffff) << 32) | dwords
[1];
1992 dump_gpuaddr_size(addr
, level
, 0x10, 2);
1995 addr
= (((uint64_t)dwords
[5] & 0x1ffff) << 32) | dwords
[4];
1998 dump_gpuaddr_size(addr
, level
, 0x10, 2);
2000 dump_register_summary(level
);
2004 cp_draw_indirect(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2006 uint32_t prim_type
= dwords
[0] & 0x1f;
2009 do_query(rnn_enumname(rnn
, "pc_di_primtype", prim_type
), 0);
2012 addr
= (((uint64_t)dwords
[2] & 0x1ffff) << 32) | dwords
[1];
2013 dump_gpuaddr_size(addr
, level
, 0x10, 2);
2015 dump_register_summary(level
);
2019 cp_draw_indirect_multi(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2021 uint32_t prim_type
= dwords
[0] & 0x1f;
2022 uint32_t count
= dwords
[2];
2024 do_query(rnn_enumname(rnn
, "pc_di_primtype", prim_type
), 0);
2027 struct rnndomain
*domain
= rnn_finddomain(rnn
->db
, "CP_DRAW_INDIRECT_MULTI");
2028 uint32_t count_dword
= rnndec_decodereg(rnn
->vc
, domain
, "INDIRECT_COUNT");
2029 uint32_t addr_dword
= rnndec_decodereg(rnn
->vc
, domain
, "INDIRECT");
2030 uint64_t stride_dword
= rnndec_decodereg(rnn
->vc
, domain
, "STRIDE");
2033 uint64_t count_addr
= ((uint64_t)dwords
[count_dword
+ 1] << 32) | dwords
[count_dword
];
2034 uint32_t *buf
= hostptr(count_addr
);
2036 /* Don't print more draws than this if we don't know the indirect
2037 * count. It's possible the user will give ~0 or some other large
2038 * value, expecting the GPU to fill in the draw count, and we don't
2039 * want to print a gazillion draws in that case:
2041 const uint32_t max_draw_count
= 0x100;
2043 /* Assume the indirect count is garbage if it's larger than this
2044 * (quite large) value or 0. Hopefully this catches most cases.
2046 const uint32_t max_indirect_draw_count
= 0x10000;
2049 printf("%sindirect count: %u\n", levels
[level
], *buf
);
2050 if (*buf
== 0 || *buf
> max_indirect_draw_count
) {
2052 count
= min(count
, max_draw_count
);
2055 count
= min(count
, *buf
);
2058 count
= min(count
, max_draw_count
);
2062 if (addr_dword
&& stride_dword
) {
2063 uint64_t addr
= ((uint64_t)dwords
[addr_dword
+ 1] << 32) | dwords
[addr_dword
];
2064 uint32_t stride
= dwords
[stride_dword
];
2066 for (unsigned i
= 0; i
< count
; i
++, addr
+= stride
) {
2067 printf("%sdraw %d:\n", levels
[level
], i
);
2068 dump_gpuaddr_size(addr
, level
, 0x10, 2);
2072 dump_register_summary(level
);
2076 cp_run_cl(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2078 do_query("COMPUTE", 1);
2079 dump_register_summary(level
);
2083 cp_nop(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2085 const char *buf
= (void *)dwords
;
2091 // blob doesn't use CP_NOP for string_marker but it does
2092 // use it for things that end up looking like, but aren't
2094 if (!options
->decode_markers
)
2097 for (i
= 0; i
< 4 * sizedwords
; i
++) {
2100 if (isascii(buf
[i
]))
2101 printf("%c", buf
[i
]);
2107 cp_indirect(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2109 /* traverse indirect buffers */
2112 uint32_t *ptr
= NULL
;
2115 /* a5xx+.. high 32b of gpu addr, then size: */
2117 ibaddr
|= ((uint64_t)dwords
[1]) << 32;
2126 printf("%sibaddr:%016"PRIx64
"\n", levels
[level
], ibaddr
);
2128 printf("%sibaddr:%08x\n", levels
[level
], (uint32_t)ibaddr
);
2130 printf("%sibsize:%08x\n", levels
[level
], ibsize
);
2133 if (options
->once
&& has_dumped(ibaddr
, enable_mask
))
2136 /* 'query-compare' mode implies 'once' mode, although we need only to
2137 * process the cmdstream for *any* enable_mask mode, since we are
2138 * comparing binning vs draw reg values at the same time, ie. it is
2139 * not useful to process the same draw in both binning and draw pass.
2141 if (options
->query_compare
&& has_dumped(ibaddr
, MODE_ALL
))
2144 /* map gpuaddr back to hostptr: */
2145 ptr
= hostptr(ibaddr
);
2148 /* If the GPU hung within the target IB, the trigger point will be
2149 * just after the current CP_INDIRECT_BUFFER. Because the IB is
2150 * executed but never returns. Account for this by checking if
2153 highlight_gpuaddr(gpuaddr(&dwords
[is_64b() ? 3 : 2]));
2156 ibs
[ib
].base
= ibaddr
;
2157 ibs
[ib
].size
= ibsize
;
2159 dump_commands(ptr
, ibsize
, level
);
2162 fprintf(stderr
, "could not find: %016"PRIx64
" (%d)\n", ibaddr
, ibsize
);
2167 cp_wfi(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2173 cp_mem_write(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2179 uint64_t gpuaddr
= dwords
[0] | (((uint64_t)dwords
[1]) << 32);
2180 printf("%sgpuaddr:%016"PRIx64
"\n", levels
[level
], gpuaddr
);
2181 dump_hex(&dwords
[2], sizedwords
-2, level
+1);
2183 if (pkt_is_type4(dwords
[2]) || pkt_is_type7(dwords
[2]))
2184 dump_commands(&dwords
[2], sizedwords
-2, level
+1);
2186 uint32_t gpuaddr
= dwords
[0];
2187 printf("%sgpuaddr:%08x\n", levels
[level
], gpuaddr
);
2188 dump_float((float *)&dwords
[1], sizedwords
-1, level
+1);
2193 cp_rmw(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2195 uint32_t val
= dwords
[0] & 0xffff;
2196 uint32_t and = dwords
[1];
2197 uint32_t or = dwords
[2];
2198 printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels
[level
], regname(val
, 1), and, or);
2200 printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val
, 1), and, or);
2201 reg_set(val
, (reg_val(val
) & and) | or);
2205 cp_reg_mem(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2207 uint32_t val
= dwords
[0] & 0xffff;
2208 printl(3, "%sbase register: %s\n", levels
[level
], regname(val
, 1));
2213 uint64_t gpuaddr
= dwords
[1] | (((uint64_t)dwords
[2]) << 32);
2214 printf("%sgpuaddr:%016"PRIx64
"\n", levels
[level
], gpuaddr
);
2215 void *ptr
= hostptr(gpuaddr
);
2217 uint32_t cnt
= (dwords
[0] >> 19) & 0x3ff;
2218 dump_hex(ptr
, cnt
, level
+ 1);
2223 uint16_t enable_mask
;
2229 struct draw_state state
[32];
2231 #define FLAG_DIRTY 0x1
2232 #define FLAG_DISABLE 0x2
2233 #define FLAG_DISABLE_ALL_GROUPS 0x4
2234 #define FLAG_LOAD_IMMED 0x8
2236 static int draw_mode
;
2239 disable_group(unsigned group_id
)
2241 struct draw_state
*ds
= &state
[group_id
];
2242 memset(ds
, 0, sizeof(*ds
));
2246 disable_all_groups(void)
2248 for (unsigned i
= 0; i
< ARRAY_SIZE(state
); i
++)
2253 load_group(unsigned group_id
, int level
)
2255 struct draw_state
*ds
= &state
[group_id
];
2260 printl(2, "%sgroup_id: %u\n", levels
[level
], group_id
);
2261 printl(2, "%scount: %d\n", levels
[level
], ds
->count
);
2262 printl(2, "%saddr: %016llx\n", levels
[level
], ds
->addr
);
2263 printl(2, "%sflags: %x\n", levels
[level
], ds
->flags
);
2265 if (options
->gpu_id
>= 600) {
2266 printl(2, "%senable_mask: 0x%x\n", levels
[level
], ds
->enable_mask
);
2268 if (!(ds
->enable_mask
& enable_mask
)) {
2269 printl(2, "%s\tskipped!\n\n", levels
[level
]);
2274 void *ptr
= hostptr(ds
->addr
);
2277 dump_hex(ptr
, ds
->count
, level
+1);
2280 dump_commands(ptr
, ds
->count
, level
+1);
2286 load_all_groups(int level
)
2288 /* sanity check, we should never recursively hit recursion here, and if
2289 * we do bad things happen:
2291 static bool loading_groups
= false;
2292 if (loading_groups
) {
2293 printf("ERROR: nothing in draw state should trigger recursively loading groups!\n");
2296 loading_groups
= true;
2297 for (unsigned i
= 0; i
< ARRAY_SIZE(state
); i
++)
2298 load_group(i
, level
);
2299 loading_groups
= false;
2301 /* in 'query-compare' mode, defer disabling all groups until we have a
2302 * chance to process the query:
2304 if (!options
->query_compare
)
2305 disable_all_groups();
2309 cp_set_draw_state(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2313 for (i
= 0; i
< sizedwords
; ) {
2314 struct draw_state
*ds
;
2315 uint32_t count
= dwords
[i
] & 0xffff;
2316 uint32_t group_id
= (dwords
[i
] >> 24) & 0x1f;
2317 uint32_t enable_mask
= (dwords
[i
] >> 20) & 0xf;
2318 uint32_t flags
= (dwords
[i
] >> 16) & 0xf;
2322 addr
= dwords
[i
+ 1];
2323 addr
|= ((uint64_t)dwords
[i
+ 2]) << 32;
2326 addr
= dwords
[i
+ 1];
2330 if (flags
& FLAG_DISABLE_ALL_GROUPS
) {
2331 disable_all_groups();
2335 if (flags
& FLAG_DISABLE
) {
2336 disable_group(group_id
);
2340 assert(group_id
< ARRAY_SIZE(state
));
2341 disable_group(group_id
);
2343 ds
= &state
[group_id
];
2345 ds
->enable_mask
= enable_mask
;
2350 if (flags
& FLAG_LOAD_IMMED
) {
2351 load_group(group_id
, level
);
2352 disable_group(group_id
);
2358 cp_set_mode(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2360 draw_mode
= dwords
[0];
2363 /* execute compute shader */
2365 cp_exec_cs(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2367 do_query("compute", 0);
2368 dump_register_summary(level
);
2372 cp_exec_cs_indirect(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2377 addr
= (((uint64_t)dwords
[2] & 0x1ffff) << 32) | dwords
[1];
2382 printl(3, "%saddr: %016llx\n", levels
[level
], addr
);
2383 dump_gpuaddr_size(addr
, level
, 0x10, 2);
2385 do_query("compute", 0);
2386 dump_register_summary(level
);
2390 cp_set_marker(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2392 render_mode
= rnn_enumname(rnn
, "a6xx_render_mode", dwords
[0] & 0xf);
2394 if (!strcmp(render_mode
, "RM6_BINNING")) {
2395 enable_mask
= MODE_BINNING
;
2396 } else if (!strcmp(render_mode
, "RM6_GMEM")) {
2397 enable_mask
= MODE_GMEM
;
2398 } else if (!strcmp(render_mode
, "RM6_BYPASS")) {
2399 enable_mask
= MODE_BYPASS
;
2404 cp_set_render_mode(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2411 /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2412 * not sure if this can come in different sizes.
2414 * First ptr doesn't seem to be cmdstream, second one does.
2416 * Comment from downstream kernel:
2418 * SRM -- set render mode (ex binning, direct render etc)
2419 * SRM is set by UMD usually at start of IB to tell CP the type of
2421 * KMD needs to set SRM to NULL to indicate CP that rendering is
2423 * ------------------------------------------------------------------
2425 * Seems to always be one of these two:
2426 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000
2427 * 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000
2431 assert(options
->gpu_id
>= 500);
2433 render_mode
= rnn_enumname(rnn
, "render_mode_cmd", dwords
[0]);
2435 if (sizedwords
== 1)
2439 addr
|= ((uint64_t)dwords
[2]) << 32;
2443 dump_gpuaddr(addr
, level
+1);
2445 if (sizedwords
== 5)
2448 assert(sizedwords
== 8);
2452 addr
|= ((uint64_t)dwords
[7]) << 32;
2454 printl(3, "%saddr: 0x%016lx\n", levels
[level
], addr
);
2455 printl(3, "%slen: 0x%x\n", levels
[level
], len
);
2457 ptr
= hostptr(addr
);
2462 dump_commands(ptr
, len
, level
+1);
2464 dump_hex(ptr
, len
, level
+1);
2470 cp_compute_checkpoint(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2476 assert(options
->gpu_id
>= 500);
2478 assert(sizedwords
== 8);
2481 addr
|= ((uint64_t)dwords
[6]) << 32;
2484 printl(3, "%saddr: 0x%016"PRIx64
"\n", levels
[level
], addr
);
2485 printl(3, "%slen: 0x%x\n", levels
[level
], len
);
2487 ptr
= hostptr(addr
);
2492 dump_commands(ptr
, len
, level
+1);
2494 dump_hex(ptr
, len
, level
+1);
2500 cp_blit(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2502 do_query(rnn_enumname(rnn
, "cp_blit_cmd", dwords
[0]), 0);
2504 dump_register_summary(level
);
2508 cp_context_reg_bunch(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2512 /* NOTE: seems to write same reg multiple times.. not sure if different parts of
2513 * these are triggered by the FLUSH_SO_n events?? (if that is what they actually
2516 bool saved_summary
= summary
;
2519 for (i
= 0; i
< sizedwords
; i
+= 2) {
2520 dump_register(dwords
[i
+0], dwords
[i
+1], level
+1);
2521 reg_set(dwords
[i
+0], dwords
[i
+1]);
2524 summary
= saved_summary
;
2528 cp_reg_write(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2530 uint32_t reg
= dwords
[1] & 0xffff;
2532 dump_register(reg
, dwords
[2], level
+1);
2533 reg_set(reg
, dwords
[2]);
2537 cp_set_ctxswitch_ib(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2540 uint32_t size
= dwords
[2] & 0xffff;
2543 addr
= dwords
[0] | ((uint64_t)dwords
[1] << 32);
2545 printf("addr=%"PRIx64
"\n", addr
);
2546 ptr
= hostptr(addr
);
2548 dump_commands(ptr
, size
, level
+1);
2553 cp_skip_ib2_enable_global(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2555 skip_ib2_enable_global
= dwords
[0];
2559 cp_skip_ib2_enable_local(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2561 skip_ib2_enable_local
= dwords
[0];
2564 #define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2565 static const struct type3_op
{
2567 void (*fxn
)(uint32_t *dwords
, uint32_t sizedwords
, int level
);
2569 bool load_all_groups
;
2573 CP(INDIRECT_BUFFER
, cp_indirect
),
2574 CP(INDIRECT_BUFFER_PFD
, cp_indirect
),
2575 CP(WAIT_FOR_IDLE
, cp_wfi
),
2576 CP(REG_RMW
, cp_rmw
),
2577 CP(REG_TO_MEM
, cp_reg_mem
),
2578 CP(MEM_TO_REG
, cp_reg_mem
), /* same layout as CP_REG_TO_MEM */
2579 CP(MEM_WRITE
, cp_mem_write
),
2580 CP(EVENT_WRITE
, cp_event_write
),
2581 CP(RUN_OPENCL
, cp_run_cl
),
2582 CP(DRAW_INDX
, cp_draw_indx
, {.load_all_groups
=true}),
2583 CP(DRAW_INDX_2
, cp_draw_indx_2
, {.load_all_groups
=true}),
2584 CP(SET_CONSTANT
, cp_set_const
),
2585 CP(IM_LOAD_IMMEDIATE
, cp_im_loadi
),
2586 CP(WIDE_REG_WRITE
, cp_wide_reg_write
),
2589 CP(LOAD_STATE
, cp_load_state
),
2590 CP(SET_BIN
, cp_set_bin
),
2593 CP(LOAD_STATE4
, cp_load_state
),
2594 CP(SET_DRAW_STATE
, cp_set_draw_state
),
2595 CP(DRAW_INDX_OFFSET
, cp_draw_indx_offset
, {.load_all_groups
=true}),
2596 CP(EXEC_CS
, cp_exec_cs
, {.load_all_groups
=true}),
2597 CP(EXEC_CS_INDIRECT
, cp_exec_cs_indirect
, {.load_all_groups
=true}),
2600 CP(SET_RENDER_MODE
, cp_set_render_mode
),
2601 CP(COMPUTE_CHECKPOINT
, cp_compute_checkpoint
),
2603 CP(CONTEXT_REG_BUNCH
, cp_context_reg_bunch
),
2604 CP(DRAW_INDIRECT
, cp_draw_indirect
, {.load_all_groups
=true}),
2605 CP(DRAW_INDX_INDIRECT
, cp_draw_indx_indirect
, {.load_all_groups
=true}),
2606 CP(DRAW_INDIRECT_MULTI
, cp_draw_indirect_multi
, {.load_all_groups
=true}),
2607 CP(SKIP_IB2_ENABLE_GLOBAL
, cp_skip_ib2_enable_global
),
2608 CP(SKIP_IB2_ENABLE_LOCAL
, cp_skip_ib2_enable_local
),
2611 CP(LOAD_STATE6_GEOM
, cp_load_state
),
2612 CP(LOAD_STATE6_FRAG
, cp_load_state
),
2613 CP(LOAD_STATE6
, cp_load_state
),
2614 CP(SET_MODE
, cp_set_mode
),
2615 CP(SET_MARKER
, cp_set_marker
),
2616 CP(REG_WRITE
, cp_reg_write
),
2618 CP(SET_CTXSWITCH_IB
, cp_set_ctxswitch_ib
),
2622 noop_fxn(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2626 static const struct type3_op
*
2627 get_type3_op(unsigned opc
)
2629 static const struct type3_op dummy_op
= {
2632 const char *name
= pktname(opc
);
2637 for (unsigned i
= 0; i
< ARRAY_SIZE(type3_op
); i
++)
2638 if (!strcmp(name
, type3_op
[i
].name
))
2639 return &type3_op
[i
];
/*
 * dump_commands() - decode and print the packets in a cmdstream buffer.
 *
 * Walks 'sizedwords' dwords one packet at a time, dispatching on the
 * packet type encoded in the header dword: type0/type4 are register
 * writes, type1 writes two packed register offsets, type2 is a NOP,
 * and type3/type7 are CP opcodes whose handler comes from
 * get_type3_op().  'level' selects the indent prefix from levels[].
 */
2645 dump_commands(uint32_t *dwords
, uint32_t sizedwords
, int level
)
/* running count of dwords not yet consumed; goes negative on a
 * malformed stream (checked at the bottom of the function) */
2647 int dwords_left
= sizedwords
;
2648 uint32_t count
= 0; /* dword count including packet header */
/* NOTE(review): presumably inside an 'if (!dwords)' guard — the
 * condition line is not visible in this extraction; confirm. */
2653 printf("NULL cmd buffer!\n");
/* main decode loop: one packet header + payload per iteration */
2659 while (dwords_left
> 0) {
/* remember the global draw counter as of this packet's decode */
2661 current_draw_count
= draw_count
;
2663 /* hack, this looks like a -1 underflow, in some versions
2664 * when it tries to write zero registers via pkt0
2666 // if ((dwords[0] >> 16) == 0xffff)
/* type0: write of count-1 consecutive registers starting at 'val' */
2669 if (pkt_is_type0(dwords
[0])) {
2671 count
= type0_pkt_size(dwords
[0]) + 1;
2672 val
= type0_pkt_offset(dwords
[0]);
2673 assert(val
< regcnt());
2674 printl(3, "%swrite %s%s (%04x)\n", levels
[level
+1], regname(val
, 1),
2675 (dwords
[0] & 0x8000) ? " (same register)" : "", val
);
2676 dump_registers(val
, dwords
+1, count
-1, level
+2);
2678 dump_hex(dwords
, count
, level
+1);
2679 } else if (pkt_is_type4(dwords
[0])) {
2680 /* basically the same(ish) as type0 prior to a5xx */
2682 count
= type4_pkt_size(dwords
[0]) + 1;
2683 val
= type4_pkt_offset(dwords
[0]);
2684 assert(val
< regcnt());
2685 printl(3, "%swrite %s (%04x)\n", levels
[level
+1], regname(val
, 1), val
);
2686 dump_registers(val
, dwords
+1, count
-1, level
+2);
2688 dump_hex(dwords
, count
, level
+1);
/* type1: two single-register writes, offsets packed into bits
 * [11:0] and [23:12] of the header dword */
2690 } else if (pkt_is_type1(dwords
[0])) {
2693 val
= dwords
[0] & 0xfff;
2694 printl(3, "%swrite %s\n", levels
[level
+1], regname(val
, 1));
2695 dump_registers(val
, dwords
+1, 1, level
+2);
2696 val
= (dwords
[0] >> 12) & 0xfff;
2697 printl(3, "%swrite %s\n", levels
[level
+1], regname(val
, 1));
2698 dump_registers(val
, dwords
+2, 1, level
+2);
2700 dump_hex(dwords
, count
, level
+1);
/* type2: NOP packet */
2701 } else if (pkt_is_type2(dwords
[0])) {
2703 printf("%sNOP\n", levels
[level
+1]);
2706 dump_hex(dwords
, count
, level
+1);
/* type3: CP opcode packet; handler looked up in the type3_op table */
2708 } else if (pkt_is_type3(dwords
[0])) {
2709 count
= type3_pkt_size(dwords
[0]) + 1;
2710 val
= cp_type3_opcode(dwords
[0]);
2711 const struct type3_op
*op
= get_type3_op(val
);
/* draws/dispatches flagged load_all_groups pull in pending state
 * groups before the packet itself is decoded */
2712 if (op
->options
.load_all_groups
)
2713 load_all_groups(level
+1);
2715 const char *name
= pktname(val
);
2717 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels
[level
],
2718 rnn
->vc
->colors
->bctarg
, name
, rnn
->vc
->colors
->reset
,
2719 val
, count
, (dwords
[0] & 0x1) ? " (predicated)" : "");
2722 dump_domain(dwords
+1, count
-1, level
+2, name
);
2723 op
->fxn(dwords
+1, count
-1, level
+1);
2725 dump_hex(dwords
, count
, level
+1);
/* type7: opcode packet, decoded like type3 (cf. the type4-vs-type0
 * note above for the generation split) */
2726 } else if (pkt_is_type7(dwords
[0])) {
2727 count
= type7_pkt_size(dwords
[0]) + 1;
2728 val
= cp_type7_opcode(dwords
[0]);
2729 const struct type3_op
*op
= get_type3_op(val
);
2730 if (op
->options
.load_all_groups
)
2731 load_all_groups(level
+1);
2733 const char *name
= pktname(val
);
2735 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels
[level
],
2736 rnn
->vc
->colors
->bctarg
, name
, rnn
->vc
->colors
->reset
,
2740 /* special hack for two packets that decode the same way
/* LOAD_STATE6_FRAG/GEOM are folded onto CP_LOAD_STATE6's domain */
2743 if (!strcmp(name
, "CP_LOAD_STATE6_FRAG") ||
2744 !strcmp(name
, "CP_LOAD_STATE6_GEOM"))
2745 name
= "CP_LOAD_STATE6";
2746 dump_domain(dwords
+1, count
-1, level
+2, name
);
2748 op
->fxn(dwords
+1, count
-1, level
+1);
2750 dump_hex(dwords
, count
, level
+1);
/* NOTE(review): pkt_is_type2 was already tested earlier in this
 * else-if chain; if both tests are at the same nesting level this
 * branch is unreachable dead code — confirm and remove. */
2751 } else if (pkt_is_type2(dwords
[0])) {
2753 printl(3, "%snop\n", levels
[level
+1]);
2755 /* for 5xx+ we can do a passable job of looking for start of next valid packet: */
2756 if (options
->gpu_id
>= 500) {
2757 while (dwords_left
> 0) {
2758 if (pkt_is_type7(dwords
[0]) || pkt_is_type4(dwords
[0]))
2760 printf("bad type! %08x\n", dwords
[0]);
2765 printf("bad type! %08x\n", dwords
[0]);
/* advance past the packet we just decoded */
2771 dwords_left
-= count
;
/* a negative remainder means a packet claimed more dwords than the
 * buffer holds — the stream (or our parse) is corrupt */
2775 if (dwords_left
< 0)
2776 printf("**** this ain't right!! dwords_left=%d\n", dwords_left
);