2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <sys/types.h>
50 /* ************************************************************************* */
51 /* originally based on kernel recovery dump code: */
53 static const struct cffdec_options
*options
;
55 static bool needs_wfi
= false;
56 static bool summary
= false;
57 static bool in_summary
= false;
60 static inline unsigned regcnt(void)
62 if (options
->gpu_id
>= 500)
68 static int is_64b(void)
70 return options
->gpu_id
>= 500;
77 uint32_t size
; /* in dwords */
78 /* Generally cmdstream consists of multiple IB calls to different
79 * buffers, which are themselves often re-used for each tile. The
80 * triggered flag serves two purposes to help make it more clear
81 * what part of the cmdstream is before vs after the the GPU hang:
83 * 1) if in IB2 we are passed the point within the IB2 buffer where
84 * the GPU hung, but IB1 is not passed the point within its
85 * buffer where the GPU had hung, then we know the GPU hang
86 * happens on a future use of that IB2 buffer.
88 * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
89 * hung, but we've already passed the trigger point at the same
90 * IB level, we know that we are passed the point where the GPU
93 * So this is a one way switch, false->true. And a higher #'d
94 * IB level isn't considered triggered unless the lower #'d IB
101 static int draw_count
;
102 static int current_draw_count
;
104 /* query mode.. to handle symbolic register name queries, we need to
105 * defer parsing query string until after gpu_id is know and rnn db
108 static int *queryvals
;
113 if ((options
->draw_filter
!= -1) && (options
->draw_filter
!= current_draw_count
))
115 if ((lvl
>= 3) && (summary
|| options
->querystrs
|| options
->script
))
117 if ((lvl
>= 2) && (options
->querystrs
|| options
->script
))
123 printl(int lvl
, const char *fmt
, ...)
133 static const char *levels
[] = {
142 "\t\t\t\t\t\t\t\t\t",
157 /* SDS (CP_SET_DRAW_STATE) helpers: */
158 static void load_all_groups(int level
);
159 static void disable_all_groups(void);
161 static void dump_tex_samp(uint32_t *texsamp
, enum state_src_t src
, int num_unit
, int level
);
162 static void dump_tex_const(uint32_t *texsamp
, int num_unit
, int level
);
165 highlight_gpuaddr(uint64_t gpuaddr
)
170 if (!options
->ibs
[ib
].base
)
173 if ((ib
> 0) && options
->ibs
[ib
-1].base
&& !ibs
[ib
-1].triggered
)
176 if (ibs
[ib
].triggered
)
179 if (options
->ibs
[ib
].base
!= ibs
[ib
].base
)
182 uint64_t start
= ibs
[ib
].base
+ 4 * (ibs
[ib
].size
- options
->ibs
[ib
].rem
);
183 uint64_t end
= ibs
[ib
].base
+ 4 * ibs
[ib
].size
;
185 bool triggered
= (start
<= gpuaddr
) && (gpuaddr
<= end
);
187 ibs
[ib
].triggered
|= triggered
;
190 printf("ESTIMATED CRASH LOCATION!\n");
196 dump_hex(uint32_t *dwords
, uint32_t sizedwords
, int level
)
204 for (i
= 0; i
< sizedwords
; i
+= 8) {
207 /* always show first row: */
211 for (j
= 0; (j
< 8) && (i
+j
< sizedwords
) && zero
; j
++)
215 if (zero
&& !lastzero
)
223 uint64_t addr
= gpuaddr(&dwords
[i
]);
224 bool highlight
= highlight_gpuaddr(addr
);
227 printf("\x1b[0;1;31m");
230 printf("%016lx:%s", addr
, levels
[level
]);
232 printf("%08x:%s", (uint32_t)addr
, levels
[level
]);
238 printf("%04x:", i
* 4);
240 for (j
= 0; (j
< 8) && (i
+j
< sizedwords
); j
++) {
241 printf(" %08x", dwords
[i
+j
]);
249 dump_float(float *dwords
, uint32_t sizedwords
, int level
)
252 for (i
= 0; i
< sizedwords
; i
++) {
255 printf("%016lx:%s", gpuaddr(dwords
), levels
[level
]);
257 printf("%08x:%s", (uint32_t)gpuaddr(dwords
), levels
[level
]);
262 printf("%8f", *(dwords
++));
270 /* I believe the surface format is low bits:
271 #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
272 comments in sys2gmem_tex_const indicate that address is [31:12], but
273 looks like at least some of the bits above the format have different meaning..
/* Split a packed dword into its address part (bits outside 'mask') and
 * flag part (bits inside 'mask'), writing both to the out-parameters.
 */
static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr,
		uint32_t *flags, uint32_t mask)
{
	/* only used on a2xx, which has 32-bit addresses */
	assert(!is_64b());
	*gpuaddr = dword & ~mask;
	*flags = dword & mask;
}
283 static uint32_t type0_reg_vals
[0xffff + 1];
284 static uint8_t type0_reg_rewritten
[sizeof(type0_reg_vals
)/8]; /* written since last draw */
285 static uint8_t type0_reg_written
[sizeof(type0_reg_vals
)/8];
286 static uint32_t lastvals
[ARRAY_SIZE(type0_reg_vals
)];
288 static bool reg_rewritten(uint32_t regbase
)
290 return !!(type0_reg_rewritten
[regbase
/8] & (1 << (regbase
% 8)));
293 bool reg_written(uint32_t regbase
)
295 return !!(type0_reg_written
[regbase
/8] & (1 << (regbase
% 8)));
298 static void clear_rewritten(void)
300 memset(type0_reg_rewritten
, 0, sizeof(type0_reg_rewritten
));
303 static void clear_written(void)
305 memset(type0_reg_written
, 0, sizeof(type0_reg_written
));
309 uint32_t reg_lastval(uint32_t regbase
)
311 return lastvals
[regbase
];
317 memset(lastvals
, 0, sizeof(lastvals
));
321 reg_val(uint32_t regbase
)
323 return type0_reg_vals
[regbase
];
327 reg_set(uint32_t regbase
, uint32_t val
)
329 assert(regbase
< regcnt());
330 type0_reg_vals
[regbase
] = val
;
331 type0_reg_written
[regbase
/8] |= (1 << (regbase
% 8));
332 type0_reg_rewritten
[regbase
/8] |= (1 << (regbase
% 8));
336 reg_dump_scratch(const char *name
, uint32_t dword
, int level
)
343 r
= regbase("CP_SCRATCH[0].REG");
345 // if not, try old a2xx/a3xx version:
347 r
= regbase("CP_SCRATCH_REG0");
352 printf("%s:%u,%u,%u,%u\n", levels
[level
],
353 reg_val(r
+ 4), reg_val(r
+ 5),
354 reg_val(r
+ 6), reg_val(r
+ 7));
358 dump_gpuaddr_size(uint64_t gpuaddr
, int level
, int sizedwords
, int quietlvl
)
365 buf
= hostptr(gpuaddr
);
367 dump_hex(buf
, sizedwords
, level
+1);
/* Convenience wrapper: hexdump 64 dwords at gpuaddr (quiet level 3). */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
	dump_gpuaddr_size(gpuaddr, level, 64, 3);
}
/* Register handler: the written dword is a 32-bit gpuaddr; dump it. */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
	dump_gpuaddr(dword, level);
}
385 reg_gpuaddr_lo(const char *name
, uint32_t dword
, int level
)
391 reg_dump_gpuaddr_hi(const char *name
, uint32_t dword
, int level
)
393 dump_gpuaddr(gpuaddr_lo
| (((uint64_t)dword
) << 32), level
);
398 dump_shader(const char *ext
, void *buf
, int bufsz
)
400 if (options
->dump_shaders
) {
404 sprintf(filename
, "%04d.%s", n
++, ext
);
405 fd
= open(filename
, O_WRONLY
| O_TRUNC
| O_CREAT
, 0644);
406 write(fd
, buf
, bufsz
);
412 disasm_gpuaddr(const char *name
, uint64_t gpuaddr
, int level
)
416 gpuaddr
&= 0xfffffffffffffff0;
421 buf
= hostptr(gpuaddr
);
423 uint32_t sizedwords
= hostlen(gpuaddr
) / 4;
426 dump_hex(buf
, min(64, sizedwords
), level
+1);
427 disasm_a3xx(buf
, sizedwords
, level
+2, stdout
, options
->gpu_id
);
429 /* this is a bit ugly way, but oh well.. */
430 if (strstr(name
, "SP_VS_OBJ")) {
432 } else if (strstr(name
, "SP_FS_OBJ")) {
434 } else if (strstr(name
, "SP_GS_OBJ")) {
436 } else if (strstr(name
, "SP_CS_OBJ")) {
443 dump_shader(ext
, buf
, sizedwords
* 4);
/* Register handler: disassemble shader code at a 32-bit gpuaddr. */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
	disasm_gpuaddr(name, dword, level);
}
454 reg_disasm_gpuaddr_hi(const char *name
, uint32_t dword
, int level
)
456 disasm_gpuaddr(name
, gpuaddr_lo
| (((uint64_t)dword
) << 32), level
);
459 /* Find the value of the TEX_COUNT register that corresponds to the named
460 * TEX_SAMP/TEX_CONST reg.
462 * Note, this kinda assumes an equal # of samplers and textures, but not
463 * really sure if there is a much better option. I suppose on a6xx we
464 * could instead decode the bitfields in SP_xS_CONFIG
467 get_tex_count(const char *name
)
469 char count_reg
[strlen(name
) + 5];
472 p
= strstr(name
, "CONST");
474 p
= strstr(name
, "SAMP");
479 strncpy(count_reg
, name
, n
);
480 strcpy(count_reg
+ n
, "COUNT");
482 return reg_val(regbase(count_reg
));
486 reg_dump_tex_samp_hi(const char *name
, uint32_t dword
, int level
)
491 int num_unit
= get_tex_count(name
);
492 uint64_t gpuaddr
= gpuaddr_lo
| (((uint64_t)dword
) << 32);
493 void *buf
= hostptr(gpuaddr
);
498 dump_tex_samp(buf
, STATE_SRC_DIRECT
, num_unit
, level
+1);
502 reg_dump_tex_const_hi(const char *name
, uint32_t dword
, int level
)
507 int num_unit
= get_tex_count(name
);
508 uint64_t gpuaddr
= gpuaddr_lo
| (((uint64_t)dword
) << 32);
509 void *buf
= hostptr(gpuaddr
);
514 dump_tex_const(buf
, num_unit
, level
+1);
518 * Registers with special handling (rnndec_decode() handles rest):
520 #define REG(x, fxn) { #x, fxn }
523 void (*fxn
)(const char *name
, uint32_t dword
, int level
);
526 REG(CP_SCRATCH_REG0
, reg_dump_scratch
),
527 REG(CP_SCRATCH_REG1
, reg_dump_scratch
),
528 REG(CP_SCRATCH_REG2
, reg_dump_scratch
),
529 REG(CP_SCRATCH_REG3
, reg_dump_scratch
),
530 REG(CP_SCRATCH_REG4
, reg_dump_scratch
),
531 REG(CP_SCRATCH_REG5
, reg_dump_scratch
),
532 REG(CP_SCRATCH_REG6
, reg_dump_scratch
),
533 REG(CP_SCRATCH_REG7
, reg_dump_scratch
),
536 REG(CP_SCRATCH_REG0
, reg_dump_scratch
),
537 REG(CP_SCRATCH_REG1
, reg_dump_scratch
),
538 REG(CP_SCRATCH_REG2
, reg_dump_scratch
),
539 REG(CP_SCRATCH_REG3
, reg_dump_scratch
),
540 REG(CP_SCRATCH_REG4
, reg_dump_scratch
),
541 REG(CP_SCRATCH_REG5
, reg_dump_scratch
),
542 REG(CP_SCRATCH_REG6
, reg_dump_scratch
),
543 REG(CP_SCRATCH_REG7
, reg_dump_scratch
),
544 REG(VSC_SIZE_ADDRESS
, reg_dump_gpuaddr
),
545 REG(SP_VS_PVT_MEM_ADDR_REG
, reg_dump_gpuaddr
),
546 REG(SP_FS_PVT_MEM_ADDR_REG
, reg_dump_gpuaddr
),
547 REG(SP_VS_OBJ_START_REG
, reg_disasm_gpuaddr
),
548 REG(SP_FS_OBJ_START_REG
, reg_disasm_gpuaddr
),
549 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
552 REG(CP_SCRATCH
[0].REG
, reg_dump_scratch
),
553 REG(CP_SCRATCH
[0x1].REG
, reg_dump_scratch
),
554 REG(CP_SCRATCH
[0x2].REG
, reg_dump_scratch
),
555 REG(CP_SCRATCH
[0x3].REG
, reg_dump_scratch
),
556 REG(CP_SCRATCH
[0x4].REG
, reg_dump_scratch
),
557 REG(CP_SCRATCH
[0x5].REG
, reg_dump_scratch
),
558 REG(CP_SCRATCH
[0x6].REG
, reg_dump_scratch
),
559 REG(CP_SCRATCH
[0x7].REG
, reg_dump_scratch
),
560 REG(SP_VS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
561 REG(SP_FS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
562 REG(SP_GS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
563 REG(SP_HS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
564 REG(SP_DS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
565 REG(SP_CS_PVT_MEM_ADDR
, reg_dump_gpuaddr
),
566 REG(SP_VS_OBJ_START
, reg_disasm_gpuaddr
),
567 REG(SP_FS_OBJ_START
, reg_disasm_gpuaddr
),
568 REG(SP_GS_OBJ_START
, reg_disasm_gpuaddr
),
569 REG(SP_HS_OBJ_START
, reg_disasm_gpuaddr
),
570 REG(SP_DS_OBJ_START
, reg_disasm_gpuaddr
),
571 REG(SP_CS_OBJ_START
, reg_disasm_gpuaddr
),
572 REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
573 REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
574 REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
575 REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
576 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR
, reg_dump_gpuaddr
),
579 REG(CP_SCRATCH
[0x4].REG
, reg_dump_scratch
),
580 REG(CP_SCRATCH
[0x5].REG
, reg_dump_scratch
),
581 REG(CP_SCRATCH
[0x6].REG
, reg_dump_scratch
),
582 REG(CP_SCRATCH
[0x7].REG
, reg_dump_scratch
),
583 REG(SP_VS_OBJ_START_LO
, reg_gpuaddr_lo
),
584 REG(SP_VS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
585 REG(SP_HS_OBJ_START_LO
, reg_gpuaddr_lo
),
586 REG(SP_HS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
587 REG(SP_DS_OBJ_START_LO
, reg_gpuaddr_lo
),
588 REG(SP_DS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
589 REG(SP_GS_OBJ_START_LO
, reg_gpuaddr_lo
),
590 REG(SP_GS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
591 REG(SP_FS_OBJ_START_LO
, reg_gpuaddr_lo
),
592 REG(SP_FS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
593 REG(SP_CS_OBJ_START_LO
, reg_gpuaddr_lo
),
594 REG(SP_CS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
595 REG(TPL1_VS_TEX_CONST_LO
, reg_gpuaddr_lo
),
596 REG(TPL1_VS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
597 REG(TPL1_VS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
598 REG(TPL1_VS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
599 REG(TPL1_HS_TEX_CONST_LO
, reg_gpuaddr_lo
),
600 REG(TPL1_HS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
601 REG(TPL1_HS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
602 REG(TPL1_HS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
603 REG(TPL1_DS_TEX_CONST_LO
, reg_gpuaddr_lo
),
604 REG(TPL1_DS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
605 REG(TPL1_DS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
606 REG(TPL1_DS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
607 REG(TPL1_GS_TEX_CONST_LO
, reg_gpuaddr_lo
),
608 REG(TPL1_GS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
609 REG(TPL1_GS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
610 REG(TPL1_GS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
611 REG(TPL1_FS_TEX_CONST_LO
, reg_gpuaddr_lo
),
612 REG(TPL1_FS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
613 REG(TPL1_FS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
614 REG(TPL1_FS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
615 REG(TPL1_CS_TEX_CONST_LO
, reg_gpuaddr_lo
),
616 REG(TPL1_CS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
617 REG(TPL1_CS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
618 REG(TPL1_CS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
619 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO
, reg_gpuaddr_lo
),
620 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI
, reg_dump_gpuaddr_hi
),
621 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
622 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
623 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
624 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
625 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
626 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
627 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
628 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
629 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
630 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
631 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
632 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
633 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
634 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
635 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
636 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
637 // REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
638 // REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
639 // REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
640 // REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
641 // REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
642 // REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
643 // REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
644 // REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
645 // REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
646 // REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
648 // REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
649 // REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
650 // REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
651 // REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
652 // REG(RB_2D_DST_LO, reg_gpuaddr_lo),
653 // REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
654 // REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
655 // REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
659 REG(CP_SCRATCH
[0x4].REG
, reg_dump_scratch
),
660 REG(CP_SCRATCH
[0x5].REG
, reg_dump_scratch
),
661 REG(CP_SCRATCH
[0x6].REG
, reg_dump_scratch
),
662 REG(CP_SCRATCH
[0x7].REG
, reg_dump_scratch
),
664 REG(SP_VS_OBJ_START_LO
, reg_gpuaddr_lo
),
665 REG(SP_VS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
666 REG(SP_HS_OBJ_START_LO
, reg_gpuaddr_lo
),
667 REG(SP_HS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
668 REG(SP_DS_OBJ_START_LO
, reg_gpuaddr_lo
),
669 REG(SP_DS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
670 REG(SP_GS_OBJ_START_LO
, reg_gpuaddr_lo
),
671 REG(SP_GS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
672 REG(SP_FS_OBJ_START_LO
, reg_gpuaddr_lo
),
673 REG(SP_FS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
674 REG(SP_CS_OBJ_START_LO
, reg_gpuaddr_lo
),
675 REG(SP_CS_OBJ_START_HI
, reg_disasm_gpuaddr_hi
),
677 REG(SP_VS_TEX_CONST_LO
, reg_gpuaddr_lo
),
678 REG(SP_VS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
679 REG(SP_VS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
680 REG(SP_VS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
681 REG(SP_HS_TEX_CONST_LO
, reg_gpuaddr_lo
),
682 REG(SP_HS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
683 REG(SP_HS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
684 REG(SP_HS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
685 REG(SP_DS_TEX_CONST_LO
, reg_gpuaddr_lo
),
686 REG(SP_DS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
687 REG(SP_DS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
688 REG(SP_DS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
689 REG(SP_GS_TEX_CONST_LO
, reg_gpuaddr_lo
),
690 REG(SP_GS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
691 REG(SP_GS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
692 REG(SP_GS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
693 REG(SP_FS_TEX_CONST_LO
, reg_gpuaddr_lo
),
694 REG(SP_FS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
695 REG(SP_FS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
696 REG(SP_FS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
697 REG(SP_CS_TEX_CONST_LO
, reg_gpuaddr_lo
),
698 REG(SP_CS_TEX_CONST_HI
, reg_dump_tex_const_hi
),
699 REG(SP_CS_TEX_SAMP_LO
, reg_gpuaddr_lo
),
700 REG(SP_CS_TEX_SAMP_HI
, reg_dump_tex_samp_hi
),
705 static struct rnn
*rnn
;
708 init_rnn(const char *gpuname
)
710 rnn
= rnn_new(!options
->color
);
712 rnn_load(rnn
, gpuname
);
714 if (options
->querystrs
) {
716 queryvals
= calloc(options
->nquery
, sizeof(queryvals
[0]));
718 for (i
= 0; i
< options
->nquery
; i
++) {
719 int val
= strtol(options
->querystrs
[i
], NULL
, 0);
722 val
= regbase(options
->querystrs
[i
]);
725 printf("querystr: %s -> 0x%x\n", options
->querystrs
[i
], queryvals
[i
]);
729 for (unsigned idx
= 0; type0_reg
[idx
].regname
; idx
++) {
730 type0_reg
[idx
].regbase
= regbase(type0_reg
[idx
].regname
);
731 if (!type0_reg
[idx
].regbase
) {
732 printf("invalid register name: %s\n", type0_reg
[idx
].regname
);
743 memset(&ibs
, 0, sizeof(ibs
));
747 cffdec_init(const struct cffdec_options
*_options
)
750 summary
= options
->summary
;
752 /* in case we're decoding multiple files: */
757 /* TODO we need an API to free/cleanup any previous rnn */
759 switch (options
->gpu_id
) {
761 type0_reg
= reg_a2xx
;
765 type0_reg
= reg_a3xx
;
769 type0_reg
= reg_a4xx
;
773 type0_reg
= reg_a5xx
;
777 type0_reg
= reg_a6xx
;
781 errx(-1, "unsupported gpu");
786 pktname(unsigned opc
)
788 return rnn_enumname(rnn
, "adreno_pm4_type3_packets", opc
);
792 regname(uint32_t regbase
, int color
)
794 return rnn_regname(rnn
, regbase
, color
);
798 regbase(const char *name
)
800 return rnn_regbase(rnn
, name
);
804 endswith(uint32_t regbase
, const char *suffix
)
806 const char *name
= regname(regbase
, 0);
807 const char *s
= strstr(name
, suffix
);
810 return (s
- strlen(name
) + strlen(suffix
)) == name
;
814 dump_register_val(uint32_t regbase
, uint32_t dword
, int level
)
816 struct rnndecaddrinfo
*info
= rnn_reginfo(rnn
, regbase
);
818 if (info
&& info
->typeinfo
) {
819 uint64_t gpuaddr
= 0;
820 char *decoded
= rnndec_decodeval(rnn
->vc
, info
->typeinfo
, dword
);
821 printf("%s%s: %s", levels
[level
], info
->name
, decoded
);
823 /* Try and figure out if we are looking at a gpuaddr.. this
824 * might be useful for other gen's too, but at least a5xx has
825 * the _HI/_LO suffix we can look for. Maybe a better approach
826 * would be some special annotation in the xml..
828 if (options
->gpu_id
>= 500) {
829 if (endswith(regbase
, "_HI") && endswith(regbase
-1, "_LO")) {
830 gpuaddr
= (((uint64_t)dword
) << 32) | reg_val(regbase
-1);
831 } else if (endswith(regbase
, "_LO") && endswith(regbase
+1, "_HI")) {
832 gpuaddr
= (((uint64_t)reg_val(regbase
+1)) << 32) | dword
;
836 if (gpuaddr
&& hostptr(gpuaddr
)) {
837 printf("\t\tbase=%lx, offset=%lu, size=%u",
838 gpubaseaddr(gpuaddr
),
839 gpuaddr
- gpubaseaddr(gpuaddr
),
840 hostlen(gpubaseaddr(gpuaddr
)));
847 printf("%s%s: %08x\n", levels
[level
], info
->name
, dword
);
849 printf("%s<%04x>: %08x\n", levels
[level
], regbase
, dword
);
859 dump_register(uint32_t regbase
, uint32_t dword
, int level
)
862 dump_register_val(regbase
, dword
, level
);
865 for (unsigned idx
= 0; type0_reg
[idx
].regname
; idx
++) {
866 if (type0_reg
[idx
].regbase
== regbase
) {
867 type0_reg
[idx
].fxn(type0_reg
[idx
].regname
, dword
, level
);
/* Registers in the banked range [0x2000, 0x2400) can be written without
 * a WFI (see the needs_wfi check in dump_registers).
 */
static bool
is_banked_reg(uint32_t regbase)
{
	return regbase >= 0x2000 && regbase < 0x2400;
}
880 dump_registers(uint32_t regbase
, uint32_t *dwords
, uint32_t sizedwords
, int level
)
882 while (sizedwords
--) {
883 int last_summary
= summary
;
885 /* access to non-banked registers needs a WFI:
886 * TODO banked register range for a2xx??
888 if (needs_wfi
&& !is_banked_reg(regbase
))
889 printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase
, 1), regbase
);
891 reg_set(regbase
, *dwords
);
892 dump_register(regbase
, *dwords
, level
);
895 summary
= last_summary
;
900 dump_domain(uint32_t *dwords
, uint32_t sizedwords
, int level
,
903 struct rnndomain
*dom
;
906 dom
= rnn_finddomain(rnn
->db
, name
);
912 script_packet(dwords
, sizedwords
, rnn
, dom
);
917 for (i
= 0; i
< sizedwords
; i
++) {
918 struct rnndecaddrinfo
*info
= rnndec_decodeaddr(rnn
->vc
, dom
, i
, 0);
920 if (!(info
&& info
->typeinfo
))
922 uint64_t value
= dwords
[i
];
923 if (info
->typeinfo
->high
>= 32 && i
< sizedwords
- 1) {
924 value
|= (uint64_t) dwords
[i
+ 1] << 32;
925 i
++; /* skip the next dword since we're printing it now */
927 decoded
= rnndec_decodeval(rnn
->vc
, info
->typeinfo
, value
);
928 /* Unlike the register printing path, we don't print the name
929 * of the register, so if it doesn't contain other named
930 * things (i.e. it isn't a bitset) then print the register
931 * name as if it's a bitset with a single entry. This avoids
932 * having to create a dummy register with a single entry to
933 * get a name in the decoding.
935 if (info
->typeinfo
->type
== RNN_TTYPE_BITSET
||
936 info
->typeinfo
->type
== RNN_TTYPE_INLINE_BITSET
) {
937 printf("%s%s\n", levels
[level
], decoded
);
939 printf("%s{ %s%s%s = %s }\n", levels
[level
],
940 rnn
->vc
->colors
->rname
, info
->name
,
941 rnn
->vc
->colors
->reset
, decoded
);
950 static uint32_t bin_x1
, bin_x2
, bin_y1
, bin_y2
;
951 static unsigned mode
;
952 static const char *render_mode
;
957 MODE_ALL
= MODE_BINNING
| MODE_GMEM
| MODE_BYPASS
,
958 } enable_mask
= MODE_ALL
;
959 static bool skip_ib2_enable_global
;
960 static bool skip_ib2_enable_local
;
963 print_mode(int level
)
965 if ((options
->gpu_id
>= 500) && !quiet(2)) {
966 printf("%smode: %s\n", levels
[level
], render_mode
);
967 printf("%sskip_ib2: g=%d, l=%d\n", levels
[level
], skip_ib2_enable_global
, skip_ib2_enable_local
);
974 switch (options
->query_mode
) {
979 for (int i
= 0; i
< options
->nquery
; i
++) {
980 uint32_t regbase
= queryvals
[i
];
981 if (!reg_written(regbase
)) {
984 if (reg_rewritten(regbase
)) {
990 for (int i
= 0; i
< options
->nquery
; i
++) {
991 uint32_t regbase
= queryvals
[i
];
992 if (!reg_written(regbase
)) {
995 uint32_t lastval
= reg_val(regbase
);
996 if (lastval
!= lastvals
[regbase
]) {
1006 __do_query(const char *primtype
, uint32_t num_indices
)
1010 if ((500 <= options
->gpu_id
) && (options
->gpu_id
< 700)) {
1011 uint32_t scissor_tl
= reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1012 uint32_t scissor_br
= reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1014 bin_x1
= scissor_tl
& 0xffff;
1015 bin_y1
= scissor_tl
>> 16;
1016 bin_x2
= scissor_br
& 0xffff;
1017 bin_y2
= scissor_br
>> 16;
1020 for (int i
= 0; i
< options
->nquery
; i
++) {
1021 uint32_t regbase
= queryvals
[i
];
1022 if (reg_written(regbase
)) {
1023 uint32_t lastval
= reg_val(regbase
);
1024 printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count
, primtype
,
1025 bin_x1
, bin_y1
, bin_x2
, bin_y2
, num_indices
);
1026 if (options
->gpu_id
>= 500)
1027 printf("%s:", render_mode
);
1028 printf("\t%08x", lastval
);
1029 if (lastval
!= lastvals
[regbase
]) {
1034 if (reg_rewritten(regbase
)) {
1039 dump_register_val(regbase
, lastval
, 0);
1049 do_query_compare(const char *primtype
, uint32_t num_indices
)
1051 unsigned saved_enable_mask
= enable_mask
;
1052 const char *saved_render_mode
= render_mode
;
1054 /* in 'query-compare' mode, we want to see if the register is writtten
1055 * or changed in any mode:
1057 * (NOTE: this could cause false-positive for 'query-delta' if the reg
1058 * is written with different values in binning vs sysmem/gmem mode, as
1059 * we don't track previous values per-mode, but I think we can live with
1062 enable_mask
= MODE_ALL
;
1067 if (!skip_query()) {
1068 /* dump binning pass values: */
1069 enable_mask
= MODE_BINNING
;
1070 render_mode
= "BINNING";
1073 __do_query(primtype
, num_indices
);
1075 /* dump draw pass values: */
1076 enable_mask
= MODE_GMEM
| MODE_BYPASS
;
1077 render_mode
= "DRAW";
1080 __do_query(primtype
, num_indices
);
1085 enable_mask
= saved_enable_mask
;
1086 render_mode
= saved_render_mode
;
1088 disable_all_groups();
1091 /* well, actually query and script..
1092 * NOTE: call this before dump_register_summary()
1095 do_query(const char *primtype
, uint32_t num_indices
)
1098 script_draw(primtype
, num_indices
);
1100 if (options
->query_compare
) {
1101 do_query_compare(primtype
, num_indices
);
1108 __do_query(primtype
, num_indices
);
1112 cp_im_loadi(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1114 uint32_t start
= dwords
[1] >> 16;
1115 uint32_t size
= dwords
[1] & 0xffff;
1116 const char *type
= NULL
, *ext
= NULL
;
1117 enum shader_t disasm_type
;
1119 switch (dwords
[0]) {
1123 disasm_type
= SHADER_VERTEX
;
1128 disasm_type
= SHADER_FRAGMENT
;
1136 printf("%s%s shader, start=%04x, size=%04x\n", levels
[level
], type
, start
, size
);
1137 disasm_a2xx(dwords
+ 2, sizedwords
- 2, level
+2, disasm_type
);
1139 /* dump raw shader: */
1141 dump_shader(ext
, dwords
+ 2, (sizedwords
- 2) * 4);
1145 cp_wide_reg_write(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1147 uint32_t reg
= dwords
[0] & 0xffff;
1149 for (i
= 1; i
< sizedwords
; i
++) {
1150 dump_register(reg
, dwords
[i
], level
+1);
1151 reg_set(reg
, dwords
[i
]);
1159 TEX_MIPADDR
, /* a3xx only */
1163 // image/ssbo state:
1170 // unknown things, just to hexdumps:
1176 enum adreno_state_block
{
1178 SB_VERT_MIPADDR
= 1,
1180 SB_FRAG_MIPADDR
= 3,
1184 SB_COMPUTE_SHADER
= 7,
1187 /* TODO there is probably a clever way to let rnndec parse things so
1188 * we don't have to care about packet format differences across gens
1192 a3xx_get_state_type(uint32_t *dwords
, enum shader_t
*stage
, enum state_t
*state
,
1193 enum state_src_t
*src
)
1195 unsigned state_block_id
= (dwords
[0] >> 19) & 0x7;
1196 unsigned state_type
= dwords
[1] & 0x3;
1197 static const struct {
1198 enum shader_t stage
;
1200 } lookup
[0xf][0x3] = {
1201 [SB_VERT_TEX
][0] = { SHADER_VERTEX
, TEX_SAMP
},
1202 [SB_VERT_TEX
][1] = { SHADER_VERTEX
, TEX_CONST
},
1203 [SB_FRAG_TEX
][0] = { SHADER_FRAGMENT
, TEX_SAMP
},
1204 [SB_FRAG_TEX
][1] = { SHADER_FRAGMENT
, TEX_CONST
},
1205 [SB_VERT_SHADER
][0] = { SHADER_VERTEX
, SHADER_PROG
},
1206 [SB_VERT_SHADER
][1] = { SHADER_VERTEX
, SHADER_CONST
},
1207 [SB_FRAG_SHADER
][0] = { SHADER_FRAGMENT
, SHADER_PROG
},
1208 [SB_FRAG_SHADER
][1] = { SHADER_FRAGMENT
, SHADER_CONST
},
1211 *stage
= lookup
[state_block_id
][state_type
].stage
;
1212 *state
= lookup
[state_block_id
][state_type
].state
;
1213 unsigned state_src
= (dwords
[0] >> 16) & 0x7;
1214 if (state_src
== 0 /* SS_DIRECT */)
1215 *src
= STATE_SRC_DIRECT
;
1217 *src
= STATE_SRC_INDIRECT
;
1220 static enum state_src_t
1221 _get_state_src(unsigned dword0
)
1223 switch ((dword0
>> 16) & 0x3) {
1224 case 0: /* SS4_DIRECT / SS6_DIRECT */
1225 return STATE_SRC_DIRECT
;
1226 case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1227 return STATE_SRC_INDIRECT
;
1228 case 1: /* SS6_BINDLESS */
1229 return STATE_SRC_BINDLESS
;
1231 return STATE_SRC_DIRECT
;
1236 _get_state_type(unsigned state_block_id
, unsigned state_type
,
1237 enum shader_t
*stage
, enum state_t
*state
)
1239 static const struct {
1240 enum shader_t stage
;
1242 } lookup
[0x10][0x4] = {
1244 [0x0][0] = { SHADER_VERTEX
, TEX_SAMP
},
1245 [0x0][1] = { SHADER_VERTEX
, TEX_CONST
},
1246 [0x0][2] = { SHADER_VERTEX
, UBO
},
1248 [0x1][0] = { SHADER_TCS
, TEX_SAMP
},
1249 [0x1][1] = { SHADER_TCS
, TEX_CONST
},
1250 [0x1][2] = { SHADER_TCS
, UBO
},
1252 [0x2][0] = { SHADER_TES
, TEX_SAMP
},
1253 [0x2][1] = { SHADER_TES
, TEX_CONST
},
1254 [0x2][2] = { SHADER_TES
, UBO
},
1256 [0x3][0] = { SHADER_GEOM
, TEX_SAMP
},
1257 [0x3][1] = { SHADER_GEOM
, TEX_CONST
},
1258 [0x3][2] = { SHADER_GEOM
, UBO
},
1260 [0x4][0] = { SHADER_FRAGMENT
, TEX_SAMP
},
1261 [0x4][1] = { SHADER_FRAGMENT
, TEX_CONST
},
1262 [0x4][2] = { SHADER_FRAGMENT
, UBO
},
1264 [0x5][0] = { SHADER_COMPUTE
, TEX_SAMP
},
1265 [0x5][1] = { SHADER_COMPUTE
, TEX_CONST
},
1266 [0x5][2] = { SHADER_COMPUTE
, UBO
},
1268 [0x8][0] = { SHADER_VERTEX
, SHADER_PROG
},
1269 [0x8][1] = { SHADER_VERTEX
, SHADER_CONST
},
1270 [0x8][2] = { SHADER_VERTEX
, UBO
},
1272 [0x9][0] = { SHADER_TCS
, SHADER_PROG
},
1273 [0x9][1] = { SHADER_TCS
, SHADER_CONST
},
1274 [0x9][2] = { SHADER_TCS
, UBO
},
1276 [0xa][0] = { SHADER_TES
, SHADER_PROG
},
1277 [0xa][1] = { SHADER_TES
, SHADER_CONST
},
1278 [0xa][2] = { SHADER_TES
, UBO
},
1280 [0xb][0] = { SHADER_GEOM
, SHADER_PROG
},
1281 [0xb][1] = { SHADER_GEOM
, SHADER_CONST
},
1282 [0xb][2] = { SHADER_GEOM
, UBO
},
1284 [0xc][0] = { SHADER_FRAGMENT
, SHADER_PROG
},
1285 [0xc][1] = { SHADER_FRAGMENT
, SHADER_CONST
},
1286 [0xc][2] = { SHADER_FRAGMENT
, UBO
},
1288 [0xd][0] = { SHADER_COMPUTE
, SHADER_PROG
},
1289 [0xd][1] = { SHADER_COMPUTE
, SHADER_CONST
},
1290 [0xd][2] = { SHADER_COMPUTE
, UBO
},
1291 [0xd][3] = { SHADER_COMPUTE
, SSBO_0
}, /* a6xx location */
1292 // SB4_SSBO (shared across all stages)
1293 [0xe][0] = { 0, SSBO_0
}, /* a5xx (and a4xx?) location */
1294 [0xe][1] = { 0, SSBO_1
},
1295 [0xe][2] = { 0, SSBO_2
},
1297 [0xf][0] = { SHADER_COMPUTE
, SSBO_0
},
1298 [0xf][1] = { SHADER_COMPUTE
, SSBO_1
},
1299 [0xf][2] = { SHADER_COMPUTE
, SSBO_2
},
1301 /* This looks like combined UBO state for 3d stages (a5xx and
1302 * before?? I think a6xx has UBO state per shader stage:
1304 [0x6][2] = { 0, UBO
},
1305 [0x7][1] = { 0, UNKNOWN_2DWORDS
},
1308 *stage
= lookup
[state_block_id
][state_type
].stage
;
1309 *state
= lookup
[state_block_id
][state_type
].state
;
1313 a4xx_get_state_type(uint32_t *dwords
, enum shader_t
*stage
, enum state_t
*state
,
1314 enum state_src_t
*src
)
1316 unsigned state_block_id
= (dwords
[0] >> 18) & 0xf;
1317 unsigned state_type
= dwords
[1] & 0x3;
1318 _get_state_type(state_block_id
, state_type
, stage
, state
);
1319 *src
= _get_state_src(dwords
[0]);
1323 a6xx_get_state_type(uint32_t *dwords
, enum shader_t
*stage
, enum state_t
*state
,
1324 enum state_src_t
*src
)
1326 unsigned state_block_id
= (dwords
[0] >> 18) & 0xf;
1327 unsigned state_type
= (dwords
[0] >> 14) & 0x3;
1328 _get_state_type(state_block_id
, state_type
, stage
, state
);
1329 *src
= _get_state_src(dwords
[0]);
1333 dump_tex_samp(uint32_t *texsamp
, enum state_src_t src
, int num_unit
, int level
)
1335 for (int i
= 0; i
< num_unit
; i
++) {
1336 /* work-around to reduce noise for opencl blob which always
1337 * writes the max # regardless of # of textures used
1339 if ((num_unit
== 16) && (texsamp
[0] == 0) && (texsamp
[1] == 0))
1342 if ((300 <= options
->gpu_id
) && (options
->gpu_id
< 400)) {
1343 dump_domain(texsamp
, 2, level
+2, "A3XX_TEX_SAMP");
1344 dump_hex(texsamp
, 2, level
+1);
1346 } else if ((400 <= options
->gpu_id
) && (options
->gpu_id
< 500)) {
1347 dump_domain(texsamp
, 2, level
+2, "A4XX_TEX_SAMP");
1348 dump_hex(texsamp
, 2, level
+1);
1350 } else if ((500 <= options
->gpu_id
) && (options
->gpu_id
< 600)) {
1351 dump_domain(texsamp
, 4, level
+2, "A5XX_TEX_SAMP");
1352 dump_hex(texsamp
, 4, level
+1);
1354 } else if ((600 <= options
->gpu_id
) && (options
->gpu_id
< 700)) {
1355 dump_domain(texsamp
, 4, level
+2, "A6XX_TEX_SAMP");
1356 dump_hex(texsamp
, 4, level
+1);
1357 texsamp
+= src
== STATE_SRC_BINDLESS
? 16 : 4;
1363 dump_tex_const(uint32_t *texconst
, int num_unit
, int level
)
1365 for (int i
= 0; i
< num_unit
; i
++) {
1366 /* work-around to reduce noise for opencl blob which always
1367 * writes the max # regardless of # of textures used
1369 if ((num_unit
== 16) &&
1370 (texconst
[0] == 0) && (texconst
[1] == 0) &&
1371 (texconst
[2] == 0) && (texconst
[3] == 0))
1374 if ((300 <= options
->gpu_id
) && (options
->gpu_id
< 400)) {
1375 dump_domain(texconst
, 4, level
+2, "A3XX_TEX_CONST");
1376 dump_hex(texconst
, 4, level
+1);
1378 } else if ((400 <= options
->gpu_id
) && (options
->gpu_id
< 500)) {
1379 dump_domain(texconst
, 8, level
+2, "A4XX_TEX_CONST");
1380 if (options
->dump_textures
) {
1381 uint32_t addr
= texconst
[4] & ~0x1f;
1382 dump_gpuaddr(addr
, level
-2);
1384 dump_hex(texconst
, 8, level
+1);
1386 } else if ((500 <= options
->gpu_id
) && (options
->gpu_id
< 600)) {
1387 dump_domain(texconst
, 12, level
+2, "A5XX_TEX_CONST");
1388 if (options
->dump_textures
) {
1389 uint64_t addr
= (((uint64_t)texconst
[5] & 0x1ffff) << 32) | texconst
[4];
1390 dump_gpuaddr_size(addr
, level
-2, hostlen(addr
) / 4, 3);
1392 dump_hex(texconst
, 12, level
+1);
1394 } else if ((600 <= options
->gpu_id
) && (options
->gpu_id
< 700)) {
1395 dump_domain(texconst
, 16, level
+2, "A6XX_TEX_CONST");
1396 if (options
->dump_textures
) {
1397 uint64_t addr
= (((uint64_t)texconst
[5] & 0x1ffff) << 32) | texconst
[4];
1398 dump_gpuaddr_size(addr
, level
-2, hostlen(addr
) / 4, 3);
1400 dump_hex(texconst
, 16, level
+1);
/* cp_load_state: decode CP_LOAD_STATE / CP_LOAD_STATE4 / CP_LOAD_STATE6
 * packets.  Resolves the shader stage, state type and source (direct
 * payload, indirect gpuaddr, or bindless descriptor base) and then
 * dumps the referenced state (shader program, constants, tex
 * samplers/consts, SSBO/UBO descriptors, ...).
 *
 * NOTE(review): reflowed from a line-mangled extraction.  Several
 * original source lines are MISSING here (switch headers, case labels,
 * break statements, some declarations and closing braces); the code
 * tokens below are unchanged from the extraction — gaps are marked.
 */
cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
	enum shader_t stage;
	/* [gap: 'enum state_t state;' and others dropped] */
	enum state_src_t src;
	uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
	uint64_t ext_src_addr;
	/* [gap: contents/i decls, blank lines dropped] */
	if (quiet(2) && !options->script)
	/* [gap: early-return body dropped] */
	if (options->gpu_id >= 600)
		a6xx_get_state_type(dwords, &stage, &state, &src);
	else if (options->gpu_id >= 400)
		a4xx_get_state_type(dwords, &stage, &state, &src);
	/* [gap: 'else' dropped] */
		a3xx_get_state_type(dwords, &stage, &state, &src);
	/* [gap: 'switch (src) {' dropped] */
	case STATE_SRC_DIRECT: ext_src_addr = 0; break;
	case STATE_SRC_INDIRECT:
		/* [gap: is_64b() branch structure dropped] */
		ext_src_addr = dwords[1] & 0xfffffffc;
		ext_src_addr |= ((uint64_t)dwords[2]) << 32;
		/* 32-bit path: */
		ext_src_addr = dwords[1] & 0xfffffffc;
	case STATE_SRC_BINDLESS: {
		/* bindless: base address comes from HLSQ_*_BINDLESS_BASE regs,
		 * selected by dwords[1] bits 28..31, plus dword offset in
		 * dwords[1] bits 0..23:
		 */
		const unsigned base_reg =
			stage == SHADER_COMPUTE ? regbase("HLSQ_CS_BINDLESS_BASE[0]") : regbase("HLSQ_BINDLESS_BASE[0]");
		/* [gap: is_64b() branch structure dropped] */
		const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
		ext_src_addr = reg_val(reg) & 0xfffffffc;
		ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
		const unsigned reg = base_reg + (dwords[1] >> 28);
		ext_src_addr = reg_val(reg) & 0xfffffffc;
		ext_src_addr += 4 * (dwords[1] & 0xffffff);
	/* [gap: end of switch dropped] */
	/* payload either referenced via gpu addr, or inline after hdr: */
	contents = hostptr(ext_src_addr);
	contents = is_64b() ? dwords + 3 : dwords + 2;
	/* [gap: '!contents' early return and 'switch (state) {' dropped] */
	const char *ext = NULL;
	/* num_unit scaling per gen: */
	if (options->gpu_id >= 400)
	else if (options->gpu_id >= 300)
	/*
	 * note: num_unit seems to be # of instruction groups, where
	 * an instruction group has 4 64bit instructions.
	 */
	if (stage == SHADER_VERTEX) {
	} else if (stage == SHADER_GEOM) {
	} else if (stage == SHADER_COMPUTE) {
	} else if (stage == SHADER_FRAGMENT){
	/* [gap: per-stage 'ext = ...' assignments dropped] */
	disasm_a3xx(contents, num_unit * 2, level+2, stdout, options->gpu_id);
	/* dump raw shader: */
	dump_shader(ext, contents, num_unit * 2 * 4);
	case SHADER_CONST: {
		/*
		 * note: num_unit seems to be # of pairs of dwords??
		 */
		if (options->gpu_id >= 400)
		/* [gap: num_unit scaling dropped] */
		dump_float(contents, num_unit*2, level+1);
		dump_hex(contents, num_unit*2, level+1);
	/* [gap: case label dropped] */
		uint32_t *addrs = contents;
		/* mipmap consts block just appears to be array of num_unit gpu addr's: */
		for (i = 0; i < num_unit; i++) {
			void *ptr = hostptr(addrs[i]);
			printf("%s%2d: %08x\n", levels[level+1], i, addrs[i]);
			if (options->dump_textures) {
				printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
				dump_hex(ptr, hostlen(addrs[i])/4, level+1);
	/* [gap: case labels dropped] */
	dump_tex_samp(contents, src, num_unit, level);
	dump_tex_const(contents, num_unit, level);
	/* [gap: case SSBO_0 label dropped] */
		uint32_t *ssboconst = (uint32_t *)contents;
		for (i = 0; i < num_unit; i++) {
			/* [gap: 'int sz = 4;' style decl dropped] */
			if (400 <= options->gpu_id && options->gpu_id < 500) {
				dump_domain(ssboconst, 4, level+2, "A4XX_SSBO_0");
			} else if (500 <= options->gpu_id && options->gpu_id < 600) {
				dump_domain(ssboconst, 4, level+2, "A5XX_SSBO_0");
			} else if (600 <= options->gpu_id && options->gpu_id < 700) {
				/* [gap: sz update dropped] */
				dump_domain(ssboconst, 16, level+2, "A6XX_IBO");
			dump_hex(ssboconst, sz, level+1);
	/* [gap: case SSBO_1 label dropped] */
		uint32_t *ssboconst = (uint32_t *)contents;
		for (i = 0; i < num_unit; i++) {
			if (400 <= options->gpu_id && options->gpu_id < 500)
				dump_domain(ssboconst, 2, level+2, "A4XX_SSBO_1");
			else if (500 <= options->gpu_id && options->gpu_id < 600)
				dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_1");
			dump_hex(ssboconst, 2, level+1);
	/* [gap: case SSBO_2 label dropped] */
		uint32_t *ssboconst = (uint32_t *)contents;
		for (i = 0; i < num_unit; i++) {
			/* TODO a4xx and a5xx might be same: */
			if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
				dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_2");
				dump_hex(ssboconst, 2, level+1);
				if (options->dump_textures) {
					uint64_t addr = (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
					dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
	/* [gap: case UBO label dropped] */
		uint32_t *uboconst = (uint32_t *)contents;
		for (i = 0; i < num_unit; i++) {
			// TODO probably similar on a4xx..
			if (500 <= options->gpu_id && options->gpu_id < 600)
				dump_domain(uboconst, 2, level+2, "A5XX_UBO");
			else if (600 <= options->gpu_id && options->gpu_id < 700)
				dump_domain(uboconst, 2, level+2, "A6XX_UBO");
			dump_hex(uboconst, 2, level+1);
			/* bindless descriptors are 16 dwords apart: */
			uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
	case UNKNOWN_DWORDS: {
		dump_hex(contents, num_unit, level+1);
	case UNKNOWN_2DWORDS: {
		dump_hex(contents, num_unit * 2, level+1);
	case UNKNOWN_4DWORDS: {
		dump_hex(contents, num_unit * 4, level+1);
	/* [gap: default case label dropped] */
		dump_hex(contents, num_unit, level+1);
1632 cp_set_bin(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1634 bin_x1
= dwords
[1] & 0xffff;
1635 bin_y1
= dwords
[1] >> 16;
1636 bin_x2
= dwords
[2] & 0xffff;
1637 bin_y2
= dwords
[2] >> 16;
1641 dump_a2xx_tex_const(uint32_t *dwords
, uint32_t sizedwords
, uint32_t val
, int level
)
1644 uint32_t gpuaddr
, flags
, mip_gpuaddr
, mip_flags
;
1645 uint32_t min
, mag
, swiz
, clamp_x
, clamp_y
, clamp_z
;
1646 static const char *filter
[] = {
1647 "point", "bilinear", "bicubic",
1649 static const char *clamp
[] = {
1650 "wrap", "mirror", "clamp-last-texel",
1652 static const char swiznames
[] = "xyzw01??";
1654 /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1656 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1657 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1659 p
= (dwords
[0] >> 22) << 5;
1660 clamp_x
= (dwords
[0] >> 10) & 0x3;
1661 clamp_y
= (dwords
[0] >> 13) & 0x3;
1662 clamp_z
= (dwords
[0] >> 16) & 0x3;
1664 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1665 * NearestClamp=1:OGL Mode
1667 parse_dword_addr(dwords
[1], &gpuaddr
, &flags
, 0xfff);
1669 /* Width, Height, EndianSwap=0:None */
1670 w
= (dwords
[2] & 0x1fff) + 1;
1671 h
= ((dwords
[2] >> 13) & 0x1fff) + 1;
1673 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1676 mag
= (dwords
[3] >> 19) & 0x3;
1677 min
= (dwords
[3] >> 21) & 0x3;
1678 swiz
= (dwords
[3] >> 1) & 0xfff;
1680 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1685 /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1686 * Dim=1:2d, MipPacking=0
1688 parse_dword_addr(dwords
[5], &mip_gpuaddr
, &mip_flags
, 0xfff);
1690 printf("%sset texture const %04x\n", levels
[level
], val
);
1691 printf("%sclamp x/y/z: %s/%s/%s\n", levels
[level
+1],
1692 clamp
[clamp_x
], clamp
[clamp_y
], clamp
[clamp_z
]);
1693 printf("%sfilter min/mag: %s/%s\n", levels
[level
+1], filter
[min
], filter
[mag
]);
1694 printf("%sswizzle: %c%c%c%c\n", levels
[level
+1],
1695 swiznames
[(swiz
>> 0) & 0x7], swiznames
[(swiz
>> 3) & 0x7],
1696 swiznames
[(swiz
>> 6) & 0x7], swiznames
[(swiz
>> 9) & 0x7]);
1697 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1698 levels
[level
+1], gpuaddr
, flags
, w
, h
, p
,
1699 rnn_enumname(rnn
, "a2xx_sq_surfaceformat", flags
& 0xf));
1700 printf("%smipaddr=%08x (flags=%03x)\n", levels
[level
+1],
1701 mip_gpuaddr
, mip_flags
);
1705 dump_a2xx_shader_const(uint32_t *dwords
, uint32_t sizedwords
, uint32_t val
, int level
)
1708 printf("%sset shader const %04x\n", levels
[level
], val
);
1709 for (i
= 0; i
< sizedwords
; ) {
1710 uint32_t gpuaddr
, flags
;
1711 parse_dword_addr(dwords
[i
++], &gpuaddr
, &flags
, 0xf);
1712 void *addr
= hostptr(gpuaddr
);
1715 rnn_enumname(rnn
, "a2xx_sq_surfaceformat", flags
& 0xf);
1716 uint32_t size
= dwords
[i
++];
1717 printf("%saddr=%08x, size=%d, format=%s\n", levels
[level
+1],
1718 gpuaddr
, size
, fmt
);
1719 // TODO maybe dump these as bytes instead of dwords?
1720 size
= (size
+ 3) / 4; // for now convert to dwords
1721 dump_hex(addr
, min(size
, 64), level
+ 1);
1722 if (size
> min(size
, 64))
1723 printf("%s\t\t...\n", levels
[level
+1]);
1724 dump_float(addr
, min(size
, 64), level
+ 1);
1725 if (size
> min(size
, 64))
1726 printf("%s\t\t...\n", levels
[level
+1]);
/* cp_set_const: decode a2xx CP_SET_CONSTANT — dispatch on the constant
 * type field (dwords[0] bits 16..19): float consts, texture/shader
 * consts, bool consts, loop consts, and register writes (optionally
 * relative to a source register when bit 31 is set).
 *
 * NOTE(review): reflowed from a line-mangled extraction; the numeric
 * case labels of the switch (and break statements / closing braces)
 * were dropped — code tokens below are unchanged, gaps marked.
 */
cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
	uint32_t val = dwords[0] & 0xffff;
	switch((dwords[0] >> 16) & 0xf) {
	/* [gap: case label dropped — float constants] */
		dump_float((float *)(dwords+1), sizedwords-1, level+1);
	/* [gap: case label dropped — tex/shader constants] */
		/* need to figure out how const space is partitioned between
		 * attributes, textures, etc..
		 */
		/* [gap: range check dropped] */
			dump_a2xx_tex_const(dwords+1, sizedwords-1, val, level);
		/* [gap: else dropped] */
			dump_a2xx_shader_const(dwords+1, sizedwords-1, val, level);
	/* [gap: case label dropped — bool constants] */
		printf("%sset bool const %04x\n", levels[level], val);
	/* [gap: case label dropped — loop constants] */
		printf("%sset loop const %04x\n", levels[level], val);
	/* [gap: case label dropped — register writes; bit31 = relative mode] */
		if (dwords[0] & 0x80000000) {
			uint32_t srcreg = dwords[1];
			uint32_t dstval = dwords[2];

			/* TODO: not sure what happens w/ payload != 2.. */
			assert(sizedwords == 3);
			assert(srcreg < ARRAY_SIZE(type0_reg_vals));

			/* note: rnn_regname uses a static buf so we can't do
			 * two regname() calls for one printf..
			 */
			printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
			printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);

			dstval += type0_reg_vals[srcreg];

			dump_registers(val, &dstval, 1, level+1);
		/* [gap: else dropped — absolute register payload] */
			dump_registers(val, dwords+1, sizedwords-1, level+1);
1781 static void dump_register_summary(int level
);
1784 cp_event_write(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1786 const char *name
= rnn_enumname(rnn
, "vgt_event_type", dwords
[0]);
1787 printl(2, "%sevent %s\n", levels
[level
], name
);
1789 if (name
&& (options
->gpu_id
> 500)) {
1791 snprintf(eventname
, sizeof(eventname
), "EVENT:%s", name
);
1792 if (!strcmp(name
, "BLIT")) {
1793 do_query(eventname
, 0);
1795 dump_register_summary(level
);
/* dump_register_summary: after a draw/blit, print the current value of
 * every (interesting) register; registers not rewritten since the last
 * draw are skipped unless --allregs.  Temporarily saves/restores the
 * global 'summary' flag around the dump.
 *
 * NOTE(review): reflowed from a line-mangled extraction; several lines
 * (decls, 'continue's, brace structure, summary toggling) were dropped
 * — code tokens below are unchanged, gaps marked.
 */
dump_register_summary(int level)
	/* [gap: 'uint32_t i;' and in_summary handling dropped] */
	bool saved_summary = summary;
	/* [gap: 'summary = false;' presumably dropped — confirm] */

	/* dump current state of registers: */
	printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
	for (i = 0; i < regcnt(); i++) {
		uint32_t regbase = i;
		uint32_t lastval = reg_val(regbase);
		/* skip registers that haven't been updated since last draw/blit: */
		if (!(options->allregs || reg_rewritten(regbase)))
		/* [gap: 'continue;' dropped] */
		if (!reg_written(regbase))
		/* [gap: 'continue;' dropped] */
		if (lastval != lastvals[regbase]) {
			/* [gap: changed-marker print dropped] */
			lastvals[regbase] = lastval;
		if (reg_rewritten(regbase)) {
			/* [gap: rewritten-marker print dropped] */
		printl(2, "\t%08x", lastval);
		dump_register(regbase, lastval, level);
	/* [gap: clear_rewritten() etc. dropped] */
	summary = saved_summary;
1845 draw_indx_common(uint32_t *dwords
, int level
)
1847 uint32_t prim_type
= dwords
[1] & 0x1f;
1848 uint32_t source_select
= (dwords
[1] >> 6) & 0x3;
1849 uint32_t num_indices
= dwords
[2];
1850 const char *primtype
;
1852 primtype
= rnn_enumname(rnn
, "pc_di_primtype", prim_type
);
1854 do_query(primtype
, num_indices
);
1856 printl(2, "%sdraw: %d\n", levels
[level
], draws
[ib
]);
1857 printl(2, "%sprim_type: %s (%d)\n", levels
[level
], primtype
,
1859 printl(2, "%ssource_select: %s (%d)\n", levels
[level
],
1860 rnn_enumname(rnn
, "pc_di_src_sel", source_select
),
1862 printl(2, "%snum_indices: %d\n", levels
[level
], num_indices
);
1864 vertices
+= num_indices
;
/* Index element size field of the CP_DRAW_INDX* packets.  Note the
 * hardware encoding deliberately reuses value 0: INDEX_SIZE_INVALID
 * aliases INDEX_SIZE_16_BIT — this mirrors the hw enum, not a typo.
 */
enum pc_di_index_size {
	INDEX_SIZE_16_BIT = 0,
	INDEX_SIZE_32_BIT = 1,
	INDEX_SIZE_8_BIT = 2,
	INDEX_SIZE_INVALID = 0,
};
/* cp_draw_indx: decode CP_DRAW_INDX; with a 5-dword payload the packet
 * carries an index buffer gpuaddr + byte size, whose elements (8/16/32
 * bit, selected by bits 11/13 of dwords[1]) are printed.  Dummy draws
 * (zero indices) skip the register summary.
 *
 * NOTE(review): reflowed from a line-mangled extraction; some lines
 * (the 8-bit 'idx' decl, brace structure, decls) were dropped — code
 * tokens below are unchanged, gaps marked.
 */
cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
	uint32_t num_indices = draw_indx_common(dwords, level);
	/* [gap: assert/decls dropped] */

	/* if we have an index buffer, dump that: */
	if (sizedwords == 5) {
		void *ptr = hostptr(dwords[3]);
		printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
		printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
		/* [gap: 'if (ptr) {' presumably dropped] */
			enum pc_di_index_size size =
					((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
			/* [gap: 'int i;' dropped] */
			printf("%sidxs: ", levels[level]);
			if (size == INDEX_SIZE_8_BIT) {
				/* [gap: 'uint8_t *idx = ptr;' dropped] */
				for (i = 0; i < dwords[4]; i++)
					printf(" %u", idx[i]);
			} else if (size == INDEX_SIZE_16_BIT) {
				uint16_t *idx = ptr;
				for (i = 0; i < dwords[4]/2; i++)
					printf(" %u", idx[i]);
			} else if (size == INDEX_SIZE_32_BIT) {
				uint32_t *idx = ptr;
				for (i = 0; i < dwords[4]/4; i++)
					printf(" %u", idx[i]);
			/* [gap: trailing newline print dropped] */
			dump_hex(ptr, dwords[4]/4, level+1);

	/* don't bother dumping registers for the dummy draw_indx's.. */
	if (num_indices > 0)
		dump_register_summary(level);
/* cp_draw_indx_2: like CP_DRAW_INDX but with the index buffer embedded
 * inline in the packet (starting at dwords[3]); element width chosen by
 * bits 11/13 of dwords[1].  'sz' accumulates the inline buffer size in
 * bytes for the hexdump.
 *
 * NOTE(review): reflowed from a line-mangled extraction; some lines
 * (8-bit 'idx' decl + sz assignment, brace structure) were dropped —
 * code tokens below are unchanged, gaps marked.
 */
cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
	uint32_t num_indices = draw_indx_common(dwords, level);
	enum pc_di_index_size size =
			((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
	void *ptr = &dwords[3];
	/* [gap: 'int sz = 0;' etc. dropped] */

	/* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
	/* [gap: quiet() guard presumably dropped] */
	printf("%sidxs: ", levels[level]);
	if (size == INDEX_SIZE_8_BIT) {
		/* [gap: 'uint8_t *idx = ptr;' and 'sz = num_indices;' dropped] */
		for (i = 0; i < num_indices; i++)
			printf(" %u", idx[i]);
	} else if (size == INDEX_SIZE_16_BIT) {
		uint16_t *idx = ptr;
		for (i = 0; i < num_indices; i++)
			printf(" %u", idx[i]);
		sz = num_indices * 2;
	} else if (size == INDEX_SIZE_32_BIT) {
		uint32_t *idx = ptr;
		for (i = 0; i < num_indices; i++)
			printf(" %u", idx[i]);
		sz = num_indices * 4;
	/* [gap: trailing newline print dropped] */
	dump_hex(ptr, sz / 4, level+1);

	/* don't bother dumping registers for the dummy draw_indx's.. */
	if (num_indices > 0)
		dump_register_summary(level);
1964 cp_draw_indx_offset(uint32_t *dwords
, uint32_t sizedwords
, int level
)
1966 uint32_t num_indices
= dwords
[2];
1967 uint32_t prim_type
= dwords
[0] & 0x1f;
1969 do_query(rnn_enumname(rnn
, "pc_di_primtype", prim_type
), num_indices
);
1972 /* don't bother dumping registers for the dummy draw_indx's.. */
1973 if (num_indices
> 0)
1974 dump_register_summary(level
);
/* cp_draw_indx_indirect: indexed indirect draw — dumps the index base
 * and the indirect-parameters buffer (49-bit gpu addresses assembled
 * from dword pairs) before the register summary.
 *
 * NOTE(review): reflowed from a line-mangled extraction; decls, the
 * is_64b() branch and the printl lines between address dumps were
 * dropped — code tokens below are unchanged, gaps marked.
 */
cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
	uint32_t prim_type = dwords[0] & 0x1f;
	/* [gap: 'uint64_t addr;' dropped] */

	do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);

	/* [gap: is_64b() branch structure dropped] */
	addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
	/* [gap: printl of addr dropped] */
	dump_gpuaddr_size(addr, level, 0x10, 2);

	addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
	/* [gap: printl of addr dropped] */
	dump_gpuaddr_size(addr, level, 0x10, 2);

	dump_register_summary(level);
/* cp_draw_indirect: non-indexed indirect draw — dumps the
 * indirect-parameters buffer (49-bit gpu address from dwords[1]/[2])
 * then the register summary.
 *
 * NOTE(review): reflowed from a line-mangled extraction; the 'addr'
 * declaration and a printl were dropped — tokens unchanged.
 */
cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
	uint32_t prim_type = dwords[0] & 0x1f;
	/* [gap: 'uint64_t addr;' dropped] */

	do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);

	addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
	dump_gpuaddr_size(addr, level, 0x10, 2);

	dump_register_summary(level);
/* CP_RUN_OPENCL: an a3xx-era compute dispatch; counts as one "COMPUTE"
 * query event and dumps the register summary.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
	do_query("COMPUTE", 1);
	dump_register_summary(level);
}
/* cp_nop: decode CP_NOP payloads.  When --decode-markers is set, the
 * payload bytes are printed as ASCII (the blob embeds marker-like
 * strings in NOPs even though it doesn't use NOP for string_marker).
 *
 * NOTE(review): reflowed from a line-mangled extraction; guard bodies
 * and braces were dropped — tokens unchanged, gaps marked.
 */
cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
	const char *buf = (void *)dwords;
	/* [gap: 'int i;' dropped] */

	// blob doesn't use CP_NOP for string_marker but it does
	// use it for things that end up looking like, but aren't
	/* [gap: rest of comment dropped] */
	if (!options->decode_markers)
	/* [gap: early return dropped] */

	/* payload is sizedwords dwords == 4*sizedwords bytes: */
	for (i = 0; i < 4 * sizedwords; i++) {
		/* [gap: terminator check presumably dropped] */
		if (isascii(buf[i]))
			printf("%c", buf[i]);
	/* [gap: trailing newline print dropped] */
/* cp_indirect: follow a CP_INDIRECT_BUFFER into the referenced IB and
 * recursively dump it.  Handles 32-bit vs 64-bit (a5xx+) address
 * layout, the --once / query-compare dedup, and records the IB
 * base/size for hang triage.
 *
 * NOTE(review): reflowed from a line-mangled extraction; declarations
 * (ibaddr/ibsize), branch structure and ib push/pop lines were dropped
 * — tokens unchanged, gaps marked.  Also note the '%016lx' printf on a
 * uint64_t is not portable (should be PRIx64, as the stderr path does).
 */
cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
	/* traverse indirect buffers */
	/* [gap: 'uint64_t ibaddr; uint32_t ibsize;' dropped] */
	uint32_t *ptr = NULL;

	/* [gap: is_64b() branch header dropped] */
	/* a5xx+.. high 32b of gpu addr, then size: */
	/* [gap: 'ibaddr = dwords[0];' dropped] */
	ibaddr |= ((uint64_t)dwords[1]) << 32;
	/* [gap: ibsize assignment + else branch dropped] */

	printf("%sibaddr:%016lx\n", levels[level], ibaddr);
	printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
	printf("%sibsize:%08x\n", levels[level], ibsize);

	if (options->once && has_dumped(ibaddr, enable_mask))
	/* [gap: return dropped] */

	/* 'query-compare' mode implies 'once' mode, although we need only to
	 * process the cmdstream for *any* enable_mask mode, since we are
	 * comparing binning vs draw reg values at the same time, ie. it is
	 * not useful to process the same draw in both binning and draw pass.
	 */
	if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
	/* [gap: return dropped] */

	/* map gpuaddr back to hostptr: */
	ptr = hostptr(ibaddr);

	/* If the GPU hung within the target IB, the trigger point will be
	 * just after the current CP_INDIRECT_BUFFER.  Because the IB is
	 * executed but never returns.  Account for this by checking if
	 */
	highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));

	/* [gap: 'if (ptr) { ib++;' etc. dropped] */
	ibs[ib].base = ibaddr;
	ibs[ib].size = ibsize;
	dump_commands(ptr, ibsize, level);
	/* [gap: 'ib--; } else {' dropped] */
	fprintf(stderr, "could not find: %016"PRIx64" (%d)\n", ibaddr, ibsize);
2108 cp_wfi(uint32_t *dwords
, uint32_t sizedwords
, int level
)
/* cp_mem_write: decode CP_MEM_WRITE.  64-bit gens take a 2-dword
 * address followed by payload (re-decoded as a command stream if it
 * looks like pkt4/pkt7); 32-bit gens take a 1-dword address and the
 * payload is dumped as floats.
 *
 * NOTE(review): reflowed from a line-mangled extraction; the is_64b()
 * branch structure was dropped — tokens unchanged.  '%016lx' on a
 * uint64_t is not portable (PRIx64 would be correct).
 */
cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
	/* [gap: quiet() guard and 'if (is_64b()) {' dropped] */
	uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
	printf("%sgpuaddr:%016lx\n", levels[level], gpuaddr);
	dump_hex(&dwords[2], sizedwords-2, level+1);

	if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
		dump_commands(&dwords[2], sizedwords-2, level+1);
	/* [gap: '} else {' dropped — 32-bit path: */
	uint32_t gpuaddr = dwords[0];
	printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
	dump_float((float *)&dwords[1], sizedwords-1, level+1);
/* cp_rmw: decode CP_REG_RMW — reg = (reg & dwords[1]) | dwords[2],
 * and mirror the effect into the shadow register state via reg_set().
 * Note 'and'/'or' are plain identifiers in C (they'd collide with
 * iso646 operator keywords in C++).
 *
 * NOTE(review): reflowed from a line-mangled extraction; the
 * 'if (needs_wfi)' guard around the WFI warning was dropped — tokens
 * unchanged.
 */
cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
	uint32_t val = dwords[0] & 0xffff;
	uint32_t and = dwords[1];
	uint32_t or = dwords[2];
	printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1), and, or);
	/* [gap: 'if (needs_wfi)' guard dropped] */
	printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1), and, or);
	reg_set(val, (reg_val(val) & and) | or);
/* cp_reg_mem: shared decode for CP_REG_TO_MEM / CP_MEM_TO_REG (same
 * layout): base register in dwords[0] bits 0..15, dword count in bits
 * 19..28, destination/source gpuaddr in dwords[1..2].
 *
 * NOTE(review): reflowed from a line-mangled extraction; quiet()
 * guards and brace structure dropped — tokens unchanged.  '%016lx' on
 * uint64_t is not portable (PRIx64 preferred).
 */
cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
	uint32_t val = dwords[0] & 0xffff;
	printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));

	/* [gap: quiet() guard dropped] */
	uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
	printf("%sgpuaddr:%016lx\n", levels[level], gpuaddr);
	void *ptr = hostptr(gpuaddr);
	/* [gap: 'if (ptr) {' dropped] */
	uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
	dump_hex(ptr, cnt, level + 1);
/* NOTE(review): the opening of 'struct draw_state { ... }' (and its
 * count/addr/flags members, referenced by load_group()) was dropped by
 * the extraction; only the tail survives.  state[] is the 32-entry
 * draw-state group table driven by CP_SET_DRAW_STATE.
 */
	uint16_t enable_mask;
struct draw_state state[32];

/* flag bits in the CP_SET_DRAW_STATE per-group header dword: */
#define FLAG_DIRTY 0x1
#define FLAG_DISABLE 0x2
#define FLAG_DISABLE_ALL_GROUPS 0x4
#define FLAG_LOAD_IMMED 0x8

static int draw_mode;
2180 disable_group(unsigned group_id
)
2182 struct draw_state
*ds
= &state
[group_id
];
2183 memset(ds
, 0, sizeof(*ds
));
2187 disable_all_groups(void)
2189 for (unsigned i
= 0; i
< ARRAY_SIZE(state
); i
++)
/* load_group: execute one deferred draw-state group: print its
 * metadata, honor the a6xx per-group enable_mask gating against the
 * current render-pass mode, then recursively decode its command
 * buffer.
 *
 * NOTE(review): reflowed from a line-mangled extraction; the
 * '!ds->count' early-out, quiet() guards and brace structure were
 * dropped — tokens unchanged, gaps marked.
 */
load_group(unsigned group_id, int level)
	struct draw_state *ds = &state[group_id];

	/* [gap: '!ds->count' early return dropped] */

	printl(2, "%sgroup_id: %u\n", levels[level], group_id);
	printl(2, "%scount: %d\n", levels[level], ds->count);
	printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
	printl(2, "%sflags: %x\n", levels[level], ds->flags);

	if (options->gpu_id >= 600) {
		printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);

		if (!(ds->enable_mask & enable_mask)) {
			printl(2, "%s\tskipped!\n\n", levels[level]);
			/* [gap: return dropped] */

	void *ptr = hostptr(ds->addr);
	/* [gap: null check / quiet guard dropped] */
	dump_hex(ptr, ds->count, level+1);
	dump_commands(ptr, ds->count, level+1);
2227 load_all_groups(int level
)
2229 /* sanity check, we should never recursively hit recursion here, and if
2230 * we do bad things happen:
2232 static bool loading_groups
= false;
2233 if (loading_groups
) {
2234 printf("ERROR: nothing in draw state should trigger recursively loading groups!\n");
2237 loading_groups
= true;
2238 for (unsigned i
= 0; i
< ARRAY_SIZE(state
); i
++)
2239 load_group(i
, level
);
2240 loading_groups
= false;
2242 /* in 'query-compare' mode, defer disabling all groups until we have a
2243 * chance to process the query:
2245 if (!options
->query_compare
)
2246 disable_all_groups();
/* cp_set_draw_state: decode CP_SET_DRAW_STATE — a sequence of per-group
 * records (header dword with count/group_id/enable_mask/flags, then a
 * 1- or 2-dword gpuaddr).  Groups are stashed in state[] for deferred
 * execution by load_all_groups(), except FLAG_LOAD_IMMED groups which
 * run immediately.
 *
 * NOTE(review): reflowed from a line-mangled extraction; the loop
 * increment of 'i', 'continue's, is_64b() branch structure and the
 * ds->count/addr/flags assignments were dropped — tokens unchanged.
 */
cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
	/* [gap: 'uint32_t i;' dropped] */
	for (i = 0; i < sizedwords; ) {
		struct draw_state *ds;
		uint32_t count = dwords[i] & 0xffff;
		uint32_t group_id = (dwords[i] >> 24) & 0x1f;
		uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
		uint32_t flags = (dwords[i] >> 16) & 0xf;
		/* [gap: 'uint64_t addr;' + is_64b() branch header dropped] */
		addr = dwords[i + 1];
		addr |= ((uint64_t)dwords[i + 2]) << 32;
		/* [gap: 'i += 3; } else {' dropped] */
		addr = dwords[i + 1];
		/* [gap: 'i += 2; }' dropped] */

		if (flags & FLAG_DISABLE_ALL_GROUPS) {
			disable_all_groups();
			/* [gap: continue dropped] */

		if (flags & FLAG_DISABLE) {
			disable_group(group_id);
			/* [gap: continue dropped] */

		assert(group_id < ARRAY_SIZE(state));
		disable_group(group_id);

		ds = &state[group_id];
		/* [gap: ds->count/addr/flags assignments dropped] */
		ds->enable_mask = enable_mask;

		if (flags & FLAG_LOAD_IMMED) {
			load_group(group_id, level);
			disable_group(group_id);
2299 cp_set_mode(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2301 draw_mode
= dwords
[0];
2304 /* execute compute shader */
2306 cp_exec_cs(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2308 do_query("compute", 0);
2309 dump_register_summary(level
);
/* cp_exec_cs_indirect: compute dispatch with workgroup counts read
 * from a gpu buffer; dumps that buffer then the register summary.
 *
 * NOTE(review): reflowed from a line-mangled extraction; the 'addr'
 * declaration and the 32-bit address path were dropped — tokens
 * unchanged.  '%016llx' assumes unsigned long long == uint64_t.
 */
cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
	/* [gap: 'uint64_t addr;' + is_64b() branch dropped] */
	addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
	/* [gap: else-branch 32-bit addr dropped] */

	printl(3, "%saddr: %016llx\n", levels[level], addr);
	dump_gpuaddr_size(addr, level, 0x10, 2);

	do_query("compute", 0);
	dump_register_summary(level);
2331 cp_set_marker(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2333 render_mode
= rnn_enumname(rnn
, "a6xx_render_mode", dwords
[0] & 0xf);
2335 if (!strcmp(render_mode
, "RM6_BINNING")) {
2336 enable_mask
= MODE_BINNING
;
2337 } else if (!strcmp(render_mode
, "RM6_GMEM")) {
2338 enable_mask
= MODE_GMEM
;
2339 } else if (!strcmp(render_mode
, "RM6_BYPASS")) {
2340 enable_mask
= MODE_BYPASS
;
/* cp_set_render_mode: decode a5xx CP_SET_RENDER_MODE.  Handles the 1-,
 * 5- and 8-dword forms; the 8-dword form carries a second pointer that
 * references further cmdstream, which is recursively decoded.
 *
 * NOTE(review): reflowed from a line-mangled extraction; declarations
 * (addr/len/ptr), several assignments and brace structure were dropped
 * — tokens unchanged, gaps marked.
 */
cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
	/* [gap: addr/len/ptr decls dropped] */

	/* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
	 * not sure if this can come in different sizes.
	 *
	 * First ptr doesn't seem to be cmdstream, second one does.
	 *
	 * Comment from downstream kernel:
	 *
	 * SRM -- set render mode (ex binning, direct render etc)
	 * SRM is set by UMD usually at start of IB to tell CP the type of
	 * KMD needs to set SRM to NULL to indicate CP that rendering is
	 * ------------------------------------------------------------------
	 *
	 * Seems to always be one of these two:
	 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000
	 * 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000
	 */

	assert(options->gpu_id >= 500);

	render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);

	if (sizedwords == 1)
	/* [gap: return dropped] */

	/* [gap: 'addr = dwords[1];' dropped] */
	addr |= ((uint64_t)dwords[2]) << 32;

	/* [gap: quiet() guard dropped] */
	dump_gpuaddr(addr, level+1);

	if (sizedwords == 5)
	/* [gap: return dropped] */

	assert(sizedwords == 8);

	/* [gap: 'len = dwords[5]; addr = dwords[6];' dropped] */
	addr |= ((uint64_t)dwords[7]) << 32;

	printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
	printl(3, "%slen: 0x%x\n", levels[level], len);

	ptr = hostptr(addr);
	/* [gap: null/quiet guards dropped] */
	dump_commands(ptr, len, level+1);
	dump_hex(ptr, len, level+1);
/* cp_compute_checkpoint: decode a5xx CP_COMPUTE_CHECKPOINT — an
 * 8-dword packet carrying a pointer to further cmdstream, which is
 * recursively decoded.
 *
 * NOTE(review): reflowed from a line-mangled extraction; the addr/len/
 * ptr declarations and initial assignments were dropped — tokens
 * unchanged, gaps marked.
 */
cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
	/* [gap: addr/len/ptr decls dropped] */

	assert(options->gpu_id >= 500);
	assert(sizedwords == 8);

	/* [gap: 'addr = dwords[5]; len = dwords[7];' style assignments dropped] */
	addr |= ((uint64_t)dwords[6]) << 32;

	printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
	printl(3, "%slen: 0x%x\n", levels[level], len);

	ptr = hostptr(addr);
	/* [gap: null/quiet guards dropped] */
	dump_commands(ptr, len, level+1);
	dump_hex(ptr, len, level+1);
2441 cp_blit(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2443 do_query(rnn_enumname(rnn
, "cp_blit_cmd", dwords
[0]), 0);
2445 dump_register_summary(level
);
2449 cp_context_reg_bunch(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2453 /* NOTE: seems to write same reg multiple times.. not sure if different parts of
2454 * these are triggered by the FLUSH_SO_n events?? (if that is what they actually
2457 bool saved_summary
= summary
;
2460 for (i
= 0; i
< sizedwords
; i
+= 2) {
2461 dump_register(dwords
[i
+0], dwords
[i
+1], level
+1);
2462 reg_set(dwords
[i
+0], dwords
[i
+1]);
2465 summary
= saved_summary
;
/* CP_REG_WRITE: write a single register (offset in dwords[1], value in
 * dwords[2]); dump it and mirror into the shadow register state.
 */
static void
cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
	uint32_t reg = dwords[1] & 0xffff;

	dump_register(reg, dwords[2], level+1);
	reg_set(reg, dwords[2]);
}
/* cp_set_ctxswitch_ib: follow the context-switch IB (gpuaddr in
 * dwords[0..1], dword count in dwords[2]) and recursively decode it.
 *
 * NOTE(review): reflowed from a line-mangled extraction; the 'ptr'
 * declaration and surrounding guards were dropped — tokens unchanged.
 * 'printf("addr=%lx", ...)' on a uint64_t is not portable (PRIx64).
 */
cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
	/* [gap: addr/ptr decls dropped] */
	uint32_t size = dwords[2] & 0xffff;

	addr = dwords[0] | ((uint64_t)dwords[1] << 32);

	printf("addr=%lx\n", addr);
	ptr = hostptr(addr);
	/* [gap: 'if (ptr) {' dropped] */
	dump_commands(ptr, size, level+1);
2494 cp_skip_ib2_enable_global(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2496 skip_ib2_enable_global
= dwords
[0];
2500 cp_skip_ib2_enable_local(uint32_t *dwords
, uint32_t sizedwords
, int level
)
2502 skip_ib2_enable_local
= dwords
[0];
/* Dispatch table mapping CP packet names (as resolved by pktname())
 * to decoder functions.  The optional third CP() argument marks
 * packets that must first flush deferred draw-state groups
 * (load_all_groups) before decoding.
 *
 * NOTE(review): reflowed from a line-mangled extraction.  The struct's
 * 'const char *name;' member, the nested options struct (dump_commands
 * accesses op->options.load_all_groups), the '} type3_op[] = {' array
 * header, per-generation section comments and the closing '};' were
 * dropped — tokens below unchanged, gaps marked.
 */
#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
static const struct type3_op {
	/* [gap: 'const char *name;' dropped] */
	void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
	/* [gap: nested options-struct wrapper dropped] */
	bool load_all_groups;
/* [gap: '} type3_op[] = {' dropped] */
	CP(INDIRECT_BUFFER, cp_indirect),
	CP(INDIRECT_BUFFER_PFD, cp_indirect),
	CP(WAIT_FOR_IDLE, cp_wfi),
	CP(REG_RMW, cp_rmw),
	CP(REG_TO_MEM, cp_reg_mem),
	CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
	CP(MEM_WRITE, cp_mem_write),
	CP(EVENT_WRITE, cp_event_write),
	CP(RUN_OPENCL, cp_run_cl),
	CP(DRAW_INDX, cp_draw_indx, {.load_all_groups=true}),
	CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups=true}),
	CP(SET_CONSTANT, cp_set_const),
	CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
	CP(WIDE_REG_WRITE, cp_wide_reg_write),

	/* a3xx */
	CP(LOAD_STATE, cp_load_state),
	CP(SET_BIN, cp_set_bin),

	/* a4xx */
	CP(LOAD_STATE4, cp_load_state),
	CP(SET_DRAW_STATE, cp_set_draw_state),
	CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups=true}),
	CP(EXEC_CS, cp_exec_cs, {.load_all_groups=true}),
	CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups=true}),

	/* a5xx */
	CP(SET_RENDER_MODE, cp_set_render_mode),
	CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
	CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
	CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups=true}),
	CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups=true}),
	CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
	CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),

	/* a6xx */
	CP(LOAD_STATE6_GEOM, cp_load_state),
	CP(LOAD_STATE6_FRAG, cp_load_state),
	CP(LOAD_STATE6, cp_load_state),
	CP(SET_MODE, cp_set_mode),
	CP(SET_MARKER, cp_set_marker),
	CP(REG_WRITE, cp_reg_write),
	CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
/* [gap: closing '};' dropped] */
/* Fallback decoder for packets without a dedicated handler: does
 * nothing (the generic header/hex dump in dump_commands() still runs).
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
}
/* get_type3_op: look up the decoder entry for a type3/type7 opcode by
 * its pktname(); unknown/unnamed opcodes fall back to a dummy entry.
 *
 * NOTE(review): reflowed from a line-mangled extraction; the dummy_op
 * initializer body, the '!name' guard and the final fallback return
 * were dropped — tokens unchanged, gaps marked.
 */
static const struct type3_op *
get_type3_op(unsigned opc)
	static const struct type3_op dummy_op = {
	/* [gap: initializer (presumably noop_fxn) and '};' dropped] */
	const char *name = pktname(opc);

	/* [gap: '!name' fallback dropped] */

	for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
		if (!strcmp(name, type3_op[i].name))
			return &type3_op[i];

	/* [gap: 'return &dummy_op;' dropped] */
/* dump_commands: main command-stream walker.  Iterates packets,
 * dispatching on packet type (type0/type4 register writes, type1 pair
 * writes, type2 nops, type3/type7 opcodes via get_type3_op()), with a
 * resync heuristic for bad data on a5xx+.
 *
 * NOTE(review): reflowed from a line-mangled extraction; declarations,
 * several guards, and brace structure were dropped — tokens unchanged,
 * gaps marked.  Also note: there are TWO 'pkt_is_type2' branches in
 * the else-if chain; the second can never be reached — likely a
 * leftover, confirm upstream.
 */
dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
	int dwords_left = sizedwords;
	uint32_t count = 0; /* dword count including packet header */
	/* [gap: 'uint32_t val;' and ib/draw bookkeeping decls dropped] */

	/* [gap: '!dwords' guard dropped] */
	printf("NULL cmd buffer!\n");

	/* [gap: draws[ib] reset etc. dropped] */

	while (dwords_left > 0) {

		current_draw_count = draw_count;

		/* hack, this looks like a -1 underflow, in some versions
		 * when it tries to write zero registers via pkt0
		 */
//		if ((dwords[0] >> 16) == 0xffff)
		/* [gap: hack body dropped] */

		if (pkt_is_type0(dwords[0])) {
			/* [gap: quiet/printl lines dropped] */
			count = type0_pkt_size(dwords[0]) + 1;
			val = type0_pkt_offset(dwords[0]);
			assert(val < regcnt());
			printl(3, "%swrite %s%s (%04x)\n", levels[level+1], regname(val, 1),
					(dwords[0] & 0x8000) ? " (same register)" : "", val);
			dump_registers(val, dwords+1, count-1, level+2);
			/* [gap: quiet guard dropped] */
			dump_hex(dwords, count, level+1);
		} else if (pkt_is_type4(dwords[0])) {
			/* basically the same(ish) as type0 prior to a5xx */
			count = type4_pkt_size(dwords[0]) + 1;
			val = type4_pkt_offset(dwords[0]);
			assert(val < regcnt());
			printl(3, "%swrite %s (%04x)\n", levels[level+1], regname(val, 1), val);
			dump_registers(val, dwords+1, count-1, level+2);
			/* [gap: quiet guard dropped] */
			dump_hex(dwords, count, level+1);
		} else if (pkt_is_type1(dwords[0])) {
			/* [gap: 'count = 3;' dropped] */
			val = dwords[0] & 0xfff;
			printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
			dump_registers(val, dwords+1, 1, level+2);
			val = (dwords[0] >> 12) & 0xfff;
			printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
			dump_registers(val, dwords+2, 1, level+2);
			/* [gap: quiet guard dropped] */
			dump_hex(dwords, count, level+1);
		} else if (pkt_is_type2(dwords[0])) {
			/* [gap: 'count = 1;' dropped] */
			printf("%sNOP\n", levels[level+1]);
			/* [gap: quiet guard dropped] */
			dump_hex(dwords, count, level+1);
		} else if (pkt_is_type3(dwords[0])) {
			count = type3_pkt_size(dwords[0]) + 1;
			val = cp_type3_opcode(dwords[0]);
			const struct type3_op *op = get_type3_op(val);
			if (op->options.load_all_groups)
				load_all_groups(level+1);
			/* [gap: printl/quiet guard dropped] */
			const char *name = pktname(val);
			printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
					rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
					val, count, (dwords[0] & 0x1) ? " (predicated)" : "");
			/* [gap: 'if (name)' guard dropped] */
			dump_domain(dwords+1, count-1, level+2, name);
			op->fxn(dwords+1, count-1, level+1);
			/* [gap: quiet guard dropped] */
			dump_hex(dwords, count, level+1);
		} else if (pkt_is_type7(dwords[0])) {
			count = type7_pkt_size(dwords[0]) + 1;
			val = cp_type7_opcode(dwords[0]);
			const struct type3_op *op = get_type3_op(val);
			if (op->options.load_all_groups)
				load_all_groups(level+1);
			/* [gap: quiet guard dropped] */
			const char *name = pktname(val);
			printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
					rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
			/* [gap: 'val, count);' and 'if (name) {' dropped] */
			/* special hack for two packets that decode the same way
			 */
			if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
					!strcmp(name, "CP_LOAD_STATE6_GEOM"))
				name = "CP_LOAD_STATE6";
			dump_domain(dwords+1, count-1, level+2, name);
			op->fxn(dwords+1, count-1, level+1);
			/* [gap: quiet guard dropped] */
			dump_hex(dwords, count, level+1);
		} else if (pkt_is_type2(dwords[0])) {
			/* [gap: 'count = 1;' dropped — unreachable branch, see note] */
			printl(3, "%snop\n", levels[level+1]);
		/* [gap: 'else {' dropped] */
			/* for 5xx+ we can do a passable job of looking for start of next valid packet: */
			if (options->gpu_id >= 500) {
				while (dwords_left > 0) {
					if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
					/* [gap: break / advance lines dropped] */
					printf("bad type! %08x\n", dwords[0]);
			/* [gap: else branch dropped] */
			printf("bad type! %08x\n", dwords[0]);
			/* [gap: return dropped] */

		dwords_left -= count;
		/* [gap: 'dwords += count;' dropped] */

	if (dwords_left < 0)
		printf("**** this ain't right!! dwords_left=%d\n", dwords_left);