2 * Copyright © 2020 Google, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Decoder for devcoredump traces from drm/msm. In case of a gpu crash/hang,
26 * the coredump should be found in:
28 * /sys/class/devcoredump/devcd<n>/data
30 * The crashdump will hang around for 5min, it can be cleared by writing to
33 * echo 1 > /sys/class/devcoredump/devcd<n>/data
35 * (the driver won't log any new crashdumps until the previous one is cleared
36 * or times out after 5min)
58 #include "ir3/instr-a3xx.h"
64 static struct rnn
*rnn_gmu
;
65 static struct rnn
*rnn_control
;
66 static struct rnn
*rnn_pipe
;
68 static struct cffdec_options options
= {
72 static inline bool is_a6xx(void) { return (600 <= options
.gpu_id
) && (options
.gpu_id
< 700); }
73 static inline bool is_a5xx(void) { return (500 <= options
.gpu_id
) && (options
.gpu_id
< 600); }
74 static inline bool is_64b(void) { return options
.gpu_id
>= 500; }
77 * Helpers to read register values:
80 /* read registers that are 64b on 64b GPUs (ie. a5xx+) */
82 regval64(const char *name
)
84 unsigned reg
= regbase(name
);
86 uint64_t val
= reg_val(reg
);
88 val
|= ((uint64_t)reg_val(reg
+ 1)) << 32;
93 regval(const char *name
)
95 unsigned reg
= regbase(name
);
101 * Line reading and string helpers:
/* Replace the first occurrence of @find in @line with @replace.
 *
 * @line must be a heap-allocated string owned by the caller.  When @find
 * does not occur, @line itself is returned untouched.  Otherwise a newly
 * allocated string holding the substituted text is returned and @line is
 * freed, so callers must always continue with the returned pointer:
 *
 *    r = replacestr(r, "CP_MEMPOOOL", "CP_MEMPOOL");
 *
 * If the replacement string cannot be allocated, @line is returned
 * unmodified instead of handing back an indeterminate pointer.
 */
static char *
replacestr(char *line, const char *find, const char *replace)
{
	char *s = strstr(line, find);

	if (!s)
		return line;

	const char *tail = s + strlen(find);
	size_t headlen = (size_t)(s - line);
	size_t replen = strlen(replace);
	size_t taillen = strlen(tail);

	char *newline = malloc(headlen + replen + taillen + 1);
	if (!newline)
		return line;

	memcpy(newline, line, headlen);
	memcpy(newline + headlen, replace, replen);
	/* taillen + 1 so the terminating NUL is copied along with the tail */
	memcpy(newline + headlen + replen, tail, taillen + 1);

	free(line);

	return newline;
}
121 static char *lastline
;
122 static char *pushedline
;
127 char *r
= pushedline
;
137 if (getline(&r
, &n
, in
) < 0)
140 /* Handle section name typos from earlier kernels: */
141 r
= replacestr(r
, "CP_MEMPOOOL", "CP_MEMPOOL");
142 r
= replacestr(r
, "CP_SEQ_STAT", "CP_SQE_STAT");
152 pushedline
= lastline
;
156 popline_ascii85(uint32_t sizedwords
)
158 const char *line
= popline();
160 /* At this point we expect the ascii85 data to be indented *some*
161 * amount, and to terminate at the end of the line. So just eat
162 * up the leading whitespace.
164 assert(*line
== ' ');
168 uint32_t *buf
= calloc(1, 4 * sizedwords
);
171 while (*line
!= '\n') {
179 for (int i
= 0; (i
< 5) && (*line
!= '\n'); i
++) {
181 accum
+= *line
- '!';
/* Return true when @line begins with the prefix @start.
 *
 * Only the first strlen(start) bytes of @line are compared, so a line
 * that does not match is rejected immediately rather than being searched
 * end-to-end for @start.  An empty @start matches every line.
 */
static bool
startswith(const char *line, const char *start)
{
	return strncmp(line, start, strlen(start)) == 0;
}
/* sscanf()-style line parser that treats an incomplete match as fatal.
 *
 * @fmt is scanned first to work out how many values vsscanf() is expected
 * to store.  If the number of conversions actually performed on @line
 * differs, the offending format is reported on stderr and the program
 * exits with status 1.
 *
 * A literal "%%" and an assignment-suppressed conversion such as "%*d"
 * store nothing and are therefore not counted.
 */
static void
parseline(const char *line, const char *fmt, ...)
{
	int fmtlen = strlen(fmt);
	int n = 0;

	/* scan fmt string to extract expected # of conversions: */
	for (int i = 0; i < fmtlen; i++) {
		if (fmt[i] != '%')
			continue;

		/* i < fmtlen, so this reads at worst the terminating NUL */
		char next = fmt[i + 1];

		if (next == '%' || next == '*') {
			/* "%%" or suppressed conversion: nothing is stored, and
			 * the second character must not be rescanned
			 */
			i++;
		} else {
			n++;
		}
	}

	va_list ap;
	va_start(ap, fmt);
	int matched = vsscanf(line, fmt, ap);
	va_end(ap);

	if (matched != n) {
		fprintf(stderr, "parse error scanning: '%s'\n", fmt);
		exit(1);
	}
}
226 #define foreach_line_in_section(_line) \
227 for (const char *_line = popline(); _line; _line = popline()) \
228 /* check for start of next section */ \
229 if (_line[0] != ' ') { \
235 * Provide our own disasm assert() handler, so that we can recover
236 * after attempting to disassemble things that might not be valid
240 static bool jmp_env_valid
;
241 static jmp_buf jmp_env
;
244 ir3_assert_handler(const char *expr
, const char *file
, int line
,
247 printf("\n%s:%u: %s: Assertion `%s' failed.\n", file
, line
, func
, expr
);
253 #define TRY(x) do { \
254 assert(!jmp_env_valid); \
255 if (setjmp(jmp_env) == 0) { \
256 jmp_env_valid = true; \
259 jmp_env_valid = false; \
263 * Decode ringbuffer section:
275 decode_ringbuffer(void)
279 foreach_line_in_section (line
) {
280 if (startswith(line
, " - id:")) {
281 parseline(line
, " - id: %d", &id
);
282 assert(id
< ARRAY_SIZE(ringbuffers
));
283 } else if (startswith(line
, " iova:")) {
284 parseline(line
, " iova: %"PRIx64
, &ringbuffers
[id
].iova
);
285 } else if (startswith(line
, " rptr:")) {
286 parseline(line
, " rptr: %d", &ringbuffers
[id
].rptr
);
287 } else if (startswith(line
, " wptr:")) {
288 parseline(line
, " wptr: %d", &ringbuffers
[id
].wptr
);
289 } else if (startswith(line
, " size:")) {
290 parseline(line
, " size: %d", &ringbuffers
[id
].size
);
291 } else if (startswith(line
, " data: !!ascii85 |")) {
292 ringbuffers
[id
].buf
= popline_ascii85(ringbuffers
[id
].size
/ 4);
293 add_buffer(ringbuffers
[id
].iova
, ringbuffers
[id
].size
, ringbuffers
[id
].buf
);
302 valid_header(uint32_t pkt
)
304 if (options
.gpu_id
>= 500) {
305 return pkt_is_type4(pkt
) || pkt_is_type7(pkt
);
307 /* TODO maybe we can check validish looking pkt3 opc or pkt0
308 * register offset.. the cmds sent by kernel are usually
309 * fairly limited (other than initialization) which confines
310 * the search space a bit..
319 uint64_t rb_base
= regval64("CP_RB_BASE");
321 printf("got rb_base=%"PRIx64
"\n", rb_base
);
323 options
.ibs
[1].base
= regval64("CP_IB1_BASE");
324 options
.ibs
[1].rem
= regval("CP_IB1_REM_SIZE");
325 options
.ibs
[2].base
= regval64("CP_IB2_BASE");
326 options
.ibs
[2].rem
= regval("CP_IB2_REM_SIZE");
328 /* Adjust remaining size to account for cmdstream slurped into ROQ
329 * but not yet consumed by SQE
331 * TODO add support for earlier GPUs once we tease out the needed
332 * registers.. see crashit.c in msmtest for hints.
334 * TODO it would be nice to be able to extract out register bitfields
335 * by name rather than hard-coding this.
338 options
.ibs
[1].rem
+= regval("CP_CSQ_IB1_STAT") >> 16;
339 options
.ibs
[2].rem
+= regval("CP_CSQ_IB2_STAT") >> 16;
342 printf("IB1: %"PRIx64
", %u\n", options
.ibs
[1].base
, options
.ibs
[1].rem
);
343 printf("IB2: %"PRIx64
", %u\n", options
.ibs
[2].base
, options
.ibs
[2].rem
);
345 /* now that we've got the regvals we want, reset register state
346 * so we aren't seeing values from decode_registers();
350 for (int id
= 0; id
< ARRAY_SIZE(ringbuffers
); id
++) {
351 if (ringbuffers
[id
].iova
!= rb_base
)
353 if (!ringbuffers
[id
].size
)
356 printf("found ring!\n");
358 /* The kernel level ringbuffer (RB) wraps around, which
359 * cffdec doesn't really deal with.. so figure out how
360 * many dwords are unread
362 unsigned ringszdw
= ringbuffers
[id
].size
>> 2; /* in dwords */
364 /* helper macro to deal with modulo size math: */
365 #define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
367 /* The rptr will (most likely) have moved past the IB to
368 * userspace cmdstream, so back up a bit, and then advance
369 * until we find a valid start of a packet.. this is going
370 * to be less reliable on a4xx and before (pkt0/pkt3),
371 * compared to pkt4/pkt7 with parity bits
373 const int lookback
= 12;
374 unsigned rptr
= mod_add(ringbuffers
[id
].rptr
, -lookback
);
376 for (int idx
= 0; idx
< lookback
; idx
++) {
377 if (valid_header(ringbuffers
[id
].buf
[rptr
]))
379 rptr
= mod_add(rptr
, 1);
382 unsigned cmdszdw
= mod_add(ringbuffers
[id
].wptr
, -rptr
);
384 printf("got cmdszdw=%d\n", cmdszdw
);
385 uint32_t *buf
= malloc(cmdszdw
* 4);
387 for (int idx
= 0; idx
< cmdszdw
; idx
++) {
388 int p
= mod_add(rptr
, idx
);
389 buf
[idx
] = ringbuffers
[id
].buf
[p
];
392 dump_commands(buf
, cmdszdw
, 0);
398 * Decode 'bos' (buffers) section:
407 foreach_line_in_section (line
) {
408 if (startswith(line
, " - iova:")) {
409 parseline(line
, " - iova: %"PRIx64
, &iova
);
410 } else if (startswith(line
, " size:")) {
411 parseline(line
, " size: %u", &size
);
412 } else if (startswith(line
, " data: !!ascii85 |")) {
413 uint32_t *buf
= popline_ascii85(size
/ 4);
416 dump_hex_ascii(buf
, size
, 1);
418 add_buffer(iova
, size
, buf
);
428 * Decode registers section:
432 dump_register(struct rnn
*rnn
, uint32_t offset
, uint32_t value
)
434 struct rnndecaddrinfo
*info
= rnn_reginfo(rnn
, offset
);
435 if (info
&& info
->typeinfo
) {
436 char *decoded
= rnndec_decodeval(rnn
->vc
, info
->typeinfo
, value
);
437 printf("%s: %s\n", info
->name
, decoded
);
439 printf("%s: %08x\n", info
->name
, value
);
441 printf("<%04x>: %08x\n", offset
, value
);
446 decode_gmu_registers(void)
448 foreach_line_in_section (line
) {
449 uint32_t offset
, value
;
450 parseline(line
, " - { offset: %x, value: %x }", &offset
, &value
);
452 printf("\t%08x\t", value
);
453 dump_register(rnn_gmu
, offset
/4, value
);
458 decode_registers(void)
460 foreach_line_in_section (line
) {
461 uint32_t offset
, value
;
462 parseline(line
, " - { offset: %x, value: %x }", &offset
, &value
);
464 reg_set(offset
/4, value
);
465 printf("\t%08x", value
);
466 dump_register_val(offset
/4, value
, 0);
470 /* similar to registers section, but for banked context regs: */
472 decode_clusters(void)
474 foreach_line_in_section (line
) {
475 if (startswith(line
, " - cluster-name:") ||
476 startswith(line
, " - context:")) {
481 uint32_t offset
, value
;
482 parseline(line
, " - { offset: %x, value: %x }", &offset
, &value
);
484 printf("\t%08x", value
);
485 dump_register_val(offset
/4, value
, 0);
490 * Decode indexed-registers.. these aren't like normal registers, but a
491 * sort of FIFO where successive reads pop out associated debug state.
495 dump_cp_sqe_stat(uint32_t *stat
)
497 printf("\t PC: %04x\n", stat
[0]);
500 if (is_a6xx() && valid_header(stat
[0])) {
501 if (pkt_is_type7(stat
[0])) {
502 unsigned opc
= cp_type7_opcode(stat
[0]);
503 const char *name
= pktname(opc
);
505 printf("\tPKT: %s\n", name
);
507 /* Not sure if this case can happen: */
511 for (int i
= 0; i
< 16; i
++) {
512 printf("\t$%02x: %08x\t\t$%02x: %08x\n",
513 i
+ 1, stat
[i
], i
+ 16 + 1, stat
[i
+ 16]);
518 dump_control_regs(uint32_t *regs
)
523 /* Control regs 0x100-0x17f are a scratch space to be used by the
524 * firmware however it wants, unlike lower regs which involve some
525 * fixed-function units. Therefore only these registers get dumped
528 for (uint32_t i
= 0; i
< 0x80; i
++) {
529 printf("\t%08x\t", regs
[i
]);
530 dump_register(rnn_control
, i
+ 0x100, regs
[i
]);
535 dump_cp_ucode_dbg(uint32_t *dbg
)
537 /* Notes on the data:
538 * There seems to be a section every 4096 DWORDs. The sections aren't
539 * all the same size, so the rest of the 4096 DWORDs are filled with
540 * mirrors of the actual data.
543 for (int section
= 0; section
< 6; section
++, dbg
+= 0x1000) {
546 /* Contains scattered data from a630_sqe.fw: */
547 printf("\tSQE instruction cache:\n");
548 dump_hex_ascii(dbg
, 4 * 0x400, 1);
551 printf("\tUnknown 1:\n");
552 dump_hex_ascii(dbg
, 4 * 0x80, 1);
555 printf("\tUnknown 2:\n");
556 dump_hex_ascii(dbg
, 4 * 0x200, 1);
559 printf("\tUnknown 3:\n");
560 dump_hex_ascii(dbg
, 4 * 0x80, 1);
563 /* Don't bother printing this normally */
565 printf("\tSQE packet jumptable contents:\n");
566 dump_hex_ascii(dbg
, 4 * 0x80, 1);
570 printf("\tSQE scratch control regs:\n");
571 dump_control_regs(dbg
);
578 dump_mem_pool_reg_write(unsigned reg
, uint32_t data
, unsigned context
, bool pipe
)
581 struct rnndecaddrinfo
*info
= rnn_reginfo(rnn_pipe
, reg
);
582 printf("\t\twrite %s (%02x) pipe\n", info
->name
, reg
);
584 if (!strcmp(info
->typeinfo
->name
, "void")) {
585 /* registers that ignore their payload */
588 dump_register(rnn_pipe
, reg
, data
);
591 printf("\t\twrite %s (%05x) context %d\n", regname(reg
, 1), reg
, context
);
592 dump_register_val(reg
, data
, 2);
597 dump_mem_pool_chunk(const uint32_t *chunk
)
599 struct __attribute__((packed
)) {
600 bool reg0_enabled
: 1;
601 bool reg1_enabled
: 1;
608 uint32_t reg0_context
: 1;
609 uint32_t reg1_context
: 1;
610 uint32_t padding
: 22;
613 memcpy(&fields
, chunk
, 4 * sizeof(uint32_t));
615 if (fields
.reg0_enabled
) {
616 dump_mem_pool_reg_write(fields
.reg0
, fields
.data0
, fields
.reg0_context
, fields
.reg0_pipe
);
619 if (fields
.reg1_enabled
) {
620 dump_mem_pool_reg_write(fields
.reg1
, fields
.data1
, fields
.reg1_context
, fields
.reg1_pipe
);
625 dump_cp_mem_pool(uint32_t *mempool
)
627 /* The mem pool is a shared pool of memory used for storing in-flight
628 * register writes. There are 6 different queues, one for each
629 * cluster. Writing to $data (or for some special registers, $addr)
630 * pushes data onto the appropriate queue, and each queue is pulled
631 * from by the appropriate cluster. The queues are thus written to
632 * in-order, but may be read out-of-order.
634 * The queues are conceptually divided into 128-bit "chunks", and the
635 * read and write pointers are in units of chunks. These chunks are
636 * organized internally into 8-chunk "blocks", and memory is allocated
637 * dynamically in terms of blocks. Each queue is represented as a
638 * singly-linked list of blocks, as well as 3-bit start/end chunk
639 * pointers that point within the first/last block. The next pointers
640 * are located in a separate array, rather than inline.
643 /* TODO: The firmware CP_MEM_POOL save/restore routines do something
646 * cread $02, [ $00 + 0 ]
647 * and $02, $02, 0x118
649 * brne $02, 0, #label
655 * I think that control register 0 is the GPU version, and some
656 * versions have a smaller mem pool. It seems some models have a mem
657 * pool that's half the size, and a bunch of offsets are shifted
658 * accordingly. Unfortunately the kernel driver's dumping code doesn't
659 * seem to take this into account, even the downstream android driver,
660 * and we don't know which versions 0x8, 0x10, or 0x100 correspond
661 * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
663 bool small_mem_pool
= false;
665 /* The array of next pointers for each block. */
666 const uint32_t *next_pointers
= small_mem_pool
? &mempool
[0x800] : &mempool
[0x1000];
668 /* Maximum number of blocks in the pool, also the size of the pointers
671 const int num_blocks
= small_mem_pool
? 0x30 : 0x80;
673 /* Number of queues */
674 const unsigned num_queues
= 6;
676 /* Unfortunately the per-queue state is a little more complicated than
677 * a simple pair of begin/end pointers. Instead of a single beginning
678 * block, there are *two*, with the property that either the two are
679 * equal or the second is the "next" of the first. Similarly there are
680 * two end blocks. Thus the queue either looks like this:
682 * A -> B -> ... -> C -> D
684 * Or like this, or some combination:
688 * However, there's only one beginning/end chunk offset. Now the
689 * question is, which of A or B is the actual start? I.e. is the chunk
690 * offset an offset inside A or B? It depends. I'll show a typical read
691 * cycle, starting here (read pointer marked with a *) with a chunk
695 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
696 * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
698 * Once the pointer advances far enough, the hardware decides to free
699 * A, after which the read-side state looks like:
702 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
703 * |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
705 * Then after advancing the pointer a bit more, the hardware fetches
706 * the "next" pointer for A and stores it in B:
709 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
710 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
712 * Then the read pointer advances into B, at which point we've come
713 * back to the first state having advanced a whole block:
716 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
717 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
720 * There is a similar cycle for the write pointer. Now, the question
721 * is, how do we know which state we're in? We need to know this to
722 * know whether the pointer (*) is in A or B if they're different. It
723 * seems like there should be some bit somewhere describing this, but
724 * after lots of experimentation I've come up empty-handed. For now we
725 * assume that if the pointer is in the first half, then we're in
726 * either the first or second state and use B, and otherwise we're in
727 * the second or third state and use A. So far I haven't seen anything
728 * that violates this assumption.
733 uint32_t padding0
[7]; /* Mirrors of unk0 */
737 uint32_t first_block
: 32 - 3;
739 uint32_t padding1
[2]; /* Mirrors of writer[4], writer[5] */
742 uint32_t padding2
[7]; /* Mirrors of unk1 */
744 uint32_t writer_second_block
[6];
745 uint32_t padding3
[2];
748 uint32_t padding4
[2];
752 uint32_t first_block
: 32 - 3;
754 uint32_t padding5
[2]; /* Mirrors of reader[4], reader[5] */
757 uint32_t padding6
[7]; /* Mirrors of unk3 */
759 uint32_t reader_second_block
[6];
760 uint32_t padding7
[2];
762 uint32_t block_count
[6];
766 uint32_t padding9
[7]; /* Mirrors of unk4 */
769 const uint32_t *data1_ptr
= small_mem_pool
? &mempool
[0xc00] : &mempool
[0x1800];
770 memcpy(&data1
, data1_ptr
, sizeof(data1
));
772 /* Based on the kernel, the first dword is the mem pool size (in
773 * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
775 const uint32_t *data2_ptr
= small_mem_pool
? &mempool
[0x1000] : &mempool
[0x2000];
776 const int data2_size
= 0x60;
778 /* This seems to be the size of each queue in chunks. */
779 const uint32_t *queue_sizes
= &data2_ptr
[0x18];
781 printf("\tdata2:\n");
782 dump_hex_ascii(data2_ptr
, 4 * data2_size
, 1);
784 /* These seem to be some kind of counter of allocated/deallocated blocks */
786 printf("\tunk0: %x\n", data1
.unk0
);
787 printf("\tunk1: %x\n", data1
.unk1
);
788 printf("\tunk3: %x\n", data1
.unk3
);
789 printf("\tunk4: %x\n\n", data1
.unk4
);
792 for (int queue
= 0; queue
< num_queues
; queue
++) {
793 const char *cluster_names
[6] = {
794 "FE", "SP_VS", "PC_VS", "GRAS", "SP_PS", "PS"
796 printf("\tCLUSTER_%s:\n\n", cluster_names
[queue
]);
799 printf("\t\twriter_first_block: 0x%x\n", data1
.writer
[queue
].first_block
);
800 printf("\t\twriter_second_block: 0x%x\n", data1
.writer_second_block
[queue
]);
801 printf("\t\twriter_chunk: %d\n", data1
.writer
[queue
].chunk
);
802 printf("\t\treader_first_block: 0x%x\n", data1
.reader
[queue
].first_block
);
803 printf("\t\treader_second_block: 0x%x\n", data1
.reader_second_block
[queue
]);
804 printf("\t\treader_chunk: %d\n", data1
.reader
[queue
].chunk
);
805 printf("\t\tblock_count: %d\n", data1
.block_count
[queue
]);
806 printf("\t\tunk2: 0x%x\n", data1
.unk2
[queue
]);
807 printf("\t\tqueue_size: %d\n\n", queue_sizes
[queue
]);
810 uint32_t cur_chunk
= data1
.reader
[queue
].chunk
;
811 uint32_t cur_block
= cur_chunk
> 3 ?
812 data1
.reader
[queue
].first_block
:
813 data1
.reader_second_block
[queue
];
814 uint32_t last_chunk
= data1
.writer
[queue
].chunk
;
815 uint32_t last_block
= last_chunk
> 3 ?
816 data1
.writer
[queue
].first_block
:
817 data1
.writer_second_block
[queue
];
820 printf("\tblock %x\n", cur_block
);
821 if (cur_block
>= num_blocks
) {
822 fprintf(stderr
, "block %x too large\n", cur_block
);
825 unsigned calculated_queue_size
= 0;
826 while (cur_block
!= last_block
|| cur_chunk
!= last_chunk
) {
827 calculated_queue_size
++;
828 uint32_t *chunk_ptr
= &mempool
[cur_block
* 0x20 + cur_chunk
* 4];
830 dump_mem_pool_chunk(chunk_ptr
);
832 printf("\t%05x: %08x %08x %08x %08x\n",
833 4 * (cur_block
* 0x20 + cur_chunk
+ 4),
834 chunk_ptr
[0], chunk_ptr
[1], chunk_ptr
[2], chunk_ptr
[3]);
837 if (cur_chunk
== 8) {
838 cur_block
= next_pointers
[cur_block
];
840 printf("\tblock %x\n", cur_block
);
841 if (cur_block
>= num_blocks
) {
842 fprintf(stderr
, "block %x too large\n", cur_block
);
848 if (calculated_queue_size
!= queue_sizes
[queue
]) {
849 printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n", calculated_queue_size
);
856 decode_indexed_registers(void)
859 uint32_t sizedwords
= 0;
861 foreach_line_in_section (line
) {
862 if (startswith(line
, " - regs-name:")) {
864 parseline(line
, " - regs-name: %ms", &name
);
865 } else if (startswith(line
, " dwords:")) {
866 parseline(line
, " dwords: %u", &sizedwords
);
867 } else if (startswith(line
, " data: !!ascii85 |")) {
868 uint32_t *buf
= popline_ascii85(sizedwords
);
870 /* some of the sections are pretty large, and are (at least
871 * so far) not useful, so skip them if not in verbose mode:
873 bool dump
= verbose
||
874 !strcmp(name
, "CP_SQE_STAT") ||
875 !strcmp(name
, "CP_DRAW_STATE") ||
876 !strcmp(name
, "CP_ROQ") ||
879 if (!strcmp(name
, "CP_SQE_STAT"))
880 dump_cp_sqe_stat(buf
);
882 if (!strcmp(name
, "CP_UCODE_DBG_DATA"))
883 dump_cp_ucode_dbg(buf
);
885 if (!strcmp(name
, "CP_MEMPOOL"))
886 dump_cp_mem_pool(buf
);
889 dump_hex_ascii(buf
, 4 * sizedwords
, 1);
901 * Decode shader-blocks:
905 decode_shader_blocks(void)
908 uint32_t sizedwords
= 0;
910 foreach_line_in_section (line
) {
911 if (startswith(line
, " - type:")) {
913 parseline(line
, " - type: %ms", &type
);
914 } else if (startswith(line
, " size:")) {
915 parseline(line
, " size: %u", &sizedwords
);
916 } else if (startswith(line
, " data: !!ascii85 |")) {
917 uint32_t *buf
= popline_ascii85(sizedwords
);
919 /* some of the sections are pretty large, and are (at least
920 * so far) not useful, so skip them if not in verbose mode:
922 bool dump
= verbose
||
923 !strcmp(type
, "A6XX_SP_INST_DATA") ||
924 !strcmp(type
, "A6XX_HLSQ_INST_RAM") ||
927 if (!strcmp(type
, "A6XX_SP_INST_DATA") ||
928 !strcmp(type
, "A6XX_HLSQ_INST_RAM")) {
929 /* TODO this section actually contains multiple shaders
930 * (or parts of shaders?), so perhaps we should search
931 * for ends of shaders and decode each?
933 TRY(disasm_a3xx(buf
, sizedwords
, 1, stdout
, options
.gpu_id
));
937 dump_hex_ascii(buf
, 4 * sizedwords
, 1);
951 * Decode debugbus section:
955 decode_debugbus(void)
958 uint32_t sizedwords
= 0;
960 foreach_line_in_section (line
) {
961 if (startswith(line
, " - debugbus-block:")) {
963 parseline(line
, " - debugbus-block: %ms", &block
);
964 } else if (startswith(line
, " count:")) {
965 parseline(line
, " count: %u", &sizedwords
);
966 } else if (startswith(line
, " data: !!ascii85 |")) {
967 uint32_t *buf
= popline_ascii85(sizedwords
);
969 /* some of the sections are pretty large, and are (at least
970 * so far) not useful, so skip them if not in verbose mode:
972 bool dump
= verbose
||
976 dump_hex_ascii(buf
, 4 * sizedwords
, 1);
988 * Main crashdump decode loop:
996 while ((line
= popline())) {
998 if (startswith(line
, "revision:")) {
999 parseline(line
, "revision: %u", &options
.gpu_id
);
1000 printf("Got gpu_id=%u\n", options
.gpu_id
);
1002 cffdec_init(&options
);
1005 rnn_gmu
= rnn_new(!options
.color
);
1006 rnn_load_file(rnn_gmu
, "adreno/a6xx_gmu.xml", "A6XX");
1007 rnn_control
= rnn_new(!options
.color
);
1008 rnn_load_file(rnn_control
, "adreno/adreno_control_regs.xml", "A6XX_CONTROL_REG");
1009 rnn_pipe
= rnn_new(!options
.color
);
1010 rnn_load_file(rnn_pipe
, "adreno/adreno_pipe_regs.xml", "A6XX_PIPE_REG");
1011 } else if (is_a5xx()) {
1012 rnn_control
= rnn_new(!options
.color
);
1013 rnn_load_file(rnn_control
, "adreno/adreno_control_regs.xml", "A5XX_CONTROL_REG");
1017 } else if (startswith(line
, "bos:")) {
1019 } else if (startswith(line
, "ringbuffer:")) {
1020 decode_ringbuffer();
1021 } else if (startswith(line
, "registers:")) {
1024 /* after we've recorded buffer contents, and CP register values,
1025 * we can take a stab at decoding the cmdstream:
1028 } else if (startswith(line
, "registers-gmu:")) {
1029 decode_gmu_registers();
1030 } else if (startswith(line
, "indexed-registers:")) {
1031 decode_indexed_registers();
1032 } else if (startswith(line
, "shader-blocks:")) {
1033 decode_shader_blocks();
1034 } else if (startswith(line
, "clusters:")) {
1036 } else if (startswith(line
, "debugbus:")) {
1043 * Usage and argument parsing:
1049 fprintf(stderr
, "Usage:\n\n"
1050 "\tcrashdec [-achmsv] [-f FILE]\n\n"
1052 "\t-a, --allregs - show all registers (including ones not written since\n"
1053 "\t previous draw) at each draw\n"
1054 "\t-c, --color - use colors\n"
1055 "\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
1056 "\t-h, --help - this usage message\n"
1057 "\t-m, --markers - try to decode CP_NOP string markers\n"
1058 "\t-s, --summary - don't show individual register writes, but just show\n"
1059 "\t register values on draws\n"
1060 "\t-v, --verbose - dump more verbose output, including contents of\n"
1061 "\t less interesting buffers\n"
1067 static const struct option opts
[] = {
1068 { .name
= "allregs", .has_arg
= 0, NULL
, 'a' },
1069 { .name
= "color", .has_arg
= 0, NULL
, 'c' },
1070 { .name
= "file", .has_arg
= 1, NULL
, 'f' },
1071 { .name
= "help", .has_arg
= 0, NULL
, 'h' },
1072 { .name
= "markers", .has_arg
= 0, NULL
, 'm' },
1073 { .name
= "summary", .has_arg
= 0, NULL
, 's' },
1074 { .name
= "verbose", .has_arg
= 0, NULL
, 'v' },
1078 static bool interactive
;
1091 main(int argc
, char **argv
)
1095 interactive
= isatty(STDOUT_FILENO
);
1096 options
.color
= interactive
;
1098 /* default to read from stdin: */
1101 while ((c
= getopt_long(argc
, argv
, "acf:hmsv", opts
, NULL
)) != -1) {
1104 options
.allregs
= true;
1107 options
.color
= true;
1110 in
= fopen(optarg
, "r");
1113 options
.decode_markers
= true;
1116 options
.summary
= true;
1127 disasm_a3xx_set_debug(PRINT_RAW
);