2 * Copyright © 2020 Google, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Decoder for devcoredump traces from drm/msm. In case of a gpu crash/hang,
26 * the coredump should be found in:
28 * /sys/class/devcoredump/devcd<n>/data
30 * The crashdump will hang around for 5min, it can be cleared by writing to
33 * echo 1 > /sys/class/devcoredump/devcd<n>/data
35 * (the driver won't log any new crashdumps until the previous one is cleared
36 * or times out after 5min)
57 #include "ir3/instr-a3xx.h"
/*
 * Register-database handles, created with rnn_new() once the GPU revision
 * line has been parsed:
 *   rnn_gmu     - loaded from adreno/a6xx_gmu.xml
 *   rnn_control - loaded from adreno/adreno_control_regs.xml
 *   rnn_pipe    - loaded from adreno/adreno_pipe_regs.xml
 */
static struct rnn *rnn_gmu;
static struct rnn *rnn_control;
static struct rnn *rnn_pipe;
67 static struct cffdec_options options
= {
71 static inline bool is_a6xx(void) { return (600 <= options
.gpu_id
) && (options
.gpu_id
< 700); }
72 static inline bool is_a5xx(void) { return (500 <= options
.gpu_id
) && (options
.gpu_id
< 600); }
73 static inline bool is_64b(void) { return options
.gpu_id
>= 500; }
76 * Helpers to read register values:
79 /* read registers that are 64b on 64b GPUs (ie. a5xx+) */
81 regval64(const char *name
)
83 unsigned reg
= regbase(name
);
85 uint64_t val
= reg_val(reg
);
87 val
|= ((uint64_t)reg_val(reg
+ 1)) << 32;
92 regval(const char *name
)
94 unsigned reg
= regbase(name
);
100 * Line reading and string helpers:
104 replacestr(char *line
, const char *find
, const char *replace
)
108 if (!(s
= strstr(line
, find
)))
111 tail
= s
+ strlen(find
);
114 asprintf(&newline
, "%.*s%s%s", (int)(s
- line
), line
, replace
, tail
);
/*
 * Line-reader state shared by the line pop/push helpers.
 *
 * pushedline is consulted first when the next line is requested, and is
 * set from lastline when a line is pushed back.
 * NOTE(review): lastline presumably holds the most recently returned line;
 * the code that assigns it is not visible here - confirm.
 */
static char *lastline;
static char *pushedline;
126 char *r
= pushedline
;
136 if (getline(&r
, &n
, in
) < 0)
139 /* Handle section name typos from earlier kernels: */
140 r
= replacestr(r
, "CP_MEMPOOOL", "CP_MEMPOOL");
141 r
= replacestr(r
, "CP_SEQ_STAT", "CP_SQE_STAT");
151 pushedline
= lastline
;
155 popline_ascii85(uint32_t sizedwords
)
157 const char *line
= popline();
159 /* At this point we expect the ascii85 data to be indented *some*
160 * amount, and to terminate at the end of the line. So just eat
161 * up the leading whitespace.
163 assert(*line
== ' ');
167 uint32_t *buf
= calloc(1, 4 * sizedwords
);
170 while (*line
!= '\n') {
178 for (int i
= 0; (i
< 5) && (*line
!= '\n'); i
++) {
180 accum
+= *line
- '!';
/*
 * Return true when `line` begins with the string `start`.
 *
 * Only the first strlen(start) bytes of `line` are compared, so a line
 * that does not match is rejected without being scanned to its end.  An
 * empty `start` matches every line.
 */
static bool
startswith(const char *line, const char *start)
{
	return strncmp(line, start, strlen(start)) == 0;
}
197 parseline(const char *line
, const char *fmt
, ...)
199 int fmtlen
= strlen(fmt
);
203 /* scan fmt string to extract expected # of conversions: */
204 for (int i
= 0; i
< fmtlen
; i
++) {
206 if (i
== (l
- 1)) { /* prev char was %, ie. we have %% */
218 if (vsscanf(line
, fmt
, ap
) != n
) {
219 fprintf(stderr
, "parse error scanning: '%s'\n", fmt
);
225 #define foreach_line_in_section(_line) \
226 for (const char *_line = popline(); _line; _line = popline()) \
227 /* check for start of next section */ \
228 if (_line[0] != ' ') { \
234 * Decode ringbuffer section:
246 decode_ringbuffer(void)
250 foreach_line_in_section (line
) {
251 if (startswith(line
, " - id:")) {
252 parseline(line
, " - id: %d", &id
);
253 assert(id
< ARRAY_SIZE(ringbuffers
));
254 } else if (startswith(line
, " iova:")) {
255 parseline(line
, " iova: %"PRIx64
, &ringbuffers
[id
].iova
);
256 } else if (startswith(line
, " rptr:")) {
257 parseline(line
, " rptr: %d", &ringbuffers
[id
].rptr
);
258 } else if (startswith(line
, " wptr:")) {
259 parseline(line
, " wptr: %d", &ringbuffers
[id
].wptr
);
260 } else if (startswith(line
, " size:")) {
261 parseline(line
, " size: %d", &ringbuffers
[id
].size
);
262 } else if (startswith(line
, " data: !!ascii85 |")) {
263 ringbuffers
[id
].buf
= popline_ascii85(ringbuffers
[id
].size
/ 4);
264 add_buffer(ringbuffers
[id
].iova
, ringbuffers
[id
].size
, ringbuffers
[id
].buf
);
273 valid_header(uint32_t pkt
)
275 if (options
.gpu_id
>= 500) {
276 return pkt_is_type4(pkt
) || pkt_is_type7(pkt
);
278 /* TODO maybe we can check validish looking pkt3 opc or pkt0
279 * register offset.. the cmds sent by kernel are usually
280 * fairly limited (other than initialization) which confines
281 * the search space a bit..
290 uint64_t rb_base
= regval64("CP_RB_BASE");
292 printf("got rb_base=%"PRIx64
"\n", rb_base
);
294 options
.ibs
[1].base
= regval64("CP_IB1_BASE");
295 options
.ibs
[1].rem
= regval("CP_IB1_REM_SIZE");
296 options
.ibs
[2].base
= regval64("CP_IB2_BASE");
297 options
.ibs
[2].rem
= regval("CP_IB2_REM_SIZE");
299 /* Adjust remaining size to account for cmdstream slurped into ROQ
300 * but not yet consumed by SQE
302 * TODO add support for earlier GPUs once we tease out the needed
303 * registers.. see crashit.c in msmtest for hints.
305 * TODO it would be nice to be able to extract out register bitfields
306 * by name rather than hard-coding this.
309 options
.ibs
[1].rem
+= regval("CP_CSQ_IB1_STAT") >> 16;
310 options
.ibs
[2].rem
+= regval("CP_CSQ_IB2_STAT") >> 16;
313 printf("IB1: %"PRIx64
", %u\n", options
.ibs
[1].base
, options
.ibs
[1].rem
);
314 printf("IB2: %"PRIx64
", %u\n", options
.ibs
[2].base
, options
.ibs
[2].rem
);
316 /* now that we've got the regvals we want, reset register state
317 * so we aren't seeing values from decode_registers();
321 for (int id
= 0; id
< ARRAY_SIZE(ringbuffers
); id
++) {
322 if (ringbuffers
[id
].iova
!= rb_base
)
324 if (!ringbuffers
[id
].size
)
327 printf("found ring!\n");
329 /* The kernel level ringbuffer (RB) wraps around, which
330 * cffdec doesn't really deal with.. so figure out how
331 * many dwords are unread
333 unsigned ringszdw
= ringbuffers
[id
].size
>> 2; /* in dwords */
335 /* helper macro to deal with modulo size math: */
336 #define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
338 /* The rptr will (most likely) have moved past the IB to
339 * userspace cmdstream, so back up a bit, and then advance
340 * until we find a valid start of a packet.. this is going
341 * to be less reliable on a4xx and before (pkt0/pkt3),
342 * compared to pkt4/pkt7 with parity bits
344 const int lookback
= 12;
345 unsigned rptr
= mod_add(ringbuffers
[id
].rptr
, -lookback
);
347 for (int idx
= 0; idx
< lookback
; idx
++) {
348 if (valid_header(ringbuffers
[id
].buf
[rptr
]))
350 rptr
= mod_add(rptr
, 1);
353 unsigned cmdszdw
= mod_add(ringbuffers
[id
].wptr
, -rptr
);
355 printf("got cmdszdw=%d\n", cmdszdw
);
356 uint32_t *buf
= malloc(cmdszdw
* 4);
358 for (int idx
= 0; idx
< cmdszdw
; idx
++) {
359 int p
= mod_add(rptr
, idx
);
360 buf
[idx
] = ringbuffers
[id
].buf
[p
];
363 dump_commands(buf
, cmdszdw
, 0);
369 * Decode 'bos' (buffers) section:
378 foreach_line_in_section (line
) {
379 if (startswith(line
, " - iova:")) {
380 parseline(line
, " - iova: %"PRIx64
, &iova
);
381 } else if (startswith(line
, " size:")) {
382 parseline(line
, " size: %u", &size
);
383 } else if (startswith(line
, " data: !!ascii85 |")) {
384 uint32_t *buf
= popline_ascii85(size
/ 4);
387 dump_hex_ascii(buf
, size
, 1);
389 add_buffer(iova
, size
, buf
);
399 * Decode registers section:
403 dump_register(struct rnn
*rnn
, uint32_t offset
, uint32_t value
)
405 struct rnndecaddrinfo
*info
= rnn_reginfo(rnn
, offset
);
406 if (info
&& info
->typeinfo
) {
407 char *decoded
= rnndec_decodeval(rnn
->vc
, info
->typeinfo
, value
);
408 printf("%s: %s\n", info
->name
, decoded
);
410 printf("%s: %08x\n", info
->name
, value
);
412 printf("<%04x>: %08x\n", offset
, value
);
417 decode_gmu_registers(void)
419 foreach_line_in_section (line
) {
420 uint32_t offset
, value
;
421 parseline(line
, " - { offset: %x, value: %x }", &offset
, &value
);
423 printf("\t%08x\t", value
);
424 dump_register(rnn_gmu
, offset
/4, value
);
429 decode_registers(void)
431 foreach_line_in_section (line
) {
432 uint32_t offset
, value
;
433 parseline(line
, " - { offset: %x, value: %x }", &offset
, &value
);
435 reg_set(offset
/4, value
);
436 printf("\t%08x", value
);
437 dump_register_val(offset
/4, value
, 0);
441 /* similar to registers section, but for banked context regs: */
443 decode_clusters(void)
445 foreach_line_in_section (line
) {
446 if (startswith(line
, " - cluster-name:") ||
447 startswith(line
, " - context:")) {
452 uint32_t offset
, value
;
453 parseline(line
, " - { offset: %x, value: %x }", &offset
, &value
);
455 printf("\t%08x", value
);
456 dump_register_val(offset
/4, value
, 0);
461 * Decode indexed-registers.. these aren't like normal registers, but a
462 * sort of FIFO where successive reads pop out associated debug state.
466 dump_cp_sqe_stat(uint32_t *stat
)
468 printf("\t PC: %04x\n", stat
[0]);
471 if (is_a6xx() && valid_header(stat
[0])) {
472 if (pkt_is_type7(stat
[0])) {
473 unsigned opc
= cp_type7_opcode(stat
[0]);
474 const char *name
= pktname(opc
);
476 printf("\tPKT: %s\n", name
);
478 /* Not sure if this case can happen: */
482 for (int i
= 0; i
< 16; i
++) {
483 printf("\t$%02x: %08x\t\t$%02x: %08x\n",
484 i
+ 1, stat
[i
], i
+ 16 + 1, stat
[i
+ 16]);
489 dump_control_regs(uint32_t *regs
)
494 /* Control regs 0x100-0x17f are a scratch space to be used by the
495 * firmware however it wants, unlike lower regs which involve some
496 * fixed-function units. Therefore only these registers get dumped
499 for (uint32_t i
= 0; i
< 0x80; i
++) {
500 printf("\t%08x\t", regs
[i
]);
501 dump_register(rnn_control
, i
+ 0x100, regs
[i
]);
506 dump_cp_ucode_dbg(uint32_t *dbg
)
508 /* Notes on the data:
509 * There seems to be a section every 4096 DWORDs. The sections aren't
510 * all the same size, so the rest of the 4096 DWORDs are filled with
511 * mirrors of the actual data.
514 for (int section
= 0; section
< 6; section
++, dbg
+= 0x1000) {
517 /* Contains scattered data from a630_sqe.fw: */
518 printf("\tSQE instruction cache:\n");
519 dump_hex_ascii(dbg
, 4 * 0x400, 1);
522 printf("\tUnknown 1:\n");
523 dump_hex_ascii(dbg
, 4 * 0x80, 1);
526 printf("\tUnknown 2:\n");
527 dump_hex_ascii(dbg
, 4 * 0x200, 1);
530 printf("\tUnknown 3:\n");
531 dump_hex_ascii(dbg
, 4 * 0x80, 1);
534 /* Don't bother printing this normally */
536 printf("\tSQE packet jumptable contents:\n");
537 dump_hex_ascii(dbg
, 4 * 0x80, 1);
541 printf("\tSQE scratch control regs:\n");
542 dump_control_regs(dbg
);
549 dump_mem_pool_reg_write(unsigned reg
, uint32_t data
, unsigned context
, bool pipe
)
552 struct rnndecaddrinfo
*info
= rnn_reginfo(rnn_pipe
, reg
);
553 printf("\t\twrite %s (%02x) pipe\n", info
->name
, reg
);
555 if (!strcmp(info
->typeinfo
->name
, "void")) {
556 /* registers that ignore their payload */
559 dump_register(rnn_pipe
, reg
, data
);
562 printf("\t\twrite %s (%05x) context %d\n", regname(reg
, 1), reg
, context
);
563 dump_register_val(reg
, data
, 2);
568 dump_mem_pool_chunk(const uint32_t *chunk
)
570 struct __attribute__((packed
)) {
571 bool reg0_enabled
: 1;
572 bool reg1_enabled
: 1;
579 uint32_t reg0_context
: 1;
580 uint32_t reg1_context
: 1;
581 uint32_t padding
: 22;
584 memcpy(&fields
, chunk
, 4 * sizeof(uint32_t));
586 if (fields
.reg0_enabled
) {
587 dump_mem_pool_reg_write(fields
.reg0
, fields
.data0
, fields
.reg0_context
, fields
.reg0_pipe
);
590 if (fields
.reg1_enabled
) {
591 dump_mem_pool_reg_write(fields
.reg1
, fields
.data1
, fields
.reg1_context
, fields
.reg1_pipe
);
596 dump_cp_mem_pool(uint32_t *mempool
)
598 /* The mem pool is a shared pool of memory used for storing in-flight
599 * register writes. There are 6 different queues, one for each
600 * cluster. Writing to $data (or for some special registers, $addr)
601 * pushes data onto the appropriate queue, and each queue is pulled
602 * from by the appropriate cluster. The queues are thus written to
603 * in-order, but may be read out-of-order.
605 * The queues are conceptually divided into 128-bit "chunks", and the
606 * read and write pointers are in units of chunks. These chunks are
607 * organized internally into 8-chunk "blocks", and memory is allocated
608 * dynamically in terms of blocks. Each queue is represented as a
609 * singly-linked list of blocks, as well as 3-bit start/end chunk
610 * pointers that point within the first/last block. The next pointers
611 * are located in a separate array, rather than inline.
614 /* TODO: The firmware CP_MEM_POOL save/restore routines do something
617 * cread $02, [ $00 + 0 ]
618 * and $02, $02, 0x118
620 * brne $02, 0, #label
626 * I think that control register 0 is the GPU version, and some
627 * versions have a smaller mem pool. It seems some models have a mem
628 * pool that's half the size, and a bunch of offsets are shifted
629 * accordingly. Unfortunately the kernel driver's dumping code doesn't
630 * seem to take this into account, even the downstream android driver,
631 * and we don't know which versions 0x8, 0x10, or 0x100 correspond
632 * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
634 bool small_mem_pool
= false;
636 /* The array of next pointers for each block. */
637 const uint32_t *next_pointers
= small_mem_pool
? &mempool
[0x800] : &mempool
[0x1000];
639 /* Maximum number of blocks in the pool, also the size of the pointers
642 const int num_blocks
= small_mem_pool
? 0x30 : 0x80;
644 /* Number of queues */
645 const unsigned num_queues
= 6;
647 /* Unfortunately the per-queue state is a little more complicated than
648 * a simple pair of begin/end pointers. Instead of a single beginning
649 * block, there are *two*, with the property that either the two are
650 * equal or the second is the "next" of the first. Similarly there are
651 * two end blocks. Thus the queue either looks like this:
653 * A -> B -> ... -> C -> D
655 * Or like this, or some combination:
659 * However, there's only one beginning/end chunk offset. Now the
660 * question is, which of A or B is the actual start? I.e. is the chunk
661 * offset an offset inside A or B? It depends. I'll show a typical read
662 * cycle, starting here (read pointer marked with a *) with a chunk
666 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
667 * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
669 * Once the pointer advances far enough, the hardware decides to free
670 * A, after which the read-side state looks like:
673 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
674 * |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
676 * Then after advancing the pointer a bit more, the hardware fetches
677 * the "next" pointer for A and stores it in B:
680 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
681 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
683 * Then the read pointer advances into B, at which point we've come
684 * back to the first state having advanced a whole block:
687 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
688 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
691 * There is a similar cycle for the write pointer. Now, the question
692 * is, how do we know which state we're in? We need to know this to
693 * know whether the pointer (*) is in A or B if they're different. It
694 * seems like there should be some bit somewhere describing this, but
695 * after lots of experimentation I've come up empty-handed. For now we
696 * assume that if the pointer is in the first half, then we're in
697 * either the first or second state and use B, and otherwise we're in
698 * the second or third state and use A. So far I haven't seen anything
699 * that violates this assumption.
704 uint32_t padding0
[7]; /* Mirrors of unk0 */
708 uint32_t first_block
: 32 - 3;
710 uint32_t padding1
[2]; /* Mirrors of writer[4], writer[5] */
713 uint32_t padding2
[7]; /* Mirrors of unk1 */
715 uint32_t writer_second_block
[6];
716 uint32_t padding3
[2];
719 uint32_t padding4
[2];
723 uint32_t first_block
: 32 - 3;
725 uint32_t padding5
[2]; /* Mirrors of reader[4], reader[5] */
728 uint32_t padding6
[7]; /* Mirrors of unk3 */
730 uint32_t reader_second_block
[6];
731 uint32_t padding7
[2];
733 uint32_t block_count
[6];
737 uint32_t padding9
[7]; /* Mirrors of unk4 */
740 const uint32_t *data1_ptr
= small_mem_pool
? &mempool
[0xc00] : &mempool
[0x1800];
741 memcpy(&data1
, data1_ptr
, sizeof(data1
));
743 /* Based on the kernel, the first dword is the mem pool size (in
744 * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
746 const uint32_t *data2_ptr
= small_mem_pool
? &mempool
[0x1000] : &mempool
[0x2000];
747 const int data2_size
= 0x60;
749 /* This seems to be the size of each queue in chunks. */
750 const uint32_t *queue_sizes
= &data2_ptr
[0x18];
752 printf("\tdata2:\n");
753 dump_hex_ascii(data2_ptr
, 4 * data2_size
, 1);
755 /* These seem to be some kind of counter of allocated/deallocated blocks */
757 printf("\tunk0: %x\n", data1
.unk0
);
758 printf("\tunk1: %x\n", data1
.unk1
);
759 printf("\tunk3: %x\n", data1
.unk3
);
760 printf("\tunk4: %x\n\n", data1
.unk4
);
763 for (int queue
= 0; queue
< num_queues
; queue
++) {
764 const char *cluster_names
[6] = {
765 "FE", "SP_VS", "PC_VS", "GRAS", "SP_PS", "PS"
767 printf("\tCLUSTER_%s:\n\n", cluster_names
[queue
]);
770 printf("\t\twriter_first_block: 0x%x\n", data1
.writer
[queue
].first_block
);
771 printf("\t\twriter_second_block: 0x%x\n", data1
.writer_second_block
[queue
]);
772 printf("\t\twriter_chunk: %d\n", data1
.writer
[queue
].chunk
);
773 printf("\t\treader_first_block: 0x%x\n", data1
.reader
[queue
].first_block
);
774 printf("\t\treader_second_block: 0x%x\n", data1
.reader_second_block
[queue
]);
775 printf("\t\treader_chunk: %d\n", data1
.reader
[queue
].chunk
);
776 printf("\t\tblock_count: %d\n", data1
.block_count
[queue
]);
777 printf("\t\tunk2: 0x%x\n", data1
.unk2
[queue
]);
778 printf("\t\tqueue_size: %d\n\n", queue_sizes
[queue
]);
781 uint32_t cur_chunk
= data1
.reader
[queue
].chunk
;
782 uint32_t cur_block
= cur_chunk
> 3 ?
783 data1
.reader
[queue
].first_block
:
784 data1
.reader_second_block
[queue
];
785 uint32_t last_chunk
= data1
.writer
[queue
].chunk
;
786 uint32_t last_block
= last_chunk
> 3 ?
787 data1
.writer
[queue
].first_block
:
788 data1
.writer_second_block
[queue
];
791 printf("\tblock %x\n", cur_block
);
792 if (cur_block
>= num_blocks
) {
793 fprintf(stderr
, "block %x too large\n", cur_block
);
796 unsigned calculated_queue_size
= 0;
797 while (cur_block
!= last_block
|| cur_chunk
!= last_chunk
) {
798 calculated_queue_size
++;
799 uint32_t *chunk_ptr
= &mempool
[cur_block
* 0x20 + cur_chunk
* 4];
801 dump_mem_pool_chunk(chunk_ptr
);
803 printf("\t%05x: %08x %08x %08x %08x\n",
804 4 * (cur_block
* 0x20 + cur_chunk
+ 4),
805 chunk_ptr
[0], chunk_ptr
[1], chunk_ptr
[2], chunk_ptr
[3]);
808 if (cur_chunk
== 8) {
809 cur_block
= next_pointers
[cur_block
];
811 printf("\tblock %x\n", cur_block
);
812 if (cur_block
>= num_blocks
) {
813 fprintf(stderr
, "block %x too large\n", cur_block
);
819 if (calculated_queue_size
!= queue_sizes
[queue
]) {
820 printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n", calculated_queue_size
);
827 decode_indexed_registers(void)
830 uint32_t sizedwords
= 0;
832 foreach_line_in_section (line
) {
833 if (startswith(line
, " - regs-name:")) {
835 parseline(line
, " - regs-name: %ms", &name
);
836 } else if (startswith(line
, " dwords:")) {
837 parseline(line
, " dwords: %u", &sizedwords
);
838 } else if (startswith(line
, " data: !!ascii85 |")) {
839 uint32_t *buf
= popline_ascii85(sizedwords
);
841 /* some of the sections are pretty large, and are (at least
842 * so far) not useful, so skip them if not in verbose mode:
844 bool dump
= verbose
||
845 !strcmp(name
, "CP_SQE_STAT") ||
846 !strcmp(name
, "CP_DRAW_STATE") ||
847 !strcmp(name
, "CP_ROQ") ||
850 if (!strcmp(name
, "CP_SQE_STAT"))
851 dump_cp_sqe_stat(buf
);
853 if (!strcmp(name
, "CP_UCODE_DBG_DATA"))
854 dump_cp_ucode_dbg(buf
);
856 if (!strcmp(name
, "CP_MEMPOOL"))
857 dump_cp_mem_pool(buf
);
860 dump_hex_ascii(buf
, 4 * sizedwords
, 1);
872 * Decode shader-blocks:
876 decode_shader_blocks(void)
879 uint32_t sizedwords
= 0;
881 foreach_line_in_section (line
) {
882 if (startswith(line
, " - type:")) {
884 parseline(line
, " - type: %ms", &type
);
885 } else if (startswith(line
, " size:")) {
886 parseline(line
, " size: %u", &sizedwords
);
887 } else if (startswith(line
, " data: !!ascii85 |")) {
888 uint32_t *buf
= popline_ascii85(sizedwords
);
890 /* some of the sections are pretty large, and are (at least
891 * so far) not useful, so skip them if not in verbose mode:
893 bool dump
= verbose
||
894 !strcmp(type
, "A6XX_SP_INST_DATA") ||
895 !strcmp(type
, "A6XX_HLSQ_INST_RAM") ||
898 if (!strcmp(type
, "A6XX_SP_INST_DATA") ||
899 !strcmp(type
, "A6XX_HLSQ_INST_RAM")) {
900 /* TODO this section actually contains multiple shaders
901 * (or parts of shaders?), so perhaps we should search
902 * for ends of shaders and decode each?
904 try_disasm_a3xx(buf
, sizedwords
, 1, stdout
, options
.gpu_id
);
908 dump_hex_ascii(buf
, 4 * sizedwords
, 1);
922 * Decode debugbus section:
926 decode_debugbus(void)
929 uint32_t sizedwords
= 0;
931 foreach_line_in_section (line
) {
932 if (startswith(line
, " - debugbus-block:")) {
934 parseline(line
, " - debugbus-block: %ms", &block
);
935 } else if (startswith(line
, " count:")) {
936 parseline(line
, " count: %u", &sizedwords
);
937 } else if (startswith(line
, " data: !!ascii85 |")) {
938 uint32_t *buf
= popline_ascii85(sizedwords
);
940 /* some of the sections are pretty large, and are (at least
941 * so far) not useful, so skip them if not in verbose mode:
943 bool dump
= verbose
||
947 dump_hex_ascii(buf
, 4 * sizedwords
, 1);
959 * Main crashdump decode loop:
967 while ((line
= popline())) {
969 if (startswith(line
, "revision:")) {
970 parseline(line
, "revision: %u", &options
.gpu_id
);
971 printf("Got gpu_id=%u\n", options
.gpu_id
);
973 cffdec_init(&options
);
976 rnn_gmu
= rnn_new(!options
.color
);
977 rnn_load_file(rnn_gmu
, "adreno/a6xx_gmu.xml", "A6XX");
978 rnn_control
= rnn_new(!options
.color
);
979 rnn_load_file(rnn_control
, "adreno/adreno_control_regs.xml", "A6XX_CONTROL_REG");
980 rnn_pipe
= rnn_new(!options
.color
);
981 rnn_load_file(rnn_pipe
, "adreno/adreno_pipe_regs.xml", "A6XX_PIPE_REG");
982 } else if (is_a5xx()) {
983 rnn_control
= rnn_new(!options
.color
);
984 rnn_load_file(rnn_control
, "adreno/adreno_control_regs.xml", "A5XX_CONTROL_REG");
988 } else if (startswith(line
, "bos:")) {
990 } else if (startswith(line
, "ringbuffer:")) {
992 } else if (startswith(line
, "registers:")) {
995 /* after we've recorded buffer contents, and CP register values,
996 * we can take a stab at decoding the cmdstream:
999 } else if (startswith(line
, "registers-gmu:")) {
1000 decode_gmu_registers();
1001 } else if (startswith(line
, "indexed-registers:")) {
1002 decode_indexed_registers();
1003 } else if (startswith(line
, "shader-blocks:")) {
1004 decode_shader_blocks();
1005 } else if (startswith(line
, "clusters:")) {
1007 } else if (startswith(line
, "debugbus:")) {
1014 * Usage and argument parsing:
1020 fprintf(stderr
, "Usage:\n\n"
1021 "\tcrashdec [-achmsv] [-f FILE]\n\n"
1023 "\t-a, --allregs - show all registers (including ones not written since\n"
1024 "\t previous draw) at each draw\n"
1025 "\t-c, --color - use colors\n"
1026 "\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
1027 "\t-h, --help - this usage message\n"
1028 "\t-m, --markers - try to decode CP_NOP string markers\n"
1029 "\t-s, --summary - don't show individual register writes, but just show\n"
1030 "\t register values on draws\n"
1031 "\t-v, --verbose - dump more verbose output, including contents of\n"
1032 "\t less interesting buffers\n"
/*
 * Long-option table passed to getopt_long().  Every entry returns the same
 * character as its short option in the "acf:hmsv" optstring, and only
 * --file takes an argument.  The final all-zero entry is the terminator
 * getopt_long() requires to find the end of the array.
 */
static const struct option opts[] = {
	{ .name = "allregs", .has_arg = no_argument,       .flag = NULL, .val = 'a' },
	{ .name = "color",   .has_arg = no_argument,       .flag = NULL, .val = 'c' },
	{ .name = "file",    .has_arg = required_argument, .flag = NULL, .val = 'f' },
	{ .name = "help",    .has_arg = no_argument,       .flag = NULL, .val = 'h' },
	{ .name = "markers", .has_arg = no_argument,       .flag = NULL, .val = 'm' },
	{ .name = "summary", .has_arg = no_argument,       .flag = NULL, .val = 's' },
	{ .name = "verbose", .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ .name = NULL,      .has_arg = 0,                 .flag = NULL, .val = 0   },
};
1049 static bool interactive
;
1062 main(int argc
, char **argv
)
1066 interactive
= isatty(STDOUT_FILENO
);
1067 options
.color
= interactive
;
1069 /* default to read from stdin: */
1072 while ((c
= getopt_long(argc
, argv
, "acf:hmsv", opts
, NULL
)) != -1) {
1075 options
.allregs
= true;
1078 options
.color
= true;
1081 in
= fopen(optarg
, "r");
1084 options
.decode_markers
= true;
1087 options
.summary
= true;
1098 disasm_a3xx_set_debug(PRINT_RAW
);