2 * Copyright © 2020 Google, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Decoder for devcoredump traces from drm/msm. In case of a gpu crash/hang,
26 * the coredump should be found in:
28 * /sys/class/devcoredump/devcd<n>/data
30 * The crashdump will hang around for 5min, it can be cleared by writing to
33 * echo 1 > /sys/class/devcoredump/devcd<n>/data
35 * (the driver won't log any new crashdumps until the previous one is cleared
36 * or times out after 5min)
58 #include "instr-a3xx.h"
64 static struct rnn
*rnn_gmu
;
65 static struct rnn
*rnn_control
;
66 static struct rnn
*rnn_pipe
;
68 static struct cffdec_options options
= {
72 static inline bool is_a6xx(void) { return (600 <= options
.gpu_id
) && (options
.gpu_id
< 700); }
73 static inline bool is_a5xx(void) { return (500 <= options
.gpu_id
) && (options
.gpu_id
< 600); }
74 static inline bool is_64b(void) { return options
.gpu_id
>= 500; }
77 * Helpers to read register values:
80 /* read registers that are 64b on 64b GPUs (ie. a5xx+) */
82 regval64(const char *name
)
84 unsigned reg
= regbase(name
);
86 uint64_t val
= reg_val(reg
);
88 val
|= ((uint64_t)reg_val(reg
+ 1)) << 32;
93 regval(const char *name
)
95 unsigned reg
= regbase(name
);
101 * Line reading and string helpers:
104 static char *lastline
;
105 static char *pushedline
;
110 char *r
= pushedline
;
120 if (getline(&r
, &n
, in
) < 0)
131 pushedline
= lastline
;
135 popline_ascii85(uint32_t sizedwords
)
137 const char *line
= popline();
139 /* At this point we expect the ascii85 data to be indented *some*
140 * amount, and to terminate at the end of the line. So just eat
141 * up the leading whitespace.
143 assert(*line
== ' ');
147 uint32_t *buf
= calloc(1, 4 * sizedwords
);
150 while (*line
!= '\n') {
158 for (int i
= 0; (i
< 5) && (*line
!= '\n'); i
++) {
160 accum
+= *line
- '!';
/**
 * Return true if 'line' begins with the prefix 'start'.
 *
 * The original used strstr(line, start) == line, which scans the whole
 * line (O(len(line) * len(start))) even though only a prefix match is
 * wanted.  strncmp() examines at most strlen(start) characters and has
 * identical results for every input, including an empty prefix (which
 * matches any line, same as strstr returning 'line').
 */
static bool
startswith(const char *line, const char *start)
{
	return strncmp(line, start, strlen(start)) == 0;
}
177 parseline(const char *line
, const char *fmt
, ...)
179 int fmtlen
= strlen(fmt
);
183 /* scan fmt string to extract expected # of conversions: */
184 for (int i
= 0; i
< fmtlen
; i
++) {
186 if (i
== (l
- 1)) { /* prev char was %, ie. we have %% */
198 if (vsscanf(line
, fmt
, ap
) != n
) {
199 fprintf(stderr
, "parse error scanning: '%s'\n", fmt
);
205 #define foreach_line_in_section(_line) \
206 for (const char *_line = popline(); _line; _line = popline()) \
207 /* check for start of next section */ \
208 if (_line[0] != ' ') { \
214 * Provide our own disasm assert() handler, so that we can recover
215 * after attempting to disassemble things that might not be valid
219 static bool jmp_env_valid
;
220 static jmp_buf jmp_env
;
223 ir3_assert_handler(const char *expr
, const char *file
, int line
,
226 printf("%s:%u: %s: Assertion `%s' failed.\n", file
, line
, func
, expr
);
232 #define TRY(x) do { \
233 assert(!jmp_env_valid); \
234 if (setjmp(jmp_env) == 0) { \
235 jmp_env_valid = true; \
238 jmp_env_valid = false; \
242 * Decode ringbuffer section:
254 decode_ringbuffer(void)
258 foreach_line_in_section (line
) {
259 if (startswith(line
, " - id:")) {
260 parseline(line
, " - id: %d", &id
);
261 assert(id
< ARRAY_SIZE(ringbuffers
));
262 } else if (startswith(line
, " iova:")) {
263 parseline(line
, " iova: %"PRIx64
, &ringbuffers
[id
].iova
);
264 } else if (startswith(line
, " rptr:")) {
265 parseline(line
, " rptr: %d", &ringbuffers
[id
].rptr
);
266 } else if (startswith(line
, " wptr:")) {
267 parseline(line
, " wptr: %d", &ringbuffers
[id
].wptr
);
268 } else if (startswith(line
, " size:")) {
269 parseline(line
, " size: %d", &ringbuffers
[id
].size
);
270 } else if (startswith(line
, " data: !!ascii85 |")) {
271 ringbuffers
[id
].buf
= popline_ascii85(ringbuffers
[id
].size
/ 4);
272 add_buffer(ringbuffers
[id
].iova
, ringbuffers
[id
].size
, ringbuffers
[id
].buf
);
281 valid_header(uint32_t pkt
)
283 if (options
.gpu_id
>= 500) {
284 return pkt_is_type4(pkt
) || pkt_is_type7(pkt
);
286 /* TODO maybe we can check validish looking pkt3 opc or pkt0
287 * register offset.. the cmds sent by kernel are usually
288 * fairly limited (other than initialization) which confines
289 * the search space a bit..
298 uint64_t rb_base
= regval64("CP_RB_BASE");
300 printf("got rb_base=%"PRIx64
"\n", rb_base
);
302 options
.ibs
[1].base
= regval64("CP_IB1_BASE");
303 options
.ibs
[1].rem
= regval("CP_IB1_REM_SIZE");
304 options
.ibs
[2].base
= regval64("CP_IB2_BASE");
305 options
.ibs
[2].rem
= regval("CP_IB2_REM_SIZE");
307 /* Adjust remaining size to account for cmdstream slurped into ROQ
308 * but not yet consumed by SQE
310 * TODO add support for earlier GPUs once we tease out the needed
311 * registers.. see crashit.c in msmtest for hints.
313 * TODO it would be nice to be able to extract out register bitfields
314 * by name rather than hard-coding this.
317 options
.ibs
[1].rem
+= regval("CP_CSQ_IB1_STAT") >> 16;
318 options
.ibs
[2].rem
+= regval("CP_CSQ_IB2_STAT") >> 16;
321 printf("IB1: %"PRIx64
", %u\n", options
.ibs
[1].base
, options
.ibs
[1].rem
);
322 printf("IB2: %"PRIx64
", %u\n", options
.ibs
[2].base
, options
.ibs
[2].rem
);
324 /* now that we've got the regvals we want, reset register state
325 * so we aren't seeing values from decode_registers();
329 for (int id
= 0; id
< ARRAY_SIZE(ringbuffers
); id
++) {
330 if (ringbuffers
[id
].iova
!= rb_base
)
332 if (!ringbuffers
[id
].size
)
335 printf("found ring!\n");
337 /* The kernel level ringbuffer (RB) wraps around, which
338 * cffdec doesn't really deal with.. so figure out how
339 * many dwords are unread
341 unsigned ringszdw
= ringbuffers
[id
].size
>> 2; /* in dwords */
343 /* helper macro to deal with modulo size math: */
344 #define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
346 /* The rptr will (most likely) have moved past the IB to
347 * userspace cmdstream, so back up a bit, and then advance
348 * until we find a valid start of a packet.. this is going
349 * to be less reliable on a4xx and before (pkt0/pkt3),
350 * compared to pkt4/pkt7 with parity bits
352 const int lookback
= 12;
353 unsigned rptr
= mod_add(ringbuffers
[id
].rptr
, -lookback
);
355 for (int idx
= 0; idx
< lookback
; idx
++) {
356 if (valid_header(ringbuffers
[id
].buf
[rptr
]))
358 rptr
= mod_add(rptr
, 1);
361 unsigned cmdszdw
= mod_add(ringbuffers
[id
].wptr
, -rptr
);
363 printf("got cmdszdw=%d\n", cmdszdw
);
364 uint32_t *buf
= malloc(cmdszdw
* 4);
366 for (int idx
= 0; idx
< cmdszdw
; idx
++) {
367 int p
= mod_add(rptr
, idx
);
368 buf
[idx
] = ringbuffers
[id
].buf
[p
];
371 dump_commands(buf
, cmdszdw
, 0);
377 * Decode 'bos' (buffers) section:
386 foreach_line_in_section (line
) {
387 if (startswith(line
, " - iova:")) {
388 parseline(line
, " - iova: %"PRIx64
, &iova
);
389 } else if (startswith(line
, " size:")) {
390 parseline(line
, " size: %u", &size
);
391 } else if (startswith(line
, " data: !!ascii85 |")) {
392 uint32_t *buf
= popline_ascii85(size
/ 4);
395 dump_hex_ascii(buf
, size
, 1);
397 add_buffer(iova
, size
, buf
);
407 * Decode registers section:
411 dump_register(struct rnn
*rnn
, uint32_t offset
, uint32_t value
)
413 struct rnndecaddrinfo
*info
= rnn_reginfo(rnn
, offset
);
414 if (info
&& info
->typeinfo
) {
415 char *decoded
= rnndec_decodeval(rnn
->vc
, info
->typeinfo
, value
);
416 printf("%s: %s\n", info
->name
, decoded
);
418 printf("%s: %08x\n", info
->name
, value
);
420 printf("<%04x>: %08x\n", offset
, value
);
425 decode_gmu_registers(void)
427 foreach_line_in_section (line
) {
428 uint32_t offset
, value
;
429 parseline(line
, " - { offset: %x, value: %x }", &offset
, &value
);
431 printf("\t%08x\t", value
);
432 dump_register(rnn_gmu
, offset
/4, value
);
437 decode_registers(void)
439 foreach_line_in_section (line
) {
440 uint32_t offset
, value
;
441 parseline(line
, " - { offset: %x, value: %x }", &offset
, &value
);
443 reg_set(offset
/4, value
);
444 printf("\t%08x", value
);
445 dump_register_val(offset
/4, value
, 0);
449 /* similar to registers section, but for banked context regs: */
451 decode_clusters(void)
453 foreach_line_in_section (line
) {
454 if (startswith(line
, " - cluster-name:") ||
455 startswith(line
, " - context:")) {
460 uint32_t offset
, value
;
461 parseline(line
, " - { offset: %x, value: %x }", &offset
, &value
);
463 printf("\t%08x", value
);
464 dump_register_val(offset
/4, value
, 0);
469 * Decode indexed-registers.. these aren't like normal registers, but a
470 * sort of FIFO where successive reads pop out associated debug state.
474 dump_cp_seq_stat(uint32_t *stat
)
476 printf("\t PC: %04x\n", stat
[0]);
479 if (is_a6xx() && valid_header(stat
[0])) {
480 if (pkt_is_type7(stat
[0])) {
481 unsigned opc
= cp_type7_opcode(stat
[0]);
482 const char *name
= pktname(opc
);
484 printf("\tPKT: %s\n", name
);
486 /* Not sure if this case can happen: */
490 for (int i
= 0; i
< 16; i
++) {
491 printf("\t$%02x: %08x\t\t$%02x: %08x\n",
492 i
+ 1, stat
[i
], i
+ 16 + 1, stat
[i
+ 16]);
497 dump_control_regs(uint32_t *regs
)
502 /* Control regs 0x100-0x17f are a scratch space to be used by the
503 * firmware however it wants, unlike lower regs which involve some
504 * fixed-function units. Therefore only these registers get dumped
507 for (uint32_t i
= 0; i
< 0x80; i
++) {
508 printf("\t%08x\t", regs
[i
]);
509 dump_register(rnn_control
, i
+ 0x100, regs
[i
]);
514 dump_cp_ucode_dbg(uint32_t *dbg
)
516 /* Notes on the data:
517 * There seems to be a section every 4096 DWORD's. The sections aren't
518 * all the same size, so the rest of the 4096 DWORD's are filled with
519 * mirrors of the actual data.
522 for (int section
= 0; section
< 6; section
++, dbg
+= 0x1000) {
525 /* Contains scattered data from a630_sqe.fw: */
526 printf("\tSQE instruction cache:\n");
527 dump_hex_ascii(dbg
, 4 * 0x400, 1);
530 printf("\tUnknown 1:\n");
531 dump_hex_ascii(dbg
, 4 * 0x80, 1);
534 printf("\tUnknown 2:\n");
535 dump_hex_ascii(dbg
, 4 * 0x200, 1);
538 printf("\tUnknown 3:\n");
539 dump_hex_ascii(dbg
, 4 * 0x80, 1);
542 /* Don't bother printing this normally */
544 printf("\tSQE packet jumptable contents:\n");
545 dump_hex_ascii(dbg
, 4 * 0x80, 1);
549 printf("\tSQE scratch control regs:\n");
550 dump_control_regs(dbg
);
557 dump_mem_pool_reg_write(unsigned reg
, uint32_t data
, unsigned context
, bool pipe
)
560 struct rnndecaddrinfo
*info
= rnn_reginfo(rnn_pipe
, reg
);
561 printf("\t\twrite %s (%02x) pipe\n", info
->name
, reg
);
563 if (!strcmp(info
->typeinfo
->name
, "void")) {
564 /* registers that ignore their payload */
567 dump_register(rnn_pipe
, reg
, data
);
570 printf("\t\twrite %s (%05x) context %d\n", regname(reg
, 1), reg
, context
);
571 dump_register_val(reg
, data
, 2);
576 dump_mem_pool_chunk(const uint32_t *chunk
)
578 struct __attribute__((packed
)) {
579 bool reg0_enabled
: 1;
580 bool reg1_enabled
: 1;
587 uint32_t reg0_context
: 1;
588 uint32_t reg1_context
: 1;
589 uint32_t padding
: 22;
592 memcpy(&fields
, chunk
, 4 * sizeof(uint32_t));
594 if (fields
.reg0_enabled
) {
595 dump_mem_pool_reg_write(fields
.reg0
, fields
.data0
, fields
.reg0_context
, fields
.reg0_pipe
);
598 if (fields
.reg1_enabled
) {
599 dump_mem_pool_reg_write(fields
.reg1
, fields
.data1
, fields
.reg1_context
, fields
.reg1_pipe
);
604 dump_cp_mem_pool(uint32_t *mempool
)
606 /* The mem pool is a shared pool of memory used for storing in-flight
607 * register writes. There are 6 different queues, one for each
608 * cluster. Writing to $data (or for some special registers, $addr)
609 * pushes data onto the appropriate queue, and each queue is pulled
610 * from by the appropriate cluster. The queues are thus written to
611 * in-order, but may be read out-of-order.
613 * The queues are conceptually divided into 128-bit "chunks", and the
614 * read and write pointers are in units of chunks. These chunks are
615 * organized internally into 8-chunk "blocks", and memory is allocated
616 * dynamically in terms of blocks. Each queue is represented as a
617 * singly-linked list of blocks, as well as 3-bit start/end chunk
618 * pointers that point within the first/last block. The next pointers
619 * are located in a separate array, rather than inline.
622 /* TODO: The firmware CP_MEM_POOL save/restore routines do something
625 * cread $02, [ $00 + 0 ]
626 * and $02, $02, 0x118
628 * brne $02, 0, #label
634 * I think that control register 0 is the GPU version, and some
635 * versions have a smaller mem pool. It seems some models have a mem
636 * pool that's half the size, and a bunch of offsets are shifted
637 * accordingly. Unfortunately the kernel driver's dumping code doesn't
638 * seem to take this into account, even the downstream android driver,
639 * and we don't know which versions 0x8, 0x10, or 0x100 correspond
640 * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
642 bool small_mem_pool
= false;
644 /* The array of next pointers for each block. */
645 const uint32_t *next_pointers
= small_mem_pool
? &mempool
[0x800] : &mempool
[0x1000];
647 /* Maximum number of blocks in the pool, also the size of the pointers
650 const int num_blocks
= small_mem_pool
? 0x30 : 0x80;
652 /* Number of queues */
653 const unsigned num_queues
= 6;
655 /* Unfortunately the per-queue state is a little more complicated than
656 * a simple pair of begin/end pointers. Instead of a single beginning
657 * block, there are *two*, with the property that either the two are
658 * equal or the second is the "next" of the first. Similarly there are
659 * two end blocks. Thus the queue either looks like this:
661 * A -> B -> ... -> C -> D
663 * Or like this, or some combination:
667 * However, there's only one beginning/end chunk offset. Now the
668 * question is, which of A or B is the actual start? I.e. is the chunk
669 * offset an offset inside A or B? It depends. I'll show a typical read
670 * cycle, starting here (read pointer marked with a *) with a chunk
674 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
675 * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
677 * Once the pointer advances far enough, the hardware decides to free
678 * A, after which the read-side state looks like:
681 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
682 * |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
684 * Then after advancing the pointer a bit more, the hardware fetches
685 * the "next" pointer for A and stores it in B:
688 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
689 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
691 * Then the read pointer advances into B, at which point we've come
692 * back to the first state having advanced a whole block:
695 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
696 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
699 * There is a similar cycle for the write pointer. Now, the question
700 * is, how do we know which state we're in? We need to know this to
701 * know whether the pointer (*) is in A or B if they're different. It
702 * seems like there should be some bit somewhere describing this, but
703 * after lots of experimentation I've come up empty-handed. For now we
704 * assume that if the pointer is in the first half, then we're in
705 * either the first or second state and use B, and otherwise we're in
706 * the second or third state and use A. So far I haven't seen anything
707 * that violates this assumption.
712 uint32_t padding0
[7]; /* Mirrors of unk0 */
716 uint32_t first_block
: 32 - 3;
718 uint32_t padding1
[2]; /* Mirrors of writer[4], writer[5] */
721 uint32_t padding2
[7]; /* Mirrors of unk1 */
723 uint32_t writer_second_block
[6];
724 uint32_t padding3
[2];
727 uint32_t padding4
[2];
731 uint32_t first_block
: 32 - 3;
733 uint32_t padding5
[2]; /* Mirrors of reader[4], reader[5] */
736 uint32_t padding6
[7]; /* Mirrors of unk3 */
738 uint32_t reader_second_block
[6];
739 uint32_t padding7
[2];
741 uint32_t block_count
[6];
745 uint32_t padding9
[7]; /* Mirrors of unk4 */
748 const uint32_t *data1_ptr
= small_mem_pool
? &mempool
[0xc00] : &mempool
[0x1800];
749 memcpy(&data1
, data1_ptr
, sizeof(data1
));
751 /* Based on the kernel, the first dword is the mem pool size (in
752 * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
754 const uint32_t *data2_ptr
= small_mem_pool
? &mempool
[0x1000] : &mempool
[0x2000];
755 const int data2_size
= 0x60;
757 /* This seems to be the size of each queue in chunks. */
758 const uint32_t *queue_sizes
= &data2_ptr
[0x18];
760 printf("\tdata2:\n");
761 dump_hex_ascii(data2_ptr
, 4 * data2_size
, 1);
763 /* These seem to be some kind of counter of allocated/deallocated blocks */
765 printf("\tunk0: %x\n", data1
.unk0
);
766 printf("\tunk1: %x\n", data1
.unk1
);
767 printf("\tunk3: %x\n", data1
.unk3
);
768 printf("\tunk4: %x\n\n", data1
.unk4
);
771 for (int queue
= 0; queue
< num_queues
; queue
++) {
772 const char *cluster_names
[6] = {
773 "FE", "SP_VS", "PC_VS", "GRAS", "SP_PS", "PS"
775 printf("\tCLUSTER_%s:\n\n", cluster_names
[queue
]);
778 printf("\t\twriter_first_block: 0x%x\n", data1
.writer
[queue
].first_block
);
779 printf("\t\twriter_second_block: 0x%x\n", data1
.writer_second_block
[queue
]);
780 printf("\t\twriter_chunk: %d\n", data1
.writer
[queue
].chunk
);
781 printf("\t\treader_first_block: 0x%x\n", data1
.reader
[queue
].first_block
);
782 printf("\t\treader_second_block: 0x%x\n", data1
.reader_second_block
[queue
]);
783 printf("\t\treader_chunk: %d\n", data1
.reader
[queue
].chunk
);
784 printf("\t\tblock_count: %d\n", data1
.block_count
[queue
]);
785 printf("\t\tunk2: 0x%x\n", data1
.unk2
[queue
]);
786 printf("\t\tqueue_size: %d\n\n", queue_sizes
[queue
]);
789 uint32_t cur_chunk
= data1
.reader
[queue
].chunk
;
790 uint32_t cur_block
= cur_chunk
> 3 ?
791 data1
.reader
[queue
].first_block
:
792 data1
.reader_second_block
[queue
];
793 uint32_t last_chunk
= data1
.writer
[queue
].chunk
;
794 uint32_t last_block
= last_chunk
> 3 ?
795 data1
.writer
[queue
].first_block
:
796 data1
.writer_second_block
[queue
];
799 printf("\tblock %x\n", cur_block
);
800 if (cur_block
>= num_blocks
) {
801 fprintf(stderr
, "block %x too large\n", cur_block
);
804 unsigned calculated_queue_size
= 0;
805 while (cur_block
!= last_block
|| cur_chunk
!= last_chunk
) {
806 calculated_queue_size
++;
807 uint32_t *chunk_ptr
= &mempool
[cur_block
* 0x20 + cur_chunk
* 4];
809 dump_mem_pool_chunk(chunk_ptr
);
/* NOTE(review): the printed offset below uses "cur_chunk + 4", but the
 * chunk_ptr just above is computed with "cur_chunk * 4" -- these
 * disagree, so the printed address is likely a typo for
 * 4 * (cur_block * 0x20 + cur_chunk * 4).  Confirm against the
 * chunk_ptr computation before relying on the printed offsets.
 */
811 printf("\t%05x: %08x %08x %08x %08x\n",
812 4 * (cur_block
* 0x20 + cur_chunk
+ 4),
813 chunk_ptr
[0], chunk_ptr
[1], chunk_ptr
[2], chunk_ptr
[3]);
816 if (cur_chunk
== 8) {
817 cur_block
= next_pointers
[cur_block
];
819 printf("\tblock %x\n", cur_block
);
820 if (cur_block
>= num_blocks
) {
821 fprintf(stderr
, "block %x too large\n", cur_block
);
827 if (calculated_queue_size
!= queue_sizes
[queue
]) {
828 printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n", calculated_queue_size
);
835 decode_indexed_registers(void)
838 uint32_t sizedwords
= 0;
840 foreach_line_in_section (line
) {
841 if (startswith(line
, " - regs-name:")) {
843 parseline(line
, " - regs-name: %ms", &name
);
844 } else if (startswith(line
, " dwords:")) {
845 parseline(line
, " dwords: %u", &sizedwords
);
846 } else if (startswith(line
, " data: !!ascii85 |")) {
847 uint32_t *buf
= popline_ascii85(sizedwords
);
849 /* some of the sections are pretty large, and are (at least
850 * so far) not useful, so skip them if not in verbose mode:
852 bool dump
= verbose
||
853 !strcmp(name
, "CP_SEQ_STAT") ||
854 !strcmp(name
, "CP_DRAW_STATE") ||
855 !strcmp(name
, "CP_ROQ") ||
858 if (!strcmp(name
, "CP_SEQ_STAT"))
859 dump_cp_seq_stat(buf
);
861 if (!strcmp(name
, "CP_UCODE_DBG_DATA"))
862 dump_cp_ucode_dbg(buf
);
864 /* note that name was typo'd in earlier kernels: */
865 if (!strcmp(name
, "CP_MEMPOOL") || !strcmp(name
, "CP_MEMPOOOL"))
866 dump_cp_mem_pool(buf
);
869 dump_hex_ascii(buf
, 4 * sizedwords
, 1);
880 * Decode shader-blocks:
884 decode_shader_blocks(void)
887 uint32_t sizedwords
= 0;
889 foreach_line_in_section (line
) {
890 if (startswith(line
, " - type:")) {
892 parseline(line
, " - type: %ms", &type
);
893 } else if (startswith(line
, " size:")) {
894 parseline(line
, " size: %u", &sizedwords
);
895 } else if (startswith(line
, " data: !!ascii85 |")) {
896 uint32_t *buf
= popline_ascii85(sizedwords
);
898 /* some of the sections are pretty large, and are (at least
899 * so far) not useful, so skip them if not in verbose mode:
901 bool dump
= verbose
||
902 !strcmp(type
, "A6XX_SP_INST_DATA") ||
903 !strcmp(type
, "A6XX_HLSQ_INST_RAM") ||
906 if (!strcmp(type
, "A6XX_SP_INST_DATA") ||
907 !strcmp(type
, "A6XX_HLSQ_INST_RAM")) {
908 /* TODO this section actually contains multiple shaders
909 * (or parts of shaders?), so perhaps we should search
910 * for ends of shaders and decode each?
912 TRY(disasm_a3xx(buf
, sizedwords
, 1, stdout
, options
.gpu_id
));
916 dump_hex_ascii(buf
, 4 * sizedwords
, 1);
930 * Decode debugbus section:
934 decode_debugbus(void)
937 uint32_t sizedwords
= 0;
939 foreach_line_in_section (line
) {
940 if (startswith(line
, " - debugbus-block:")) {
942 parseline(line
, " - debugbus-block: %ms", &block
);
943 } else if (startswith(line
, " count:")) {
944 parseline(line
, " count: %u", &sizedwords
);
945 } else if (startswith(line
, " data: !!ascii85 |")) {
946 uint32_t *buf
= popline_ascii85(sizedwords
);
948 /* some of the sections are pretty large, and are (at least
949 * so far) not useful, so skip them if not in verbose mode:
951 bool dump
= verbose
||
955 dump_hex_ascii(buf
, 4 * sizedwords
, 1);
967 * Main crashdump decode loop:
975 while ((line
= popline())) {
977 if (startswith(line
, "revision:")) {
978 parseline(line
, "revision: %u", &options
.gpu_id
);
979 printf("Got gpu_id=%u\n", options
.gpu_id
);
981 cffdec_init(&options
);
984 rnn_gmu
= rnn_new(!options
.color
);
985 rnn_load_file(rnn_gmu
, "adreno/a6xx_gmu.xml", "A6XX");
986 rnn_control
= rnn_new(!options
.color
);
987 rnn_load_file(rnn_control
, "adreno/adreno_control_regs.xml", "A6XX_CONTROL_REG");
988 rnn_pipe
= rnn_new(!options
.color
);
989 rnn_load_file(rnn_pipe
, "adreno/adreno_pipe_regs.xml", "A6XX_PIPE_REG");
990 } else if (is_a5xx()) {
991 rnn_control
= rnn_new(!options
.color
);
992 rnn_load_file(rnn_control
, "adreno/adreno_control_regs.xml", "A5XX_CONTROL_REG");
996 } else if (startswith(line
, "bos:")) {
998 } else if (startswith(line
, "ringbuffer:")) {
1000 } else if (startswith(line
, "registers:")) {
1003 /* after we've recorded buffer contents, and CP register values,
1004 * we can take a stab at decoding the cmdstream:
1007 } else if (startswith(line
, "registers-gmu:")) {
1008 decode_gmu_registers();
1009 } else if (startswith(line
, "indexed-registers:")) {
1010 decode_indexed_registers();
1011 } else if (startswith(line
, "shader-blocks:")) {
1012 decode_shader_blocks();
1013 } else if (startswith(line
, "clusters:")) {
1015 } else if (startswith(line
, "debugbus:")) {
1022 * Usage and argument parsing:
1028 fprintf(stderr
, "Usage:\n\n"
1029 "\tcrashdec [-achmsv] [-f FILE]\n\n"
1031 "\t-a, --allregs - show all registers (including ones not written since\n"
1032 "\t previous draw) at each draw\n"
1033 "\t-c, --color - use colors\n"
1034 "\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
1035 "\t-h, --help - this usage message\n"
1036 "\t-m, --markers - try to decode CP_NOP string markers\n"
1037 "\t-s, --summary - don't show individual register writes, but just show\n"
1038 "\t register values on draws\n"
1039 "\t-v, --verbose - dump more verbose output, including contents of\n"
1040 "\t less interesting buffers\n"
1046 static const struct option opts
[] = {
1047 { .name
= "allregs", .has_arg
= 0, NULL
, 'a' },
1048 { .name
= "color", .has_arg
= 0, NULL
, 'c' },
1049 { .name
= "file", .has_arg
= 1, NULL
, 'f' },
1050 { .name
= "help", .has_arg
= 0, NULL
, 'h' },
1051 { .name
= "markers", .has_arg
= 0, NULL
, 'm' },
1052 { .name
= "summary", .has_arg
= 0, NULL
, 's' },
1053 { .name
= "verbose", .has_arg
= 0, NULL
, 'v' },
1057 static bool interactive
;
1070 main(int argc
, char **argv
)
1074 interactive
= isatty(STDOUT_FILENO
);
1075 options
.color
= interactive
;
1077 /* default to read from stdin: */
1080 while ((c
= getopt_long(argc
, argv
, "acf:hmsv", opts
, NULL
)) != -1) {
1083 options
.allregs
= true;
1086 options
.color
= true;
1089 in
= fopen(optarg
, "r");
1092 options
.decode_markers
= true;
1095 options
.summary
= true;