freedreno/registers: install gzip'd register database
[mesa.git] / src / freedreno / decode / crashdec.c
1 /*
2 * Copyright © 2020 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 /*
25 * Decoder for devcoredump traces from drm/msm. In case of a gpu crash/hang,
26 * the coredump should be found in:
27 *
28 * /sys/class/devcoredump/devcd<n>/data
29 *
30 * The crashdump will hang around for 5min, it can be cleared by writing to
31 * the file, ie:
32 *
33 * echo 1 > /sys/class/devcoredump/devcd<n>/data
34 *
35 * (the driver won't log any new crashdumps until the previous one is cleared
36 * or times out after 5min)
37 */
38
39
40 #include <assert.h>
41 #include <getopt.h>
42 #include <inttypes.h>
43 #include <setjmp.h>
44 #include <stdarg.h>
45 #include <stdbool.h>
46 #include <stdint.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51
52 #include "buffers.h"
53 #include "cffdec.h"
54 #include "disasm.h"
55 #include "pager.h"
56 #include "rnnutil.h"
57 #include "util.h"
58 #include "ir3/instr-a3xx.h"
59
60
61 static FILE *in;
62 static bool verbose;
63
64 static struct rnn *rnn_gmu;
65 static struct rnn *rnn_control;
66 static struct rnn *rnn_pipe;
67
68 static struct cffdec_options options = {
69 .draw_filter = -1,
70 };
71
72 static inline bool is_a6xx(void) { return (600 <= options.gpu_id) && (options.gpu_id < 700); }
73 static inline bool is_a5xx(void) { return (500 <= options.gpu_id) && (options.gpu_id < 600); }
74 static inline bool is_64b(void) { return options.gpu_id >= 500; }
75
76 /*
77 * Helpers to read register values:
78 */
79
/*
 * Look up register `name` and return its recorded value.  On 64b GPUs
 * (ie. a5xx+) the register that follows supplies the upper 32 bits.
 */
static uint64_t
regval64(const char *name)
{
   const unsigned lo_reg = regbase(name);

   assert(lo_reg);

   uint64_t result = reg_val(lo_reg);
   if (!is_64b())
      return result;

   result |= ((uint64_t)reg_val(lo_reg + 1)) << 32;
   return result;
}
91
/* Look up register `name` and return its recorded 32b value. */
static uint32_t
regval(const char *name)
{
   const unsigned offset = regbase(name);

   assert(offset);

   return reg_val(offset);
}
99
100 /*
101 * Line reading and string helpers:
102 */
103
104 static char *lastline;
105 static char *pushedline;
106
107 static const char *
108 popline(void)
109 {
110 char *r = pushedline;
111
112 if (r) {
113 pushedline = NULL;
114 return r;
115 }
116
117 free(lastline);
118
119 size_t n = 0;
120 if (getline(&r, &n, in) < 0)
121 exit(0);
122
123 lastline = r;
124 return r;
125 }
126
127 static void
128 pushline(void)
129 {
130 assert(!pushedline);
131 pushedline = lastline;
132 }
133
/*
 * Read the next line and decode it as ascii85 into a newly allocated,
 * zero-initialized buffer holding `sizedwords` dwords.  The caller owns
 * the returned buffer.
 *
 * A 'z' decodes to one zero dword, any other group of up to five
 * characters decodes to one dword.  Decoding stops at the end of the
 * line or once `sizedwords` dwords have been produced; surplus input is
 * reported on stderr and dropped rather than written past the buffer.
 */
static uint32_t *
popline_ascii85(uint32_t sizedwords)
{
   const char *line = popline();

   /* At this point we expect the ascii85 data to be indented *some*
    * amount, and to terminate at the end of the line.  So just eat
    * up the leading whitespace.
    */
   assert(*line == ' ');
   while (*line == ' ')
      line++;

   /* Always allocate at least one dword, so that an empty section still
    * yields a valid pointer for the caller to use and free.
    */
   uint32_t *buf = calloc(sizedwords ? sizedwords : 1, sizeof(*buf));
   if (!buf) {
      fprintf(stderr, "out of memory decoding ascii85 data\n");
      exit(1);
   }

   uint32_t idx = 0;

   /* The NUL check covers a final line without a trailing newline. */
   while ((*line != '\n') && (*line != '\0')) {
      if (idx >= sizedwords) {
         fprintf(stderr, "ascii85 data exceeds %u dwords, truncating\n",
                 sizedwords);
         break;
      }

      if (*line == 'z') {
         buf[idx++] = 0;
         line++;
         continue;
      }

      uint32_t accum = 0;
      for (int i = 0; (i < 5) && (*line != '\n') && (*line != '\0'); i++) {
         accum *= 85;
         accum += *line - '!';
         line++;
      }

      buf[idx++] = accum;
   }

   return buf;
}
169
/*
 * Return true if `line` begins with `start` (an empty `start` matches
 * every line).  Only the first strlen(start) characters are inspected,
 * so a mismatch does not scan the remainder of the line.
 */
static bool
startswith(const char *line, const char *start)
{
   return strncmp(line, start, strlen(start)) == 0;
}
175
/*
 * Count the conversions in `fmt` that vsscanf() stores and reports in its
 * return value: every '%' directive except the literal "%%" and the
 * assignment-suppressing "%*...".  ("%n" and scansets that contain a '%'
 * are not special-cased; no format used in this file needs them.)
 */
static int
parseline_count_conversions(const char *fmt)
{
   int n = 0;

   while (*fmt) {
      if (*fmt++ != '%')
         continue;
      if (*fmt == '\0')
         break; /* stray '%' at the very end of the format */
      if ((*fmt != '%') && (*fmt != '*'))
         n++;
      fmt++;
   }

   return n;
}

/*
 * Parse `line` according to the scanf-style format `fmt`, storing the
 * results through the variadic pointer arguments.  Unless every
 * conversion in `fmt` was matched, an error is printed to stderr and
 * the process exits with status 1.
 */
static void
parseline(const char *line, const char *fmt, ...)
{
   const int expected = parseline_count_conversions(fmt);

   va_list ap;
   va_start(ap, fmt);
   const int matched = vsscanf(line, fmt, ap);
   va_end(ap);

   if (matched != expected) {
      fprintf(stderr, "parse error scanning: '%s'\n", fmt);
      exit(1);
   }
}
204
/*
 * Iterate over the lines of the current section, binding each one to
 * `linevar`.  A line that does not begin with a space marks the start of
 * the next section: it is pushed back and the loop ends.  The statement
 * following the macro is the loop body (it forms the else branch).
 */
#define foreach_line_in_section(linevar)                                \
   for (const char *linevar = popline(); linevar; linevar = popline()) \
      if (linevar[0] != ' ') {                                          \
         pushline();                                                    \
         break;                                                         \
      } else
212
213 /*
214 * Provide our own disasm assert() handler, so that we can recover
215 * after attempting to disassemble things that might not be valid
216 * instructions:
217 */
218
/* Set while jmp_env holds a context that is safe to longjmp() to. */
static bool jmp_env_valid;
static jmp_buf jmp_env;

/*
 * Report the failed assertion on stdout, then either jump back to the
 * context saved in jmp_env (if one is armed) or abort the process.
 */
void
ir3_assert_handler(const char *expr, const char *file, int line,
                   const char *func)
{
   printf("\n%s:%u: %s: Assertion `%s' failed.\n", file, line, func, expr);

   if (!jmp_env_valid)
      abort();

   longjmp(jmp_env, 1);
}
231
/*
 * Evaluate `x` with jmp_env armed, so that ir3_assert_handler() can
 * longjmp() back here instead of aborting.  The jump target is disarmed
 * again on both the normal and the longjmp() path.  Must not be nested.
 */
#define TRY(x)                        \
   do {                               \
      assert(!jmp_env_valid);         \
      if (!setjmp(jmp_env)) {         \
         jmp_env_valid = true;        \
         x;                           \
      }                               \
      jmp_env_valid = false;          \
   } while (0)
240
241 /*
242 * Decode ringbuffer section:
243 */
244
245 static struct {
246 uint64_t iova;
247 uint32_t rptr;
248 uint32_t wptr;
249 uint32_t size;
250 uint32_t *buf;
251 } ringbuffers[5];
252
253 static void
254 decode_ringbuffer(void)
255 {
256 int id = 0;
257
258 foreach_line_in_section (line) {
259 if (startswith(line, " - id:")) {
260 parseline(line, " - id: %d", &id);
261 assert(id < ARRAY_SIZE(ringbuffers));
262 } else if (startswith(line, " iova:")) {
263 parseline(line, " iova: %"PRIx64, &ringbuffers[id].iova);
264 } else if (startswith(line, " rptr:")) {
265 parseline(line, " rptr: %d", &ringbuffers[id].rptr);
266 } else if (startswith(line, " wptr:")) {
267 parseline(line, " wptr: %d", &ringbuffers[id].wptr);
268 } else if (startswith(line, " size:")) {
269 parseline(line, " size: %d", &ringbuffers[id].size);
270 } else if (startswith(line, " data: !!ascii85 |")) {
271 ringbuffers[id].buf = popline_ascii85(ringbuffers[id].size / 4);
272 add_buffer(ringbuffers[id].iova, ringbuffers[id].size, ringbuffers[id].buf);
273 continue;
274 }
275
276 printf("%s", line);
277 }
278 }
279
280 static bool
281 valid_header(uint32_t pkt)
282 {
283 if (options.gpu_id >= 500) {
284 return pkt_is_type4(pkt) || pkt_is_type7(pkt);
285 } else {
286 /* TODO maybe we can check validish looking pkt3 opc or pkt0
287 * register offset.. the cmds sent by kernel are usually
288 * fairly limited (other than initialization) which confines
289 * the search space a bit..
290 */
291 return true;
292 }
293 }
294
295 static void
296 dump_cmdstream(void)
297 {
298 uint64_t rb_base = regval64("CP_RB_BASE");
299
300 printf("got rb_base=%"PRIx64"\n", rb_base);
301
302 options.ibs[1].base = regval64("CP_IB1_BASE");
303 options.ibs[1].rem = regval("CP_IB1_REM_SIZE");
304 options.ibs[2].base = regval64("CP_IB2_BASE");
305 options.ibs[2].rem = regval("CP_IB2_REM_SIZE");
306
307 /* Adjust remaining size to account for cmdstream slurped into ROQ
308 * but not yet consumed by SQE
309 *
310 * TODO add support for earlier GPUs once we tease out the needed
311 * registers.. see crashit.c in msmtest for hints.
312 *
313 * TODO it would be nice to be able to extract out register bitfields
314 * by name rather than hard-coding this.
315 */
316 if (is_a6xx()) {
317 options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16;
318 options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16;
319 }
320
321 printf("IB1: %"PRIx64", %u\n", options.ibs[1].base, options.ibs[1].rem);
322 printf("IB2: %"PRIx64", %u\n", options.ibs[2].base, options.ibs[2].rem);
323
324 /* now that we've got the regvals we want, reset register state
325 * so we aren't seeing values from decode_registers();
326 */
327 reset_regs();
328
329 for (int id = 0; id < ARRAY_SIZE(ringbuffers); id++) {
330 if (ringbuffers[id].iova != rb_base)
331 continue;
332 if (!ringbuffers[id].size)
333 continue;
334
335 printf("found ring!\n");
336
337 /* The kernel level ringbuffer (RB) wraps around, which
338 * cffdec doesn't really deal with.. so figure out how
339 * many dwords are unread
340 */
341 unsigned ringszdw = ringbuffers[id].size >> 2; /* in dwords */
342
343 /* helper macro to deal with modulo size math: */
344 #define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
345
346 /* The rptr will (most likely) have moved past the IB to
347 * userspace cmdstream, so back up a bit, and then advance
348 * until we find a valid start of a packet.. this is going
349 * to be less reliable on a4xx and before (pkt0/pkt3),
350 * compared to pkt4/pkt7 with parity bits
351 */
352 const int lookback = 12;
353 unsigned rptr = mod_add(ringbuffers[id].rptr, -lookback);
354
355 for (int idx = 0; idx < lookback; idx++) {
356 if (valid_header(ringbuffers[id].buf[rptr]))
357 break;
358 rptr = mod_add(rptr, 1);
359 }
360
361 unsigned cmdszdw = mod_add(ringbuffers[id].wptr, -rptr);
362
363 printf("got cmdszdw=%d\n", cmdszdw);
364 uint32_t *buf = malloc(cmdszdw * 4);
365
366 for (int idx = 0; idx < cmdszdw; idx++) {
367 int p = mod_add(rptr, idx);
368 buf[idx] = ringbuffers[id].buf[p];
369 }
370
371 dump_commands(buf, cmdszdw, 0);
372 free(buf);
373 }
374 }
375
376 /*
377 * Decode 'bos' (buffers) section:
378 */
379
380 static void
381 decode_bos(void)
382 {
383 uint32_t size = 0;
384 uint64_t iova = 0;
385
386 foreach_line_in_section (line) {
387 if (startswith(line, " - iova:")) {
388 parseline(line, " - iova: %"PRIx64, &iova);
389 } else if (startswith(line, " size:")) {
390 parseline(line, " size: %u", &size);
391 } else if (startswith(line, " data: !!ascii85 |")) {
392 uint32_t *buf = popline_ascii85(size / 4);
393
394 if (verbose)
395 dump_hex_ascii(buf, size, 1);
396
397 add_buffer(iova, size, buf);
398
399 continue;
400 }
401
402 printf("%s", line);
403 }
404 }
405
406 /*
407 * Decode registers section:
408 */
409
410 static void
411 dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
412 {
413 struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset);
414 if (info && info->typeinfo) {
415 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
416 printf("%s: %s\n", info->name, decoded);
417 } else if (info) {
418 printf("%s: %08x\n", info->name, value);
419 } else {
420 printf("<%04x>: %08x\n", offset, value);
421 }
422 }
423
424 static void
425 decode_gmu_registers(void)
426 {
427 foreach_line_in_section (line) {
428 uint32_t offset, value;
429 parseline(line, " - { offset: %x, value: %x }", &offset, &value);
430
431 printf("\t%08x\t", value);
432 dump_register(rnn_gmu, offset/4, value);
433 }
434 }
435
436 static void
437 decode_registers(void)
438 {
439 foreach_line_in_section (line) {
440 uint32_t offset, value;
441 parseline(line, " - { offset: %x, value: %x }", &offset, &value);
442
443 reg_set(offset/4, value);
444 printf("\t%08x", value);
445 dump_register_val(offset/4, value, 0);
446 }
447 }
448
449 /* similar to registers section, but for banked context regs: */
450 static void
451 decode_clusters(void)
452 {
453 foreach_line_in_section (line) {
454 if (startswith(line, " - cluster-name:") ||
455 startswith(line, " - context:")) {
456 printf("%s", line);
457 continue;
458 }
459
460 uint32_t offset, value;
461 parseline(line, " - { offset: %x, value: %x }", &offset, &value);
462
463 printf("\t%08x", value);
464 dump_register_val(offset/4, value, 0);
465 }
466 }
467
468 /*
469 * Decode indexed-registers.. these aren't like normal registers, but a
470 * sort of FIFO where successive reads pop out associated debug state.
471 */
472
/*
 * Print the CP_SEQ_STAT contents: the first dword as PC, followed by 32
 * dwords shown in two columns as $01..$10 and $11..$20.  On a6xx, if the
 * first of those 32 dwords is a type7 packet header with a known opcode,
 * the packet name is printed as well.  Reads 33 dwords from `stat`.
 */
static void
dump_cp_seq_stat(uint32_t *stat)
{
   const uint32_t pc = stat[0];
   const uint32_t *regs = &stat[1];

   printf("\t PC: %04x\n", pc);

   /* Not sure whether a valid header that isn't type7 can happen here;
    * nothing is printed for that case.
    */
   if (is_a6xx() && valid_header(regs[0]) && pkt_is_type7(regs[0])) {
      const char *name = pktname(cp_type7_opcode(regs[0]));
      if (name)
         printf("\tPKT: %s\n", name);
   }

   for (int lo = 0, hi = 16; lo < 16; lo++, hi++) {
      printf("\t$%02x: %08x\t\t$%02x: %08x\n",
             lo + 1, regs[lo], hi + 1, regs[hi]);
   }
}
495
496 static void
497 dump_control_regs(uint32_t *regs)
498 {
499 if (!rnn_control)
500 return;
501
502 /* Control regs 0x100-0x17f are a scratch space to be used by the
503 * firmware however it wants, unlike lower regs which involve some
504 * fixed-function units. Therefore only these registers get dumped
505 * directly.
506 */
507 for (uint32_t i = 0; i < 0x80; i++) {
508 printf("\t%08x\t", regs[i]);
509 dump_register(rnn_control, i + 0x100, regs[i]);
510 }
511 }
512
513 static void
514 dump_cp_ucode_dbg(uint32_t *dbg)
515 {
516 /* Notes on the data:
517 * There seems to be a section every 4096 DWORD's. The sections aren't
518 * all the same size, so the rest of the 4096 DWORD's are filled with
519 * mirrors of the actual data.
520 */
521
522 for (int section = 0; section < 6; section++, dbg += 0x1000) {
523 switch (section) {
524 case 0:
525 /* Contains scattered data from a630_sqe.fw: */
526 printf("\tSQE instruction cache:\n");
527 dump_hex_ascii(dbg, 4 * 0x400, 1);
528 break;
529 case 1:
530 printf("\tUnknown 1:\n");
531 dump_hex_ascii(dbg, 4 * 0x80, 1);
532 break;
533 case 2:
534 printf("\tUnknown 2:\n");
535 dump_hex_ascii(dbg, 4 * 0x200, 1);
536 break;
537 case 3:
538 printf("\tUnknown 3:\n");
539 dump_hex_ascii(dbg, 4 * 0x80, 1);
540 break;
541 case 4:
542 /* Don't bother printing this normally */
543 if (verbose) {
544 printf("\tSQE packet jumptable contents:\n");
545 dump_hex_ascii(dbg, 4 * 0x80, 1);
546 }
547 break;
548 case 5:
549 printf("\tSQE scratch control regs:\n");
550 dump_control_regs(dbg);
551 break;
552 }
553 }
554 }
555
556 static void
557 dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context, bool pipe)
558 {
559 if (pipe) {
560 struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
561 printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
562
563 if (!strcmp(info->typeinfo->name, "void")) {
564 /* registers that ignore their payload */
565 } else {
566 printf("\t\t\t");
567 dump_register(rnn_pipe, reg, data);
568 }
569 } else {
570 printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
571 dump_register_val(reg, data, 2);
572 }
573 }
574
/*
 * Decode one 128-bit mem pool chunk, which holds up to two register
 * writes, and print each write whose enable bit is set.
 */
static void
dump_mem_pool_chunk(const uint32_t *chunk)
{
   /* The four dwords are reinterpreted through this packed bitfield
    * struct, so the field positions follow the compiler's bitfield
    * allocation order.
    */
   struct __attribute__((packed)) {
      bool reg0_enabled : 1;
      bool reg1_enabled : 1;
      uint32_t data0 : 32;
      uint32_t data1 : 32;
      uint32_t reg0 : 18;
      uint32_t reg1 : 18;
      bool reg0_pipe : 1;
      bool reg1_pipe : 1;
      uint32_t reg0_context : 1;
      uint32_t reg1_context : 1;
      uint32_t padding : 22;
   } entry;

   memcpy(&entry, chunk, 4 * sizeof(uint32_t));

   if (entry.reg0_enabled)
      dump_mem_pool_reg_write(entry.reg0, entry.data0, entry.reg0_context, entry.reg0_pipe);

   if (entry.reg1_enabled)
      dump_mem_pool_reg_write(entry.reg1, entry.data1, entry.reg1_context, entry.reg1_pipe);
}
602
603 static void
604 dump_cp_mem_pool(uint32_t *mempool)
605 {
606 /* The mem pool is a shared pool of memory used for storing in-flight
607 * register writes. There are 6 different queues, one for each
608 * cluster. Writing to $data (or for some special registers, $addr)
609 * pushes data onto the appropriate queue, and each queue is pulled
610 * from by the appropriate cluster. The queues are thus written to
611 * in-order, but may be read out-of-order.
612 *
613 * The queues are conceptually divided into 128-bit "chunks", and the
614 * read and write pointers are in units of chunks. These chunks are
615 * organized internally into 8-chunk "blocks", and memory is allocated
616 * dynamically in terms of blocks. Each queue is represented as a
617 * singly-linked list of blocks, as well as 3-bit start/end chunk
618 * pointers that point within the first/last block. The next pointers
619 * are located in a separate array, rather than inline.
620 */
621
622 /* TODO: The firmware CP_MEM_POOL save/restore routines do something
623 * like:
624 *
625 * cread $02, [ $00 + 0 ]
626 * and $02, $02, 0x118
627 * ...
628 * brne $02, 0, #label
629 * mov $03, 0x2000
630 * mov $03, 0x1000
631 * label:
632 * ...
633 *
634 * I think that control register 0 is the GPU version, and some
635 * versions have a smaller mem pool. It seems some models have a mem
636 * pool that's half the size, and a bunch of offsets are shifted
637 * accordingly. Unfortunately the kernel driver's dumping code doesn't
638 * seem to take this into account, even the downstream android driver,
639 * and we don't know which versions 0x8, 0x10, or 0x100 correspond
640 * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
641 */
642 bool small_mem_pool = false;
643
644 /* The array of next pointers for each block. */
645 const uint32_t *next_pointers = small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
646
647 /* Maximum number of blocks in the pool, also the size of the pointers
648 * array.
649 */
650 const int num_blocks = small_mem_pool ? 0x30 : 0x80;
651
652 /* Number of queues */
653 const unsigned num_queues = 6;
654
655 /* Unfortunately the per-queue state is a little more complicated than
656 * a simple pair of begin/end pointers. Instead of a single beginning
657 * block, there are *two*, with the property that either the two are
658 * equal or the second is the "next" of the first. Similarly there are
659 * two end blocks. Thus the queue either looks like this:
660 *
661 * A -> B -> ... -> C -> D
662 *
663 * Or like this, or some combination:
664 *
665 * A/B -> ... -> C/D
666 *
667 * However, there's only one beginning/end chunk offset. Now the
668 * question is, which of A or B is the actual start? I.e. is the chunk
669 * offset an offset inside A or B? It depends. I'll show a typical read
670 * cycle, starting here (read pointer marked with a *) with a chunk
671 * offset of 0:
672 *
673 * A B
674 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
675 * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
676 *
677 * Once the pointer advances far enough, the hardware decides to free
678 * A, after which the read-side state looks like:
679 *
680 * (free) A/B
681 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
682 * |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
683 *
684 * Then after advancing the pointer a bit more, the hardware fetches
685 * the "next" pointer for A and stores it in B:
686 *
687 * (free) A B
688 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
689 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
690 *
691 * Then the read pointer advances into B, at which point we've come
692 * back to the first state having advanced a whole block:
693 *
694 * (free) A B
695 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
696 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
697 *
698 *
699 * There is a similar cycle for the write pointer. Now, the question
700 * is, how do we know which state we're in? We need to know this to
701 * know whether the pointer (*) is in A or B if they're different. It
702 * seems like there should be some bit somewhere describing this, but
703 * after lots of experimentation I've come up empty-handed. For now we
704 * assume that if the pointer is in the first half, then we're in
705 * either the first or second state and use B, and otherwise we're in
706 * the second or third state and use A. So far I haven't seen anything
707 * that violates this assumption.
708 */
709
710 struct {
711 uint32_t unk0;
712 uint32_t padding0[7]; /* Mirrors of unk0 */
713
714 struct {
715 uint32_t chunk : 3;
716 uint32_t first_block : 32 - 3;
717 } writer[6];
718 uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
719
720 uint32_t unk1;
721 uint32_t padding2[7]; /* Mirrors of unk1 */
722
723 uint32_t writer_second_block[6];
724 uint32_t padding3[2];
725
726 uint32_t unk2[6];
727 uint32_t padding4[2];
728
729 struct {
730 uint32_t chunk : 3;
731 uint32_t first_block : 32 - 3;
732 } reader[6];
733 uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
734
735 uint32_t unk3;
736 uint32_t padding6[7]; /* Mirrors of unk3 */
737
738 uint32_t reader_second_block[6];
739 uint32_t padding7[2];
740
741 uint32_t block_count[6];
742 uint32_t padding[2];
743
744 uint32_t unk4;
745 uint32_t padding9[7]; /* Mirrors of unk4 */
746 } data1;
747
748 const uint32_t *data1_ptr = small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
749 memcpy(&data1, data1_ptr, sizeof(data1));
750
751 /* Based on the kernel, the first dword is the mem pool size (in
752 * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
753 */
754 const uint32_t *data2_ptr = small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
755 const int data2_size = 0x60;
756
757 /* This seems to be the size of each queue in chunks. */
758 const uint32_t *queue_sizes = &data2_ptr[0x18];
759
760 printf("\tdata2:\n");
761 dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
762
763 /* These seem to be some kind of counter of allocated/deallocated blocks */
764 if (verbose) {
765 printf("\tunk0: %x\n", data1.unk0);
766 printf("\tunk1: %x\n", data1.unk1);
767 printf("\tunk3: %x\n", data1.unk3);
768 printf("\tunk4: %x\n\n", data1.unk4);
769 }
770
771 for (int queue = 0; queue < num_queues; queue++) {
772 const char *cluster_names[6] = {
773 "FE", "SP_VS", "PC_VS", "GRAS", "SP_PS", "PS"
774 };
775 printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
776
777 if (verbose) {
778 printf("\t\twriter_first_block: 0x%x\n", data1.writer[queue].first_block);
779 printf("\t\twriter_second_block: 0x%x\n", data1.writer_second_block[queue]);
780 printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
781 printf("\t\treader_first_block: 0x%x\n", data1.reader[queue].first_block);
782 printf("\t\treader_second_block: 0x%x\n", data1.reader_second_block[queue]);
783 printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
784 printf("\t\tblock_count: %d\n", data1.block_count[queue]);
785 printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
786 printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
787 }
788
789 uint32_t cur_chunk = data1.reader[queue].chunk;
790 uint32_t cur_block = cur_chunk > 3 ?
791 data1.reader[queue].first_block :
792 data1.reader_second_block[queue];
793 uint32_t last_chunk = data1.writer[queue].chunk;
794 uint32_t last_block = last_chunk > 3 ?
795 data1.writer[queue].first_block :
796 data1.writer_second_block[queue];
797
798 if (verbose)
799 printf("\tblock %x\n", cur_block);
800 if (cur_block >= num_blocks) {
801 fprintf(stderr, "block %x too large\n", cur_block);
802 exit(1);
803 }
804 unsigned calculated_queue_size = 0;
805 while (cur_block != last_block || cur_chunk != last_chunk) {
806 calculated_queue_size++;
807 uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
808
809 dump_mem_pool_chunk(chunk_ptr);
810
811 printf("\t%05x: %08x %08x %08x %08x\n",
812 4 * (cur_block * 0x20 + cur_chunk + 4),
813 chunk_ptr[0], chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
814
815 cur_chunk++;
816 if (cur_chunk == 8) {
817 cur_block = next_pointers[cur_block];
818 if (verbose)
819 printf("\tblock %x\n", cur_block);
820 if (cur_block >= num_blocks) {
821 fprintf(stderr, "block %x too large\n", cur_block);
822 exit(1);
823 }
824 cur_chunk = 0;
825 }
826 }
827 if (calculated_queue_size != queue_sizes[queue]) {
828 printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n", calculated_queue_size);
829 }
830 printf("\n");
831 }
832 }
833
834 static void
835 decode_indexed_registers(void)
836 {
837 char *name = NULL;
838 uint32_t sizedwords = 0;
839
840 foreach_line_in_section (line) {
841 if (startswith(line, " - regs-name:")) {
842 free(name);
843 parseline(line, " - regs-name: %ms", &name);
844 } else if (startswith(line, " dwords:")) {
845 parseline(line, " dwords: %u", &sizedwords);
846 } else if (startswith(line, " data: !!ascii85 |")) {
847 uint32_t *buf = popline_ascii85(sizedwords);
848
849 /* some of the sections are pretty large, and are (at least
850 * so far) not useful, so skip them if not in verbose mode:
851 */
852 bool dump = verbose ||
853 !strcmp(name, "CP_SEQ_STAT") ||
854 !strcmp(name, "CP_DRAW_STATE") ||
855 !strcmp(name, "CP_ROQ") ||
856 0;
857
858 if (!strcmp(name, "CP_SEQ_STAT"))
859 dump_cp_seq_stat(buf);
860
861 if (!strcmp(name, "CP_UCODE_DBG_DATA"))
862 dump_cp_ucode_dbg(buf);
863
864 /* note that name was typo'd in earlier kernels: */
865 if (!strcmp(name, "CP_MEMPOOL") || !strcmp(name, "CP_MEMPOOOL"))
866 dump_cp_mem_pool(buf);
867
868 if (dump)
869 dump_hex_ascii(buf, 4 * sizedwords, 1);
870 free(buf);
871
872 continue;
873 }
874
875 printf("%s", line);
876 }
877 }
878
879 /*
880 * Decode shader-blocks:
881 */
882
883 static void
884 decode_shader_blocks(void)
885 {
886 char *type = NULL;
887 uint32_t sizedwords = 0;
888
889 foreach_line_in_section (line) {
890 if (startswith(line, " - type:")) {
891 free(type);
892 parseline(line, " - type: %ms", &type);
893 } else if (startswith(line, " size:")) {
894 parseline(line, " size: %u", &sizedwords);
895 } else if (startswith(line, " data: !!ascii85 |")) {
896 uint32_t *buf = popline_ascii85(sizedwords);
897
898 /* some of the sections are pretty large, and are (at least
899 * so far) not useful, so skip them if not in verbose mode:
900 */
901 bool dump = verbose ||
902 !strcmp(type, "A6XX_SP_INST_DATA") ||
903 !strcmp(type, "A6XX_HLSQ_INST_RAM") ||
904 0;
905
906 if (!strcmp(type, "A6XX_SP_INST_DATA") ||
907 !strcmp(type, "A6XX_HLSQ_INST_RAM")) {
908 /* TODO this section actually contains multiple shaders
909 * (or parts of shaders?), so perhaps we should search
910 * for ends of shaders and decode each?
911 */
912 TRY(disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id));
913 }
914
915 if (dump)
916 dump_hex_ascii(buf, 4 * sizedwords, 1);
917
918 free(buf);
919
920 continue;
921 }
922
923 printf("%s", line);
924 }
925
926 free(type);
927 }
928
929 /*
930 * Decode debugbus section:
931 */
932
933 static void
934 decode_debugbus(void)
935 {
936 char *block = NULL;
937 uint32_t sizedwords = 0;
938
939 foreach_line_in_section (line) {
940 if (startswith(line, " - debugbus-block:")) {
941 free(block);
942 parseline(line, " - debugbus-block: %ms", &block);
943 } else if (startswith(line, " count:")) {
944 parseline(line, " count: %u", &sizedwords);
945 } else if (startswith(line, " data: !!ascii85 |")) {
946 uint32_t *buf = popline_ascii85(sizedwords);
947
948 /* some of the sections are pretty large, and are (at least
949 * so far) not useful, so skip them if not in verbose mode:
950 */
951 bool dump = verbose ||
952 0;
953
954 if (dump)
955 dump_hex_ascii(buf, 4 * sizedwords, 1);
956
957 free(buf);
958
959 continue;
960 }
961
962 printf("%s", line);
963 }
964 }
965
966 /*
967 * Main crashdump decode loop:
968 */
969
970 static void
971 decode(void)
972 {
973 const char *line;
974
975 while ((line = popline())) {
976 printf("%s", line);
977 if (startswith(line, "revision:")) {
978 parseline(line, "revision: %u", &options.gpu_id);
979 printf("Got gpu_id=%u\n", options.gpu_id);
980
981 cffdec_init(&options);
982
983 if (is_a6xx()) {
984 rnn_gmu = rnn_new(!options.color);
985 rnn_load_file(rnn_gmu, "adreno/a6xx_gmu.xml", "A6XX");
986 rnn_control = rnn_new(!options.color);
987 rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", "A6XX_CONTROL_REG");
988 rnn_pipe = rnn_new(!options.color);
989 rnn_load_file(rnn_pipe, "adreno/adreno_pipe_regs.xml", "A6XX_PIPE_REG");
990 } else if (is_a5xx()) {
991 rnn_control = rnn_new(!options.color);
992 rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", "A5XX_CONTROL_REG");
993 } else {
994 rnn_control = NULL;
995 }
996 } else if (startswith(line, "bos:")) {
997 decode_bos();
998 } else if (startswith(line, "ringbuffer:")) {
999 decode_ringbuffer();
1000 } else if (startswith(line, "registers:")) {
1001 decode_registers();
1002
1003 /* after we've recorded buffer contents, and CP register values,
1004 * we can take a stab at decoding the cmdstream:
1005 */
1006 dump_cmdstream();
1007 } else if (startswith(line, "registers-gmu:")) {
1008 decode_gmu_registers();
1009 } else if (startswith(line, "indexed-registers:")) {
1010 decode_indexed_registers();
1011 } else if (startswith(line, "shader-blocks:")) {
1012 decode_shader_blocks();
1013 } else if (startswith(line, "clusters:")) {
1014 decode_clusters();
1015 } else if (startswith(line, "debugbus:")) {
1016 decode_debugbus();
1017 }
1018 }
1019 }
1020
1021 /*
1022 * Usage and argument parsing:
1023 */
1024
/* Print the command line help to stderr and exit with status 2. */
static void
usage(void)
{
   static const char help[] =
      "Usage:\n\n"
      "\tcrashdec [-achmsv] [-f FILE]\n\n"
      "Options:\n"
      "\t-a, --allregs - show all registers (including ones not written since\n"
      "\t previous draw) at each draw\n"
      "\t-c, --color - use colors\n"
      "\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
      "\t-h, --help - this usage message\n"
      "\t-m, --markers - try to decode CP_NOP string markers\n"
      "\t-s, --summary - don't show individual register writes, but just show\n"
      "\t register values on draws\n"
      "\t-v, --verbose - dump more verbose output, including contents of\n"
      "\t less interesting buffers\n"
      "\n";

   fputs(help, stderr);
   exit(2);
}
1045
/* Long options; each maps onto the short option of the same letter. */
static const struct option opts[] = {
   { .name = "allregs", .has_arg = no_argument,       .flag = NULL, .val = 'a' },
   { .name = "color",   .has_arg = no_argument,       .flag = NULL, .val = 'c' },
   { .name = "file",    .has_arg = required_argument, .flag = NULL, .val = 'f' },
   { .name = "help",    .has_arg = no_argument,       .flag = NULL, .val = 'h' },
   { .name = "markers", .has_arg = no_argument,       .flag = NULL, .val = 'm' },
   { .name = "summary", .has_arg = no_argument,       .flag = NULL, .val = 's' },
   { .name = "verbose", .has_arg = no_argument,       .flag = NULL, .val = 'v' },
   { .name = NULL }, /* terminator */
};
1056
/* Set by main() when stdout is a terminal, in which case a pager is used. */
static bool interactive;

/* Flush pending output and, if a pager is in use, close it. */
static void
cleanup(void)
{
   fflush(stdout);

   if (!interactive)
      return;

   pager_close();
}
1068
1069 int
1070 main(int argc, char **argv)
1071 {
1072 int c;
1073
1074 interactive = isatty(STDOUT_FILENO);
1075 options.color = interactive;
1076
1077 /* default to read from stdin: */
1078 in = stdin;
1079
1080 while ((c = getopt_long(argc, argv, "acf:hmsv", opts, NULL)) != -1) {
1081 switch (c) {
1082 case 'a':
1083 options.allregs = true;
1084 break;
1085 case 'c':
1086 options.color = true;
1087 break;
1088 case 'f':
1089 in = fopen(optarg, "r");
1090 break;
1091 case 'm':
1092 options.decode_markers = true;
1093 break;
1094 case 's':
1095 options.summary = true;
1096 break;
1097 case 'v':
1098 verbose = true;
1099 break;
1100 case 'h':
1101 default:
1102 usage();
1103 }
1104 }
1105
1106 disasm_a3xx_set_debug(PRINT_RAW);
1107
1108 if (interactive) {
1109 pager_open();
1110 }
1111
1112 atexit(cleanup);
1113
1114 decode();
1115 cleanup();
1116 }