radeonsi: log draw and compute state into log context
[mesa.git] / src / gallium / drivers / radeonsi / si_debug.c
1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Marek Olšák <maraeo@gmail.com>
25 */
26
27 #include "si_pipe.h"
28 #include "si_compute.h"
29 #include "sid.h"
30 #include "gfx9d.h"
31 #include "sid_tables.h"
32 #include "ddebug/dd_util.h"
33 #include "util/u_log.h"
34 #include "util/u_memory.h"
35 #include "ac_debug.h"
36
/* Forward declaration: si_dump_bo_list() is defined further down in this
 * file but is already called by the CS log chunk printer. */
static void si_dump_bo_list(struct si_context *sctx,
			    const struct radeon_saved_cs *saved, FILE *f);

/* Environment option RADEON_REPLACE_SHADERS (default NULL). si_replace_shader()
 * reads it through debug_get_option_replace_shaders(), which is presumably
 * generated by this macro. */
DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
41
42 static void si_dump_shader(struct si_screen *sscreen,
43 enum pipe_shader_type processor,
44 const struct si_shader *shader, FILE *f)
45 {
46 if (shader->shader_log)
47 fwrite(shader->shader_log, shader->shader_log_size, 1, f);
48 else
49 si_shader_dump(sscreen, shader, NULL, processor, f, false);
50 }
51
/* Payload of a log chunk that dumps one shader when the log is printed. */
struct si_log_chunk_shader {
	/* The shader destroy code assumes a current context for unlinking of
	 * PM4 packets etc.
	 *
	 * While we should be able to destroy shaders without a context, doing
	 * so would happen only very rarely and be therefore likely to fail
	 * just when you're trying to debug something. Let's just remember the
	 * current context in the chunk.
	 *
	 * Set by si_dump_gfx_shader(); si_dump_compute_shader() leaves it
	 * NULL (the chunk is zero-allocated).
	 */
	struct si_context *ctx;
	/* The shader to dump; not reference-counted itself, kept alive via
	 * "sel" or "program" below. */
	struct si_shader *shader;

	/* For keep-alive reference counts */
	struct si_shader_selector *sel; /* taken for graphics shaders */
	struct si_compute *program;     /* taken for compute shaders */
};
68
69 static void
70 si_log_chunk_shader_destroy(void *data)
71 {
72 struct si_log_chunk_shader *chunk = data;
73 si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL);
74 si_compute_reference(&chunk->program, NULL);
75 FREE(chunk);
76 }
77
78 static void
79 si_log_chunk_shader_print(void *data, FILE *f)
80 {
81 struct si_log_chunk_shader *chunk = data;
82 struct si_screen *sscreen = chunk->ctx->screen;
83 si_dump_shader(sscreen, chunk->shader->selector->info.processor,
84 chunk->shader, f);
85 }
86
87 static struct u_log_chunk_type si_log_chunk_type_shader = {
88 .destroy = si_log_chunk_shader_destroy,
89 .print = si_log_chunk_shader_print,
90 };
91
92 static void si_dump_gfx_shader(struct si_context *ctx,
93 const struct si_shader_ctx_state *state,
94 struct u_log_context *log)
95 {
96 struct si_shader *current = state->current;
97
98 if (!state->cso || !current)
99 return;
100
101 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
102 chunk->ctx = ctx;
103 chunk->shader = current;
104 si_shader_selector_reference(ctx, &chunk->sel, current->selector);
105 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
106 }
107
108 static void si_dump_compute_shader(const struct si_cs_shader_state *state,
109 struct u_log_context *log)
110 {
111 if (!state->program || state->program != state->emitted_program)
112 return;
113
114 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
115 chunk->shader = &state->program->shader;
116 si_compute_reference(&chunk->program, state->program);
117 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
118 }
119
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     shader number to look up in the option string
 * \param binary  filled from the replacement file via ac_elf_read()
 * \return true if an entry for "num" was found and its file was read
 *         completely; false if the option is unset, has no matching entry,
 *         or the file could not be opened/read.
 *
 * A malformed option string (number not followed by ':') terminates the
 * process with exit(1).
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize, nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Walk the "num:filename" entries until the one for "num" is found.
	 * On a match, p is left pointing at the start of the filename. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* If more entries follow, isolate this filename in a private copy. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* The replacement is an ELF object: open it in binary mode. */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	nread = fread(buf, 1, filesize, f);
	if (nread != filesize) {
		/* errno is only meaningful for a real I/O error; a plain
		 * short read (premature EOF) gets its own message. */
		if (ferror(f))
			goto file_error;
		fprintf(stderr, "radeonsi: reading shader: unexpected end of file\n");
		goto out_close;
	}

	ac_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
211
212 /* Parsed IBs are difficult to read without colors. Use "less -R file" to
213 * read them, or use "aha -b -f file" to convert them to html.
214 */
215 #define COLOR_RESET "\033[0m"
216 #define COLOR_RED "\033[31m"
217 #define COLOR_GREEN "\033[1;32m"
218 #define COLOR_YELLOW "\033[1;33m"
219 #define COLOR_CYAN "\033[1;36m"
220
221 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
222 unsigned offset)
223 {
224 struct radeon_winsys *ws = sctx->b.ws;
225 uint32_t value;
226
227 if (ws->read_registers(ws, offset, 1, &value))
228 ac_dump_reg(f, offset, value, ~0);
229 }
230
231 static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
232 {
233 if (sctx->screen->b.info.drm_major == 2 &&
234 sctx->screen->b.info.drm_minor < 42)
235 return; /* no radeon support */
236
237 fprintf(f, "Memory-mapped registers:\n");
238 si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
239
240 /* No other registers can be read on DRM < 3.1.0. */
241 if (sctx->screen->b.info.drm_major < 3 ||
242 sctx->screen->b.info.drm_minor < 1) {
243 fprintf(f, "\n");
244 return;
245 }
246
247 si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
248 si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
249 si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
250 si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
251 si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
252 si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
253 si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
254 if (sctx->b.chip_class <= VI) {
255 si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
256 si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
257 si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
258 }
259 si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
260 si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
261 si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
262 si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
263 si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
264 si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
265 si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
266 si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
267 si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
268 si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
269 fprintf(f, "\n");
270 }
271
/* Payload of a log chunk describing a range of recorded command-stream
 * dwords, parsed and printed when the log is dumped. */
struct si_log_chunk_cs {
	struct si_context *ctx;      /* context whose IBs are parsed at print time */
	struct si_saved_cs *cs;      /* referenced saved CS; released on destroy */
	bool dump_bo_list;           /* also print "Flushing." and the buffer list */
	unsigned gfx_begin, gfx_end; /* dword range within the gfx IB */
	unsigned ce_begin, ce_end;   /* dword range within the CE IB */
};
279
280 static void si_log_chunk_type_cs_destroy(void *data)
281 {
282 struct si_log_chunk_cs *chunk = data;
283 si_saved_cs_reference(&chunk->cs, NULL);
284 free(chunk);
285 }
286
287 static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
288 unsigned begin, unsigned end,
289 unsigned last_trace_id, const char *name,
290 enum chip_class chip_class)
291 {
292 unsigned orig_end = end;
293
294 assert(begin <= end);
295
296 fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
297 name, begin);
298
299 for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
300 struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];
301
302 if (begin < chunk->cdw) {
303 ac_parse_ib_chunk(f, chunk->buf + begin,
304 MIN2(end, chunk->cdw) - begin,
305 last_trace_id, chip_class, NULL, NULL);
306 }
307
308 if (end <= chunk->cdw)
309 return;
310
311 if (begin < chunk->cdw)
312 fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
313 name);
314
315 begin -= MIN2(begin, chunk->cdw);
316 end -= chunk->cdw;
317 }
318
319 assert(end <= cs->current.cdw);
320
321 ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
322 chip_class, NULL, NULL);
323
324 fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
325 name, orig_end);
326 }
327
328 static void si_log_chunk_type_cs_print(void *data, FILE *f)
329 {
330 struct si_log_chunk_cs *chunk = data;
331 struct si_context *ctx = chunk->ctx;
332 struct si_saved_cs *scs = chunk->cs;
333 int last_trace_id = -1;
334 int last_ce_trace_id = -1;
335
336 /* We are expecting that the ddebug pipe has already
337 * waited for the context, so this buffer should be idle.
338 * If the GPU is hung, there is no point in waiting for it.
339 */
340 uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
341 NULL,
342 PIPE_TRANSFER_UNSYNCHRONIZED |
343 PIPE_TRANSFER_READ);
344 if (map) {
345 last_trace_id = map[0];
346 last_ce_trace_id = map[1];
347 }
348
349 if (chunk->gfx_end != chunk->gfx_begin) {
350 if (chunk->gfx_begin == 0) {
351 if (ctx->init_config)
352 ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
353 -1, "IB2: Init config", ctx->b.chip_class,
354 NULL, NULL);
355
356 if (ctx->init_config_gs_rings)
357 ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
358 ctx->init_config_gs_rings->ndw,
359 -1, "IB2: Init GS rings", ctx->b.chip_class,
360 NULL, NULL);
361 }
362
363 if (scs->flushed) {
364 ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
365 chunk->gfx_end - chunk->gfx_begin,
366 last_trace_id, "IB", ctx->b.chip_class,
367 NULL, NULL);
368 } else {
369 si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
370 chunk->gfx_end, last_trace_id, "IB",
371 ctx->b.chip_class);
372 }
373 }
374
375 if (chunk->ce_end != chunk->ce_begin) {
376 assert(ctx->ce_ib);
377
378 if (scs->flushed) {
379 ac_parse_ib(f, scs->ce.ib + chunk->ce_begin,
380 chunk->ce_end - chunk->ce_begin,
381 last_ce_trace_id, "CE IB", ctx->b.chip_class,
382 NULL, NULL);
383 } else {
384 si_parse_current_ib(f, ctx->ce_ib, chunk->ce_begin,
385 chunk->ce_end, last_ce_trace_id, "CE IB",
386 ctx->b.chip_class);
387 }
388 }
389
390 if (chunk->dump_bo_list) {
391 fprintf(f, "Flushing.\n\n");
392 si_dump_bo_list(ctx, &scs->gfx, f);
393 }
394 }
395
396 static const struct u_log_chunk_type si_log_chunk_type_cs = {
397 .destroy = si_log_chunk_type_cs_destroy,
398 .print = si_log_chunk_type_cs_print,
399 };
400
401 static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
402 bool dump_bo_list)
403 {
404 assert(ctx->current_saved_cs);
405
406 struct si_saved_cs *scs = ctx->current_saved_cs;
407 unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;
408 unsigned ce_cur = 0;
409
410 if (ctx->ce_ib)
411 ce_cur = ctx->ce_ib->prev_dw + ctx->ce_ib->current.cdw;
412
413 if (!dump_bo_list &&
414 gfx_cur == scs->gfx_last_dw &&
415 ce_cur == scs->ce_last_dw)
416 return;
417
418 struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
419
420 chunk->ctx = ctx;
421 si_saved_cs_reference(&chunk->cs, scs);
422 chunk->dump_bo_list = dump_bo_list;
423
424 chunk->gfx_begin = scs->gfx_last_dw;
425 chunk->gfx_end = gfx_cur;
426 scs->gfx_last_dw = gfx_cur;
427
428 chunk->ce_begin = scs->ce_last_dw;
429 chunk->ce_end = ce_cur;
430 scs->ce_last_dw = ce_cur;
431
432 u_log_chunk(log, &si_log_chunk_type_cs, chunk);
433 }
434
/* Log the command-stream dwords recorded so far, without a buffer list.
 * "data" is the si_context, passed as an opaque pointer. */
void si_auto_log_cs(void *data, struct u_log_context *log)
{
	struct si_context *sctx = data;

	si_log_cs(sctx, log, false);
}
440
441 void si_log_hw_flush(struct si_context *sctx)
442 {
443 if (!sctx->b.log)
444 return;
445
446 si_log_cs(sctx, sctx->b.log, true);
447 }
448
449 static const char *priority_to_string(enum radeon_bo_priority priority)
450 {
451 #define ITEM(x) [RADEON_PRIO_##x] = #x
452 static const char *table[64] = {
453 ITEM(FENCE),
454 ITEM(TRACE),
455 ITEM(SO_FILLED_SIZE),
456 ITEM(QUERY),
457 ITEM(IB1),
458 ITEM(IB2),
459 ITEM(DRAW_INDIRECT),
460 ITEM(INDEX_BUFFER),
461 ITEM(VCE),
462 ITEM(UVD),
463 ITEM(SDMA_BUFFER),
464 ITEM(SDMA_TEXTURE),
465 ITEM(CP_DMA),
466 ITEM(CONST_BUFFER),
467 ITEM(DESCRIPTORS),
468 ITEM(BORDER_COLORS),
469 ITEM(SAMPLER_BUFFER),
470 ITEM(VERTEX_BUFFER),
471 ITEM(SHADER_RW_BUFFER),
472 ITEM(COMPUTE_GLOBAL),
473 ITEM(SAMPLER_TEXTURE),
474 ITEM(SHADER_RW_IMAGE),
475 ITEM(SAMPLER_TEXTURE_MSAA),
476 ITEM(COLOR_BUFFER),
477 ITEM(DEPTH_BUFFER),
478 ITEM(COLOR_BUFFER_MSAA),
479 ITEM(DEPTH_BUFFER_MSAA),
480 ITEM(CMASK),
481 ITEM(DCC),
482 ITEM(HTILE),
483 ITEM(SHADER_BINARY),
484 ITEM(SHADER_RINGS),
485 ITEM(SCRATCH_BUFFER),
486 };
487 #undef ITEM
488
489 assert(priority < ARRAY_SIZE(table));
490 return table[priority];
491 }
492
493 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
494 const struct radeon_bo_list_item *b)
495 {
496 return a->vm_address < b->vm_address ? -1 :
497 a->vm_address > b->vm_address ? 1 : 0;
498 }
499
500 static void si_dump_bo_list(struct si_context *sctx,
501 const struct radeon_saved_cs *saved, FILE *f)
502 {
503 unsigned i,j;
504
505 if (!saved->bo_list)
506 return;
507
508 /* Sort the list according to VM adddresses first. */
509 qsort(saved->bo_list, saved->bo_count,
510 sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);
511
512 fprintf(f, "Buffer list (in units of pages = 4kB):\n"
513 COLOR_YELLOW " Size VM start page "
514 "VM end page Usage" COLOR_RESET "\n");
515
516 for (i = 0; i < saved->bo_count; i++) {
517 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
518 const unsigned page_size = sctx->b.screen->info.gart_page_size;
519 uint64_t va = saved->bo_list[i].vm_address;
520 uint64_t size = saved->bo_list[i].bo_size;
521 bool hit = false;
522
523 /* If there's unused virtual memory between 2 buffers, print it. */
524 if (i) {
525 uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
526 saved->bo_list[i-1].bo_size;
527
528 if (va > previous_va_end) {
529 fprintf(f, " %10"PRIu64" -- hole --\n",
530 (va - previous_va_end) / page_size);
531 }
532 }
533
534 /* Print the buffer. */
535 fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ",
536 size / page_size, va / page_size, (va + size) / page_size);
537
538 /* Print the usage. */
539 for (j = 0; j < 64; j++) {
540 if (!(saved->bo_list[i].priority_usage & (1ull << j)))
541 continue;
542
543 fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
544 hit = true;
545 }
546 fprintf(f, "\n");
547 }
548 fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
549 " Other buffers can still be allocated there.\n\n");
550 }
551
552 static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)
553 {
554 struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
555 struct r600_texture *rtex;
556 int i;
557
558 for (i = 0; i < state->nr_cbufs; i++) {
559 if (!state->cbufs[i])
560 continue;
561
562 rtex = (struct r600_texture*)state->cbufs[i]->texture;
563 u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
564 r600_print_texture_info(sctx->b.screen, rtex, log);
565 u_log_printf(log, "\n");
566 }
567
568 if (state->zsbuf) {
569 rtex = (struct r600_texture*)state->zsbuf->texture;
570 u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
571 r600_print_texture_info(sctx->b.screen, rtex, log);
572 u_log_printf(log, "\n");
573 }
574 }
575
/* Maps a logical slot index to the slot's position in a descriptor list. */
typedef unsigned (*slot_remap_func)(unsigned);

/* Payload of a log chunk holding a snapshot of one descriptor list. */
struct si_log_chunk_desc_list {
	/** Pointer to memory map of buffer where the list is uploaded */
	uint32_t *gpu_list;
	/** Reference of buffer where the list is uploaded, so that gpu_list
	 * is kept live. */
	struct r600_resource *buf;

	const char *shader_name;
	const char *elem_name;
	slot_remap_func slot_remap;
	unsigned element_dw_size; /* dwords per descriptor */
	unsigned num_elements;    /* descriptors stored in list[] */

	/* CPU copy of the descriptors, element_dw_size * num_elements dwords,
	 * allocated together with the struct (C99 flexible array member). */
	uint32_t list[];
};
593
594 static void
595 si_log_chunk_desc_list_destroy(void *data)
596 {
597 struct si_log_chunk_desc_list *chunk = data;
598 r600_resource_reference(&chunk->buf, NULL);
599 FREE(chunk);
600 }
601
602 static void
603 si_log_chunk_desc_list_print(void *data, FILE *f)
604 {
605 struct si_log_chunk_desc_list *chunk = data;
606
607 for (unsigned i = 0; i < chunk->num_elements; i++) {
608 unsigned cpu_dw_offset = i * chunk->element_dw_size;
609 unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;
610 const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";
611 uint32_t *cpu_list = chunk->list + cpu_dw_offset;
612 uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;
613
614 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
615 chunk->shader_name, chunk->elem_name, i, list_note);
616
617 switch (chunk->element_dw_size) {
618 case 4:
619 for (unsigned j = 0; j < 4; j++)
620 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
621 gpu_list[j], 0xffffffff);
622 break;
623 case 8:
624 for (unsigned j = 0; j < 8; j++)
625 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
626 gpu_list[j], 0xffffffff);
627
628 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
629 for (unsigned j = 0; j < 4; j++)
630 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
631 gpu_list[4+j], 0xffffffff);
632 break;
633 case 16:
634 for (unsigned j = 0; j < 8; j++)
635 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
636 gpu_list[j], 0xffffffff);
637
638 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
639 for (unsigned j = 0; j < 4; j++)
640 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
641 gpu_list[4+j], 0xffffffff);
642
643 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
644 for (unsigned j = 0; j < 8; j++)
645 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
646 gpu_list[8+j], 0xffffffff);
647
648 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
649 for (unsigned j = 0; j < 4; j++)
650 ac_dump_reg(f, R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
651 gpu_list[12+j], 0xffffffff);
652 break;
653 }
654
655 if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {
656 fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
657 COLOR_RESET "\n");
658 }
659
660 fprintf(f, "\n");
661 }
662
663 }
664
665 static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {
666 .destroy = si_log_chunk_desc_list_destroy,
667 .print = si_log_chunk_desc_list_print,
668 };
669
670 static void si_dump_descriptor_list(struct si_descriptors *desc,
671 const char *shader_name,
672 const char *elem_name,
673 unsigned element_dw_size,
674 unsigned num_elements,
675 slot_remap_func slot_remap,
676 struct u_log_context *log)
677 {
678 if (!desc->list)
679 return;
680
681 struct si_log_chunk_desc_list *chunk =
682 CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list,
683 4 * element_dw_size * num_elements);
684 chunk->shader_name = shader_name;
685 chunk->elem_name = elem_name;
686 chunk->element_dw_size = element_dw_size;
687 chunk->num_elements = num_elements;
688 chunk->slot_remap = slot_remap;
689
690 r600_resource_reference(&chunk->buf, desc->buffer);
691 chunk->gpu_list = desc->gpu_list;
692
693 for (unsigned i = 0; i < num_elements; ++i) {
694 memcpy(&chunk->list[i * element_dw_size],
695 &desc->list[slot_remap(i) * element_dw_size],
696 4 * element_dw_size);
697 }
698
699 u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);
700 }
701
/* Slot remap function for lists whose logical slot order already matches
 * the order in memory: returns its argument unchanged. */
static unsigned si_identity(unsigned slot)
{
	const unsigned remapped = slot;

	return remapped;
}
706
707 static void si_dump_descriptors(struct si_context *sctx,
708 enum pipe_shader_type processor,
709 const struct tgsi_shader_info *info,
710 struct u_log_context *log)
711 {
712 struct si_descriptors *descs =
713 &sctx->descriptors[SI_DESCS_FIRST_SHADER +
714 processor * SI_NUM_SHADER_DESCS];
715 static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
716 const char *name = shader_name[processor];
717 unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;
718 unsigned enabled_images;
719
720 if (info) {
721 enabled_constbuf = info->const_buffers_declared;
722 enabled_shaderbuf = info->shader_buffers_declared;
723 enabled_samplers = info->samplers_declared;
724 enabled_images = info->images_declared;
725 } else {
726 enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
727 SI_NUM_SHADER_BUFFERS;
728 enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
729 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
730 enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
731 (32 - SI_NUM_SHADER_BUFFERS);
732 enabled_samplers = sctx->samplers[processor].views.enabled_mask;
733 enabled_images = sctx->images[processor].enabled_mask;
734 }
735
736 if (processor == PIPE_SHADER_VERTEX) {
737 assert(info); /* only CS may not have an info struct */
738
739 si_dump_descriptor_list(&sctx->vertex_buffers, name,
740 " - Vertex buffer", 4, info->num_inputs,
741 si_identity, log);
742 }
743
744 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
745 name, " - Constant buffer", 4,
746 util_last_bit(enabled_constbuf),
747 si_get_constbuf_slot, log);
748 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
749 name, " - Shader buffer", 4,
750 util_last_bit(enabled_shaderbuf),
751 si_get_shaderbuf_slot, log);
752 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
753 name, " - Sampler", 16,
754 util_last_bit(enabled_samplers),
755 si_get_sampler_slot, log);
756 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
757 name, " - Image", 8,
758 util_last_bit(enabled_images),
759 si_get_image_slot, log);
760 }
761
762 static void si_dump_gfx_descriptors(struct si_context *sctx,
763 const struct si_shader_ctx_state *state,
764 struct u_log_context *log)
765 {
766 if (!state->cso || !state->current)
767 return;
768
769 si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log);
770 }
771
772 static void si_dump_compute_descriptors(struct si_context *sctx,
773 struct u_log_context *log)
774 {
775 if (!sctx->cs_shader_state.program ||
776 sctx->cs_shader_state.program != sctx->cs_shader_state.emitted_program)
777 return;
778
779 si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log);
780 }
781
/* One instruction of a shader disassembly. */
struct si_shader_inst {
	char text[160];  /* one disasm line */
	unsigned offset; /* instruction offset */
	unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * \param disasm        newline-separated disassembly; only lines terminated
 *                      by '\n' are considered
 * \param start_addr    address of the first instruction of the whole array,
 *                      used for the PC annotation
 * \param num           in/out: number of valid entries in "instructions"
 * \param instructions  output array; the caller guarantees enough room
 *
 * A line counts as an instruction only if it contains a ';' (the separator
 * before the encoding); other lines are skipped. Offsets continue from the
 * last entry already present in the array. Over-long lines are truncated to
 * fit the text buffer.
 */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct si_shader_inst *instructions)
{
	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	const char *next;

	while ((next = strchr(disasm, '\n'))) {
		const size_t line_len = next - disasm;
		/* Only look for the ';' inside the current line. */
		const char *semicolon = memchr(disasm, ';', line_len);

		if (!semicolon) {
			/* Ignore everything that is not an instruction. */
			disasm = next + 1;
			continue;
		}

		struct si_shader_inst *inst = &instructions[*num];
		size_t len = line_len;

		/* Leave room for the terminator even without asserts. */
		if (len >= sizeof(inst->text))
			len = sizeof(inst->text) - 1;

		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
		inst->size = next - semicolon > 16 ? 8 : 4;

		snprintf(inst->text + len, sizeof(inst->text) - len,
			 " [PC=0x%"PRIx64", off=%u, size=%u]",
			 start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
		disasm = next + 1;
	}
}
821
/* Capacity of the wave arrays used by the wave-dumping code below. */
#define MAX_WAVES_PER_CHIP (64 * 40)

/* State of one wave as parsed from the "umr -wa" output. */
struct si_wave_info {
	unsigned se; /* shader engine */
	unsigned sh; /* shader array */
	unsigned cu; /* compute unit */
	unsigned simd;
	unsigned wave;
	uint32_t status;
	uint64_t pc; /* program counter */
	uint32_t inst_dw0;
	uint32_t inst_dw1;
	uint64_t exec;
	bool matched; /* whether the wave is used by a currently-bound shader */
};

/* qsort() comparator: order waves by PC first, then by SE, SH, CU, SIMD
 * and wave index. Returns -1, 0 or 1. */
static int compare_wave(const void *p1, const void *p2)
{
	const struct si_wave_info *lhs = p1;
	const struct si_wave_info *rhs = p2;

	/* The first differing key decides the order. */
	if (lhs->pc != rhs->pc)
		return lhs->pc < rhs->pc ? -1 : 1;
	if (lhs->se != rhs->se)
		return lhs->se < rhs->se ? -1 : 1;
	if (lhs->sh != rhs->sh)
		return lhs->sh < rhs->sh ? -1 : 1;
	if (lhs->cu != rhs->cu)
		return lhs->cu < rhs->cu ? -1 : 1;
	if (lhs->simd != rhs->simd)
		return lhs->simd < rhs->simd ? -1 : 1;
	if (lhs->wave != rhs->wave)
		return lhs->wave < rhs->wave ? -1 : 1;

	return 0;
}
871
872 /* Return wave information. "waves" should be a large enough array. */
873 static unsigned si_get_wave_info(struct si_wave_info waves[MAX_WAVES_PER_CHIP])
874 {
875 char line[2000];
876 unsigned num_waves = 0;
877
878 FILE *p = popen("umr -wa", "r");
879 if (!p)
880 return 0;
881
882 if (!fgets(line, sizeof(line), p) ||
883 strncmp(line, "SE", 2) != 0) {
884 pclose(p);
885 return 0;
886 }
887
888 while (fgets(line, sizeof(line), p)) {
889 struct si_wave_info *w;
890 uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
891
892 assert(num_waves < MAX_WAVES_PER_CHIP);
893 w = &waves[num_waves];
894
895 if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
896 &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
897 &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
898 &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
899 w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
900 w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
901 w->matched = false;
902 num_waves++;
903 }
904 }
905
906 qsort(waves, num_waves, sizeof(struct si_wave_info), compare_wave);
907
908 pclose(p);
909 return num_waves;
910 }
911
912 /* If the shader is being executed, print its asm instructions, and annotate
913 * those that are being executed right now with information about waves that
914 * execute them. This is most useful during a GPU hang.
915 */
916 static void si_print_annotated_shader(struct si_shader *shader,
917 struct si_wave_info *waves,
918 unsigned num_waves,
919 FILE *f)
920 {
921 if (!shader || !shader->binary.disasm_string)
922 return;
923
924 uint64_t start_addr = shader->bo->gpu_address;
925 uint64_t end_addr = start_addr + shader->bo->b.b.width0;
926 unsigned i;
927
928 /* See if any wave executes the shader. */
929 for (i = 0; i < num_waves; i++) {
930 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
931 break;
932 }
933 if (i == num_waves)
934 return; /* the shader is not being executed */
935
936 /* Remember the first found wave. The waves are sorted according to PC. */
937 waves = &waves[i];
938 num_waves -= i;
939
940 /* Get the list of instructions.
941 * Buffer size / 4 is the upper bound of the instruction count.
942 */
943 unsigned num_inst = 0;
944 struct si_shader_inst *instructions =
945 calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
946
947 if (shader->prolog) {
948 si_add_split_disasm(shader->prolog->binary.disasm_string,
949 start_addr, &num_inst, instructions);
950 }
951 if (shader->previous_stage) {
952 si_add_split_disasm(shader->previous_stage->binary.disasm_string,
953 start_addr, &num_inst, instructions);
954 }
955 if (shader->prolog2) {
956 si_add_split_disasm(shader->prolog2->binary.disasm_string,
957 start_addr, &num_inst, instructions);
958 }
959 si_add_split_disasm(shader->binary.disasm_string,
960 start_addr, &num_inst, instructions);
961 if (shader->epilog) {
962 si_add_split_disasm(shader->epilog->binary.disasm_string,
963 start_addr, &num_inst, instructions);
964 }
965
966 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
967 si_get_shader_name(shader, shader->selector->type));
968
969 /* Print instructions with annotations. */
970 for (i = 0; i < num_inst; i++) {
971 struct si_shader_inst *inst = &instructions[i];
972
973 fprintf(f, "%s\n", inst->text);
974
975 /* Print which waves execute the instruction right now. */
976 while (num_waves && start_addr + inst->offset == waves->pc) {
977 fprintf(f,
978 " " COLOR_GREEN "^ SE%u SH%u CU%u "
979 "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
980 waves->se, waves->sh, waves->cu, waves->simd,
981 waves->wave, waves->exec);
982
983 if (inst->size == 4) {
984 fprintf(f, "INST32=%08X" COLOR_RESET "\n",
985 waves->inst_dw0);
986 } else {
987 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
988 waves->inst_dw0, waves->inst_dw1);
989 }
990
991 waves->matched = true;
992 waves = &waves[1];
993 num_waves--;
994 }
995 }
996
997 fprintf(f, "\n\n");
998 free(instructions);
999 }
1000
1001 static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
1002 {
1003 struct si_wave_info waves[MAX_WAVES_PER_CHIP];
1004 unsigned num_waves = si_get_wave_info(waves);
1005
1006 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
1007 "\n\n", num_waves);
1008
1009 si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
1010 si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
1011 si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
1012 si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
1013 si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
1014
1015 /* Print waves executing shaders that are not currently bound. */
1016 unsigned i;
1017 bool found = false;
1018 for (i = 0; i < num_waves; i++) {
1019 if (waves[i].matched)
1020 continue;
1021
1022 if (!found) {
1023 fprintf(f, COLOR_CYAN
1024 "Waves not executing currently-bound shaders:"
1025 COLOR_RESET "\n");
1026 found = true;
1027 }
1028 fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
1029 " INST=%08X %08X PC=%"PRIx64"\n",
1030 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
1031 waves[i].wave, waves[i].exec, waves[i].inst_dw0,
1032 waves[i].inst_dw1, waves[i].pc);
1033 }
1034 if (found)
1035 fprintf(f, "\n\n");
1036 }
1037
1038 static void si_dump_command(const char *title, const char *command, FILE *f)
1039 {
1040 char line[2000];
1041
1042 FILE *p = popen(command, "r");
1043 if (!p)
1044 return;
1045
1046 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
1047 while (fgets(line, sizeof(line), p))
1048 fputs(line, f);
1049 fprintf(f, "\n\n");
1050 pclose(p);
1051 }
1052
1053 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
1054 unsigned flags)
1055 {
1056 struct si_context *sctx = (struct si_context*)ctx;
1057
1058 if (sctx->b.log)
1059 u_log_flush(sctx->b.log);
1060
1061 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
1062 si_dump_debug_registers(sctx, f);
1063
1064 if (flags & PIPE_DUMP_CURRENT_SHADERS) {
1065 si_dump_annotated_shaders(sctx, f);
1066 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
1067 si_dump_command("Wave information", "umr -O bits -wa", f);
1068 }
1069 }
1070 }
1071
1072 void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
1073 {
1074 if (!log)
1075 return;
1076
1077 si_dump_framebuffer(sctx, log);
1078
1079 si_dump_gfx_shader(sctx, &sctx->vs_shader, log);
1080 si_dump_gfx_shader(sctx, &sctx->tcs_shader, log);
1081 si_dump_gfx_shader(sctx, &sctx->tes_shader, log);
1082 si_dump_gfx_shader(sctx, &sctx->gs_shader, log);
1083 si_dump_gfx_shader(sctx, &sctx->ps_shader, log);
1084
1085 si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
1086 "", "RW buffers", 4, SI_NUM_RW_BUFFERS,
1087 si_identity, log);
1088 si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log);
1089 si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log);
1090 si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log);
1091 si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log);
1092 si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log);
1093 }
1094
1095 void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)
1096 {
1097 if (!log)
1098 return;
1099
1100 si_dump_compute_shader(&sctx->cs_shader_state, log);
1101 si_dump_compute_descriptors(sctx, log);
1102 }
1103
1104 static void si_dump_dma(struct si_context *sctx,
1105 struct radeon_saved_cs *saved, FILE *f)
1106 {
1107 static const char ib_name[] = "sDMA IB";
1108 unsigned i;
1109
1110 si_dump_bo_list(sctx, saved, f);
1111
1112 fprintf(f, "------------------ %s begin ------------------\n", ib_name);
1113
1114 for (i = 0; i < saved->num_dw; ++i) {
1115 fprintf(f, " %08x\n", saved->ib[i]);
1116 }
1117
1118 fprintf(f, "------------------- %s end -------------------\n", ib_name);
1119 fprintf(f, "\n");
1120
1121 fprintf(f, "SDMA Dump Done.\n");
1122 }
1123
1124 static bool si_vm_fault_occured(struct si_context *sctx, uint64_t *out_addr)
1125 {
1126 char line[2000];
1127 unsigned sec, usec;
1128 int progress = 0;
1129 uint64_t timestamp = 0;
1130 bool fault = false;
1131
1132 FILE *p = popen("dmesg", "r");
1133 if (!p)
1134 return false;
1135
1136 while (fgets(line, sizeof(line), p)) {
1137 char *msg, len;
1138
1139 if (!line[0] || line[0] == '\n')
1140 continue;
1141
1142 /* Get the timestamp. */
1143 if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
1144 static bool hit = false;
1145 if (!hit) {
1146 fprintf(stderr, "%s: failed to parse line '%s'\n",
1147 __func__, line);
1148 hit = true;
1149 }
1150 continue;
1151 }
1152 timestamp = sec * 1000000ull + usec;
1153
1154 /* If just updating the timestamp. */
1155 if (!out_addr)
1156 continue;
1157
1158 /* Process messages only if the timestamp is newer. */
1159 if (timestamp <= sctx->dmesg_timestamp)
1160 continue;
1161
1162 /* Only process the first VM fault. */
1163 if (fault)
1164 continue;
1165
1166 /* Remove trailing \n */
1167 len = strlen(line);
1168 if (len && line[len-1] == '\n')
1169 line[len-1] = 0;
1170
1171 /* Get the message part. */
1172 msg = strchr(line, ']');
1173 if (!msg) {
1174 assert(0);
1175 continue;
1176 }
1177 msg++;
1178
1179 const char *header_line, *addr_line_prefix, *addr_line_format;
1180
1181 if (sctx->b.chip_class >= GFX9) {
1182 /* Match this:
1183 * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
1184 * ..: at page 0x0000000219f8f000 from 27
1185 * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
1186 */
1187 header_line = "VMC page fault";
1188 addr_line_prefix = " at page";
1189 addr_line_format = "%"PRIx64;
1190 } else {
1191 header_line = "GPU fault detected:";
1192 addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
1193 addr_line_format = "%"PRIX64;
1194 }
1195
1196 switch (progress) {
1197 case 0:
1198 if (strstr(msg, header_line))
1199 progress = 1;
1200 break;
1201 case 1:
1202 msg = strstr(msg, addr_line_prefix);
1203 if (msg) {
1204 msg = strstr(msg, "0x");
1205 if (msg) {
1206 msg += 2;
1207 if (sscanf(msg, addr_line_format, out_addr) == 1)
1208 fault = true;
1209 }
1210 }
1211 progress = 0;
1212 break;
1213 default:
1214 progress = 0;
1215 }
1216 }
1217 pclose(p);
1218
1219 if (timestamp > sctx->dmesg_timestamp)
1220 sctx->dmesg_timestamp = timestamp;
1221 return fault;
1222 }
1223
1224 void si_check_vm_faults(struct r600_common_context *ctx,
1225 struct radeon_saved_cs *saved, enum ring_type ring)
1226 {
1227 struct si_context *sctx = (struct si_context *)ctx;
1228 struct pipe_screen *screen = sctx->b.b.screen;
1229 FILE *f;
1230 uint64_t addr;
1231 char cmd_line[4096];
1232
1233 if (!si_vm_fault_occured(sctx, &addr))
1234 return;
1235
1236 f = dd_get_debug_file(false);
1237 if (!f)
1238 return;
1239
1240 fprintf(f, "VM fault report.\n\n");
1241 if (os_get_command_line(cmd_line, sizeof(cmd_line)))
1242 fprintf(f, "Command: %s\n", cmd_line);
1243 fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
1244 fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
1245 fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
1246 fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
1247
1248 if (sctx->apitrace_call_number)
1249 fprintf(f, "Last apitrace call: %u\n\n",
1250 sctx->apitrace_call_number);
1251
1252 switch (ring) {
1253 case RING_GFX: {
1254 struct u_log_context log;
1255 u_log_context_init(&log);
1256
1257 si_log_draw_state(sctx, &log);
1258 si_log_compute_state(sctx, &log);
1259
1260 u_log_new_page_print(&log, f);
1261 u_log_context_destroy(&log);
1262 break;
1263 }
1264 case RING_DMA:
1265 si_dump_dma(sctx, saved, f);
1266 break;
1267
1268 default:
1269 break;
1270 }
1271
1272 fclose(f);
1273
1274 fprintf(stderr, "Detected a VM fault, exiting...\n");
1275 exit(0);
1276 }
1277
1278 void si_init_debug_functions(struct si_context *sctx)
1279 {
1280 sctx->b.b.dump_debug_state = si_dump_debug_state;
1281 sctx->b.check_vm_faults = si_check_vm_faults;
1282
1283 /* Set the initial dmesg timestamp for this context, so that
1284 * only new messages will be checked for VM faults.
1285 */
1286 if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
1287 si_vm_fault_occured(sctx, NULL);
1288 }