1b97ea40970f263e6e76d3263be9d5c3f2e5fe24
[mesa.git] / src / gallium / drivers / radeonsi / si_debug.c
1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Marek Olšák <maraeo@gmail.com>
25 */
26
27 #include "si_pipe.h"
28 #include "si_compute.h"
29 #include "sid.h"
30 #include "gfx9d.h"
31 #include "sid_tables.h"
32 #include "ddebug/dd_util.h"
33 #include "util/u_log.h"
34 #include "util/u_memory.h"
35 #include "ac_debug.h"
36
/* Forward declaration: the definition comes later in this file, but the
 * command-stream chunk printer needs to call it before that point. */
static void si_dump_bo_list(struct si_context *sctx,
			    const struct radeon_saved_cs *saved, FILE *f);

/* Debug option backed by the RADEON_REPLACE_SHADERS environment variable
 * (default NULL). si_replace_shader() reads it through
 * debug_get_option_replace_shaders(). */
DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
41
42 static void si_dump_shader(struct si_screen *sscreen,
43 enum pipe_shader_type processor,
44 const struct si_shader *shader, FILE *f)
45 {
46 if (shader->shader_log)
47 fwrite(shader->shader_log, shader->shader_log_size, 1, f);
48 else
49 si_shader_dump(sscreen, shader, NULL, processor, f, false);
50 }
51
/* Payload of a log chunk that prints one shader. */
struct si_log_chunk_shader {
	/* Context that was current when the chunk was created.
	 *
	 * Shader destruction assumes a current context (e.g. for unlinking
	 * PM4 packets). Destroying shaders without a context ought to work,
	 * but it would be exercised so rarely that it would likely break
	 * exactly when somebody is debugging. So the context is simply
	 * remembered here.
	 */
	struct si_context *ctx;
	/* The shader to print. */
	struct si_shader *shader;

	/* Keep-alive references; released when the chunk is destroyed. */
	struct si_shader_selector *sel;
	struct si_compute *program;
};
68
69 static void
70 si_log_chunk_shader_destroy(void *data)
71 {
72 struct si_log_chunk_shader *chunk = data;
73 si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL);
74 si_compute_reference(&chunk->program, NULL);
75 FREE(chunk);
76 }
77
78 static void
79 si_log_chunk_shader_print(void *data, FILE *f)
80 {
81 struct si_log_chunk_shader *chunk = data;
82 struct si_screen *sscreen = chunk->ctx->screen;
83 si_dump_shader(sscreen, chunk->shader->selector->info.processor,
84 chunk->shader, f);
85 }
86
87 static struct u_log_chunk_type si_log_chunk_type_shader = {
88 .destroy = si_log_chunk_shader_destroy,
89 .print = si_log_chunk_shader_print,
90 };
91
92 static void si_dump_gfx_shader(struct si_context *ctx,
93 const struct si_shader_ctx_state *state,
94 struct u_log_context *log)
95 {
96 struct si_shader *current = state->current;
97
98 if (!state->cso || !current)
99 return;
100
101 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
102 chunk->ctx = ctx;
103 chunk->shader = current;
104 si_shader_selector_reference(ctx, &chunk->sel, current->selector);
105 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
106 }
107
108 static void si_dump_compute_shader(const struct si_cs_shader_state *state,
109 struct u_log_context *log)
110 {
111 if (!state->program || state->program != state->emitted_program)
112 return;
113
114 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
115 chunk->shader = &state->program->shader;
116 si_compute_reference(&chunk->program, state->program);
117 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
118 }
119
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     number of the shader being compiled
 * \param binary  filled in from the replacement file on success
 * \return true if a replacement for \p num was listed and its file was read
 *         and handed to ac_elf_read(); false otherwise.
 *
 * A malformed variable (no ':' after a number) terminates the process.
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize;
	size_t nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Walk the "num:filename" entries until the one for "num" is found. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* Isolate the filename if more entries follow it. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* ELF objects are binary data: open in binary mode. */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	/* An empty file cannot be an ELF object. Reject it here so that a
	 * NULL return from a zero-sized allocation is not misreported as
	 * running out of memory. */
	if (filesize == 0) {
		fprintf(stderr, "radeonsi: replacement shader file is empty\n");
		goto out_close;
	}

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	nread = fread(buf, 1, filesize, f);
	if (nread != (size_t)filesize)
		goto file_error;

	ac_elf_read(buf, filesize, binary);
	replaced = true;
	goto out_close;

file_error:
	perror("radeonsi: reading shader");
out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;
}
211
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
 * read them, or use "aha -b -f file" to convert them to html.
 *
 * The macros below are terminal escape sequences that are concatenated
 * directly into the format strings used throughout this file.
 */
#define COLOR_RESET "\033[0m"
#define COLOR_RED "\033[31m"
#define COLOR_GREEN "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN "\033[1;36m"
220
221 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
222 unsigned offset)
223 {
224 struct radeon_winsys *ws = sctx->b.ws;
225 uint32_t value;
226
227 if (ws->read_registers(ws, offset, 1, &value))
228 ac_dump_reg(f, offset, value, ~0);
229 }
230
231 static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
232 {
233 if (sctx->screen->b.info.drm_major == 2 &&
234 sctx->screen->b.info.drm_minor < 42)
235 return; /* no radeon support */
236
237 fprintf(f, "Memory-mapped registers:\n");
238 si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
239
240 /* No other registers can be read on DRM < 3.1.0. */
241 if (sctx->screen->b.info.drm_major < 3 ||
242 sctx->screen->b.info.drm_minor < 1) {
243 fprintf(f, "\n");
244 return;
245 }
246
247 si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
248 si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
249 si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
250 si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
251 si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
252 si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
253 si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
254 if (sctx->b.chip_class <= VI) {
255 si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
256 si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
257 si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
258 }
259 si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
260 si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
261 si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
262 si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
263 si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
264 si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
265 si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
266 si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
267 si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
268 si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
269 fprintf(f, "\n");
270 }
271
/* Payload of a log chunk that prints a range of the gfx command stream. */
struct si_log_chunk_cs {
	struct si_context *ctx;
	/* Saved CS the range belongs to; the chunk holds a reference. */
	struct si_saved_cs *cs;
	/* Also print "Flushing." followed by the buffer list. */
	bool dump_bo_list;
	/* Dword range [gfx_begin, gfx_end) of the gfx IB to print. */
	unsigned gfx_begin, gfx_end;
};
278
279 static void si_log_chunk_type_cs_destroy(void *data)
280 {
281 struct si_log_chunk_cs *chunk = data;
282 si_saved_cs_reference(&chunk->cs, NULL);
283 free(chunk);
284 }
285
286 static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
287 unsigned begin, unsigned end,
288 int *last_trace_id, unsigned trace_id_count,
289 const char *name, enum chip_class chip_class)
290 {
291 unsigned orig_end = end;
292
293 assert(begin <= end);
294
295 fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
296 name, begin);
297
298 for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
299 struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];
300
301 if (begin < chunk->cdw) {
302 ac_parse_ib_chunk(f, chunk->buf + begin,
303 MIN2(end, chunk->cdw) - begin,
304 last_trace_id, trace_id_count,
305 chip_class, NULL, NULL);
306 }
307
308 if (end <= chunk->cdw)
309 return;
310
311 if (begin < chunk->cdw)
312 fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
313 name);
314
315 begin -= MIN2(begin, chunk->cdw);
316 end -= chunk->cdw;
317 }
318
319 assert(end <= cs->current.cdw);
320
321 ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
322 trace_id_count, chip_class, NULL, NULL);
323
324 fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
325 name, orig_end);
326 }
327
328 static void si_log_chunk_type_cs_print(void *data, FILE *f)
329 {
330 struct si_log_chunk_cs *chunk = data;
331 struct si_context *ctx = chunk->ctx;
332 struct si_saved_cs *scs = chunk->cs;
333 int last_trace_id = -1;
334
335 /* We are expecting that the ddebug pipe has already
336 * waited for the context, so this buffer should be idle.
337 * If the GPU is hung, there is no point in waiting for it.
338 */
339 uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
340 NULL,
341 PIPE_TRANSFER_UNSYNCHRONIZED |
342 PIPE_TRANSFER_READ);
343 if (map)
344 last_trace_id = map[0];
345
346 if (chunk->gfx_end != chunk->gfx_begin) {
347 if (chunk->gfx_begin == 0) {
348 if (ctx->init_config)
349 ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
350 NULL, 0, "IB2: Init config", ctx->b.chip_class,
351 NULL, NULL);
352
353 if (ctx->init_config_gs_rings)
354 ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
355 ctx->init_config_gs_rings->ndw,
356 NULL, 0, "IB2: Init GS rings", ctx->b.chip_class,
357 NULL, NULL);
358 }
359
360 if (scs->flushed) {
361 ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
362 chunk->gfx_end - chunk->gfx_begin,
363 &last_trace_id, map ? 1 : 0, "IB", ctx->b.chip_class,
364 NULL, NULL);
365 } else {
366 si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
367 chunk->gfx_end, &last_trace_id, map ? 1 : 0,
368 "IB", ctx->b.chip_class);
369 }
370 }
371
372 if (chunk->dump_bo_list) {
373 fprintf(f, "Flushing.\n\n");
374 si_dump_bo_list(ctx, &scs->gfx, f);
375 }
376 }
377
378 static const struct u_log_chunk_type si_log_chunk_type_cs = {
379 .destroy = si_log_chunk_type_cs_destroy,
380 .print = si_log_chunk_type_cs_print,
381 };
382
383 static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
384 bool dump_bo_list)
385 {
386 assert(ctx->current_saved_cs);
387
388 struct si_saved_cs *scs = ctx->current_saved_cs;
389 unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;
390
391 if (!dump_bo_list &&
392 gfx_cur == scs->gfx_last_dw)
393 return;
394
395 struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
396
397 chunk->ctx = ctx;
398 si_saved_cs_reference(&chunk->cs, scs);
399 chunk->dump_bo_list = dump_bo_list;
400
401 chunk->gfx_begin = scs->gfx_last_dw;
402 chunk->gfx_end = gfx_cur;
403 scs->gfx_last_dw = gfx_cur;
404
405 u_log_chunk(log, &si_log_chunk_type_cs, chunk);
406 }
407
/* Logger callback: "data" is the si_context. Logs the newly recorded part
 * of the command stream without the buffer list. */
void si_auto_log_cs(void *data, struct u_log_context *log)
{
	si_log_cs((struct si_context *)data, log, false);
}
413
414 void si_log_hw_flush(struct si_context *sctx)
415 {
416 if (!sctx->b.log)
417 return;
418
419 si_log_cs(sctx, sctx->b.log, true);
420 }
421
422 static const char *priority_to_string(enum radeon_bo_priority priority)
423 {
424 #define ITEM(x) [RADEON_PRIO_##x] = #x
425 static const char *table[64] = {
426 ITEM(FENCE),
427 ITEM(TRACE),
428 ITEM(SO_FILLED_SIZE),
429 ITEM(QUERY),
430 ITEM(IB1),
431 ITEM(IB2),
432 ITEM(DRAW_INDIRECT),
433 ITEM(INDEX_BUFFER),
434 ITEM(VCE),
435 ITEM(UVD),
436 ITEM(SDMA_BUFFER),
437 ITEM(SDMA_TEXTURE),
438 ITEM(CP_DMA),
439 ITEM(CONST_BUFFER),
440 ITEM(DESCRIPTORS),
441 ITEM(BORDER_COLORS),
442 ITEM(SAMPLER_BUFFER),
443 ITEM(VERTEX_BUFFER),
444 ITEM(SHADER_RW_BUFFER),
445 ITEM(COMPUTE_GLOBAL),
446 ITEM(SAMPLER_TEXTURE),
447 ITEM(SHADER_RW_IMAGE),
448 ITEM(SAMPLER_TEXTURE_MSAA),
449 ITEM(COLOR_BUFFER),
450 ITEM(DEPTH_BUFFER),
451 ITEM(COLOR_BUFFER_MSAA),
452 ITEM(DEPTH_BUFFER_MSAA),
453 ITEM(CMASK),
454 ITEM(DCC),
455 ITEM(HTILE),
456 ITEM(SHADER_BINARY),
457 ITEM(SHADER_RINGS),
458 ITEM(SCRATCH_BUFFER),
459 };
460 #undef ITEM
461
462 assert(priority < ARRAY_SIZE(table));
463 return table[priority];
464 }
465
466 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
467 const struct radeon_bo_list_item *b)
468 {
469 return a->vm_address < b->vm_address ? -1 :
470 a->vm_address > b->vm_address ? 1 : 0;
471 }
472
473 static void si_dump_bo_list(struct si_context *sctx,
474 const struct radeon_saved_cs *saved, FILE *f)
475 {
476 unsigned i,j;
477
478 if (!saved->bo_list)
479 return;
480
481 /* Sort the list according to VM adddresses first. */
482 qsort(saved->bo_list, saved->bo_count,
483 sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);
484
485 fprintf(f, "Buffer list (in units of pages = 4kB):\n"
486 COLOR_YELLOW " Size VM start page "
487 "VM end page Usage" COLOR_RESET "\n");
488
489 for (i = 0; i < saved->bo_count; i++) {
490 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
491 const unsigned page_size = sctx->b.screen->info.gart_page_size;
492 uint64_t va = saved->bo_list[i].vm_address;
493 uint64_t size = saved->bo_list[i].bo_size;
494 bool hit = false;
495
496 /* If there's unused virtual memory between 2 buffers, print it. */
497 if (i) {
498 uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
499 saved->bo_list[i-1].bo_size;
500
501 if (va > previous_va_end) {
502 fprintf(f, " %10"PRIu64" -- hole --\n",
503 (va - previous_va_end) / page_size);
504 }
505 }
506
507 /* Print the buffer. */
508 fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ",
509 size / page_size, va / page_size, (va + size) / page_size);
510
511 /* Print the usage. */
512 for (j = 0; j < 64; j++) {
513 if (!(saved->bo_list[i].priority_usage & (1ull << j)))
514 continue;
515
516 fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
517 hit = true;
518 }
519 fprintf(f, "\n");
520 }
521 fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
522 " Other buffers can still be allocated there.\n\n");
523 }
524
525 static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)
526 {
527 struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
528 struct r600_texture *rtex;
529 int i;
530
531 for (i = 0; i < state->nr_cbufs; i++) {
532 if (!state->cbufs[i])
533 continue;
534
535 rtex = (struct r600_texture*)state->cbufs[i]->texture;
536 u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
537 r600_print_texture_info(sctx->b.screen, rtex, log);
538 u_log_printf(log, "\n");
539 }
540
541 if (state->zsbuf) {
542 rtex = (struct r600_texture*)state->zsbuf->texture;
543 u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
544 r600_print_texture_info(sctx->b.screen, rtex, log);
545 u_log_printf(log, "\n");
546 }
547 }
548
/* Maps a logical slot number to the slot index used in a descriptor list. */
typedef unsigned (*slot_remap_func)(unsigned);

/* Payload of a log chunk that prints one descriptor list. */
struct si_log_chunk_desc_list {
	/** Pointer to memory map of buffer where the list is uploaded */
	uint32_t *gpu_list;
	/** Reference of buffer where the list is uploaded, so that gpu_list
	 * is kept live. */
	struct r600_resource *buf;

	/* Prefix and element label used in each slot's heading. */
	const char *shader_name;
	const char *elem_name;
	/* Applied to the slot number when indexing gpu_list. */
	slot_remap_func slot_remap;
	/* Size of one descriptor, in dwords. */
	unsigned element_dw_size;
	/* Number of descriptors stored in list[]. */
	unsigned num_elements;

	/* CPU-side copy of the descriptors, num_elements * element_dw_size
	 * dwords, stored in logical slot order. */
	uint32_t list[0];
};
566
567 static void
568 si_log_chunk_desc_list_destroy(void *data)
569 {
570 struct si_log_chunk_desc_list *chunk = data;
571 r600_resource_reference(&chunk->buf, NULL);
572 FREE(chunk);
573 }
574
575 static void
576 si_log_chunk_desc_list_print(void *data, FILE *f)
577 {
578 struct si_log_chunk_desc_list *chunk = data;
579
580 for (unsigned i = 0; i < chunk->num_elements; i++) {
581 unsigned cpu_dw_offset = i * chunk->element_dw_size;
582 unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;
583 const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";
584 uint32_t *cpu_list = chunk->list + cpu_dw_offset;
585 uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;
586
587 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
588 chunk->shader_name, chunk->elem_name, i, list_note);
589
590 switch (chunk->element_dw_size) {
591 case 4:
592 for (unsigned j = 0; j < 4; j++)
593 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
594 gpu_list[j], 0xffffffff);
595 break;
596 case 8:
597 for (unsigned j = 0; j < 8; j++)
598 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
599 gpu_list[j], 0xffffffff);
600
601 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
602 for (unsigned j = 0; j < 4; j++)
603 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
604 gpu_list[4+j], 0xffffffff);
605 break;
606 case 16:
607 for (unsigned j = 0; j < 8; j++)
608 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
609 gpu_list[j], 0xffffffff);
610
611 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
612 for (unsigned j = 0; j < 4; j++)
613 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
614 gpu_list[4+j], 0xffffffff);
615
616 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
617 for (unsigned j = 0; j < 8; j++)
618 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
619 gpu_list[8+j], 0xffffffff);
620
621 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
622 for (unsigned j = 0; j < 4; j++)
623 ac_dump_reg(f, R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
624 gpu_list[12+j], 0xffffffff);
625 break;
626 }
627
628 if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {
629 fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
630 COLOR_RESET "\n");
631 }
632
633 fprintf(f, "\n");
634 }
635
636 }
637
638 static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {
639 .destroy = si_log_chunk_desc_list_destroy,
640 .print = si_log_chunk_desc_list_print,
641 };
642
643 static void si_dump_descriptor_list(struct si_descriptors *desc,
644 const char *shader_name,
645 const char *elem_name,
646 unsigned element_dw_size,
647 unsigned num_elements,
648 slot_remap_func slot_remap,
649 struct u_log_context *log)
650 {
651 if (!desc->list)
652 return;
653
654 struct si_log_chunk_desc_list *chunk =
655 CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list,
656 4 * element_dw_size * num_elements);
657 chunk->shader_name = shader_name;
658 chunk->elem_name = elem_name;
659 chunk->element_dw_size = element_dw_size;
660 chunk->num_elements = num_elements;
661 chunk->slot_remap = slot_remap;
662
663 r600_resource_reference(&chunk->buf, desc->buffer);
664 chunk->gpu_list = desc->gpu_list;
665
666 for (unsigned i = 0; i < num_elements; ++i) {
667 memcpy(&chunk->list[i * element_dw_size],
668 &desc->list[slot_remap(i) * element_dw_size],
669 4 * element_dw_size);
670 }
671
672 u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);
673 }
674
/* Slot remapping callback for lists whose logical slot number is already
 * the index into the list: returns its argument unchanged. */
static unsigned si_identity(unsigned slot)
{
	const unsigned same_slot = slot;

	return same_slot;
}
679
680 static void si_dump_descriptors(struct si_context *sctx,
681 enum pipe_shader_type processor,
682 const struct tgsi_shader_info *info,
683 struct u_log_context *log)
684 {
685 struct si_descriptors *descs =
686 &sctx->descriptors[SI_DESCS_FIRST_SHADER +
687 processor * SI_NUM_SHADER_DESCS];
688 static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
689 const char *name = shader_name[processor];
690 unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;
691 unsigned enabled_images;
692
693 if (info) {
694 enabled_constbuf = info->const_buffers_declared;
695 enabled_shaderbuf = info->shader_buffers_declared;
696 enabled_samplers = info->samplers_declared;
697 enabled_images = info->images_declared;
698 } else {
699 enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
700 SI_NUM_SHADER_BUFFERS;
701 enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
702 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
703 enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
704 (32 - SI_NUM_SHADER_BUFFERS);
705 enabled_samplers = sctx->samplers[processor].views.enabled_mask;
706 enabled_images = sctx->images[processor].enabled_mask;
707 }
708
709 if (processor == PIPE_SHADER_VERTEX) {
710 assert(info); /* only CS may not have an info struct */
711
712 si_dump_descriptor_list(&sctx->vertex_buffers, name,
713 " - Vertex buffer", 4, info->num_inputs,
714 si_identity, log);
715 }
716
717 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
718 name, " - Constant buffer", 4,
719 util_last_bit(enabled_constbuf),
720 si_get_constbuf_slot, log);
721 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
722 name, " - Shader buffer", 4,
723 util_last_bit(enabled_shaderbuf),
724 si_get_shaderbuf_slot, log);
725 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
726 name, " - Sampler", 16,
727 util_last_bit(enabled_samplers),
728 si_get_sampler_slot, log);
729 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
730 name, " - Image", 8,
731 util_last_bit(enabled_images),
732 si_get_image_slot, log);
733 }
734
735 static void si_dump_gfx_descriptors(struct si_context *sctx,
736 const struct si_shader_ctx_state *state,
737 struct u_log_context *log)
738 {
739 if (!state->cso || !state->current)
740 return;
741
742 si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log);
743 }
744
745 static void si_dump_compute_descriptors(struct si_context *sctx,
746 struct u_log_context *log)
747 {
748 if (!sctx->cs_shader_state.program ||
749 sctx->cs_shader_state.program != sctx->cs_shader_state.emitted_program)
750 return;
751
752 si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log);
753 }
754
/* One line of shader disassembly together with its position in the binary. */
struct si_shader_inst {
	char text[160]; /* one disasm line */
	unsigned offset; /* instruction offset */
	unsigned size; /* instruction size = 4 or 8 */
};

/**
 * Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * \param disasm        newline-separated disassembly; NULL is treated as
 *                      empty. A trailing piece without '\n' is ignored.
 * \param start_addr    address of the first instruction of the whole
 *                      array; only used for the printed PC annotation
 * \param num           in/out: number of valid entries in \p instructions
 * \param instructions  array that receives the lines; the caller must
 *                      provide room for every line appended
 *
 * Each entry's offset continues from the previous entry (offset + size), so
 * consecutive calls build one contiguous listing. Lines longer than the
 * text buffer are truncated rather than overflowing it.
 */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct si_shader_inst *instructions)
{
	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	const char *next;

	if (!disasm)
		return;

	while ((next = strchr(disasm, '\n'))) {
		struct si_shader_inst *inst = &instructions[*num];
		const size_t line_len = (size_t)(next - disasm);
		/* Clamp the copy so the line plus its terminator always fits. */
		const size_t len = line_len < sizeof(inst->text) ?
				   line_len : sizeof(inst->text) - 1;

		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		/* Only look for ';' inside the current line. A line without
		 * one is treated as a 4-byte instruction. */
		const char *semicolon = memchr(disasm, ';', line_len);
		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
		inst->size = (semicolon && next - semicolon > 16) ? 8 : 4;

		snprintf(inst->text + len, sizeof(inst->text) - len,
			 " [PC=0x%"PRIx64", off=%u, size=%u]",
			 start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
		disasm = next + 1;
	}
}
794
/* Capacity of the wave arrays used in this file. */
#define MAX_WAVES_PER_CHIP (64 * 40)

/* State of one wave, as parsed from the output of "umr -wa". */
struct si_wave_info {
	unsigned se; /* shader engine */
	unsigned sh; /* shader array */
	unsigned cu; /* compute unit */
	unsigned simd;
	unsigned wave;
	uint32_t status;
	uint64_t pc; /* program counter */
	uint32_t inst_dw0;
	uint32_t inst_dw1;
	uint64_t exec;
	bool matched; /* whether the wave is used by a currently-bound shader */
};

/* qsort() comparator for waves: orders by PC first, then by SE, SH, CU,
 * SIMD and wave number. Returns -1, 0 or 1. */
static int compare_wave(const void *p1, const void *p2)
{
	const struct si_wave_info *a = p1;
	const struct si_wave_info *b = p2;

	if (a->pc != b->pc)
		return a->pc < b->pc ? -1 : 1;
	if (a->se != b->se)
		return a->se < b->se ? -1 : 1;
	if (a->sh != b->sh)
		return a->sh < b->sh ? -1 : 1;
	if (a->cu != b->cu)
		return a->cu < b->cu ? -1 : 1;
	if (a->simd != b->simd)
		return a->simd < b->simd ? -1 : 1;
	if (a->wave != b->wave)
		return a->wave < b->wave ? -1 : 1;

	return 0;
}
844
845 /* Return wave information. "waves" should be a large enough array. */
846 static unsigned si_get_wave_info(struct si_wave_info waves[MAX_WAVES_PER_CHIP])
847 {
848 char line[2000];
849 unsigned num_waves = 0;
850
851 FILE *p = popen("umr -wa", "r");
852 if (!p)
853 return 0;
854
855 if (!fgets(line, sizeof(line), p) ||
856 strncmp(line, "SE", 2) != 0) {
857 pclose(p);
858 return 0;
859 }
860
861 while (fgets(line, sizeof(line), p)) {
862 struct si_wave_info *w;
863 uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
864
865 assert(num_waves < MAX_WAVES_PER_CHIP);
866 w = &waves[num_waves];
867
868 if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
869 &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
870 &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
871 &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
872 w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
873 w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
874 w->matched = false;
875 num_waves++;
876 }
877 }
878
879 qsort(waves, num_waves, sizeof(struct si_wave_info), compare_wave);
880
881 pclose(p);
882 return num_waves;
883 }
884
885 /* If the shader is being executed, print its asm instructions, and annotate
886 * those that are being executed right now with information about waves that
887 * execute them. This is most useful during a GPU hang.
888 */
889 static void si_print_annotated_shader(struct si_shader *shader,
890 struct si_wave_info *waves,
891 unsigned num_waves,
892 FILE *f)
893 {
894 if (!shader || !shader->binary.disasm_string)
895 return;
896
897 uint64_t start_addr = shader->bo->gpu_address;
898 uint64_t end_addr = start_addr + shader->bo->b.b.width0;
899 unsigned i;
900
901 /* See if any wave executes the shader. */
902 for (i = 0; i < num_waves; i++) {
903 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
904 break;
905 }
906 if (i == num_waves)
907 return; /* the shader is not being executed */
908
909 /* Remember the first found wave. The waves are sorted according to PC. */
910 waves = &waves[i];
911 num_waves -= i;
912
913 /* Get the list of instructions.
914 * Buffer size / 4 is the upper bound of the instruction count.
915 */
916 unsigned num_inst = 0;
917 struct si_shader_inst *instructions =
918 calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
919
920 if (shader->prolog) {
921 si_add_split_disasm(shader->prolog->binary.disasm_string,
922 start_addr, &num_inst, instructions);
923 }
924 if (shader->previous_stage) {
925 si_add_split_disasm(shader->previous_stage->binary.disasm_string,
926 start_addr, &num_inst, instructions);
927 }
928 if (shader->prolog2) {
929 si_add_split_disasm(shader->prolog2->binary.disasm_string,
930 start_addr, &num_inst, instructions);
931 }
932 si_add_split_disasm(shader->binary.disasm_string,
933 start_addr, &num_inst, instructions);
934 if (shader->epilog) {
935 si_add_split_disasm(shader->epilog->binary.disasm_string,
936 start_addr, &num_inst, instructions);
937 }
938
939 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
940 si_get_shader_name(shader, shader->selector->type));
941
942 /* Print instructions with annotations. */
943 for (i = 0; i < num_inst; i++) {
944 struct si_shader_inst *inst = &instructions[i];
945
946 fprintf(f, "%s\n", inst->text);
947
948 /* Print which waves execute the instruction right now. */
949 while (num_waves && start_addr + inst->offset == waves->pc) {
950 fprintf(f,
951 " " COLOR_GREEN "^ SE%u SH%u CU%u "
952 "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
953 waves->se, waves->sh, waves->cu, waves->simd,
954 waves->wave, waves->exec);
955
956 if (inst->size == 4) {
957 fprintf(f, "INST32=%08X" COLOR_RESET "\n",
958 waves->inst_dw0);
959 } else {
960 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
961 waves->inst_dw0, waves->inst_dw1);
962 }
963
964 waves->matched = true;
965 waves = &waves[1];
966 num_waves--;
967 }
968 }
969
970 fprintf(f, "\n\n");
971 free(instructions);
972 }
973
974 static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
975 {
976 struct si_wave_info waves[MAX_WAVES_PER_CHIP];
977 unsigned num_waves = si_get_wave_info(waves);
978
979 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
980 "\n\n", num_waves);
981
982 si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
983 si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
984 si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
985 si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
986 si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
987
988 /* Print waves executing shaders that are not currently bound. */
989 unsigned i;
990 bool found = false;
991 for (i = 0; i < num_waves; i++) {
992 if (waves[i].matched)
993 continue;
994
995 if (!found) {
996 fprintf(f, COLOR_CYAN
997 "Waves not executing currently-bound shaders:"
998 COLOR_RESET "\n");
999 found = true;
1000 }
1001 fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
1002 " INST=%08X %08X PC=%"PRIx64"\n",
1003 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
1004 waves[i].wave, waves[i].exec, waves[i].inst_dw0,
1005 waves[i].inst_dw1, waves[i].pc);
1006 }
1007 if (found)
1008 fprintf(f, "\n\n");
1009 }
1010
1011 static void si_dump_command(const char *title, const char *command, FILE *f)
1012 {
1013 char line[2000];
1014
1015 FILE *p = popen(command, "r");
1016 if (!p)
1017 return;
1018
1019 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
1020 while (fgets(line, sizeof(line), p))
1021 fputs(line, f);
1022 fprintf(f, "\n\n");
1023 pclose(p);
1024 }
1025
1026 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
1027 unsigned flags)
1028 {
1029 struct si_context *sctx = (struct si_context*)ctx;
1030
1031 if (sctx->b.log)
1032 u_log_flush(sctx->b.log);
1033
1034 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
1035 si_dump_debug_registers(sctx, f);
1036
1037 si_dump_annotated_shaders(sctx, f);
1038 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
1039 si_dump_command("Wave information", "umr -O bits -wa", f);
1040 }
1041 }
1042
1043 void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
1044 {
1045 if (!log)
1046 return;
1047
1048 si_dump_framebuffer(sctx, log);
1049
1050 si_dump_gfx_shader(sctx, &sctx->vs_shader, log);
1051 si_dump_gfx_shader(sctx, &sctx->tcs_shader, log);
1052 si_dump_gfx_shader(sctx, &sctx->tes_shader, log);
1053 si_dump_gfx_shader(sctx, &sctx->gs_shader, log);
1054 si_dump_gfx_shader(sctx, &sctx->ps_shader, log);
1055
1056 si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
1057 "", "RW buffers", 4, SI_NUM_RW_BUFFERS,
1058 si_identity, log);
1059 si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log);
1060 si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log);
1061 si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log);
1062 si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log);
1063 si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log);
1064 }
1065
1066 void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)
1067 {
1068 if (!log)
1069 return;
1070
1071 si_dump_compute_shader(&sctx->cs_shader_state, log);
1072 si_dump_compute_descriptors(sctx, log);
1073 }
1074
1075 static void si_dump_dma(struct si_context *sctx,
1076 struct radeon_saved_cs *saved, FILE *f)
1077 {
1078 static const char ib_name[] = "sDMA IB";
1079 unsigned i;
1080
1081 si_dump_bo_list(sctx, saved, f);
1082
1083 fprintf(f, "------------------ %s begin ------------------\n", ib_name);
1084
1085 for (i = 0; i < saved->num_dw; ++i) {
1086 fprintf(f, " %08x\n", saved->ib[i]);
1087 }
1088
1089 fprintf(f, "------------------- %s end -------------------\n", ib_name);
1090 fprintf(f, "\n");
1091
1092 fprintf(f, "SDMA Dump Done.\n");
1093 }
1094
1095 static bool si_vm_fault_occured(struct si_context *sctx, uint64_t *out_addr)
1096 {
1097 char line[2000];
1098 unsigned sec, usec;
1099 int progress = 0;
1100 uint64_t timestamp = 0;
1101 bool fault = false;
1102
1103 FILE *p = popen("dmesg", "r");
1104 if (!p)
1105 return false;
1106
1107 while (fgets(line, sizeof(line), p)) {
1108 char *msg, len;
1109
1110 if (!line[0] || line[0] == '\n')
1111 continue;
1112
1113 /* Get the timestamp. */
1114 if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
1115 static bool hit = false;
1116 if (!hit) {
1117 fprintf(stderr, "%s: failed to parse line '%s'\n",
1118 __func__, line);
1119 hit = true;
1120 }
1121 continue;
1122 }
1123 timestamp = sec * 1000000ull + usec;
1124
1125 /* If just updating the timestamp. */
1126 if (!out_addr)
1127 continue;
1128
1129 /* Process messages only if the timestamp is newer. */
1130 if (timestamp <= sctx->dmesg_timestamp)
1131 continue;
1132
1133 /* Only process the first VM fault. */
1134 if (fault)
1135 continue;
1136
1137 /* Remove trailing \n */
1138 len = strlen(line);
1139 if (len && line[len-1] == '\n')
1140 line[len-1] = 0;
1141
1142 /* Get the message part. */
1143 msg = strchr(line, ']');
1144 if (!msg) {
1145 assert(0);
1146 continue;
1147 }
1148 msg++;
1149
1150 const char *header_line, *addr_line_prefix, *addr_line_format;
1151
1152 if (sctx->b.chip_class >= GFX9) {
1153 /* Match this:
1154 * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
1155 * ..: at page 0x0000000219f8f000 from 27
1156 * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
1157 */
1158 header_line = "VMC page fault";
1159 addr_line_prefix = " at page";
1160 addr_line_format = "%"PRIx64;
1161 } else {
1162 header_line = "GPU fault detected:";
1163 addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
1164 addr_line_format = "%"PRIX64;
1165 }
1166
1167 switch (progress) {
1168 case 0:
1169 if (strstr(msg, header_line))
1170 progress = 1;
1171 break;
1172 case 1:
1173 msg = strstr(msg, addr_line_prefix);
1174 if (msg) {
1175 msg = strstr(msg, "0x");
1176 if (msg) {
1177 msg += 2;
1178 if (sscanf(msg, addr_line_format, out_addr) == 1)
1179 fault = true;
1180 }
1181 }
1182 progress = 0;
1183 break;
1184 default:
1185 progress = 0;
1186 }
1187 }
1188 pclose(p);
1189
1190 if (timestamp > sctx->dmesg_timestamp)
1191 sctx->dmesg_timestamp = timestamp;
1192 return fault;
1193 }
1194
1195 void si_check_vm_faults(struct r600_common_context *ctx,
1196 struct radeon_saved_cs *saved, enum ring_type ring)
1197 {
1198 struct si_context *sctx = (struct si_context *)ctx;
1199 struct pipe_screen *screen = sctx->b.b.screen;
1200 FILE *f;
1201 uint64_t addr;
1202 char cmd_line[4096];
1203
1204 if (!si_vm_fault_occured(sctx, &addr))
1205 return;
1206
1207 f = dd_get_debug_file(false);
1208 if (!f)
1209 return;
1210
1211 fprintf(f, "VM fault report.\n\n");
1212 if (os_get_command_line(cmd_line, sizeof(cmd_line)))
1213 fprintf(f, "Command: %s\n", cmd_line);
1214 fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
1215 fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
1216 fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
1217 fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
1218
1219 if (sctx->apitrace_call_number)
1220 fprintf(f, "Last apitrace call: %u\n\n",
1221 sctx->apitrace_call_number);
1222
1223 switch (ring) {
1224 case RING_GFX: {
1225 struct u_log_context log;
1226 u_log_context_init(&log);
1227
1228 si_log_draw_state(sctx, &log);
1229 si_log_compute_state(sctx, &log);
1230
1231 u_log_new_page_print(&log, f);
1232 u_log_context_destroy(&log);
1233 break;
1234 }
1235 case RING_DMA:
1236 si_dump_dma(sctx, saved, f);
1237 break;
1238
1239 default:
1240 break;
1241 }
1242
1243 fclose(f);
1244
1245 fprintf(stderr, "Detected a VM fault, exiting...\n");
1246 exit(0);
1247 }
1248
1249 void si_init_debug_functions(struct si_context *sctx)
1250 {
1251 sctx->b.b.dump_debug_state = si_dump_debug_state;
1252 sctx->b.check_vm_faults = si_check_vm_faults;
1253
1254 /* Set the initial dmesg timestamp for this context, so that
1255 * only new messages will be checked for VM faults.
1256 */
1257 if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
1258 si_vm_fault_occured(sctx, NULL);
1259 }