88fa11acda51d2c5eb0ddf554f4951cee5f80d48
[mesa.git] / src / gallium / drivers / radeonsi / si_debug.c
1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_pipe.h"
25 #include "si_compute.h"
26 #include "sid.h"
27 #include "gfx9d.h"
28 #include "sid_tables.h"
29 #include "ddebug/dd_util.h"
30 #include "util/u_log.h"
31 #include "util/u_memory.h"
32 #include "ac_debug.h"
33
34 static void si_dump_bo_list(struct si_context *sctx,
35 const struct radeon_saved_cs *saved, FILE *f);
36
37 DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
38
39 static void si_dump_shader(struct si_screen *sscreen,
40 enum pipe_shader_type processor,
41 const struct si_shader *shader, FILE *f)
42 {
43 if (shader->shader_log)
44 fwrite(shader->shader_log, shader->shader_log_size, 1, f);
45 else
46 si_shader_dump(sscreen, shader, NULL, processor, f, false);
47 }
48
/* u_log chunk payload that snapshots one shader for deferred printing. */
struct si_log_chunk_shader {
	/* The shader destroy code assumes a current context for unlinking of
	 * PM4 packets etc.
	 *
	 * While we should be able to destroy shaders without a context, doing
	 * so would happen only very rarely and be therefore likely to fail
	 * just when you're trying to debug something. Let's just remember the
	 * current context in the chunk.
	 */
	struct si_context *ctx;
	/* Shader to print; kept alive via one of the references below. */
	struct si_shader *shader;
	enum pipe_shader_type processor;

	/* For keep-alive reference counts */
	struct si_shader_selector *sel;
	struct si_compute *program;
};
66
67 static void
68 si_log_chunk_shader_destroy(void *data)
69 {
70 struct si_log_chunk_shader *chunk = data;
71 si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL);
72 si_compute_reference(&chunk->program, NULL);
73 FREE(chunk);
74 }
75
76 static void
77 si_log_chunk_shader_print(void *data, FILE *f)
78 {
79 struct si_log_chunk_shader *chunk = data;
80 struct si_screen *sscreen = chunk->ctx->screen;
81 si_dump_shader(sscreen, chunk->processor,
82 chunk->shader, f);
83 }
84
85 static struct u_log_chunk_type si_log_chunk_type_shader = {
86 .destroy = si_log_chunk_shader_destroy,
87 .print = si_log_chunk_shader_print,
88 };
89
90 static void si_dump_gfx_shader(struct si_context *ctx,
91 const struct si_shader_ctx_state *state,
92 struct u_log_context *log)
93 {
94 struct si_shader *current = state->current;
95
96 if (!state->cso || !current)
97 return;
98
99 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
100 chunk->ctx = ctx;
101 chunk->processor = state->cso->info.processor;
102 chunk->shader = current;
103 si_shader_selector_reference(ctx, &chunk->sel, current->selector);
104 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
105 }
106
107 static void si_dump_compute_shader(struct si_context *ctx,
108 struct u_log_context *log)
109 {
110 const struct si_cs_shader_state *state = &ctx->cs_shader_state;
111
112 if (!state->program)
113 return;
114
115 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
116 chunk->ctx = ctx;
117 chunk->processor = PIPE_SHADER_COMPUTE;
118 chunk->shader = &state->program->shader;
119 si_compute_reference(&chunk->program, state->program);
120 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
121 }
122
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     index of the compiled shader to look up in the list
 * \param binary  filled with the parsed replacement ELF on success
 * \return true if a replacement was found and loaded
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize, nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Scan the "num:filename" entries for a matching shader index. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* Isolate the filename if further entries follow it. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* Open in binary mode: the file is an ELF object, and text mode
	 * would corrupt it on platforms that translate line endings. */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	/* Determine the file size by seeking to the end. */
	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	nread = fread(buf, 1, filesize, f);
	if (nread != filesize)
		goto file_error;

	ac_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
214
215 /* Parsed IBs are difficult to read without colors. Use "less -R file" to
216 * read them, or use "aha -b -f file" to convert them to html.
217 */
218 #define COLOR_RESET "\033[0m"
219 #define COLOR_RED "\033[31m"
220 #define COLOR_GREEN "\033[1;32m"
221 #define COLOR_YELLOW "\033[1;33m"
222 #define COLOR_CYAN "\033[1;36m"
223
224 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
225 unsigned offset)
226 {
227 struct radeon_winsys *ws = sctx->b.ws;
228 uint32_t value;
229
230 if (ws->read_registers(ws, offset, 1, &value))
231 ac_dump_reg(f, sctx->b.chip_class, offset, value, ~0);
232 }
233
/* Dump GPU status registers via MMIO reads; most useful while hung.
 * Availability depends on the kernel driver (radeon vs amdgpu) version. */
static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
{
	if (sctx->screen->b.info.drm_major == 2 &&
	    sctx->screen->b.info.drm_minor < 42)
		return; /* no radeon support */

	fprintf(f, "Memory-mapped registers:\n");
	si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);

	/* No other registers can be read on DRM < 3.1.0. */
	/* NOTE(review): this test would also reject a hypothetical
	 * drm_major > 3 with drm_minor == 0 — confirm that's intended. */
	if (sctx->screen->b.info.drm_major < 3 ||
	    sctx->screen->b.info.drm_minor < 1) {
		fprintf(f, "\n");
		return;
	}

	si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
	si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
	si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
	si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
	si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
	si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
	si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
	/* SRBM registers only exist up to VI. */
	if (sctx->b.chip_class <= VI) {
		si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
		si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
		si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
	}
	si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
	si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
	si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
	si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
	si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
	si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
	si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
	si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
	si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
	si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
	fprintf(f, "\n");
}
274
/* u_log chunk payload describing a range of a gfx command stream. */
struct si_log_chunk_cs {
	struct si_context *ctx;
	/* Referenced saved CS; keeps trace buffer and IB copy alive. */
	struct si_saved_cs *cs;
	/* Also dump the buffer list (set at flush boundaries). */
	bool dump_bo_list;
	/* Dword range of the gfx IB covered by this chunk. */
	unsigned gfx_begin, gfx_end;
};
281
282 static void si_log_chunk_type_cs_destroy(void *data)
283 {
284 struct si_log_chunk_cs *chunk = data;
285 si_saved_cs_reference(&chunk->cs, NULL);
286 free(chunk);
287 }
288
/* Parse and print the dword range [begin, end) of a not-yet-flushed command
 * stream. The CS is stored as a list of retired chunks (cs->prev) followed by
 * the chunk currently being filled (cs->current); begin/end are offsets from
 * the start of the whole IB and get rebased as each chunk is consumed.
 * last_trace_id/trace_id_count let the parser annotate trace points. */
static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
				unsigned begin, unsigned end,
				int *last_trace_id, unsigned trace_id_count,
				const char *name, enum chip_class chip_class)
{
	unsigned orig_end = end;

	assert(begin <= end);

	fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
		name, begin);

	for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
		struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];

		/* Print the part of [begin, end) that falls in this chunk. */
		if (begin < chunk->cdw) {
			ac_parse_ib_chunk(f, chunk->buf + begin,
					  MIN2(end, chunk->cdw) - begin,
					  last_trace_id, trace_id_count,
					  chip_class, NULL, NULL);
		}

		/* Range fully consumed; note: no end banner in this case. */
		if (end <= chunk->cdw)
			return;

		if (begin < chunk->cdw)
			fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
				name);

		/* Rebase the remaining range past this chunk. */
		begin -= MIN2(begin, chunk->cdw);
		end -= chunk->cdw;
	}

	assert(end <= cs->current.cdw);

	ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
			  trace_id_count, chip_class, NULL, NULL);

	fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
		name, orig_end);
}
330
/* u_log print callback for a CS chunk: show the init-config IB2s when this
 * chunk starts at dword 0, parse the covered gfx IB range (from the saved
 * copy if already flushed, otherwise from the live CS), and optionally dump
 * the buffer list at a flush boundary. */
static void si_log_chunk_type_cs_print(void *data, FILE *f)
{
	struct si_log_chunk_cs *chunk = data;
	struct si_context *ctx = chunk->ctx;
	struct si_saved_cs *scs = chunk->cs;
	int last_trace_id = -1;

	/* We are expecting that the ddebug pipe has already
	 * waited for the context, so this buffer should be idle.
	 * If the GPU is hung, there is no point in waiting for it.
	 */
	uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
					      NULL,
					      PIPE_TRANSFER_UNSYNCHRONIZED |
					      PIPE_TRANSFER_READ);
	if (map)
		last_trace_id = map[0];

	if (chunk->gfx_end != chunk->gfx_begin) {
		/* The first chunk of the IB also shows the state-init IB2s. */
		if (chunk->gfx_begin == 0) {
			if (ctx->init_config)
				ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
					    NULL, 0, "IB2: Init config", ctx->b.chip_class,
					    NULL, NULL);

			if (ctx->init_config_gs_rings)
				ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
					    ctx->init_config_gs_rings->ndw,
					    NULL, 0, "IB2: Init GS rings", ctx->b.chip_class,
					    NULL, NULL);
		}

		/* After a flush, the live winsys CS is gone; use the copy. */
		if (scs->flushed) {
			ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
				    chunk->gfx_end - chunk->gfx_begin,
				    &last_trace_id, map ? 1 : 0, "IB", ctx->b.chip_class,
				    NULL, NULL);
		} else {
			si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
					    chunk->gfx_end, &last_trace_id, map ? 1 : 0,
					    "IB", ctx->b.chip_class);
		}
	}

	if (chunk->dump_bo_list) {
		fprintf(f, "Flushing.\n\n");
		si_dump_bo_list(ctx, &scs->gfx, f);
	}
}
380
/* Chunk vtable for command-stream ranges. */
static const struct u_log_chunk_type si_log_chunk_type_cs = {
	.destroy = si_log_chunk_type_cs_destroy,
	.print = si_log_chunk_type_cs_print,
};
385
386 static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
387 bool dump_bo_list)
388 {
389 assert(ctx->current_saved_cs);
390
391 struct si_saved_cs *scs = ctx->current_saved_cs;
392 unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;
393
394 if (!dump_bo_list &&
395 gfx_cur == scs->gfx_last_dw)
396 return;
397
398 struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
399
400 chunk->ctx = ctx;
401 si_saved_cs_reference(&chunk->cs, scs);
402 chunk->dump_bo_list = dump_bo_list;
403
404 chunk->gfx_begin = scs->gfx_last_dw;
405 chunk->gfx_end = gfx_cur;
406 scs->gfx_last_dw = gfx_cur;
407
408 u_log_chunk(log, &si_log_chunk_type_cs, chunk);
409 }
410
411 void si_auto_log_cs(void *data, struct u_log_context *log)
412 {
413 struct si_context *ctx = (struct si_context *)data;
414 si_log_cs(ctx, log, false);
415 }
416
417 void si_log_hw_flush(struct si_context *sctx)
418 {
419 if (!sctx->b.log)
420 return;
421
422 si_log_cs(sctx, sctx->b.log, true);
423 }
424
425 static const char *priority_to_string(enum radeon_bo_priority priority)
426 {
427 #define ITEM(x) [RADEON_PRIO_##x] = #x
428 static const char *table[64] = {
429 ITEM(FENCE),
430 ITEM(TRACE),
431 ITEM(SO_FILLED_SIZE),
432 ITEM(QUERY),
433 ITEM(IB1),
434 ITEM(IB2),
435 ITEM(DRAW_INDIRECT),
436 ITEM(INDEX_BUFFER),
437 ITEM(VCE),
438 ITEM(UVD),
439 ITEM(SDMA_BUFFER),
440 ITEM(SDMA_TEXTURE),
441 ITEM(CP_DMA),
442 ITEM(CONST_BUFFER),
443 ITEM(DESCRIPTORS),
444 ITEM(BORDER_COLORS),
445 ITEM(SAMPLER_BUFFER),
446 ITEM(VERTEX_BUFFER),
447 ITEM(SHADER_RW_BUFFER),
448 ITEM(COMPUTE_GLOBAL),
449 ITEM(SAMPLER_TEXTURE),
450 ITEM(SHADER_RW_IMAGE),
451 ITEM(SAMPLER_TEXTURE_MSAA),
452 ITEM(COLOR_BUFFER),
453 ITEM(DEPTH_BUFFER),
454 ITEM(COLOR_BUFFER_MSAA),
455 ITEM(DEPTH_BUFFER_MSAA),
456 ITEM(CMASK),
457 ITEM(DCC),
458 ITEM(HTILE),
459 ITEM(SHADER_BINARY),
460 ITEM(SHADER_RINGS),
461 ITEM(SCRATCH_BUFFER),
462 };
463 #undef ITEM
464
465 assert(priority < ARRAY_SIZE(table));
466 return table[priority];
467 }
468
469 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
470 const struct radeon_bo_list_item *b)
471 {
472 return a->vm_address < b->vm_address ? -1 :
473 a->vm_address > b->vm_address ? 1 : 0;
474 }
475
/* Print the IB's buffer list as a table of page ranges, including holes of
 * unused virtual memory between consecutive buffers, plus each buffer's
 * priority-usage flags. Sorts saved->bo_list in place by VM address. */
static void si_dump_bo_list(struct si_context *sctx,
			    const struct radeon_saved_cs *saved, FILE *f)
{
	unsigned i,j;

	if (!saved->bo_list)
		return;

	/* Sort the list according to VM addresses first. */
	qsort(saved->bo_list, saved->bo_count,
	      sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);

	fprintf(f, "Buffer list (in units of pages = 4kB):\n"
		COLOR_YELLOW " Size VM start page "
		"VM end page Usage" COLOR_RESET "\n");

	for (i = 0; i < saved->bo_count; i++) {
		/* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
		const unsigned page_size = sctx->b.screen->info.gart_page_size;
		uint64_t va = saved->bo_list[i].vm_address;
		uint64_t size = saved->bo_list[i].bo_size;
		bool hit = false;

		/* If there's unused virtual memory between 2 buffers, print it. */
		if (i) {
			uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
						   saved->bo_list[i-1].bo_size;

			if (va > previous_va_end) {
				fprintf(f, " %10"PRIu64" -- hole --\n",
					(va - previous_va_end) / page_size);
			}
		}

		/* Print the buffer. */
		fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ",
			size / page_size, va / page_size, (va + size) / page_size);

		/* Print the usage. */
		for (j = 0; j < 64; j++) {
			if (!(saved->bo_list[i].priority_usage & (1ull << j)))
				continue;

			fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
			hit = true;
		}
		fprintf(f, "\n");
	}
	fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
		" Other buffers can still be allocated there.\n\n");
}
527
528 static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)
529 {
530 struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
531 struct r600_texture *rtex;
532 int i;
533
534 for (i = 0; i < state->nr_cbufs; i++) {
535 if (!state->cbufs[i])
536 continue;
537
538 rtex = (struct r600_texture*)state->cbufs[i]->texture;
539 u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
540 si_print_texture_info(sctx->b.screen, rtex, log);
541 u_log_printf(log, "\n");
542 }
543
544 if (state->zsbuf) {
545 rtex = (struct r600_texture*)state->zsbuf->texture;
546 u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
547 si_print_texture_info(sctx->b.screen, rtex, log);
548 u_log_printf(log, "\n");
549 }
550 }
551
/* Maps a logical descriptor slot index to its physical slot. */
typedef unsigned (*slot_remap_func)(unsigned);

/* u_log chunk payload holding a CPU snapshot of a descriptor list, plus a
 * mapped pointer to the GPU copy so corruption can be detected at print
 * time. Variable-length: the snapshot is stored in list[]. */
struct si_log_chunk_desc_list {
	/** Pointer to memory map of buffer where the list is uploaded */
	uint32_t *gpu_list;
	/** Reference of buffer where the list is uploaded, so that gpu_list
	 * is kept live. */
	struct r600_resource *buf;

	const char *shader_name;
	const char *elem_name;
	slot_remap_func slot_remap;
	enum chip_class chip_class;
	unsigned element_dw_size;
	unsigned num_elements;

	/* CPU snapshot of the descriptors (num_elements * element_dw_size). */
	uint32_t list[0];
};
570
571 static void
572 si_log_chunk_desc_list_destroy(void *data)
573 {
574 struct si_log_chunk_desc_list *chunk = data;
575 r600_resource_reference(&chunk->buf, NULL);
576 FREE(chunk);
577 }
578
/* u_log print callback: decode each descriptor slot via ac_dump_reg and flag
 * slots whose GPU copy differs from the CPU snapshot. The dword size selects
 * the layout: 4 = buffer resource, 8 = image + buffer view,
 * 16 = image + buffer + FMASK + sampler state. */
static void
si_log_chunk_desc_list_print(void *data, FILE *f)
{
	struct si_log_chunk_desc_list *chunk = data;

	for (unsigned i = 0; i < chunk->num_elements; i++) {
		unsigned cpu_dw_offset = i * chunk->element_dw_size;
		unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;
		const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";
		uint32_t *cpu_list = chunk->list + cpu_dw_offset;
		/* Fall back to the CPU snapshot when no GPU mapping exists. */
		uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;

		fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
			chunk->shader_name, chunk->elem_name, i, list_note);

		switch (chunk->element_dw_size) {
		case 4:
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
					    gpu_list[j], 0xffffffff);
			break;
		case 8:
			for (unsigned j = 0; j < 8; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
					    gpu_list[j], 0xffffffff);

			fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
					    gpu_list[4+j], 0xffffffff);
			break;
		case 16:
			for (unsigned j = 0; j < 8; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
					    gpu_list[j], 0xffffffff);

			fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
					    gpu_list[4+j], 0xffffffff);

			fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 8; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
					    gpu_list[8+j], 0xffffffff);

			fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
					    gpu_list[12+j], 0xffffffff);
			break;
		}

		/* GPU copy disagreeing with the CPU snapshot means memory
		 * corruption (or a write by a stray shader). */
		if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {
			fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
				COLOR_RESET "\n");
		}

		fprintf(f, "\n");
	}

}
648
/* Chunk vtable for descriptor-list snapshots. */
static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {
	.destroy = si_log_chunk_desc_list_destroy,
	.print = si_log_chunk_desc_list_print,
};
653
654 static void si_dump_descriptor_list(struct si_screen *screen,
655 struct si_descriptors *desc,
656 const char *shader_name,
657 const char *elem_name,
658 unsigned element_dw_size,
659 unsigned num_elements,
660 slot_remap_func slot_remap,
661 struct u_log_context *log)
662 {
663 if (!desc->list)
664 return;
665
666 /* In some cases, the caller doesn't know how many elements are really
667 * uploaded. Reduce num_elements to fit in the range of active slots. */
668 unsigned active_range_dw_begin =
669 desc->first_active_slot * desc->element_dw_size;
670 unsigned active_range_dw_end =
671 active_range_dw_begin + desc->num_active_slots * desc->element_dw_size;
672
673 while (num_elements > 0) {
674 int i = slot_remap(num_elements - 1);
675 unsigned dw_begin = i * element_dw_size;
676 unsigned dw_end = dw_begin + element_dw_size;
677
678 if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end)
679 break;
680
681 num_elements--;
682 }
683
684 struct si_log_chunk_desc_list *chunk =
685 CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list,
686 4 * element_dw_size * num_elements);
687 chunk->shader_name = shader_name;
688 chunk->elem_name = elem_name;
689 chunk->element_dw_size = element_dw_size;
690 chunk->num_elements = num_elements;
691 chunk->slot_remap = slot_remap;
692 chunk->chip_class = screen->b.chip_class;
693
694 r600_resource_reference(&chunk->buf, desc->buffer);
695 chunk->gpu_list = desc->gpu_list;
696
697 for (unsigned i = 0; i < num_elements; ++i) {
698 memcpy(&chunk->list[i * element_dw_size],
699 &desc->list[slot_remap(i) * element_dw_size],
700 4 * element_dw_size);
701 }
702
703 u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);
704 }
705
/* Identity slot mapping: logical slot i lives at physical slot i. */
static unsigned si_identity(unsigned i)
{
	return i;
}
710
/* Dump all descriptor lists (vertex buffers, const/shader buffers, samplers,
 * images) of one shader stage. When `info` is NULL (compute without TGSI
 * info), fall back to the context's enabled-slot masks to size the dumps. */
static void si_dump_descriptors(struct si_context *sctx,
				enum pipe_shader_type processor,
				const struct tgsi_shader_info *info,
				struct u_log_context *log)
{
	struct si_descriptors *descs =
		&sctx->descriptors[SI_DESCS_FIRST_SHADER +
				   processor * SI_NUM_SHADER_DESCS];
	static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
	const char *name = shader_name[processor];
	unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;
	unsigned enabled_images;

	if (info) {
		enabled_constbuf = info->const_buffers_declared;
		enabled_shaderbuf = info->shader_buffers_declared;
		enabled_samplers = info->samplers_declared;
		enabled_images = info->images_declared;
	} else {
		/* const buffers and shader buffers share one enabled_mask:
		 * shader buffers occupy the low SI_NUM_SHADER_BUFFERS bits
		 * in reverse order, const buffers the bits above them. */
		enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
				   SI_NUM_SHADER_BUFFERS;
		enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
				    u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
		enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
				    (32 - SI_NUM_SHADER_BUFFERS);
		enabled_samplers = sctx->samplers[processor].enabled_mask;
		enabled_images = sctx->images[processor].enabled_mask;
	}

	if (processor == PIPE_SHADER_VERTEX) {
		assert(info); /* only CS may not have an info struct */

		si_dump_descriptor_list(sctx->screen, &sctx->vertex_buffers, name,
					" - Vertex buffer", 4, info->num_inputs,
					si_identity, log);
	}

	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
				name, " - Constant buffer", 4,
				util_last_bit(enabled_constbuf),
				si_get_constbuf_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
				name, " - Shader buffer", 4,
				util_last_bit(enabled_shaderbuf),
				si_get_shaderbuf_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
				name, " - Sampler", 16,
				util_last_bit(enabled_samplers),
				si_get_sampler_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
				name, " - Image", 8,
				util_last_bit(enabled_images),
				si_get_image_slot, log);
}
769
770 static void si_dump_gfx_descriptors(struct si_context *sctx,
771 const struct si_shader_ctx_state *state,
772 struct u_log_context *log)
773 {
774 if (!state->cso || !state->current)
775 return;
776
777 si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log);
778 }
779
780 static void si_dump_compute_descriptors(struct si_context *sctx,
781 struct u_log_context *log)
782 {
783 if (!sctx->cs_shader_state.program)
784 return;
785
786 si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log);
787 }
788
/* One decoded disassembly line with its byte offset inside the shader BO. */
struct si_shader_inst {
	char text[160]; /* one disasm line */
	unsigned offset; /* instruction offset */
	unsigned size; /* instruction size = 4 or 8 */
};
794
/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions". Offsets continue from the last entry already in the
 * array, so multiple shader parts can be concatenated. Each line gets a
 * "[PC=..., off=..., size=...]" suffix appended for annotation. */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct si_shader_inst *instructions)
{
	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	char *next;

	while ((next = strchr(disasm, '\n'))) {
		struct si_shader_inst *inst = &instructions[*num];
		unsigned len = next - disasm;

		assert(len < ARRAY_SIZE(inst->text));
		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		/* Offsets are cumulative across previously added parts. */
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		const char *semicolon = strchr(disasm, ';');
		assert(semicolon);
		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
		inst->size = next - semicolon > 16 ? 8 : 4;

		snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
			 " [PC=0x%"PRIx64", off=%u, size=%u]",
			 start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
		disasm = next + 1;
	}
}
828
/* If the shader is being executed, print its asm instructions, and annotate
 * those that are being executed right now with information about waves that
 * execute them. This is most useful during a GPU hang.
 */
static void si_print_annotated_shader(struct si_shader *shader,
				      struct ac_wave_info *waves,
				      unsigned num_waves,
				      FILE *f)
{
	if (!shader || !shader->binary.disasm_string)
		return;

	uint64_t start_addr = shader->bo->gpu_address;
	uint64_t end_addr = start_addr + shader->bo->b.b.width0;
	unsigned i;

	/* See if any wave executes the shader. */
	for (i = 0; i < num_waves; i++) {
		if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
			break;
	}
	if (i == num_waves)
		return; /* the shader is not being executed */

	/* Remember the first found wave. The waves are sorted according to PC. */
	waves = &waves[i];
	num_waves -= i;

	/* Get the list of instructions.
	 * Buffer size / 4 is the upper bound of the instruction count.
	 */
	unsigned num_inst = 0;
	struct si_shader_inst *instructions =
		calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));

	/* Concatenate the disasm of all shader parts in execution order. */
	if (shader->prolog) {
		si_add_split_disasm(shader->prolog->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}
	if (shader->previous_stage) {
		si_add_split_disasm(shader->previous_stage->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}
	if (shader->prolog2) {
		si_add_split_disasm(shader->prolog2->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}
	si_add_split_disasm(shader->binary.disasm_string,
			    start_addr, &num_inst, instructions);
	if (shader->epilog) {
		si_add_split_disasm(shader->epilog->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}

	fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
		si_get_shader_name(shader, shader->selector->type));

	/* Print instructions with annotations. */
	for (i = 0; i < num_inst; i++) {
		struct si_shader_inst *inst = &instructions[i];

		fprintf(f, "%s\n", inst->text);

		/* Print which waves execute the instruction right now. */
		while (num_waves && start_addr + inst->offset == waves->pc) {
			fprintf(f,
				" " COLOR_GREEN "^ SE%u SH%u CU%u "
				"SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
				waves->se, waves->sh, waves->cu, waves->simd,
				waves->wave, waves->exec);

			if (inst->size == 4) {
				fprintf(f, "INST32=%08X" COLOR_RESET "\n",
					waves->inst_dw0);
			} else {
				fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
					waves->inst_dw0, waves->inst_dw1);
			}

			/* Mark the wave as matched so the caller can list
			 * leftover waves running unbound shaders. */
			waves->matched = true;
			waves = &waves[1];
			num_waves--;
		}
	}

	fprintf(f, "\n\n");
	free(instructions);
}
917
/* Print annotated disassembly of all bound shaders, then list waves that
 * are executing shaders other than the currently-bound ones. */
static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
{
	struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
	unsigned num_waves = ac_get_wave_info(waves);

	fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
		"\n\n", num_waves);

	si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);

	/* Print waves executing shaders that are not currently bound. */
	unsigned i;
	bool found = false;
	for (i = 0; i < num_waves; i++) {
		if (waves[i].matched)
			continue;

		if (!found) {
			fprintf(f, COLOR_CYAN
				"Waves not executing currently-bound shaders:"
				COLOR_RESET "\n");
			found = true;
		}
		fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
			" INST=%08X %08X PC=%"PRIx64"\n",
			waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
			waves[i].wave, waves[i].exec, waves[i].inst_dw0,
			waves[i].inst_dw1, waves[i].pc);
	}
	if (found)
		fprintf(f, "\n\n");
}
954
955 static void si_dump_command(const char *title, const char *command, FILE *f)
956 {
957 char line[2000];
958
959 FILE *p = popen(command, "r");
960 if (!p)
961 return;
962
963 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
964 while (fgets(line, sizeof(line), p))
965 fputs(line, f);
966 fprintf(f, "\n\n");
967 pclose(p);
968 }
969
970 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
971 unsigned flags)
972 {
973 struct si_context *sctx = (struct si_context*)ctx;
974
975 if (sctx->b.log)
976 u_log_flush(sctx->b.log);
977
978 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
979 si_dump_debug_registers(sctx, f);
980
981 si_dump_annotated_shaders(sctx, f);
982 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
983 si_dump_command("Wave information", "umr -O bits -wa", f);
984 }
985 }
986
/* Log everything relevant to the current draw: framebuffer layout, all bound
 * graphics shaders, and their descriptor lists. No-op when log is NULL. */
void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
{
	if (!log)
		return;

	si_dump_framebuffer(sctx, log);

	si_dump_gfx_shader(sctx, &sctx->vs_shader, log);
	si_dump_gfx_shader(sctx, &sctx->tcs_shader, log);
	si_dump_gfx_shader(sctx, &sctx->tes_shader, log);
	si_dump_gfx_shader(sctx, &sctx->gs_shader, log);
	si_dump_gfx_shader(sctx, &sctx->ps_shader, log);

	/* RW buffers are shared by all stages; dump them once up front. */
	si_dump_descriptor_list(sctx->screen,
				&sctx->descriptors[SI_DESCS_RW_BUFFERS],
				"", "RW buffers", 4, SI_NUM_RW_BUFFERS,
				si_identity, log);
	si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log);
	si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log);
	si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log);
	si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log);
	si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log);
}
1010
/* Log the bound compute shader and its descriptors. No-op when log is NULL. */
void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)
{
	if (!log)
		return;

	si_dump_compute_shader(sctx, log);
	si_dump_compute_descriptors(sctx, log);
}
1019
/* Dump a saved SDMA command stream: the buffer list followed by the raw IB
 * dwords (SDMA IBs are not decoded, just hex-dumped). */
static void si_dump_dma(struct si_context *sctx,
			struct radeon_saved_cs *saved, FILE *f)
{
	static const char ib_name[] = "sDMA IB";
	unsigned i;

	si_dump_bo_list(sctx, saved, f);

	fprintf(f, "------------------ %s begin ------------------\n", ib_name);

	for (i = 0; i < saved->num_dw; ++i) {
		fprintf(f, " %08x\n", saved->ib[i]);
	}

	fprintf(f, "------------------- %s end -------------------\n", ib_name);
	fprintf(f, "\n");

	fprintf(f, "SDMA Dump Done.\n");
}
1039
/* Check dmesg for a new VM fault and, if one occurred, write a full fault
 * report (command line, device info, faulting page, and the state of the
 * given ring) to a ddebug file, then terminate the process. */
void si_check_vm_faults(struct r600_common_context *ctx,
			struct radeon_saved_cs *saved, enum ring_type ring)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_screen *screen = sctx->b.b.screen;
	FILE *f;
	uint64_t addr;
	char cmd_line[4096];

	/* Only reports faults newer than dmesg_timestamp (updated in place). */
	if (!ac_vm_fault_occured(sctx->b.chip_class,
				 &sctx->dmesg_timestamp, &addr))
		return;

	f = dd_get_debug_file(false);
	if (!f)
		return;

	fprintf(f, "VM fault report.\n\n");
	if (os_get_command_line(cmd_line, sizeof(cmd_line)))
		fprintf(f, "Command: %s\n", cmd_line);
	fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
	fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
	fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
	fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);

	if (sctx->apitrace_call_number)
		fprintf(f, "Last apitrace call: %u\n\n",
			sctx->apitrace_call_number);

	switch (ring) {
	case RING_GFX: {
		/* Render the draw/compute state through a temporary log. */
		struct u_log_context log;
		u_log_context_init(&log);

		si_log_draw_state(sctx, &log);
		si_log_compute_state(sctx, &log);

		u_log_new_page_print(&log, f);
		u_log_context_destroy(&log);
		break;
	}
	case RING_DMA:
		si_dump_dma(sctx, saved, f);
		break;

	default:
		break;
	}

	fclose(f);

	fprintf(stderr, "Detected a VM fault, exiting...\n");
	exit(0);
}
1094
/* Hook up the debug entry points on context creation. */
void si_init_debug_functions(struct si_context *sctx)
{
	sctx->b.b.dump_debug_state = si_dump_debug_state;
	sctx->b.check_vm_faults = si_check_vm_faults;

	/* Set the initial dmesg timestamp for this context, so that
	 * only new messages will be checked for VM faults.
	 */
	if (sctx->screen->b.debug_flags & DBG(CHECK_VM))
		ac_vm_fault_occured(sctx->b.chip_class,
				    &sctx->dmesg_timestamp, NULL);
}