radeonsi: remove r600_pipe_common::check_vm_faults
[mesa.git] / src / gallium / drivers / radeonsi / si_debug.c
1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_pipe.h"
25 #include "si_compute.h"
26 #include "sid.h"
27 #include "gfx9d.h"
28 #include "sid_tables.h"
29 #include "ddebug/dd_util.h"
30 #include "util/u_dump.h"
31 #include "util/u_log.h"
32 #include "util/u_memory.h"
33 #include "ac_debug.h"
34
35 static void si_dump_bo_list(struct si_context *sctx,
36 const struct radeon_saved_cs *saved, FILE *f);
37
38 DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
39
40 static void si_dump_shader(struct si_screen *sscreen,
41 enum pipe_shader_type processor,
42 const struct si_shader *shader, FILE *f)
43 {
44 if (shader->shader_log)
45 fwrite(shader->shader_log, shader->shader_log_size, 1, f);
46 else
47 si_shader_dump(sscreen, shader, NULL, processor, f, false);
48 }
49
/* u_log chunk payload recording one bound shader for deferred printing. */
struct si_log_chunk_shader {
	/* The shader destroy code assumes a current context for unlinking of
	 * PM4 packets etc.
	 *
	 * While we should be able to destroy shaders without a context, doing
	 * so would happen only very rarely and be therefore likely to fail
	 * just when you're trying to debug something. Let's just remember the
	 * current context in the chunk.
	 */
	struct si_context *ctx;
	/* Shader whose log/disassembly will be printed. */
	struct si_shader *shader;
	enum pipe_shader_type processor;

	/* For keep-alive reference counts */
	struct si_shader_selector *sel;
	struct si_compute *program;
};
67
68 static void
69 si_log_chunk_shader_destroy(void *data)
70 {
71 struct si_log_chunk_shader *chunk = data;
72 si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL);
73 si_compute_reference(&chunk->program, NULL);
74 FREE(chunk);
75 }
76
77 static void
78 si_log_chunk_shader_print(void *data, FILE *f)
79 {
80 struct si_log_chunk_shader *chunk = data;
81 struct si_screen *sscreen = chunk->ctx->screen;
82 si_dump_shader(sscreen, chunk->processor,
83 chunk->shader, f);
84 }
85
86 static struct u_log_chunk_type si_log_chunk_type_shader = {
87 .destroy = si_log_chunk_shader_destroy,
88 .print = si_log_chunk_shader_print,
89 };
90
91 static void si_dump_gfx_shader(struct si_context *ctx,
92 const struct si_shader_ctx_state *state,
93 struct u_log_context *log)
94 {
95 struct si_shader *current = state->current;
96
97 if (!state->cso || !current)
98 return;
99
100 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
101 chunk->ctx = ctx;
102 chunk->processor = state->cso->info.processor;
103 chunk->shader = current;
104 si_shader_selector_reference(ctx, &chunk->sel, current->selector);
105 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
106 }
107
108 static void si_dump_compute_shader(struct si_context *ctx,
109 struct u_log_context *log)
110 {
111 const struct si_cs_shader_state *state = &ctx->cs_shader_state;
112
113 if (!state->program)
114 return;
115
116 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
117 chunk->ctx = ctx;
118 chunk->processor = PIPE_SHADER_COMPUTE;
119 chunk->shader = &state->program->shader;
120 si_compute_reference(&chunk->program, state->program);
121 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
122 }
123
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num    index of the compile to (possibly) replace
 * \param binary receives the parsed replacement ELF on success
 * \return true if the shader was replaced
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize, nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Scan the ";"-separated "index:filename" list for an entry whose
	 * index matches num. A malformed list aborts the process. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* If another entry follows, cut the filename off at the ";" using a
	 * private copy (freed in the cleanup path below). */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	f = fopen(p, "r");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	/* Determine the file size via seek-to-end, then rewind and read
	 * the whole file into memory. */
	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	nread = fread(buf, 1, filesize, f);
	if (nread != filesize)
		goto file_error;

	/* Parse the replacement ELF into the caller's binary. */
	ac_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
215
216 /* Parsed IBs are difficult to read without colors. Use "less -R file" to
217 * read them, or use "aha -b -f file" to convert them to html.
218 */
219 #define COLOR_RESET "\033[0m"
220 #define COLOR_RED "\033[31m"
221 #define COLOR_GREEN "\033[1;32m"
222 #define COLOR_YELLOW "\033[1;33m"
223 #define COLOR_CYAN "\033[1;36m"
224
225 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
226 unsigned offset)
227 {
228 struct radeon_winsys *ws = sctx->b.ws;
229 uint32_t value;
230
231 if (ws->read_registers(ws, offset, 1, &value))
232 ac_dump_reg(f, sctx->b.chip_class, offset, value, ~0);
233 }
234
/* Dump a snapshot of hardware status registers read through the winsys.
 *
 * Register readback depends on kernel support: nothing can be read on
 * radeon DRM < 2.42, and only GRBM_STATUS on DRM < 3.1.
 */
static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
{
	if (sctx->screen->info.drm_major == 2 &&
	    sctx->screen->info.drm_minor < 42)
		return; /* no radeon support */

	fprintf(f, "Memory-mapped registers:\n");
	si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);

	/* No other registers can be read on DRM < 3.1.0. */
	if (sctx->screen->info.drm_major < 3 ||
	    sctx->screen->info.drm_minor < 1) {
		fprintf(f, "\n");
		return;
	}

	si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
	si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
	si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
	si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
	si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
	si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
	si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
	/* SRBM status registers are only dumped on VI and older. */
	if (sctx->b.chip_class <= VI) {
		si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
		si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
		si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
	}
	si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
	si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
	si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
	si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
	si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
	si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
	si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
	si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
	si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
	si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
	fprintf(f, "\n");
}
275
/* u_log chunk payload recording a dword range of a gfx command stream. */
struct si_log_chunk_cs {
	struct si_context *ctx;
	struct si_saved_cs *cs;      /* keep-alive reference to the saved CS */
	bool dump_bo_list;           /* also print the buffer list when set */
	unsigned gfx_begin, gfx_end; /* dword range of the gfx IB to parse */
};
282
283 static void si_log_chunk_type_cs_destroy(void *data)
284 {
285 struct si_log_chunk_cs *chunk = data;
286 si_saved_cs_reference(&chunk->cs, NULL);
287 free(chunk);
288 }
289
/* Parse the current (not yet submitted) command stream and print the dword
 * range [begin, end).
 *
 * The CS may be split over several finalized chunks plus the chunk that is
 * still being recorded; begin/end are counted across all of them and are
 * rebased onto each chunk as the loop advances.
 */
static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
				unsigned begin, unsigned end,
				int *last_trace_id, unsigned trace_id_count,
				const char *name, enum chip_class chip_class)
{
	unsigned orig_end = end;

	assert(begin <= end);

	fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
		name, begin);

	/* Walk the already-finalized chunks first. */
	for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
		struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];

		/* Print the part of [begin, end) that falls in this chunk. */
		if (begin < chunk->cdw) {
			ac_parse_ib_chunk(f, chunk->buf + begin,
					  MIN2(end, chunk->cdw) - begin,
					  last_trace_id, trace_id_count,
					  chip_class, NULL, NULL);
		}

		/* Done if the range ends inside this chunk. */
		if (end <= chunk->cdw)
			return;

		if (begin < chunk->cdw)
			fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
				name);

		/* Rebase the remaining range onto the next chunk. */
		begin -= MIN2(begin, chunk->cdw);
		end -= chunk->cdw;
	}

	assert(end <= cs->current.cdw);

	/* Remainder lives in the chunk currently being recorded. */
	ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
			  trace_id_count, chip_class, NULL, NULL);

	fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
		name, orig_end);
}
331
/* Print a recorded CS chunk: parse the gfx IB dword range and optionally
 * dump the buffer list, annotating instructions with the last trace id
 * read back from the trace buffer. */
static void si_log_chunk_type_cs_print(void *data, FILE *f)
{
	struct si_log_chunk_cs *chunk = data;
	struct si_context *ctx = chunk->ctx;
	struct si_saved_cs *scs = chunk->cs;
	int last_trace_id = -1;

	/* We are expecting that the ddebug pipe has already
	 * waited for the context, so this buffer should be idle.
	 * If the GPU is hung, there is no point in waiting for it.
	 */
	uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
					      NULL,
					      PIPE_TRANSFER_UNSYNCHRONIZED |
					      PIPE_TRANSFER_READ);
	if (map)
		last_trace_id = map[0];

	if (chunk->gfx_end != chunk->gfx_begin) {
		/* The first chunk of a CS also prints the init-config IB2s
		 * that precede the recorded commands. */
		if (chunk->gfx_begin == 0) {
			if (ctx->init_config)
				ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
					    NULL, 0, "IB2: Init config", ctx->b.chip_class,
					    NULL, NULL);

			if (ctx->init_config_gs_rings)
				ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
					    ctx->init_config_gs_rings->ndw,
					    NULL, 0, "IB2: Init GS rings", ctx->b.chip_class,
					    NULL, NULL);
		}

		/* A flushed CS is parsed from the saved copy; otherwise the
		 * live, still-growing command stream is walked. */
		if (scs->flushed) {
			ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
				    chunk->gfx_end - chunk->gfx_begin,
				    &last_trace_id, map ? 1 : 0, "IB", ctx->b.chip_class,
				    NULL, NULL);
		} else {
			si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
					    chunk->gfx_end, &last_trace_id, map ? 1 : 0,
					    "IB", ctx->b.chip_class);
		}
	}

	if (chunk->dump_bo_list) {
		fprintf(f, "Flushing. Time: ");
		util_dump_ns(f, scs->time_flush);
		fprintf(f, "\n\n");
		si_dump_bo_list(ctx, &scs->gfx, f);
	}
}
383
/* vtable binding CS chunks to the u_log machinery. */
static const struct u_log_chunk_type si_log_chunk_type_cs = {
	.destroy = si_log_chunk_type_cs_destroy,
	.print = si_log_chunk_type_cs_print,
};
388
389 static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
390 bool dump_bo_list)
391 {
392 assert(ctx->current_saved_cs);
393
394 struct si_saved_cs *scs = ctx->current_saved_cs;
395 unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;
396
397 if (!dump_bo_list &&
398 gfx_cur == scs->gfx_last_dw)
399 return;
400
401 struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
402
403 chunk->ctx = ctx;
404 si_saved_cs_reference(&chunk->cs, scs);
405 chunk->dump_bo_list = dump_bo_list;
406
407 chunk->gfx_begin = scs->gfx_last_dw;
408 chunk->gfx_end = gfx_cur;
409 scs->gfx_last_dw = gfx_cur;
410
411 u_log_chunk(log, &si_log_chunk_type_cs, chunk);
412 }
413
414 void si_auto_log_cs(void *data, struct u_log_context *log)
415 {
416 struct si_context *ctx = (struct si_context *)data;
417 si_log_cs(ctx, log, false);
418 }
419
420 void si_log_hw_flush(struct si_context *sctx)
421 {
422 if (!sctx->b.log)
423 return;
424
425 si_log_cs(sctx, sctx->b.log, true);
426 }
427
428 static const char *priority_to_string(enum radeon_bo_priority priority)
429 {
430 #define ITEM(x) [RADEON_PRIO_##x] = #x
431 static const char *table[64] = {
432 ITEM(FENCE),
433 ITEM(TRACE),
434 ITEM(SO_FILLED_SIZE),
435 ITEM(QUERY),
436 ITEM(IB1),
437 ITEM(IB2),
438 ITEM(DRAW_INDIRECT),
439 ITEM(INDEX_BUFFER),
440 ITEM(VCE),
441 ITEM(UVD),
442 ITEM(SDMA_BUFFER),
443 ITEM(SDMA_TEXTURE),
444 ITEM(CP_DMA),
445 ITEM(CONST_BUFFER),
446 ITEM(DESCRIPTORS),
447 ITEM(BORDER_COLORS),
448 ITEM(SAMPLER_BUFFER),
449 ITEM(VERTEX_BUFFER),
450 ITEM(SHADER_RW_BUFFER),
451 ITEM(COMPUTE_GLOBAL),
452 ITEM(SAMPLER_TEXTURE),
453 ITEM(SHADER_RW_IMAGE),
454 ITEM(SAMPLER_TEXTURE_MSAA),
455 ITEM(COLOR_BUFFER),
456 ITEM(DEPTH_BUFFER),
457 ITEM(COLOR_BUFFER_MSAA),
458 ITEM(DEPTH_BUFFER_MSAA),
459 ITEM(CMASK),
460 ITEM(DCC),
461 ITEM(HTILE),
462 ITEM(SHADER_BINARY),
463 ITEM(SHADER_RINGS),
464 ITEM(SCRATCH_BUFFER),
465 };
466 #undef ITEM
467
468 assert(priority < ARRAY_SIZE(table));
469 return table[priority];
470 }
471
472 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
473 const struct radeon_bo_list_item *b)
474 {
475 return a->vm_address < b->vm_address ? -1 :
476 a->vm_address > b->vm_address ? 1 : 0;
477 }
478
/* Print the CS buffer list sorted by VM address, in 4kB-page units, with
 * holes between buffers and the priority usage of each buffer. */
static void si_dump_bo_list(struct si_context *sctx,
			    const struct radeon_saved_cs *saved, FILE *f)
{
	unsigned i,j;

	if (!saved->bo_list)
		return;

	/* Sort the list according to VM addresses first. */
	/* NOTE(review): the (void*) cast makes bo_list_compare_va's typed
	 * signature fit qsort's comparator type. */
	qsort(saved->bo_list, saved->bo_count,
	      sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);

	fprintf(f, "Buffer list (in units of pages = 4kB):\n"
		COLOR_YELLOW "        Size    VM start page    "
		"VM end page           Usage" COLOR_RESET "\n");

	for (i = 0; i < saved->bo_count; i++) {
		/* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
		const unsigned page_size = sctx->b.screen->info.gart_page_size;
		uint64_t va = saved->bo_list[i].vm_address;
		uint64_t size = saved->bo_list[i].bo_size;
		bool hit = false;

		/* If there's unused virtual memory between 2 buffers, print it. */
		if (i) {
			uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
						   saved->bo_list[i-1].bo_size;

			if (va > previous_va_end) {
				fprintf(f, "  %10"PRIu64"    -- hole --\n",
					(va - previous_va_end) / page_size);
			}
		}

		/* Print the buffer. */
		fprintf(f, "  %10"PRIu64"    0x%013"PRIX64"       0x%013"PRIX64"       ",
			size / page_size, va / page_size, (va + size) / page_size);

		/* Print the usage: one name per priority bit set on the buffer. */
		for (j = 0; j < 64; j++) {
			if (!(saved->bo_list[i].priority_usage & (1ull << j)))
				continue;

			fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
			hit = true;
		}
		fprintf(f, "\n");
	}
	fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
		   "      Other buffers can still be allocated there.\n\n");
}
530
531 static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)
532 {
533 struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
534 struct r600_texture *rtex;
535 int i;
536
537 for (i = 0; i < state->nr_cbufs; i++) {
538 if (!state->cbufs[i])
539 continue;
540
541 rtex = (struct r600_texture*)state->cbufs[i]->texture;
542 u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
543 si_print_texture_info(sctx->b.screen, rtex, log);
544 u_log_printf(log, "\n");
545 }
546
547 if (state->zsbuf) {
548 rtex = (struct r600_texture*)state->zsbuf->texture;
549 u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
550 si_print_texture_info(sctx->b.screen, rtex, log);
551 u_log_printf(log, "\n");
552 }
553 }
554
/* Maps a user-visible slot index to the index used inside the descriptor
 * list (e.g. const-buffer vs. shader-buffer halves of a shared list). */
typedef unsigned (*slot_remap_func)(unsigned);

struct si_log_chunk_desc_list {
	/** Pointer to memory map of buffer where the list is uploaded */
	uint32_t *gpu_list;
	/** Reference of buffer where the list is uploaded, so that gpu_list
	 * is kept live. */
	struct r600_resource *buf;

	const char *shader_name;
	const char *elem_name;
	slot_remap_func slot_remap;
	enum chip_class chip_class;
	unsigned element_dw_size; /* dwords per element: 4, 8 or 16 */
	unsigned num_elements;

	/* CPU-side copy of the descriptors, compared against gpu_list when
	 * printing to detect GPU memory corruption. */
	uint32_t list[0];
};
573
574 static void
575 si_log_chunk_desc_list_destroy(void *data)
576 {
577 struct si_log_chunk_desc_list *chunk = data;
578 r600_resource_reference(&chunk->buf, NULL);
579 FREE(chunk);
580 }
581
/* Print a saved descriptor list, element by element.
 *
 * element_dw_size selects the layout: 4 = buffer resource, 8 = image
 * (printed with its buffer view), 16 = image + buffer view + FMASK +
 * sampler state. The GPU copy is printed when mapped, and compared against
 * the CPU copy to flag corruption.
 */
static void
si_log_chunk_desc_list_print(void *data, FILE *f)
{
	struct si_log_chunk_desc_list *chunk = data;

	for (unsigned i = 0; i < chunk->num_elements; i++) {
		unsigned cpu_dw_offset = i * chunk->element_dw_size;
		unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;
		const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";
		uint32_t *cpu_list = chunk->list + cpu_dw_offset;
		uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;

		fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
			chunk->shader_name, chunk->elem_name, i, list_note);

		switch (chunk->element_dw_size) {
		case 4:
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
					    gpu_list[j], 0xffffffff);
			break;
		case 8:
			for (unsigned j = 0; j < 8; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
					    gpu_list[j], 0xffffffff);

			fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
					    gpu_list[4+j], 0xffffffff);
			break;
		case 16:
			for (unsigned j = 0; j < 8; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
					    gpu_list[j], 0xffffffff);

			fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
					    gpu_list[4+j], 0xffffffff);

			fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 8; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
					    gpu_list[8+j], 0xffffffff);

			fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
					    gpu_list[12+j], 0xffffffff);
			break;
		}

		/* Flag mismatches between the GPU copy and the CPU copy. */
		if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {
			fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
				COLOR_RESET "\n");
		}

		fprintf(f, "\n");
	}

}
651
/* vtable binding descriptor-list chunks to the u_log machinery. */
static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {
	.destroy = si_log_chunk_desc_list_destroy,
	.print = si_log_chunk_desc_list_print,
};
656
657 static void si_dump_descriptor_list(struct si_screen *screen,
658 struct si_descriptors *desc,
659 const char *shader_name,
660 const char *elem_name,
661 unsigned element_dw_size,
662 unsigned num_elements,
663 slot_remap_func slot_remap,
664 struct u_log_context *log)
665 {
666 if (!desc->list)
667 return;
668
669 /* In some cases, the caller doesn't know how many elements are really
670 * uploaded. Reduce num_elements to fit in the range of active slots. */
671 unsigned active_range_dw_begin =
672 desc->first_active_slot * desc->element_dw_size;
673 unsigned active_range_dw_end =
674 active_range_dw_begin + desc->num_active_slots * desc->element_dw_size;
675
676 while (num_elements > 0) {
677 int i = slot_remap(num_elements - 1);
678 unsigned dw_begin = i * element_dw_size;
679 unsigned dw_end = dw_begin + element_dw_size;
680
681 if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end)
682 break;
683
684 num_elements--;
685 }
686
687 struct si_log_chunk_desc_list *chunk =
688 CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list,
689 4 * element_dw_size * num_elements);
690 chunk->shader_name = shader_name;
691 chunk->elem_name = elem_name;
692 chunk->element_dw_size = element_dw_size;
693 chunk->num_elements = num_elements;
694 chunk->slot_remap = slot_remap;
695 chunk->chip_class = screen->info.chip_class;
696
697 r600_resource_reference(&chunk->buf, desc->buffer);
698 chunk->gpu_list = desc->gpu_list;
699
700 for (unsigned i = 0; i < num_elements; ++i) {
701 memcpy(&chunk->list[i * element_dw_size],
702 &desc->list[slot_remap(i) * element_dw_size],
703 4 * element_dw_size);
704 }
705
706 u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);
707 }
708
/* Identity slot mapping, for descriptor lists whose slots are not remapped. */
static unsigned si_identity(unsigned slot)
{
	return slot;
}
713
/* Dump all descriptor lists of one shader stage.
 *
 * "info" provides the declared resource masks for gfx shaders; for compute
 * it is NULL and the enabled masks are taken from the context instead.
 */
static void si_dump_descriptors(struct si_context *sctx,
				enum pipe_shader_type processor,
				const struct tgsi_shader_info *info,
				struct u_log_context *log)
{
	struct si_descriptors *descs =
		&sctx->descriptors[SI_DESCS_FIRST_SHADER +
				   processor * SI_NUM_SHADER_DESCS];
	static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
	const char *name = shader_name[processor];
	unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;
	unsigned enabled_images;

	if (info) {
		enabled_constbuf = info->const_buffers_declared;
		enabled_shaderbuf = info->shader_buffers_declared;
		enabled_samplers = info->samplers_declared;
		enabled_images = info->images_declared;
	} else {
		/* Const and shader buffers share one enabled_mask: shader
		 * buffers occupy the low bits in reverse order, const
		 * buffers the high bits. */
		enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
				   SI_NUM_SHADER_BUFFERS;
		enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
				    u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
		enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
				    (32 - SI_NUM_SHADER_BUFFERS);
		enabled_samplers = sctx->samplers[processor].enabled_mask;
		enabled_images = sctx->images[processor].enabled_mask;
	}

	/* Vertex buffer descriptors live in a separate upload buffer. */
	if (processor == PIPE_SHADER_VERTEX &&
	    sctx->vb_descriptors_buffer &&
	    sctx->vb_descriptors_gpu_list &&
	    sctx->vertex_elements) {
		assert(info); /* only CS may not have an info struct */
		struct si_descriptors desc = {};

		desc.buffer = sctx->vb_descriptors_buffer;
		desc.list = sctx->vb_descriptors_gpu_list;
		desc.gpu_list = sctx->vb_descriptors_gpu_list;
		desc.element_dw_size = 4;
		desc.num_active_slots = sctx->vertex_elements->desc_list_byte_size / 16;

		si_dump_descriptor_list(sctx->screen, &desc, name,
					" - Vertex buffer", 4, info->num_inputs,
					si_identity, log);
	}

	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
				name, " - Constant buffer", 4,
				util_last_bit(enabled_constbuf),
				si_get_constbuf_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
				name, " - Shader buffer", 4,
				util_last_bit(enabled_shaderbuf),
				si_get_shaderbuf_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
				name, " - Sampler", 16,
				util_last_bit(enabled_samplers),
				si_get_sampler_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
				name, " - Image", 8,
				util_last_bit(enabled_images),
				si_get_image_slot, log);
}
782
783 static void si_dump_gfx_descriptors(struct si_context *sctx,
784 const struct si_shader_ctx_state *state,
785 struct u_log_context *log)
786 {
787 if (!state->cso || !state->current)
788 return;
789
790 si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log);
791 }
792
793 static void si_dump_compute_descriptors(struct si_context *sctx,
794 struct u_log_context *log)
795 {
796 if (!sctx->cs_shader_state.program)
797 return;
798
799 si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log);
800 }
801
/* One line of shader disassembly plus its location within the shader. */
struct si_shader_inst {
	char text[160]; /* one disasm line */
	unsigned offset; /* instruction offset */
	unsigned size; /* instruction size = 4 or 8 */
};
807
808 /* Split a disassembly string into lines and add them to the array pointed
809 * to by "instructions". */
810 static void si_add_split_disasm(const char *disasm,
811 uint64_t start_addr,
812 unsigned *num,
813 struct si_shader_inst *instructions)
814 {
815 struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
816 char *next;
817
818 while ((next = strchr(disasm, '\n'))) {
819 struct si_shader_inst *inst = &instructions[*num];
820 unsigned len = next - disasm;
821
822 assert(len < ARRAY_SIZE(inst->text));
823 memcpy(inst->text, disasm, len);
824 inst->text[len] = 0;
825 inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
826
827 const char *semicolon = strchr(disasm, ';');
828 assert(semicolon);
829 /* More than 16 chars after ";" means the instruction is 8 bytes long. */
830 inst->size = next - semicolon > 16 ? 8 : 4;
831
832 snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
833 " [PC=0x%"PRIx64", off=%u, size=%u]",
834 start_addr + inst->offset, inst->offset, inst->size);
835
836 last_inst = inst;
837 (*num)++;
838 disasm = next + 1;
839 }
840 }
841
842 /* If the shader is being executed, print its asm instructions, and annotate
843 * those that are being executed right now with information about waves that
844 * execute them. This is most useful during a GPU hang.
845 */
846 static void si_print_annotated_shader(struct si_shader *shader,
847 struct ac_wave_info *waves,
848 unsigned num_waves,
849 FILE *f)
850 {
851 if (!shader || !shader->binary.disasm_string)
852 return;
853
854 uint64_t start_addr = shader->bo->gpu_address;
855 uint64_t end_addr = start_addr + shader->bo->b.b.width0;
856 unsigned i;
857
858 /* See if any wave executes the shader. */
859 for (i = 0; i < num_waves; i++) {
860 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
861 break;
862 }
863 if (i == num_waves)
864 return; /* the shader is not being executed */
865
866 /* Remember the first found wave. The waves are sorted according to PC. */
867 waves = &waves[i];
868 num_waves -= i;
869
870 /* Get the list of instructions.
871 * Buffer size / 4 is the upper bound of the instruction count.
872 */
873 unsigned num_inst = 0;
874 struct si_shader_inst *instructions =
875 calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
876
877 if (shader->prolog) {
878 si_add_split_disasm(shader->prolog->binary.disasm_string,
879 start_addr, &num_inst, instructions);
880 }
881 if (shader->previous_stage) {
882 si_add_split_disasm(shader->previous_stage->binary.disasm_string,
883 start_addr, &num_inst, instructions);
884 }
885 if (shader->prolog2) {
886 si_add_split_disasm(shader->prolog2->binary.disasm_string,
887 start_addr, &num_inst, instructions);
888 }
889 si_add_split_disasm(shader->binary.disasm_string,
890 start_addr, &num_inst, instructions);
891 if (shader->epilog) {
892 si_add_split_disasm(shader->epilog->binary.disasm_string,
893 start_addr, &num_inst, instructions);
894 }
895
896 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
897 si_get_shader_name(shader, shader->selector->type));
898
899 /* Print instructions with annotations. */
900 for (i = 0; i < num_inst; i++) {
901 struct si_shader_inst *inst = &instructions[i];
902
903 fprintf(f, "%s\n", inst->text);
904
905 /* Print which waves execute the instruction right now. */
906 while (num_waves && start_addr + inst->offset == waves->pc) {
907 fprintf(f,
908 " " COLOR_GREEN "^ SE%u SH%u CU%u "
909 "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
910 waves->se, waves->sh, waves->cu, waves->simd,
911 waves->wave, waves->exec);
912
913 if (inst->size == 4) {
914 fprintf(f, "INST32=%08X" COLOR_RESET "\n",
915 waves->inst_dw0);
916 } else {
917 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
918 waves->inst_dw0, waves->inst_dw1);
919 }
920
921 waves->matched = true;
922 waves = &waves[1];
923 num_waves--;
924 }
925 }
926
927 fprintf(f, "\n\n");
928 free(instructions);
929 }
930
/* Dump all currently-bound gfx shaders with wave annotations, then list
 * any remaining waves that are executing shaders not currently bound. */
static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
{
	struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
	unsigned num_waves = ac_get_wave_info(waves);

	fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
		"\n\n", num_waves);

	si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);

	/* Print waves executing shaders that are not currently bound. */
	unsigned i;
	bool found = false;
	for (i = 0; i < num_waves; i++) {
		if (waves[i].matched)
			continue;

		if (!found) {
			fprintf(f, COLOR_CYAN
				"Waves not executing currently-bound shaders:"
				COLOR_RESET "\n");
			found = true;
		}
		fprintf(f, "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016"PRIx64
			"  INST=%08X %08X  PC=%"PRIx64"\n",
			waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
			waves[i].wave, waves[i].exec, waves[i].inst_dw0,
			waves[i].inst_dw1, waves[i].pc);
	}
	if (found)
		fprintf(f, "\n\n");
}
967
968 static void si_dump_command(const char *title, const char *command, FILE *f)
969 {
970 char line[2000];
971
972 FILE *p = popen(command, "r");
973 if (!p)
974 return;
975
976 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
977 while (fgets(line, sizeof(line), p))
978 fputs(line, f);
979 fprintf(f, "\n\n");
980 pclose(p);
981 }
982
983 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
984 unsigned flags)
985 {
986 struct si_context *sctx = (struct si_context*)ctx;
987
988 if (sctx->b.log)
989 u_log_flush(sctx->b.log);
990
991 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
992 si_dump_debug_registers(sctx, f);
993
994 si_dump_annotated_shaders(sctx, f);
995 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
996 si_dump_command("Wave information", "umr -O bits -wa", f);
997 }
998 }
999
1000 void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
1001 {
1002 if (!log)
1003 return;
1004
1005 si_dump_framebuffer(sctx, log);
1006
1007 si_dump_gfx_shader(sctx, &sctx->vs_shader, log);
1008 si_dump_gfx_shader(sctx, &sctx->tcs_shader, log);
1009 si_dump_gfx_shader(sctx, &sctx->tes_shader, log);
1010 si_dump_gfx_shader(sctx, &sctx->gs_shader, log);
1011 si_dump_gfx_shader(sctx, &sctx->ps_shader, log);
1012
1013 si_dump_descriptor_list(sctx->screen,
1014 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
1015 "", "RW buffers", 4, SI_NUM_RW_BUFFERS,
1016 si_identity, log);
1017 si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log);
1018 si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log);
1019 si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log);
1020 si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log);
1021 si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log);
1022 }
1023
/* Log everything relevant to a compute dispatch: the bound shader and its
 * descriptors. */
void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)
{
	if (log) {
		si_dump_compute_shader(sctx, log);
		si_dump_compute_descriptors(sctx, log);
	}
}
1032
1033 static void si_dump_dma(struct si_context *sctx,
1034 struct radeon_saved_cs *saved, FILE *f)
1035 {
1036 static const char ib_name[] = "sDMA IB";
1037 unsigned i;
1038
1039 si_dump_bo_list(sctx, saved, f);
1040
1041 fprintf(f, "------------------ %s begin ------------------\n", ib_name);
1042
1043 for (i = 0; i < saved->num_dw; ++i) {
1044 fprintf(f, " %08x\n", saved->ib[i]);
1045 }
1046
1047 fprintf(f, "------------------- %s end -------------------\n", ib_name);
1048 fprintf(f, "\n");
1049
1050 fprintf(f, "SDMA Dump Done.\n");
1051 }
1052
/* Check dmesg for a new VM fault and, if one occurred, write a full debug
 * report and terminate the process.
 *
 * Only dmesg entries newer than sctx->dmesg_timestamp are considered (the
 * timestamp is primed in si_init_debug_functions). Does not return when a
 * fault is found: calls exit(0).
 */
void si_check_vm_faults(struct r600_common_context *ctx,
			struct radeon_saved_cs *saved, enum ring_type ring)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_screen *screen = sctx->b.b.screen;
	FILE *f;
	uint64_t addr;
	char cmd_line[4096];

	if (!ac_vm_fault_occured(sctx->b.chip_class,
				 &sctx->dmesg_timestamp, &addr))
		return;

	f = dd_get_debug_file(false);
	if (!f)
		return;

	/* Report header: command line, driver/device identification and
	 * the faulting VM page. */
	fprintf(f, "VM fault report.\n\n");
	if (os_get_command_line(cmd_line, sizeof(cmd_line)))
		fprintf(f, "Command: %s\n", cmd_line);
	fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
	fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
	fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
	fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);

	if (sctx->apitrace_call_number)
		fprintf(f, "Last apitrace call: %u\n\n",
			sctx->apitrace_call_number);

	/* Dump the state appropriate to the ring that faulted. */
	switch (ring) {
	case RING_GFX: {
		struct u_log_context log;
		u_log_context_init(&log);

		si_log_draw_state(sctx, &log);
		si_log_compute_state(sctx, &log);
		si_log_cs(sctx, &log, true);

		u_log_new_page_print(&log, f);
		u_log_context_destroy(&log);
		break;
	}
	case RING_DMA:
		si_dump_dma(sctx, saved, f);
		break;

	default:
		break;
	}

	fclose(f);

	fprintf(stderr, "Detected a VM fault, exiting...\n");
	exit(0);
}
1108
/* Install the pipe_context debug hook and prime the dmesg timestamp used
 * by VM fault detection. */
void si_init_debug_functions(struct si_context *sctx)
{
	sctx->b.b.dump_debug_state = si_dump_debug_state;

	/* Set the initial dmesg timestamp for this context, so that
	 * only new messages will be checked for VM faults.
	 */
	if (sctx->screen->debug_flags & DBG(CHECK_VM))
		ac_vm_fault_occured(sctx->b.chip_class,
				    &sctx->dmesg_timestamp, NULL);
}