radeonsi: start using u_log_context for debugging
[mesa.git] / src / gallium / drivers / radeonsi / si_debug.c
1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Marek Olšák <maraeo@gmail.com>
25 */
26
27 #include "si_pipe.h"
28 #include "si_compute.h"
29 #include "sid.h"
30 #include "gfx9d.h"
31 #include "sid_tables.h"
32 #include "ddebug/dd_util.h"
33 #include "util/u_log.h"
34 #include "util/u_memory.h"
35 #include "ac_debug.h"
36
/* Declares the accessor debug_get_option_replace_shaders(), which
 * si_replace_shader() calls to obtain the RADEON_REPLACE_SHADERS option
 * string. NULL is passed as the third argument (presumably the default
 * value when the option is unset - confirm against the macro definition). */
37 DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
38
39 static void si_dump_shader(struct si_screen *sscreen,
40 enum pipe_shader_type processor,
41 const struct si_shader *shader, FILE *f)
42 {
43 if (shader->shader_log)
44 fwrite(shader->shader_log, shader->shader_log_size, 1, f);
45 else
46 si_shader_dump(sscreen, shader, NULL, processor, f, false);
47 }
48
/* Payload of a log chunk that prints one shader. */
struct si_log_chunk_shader {
	/* Context that was current when the chunk was created.
	 *
	 * Destroying a shader assumes a current context (for unlinking PM4
	 * packets etc.). Destroying shaders without a context should be
	 * possible, but it would be exercised so rarely that it would likely
	 * break exactly when somebody is debugging, so the context is simply
	 * remembered here.
	 */
	struct si_context *ctx;
	/* The shader to print. */
	struct si_shader *shader;

	/* References held only to keep the shader alive. */
	struct si_shader_selector *sel;
	struct si_compute *program;
};
65
66 static void
67 si_log_chunk_shader_destroy(void *data)
68 {
69 struct si_log_chunk_shader *chunk = data;
70 si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL);
71 si_compute_reference(&chunk->program, NULL);
72 FREE(chunk);
73 }
74
75 static void
76 si_log_chunk_shader_print(void *data, FILE *f)
77 {
78 struct si_log_chunk_shader *chunk = data;
79 struct si_screen *sscreen = chunk->ctx->screen;
80 si_dump_shader(sscreen, chunk->shader->selector->info.processor,
81 chunk->shader, f);
82 }
83
84 static struct u_log_chunk_type si_log_chunk_type_shader = {
85 .destroy = si_log_chunk_shader_destroy,
86 .print = si_log_chunk_shader_print,
87 };
88
89 static void si_dump_gfx_shader(struct si_context *ctx,
90 const struct si_shader_ctx_state *state,
91 struct u_log_context *log)
92 {
93 struct si_shader *current = state->current;
94
95 if (!state->cso || !current)
96 return;
97
98 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
99 chunk->ctx = ctx;
100 chunk->shader = current;
101 si_shader_selector_reference(ctx, &chunk->sel, current->selector);
102 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
103 }
104
105 static void si_dump_compute_shader(const struct si_cs_shader_state *state,
106 struct u_log_context *log)
107 {
108 if (!state->program || state->program != state->emitted_program)
109 return;
110
111 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
112 chunk->shader = &state->program->shader;
113 si_compute_reference(&chunk->program, state->program);
114 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
115 }
116
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     shader number to look up in the option string
 * \param binary  passed to ac_elf_read() together with the file contents
 * \return true when an entry for "num" exists and its file was read
 *         completely and handed to ac_elf_read(); false otherwise
 *
 * A badly formatted option string terminates the process with exit(1).
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize;
	size_t nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Walk the "num:filename" entries until the one for "num" is found;
	 * on success p points at the start of its filename. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* If more entries follow, cut the filename out into its own string. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* The replacement is an ELF object, so open it in binary mode. */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	/* fread() returns a size_t; filesize is known to be non-negative. */
	nread = fread(buf, 1, filesize, f);
	if (nread != (size_t)filesize)
		goto file_error;

	ac_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
208
/* Terminal escape sequences used to colorize the dumps.
 *
 * Parsed IBs are hard to read without colors: view the output with
 * "less -R file", or convert it to html with "aha -b -f file".
 */
#define COLOR_RESET	"\033[0m"
#define COLOR_RED	"\033[31m"
#define COLOR_GREEN	"\033[1;32m"
#define COLOR_YELLOW	"\033[1;33m"
#define COLOR_CYAN	"\033[1;36m"
217
218 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
219 unsigned offset)
220 {
221 struct radeon_winsys *ws = sctx->b.ws;
222 uint32_t value;
223
224 if (ws->read_registers(ws, offset, 1, &value))
225 ac_dump_reg(f, offset, value, ~0);
226 }
227
228 static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
229 {
230 if (sctx->screen->b.info.drm_major == 2 &&
231 sctx->screen->b.info.drm_minor < 42)
232 return; /* no radeon support */
233
234 fprintf(f, "Memory-mapped registers:\n");
235 si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
236
237 /* No other registers can be read on DRM < 3.1.0. */
238 if (sctx->screen->b.info.drm_major < 3 ||
239 sctx->screen->b.info.drm_minor < 1) {
240 fprintf(f, "\n");
241 return;
242 }
243
244 si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
245 si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
246 si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
247 si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
248 si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
249 si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
250 si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
251 if (sctx->b.chip_class <= VI) {
252 si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
253 si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
254 si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
255 }
256 si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
257 si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
258 si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
259 si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
260 si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
261 si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
262 si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
263 si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
264 si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
265 si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
266 fprintf(f, "\n");
267 }
268
269 static void si_dump_last_ib(struct si_context *sctx, FILE *f)
270 {
271 int last_trace_id = -1;
272 int last_ce_trace_id = -1;
273
274 if (!sctx->last_gfx.ib)
275 return;
276
277 if (sctx->last_trace_buf) {
278 /* We are expecting that the ddebug pipe has already
279 * waited for the context, so this buffer should be idle.
280 * If the GPU is hung, there is no point in waiting for it.
281 */
282 uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
283 NULL,
284 PIPE_TRANSFER_UNSYNCHRONIZED |
285 PIPE_TRANSFER_READ);
286 if (map) {
287 last_trace_id = map[0];
288 last_ce_trace_id = map[1];
289 }
290 }
291
292 if (sctx->init_config)
293 ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
294 -1, "IB2: Init config", sctx->b.chip_class,
295 NULL, NULL);
296
297 if (sctx->init_config_gs_rings)
298 ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
299 sctx->init_config_gs_rings->ndw,
300 -1, "IB2: Init GS rings", sctx->b.chip_class,
301 NULL, NULL);
302
303 ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
304 last_trace_id, "IB", sctx->b.chip_class,
305 NULL, NULL);
306
307 if (sctx->last_ce.ib) {
308 ac_parse_ib(f, sctx->last_ce.ib, sctx->last_ce.num_dw,
309 last_ce_trace_id, "CE IB", sctx->b.chip_class,
310 NULL, NULL);
311 }
312 }
313
314 static const char *priority_to_string(enum radeon_bo_priority priority)
315 {
316 #define ITEM(x) [RADEON_PRIO_##x] = #x
317 static const char *table[64] = {
318 ITEM(FENCE),
319 ITEM(TRACE),
320 ITEM(SO_FILLED_SIZE),
321 ITEM(QUERY),
322 ITEM(IB1),
323 ITEM(IB2),
324 ITEM(DRAW_INDIRECT),
325 ITEM(INDEX_BUFFER),
326 ITEM(VCE),
327 ITEM(UVD),
328 ITEM(SDMA_BUFFER),
329 ITEM(SDMA_TEXTURE),
330 ITEM(CP_DMA),
331 ITEM(CONST_BUFFER),
332 ITEM(DESCRIPTORS),
333 ITEM(BORDER_COLORS),
334 ITEM(SAMPLER_BUFFER),
335 ITEM(VERTEX_BUFFER),
336 ITEM(SHADER_RW_BUFFER),
337 ITEM(COMPUTE_GLOBAL),
338 ITEM(SAMPLER_TEXTURE),
339 ITEM(SHADER_RW_IMAGE),
340 ITEM(SAMPLER_TEXTURE_MSAA),
341 ITEM(COLOR_BUFFER),
342 ITEM(DEPTH_BUFFER),
343 ITEM(COLOR_BUFFER_MSAA),
344 ITEM(DEPTH_BUFFER_MSAA),
345 ITEM(CMASK),
346 ITEM(DCC),
347 ITEM(HTILE),
348 ITEM(SHADER_BINARY),
349 ITEM(SHADER_RINGS),
350 ITEM(SCRATCH_BUFFER),
351 };
352 #undef ITEM
353
354 assert(priority < ARRAY_SIZE(table));
355 return table[priority];
356 }
357
358 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
359 const struct radeon_bo_list_item *b)
360 {
361 return a->vm_address < b->vm_address ? -1 :
362 a->vm_address > b->vm_address ? 1 : 0;
363 }
364
365 static void si_dump_bo_list(struct si_context *sctx,
366 const struct radeon_saved_cs *saved, FILE *f)
367 {
368 unsigned i,j;
369
370 if (!saved->bo_list)
371 return;
372
373 /* Sort the list according to VM adddresses first. */
374 qsort(saved->bo_list, saved->bo_count,
375 sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);
376
377 fprintf(f, "Buffer list (in units of pages = 4kB):\n"
378 COLOR_YELLOW " Size VM start page "
379 "VM end page Usage" COLOR_RESET "\n");
380
381 for (i = 0; i < saved->bo_count; i++) {
382 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
383 const unsigned page_size = sctx->b.screen->info.gart_page_size;
384 uint64_t va = saved->bo_list[i].vm_address;
385 uint64_t size = saved->bo_list[i].bo_size;
386 bool hit = false;
387
388 /* If there's unused virtual memory between 2 buffers, print it. */
389 if (i) {
390 uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
391 saved->bo_list[i-1].bo_size;
392
393 if (va > previous_va_end) {
394 fprintf(f, " %10"PRIu64" -- hole --\n",
395 (va - previous_va_end) / page_size);
396 }
397 }
398
399 /* Print the buffer. */
400 fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ",
401 size / page_size, va / page_size, (va + size) / page_size);
402
403 /* Print the usage. */
404 for (j = 0; j < 64; j++) {
405 if (!(saved->bo_list[i].priority_usage & (1ull << j)))
406 continue;
407
408 fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
409 hit = true;
410 }
411 fprintf(f, "\n");
412 }
413 fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
414 " Other buffers can still be allocated there.\n\n");
415 }
416
417 static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)
418 {
419 struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
420 struct r600_texture *rtex;
421 int i;
422
423 for (i = 0; i < state->nr_cbufs; i++) {
424 if (!state->cbufs[i])
425 continue;
426
427 rtex = (struct r600_texture*)state->cbufs[i]->texture;
428 u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
429 r600_print_texture_info(sctx->b.screen, rtex, log);
430 u_log_printf(log, "\n");
431 }
432
433 if (state->zsbuf) {
434 rtex = (struct r600_texture*)state->zsbuf->texture;
435 u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
436 r600_print_texture_info(sctx->b.screen, rtex, log);
437 u_log_printf(log, "\n");
438 }
439 }
440
/* Maps a logical slot index to the slot index used inside a descriptor
 * list. */
typedef unsigned (*slot_remap_func)(unsigned);

/* Payload of a log chunk that prints one descriptor list. */
struct si_log_chunk_desc_list {
	/** Pointer to memory map of buffer where the list is uploaded */
	uint32_t *gpu_list;
	/** Reference of buffer where the list is uploaded, so that gpu_list
	 * is kept live. */
	struct r600_resource *buf;

	/** Strings printed in the header of every slot. */
	const char *shader_name;
	const char *elem_name;
	/** Applied to the slot index when locating an element in gpu_list. */
	slot_remap_func slot_remap;
	/** Size of one element in dwords. */
	unsigned element_dw_size;
	/** Number of elements stored in list. */
	unsigned num_elements;

	/** CPU copy of the elements (num_elements * element_dw_size dwords),
	 * allocated together with the struct as a flexible array member. */
	uint32_t list[];
};
458
459 static void
460 si_log_chunk_desc_list_destroy(void *data)
461 {
462 struct si_log_chunk_desc_list *chunk = data;
463 r600_resource_reference(&chunk->buf, NULL);
464 FREE(chunk);
465 }
466
467 static void
468 si_log_chunk_desc_list_print(void *data, FILE *f)
469 {
470 struct si_log_chunk_desc_list *chunk = data;
471
472 for (unsigned i = 0; i < chunk->num_elements; i++) {
473 unsigned cpu_dw_offset = i * chunk->element_dw_size;
474 unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;
475 const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";
476 uint32_t *cpu_list = chunk->list + cpu_dw_offset;
477 uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;
478
479 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
480 chunk->shader_name, chunk->elem_name, i, list_note);
481
482 switch (chunk->element_dw_size) {
483 case 4:
484 for (unsigned j = 0; j < 4; j++)
485 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
486 gpu_list[j], 0xffffffff);
487 break;
488 case 8:
489 for (unsigned j = 0; j < 8; j++)
490 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
491 gpu_list[j], 0xffffffff);
492
493 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
494 for (unsigned j = 0; j < 4; j++)
495 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
496 gpu_list[4+j], 0xffffffff);
497 break;
498 case 16:
499 for (unsigned j = 0; j < 8; j++)
500 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
501 gpu_list[j], 0xffffffff);
502
503 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
504 for (unsigned j = 0; j < 4; j++)
505 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
506 gpu_list[4+j], 0xffffffff);
507
508 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
509 for (unsigned j = 0; j < 8; j++)
510 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
511 gpu_list[8+j], 0xffffffff);
512
513 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
514 for (unsigned j = 0; j < 4; j++)
515 ac_dump_reg(f, R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
516 gpu_list[12+j], 0xffffffff);
517 break;
518 }
519
520 if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {
521 fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
522 COLOR_RESET "\n");
523 }
524
525 fprintf(f, "\n");
526 }
527
528 }
529
530 static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {
531 .destroy = si_log_chunk_desc_list_destroy,
532 .print = si_log_chunk_desc_list_print,
533 };
534
535 static void si_dump_descriptor_list(struct si_descriptors *desc,
536 const char *shader_name,
537 const char *elem_name,
538 unsigned element_dw_size,
539 unsigned num_elements,
540 slot_remap_func slot_remap,
541 struct u_log_context *log)
542 {
543 if (!desc->list)
544 return;
545
546 struct si_log_chunk_desc_list *chunk =
547 CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list,
548 4 * element_dw_size * num_elements);
549 chunk->shader_name = shader_name;
550 chunk->elem_name = elem_name;
551 chunk->element_dw_size = element_dw_size;
552 chunk->num_elements = num_elements;
553 chunk->slot_remap = slot_remap;
554
555 r600_resource_reference(&chunk->buf, desc->buffer);
556 chunk->gpu_list = desc->gpu_list;
557
558 for (unsigned i = 0; i < num_elements; ++i) {
559 memcpy(&chunk->list[i * element_dw_size],
560 &desc->list[slot_remap(i) * element_dw_size],
561 4 * element_dw_size);
562 }
563
564 u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);
565 }
566
/* Slot remapping function for lists whose elements are stored in slot
 * order: every slot maps to itself. */
static unsigned si_identity(unsigned slot)
{
	const unsigned same_slot = slot;

	return same_slot;
}
571
572 static void si_dump_descriptors(struct si_context *sctx,
573 enum pipe_shader_type processor,
574 const struct tgsi_shader_info *info,
575 struct u_log_context *log)
576 {
577 struct si_descriptors *descs =
578 &sctx->descriptors[SI_DESCS_FIRST_SHADER +
579 processor * SI_NUM_SHADER_DESCS];
580 static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
581 const char *name = shader_name[processor];
582 unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;
583 unsigned enabled_images;
584
585 if (info) {
586 enabled_constbuf = info->const_buffers_declared;
587 enabled_shaderbuf = info->shader_buffers_declared;
588 enabled_samplers = info->samplers_declared;
589 enabled_images = info->images_declared;
590 } else {
591 enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
592 SI_NUM_SHADER_BUFFERS;
593 enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
594 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
595 enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
596 (32 - SI_NUM_SHADER_BUFFERS);
597 enabled_samplers = sctx->samplers[processor].views.enabled_mask;
598 enabled_images = sctx->images[processor].enabled_mask;
599 }
600
601 if (processor == PIPE_SHADER_VERTEX) {
602 assert(info); /* only CS may not have an info struct */
603
604 si_dump_descriptor_list(&sctx->vertex_buffers, name,
605 " - Vertex buffer", 4, info->num_inputs,
606 si_identity, log);
607 }
608
609 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
610 name, " - Constant buffer", 4,
611 util_last_bit(enabled_constbuf),
612 si_get_constbuf_slot, log);
613 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
614 name, " - Shader buffer", 4,
615 util_last_bit(enabled_shaderbuf),
616 si_get_shaderbuf_slot, log);
617 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
618 name, " - Sampler", 16,
619 util_last_bit(enabled_samplers),
620 si_get_sampler_slot, log);
621 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
622 name, " - Image", 8,
623 util_last_bit(enabled_images),
624 si_get_image_slot, log);
625 }
626
627 static void si_dump_gfx_descriptors(struct si_context *sctx,
628 const struct si_shader_ctx_state *state,
629 struct u_log_context *log)
630 {
631 if (!state->cso || !state->current)
632 return;
633
634 si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log);
635 }
636
637 static void si_dump_compute_descriptors(struct si_context *sctx,
638 struct u_log_context *log)
639 {
640 if (!sctx->cs_shader_state.program ||
641 sctx->cs_shader_state.program != sctx->cs_shader_state.emitted_program)
642 return;
643
644 si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log);
645 }
646
/* One instruction of a disassembled shader. */
struct si_shader_inst {
	/* text of one disassembly line */
	char text[160];
	/* offset of the instruction */
	unsigned offset;
	/* size of the instruction: 4 or 8 */
	unsigned size;
};
652
653 /* Split a disassembly string into lines and add them to the array pointed
654 * to by "instructions". */
655 static void si_add_split_disasm(const char *disasm,
656 uint64_t start_addr,
657 unsigned *num,
658 struct si_shader_inst *instructions)
659 {
660 struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
661 char *next;
662
663 while ((next = strchr(disasm, '\n'))) {
664 struct si_shader_inst *inst = &instructions[*num];
665 unsigned len = next - disasm;
666
667 assert(len < ARRAY_SIZE(inst->text));
668 memcpy(inst->text, disasm, len);
669 inst->text[len] = 0;
670 inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
671
672 const char *semicolon = strchr(disasm, ';');
673 assert(semicolon);
674 /* More than 16 chars after ";" means the instruction is 8 bytes long. */
675 inst->size = next - semicolon > 16 ? 8 : 4;
676
677 snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
678 " [PC=0x%"PRIx64", off=%u, size=%u]",
679 start_addr + inst->offset, inst->offset, inst->size);
680
681 last_inst = inst;
682 (*num)++;
683 disasm = next + 1;
684 }
685 }
686
/* Capacity of the wave arrays used in this file. */
#define MAX_WAVES_PER_CHIP (64 * 40)

/* State of one wave, filled in from one line of "umr -wa" output. */
struct si_wave_info {
	/* shader engine, shader array, compute unit */
	unsigned se, sh, cu;
	unsigned simd, wave;
	uint32_t status;
	/* program counter */
	uint64_t pc;
	uint32_t inst_dw0, inst_dw1;
	uint64_t exec;
	/* whether the wave is used by a currently-bound shader */
	bool matched;
};
702
703 static int compare_wave(const void *p1, const void *p2)
704 {
705 struct si_wave_info *w1 = (struct si_wave_info *)p1;
706 struct si_wave_info *w2 = (struct si_wave_info *)p2;
707
708 /* Sort waves according to PC and then SE, SH, CU, etc. */
709 if (w1->pc < w2->pc)
710 return -1;
711 if (w1->pc > w2->pc)
712 return 1;
713 if (w1->se < w2->se)
714 return -1;
715 if (w1->se > w2->se)
716 return 1;
717 if (w1->sh < w2->sh)
718 return -1;
719 if (w1->sh > w2->sh)
720 return 1;
721 if (w1->cu < w2->cu)
722 return -1;
723 if (w1->cu > w2->cu)
724 return 1;
725 if (w1->simd < w2->simd)
726 return -1;
727 if (w1->simd > w2->simd)
728 return 1;
729 if (w1->wave < w2->wave)
730 return -1;
731 if (w1->wave > w2->wave)
732 return 1;
733
734 return 0;
735 }
736
737 /* Return wave information. "waves" should be a large enough array. */
738 static unsigned si_get_wave_info(struct si_wave_info waves[MAX_WAVES_PER_CHIP])
739 {
740 char line[2000];
741 unsigned num_waves = 0;
742
743 FILE *p = popen("umr -wa", "r");
744 if (!p)
745 return 0;
746
747 if (!fgets(line, sizeof(line), p) ||
748 strncmp(line, "SE", 2) != 0) {
749 pclose(p);
750 return 0;
751 }
752
753 while (fgets(line, sizeof(line), p)) {
754 struct si_wave_info *w;
755 uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
756
757 assert(num_waves < MAX_WAVES_PER_CHIP);
758 w = &waves[num_waves];
759
760 if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
761 &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
762 &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
763 &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
764 w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
765 w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
766 w->matched = false;
767 num_waves++;
768 }
769 }
770
771 qsort(waves, num_waves, sizeof(struct si_wave_info), compare_wave);
772
773 pclose(p);
774 return num_waves;
775 }
776
777 /* If the shader is being executed, print its asm instructions, and annotate
778 * those that are being executed right now with information about waves that
779 * execute them. This is most useful during a GPU hang.
780 */
781 static void si_print_annotated_shader(struct si_shader *shader,
782 struct si_wave_info *waves,
783 unsigned num_waves,
784 FILE *f)
785 {
786 if (!shader || !shader->binary.disasm_string)
787 return;
788
789 uint64_t start_addr = shader->bo->gpu_address;
790 uint64_t end_addr = start_addr + shader->bo->b.b.width0;
791 unsigned i;
792
793 /* See if any wave executes the shader. */
794 for (i = 0; i < num_waves; i++) {
795 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
796 break;
797 }
798 if (i == num_waves)
799 return; /* the shader is not being executed */
800
801 /* Remember the first found wave. The waves are sorted according to PC. */
802 waves = &waves[i];
803 num_waves -= i;
804
805 /* Get the list of instructions.
806 * Buffer size / 4 is the upper bound of the instruction count.
807 */
808 unsigned num_inst = 0;
809 struct si_shader_inst *instructions =
810 calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
811
812 if (shader->prolog) {
813 si_add_split_disasm(shader->prolog->binary.disasm_string,
814 start_addr, &num_inst, instructions);
815 }
816 if (shader->previous_stage) {
817 si_add_split_disasm(shader->previous_stage->binary.disasm_string,
818 start_addr, &num_inst, instructions);
819 }
820 if (shader->prolog2) {
821 si_add_split_disasm(shader->prolog2->binary.disasm_string,
822 start_addr, &num_inst, instructions);
823 }
824 si_add_split_disasm(shader->binary.disasm_string,
825 start_addr, &num_inst, instructions);
826 if (shader->epilog) {
827 si_add_split_disasm(shader->epilog->binary.disasm_string,
828 start_addr, &num_inst, instructions);
829 }
830
831 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
832 si_get_shader_name(shader, shader->selector->type));
833
834 /* Print instructions with annotations. */
835 for (i = 0; i < num_inst; i++) {
836 struct si_shader_inst *inst = &instructions[i];
837
838 fprintf(f, "%s\n", inst->text);
839
840 /* Print which waves execute the instruction right now. */
841 while (num_waves && start_addr + inst->offset == waves->pc) {
842 fprintf(f,
843 " " COLOR_GREEN "^ SE%u SH%u CU%u "
844 "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
845 waves->se, waves->sh, waves->cu, waves->simd,
846 waves->wave, waves->exec);
847
848 if (inst->size == 4) {
849 fprintf(f, "INST32=%08X" COLOR_RESET "\n",
850 waves->inst_dw0);
851 } else {
852 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
853 waves->inst_dw0, waves->inst_dw1);
854 }
855
856 waves->matched = true;
857 waves = &waves[1];
858 num_waves--;
859 }
860 }
861
862 fprintf(f, "\n\n");
863 free(instructions);
864 }
865
866 static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
867 {
868 struct si_wave_info waves[MAX_WAVES_PER_CHIP];
869 unsigned num_waves = si_get_wave_info(waves);
870
871 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
872 "\n\n", num_waves);
873
874 si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
875 si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
876 si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
877 si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
878 si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
879
880 /* Print waves executing shaders that are not currently bound. */
881 unsigned i;
882 bool found = false;
883 for (i = 0; i < num_waves; i++) {
884 if (waves[i].matched)
885 continue;
886
887 if (!found) {
888 fprintf(f, COLOR_CYAN
889 "Waves not executing currently-bound shaders:"
890 COLOR_RESET "\n");
891 found = true;
892 }
893 fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
894 " INST=%08X %08X PC=%"PRIx64"\n",
895 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
896 waves[i].wave, waves[i].exec, waves[i].inst_dw0,
897 waves[i].inst_dw1, waves[i].pc);
898 }
899 if (found)
900 fprintf(f, "\n\n");
901 }
902
903 static void si_dump_command(const char *title, const char *command, FILE *f)
904 {
905 char line[2000];
906
907 FILE *p = popen(command, "r");
908 if (!p)
909 return;
910
911 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
912 while (fgets(line, sizeof(line), p))
913 fputs(line, f);
914 fprintf(f, "\n\n");
915 pclose(p);
916 }
917
918 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
919 unsigned flags)
920 {
921 struct si_context *sctx = (struct si_context*)ctx;
922
923 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
924 si_dump_debug_registers(sctx, f);
925
926 if (flags & PIPE_DUMP_CURRENT_SHADERS) {
927 si_dump_annotated_shaders(sctx, f);
928 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
929 si_dump_command("Wave information", "umr -O bits -wa", f);
930 }
931 }
932
933 struct u_log_context log;
934 u_log_context_init(&log);
935
936 if (flags & PIPE_DUMP_CURRENT_STATES)
937 si_dump_framebuffer(sctx, &log);
938
939 if (flags & PIPE_DUMP_CURRENT_SHADERS) {
940 si_dump_gfx_shader(sctx, &sctx->vs_shader, &log);
941 si_dump_gfx_shader(sctx, &sctx->tcs_shader, &log);
942 si_dump_gfx_shader(sctx, &sctx->tes_shader, &log);
943 si_dump_gfx_shader(sctx, &sctx->gs_shader, &log);
944 si_dump_gfx_shader(sctx, &sctx->ps_shader, &log);
945 si_dump_compute_shader(&sctx->cs_shader_state, &log);
946
947 si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
948 "", "RW buffers", 4, SI_NUM_RW_BUFFERS,
949 si_identity, &log);
950 si_dump_gfx_descriptors(sctx, &sctx->vs_shader, &log);
951 si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, &log);
952 si_dump_gfx_descriptors(sctx, &sctx->tes_shader, &log);
953 si_dump_gfx_descriptors(sctx, &sctx->gs_shader, &log);
954 si_dump_gfx_descriptors(sctx, &sctx->ps_shader, &log);
955 si_dump_compute_descriptors(sctx, &log);
956 }
957
958 u_log_new_page_print(&log, f);
959 u_log_context_destroy(&log);
960
961 if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
962 si_dump_bo_list(sctx, &sctx->last_gfx, f);
963 si_dump_last_ib(sctx, f);
964
965 fprintf(f, "Done.\n");
966
967 /* dump only once */
968 radeon_clear_saved_cs(&sctx->last_gfx);
969 radeon_clear_saved_cs(&sctx->last_ce);
970 r600_resource_reference(&sctx->last_trace_buf, NULL);
971 }
972 }
973
974 static void si_dump_dma(struct si_context *sctx,
975 struct radeon_saved_cs *saved, FILE *f)
976 {
977 static const char ib_name[] = "sDMA IB";
978 unsigned i;
979
980 si_dump_bo_list(sctx, saved, f);
981
982 fprintf(f, "------------------ %s begin ------------------\n", ib_name);
983
984 for (i = 0; i < saved->num_dw; ++i) {
985 fprintf(f, " %08x\n", saved->ib[i]);
986 }
987
988 fprintf(f, "------------------- %s end -------------------\n", ib_name);
989 fprintf(f, "\n");
990
991 fprintf(f, "SDMA Dump Done.\n");
992 }
993
994 static bool si_vm_fault_occured(struct si_context *sctx, uint64_t *out_addr)
995 {
996 char line[2000];
997 unsigned sec, usec;
998 int progress = 0;
999 uint64_t timestamp = 0;
1000 bool fault = false;
1001
1002 FILE *p = popen("dmesg", "r");
1003 if (!p)
1004 return false;
1005
1006 while (fgets(line, sizeof(line), p)) {
1007 char *msg, len;
1008
1009 if (!line[0] || line[0] == '\n')
1010 continue;
1011
1012 /* Get the timestamp. */
1013 if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
1014 static bool hit = false;
1015 if (!hit) {
1016 fprintf(stderr, "%s: failed to parse line '%s'\n",
1017 __func__, line);
1018 hit = true;
1019 }
1020 continue;
1021 }
1022 timestamp = sec * 1000000ull + usec;
1023
1024 /* If just updating the timestamp. */
1025 if (!out_addr)
1026 continue;
1027
1028 /* Process messages only if the timestamp is newer. */
1029 if (timestamp <= sctx->dmesg_timestamp)
1030 continue;
1031
1032 /* Only process the first VM fault. */
1033 if (fault)
1034 continue;
1035
1036 /* Remove trailing \n */
1037 len = strlen(line);
1038 if (len && line[len-1] == '\n')
1039 line[len-1] = 0;
1040
1041 /* Get the message part. */
1042 msg = strchr(line, ']');
1043 if (!msg) {
1044 assert(0);
1045 continue;
1046 }
1047 msg++;
1048
1049 const char *header_line, *addr_line_prefix, *addr_line_format;
1050
1051 if (sctx->b.chip_class >= GFX9) {
1052 /* Match this:
1053 * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
1054 * ..: at page 0x0000000219f8f000 from 27
1055 * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
1056 */
1057 header_line = "VMC page fault";
1058 addr_line_prefix = " at page";
1059 addr_line_format = "%"PRIx64;
1060 } else {
1061 header_line = "GPU fault detected:";
1062 addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
1063 addr_line_format = "%"PRIX64;
1064 }
1065
1066 switch (progress) {
1067 case 0:
1068 if (strstr(msg, header_line))
1069 progress = 1;
1070 break;
1071 case 1:
1072 msg = strstr(msg, addr_line_prefix);
1073 if (msg) {
1074 msg = strstr(msg, "0x");
1075 if (msg) {
1076 msg += 2;
1077 if (sscanf(msg, addr_line_format, out_addr) == 1)
1078 fault = true;
1079 }
1080 }
1081 progress = 0;
1082 break;
1083 default:
1084 progress = 0;
1085 }
1086 }
1087 pclose(p);
1088
1089 if (timestamp > sctx->dmesg_timestamp)
1090 sctx->dmesg_timestamp = timestamp;
1091 return fault;
1092 }
1093
1094 void si_check_vm_faults(struct r600_common_context *ctx,
1095 struct radeon_saved_cs *saved, enum ring_type ring)
1096 {
1097 struct si_context *sctx = (struct si_context *)ctx;
1098 struct pipe_screen *screen = sctx->b.b.screen;
1099 FILE *f;
1100 uint64_t addr;
1101 char cmd_line[4096];
1102
1103 if (!si_vm_fault_occured(sctx, &addr))
1104 return;
1105
1106 f = dd_get_debug_file(false);
1107 if (!f)
1108 return;
1109
1110 fprintf(f, "VM fault report.\n\n");
1111 if (os_get_command_line(cmd_line, sizeof(cmd_line)))
1112 fprintf(f, "Command: %s\n", cmd_line);
1113 fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
1114 fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
1115 fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
1116 fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
1117
1118 if (sctx->apitrace_call_number)
1119 fprintf(f, "Last apitrace call: %u\n\n",
1120 sctx->apitrace_call_number);
1121
1122 switch (ring) {
1123 case RING_GFX:
1124 si_dump_debug_state(&sctx->b.b, f,
1125 PIPE_DUMP_CURRENT_STATES |
1126 PIPE_DUMP_CURRENT_SHADERS |
1127 PIPE_DUMP_LAST_COMMAND_BUFFER);
1128 break;
1129
1130 case RING_DMA:
1131 si_dump_dma(sctx, saved, f);
1132 break;
1133
1134 default:
1135 break;
1136 }
1137
1138 fclose(f);
1139
1140 fprintf(stderr, "Detected a VM fault, exiting...\n");
1141 exit(0);
1142 }
1143
1144 void si_init_debug_functions(struct si_context *sctx)
1145 {
1146 sctx->b.b.dump_debug_state = si_dump_debug_state;
1147 sctx->b.check_vm_faults = si_check_vm_faults;
1148
1149 /* Set the initial dmesg timestamp for this context, so that
1150 * only new messages will be checked for VM faults.
1151 */
1152 if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
1153 si_vm_fault_occured(sctx, NULL);
1154 }