radeonsi: print CE IBs into ddebug reports
[mesa.git] / src / gallium / drivers / radeonsi / si_debug.c
1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Marek Olšák <maraeo@gmail.com>
25 */
26
27 #include "si_pipe.h"
28 #include "si_compute.h"
29 #include "sid.h"
30 #include "gfx9d.h"
31 #include "sid_tables.h"
32 #include "ddebug/dd_util.h"
33 #include "util/u_memory.h"
34 #include "ac_debug.h"
35
/* Generates debug_get_option_replace_shaders(), which si_replace_shader()
 * calls to obtain the RADEON_REPLACE_SHADERS setting. NULL is the default
 * passed to the macro here.
 * NOTE(review): presumably the value is read from the environment once and
 * cached - confirm against the DEBUG_GET_ONCE_OPTION definition.
 */
DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
37
38 static void si_dump_shader(struct si_screen *sscreen,
39 enum pipe_shader_type processor,
40 const struct si_shader *shader, FILE *f)
41 {
42 if (shader->shader_log)
43 fwrite(shader->shader_log, shader->shader_log_size, 1, f);
44 else
45 si_shader_dump(sscreen, shader, NULL, processor, f, false);
46 }
47
48 static void si_dump_gfx_shader(struct si_screen *sscreen,
49 const struct si_shader_ctx_state *state, FILE *f)
50 {
51 const struct si_shader *current = state->current;
52
53 if (!state->cso || !current)
54 return;
55
56 si_dump_shader(sscreen, state->cso->info.processor, current, f);
57 }
58
59 static void si_dump_compute_shader(struct si_screen *sscreen,
60 const struct si_cs_shader_state *state, FILE *f)
61 {
62 if (!state->program || state->program != state->emitted_program)
63 return;
64
65 si_dump_shader(sscreen, PIPE_SHADER_COMPUTE, &state->program->shader, f);
66 }
67
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     number of the shader being compiled; it is matched against
 *                the "numN" entries of the option string
 * \param binary  receives the contents of the replacement ELF object
 * \return true if a replacement file was found for "num", read completely
 *         and handed to ac_elf_read(); false otherwise (option unset, no
 *         entry for "num", I/O error, empty file or allocation failure)
 *
 * A syntactically invalid option string terminates the process.
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize;
	size_t nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Walk the "num:filename" entries until the one for "num" is found. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* Isolate the filename if more entries follow it. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* ELF objects are binary data, so don't open the file in text mode. */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	/* An empty file cannot be a valid ELF object; reject it here instead
	 * of relying on what a zero-sized allocation returns.
	 */
	if (filesize == 0) {
		fprintf(stderr, "radeonsi: replacement shader file is empty\n");
		goto out_close;
	}

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	nread = fread(buf, 1, filesize, f);
	if (nread != (size_t)filesize)
		goto file_error;

	ac_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
159
/* Terminal color escape sequences.
 *
 * Parsed IBs are difficult to read without colors. Use "less -R file" to
 * read them, or use "aha -b -f file" to convert them to html.
 */
#define SI_COLOR_ESCAPE(params)	"\033[" params "m"

#define COLOR_RESET	SI_COLOR_ESCAPE("0")
#define COLOR_RED	SI_COLOR_ESCAPE("31")
#define COLOR_GREEN	SI_COLOR_ESCAPE("1;32")
#define COLOR_YELLOW	SI_COLOR_ESCAPE("1;33")
#define COLOR_CYAN	SI_COLOR_ESCAPE("1;36")
168
169 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
170 unsigned offset)
171 {
172 struct radeon_winsys *ws = sctx->b.ws;
173 uint32_t value;
174
175 if (ws->read_registers(ws, offset, 1, &value))
176 ac_dump_reg(f, offset, value, ~0);
177 }
178
179 static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
180 {
181 if (sctx->screen->b.info.drm_major == 2 &&
182 sctx->screen->b.info.drm_minor < 42)
183 return; /* no radeon support */
184
185 fprintf(f, "Memory-mapped registers:\n");
186 si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
187
188 /* No other registers can be read on DRM < 3.1.0. */
189 if (sctx->screen->b.info.drm_major < 3 ||
190 sctx->screen->b.info.drm_minor < 1) {
191 fprintf(f, "\n");
192 return;
193 }
194
195 si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
196 si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
197 si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
198 si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
199 si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
200 si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
201 si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
202 if (sctx->b.chip_class <= VI) {
203 si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
204 si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
205 si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
206 }
207 si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
208 si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
209 si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
210 si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
211 si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
212 si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
213 si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
214 si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
215 si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
216 si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
217 fprintf(f, "\n");
218 }
219
220 static void si_dump_last_ib(struct si_context *sctx, FILE *f)
221 {
222 int last_trace_id = -1;
223 int last_ce_trace_id = -1;
224
225 if (!sctx->last_gfx.ib)
226 return;
227
228 if (sctx->last_trace_buf) {
229 /* We are expecting that the ddebug pipe has already
230 * waited for the context, so this buffer should be idle.
231 * If the GPU is hung, there is no point in waiting for it.
232 */
233 uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
234 NULL,
235 PIPE_TRANSFER_UNSYNCHRONIZED |
236 PIPE_TRANSFER_READ);
237 if (map) {
238 last_trace_id = map[0];
239 last_ce_trace_id = map[1];
240 }
241 }
242
243 if (sctx->init_config)
244 ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
245 -1, "IB2: Init config", sctx->b.chip_class,
246 NULL, NULL);
247
248 if (sctx->init_config_gs_rings)
249 ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
250 sctx->init_config_gs_rings->ndw,
251 -1, "IB2: Init GS rings", sctx->b.chip_class,
252 NULL, NULL);
253
254 ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
255 last_trace_id, "IB", sctx->b.chip_class,
256 NULL, NULL);
257
258 if (sctx->last_ce.ib) {
259 ac_parse_ib(f, sctx->last_ce.ib, sctx->last_ce.num_dw,
260 last_ce_trace_id, "CE IB", sctx->b.chip_class,
261 NULL, NULL);
262 }
263 }
264
265 static const char *priority_to_string(enum radeon_bo_priority priority)
266 {
267 #define ITEM(x) [RADEON_PRIO_##x] = #x
268 static const char *table[64] = {
269 ITEM(FENCE),
270 ITEM(TRACE),
271 ITEM(SO_FILLED_SIZE),
272 ITEM(QUERY),
273 ITEM(IB1),
274 ITEM(IB2),
275 ITEM(DRAW_INDIRECT),
276 ITEM(INDEX_BUFFER),
277 ITEM(VCE),
278 ITEM(UVD),
279 ITEM(SDMA_BUFFER),
280 ITEM(SDMA_TEXTURE),
281 ITEM(CP_DMA),
282 ITEM(CONST_BUFFER),
283 ITEM(DESCRIPTORS),
284 ITEM(BORDER_COLORS),
285 ITEM(SAMPLER_BUFFER),
286 ITEM(VERTEX_BUFFER),
287 ITEM(SHADER_RW_BUFFER),
288 ITEM(COMPUTE_GLOBAL),
289 ITEM(SAMPLER_TEXTURE),
290 ITEM(SHADER_RW_IMAGE),
291 ITEM(SAMPLER_TEXTURE_MSAA),
292 ITEM(COLOR_BUFFER),
293 ITEM(DEPTH_BUFFER),
294 ITEM(COLOR_BUFFER_MSAA),
295 ITEM(DEPTH_BUFFER_MSAA),
296 ITEM(CMASK),
297 ITEM(DCC),
298 ITEM(HTILE),
299 ITEM(SHADER_BINARY),
300 ITEM(SHADER_RINGS),
301 ITEM(SCRATCH_BUFFER),
302 };
303 #undef ITEM
304
305 assert(priority < ARRAY_SIZE(table));
306 return table[priority];
307 }
308
309 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
310 const struct radeon_bo_list_item *b)
311 {
312 return a->vm_address < b->vm_address ? -1 :
313 a->vm_address > b->vm_address ? 1 : 0;
314 }
315
316 static void si_dump_bo_list(struct si_context *sctx,
317 const struct radeon_saved_cs *saved, FILE *f)
318 {
319 unsigned i,j;
320
321 if (!saved->bo_list)
322 return;
323
324 /* Sort the list according to VM adddresses first. */
325 qsort(saved->bo_list, saved->bo_count,
326 sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);
327
328 fprintf(f, "Buffer list (in units of pages = 4kB):\n"
329 COLOR_YELLOW " Size VM start page "
330 "VM end page Usage" COLOR_RESET "\n");
331
332 for (i = 0; i < saved->bo_count; i++) {
333 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
334 const unsigned page_size = sctx->b.screen->info.gart_page_size;
335 uint64_t va = saved->bo_list[i].vm_address;
336 uint64_t size = saved->bo_list[i].bo_size;
337 bool hit = false;
338
339 /* If there's unused virtual memory between 2 buffers, print it. */
340 if (i) {
341 uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
342 saved->bo_list[i-1].bo_size;
343
344 if (va > previous_va_end) {
345 fprintf(f, " %10"PRIu64" -- hole --\n",
346 (va - previous_va_end) / page_size);
347 }
348 }
349
350 /* Print the buffer. */
351 fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ",
352 size / page_size, va / page_size, (va + size) / page_size);
353
354 /* Print the usage. */
355 for (j = 0; j < 64; j++) {
356 if (!(saved->bo_list[i].priority_usage & (1ull << j)))
357 continue;
358
359 fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
360 hit = true;
361 }
362 fprintf(f, "\n");
363 }
364 fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
365 " Other buffers can still be allocated there.\n\n");
366 }
367
368 static void si_dump_framebuffer(struct si_context *sctx, FILE *f)
369 {
370 struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
371 struct r600_texture *rtex;
372 int i;
373
374 for (i = 0; i < state->nr_cbufs; i++) {
375 if (!state->cbufs[i])
376 continue;
377
378 rtex = (struct r600_texture*)state->cbufs[i]->texture;
379 fprintf(f, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
380 r600_print_texture_info(sctx->b.screen, rtex, f);
381 fprintf(f, "\n");
382 }
383
384 if (state->zsbuf) {
385 rtex = (struct r600_texture*)state->zsbuf->texture;
386 fprintf(f, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
387 r600_print_texture_info(sctx->b.screen, rtex, f);
388 fprintf(f, "\n");
389 }
390 }
391
392 typedef unsigned (*slot_remap_func)(unsigned);
393
394 static void si_dump_descriptor_list(struct si_descriptors *desc,
395 const char *shader_name,
396 const char *elem_name,
397 unsigned element_dw_size,
398 unsigned num_elements,
399 slot_remap_func slot_remap,
400 FILE *f)
401 {
402 unsigned i, j;
403
404 if (!desc->list)
405 return;
406
407 for (i = 0; i < num_elements; i++) {
408 unsigned dw_offset = slot_remap(i) * element_dw_size;
409 uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list : desc->list;
410 const char *list_note = desc->gpu_list ? "GPU list" : "CPU list";
411 uint32_t *cpu_list = desc->list + dw_offset;
412 uint32_t *gpu_list = gpu_ptr + dw_offset;
413
414 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
415 shader_name, elem_name, i, list_note);
416
417 switch (element_dw_size) {
418 case 4:
419 for (j = 0; j < 4; j++)
420 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
421 gpu_list[j], 0xffffffff);
422 break;
423 case 8:
424 for (j = 0; j < 8; j++)
425 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
426 gpu_list[j], 0xffffffff);
427
428 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
429 for (j = 0; j < 4; j++)
430 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
431 gpu_list[4+j], 0xffffffff);
432 break;
433 case 16:
434 for (j = 0; j < 8; j++)
435 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
436 gpu_list[j], 0xffffffff);
437
438 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
439 for (j = 0; j < 4; j++)
440 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
441 gpu_list[4+j], 0xffffffff);
442
443 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
444 for (j = 0; j < 8; j++)
445 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
446 gpu_list[8+j], 0xffffffff);
447
448 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
449 for (j = 0; j < 4; j++)
450 ac_dump_reg(f, R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
451 gpu_list[12+j], 0xffffffff);
452 break;
453 }
454
455 if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4) != 0) {
456 fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
457 COLOR_RESET "\n");
458 }
459
460 fprintf(f, "\n");
461 }
462 }
463
/* Identity slot mapping, for descriptor lists whose slots are stored in
 * index order. Usable as a slot_remap_func.
 */
static unsigned si_identity(unsigned slot)
{
	const unsigned element = slot;

	return element;
}
468
469 static void si_dump_descriptors(struct si_context *sctx,
470 enum pipe_shader_type processor,
471 const struct tgsi_shader_info *info, FILE *f)
472 {
473 struct si_descriptors *descs =
474 &sctx->descriptors[SI_DESCS_FIRST_SHADER +
475 processor * SI_NUM_SHADER_DESCS];
476 static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
477 const char *name = shader_name[processor];
478 unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;
479 unsigned enabled_images;
480
481 if (info) {
482 enabled_constbuf = info->const_buffers_declared;
483 enabled_shaderbuf = info->shader_buffers_declared;
484 enabled_samplers = info->samplers_declared;
485 enabled_images = info->images_declared;
486 } else {
487 enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
488 SI_NUM_SHADER_BUFFERS;
489 enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
490 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
491 enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
492 (32 - SI_NUM_SHADER_BUFFERS);
493 enabled_samplers = sctx->samplers[processor].views.enabled_mask;
494 enabled_images = sctx->images[processor].enabled_mask;
495 }
496
497 if (processor == PIPE_SHADER_VERTEX) {
498 assert(info); /* only CS may not have an info struct */
499
500 si_dump_descriptor_list(&sctx->vertex_buffers, name,
501 " - Vertex buffer", 4, info->num_inputs,
502 si_identity, f);
503 }
504
505 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
506 name, " - Constant buffer", 4,
507 util_last_bit(enabled_constbuf),
508 si_get_constbuf_slot, f);
509 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
510 name, " - Shader buffer", 4,
511 util_last_bit(enabled_shaderbuf),
512 si_get_shaderbuf_slot, f);
513 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
514 name, " - Sampler", 16,
515 util_last_bit(enabled_samplers),
516 si_get_sampler_slot, f);
517 si_dump_descriptor_list(&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
518 name, " - Image", 8,
519 util_last_bit(enabled_images),
520 si_get_image_slot, f);
521 }
522
523 static void si_dump_gfx_descriptors(struct si_context *sctx,
524 const struct si_shader_ctx_state *state,
525 FILE *f)
526 {
527 if (!state->cso || !state->current)
528 return;
529
530 si_dump_descriptors(sctx, state->cso->type, &state->cso->info, f);
531 }
532
533 static void si_dump_compute_descriptors(struct si_context *sctx, FILE *f)
534 {
535 if (!sctx->cs_shader_state.program ||
536 sctx->cs_shader_state.program != sctx->cs_shader_state.emitted_program)
537 return;
538
539 si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, f);
540 }
541
/* One disassembled instruction together with its position in the shader. */
struct si_shader_inst {
	char text[160];  /* one disasm line */
	unsigned offset; /* instruction offset */
	unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * \param disasm        newline-separated disassembly; NULL adds nothing
 * \param start_addr    address of the first instruction of the whole shader,
 *                      used to annotate each line with its PC
 * \param num           in/out: number of valid entries in "instructions"
 * \param instructions  array receiving the entries; the caller must provide
 *                      room for every instruction line of "disasm"
 *
 * Offsets continue after the last entry already present in the array.
 * Lines that contain no ';' are not instructions and are skipped. Lines
 * too long for the text buffer are truncated.
 */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct si_shader_inst *instructions)
{
	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	const char *next;

	if (!disasm)
		return;

	while ((next = strchr(disasm, '\n'))) {
		struct si_shader_inst *inst = &instructions[*num];
		size_t len = next - disasm;
		/* Only look for the encoding separator inside this line. */
		const char *semicolon = memchr(disasm, ';', len);

		if (!semicolon) {
			/* Ignore everything that is not an instruction. */
			disasm = next + 1;
			continue;
		}

		/* Never write past the text buffer, also in release builds. */
		if (len >= sizeof(inst->text))
			len = sizeof(inst->text) - 1;

		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
		inst->size = next - semicolon > 16 ? 8 : 4;

		snprintf(inst->text + len, sizeof(inst->text) - len,
			 " [PC=0x%"PRIx64", off=%u, size=%u]",
			 start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
		disasm = next + 1;
	}
}
581
/* Capacity of the arrays used to hold wave information. */
#define MAX_WAVES_PER_CHIP (64 * 40)

/* State of one wave as parsed from the output of "umr -wa". */
struct si_wave_info {
	unsigned se; /* shader engine */
	unsigned sh; /* shader array */
	unsigned cu; /* compute unit */
	unsigned simd;
	unsigned wave;
	uint32_t status;
	uint64_t pc; /* program counter */
	uint32_t inst_dw0;
	uint32_t inst_dw1;
	uint64_t exec;
	bool matched; /* whether the wave is used by a currently-bound shader */
};

/* qsort() comparator: sort waves according to PC and then SE, SH, CU, SIMD
 * and wave index.
 */
static int compare_wave(const void *p1, const void *p2)
{
	const struct si_wave_info *lhs = (const struct si_wave_info *)p1;
	const struct si_wave_info *rhs = (const struct si_wave_info *)p2;

	/* Decide on the first key that differs. */
#define COMPARE_KEY(field) \
	do { \
		if (lhs->field != rhs->field) \
			return lhs->field < rhs->field ? -1 : 1; \
	} while (0)

	COMPARE_KEY(pc);
	COMPARE_KEY(se);
	COMPARE_KEY(sh);
	COMPARE_KEY(cu);
	COMPARE_KEY(simd);
	COMPARE_KEY(wave);
#undef COMPARE_KEY

	return 0;
}
631
632 /* Return wave information. "waves" should be a large enough array. */
633 static unsigned si_get_wave_info(struct si_wave_info waves[MAX_WAVES_PER_CHIP])
634 {
635 char line[2000];
636 unsigned num_waves = 0;
637
638 FILE *p = popen("umr -wa", "r");
639 if (!p)
640 return 0;
641
642 if (!fgets(line, sizeof(line), p) ||
643 strncmp(line, "SE", 2) != 0) {
644 pclose(p);
645 return 0;
646 }
647
648 while (fgets(line, sizeof(line), p)) {
649 struct si_wave_info *w;
650 uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
651
652 assert(num_waves < MAX_WAVES_PER_CHIP);
653 w = &waves[num_waves];
654
655 if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
656 &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
657 &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
658 &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
659 w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
660 w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
661 w->matched = false;
662 num_waves++;
663 }
664 }
665
666 qsort(waves, num_waves, sizeof(struct si_wave_info), compare_wave);
667
668 pclose(p);
669 return num_waves;
670 }
671
672 /* If the shader is being executed, print its asm instructions, and annotate
673 * those that are being executed right now with information about waves that
674 * execute them. This is most useful during a GPU hang.
675 */
676 static void si_print_annotated_shader(struct si_shader *shader,
677 struct si_wave_info *waves,
678 unsigned num_waves,
679 FILE *f)
680 {
681 if (!shader || !shader->binary.disasm_string)
682 return;
683
684 uint64_t start_addr = shader->bo->gpu_address;
685 uint64_t end_addr = start_addr + shader->bo->b.b.width0;
686 unsigned i;
687
688 /* See if any wave executes the shader. */
689 for (i = 0; i < num_waves; i++) {
690 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
691 break;
692 }
693 if (i == num_waves)
694 return; /* the shader is not being executed */
695
696 /* Remember the first found wave. The waves are sorted according to PC. */
697 waves = &waves[i];
698 num_waves -= i;
699
700 /* Get the list of instructions.
701 * Buffer size / 4 is the upper bound of the instruction count.
702 */
703 unsigned num_inst = 0;
704 struct si_shader_inst *instructions =
705 calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
706
707 if (shader->prolog) {
708 si_add_split_disasm(shader->prolog->binary.disasm_string,
709 start_addr, &num_inst, instructions);
710 }
711 if (shader->previous_stage) {
712 si_add_split_disasm(shader->previous_stage->binary.disasm_string,
713 start_addr, &num_inst, instructions);
714 }
715 if (shader->prolog2) {
716 si_add_split_disasm(shader->prolog2->binary.disasm_string,
717 start_addr, &num_inst, instructions);
718 }
719 si_add_split_disasm(shader->binary.disasm_string,
720 start_addr, &num_inst, instructions);
721 if (shader->epilog) {
722 si_add_split_disasm(shader->epilog->binary.disasm_string,
723 start_addr, &num_inst, instructions);
724 }
725
726 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
727 si_get_shader_name(shader, shader->selector->type));
728
729 /* Print instructions with annotations. */
730 for (i = 0; i < num_inst; i++) {
731 struct si_shader_inst *inst = &instructions[i];
732
733 fprintf(f, "%s\n", inst->text);
734
735 /* Print which waves execute the instruction right now. */
736 while (num_waves && start_addr + inst->offset == waves->pc) {
737 fprintf(f,
738 " " COLOR_GREEN "^ SE%u SH%u CU%u "
739 "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
740 waves->se, waves->sh, waves->cu, waves->simd,
741 waves->wave, waves->exec);
742
743 if (inst->size == 4) {
744 fprintf(f, "INST32=%08X" COLOR_RESET "\n",
745 waves->inst_dw0);
746 } else {
747 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
748 waves->inst_dw0, waves->inst_dw1);
749 }
750
751 waves->matched = true;
752 waves = &waves[1];
753 num_waves--;
754 }
755 }
756
757 fprintf(f, "\n\n");
758 free(instructions);
759 }
760
761 static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
762 {
763 struct si_wave_info waves[MAX_WAVES_PER_CHIP];
764 unsigned num_waves = si_get_wave_info(waves);
765
766 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
767 "\n\n", num_waves);
768
769 si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
770 si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
771 si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
772 si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
773 si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
774
775 /* Print waves executing shaders that are not currently bound. */
776 unsigned i;
777 bool found = false;
778 for (i = 0; i < num_waves; i++) {
779 if (waves[i].matched)
780 continue;
781
782 if (!found) {
783 fprintf(f, COLOR_CYAN
784 "Waves not executing currently-bound shaders:"
785 COLOR_RESET "\n");
786 found = true;
787 }
788 fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
789 " INST=%08X %08X PC=%"PRIx64"\n",
790 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
791 waves[i].wave, waves[i].exec, waves[i].inst_dw0,
792 waves[i].inst_dw1, waves[i].pc);
793 }
794 if (found)
795 fprintf(f, "\n\n");
796 }
797
798 static void si_dump_command(const char *title, const char *command, FILE *f)
799 {
800 char line[2000];
801
802 FILE *p = popen(command, "r");
803 if (!p)
804 return;
805
806 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
807 while (fgets(line, sizeof(line), p))
808 fputs(line, f);
809 fprintf(f, "\n\n");
810 pclose(p);
811 }
812
813 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
814 unsigned flags)
815 {
816 struct si_context *sctx = (struct si_context*)ctx;
817
818 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS)
819 si_dump_debug_registers(sctx, f);
820
821 if (flags & PIPE_DUMP_CURRENT_STATES)
822 si_dump_framebuffer(sctx, f);
823
824 if (flags & PIPE_DUMP_CURRENT_SHADERS) {
825 si_dump_gfx_shader(sctx->screen, &sctx->vs_shader, f);
826 si_dump_gfx_shader(sctx->screen, &sctx->tcs_shader, f);
827 si_dump_gfx_shader(sctx->screen, &sctx->tes_shader, f);
828 si_dump_gfx_shader(sctx->screen, &sctx->gs_shader, f);
829 si_dump_gfx_shader(sctx->screen, &sctx->ps_shader, f);
830 si_dump_compute_shader(sctx->screen, &sctx->cs_shader_state, f);
831
832 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
833 si_dump_annotated_shaders(sctx, f);
834 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
835 si_dump_command("Wave information", "umr -O bits -wa", f);
836 }
837
838 si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
839 "", "RW buffers", 4, SI_NUM_RW_BUFFERS,
840 si_identity, f);
841 si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f);
842 si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f);
843 si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f);
844 si_dump_gfx_descriptors(sctx, &sctx->gs_shader, f);
845 si_dump_gfx_descriptors(sctx, &sctx->ps_shader, f);
846 si_dump_compute_descriptors(sctx, f);
847 }
848
849 if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
850 si_dump_bo_list(sctx, &sctx->last_gfx, f);
851 si_dump_last_ib(sctx, f);
852
853 fprintf(f, "Done.\n");
854
855 /* dump only once */
856 radeon_clear_saved_cs(&sctx->last_gfx);
857 radeon_clear_saved_cs(&sctx->last_ce);
858 r600_resource_reference(&sctx->last_trace_buf, NULL);
859 }
860 }
861
862 static void si_dump_dma(struct si_context *sctx,
863 struct radeon_saved_cs *saved, FILE *f)
864 {
865 static const char ib_name[] = "sDMA IB";
866 unsigned i;
867
868 si_dump_bo_list(sctx, saved, f);
869
870 fprintf(f, "------------------ %s begin ------------------\n", ib_name);
871
872 for (i = 0; i < saved->num_dw; ++i) {
873 fprintf(f, " %08x\n", saved->ib[i]);
874 }
875
876 fprintf(f, "------------------- %s end -------------------\n", ib_name);
877 fprintf(f, "\n");
878
879 fprintf(f, "SDMA Dump Done.\n");
880 }
881
882 static bool si_vm_fault_occured(struct si_context *sctx, uint64_t *out_addr)
883 {
884 char line[2000];
885 unsigned sec, usec;
886 int progress = 0;
887 uint64_t timestamp = 0;
888 bool fault = false;
889
890 FILE *p = popen("dmesg", "r");
891 if (!p)
892 return false;
893
894 while (fgets(line, sizeof(line), p)) {
895 char *msg, len;
896
897 if (!line[0] || line[0] == '\n')
898 continue;
899
900 /* Get the timestamp. */
901 if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
902 static bool hit = false;
903 if (!hit) {
904 fprintf(stderr, "%s: failed to parse line '%s'\n",
905 __func__, line);
906 hit = true;
907 }
908 continue;
909 }
910 timestamp = sec * 1000000ull + usec;
911
912 /* If just updating the timestamp. */
913 if (!out_addr)
914 continue;
915
916 /* Process messages only if the timestamp is newer. */
917 if (timestamp <= sctx->dmesg_timestamp)
918 continue;
919
920 /* Only process the first VM fault. */
921 if (fault)
922 continue;
923
924 /* Remove trailing \n */
925 len = strlen(line);
926 if (len && line[len-1] == '\n')
927 line[len-1] = 0;
928
929 /* Get the message part. */
930 msg = strchr(line, ']');
931 if (!msg) {
932 assert(0);
933 continue;
934 }
935 msg++;
936
937 const char *header_line, *addr_line_prefix, *addr_line_format;
938
939 if (sctx->b.chip_class >= GFX9) {
940 /* Match this:
941 * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
942 * ..: at page 0x0000000219f8f000 from 27
943 * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
944 */
945 header_line = "VMC page fault";
946 addr_line_prefix = " at page";
947 addr_line_format = "%"PRIx64;
948 } else {
949 header_line = "GPU fault detected:";
950 addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
951 addr_line_format = "%"PRIX64;
952 }
953
954 switch (progress) {
955 case 0:
956 if (strstr(msg, header_line))
957 progress = 1;
958 break;
959 case 1:
960 msg = strstr(msg, addr_line_prefix);
961 if (msg) {
962 msg = strstr(msg, "0x");
963 if (msg) {
964 msg += 2;
965 if (sscanf(msg, addr_line_format, out_addr) == 1)
966 fault = true;
967 }
968 }
969 progress = 0;
970 break;
971 default:
972 progress = 0;
973 }
974 }
975 pclose(p);
976
977 if (timestamp > sctx->dmesg_timestamp)
978 sctx->dmesg_timestamp = timestamp;
979 return fault;
980 }
981
982 void si_check_vm_faults(struct r600_common_context *ctx,
983 struct radeon_saved_cs *saved, enum ring_type ring)
984 {
985 struct si_context *sctx = (struct si_context *)ctx;
986 struct pipe_screen *screen = sctx->b.b.screen;
987 FILE *f;
988 uint64_t addr;
989 char cmd_line[4096];
990
991 if (!si_vm_fault_occured(sctx, &addr))
992 return;
993
994 f = dd_get_debug_file(false);
995 if (!f)
996 return;
997
998 fprintf(f, "VM fault report.\n\n");
999 if (os_get_command_line(cmd_line, sizeof(cmd_line)))
1000 fprintf(f, "Command: %s\n", cmd_line);
1001 fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
1002 fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
1003 fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
1004 fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
1005
1006 if (sctx->apitrace_call_number)
1007 fprintf(f, "Last apitrace call: %u\n\n",
1008 sctx->apitrace_call_number);
1009
1010 switch (ring) {
1011 case RING_GFX:
1012 si_dump_debug_state(&sctx->b.b, f,
1013 PIPE_DUMP_CURRENT_STATES |
1014 PIPE_DUMP_CURRENT_SHADERS |
1015 PIPE_DUMP_LAST_COMMAND_BUFFER);
1016 break;
1017
1018 case RING_DMA:
1019 si_dump_dma(sctx, saved, f);
1020 break;
1021
1022 default:
1023 break;
1024 }
1025
1026 fclose(f);
1027
1028 fprintf(stderr, "Detected a VM fault, exiting...\n");
1029 exit(0);
1030 }
1031
1032 void si_init_debug_functions(struct si_context *sctx)
1033 {
1034 sctx->b.b.dump_debug_state = si_dump_debug_state;
1035 sctx->b.check_vm_faults = si_check_vm_faults;
1036
1037 /* Set the initial dmesg timestamp for this context, so that
1038 * only new messages will be checked for VM faults.
1039 */
1040 if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
1041 si_vm_fault_occured(sctx, NULL);
1042 }