radeonsi: split descriptor list dumping
[mesa.git] / src / gallium / drivers / radeonsi / si_debug.c
1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Marek Olšák <maraeo@gmail.com>
25 */
26
27 #include "si_pipe.h"
28 #include "sid.h"
29 #include "gfx9d.h"
30 #include "sid_tables.h"
31 #include "ddebug/dd_util.h"
32 #include "util/u_memory.h"
33 #include "ac_debug.h"
34
/* Provides debug_get_option_replace_shaders(), which si_replace_shader()
 * calls to obtain the RADEON_REPLACE_SHADERS environment variable.
 * NOTE(review): the trailing NULL is presumably the value returned when the
 * variable is unset - confirm against the macro definition.
 */
DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
36
37 static void si_dump_shader(struct si_screen *sscreen,
38 enum pipe_shader_type processor,
39 const struct si_shader *shader, FILE *f)
40 {
41 if (shader->shader_log)
42 fwrite(shader->shader_log, shader->shader_log_size, 1, f);
43 else
44 si_shader_dump(sscreen, shader, NULL, processor, f, false);
45 }
46
47 static void si_dump_gfx_shader(struct si_screen *sscreen,
48 const struct si_shader_ctx_state *state, FILE *f)
49 {
50 const struct si_shader *current = state->current;
51
52 if (!state->cso || !current)
53 return;
54
55 si_dump_shader(sscreen, state->cso->info.processor, current, f);
56 }
57
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     shader number to look up among the "numN" keys
 * \param binary  passed to ac_elf_read() together with the file contents
 * \return true if a file was registered for "num", could be read completely
 *         and was handed to ac_elf_read(); false in every other case
 *
 * A malformed variable (a number that is not followed by ':') terminates
 * the process with exit(1).
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize;
	size_t nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Walk the "num:filename" entries until the one for "num" is found. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* If more entries follow, isolate this entry's file name in a copy. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* ELF objects are binary data; open in binary mode so the C library
	 * never translates bytes on platforms where text mode differs.
	 */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	/* A zero-byte allocation may legitimately return NULL, which would
	 * otherwise be misreported as an out-of-memory condition.
	 */
	if (filesize == 0) {
		fprintf(stderr, "radeonsi: replacement shader file is empty\n");
		goto out_close;
	}

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	/* fread() returns size_t; compare in the unsigned domain. */
	nread = fread(buf, 1, (size_t)filesize, f);
	if (nread != (size_t)filesize)
		goto file_error;

	ac_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
149
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
 * read them, or use "aha -b -f file" to convert them to html.
 *
 * Each macro below is a terminal escape sequence that is concatenated into
 * the format strings of the dump functions in this file; COLOR_RESET is
 * emitted after a colored span.
 */
#define COLOR_RESET "\033[0m"
#define COLOR_RED "\033[31m"
#define COLOR_GREEN "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN "\033[1;36m"
158
159 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
160 unsigned offset)
161 {
162 struct radeon_winsys *ws = sctx->b.ws;
163 uint32_t value;
164
165 if (ws->read_registers(ws, offset, 1, &value))
166 ac_dump_reg(f, offset, value, ~0);
167 }
168
169 static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
170 {
171 if (sctx->screen->b.info.drm_major == 2 &&
172 sctx->screen->b.info.drm_minor < 42)
173 return; /* no radeon support */
174
175 fprintf(f, "Memory-mapped registers:\n");
176 si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
177
178 /* No other registers can be read on DRM < 3.1.0. */
179 if (sctx->screen->b.info.drm_major < 3 ||
180 sctx->screen->b.info.drm_minor < 1) {
181 fprintf(f, "\n");
182 return;
183 }
184
185 si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
186 si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
187 si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
188 si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
189 si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
190 si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
191 si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
192 if (sctx->b.chip_class <= VI) {
193 si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
194 si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
195 si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
196 }
197 si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
198 si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
199 si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
200 si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
201 si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
202 si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
203 si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
204 si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
205 si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
206 si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
207 fprintf(f, "\n");
208 }
209
210 static void si_dump_last_ib(struct si_context *sctx, FILE *f)
211 {
212 int last_trace_id = -1;
213
214 if (!sctx->last_gfx.ib)
215 return;
216
217 if (sctx->last_trace_buf) {
218 /* We are expecting that the ddebug pipe has already
219 * waited for the context, so this buffer should be idle.
220 * If the GPU is hung, there is no point in waiting for it.
221 */
222 uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
223 NULL,
224 PIPE_TRANSFER_UNSYNCHRONIZED |
225 PIPE_TRANSFER_READ);
226 if (map)
227 last_trace_id = *map;
228 }
229
230 if (sctx->init_config)
231 ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
232 -1, "IB2: Init config", sctx->b.chip_class,
233 NULL, NULL);
234
235 if (sctx->init_config_gs_rings)
236 ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
237 sctx->init_config_gs_rings->ndw,
238 -1, "IB2: Init GS rings", sctx->b.chip_class,
239 NULL, NULL);
240
241 ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
242 last_trace_id, "IB", sctx->b.chip_class,
243 NULL, NULL);
244 }
245
246 static const char *priority_to_string(enum radeon_bo_priority priority)
247 {
248 #define ITEM(x) [RADEON_PRIO_##x] = #x
249 static const char *table[64] = {
250 ITEM(FENCE),
251 ITEM(TRACE),
252 ITEM(SO_FILLED_SIZE),
253 ITEM(QUERY),
254 ITEM(IB1),
255 ITEM(IB2),
256 ITEM(DRAW_INDIRECT),
257 ITEM(INDEX_BUFFER),
258 ITEM(VCE),
259 ITEM(UVD),
260 ITEM(SDMA_BUFFER),
261 ITEM(SDMA_TEXTURE),
262 ITEM(CP_DMA),
263 ITEM(CONST_BUFFER),
264 ITEM(DESCRIPTORS),
265 ITEM(BORDER_COLORS),
266 ITEM(SAMPLER_BUFFER),
267 ITEM(VERTEX_BUFFER),
268 ITEM(SHADER_RW_BUFFER),
269 ITEM(COMPUTE_GLOBAL),
270 ITEM(SAMPLER_TEXTURE),
271 ITEM(SHADER_RW_IMAGE),
272 ITEM(SAMPLER_TEXTURE_MSAA),
273 ITEM(COLOR_BUFFER),
274 ITEM(DEPTH_BUFFER),
275 ITEM(COLOR_BUFFER_MSAA),
276 ITEM(DEPTH_BUFFER_MSAA),
277 ITEM(CMASK),
278 ITEM(DCC),
279 ITEM(HTILE),
280 ITEM(SHADER_BINARY),
281 ITEM(SHADER_RINGS),
282 ITEM(SCRATCH_BUFFER),
283 };
284 #undef ITEM
285
286 assert(priority < ARRAY_SIZE(table));
287 return table[priority];
288 }
289
290 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
291 const struct radeon_bo_list_item *b)
292 {
293 return a->vm_address < b->vm_address ? -1 :
294 a->vm_address > b->vm_address ? 1 : 0;
295 }
296
297 static void si_dump_bo_list(struct si_context *sctx,
298 const struct radeon_saved_cs *saved, FILE *f)
299 {
300 unsigned i,j;
301
302 if (!saved->bo_list)
303 return;
304
305 /* Sort the list according to VM adddresses first. */
306 qsort(saved->bo_list, saved->bo_count,
307 sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);
308
309 fprintf(f, "Buffer list (in units of pages = 4kB):\n"
310 COLOR_YELLOW " Size VM start page "
311 "VM end page Usage" COLOR_RESET "\n");
312
313 for (i = 0; i < saved->bo_count; i++) {
314 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
315 const unsigned page_size = sctx->b.screen->info.gart_page_size;
316 uint64_t va = saved->bo_list[i].vm_address;
317 uint64_t size = saved->bo_list[i].bo_size;
318 bool hit = false;
319
320 /* If there's unused virtual memory between 2 buffers, print it. */
321 if (i) {
322 uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
323 saved->bo_list[i-1].bo_size;
324
325 if (va > previous_va_end) {
326 fprintf(f, " %10"PRIu64" -- hole --\n",
327 (va - previous_va_end) / page_size);
328 }
329 }
330
331 /* Print the buffer. */
332 fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ",
333 size / page_size, va / page_size, (va + size) / page_size);
334
335 /* Print the usage. */
336 for (j = 0; j < 64; j++) {
337 if (!(saved->bo_list[i].priority_usage & (1llu << j)))
338 continue;
339
340 fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
341 hit = true;
342 }
343 fprintf(f, "\n");
344 }
345 fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
346 " Other buffers can still be allocated there.\n\n");
347 }
348
349 static void si_dump_framebuffer(struct si_context *sctx, FILE *f)
350 {
351 struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
352 struct r600_texture *rtex;
353 int i;
354
355 for (i = 0; i < state->nr_cbufs; i++) {
356 if (!state->cbufs[i])
357 continue;
358
359 rtex = (struct r600_texture*)state->cbufs[i]->texture;
360 fprintf(f, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
361 r600_print_texture_info(sctx->b.screen, rtex, f);
362 fprintf(f, "\n");
363 }
364
365 if (state->zsbuf) {
366 rtex = (struct r600_texture*)state->zsbuf->texture;
367 fprintf(f, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
368 r600_print_texture_info(sctx->b.screen, rtex, f);
369 fprintf(f, "\n");
370 }
371 }
372
373 static void si_dump_descriptor_list(struct si_descriptors *desc,
374 const char *shader_name,
375 const char *elem_name,
376 unsigned num_elements,
377 FILE *f)
378 {
379 unsigned i, j;
380 uint32_t *cpu_list = desc->list;
381 uint32_t *gpu_list = desc->gpu_list;
382 const char *list_note = "GPU list";
383
384 if (!gpu_list) {
385 gpu_list = cpu_list;
386 list_note = "CPU list";
387 }
388
389 for (i = 0; i < num_elements; i++) {
390 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
391 shader_name, elem_name, i, list_note);
392
393 switch (desc->element_dw_size) {
394 case 4:
395 for (j = 0; j < 4; j++)
396 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
397 gpu_list[j], 0xffffffff);
398 break;
399 case 8:
400 for (j = 0; j < 8; j++)
401 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
402 gpu_list[j], 0xffffffff);
403
404 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
405 for (j = 0; j < 4; j++)
406 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
407 gpu_list[4+j], 0xffffffff);
408 break;
409 case 16:
410 for (j = 0; j < 8; j++)
411 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
412 gpu_list[j], 0xffffffff);
413
414 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
415 for (j = 0; j < 4; j++)
416 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
417 gpu_list[4+j], 0xffffffff);
418
419 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
420 for (j = 0; j < 8; j++)
421 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
422 gpu_list[8+j], 0xffffffff);
423
424 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
425 for (j = 0; j < 4; j++)
426 ac_dump_reg(f, R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
427 gpu_list[12+j], 0xffffffff);
428 break;
429 }
430
431 if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4) != 0) {
432 fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
433 COLOR_RESET "\n");
434 }
435
436 fprintf(f, "\n");
437 gpu_list += desc->element_dw_size;
438 cpu_list += desc->element_dw_size;
439 }
440 }
441
442 static void si_dump_descriptors(struct si_context *sctx,
443 enum pipe_shader_type processor,
444 const struct tgsi_shader_info *info, FILE *f)
445 {
446 struct si_descriptors *descs =
447 &sctx->descriptors[SI_DESCS_FIRST_SHADER +
448 processor * SI_NUM_SHADER_DESCS];
449 static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
450
451 static const char *elem_name[] = {
452 " - Constant buffer",
453 " - Shader buffer",
454 " - Sampler",
455 " - Image",
456 };
457 unsigned num_elements[] = {
458 util_last_bit(info->const_buffers_declared),
459 util_last_bit(info->shader_buffers_declared),
460 util_last_bit(info->samplers_declared),
461 util_last_bit(info->images_declared),
462 };
463
464 if (processor == PIPE_SHADER_VERTEX) {
465 si_dump_descriptor_list(&sctx->vertex_buffers, shader_name[processor],
466 " - Vertex buffer", info->num_inputs, f);
467 }
468
469 for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
470 si_dump_descriptor_list(descs, shader_name[processor], elem_name[i],
471 num_elements[i], f);
472 }
473
474 static void si_dump_gfx_descriptors(struct si_context *sctx,
475 const struct si_shader_ctx_state *state,
476 FILE *f)
477 {
478 if (!state->cso || !state->current)
479 return;
480
481 si_dump_descriptors(sctx, state->cso->type, &state->cso->info, f);
482 }
483
struct si_shader_inst {
	char text[160]; /* one disasm line */
	unsigned offset; /* instruction offset */
	unsigned size; /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * \param disasm        disassembly text; only newline-terminated lines are
 *                      consumed
 * \param start_addr    address of offset 0, used for the printed PC
 * \param num           in: number of entries already in "instructions";
 *                      out: incremented once per line added
 * \param instructions  array receiving the entries; the caller must size it
 *                      for all lines that will be added
 *
 * Offsets continue from the last entry already present in the array.
 * Each stored line gets " [PC=..., off=..., size=...]" appended; lines that
 * do not fit into si_shader_inst::text are truncated.
 */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct si_shader_inst *instructions)
{
	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	const char *next;

	while ((next = strchr(disasm, '\n'))) {
		struct si_shader_inst *inst = &instructions[*num];
		size_t len = next - disasm;

		/* Clamp instead of relying on an assert, which is compiled
		 * out in release builds and would let memcpy overflow "text".
		 */
		if (len >= sizeof(inst->text))
			len = sizeof(inst->text) - 1;

		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		const char *semicolon = strchr(disasm, ';');
		/* More than 16 chars after ";" means the instruction is 8 bytes long.
		 * A line without ";" (or whose next ";" lies on a later line,
		 * making the difference negative) is treated as 4 bytes.
		 */
		inst->size = (semicolon && next - semicolon > 16) ? 8 : 4;

		snprintf(inst->text + len, sizeof(inst->text) - len,
			 " [PC=0x%"PRIx64", off=%u, size=%u]",
			 start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
		disasm = next + 1;
	}
}
523
/* Capacity of the wave arrays used by the wave-dumping code. */
#define MAX_WAVES_PER_CHIP (64 * 40)

/* One wave as parsed from a line of wave-status output. */
struct si_wave_info {
	unsigned se; /* shader engine */
	unsigned sh; /* shader array */
	unsigned cu; /* compute unit */
	unsigned simd;
	unsigned wave;
	uint32_t status;
	uint64_t pc; /* program counter */
	uint32_t inst_dw0;
	uint32_t inst_dw1;
	uint64_t exec;
	bool matched; /* whether the wave is used by a currently-bound shader */
};

/* qsort() comparator for si_wave_info.
 *
 * Orders by PC first, then by SE, SH, CU, SIMD and wave index. The first
 * key that differs decides; the remaining members are not compared.
 */
static int compare_wave(const void *p1, const void *p2)
{
	const struct si_wave_info *a = (const struct si_wave_info *)p1;
	const struct si_wave_info *b = (const struct si_wave_info *)p2;

	if (a->pc != b->pc)
		return a->pc < b->pc ? -1 : 1;
	if (a->se != b->se)
		return a->se < b->se ? -1 : 1;
	if (a->sh != b->sh)
		return a->sh < b->sh ? -1 : 1;
	if (a->cu != b->cu)
		return a->cu < b->cu ? -1 : 1;
	if (a->simd != b->simd)
		return a->simd < b->simd ? -1 : 1;
	if (a->wave != b->wave)
		return a->wave < b->wave ? -1 : 1;

	return 0;
}
573
574 /* Return wave information. "waves" should be a large enough array. */
575 static unsigned si_get_wave_info(struct si_wave_info waves[MAX_WAVES_PER_CHIP])
576 {
577 char line[2000];
578 unsigned num_waves = 0;
579
580 FILE *p = popen("umr -wa", "r");
581 if (!p)
582 return 0;
583
584 if (!fgets(line, sizeof(line), p) ||
585 strncmp(line, "SE", 2) != 0) {
586 pclose(p);
587 return 0;
588 }
589
590 while (fgets(line, sizeof(line), p)) {
591 struct si_wave_info *w;
592 uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
593
594 assert(num_waves < MAX_WAVES_PER_CHIP);
595 w = &waves[num_waves];
596
597 if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
598 &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
599 &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
600 &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
601 w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
602 w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
603 w->matched = false;
604 num_waves++;
605 }
606 }
607
608 qsort(waves, num_waves, sizeof(struct si_wave_info), compare_wave);
609
610 pclose(p);
611 return num_waves;
612 }
613
614 /* If the shader is being executed, print its asm instructions, and annotate
615 * those that are being executed right now with information about waves that
616 * execute them. This is most useful during a GPU hang.
617 */
618 static void si_print_annotated_shader(struct si_shader *shader,
619 struct si_wave_info *waves,
620 unsigned num_waves,
621 FILE *f)
622 {
623 if (!shader || !shader->binary.disasm_string)
624 return;
625
626 uint64_t start_addr = shader->bo->gpu_address;
627 uint64_t end_addr = start_addr + shader->bo->b.b.width0;
628 unsigned i;
629
630 /* See if any wave executes the shader. */
631 for (i = 0; i < num_waves; i++) {
632 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
633 break;
634 }
635 if (i == num_waves)
636 return; /* the shader is not being executed */
637
638 /* Remember the first found wave. The waves are sorted according to PC. */
639 waves = &waves[i];
640 num_waves -= i;
641
642 /* Get the list of instructions.
643 * Buffer size / 4 is the upper bound of the instruction count.
644 */
645 unsigned num_inst = 0;
646 struct si_shader_inst *instructions =
647 calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
648
649 if (shader->prolog) {
650 si_add_split_disasm(shader->prolog->binary.disasm_string,
651 start_addr, &num_inst, instructions);
652 }
653 if (shader->previous_stage) {
654 si_add_split_disasm(shader->previous_stage->binary.disasm_string,
655 start_addr, &num_inst, instructions);
656 }
657 if (shader->prolog2) {
658 si_add_split_disasm(shader->prolog2->binary.disasm_string,
659 start_addr, &num_inst, instructions);
660 }
661 si_add_split_disasm(shader->binary.disasm_string,
662 start_addr, &num_inst, instructions);
663 if (shader->epilog) {
664 si_add_split_disasm(shader->epilog->binary.disasm_string,
665 start_addr, &num_inst, instructions);
666 }
667
668 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
669 si_get_shader_name(shader, shader->selector->type));
670
671 /* Print instructions with annotations. */
672 for (i = 0; i < num_inst; i++) {
673 struct si_shader_inst *inst = &instructions[i];
674
675 fprintf(f, "%s\n", inst->text);
676
677 /* Print which waves execute the instruction right now. */
678 while (num_waves && start_addr + inst->offset == waves->pc) {
679 fprintf(f,
680 " " COLOR_GREEN "^ SE%u SH%u CU%u "
681 "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
682 waves->se, waves->sh, waves->cu, waves->simd,
683 waves->wave, waves->exec);
684
685 if (inst->size == 4) {
686 fprintf(f, "INST32=%08X" COLOR_RESET "\n",
687 waves->inst_dw0);
688 } else {
689 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
690 waves->inst_dw0, waves->inst_dw1);
691 }
692
693 waves->matched = true;
694 waves = &waves[1];
695 num_waves--;
696 }
697 }
698
699 fprintf(f, "\n\n");
700 free(instructions);
701 }
702
703 static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
704 {
705 struct si_wave_info waves[MAX_WAVES_PER_CHIP];
706 unsigned num_waves = si_get_wave_info(waves);
707
708 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
709 "\n\n", num_waves);
710
711 si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
712 si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
713 si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
714 si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
715 si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
716
717 /* Print waves executing shaders that are not currently bound. */
718 unsigned i;
719 bool found = false;
720 for (i = 0; i < num_waves; i++) {
721 if (waves[i].matched)
722 continue;
723
724 if (!found) {
725 fprintf(f, COLOR_CYAN
726 "Waves not executing currently-bound shaders:"
727 COLOR_RESET "\n");
728 found = true;
729 }
730 fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
731 " INST=%08X %08X PC=%"PRIx64"\n",
732 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
733 waves[i].wave, waves[i].exec, waves[i].inst_dw0,
734 waves[i].inst_dw1, waves[i].pc);
735 }
736 if (found)
737 fprintf(f, "\n\n");
738 }
739
740 static void si_dump_command(const char *title, const char *command, FILE *f)
741 {
742 char line[2000];
743
744 FILE *p = popen(command, "r");
745 if (!p)
746 return;
747
748 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
749 while (fgets(line, sizeof(line), p))
750 fputs(line, f);
751 fprintf(f, "\n\n");
752 pclose(p);
753 }
754
755 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
756 unsigned flags)
757 {
758 struct si_context *sctx = (struct si_context*)ctx;
759
760 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS)
761 si_dump_debug_registers(sctx, f);
762
763 if (flags & PIPE_DUMP_CURRENT_STATES)
764 si_dump_framebuffer(sctx, f);
765
766 if (flags & PIPE_DUMP_CURRENT_SHADERS) {
767 si_dump_gfx_shader(sctx->screen, &sctx->vs_shader, f);
768 si_dump_gfx_shader(sctx->screen, &sctx->tcs_shader, f);
769 si_dump_gfx_shader(sctx->screen, &sctx->tes_shader, f);
770 si_dump_gfx_shader(sctx->screen, &sctx->gs_shader, f);
771 si_dump_gfx_shader(sctx->screen, &sctx->ps_shader, f);
772
773 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
774 si_dump_annotated_shaders(sctx, f);
775 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
776 si_dump_command("Wave information", "umr -O bits -wa", f);
777 }
778
779 si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
780 "", "RW buffers", SI_NUM_RW_BUFFERS, f);
781 si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f);
782 si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f);
783 si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f);
784 si_dump_gfx_descriptors(sctx, &sctx->gs_shader, f);
785 si_dump_gfx_descriptors(sctx, &sctx->ps_shader, f);
786 }
787
788 if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
789 si_dump_bo_list(sctx, &sctx->last_gfx, f);
790 si_dump_last_ib(sctx, f);
791
792 fprintf(f, "Done.\n");
793
794 /* dump only once */
795 radeon_clear_saved_cs(&sctx->last_gfx);
796 r600_resource_reference(&sctx->last_trace_buf, NULL);
797 }
798 }
799
800 static void si_dump_dma(struct si_context *sctx,
801 struct radeon_saved_cs *saved, FILE *f)
802 {
803 static const char ib_name[] = "sDMA IB";
804 unsigned i;
805
806 si_dump_bo_list(sctx, saved, f);
807
808 fprintf(f, "------------------ %s begin ------------------\n", ib_name);
809
810 for (i = 0; i < saved->num_dw; ++i) {
811 fprintf(f, " %08x\n", saved->ib[i]);
812 }
813
814 fprintf(f, "------------------- %s end -------------------\n", ib_name);
815 fprintf(f, "\n");
816
817 fprintf(f, "SDMA Dump Done.\n");
818 }
819
820 static bool si_vm_fault_occured(struct si_context *sctx, uint32_t *out_addr)
821 {
822 char line[2000];
823 unsigned sec, usec;
824 int progress = 0;
825 uint64_t timestamp = 0;
826 bool fault = false;
827
828 FILE *p = popen("dmesg", "r");
829 if (!p)
830 return false;
831
832 while (fgets(line, sizeof(line), p)) {
833 char *msg, len;
834
835 if (!line[0] || line[0] == '\n')
836 continue;
837
838 /* Get the timestamp. */
839 if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
840 static bool hit = false;
841 if (!hit) {
842 fprintf(stderr, "%s: failed to parse line '%s'\n",
843 __func__, line);
844 hit = true;
845 }
846 continue;
847 }
848 timestamp = sec * 1000000llu + usec;
849
850 /* If just updating the timestamp. */
851 if (!out_addr)
852 continue;
853
854 /* Process messages only if the timestamp is newer. */
855 if (timestamp <= sctx->dmesg_timestamp)
856 continue;
857
858 /* Only process the first VM fault. */
859 if (fault)
860 continue;
861
862 /* Remove trailing \n */
863 len = strlen(line);
864 if (len && line[len-1] == '\n')
865 line[len-1] = 0;
866
867 /* Get the message part. */
868 msg = strchr(line, ']');
869 if (!msg) {
870 assert(0);
871 continue;
872 }
873 msg++;
874
875 switch (progress) {
876 case 0:
877 if (strstr(msg, "GPU fault detected:"))
878 progress = 1;
879 break;
880 case 1:
881 msg = strstr(msg, "VM_CONTEXT1_PROTECTION_FAULT_ADDR");
882 if (msg) {
883 msg = strstr(msg, "0x");
884 if (msg) {
885 msg += 2;
886 if (sscanf(msg, "%X", out_addr) == 1)
887 fault = true;
888 }
889 }
890 progress = 0;
891 break;
892 default:
893 progress = 0;
894 }
895 }
896 pclose(p);
897
898 if (timestamp > sctx->dmesg_timestamp)
899 sctx->dmesg_timestamp = timestamp;
900 return fault;
901 }
902
903 void si_check_vm_faults(struct r600_common_context *ctx,
904 struct radeon_saved_cs *saved, enum ring_type ring)
905 {
906 struct si_context *sctx = (struct si_context *)ctx;
907 struct pipe_screen *screen = sctx->b.b.screen;
908 FILE *f;
909 uint32_t addr;
910 char cmd_line[4096];
911
912 if (!si_vm_fault_occured(sctx, &addr))
913 return;
914
915 f = dd_get_debug_file(false);
916 if (!f)
917 return;
918
919 fprintf(f, "VM fault report.\n\n");
920 if (os_get_command_line(cmd_line, sizeof(cmd_line)))
921 fprintf(f, "Command: %s\n", cmd_line);
922 fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
923 fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
924 fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
925 fprintf(f, "Failing VM page: 0x%08x\n\n", addr);
926
927 if (sctx->apitrace_call_number)
928 fprintf(f, "Last apitrace call: %u\n\n",
929 sctx->apitrace_call_number);
930
931 switch (ring) {
932 case RING_GFX:
933 si_dump_debug_state(&sctx->b.b, f,
934 PIPE_DUMP_CURRENT_STATES |
935 PIPE_DUMP_CURRENT_SHADERS |
936 PIPE_DUMP_LAST_COMMAND_BUFFER);
937 break;
938
939 case RING_DMA:
940 si_dump_dma(sctx, saved, f);
941 break;
942
943 default:
944 break;
945 }
946
947 fclose(f);
948
949 fprintf(stderr, "Detected a VM fault, exiting...\n");
950 exit(0);
951 }
952
953 void si_init_debug_functions(struct si_context *sctx)
954 {
955 sctx->b.b.dump_debug_state = si_dump_debug_state;
956 sctx->b.check_vm_faults = si_check_vm_faults;
957
958 /* Set the initial dmesg timestamp for this context, so that
959 * only new messages will be checked for VM faults.
960 */
961 if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
962 si_vm_fault_occured(sctx, NULL);
963 }