radeonsi: s/uint/enum pipe_shader_type/
[mesa.git] / src / gallium / drivers / radeonsi / si_debug.c
1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Marek Olšák <maraeo@gmail.com>
25 */
26
27 #include "si_pipe.h"
28 #include "sid.h"
29 #include "sid_tables.h"
30 #include "ddebug/dd_util.h"
31 #include "util/u_memory.h"
32 #include "ac_debug.h"
33
/* Debug option backed by the RADEON_REPLACE_SHADERS environment variable
 * (default NULL). NOTE(review): presumably this expands to the
 * debug_get_option_replace_shaders() accessor called by si_replace_shader()
 * - confirm against the macro definition.
 */
DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
35
36 static void si_dump_shader(struct si_screen *sscreen,
37 struct si_shader_ctx_state *state, FILE *f)
38 {
39 struct si_shader *current = state->current;
40
41 if (!state->cso || !current)
42 return;
43
44 if (current->shader_log)
45 fwrite(current->shader_log, current->shader_log_size, 1, f);
46 else
47 si_shader_dump(sscreen, state->current, NULL,
48 state->cso->info.processor, f, false);
49 }
50
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     shader number to look up among the numN fields
 * \param binary  destination handed to ac_elf_read() when a file was loaded
 * \return true if a non-empty replacement file for "num" was read completely
 *         and passed to ac_elf_read(); false otherwise.
 *
 * A number that is not followed by ':' is treated as a fatal formatting
 * error and terminates the process.
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize;
	size_t nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Walk the "num:filename" entries until the one for "num" is found. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* Isolate the filename if more entries follow it. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* The replacement is an ELF object, i.e. binary data. */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	/* An empty file cannot hold an ELF object; don't allocate 0 bytes. */
	if (filesize == 0) {
		fprintf(stderr, "radeonsi: shader file is empty\n");
		goto out_close;
	}

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	nread = fread(buf, 1, filesize, f);
	if (nread != (size_t)filesize) {
		/* errno is only meaningful if the stream reports an error. */
		if (ferror(f))
			goto file_error;
		fprintf(stderr, "radeonsi: short read of shader file\n");
		goto out_close;
	}

	ac_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
142
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
 * read them, or use "aha -b -f file" to convert them to html.
 *
 * Each macro is an escape-sequence string literal that gets concatenated
 * with adjacent literals in the format strings of this file.
 */
#define COLOR_RESET "\033[0m"
#define COLOR_RED "\033[31m"
#define COLOR_GREEN "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN "\033[1;36m"
151
152 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
153 unsigned offset)
154 {
155 struct radeon_winsys *ws = sctx->b.ws;
156 uint32_t value;
157
158 if (ws->read_registers(ws, offset, 1, &value))
159 ac_dump_reg(f, offset, value, ~0);
160 }
161
162 static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
163 {
164 if (sctx->screen->b.info.drm_major == 2 &&
165 sctx->screen->b.info.drm_minor < 42)
166 return; /* no radeon support */
167
168 fprintf(f, "Memory-mapped registers:\n");
169 si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
170
171 /* No other registers can be read on DRM < 3.1.0. */
172 if (sctx->screen->b.info.drm_major < 3 ||
173 sctx->screen->b.info.drm_minor < 1) {
174 fprintf(f, "\n");
175 return;
176 }
177
178 si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
179 si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
180 si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
181 si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
182 si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
183 si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
184 si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
185 si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
186 si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
187 si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
188 si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
189 si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
190 si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
191 si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
192 si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
193 si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
194 si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
195 si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
196 si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
197 si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
198 fprintf(f, "\n");
199 }
200
201 static void si_dump_last_ib(struct si_context *sctx, FILE *f)
202 {
203 int last_trace_id = -1;
204
205 if (!sctx->last_gfx.ib)
206 return;
207
208 if (sctx->last_trace_buf) {
209 /* We are expecting that the ddebug pipe has already
210 * waited for the context, so this buffer should be idle.
211 * If the GPU is hung, there is no point in waiting for it.
212 */
213 uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
214 NULL,
215 PIPE_TRANSFER_UNSYNCHRONIZED |
216 PIPE_TRANSFER_READ);
217 if (map)
218 last_trace_id = *map;
219 }
220
221 if (sctx->init_config)
222 ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
223 -1, "IB2: Init config", sctx->b.chip_class,
224 NULL, NULL);
225
226 if (sctx->init_config_gs_rings)
227 ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
228 sctx->init_config_gs_rings->ndw,
229 -1, "IB2: Init GS rings", sctx->b.chip_class,
230 NULL, NULL);
231
232 ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
233 last_trace_id, "IB", sctx->b.chip_class,
234 NULL, NULL);
235 }
236
237 static const char *priority_to_string(enum radeon_bo_priority priority)
238 {
239 #define ITEM(x) [RADEON_PRIO_##x] = #x
240 static const char *table[64] = {
241 ITEM(FENCE),
242 ITEM(TRACE),
243 ITEM(SO_FILLED_SIZE),
244 ITEM(QUERY),
245 ITEM(IB1),
246 ITEM(IB2),
247 ITEM(DRAW_INDIRECT),
248 ITEM(INDEX_BUFFER),
249 ITEM(VCE),
250 ITEM(UVD),
251 ITEM(SDMA_BUFFER),
252 ITEM(SDMA_TEXTURE),
253 ITEM(CP_DMA),
254 ITEM(CONST_BUFFER),
255 ITEM(DESCRIPTORS),
256 ITEM(BORDER_COLORS),
257 ITEM(SAMPLER_BUFFER),
258 ITEM(VERTEX_BUFFER),
259 ITEM(SHADER_RW_BUFFER),
260 ITEM(COMPUTE_GLOBAL),
261 ITEM(SAMPLER_TEXTURE),
262 ITEM(SHADER_RW_IMAGE),
263 ITEM(SAMPLER_TEXTURE_MSAA),
264 ITEM(COLOR_BUFFER),
265 ITEM(DEPTH_BUFFER),
266 ITEM(COLOR_BUFFER_MSAA),
267 ITEM(DEPTH_BUFFER_MSAA),
268 ITEM(CMASK),
269 ITEM(DCC),
270 ITEM(HTILE),
271 ITEM(SHADER_BINARY),
272 ITEM(SHADER_RINGS),
273 ITEM(SCRATCH_BUFFER),
274 };
275 #undef ITEM
276
277 assert(priority < ARRAY_SIZE(table));
278 return table[priority];
279 }
280
281 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
282 const struct radeon_bo_list_item *b)
283 {
284 return a->vm_address < b->vm_address ? -1 :
285 a->vm_address > b->vm_address ? 1 : 0;
286 }
287
288 static void si_dump_bo_list(struct si_context *sctx,
289 const struct radeon_saved_cs *saved, FILE *f)
290 {
291 unsigned i,j;
292
293 if (!saved->bo_list)
294 return;
295
296 /* Sort the list according to VM adddresses first. */
297 qsort(saved->bo_list, saved->bo_count,
298 sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);
299
300 fprintf(f, "Buffer list (in units of pages = 4kB):\n"
301 COLOR_YELLOW " Size VM start page "
302 "VM end page Usage" COLOR_RESET "\n");
303
304 for (i = 0; i < saved->bo_count; i++) {
305 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
306 const unsigned page_size = sctx->b.screen->info.gart_page_size;
307 uint64_t va = saved->bo_list[i].vm_address;
308 uint64_t size = saved->bo_list[i].bo_size;
309 bool hit = false;
310
311 /* If there's unused virtual memory between 2 buffers, print it. */
312 if (i) {
313 uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
314 saved->bo_list[i-1].bo_size;
315
316 if (va > previous_va_end) {
317 fprintf(f, " %10"PRIu64" -- hole --\n",
318 (va - previous_va_end) / page_size);
319 }
320 }
321
322 /* Print the buffer. */
323 fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ",
324 size / page_size, va / page_size, (va + size) / page_size);
325
326 /* Print the usage. */
327 for (j = 0; j < 64; j++) {
328 if (!(saved->bo_list[i].priority_usage & (1llu << j)))
329 continue;
330
331 fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
332 hit = true;
333 }
334 fprintf(f, "\n");
335 }
336 fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
337 " Other buffers can still be allocated there.\n\n");
338 }
339
340 static void si_dump_framebuffer(struct si_context *sctx, FILE *f)
341 {
342 struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
343 struct r600_texture *rtex;
344 int i;
345
346 for (i = 0; i < state->nr_cbufs; i++) {
347 if (!state->cbufs[i])
348 continue;
349
350 rtex = (struct r600_texture*)state->cbufs[i]->texture;
351 fprintf(f, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
352 r600_print_texture_info(rtex, f);
353 fprintf(f, "\n");
354 }
355
356 if (state->zsbuf) {
357 rtex = (struct r600_texture*)state->zsbuf->texture;
358 fprintf(f, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
359 r600_print_texture_info(rtex, f);
360 fprintf(f, "\n");
361 }
362 }
363
364 static void si_dump_descriptor_list(struct si_descriptors *desc,
365 const char *shader_name,
366 const char *elem_name,
367 unsigned num_elements,
368 FILE *f)
369 {
370 unsigned i, j;
371 uint32_t *cpu_list = desc->list;
372 uint32_t *gpu_list = desc->gpu_list;
373 const char *list_note = "GPU list";
374
375 if (!gpu_list) {
376 gpu_list = cpu_list;
377 list_note = "CPU list";
378 }
379
380 for (i = 0; i < num_elements; i++) {
381 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
382 shader_name, elem_name, i, list_note);
383
384 switch (desc->element_dw_size) {
385 case 4:
386 for (j = 0; j < 4; j++)
387 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
388 gpu_list[j], 0xffffffff);
389 break;
390 case 8:
391 for (j = 0; j < 8; j++)
392 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
393 gpu_list[j], 0xffffffff);
394
395 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
396 for (j = 0; j < 4; j++)
397 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
398 gpu_list[4+j], 0xffffffff);
399 break;
400 case 16:
401 for (j = 0; j < 8; j++)
402 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
403 gpu_list[j], 0xffffffff);
404
405 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
406 for (j = 0; j < 4; j++)
407 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
408 gpu_list[4+j], 0xffffffff);
409
410 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
411 for (j = 0; j < 8; j++)
412 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
413 gpu_list[8+j], 0xffffffff);
414
415 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
416 for (j = 0; j < 4; j++)
417 ac_dump_reg(f, R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
418 gpu_list[12+j], 0xffffffff);
419 break;
420 }
421
422 if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4) != 0) {
423 fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
424 COLOR_RESET "\n");
425 }
426
427 fprintf(f, "\n");
428 gpu_list += desc->element_dw_size;
429 cpu_list += desc->element_dw_size;
430 }
431 }
432
433 static void si_dump_descriptors(struct si_context *sctx,
434 struct si_shader_ctx_state *state,
435 FILE *f)
436 {
437 if (!state->cso || !state->current)
438 return;
439
440 unsigned type = state->cso->type;
441 const struct tgsi_shader_info *info = &state->cso->info;
442 struct si_descriptors *descs =
443 &sctx->descriptors[SI_DESCS_FIRST_SHADER +
444 type * SI_NUM_SHADER_DESCS];
445 static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
446
447 static const char *elem_name[] = {
448 " - Constant buffer",
449 " - Shader buffer",
450 " - Sampler",
451 " - Image",
452 };
453 unsigned num_elements[] = {
454 util_last_bit(info->const_buffers_declared),
455 util_last_bit(info->shader_buffers_declared),
456 util_last_bit(info->samplers_declared),
457 util_last_bit(info->images_declared),
458 };
459
460 if (type == PIPE_SHADER_VERTEX) {
461 si_dump_descriptor_list(&sctx->vertex_buffers, shader_name[type],
462 " - Vertex buffer", info->num_inputs, f);
463 }
464
465 for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
466 si_dump_descriptor_list(descs, shader_name[type], elem_name[i],
467 num_elements[i], f);
468 }
469
/* One line of shader disassembly plus its position in the code. */
struct si_shader_inst {
	char text[160]; /* one disasm line */
	unsigned offset; /* instruction offset */
	unsigned size; /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * \param disasm        newline-separated disassembly; text after the last
 *                      '\n' is ignored
 * \param start_addr    address added to each offset for the printed PC
 * \param num           in: number of entries already in "instructions";
 *                      out: incremented once per added line
 * \param instructions  destination array; the caller must provide room for
 *                      every line, this function cannot check the capacity
 *
 * Each entry's offset continues from the previous entry (offset + size).
 * Lines too long for "text" are truncated instead of overflowing it, and a
 * line without ';' is treated as a 4-byte instruction.
 */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct si_shader_inst *instructions)
{
	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	const char *next;

	while ((next = strchr(disasm, '\n'))) {
		struct si_shader_inst *inst = &instructions[*num];
		size_t line_len = next - disasm;
		size_t len = line_len;

		/* Always leave room for the terminating NUL. */
		if (len > sizeof(inst->text) - 1)
			len = sizeof(inst->text) - 1;

		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		/* Only look for ';' inside the current line. */
		const char *semicolon = memchr(disasm, ';', line_len);
		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
		inst->size = (semicolon && next - semicolon > 16) ? 8 : 4;

		snprintf(inst->text + len, sizeof(inst->text) - len,
			 " [PC=0x%"PRIx64", off=%u, size=%u]",
			 start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
		disasm = next + 1;
	}
}
509
/* Capacity of the wave arrays used in this file. */
#define MAX_WAVES_PER_CHIP (64 * 40)

/* State of one wave as parsed from the wave dump. */
struct si_wave_info {
	unsigned se; /* shader engine */
	unsigned sh; /* shader array */
	unsigned cu; /* compute unit */
	unsigned simd;
	unsigned wave;
	uint32_t status;
	uint64_t pc; /* program counter */
	uint32_t inst_dw0;
	uint32_t inst_dw1;
	uint64_t exec;
	bool matched; /* whether the wave is used by a currently-bound shader */
};

/* qsort() comparator for struct si_wave_info.
 *
 * Orders by PC first, then by SE, SH, CU, SIMD and wave index; returns
 * -1, 0 or 1.
 */
static int compare_wave(const void *p1, const void *p2)
{
	const struct si_wave_info *a = (const struct si_wave_info *)p1;
	const struct si_wave_info *b = (const struct si_wave_info *)p2;

	if (a->pc != b->pc)
		return a->pc < b->pc ? -1 : 1;
	if (a->se != b->se)
		return a->se < b->se ? -1 : 1;
	if (a->sh != b->sh)
		return a->sh < b->sh ? -1 : 1;
	if (a->cu != b->cu)
		return a->cu < b->cu ? -1 : 1;
	if (a->simd != b->simd)
		return a->simd < b->simd ? -1 : 1;
	if (a->wave != b->wave)
		return a->wave < b->wave ? -1 : 1;

	return 0;
}
559
560 /* Return wave information. "waves" should be a large enough array. */
561 static unsigned si_get_wave_info(struct si_wave_info waves[MAX_WAVES_PER_CHIP])
562 {
563 char line[2000];
564 unsigned num_waves = 0;
565
566 FILE *p = popen("umr -wa", "r");
567 if (!p)
568 return 0;
569
570 if (!fgets(line, sizeof(line), p) ||
571 strncmp(line, "SE", 2) != 0) {
572 pclose(p);
573 return 0;
574 }
575
576 while (fgets(line, sizeof(line), p)) {
577 struct si_wave_info *w;
578 uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
579
580 assert(num_waves < MAX_WAVES_PER_CHIP);
581 w = &waves[num_waves];
582
583 if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
584 &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
585 &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
586 &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
587 w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
588 w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
589 w->matched = false;
590 num_waves++;
591 }
592 }
593
594 qsort(waves, num_waves, sizeof(struct si_wave_info), compare_wave);
595
596 pclose(p);
597 return num_waves;
598 }
599
600 /* If the shader is being executed, print its asm instructions, and annotate
601 * those that are being executed right now with information about waves that
602 * execute them. This is most useful during a GPU hang.
603 */
604 static void si_print_annotated_shader(struct si_shader *shader,
605 struct si_wave_info *waves,
606 unsigned num_waves,
607 FILE *f)
608 {
609 if (!shader || !shader->binary.disasm_string)
610 return;
611
612 uint64_t start_addr = shader->bo->gpu_address;
613 uint64_t end_addr = start_addr + shader->bo->b.b.width0;
614 unsigned i;
615
616 /* See if any wave executes the shader. */
617 for (i = 0; i < num_waves; i++) {
618 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
619 break;
620 }
621 if (i == num_waves)
622 return; /* the shader is not being executed */
623
624 /* Remember the first found wave. The waves are sorted according to PC. */
625 waves = &waves[i];
626 num_waves -= i;
627
628 /* Get the list of instructions.
629 * Buffer size / 4 is the upper bound of the instruction count.
630 */
631 unsigned num_inst = 0;
632 struct si_shader_inst *instructions =
633 calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
634
635 if (shader->prolog) {
636 si_add_split_disasm(shader->prolog->binary.disasm_string,
637 start_addr, &num_inst, instructions);
638 }
639 si_add_split_disasm(shader->binary.disasm_string,
640 start_addr, &num_inst, instructions);
641 if (shader->epilog) {
642 si_add_split_disasm(shader->epilog->binary.disasm_string,
643 start_addr, &num_inst, instructions);
644 }
645
646 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
647 si_get_shader_name(shader, shader->selector->type));
648
649 /* Print instructions with annotations. */
650 for (i = 0; i < num_inst; i++) {
651 struct si_shader_inst *inst = &instructions[i];
652
653 fprintf(f, "%s\n", inst->text);
654
655 /* Print which waves execute the instruction right now. */
656 while (num_waves && start_addr + inst->offset == waves->pc) {
657 fprintf(f,
658 " " COLOR_GREEN "^ SE%u SH%u CU%u "
659 "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
660 waves->se, waves->sh, waves->cu, waves->simd,
661 waves->wave, waves->exec);
662
663 if (inst->size == 4) {
664 fprintf(f, "INST32=%08X" COLOR_RESET "\n",
665 waves->inst_dw0);
666 } else {
667 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
668 waves->inst_dw0, waves->inst_dw1);
669 }
670
671 waves->matched = true;
672 waves = &waves[1];
673 num_waves--;
674 }
675 }
676
677 fprintf(f, "\n\n");
678 free(instructions);
679 }
680
681 static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
682 {
683 struct si_wave_info waves[MAX_WAVES_PER_CHIP];
684 unsigned num_waves = si_get_wave_info(waves);
685
686 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
687 "\n\n", num_waves);
688
689 si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
690 si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
691 si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
692 si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
693 si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
694
695 /* Print waves executing shaders that are not currently bound. */
696 unsigned i;
697 bool found = false;
698 for (i = 0; i < num_waves; i++) {
699 if (waves[i].matched)
700 continue;
701
702 if (!found) {
703 fprintf(f, COLOR_CYAN
704 "Waves not executing currently-bound shaders:"
705 COLOR_RESET "\n");
706 found = true;
707 }
708 fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
709 " INST=%08X %08X PC=%"PRIx64"\n",
710 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
711 waves[i].wave, waves[i].exec, waves[i].inst_dw0,
712 waves[i].inst_dw1, waves[i].pc);
713 }
714 if (found)
715 fprintf(f, "\n\n");
716 }
717
718 static void si_dump_command(const char *title, const char *command, FILE *f)
719 {
720 char line[2000];
721
722 FILE *p = popen(command, "r");
723 if (!p)
724 return;
725
726 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
727 while (fgets(line, sizeof(line), p))
728 fputs(line, f);
729 fprintf(f, "\n\n");
730 pclose(p);
731 }
732
733 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
734 unsigned flags)
735 {
736 struct si_context *sctx = (struct si_context*)ctx;
737
738 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS)
739 si_dump_debug_registers(sctx, f);
740
741 if (flags & PIPE_DUMP_CURRENT_STATES)
742 si_dump_framebuffer(sctx, f);
743
744 if (flags & PIPE_DUMP_CURRENT_SHADERS) {
745 si_dump_shader(sctx->screen, &sctx->vs_shader, f);
746 si_dump_shader(sctx->screen, &sctx->tcs_shader, f);
747 si_dump_shader(sctx->screen, &sctx->tes_shader, f);
748 si_dump_shader(sctx->screen, &sctx->gs_shader, f);
749 si_dump_shader(sctx->screen, &sctx->ps_shader, f);
750
751 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
752 si_dump_annotated_shaders(sctx, f);
753 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
754 si_dump_command("Wave information", "umr -O bits -wa", f);
755 }
756
757 si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
758 "", "RW buffers", SI_NUM_RW_BUFFERS, f);
759 si_dump_descriptors(sctx, &sctx->vs_shader, f);
760 si_dump_descriptors(sctx, &sctx->tcs_shader, f);
761 si_dump_descriptors(sctx, &sctx->tes_shader, f);
762 si_dump_descriptors(sctx, &sctx->gs_shader, f);
763 si_dump_descriptors(sctx, &sctx->ps_shader, f);
764 }
765
766 if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
767 si_dump_bo_list(sctx, &sctx->last_gfx, f);
768 si_dump_last_ib(sctx, f);
769
770 fprintf(f, "Done.\n");
771
772 /* dump only once */
773 radeon_clear_saved_cs(&sctx->last_gfx);
774 r600_resource_reference(&sctx->last_trace_buf, NULL);
775 }
776 }
777
778 static void si_dump_dma(struct si_context *sctx,
779 struct radeon_saved_cs *saved, FILE *f)
780 {
781 static const char ib_name[] = "sDMA IB";
782 unsigned i;
783
784 si_dump_bo_list(sctx, saved, f);
785
786 fprintf(f, "------------------ %s begin ------------------\n", ib_name);
787
788 for (i = 0; i < saved->num_dw; ++i) {
789 fprintf(f, " %08x\n", saved->ib[i]);
790 }
791
792 fprintf(f, "------------------- %s end -------------------\n", ib_name);
793 fprintf(f, "\n");
794
795 fprintf(f, "SDMA Dump Done.\n");
796 }
797
798 static bool si_vm_fault_occured(struct si_context *sctx, uint32_t *out_addr)
799 {
800 char line[2000];
801 unsigned sec, usec;
802 int progress = 0;
803 uint64_t timestamp = 0;
804 bool fault = false;
805
806 FILE *p = popen("dmesg", "r");
807 if (!p)
808 return false;
809
810 while (fgets(line, sizeof(line), p)) {
811 char *msg, len;
812
813 if (!line[0] || line[0] == '\n')
814 continue;
815
816 /* Get the timestamp. */
817 if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
818 static bool hit = false;
819 if (!hit) {
820 fprintf(stderr, "%s: failed to parse line '%s'\n",
821 __func__, line);
822 hit = true;
823 }
824 continue;
825 }
826 timestamp = sec * 1000000llu + usec;
827
828 /* If just updating the timestamp. */
829 if (!out_addr)
830 continue;
831
832 /* Process messages only if the timestamp is newer. */
833 if (timestamp <= sctx->dmesg_timestamp)
834 continue;
835
836 /* Only process the first VM fault. */
837 if (fault)
838 continue;
839
840 /* Remove trailing \n */
841 len = strlen(line);
842 if (len && line[len-1] == '\n')
843 line[len-1] = 0;
844
845 /* Get the message part. */
846 msg = strchr(line, ']');
847 if (!msg) {
848 assert(0);
849 continue;
850 }
851 msg++;
852
853 switch (progress) {
854 case 0:
855 if (strstr(msg, "GPU fault detected:"))
856 progress = 1;
857 break;
858 case 1:
859 msg = strstr(msg, "VM_CONTEXT1_PROTECTION_FAULT_ADDR");
860 if (msg) {
861 msg = strstr(msg, "0x");
862 if (msg) {
863 msg += 2;
864 if (sscanf(msg, "%X", out_addr) == 1)
865 fault = true;
866 }
867 }
868 progress = 0;
869 break;
870 default:
871 progress = 0;
872 }
873 }
874 pclose(p);
875
876 if (timestamp > sctx->dmesg_timestamp)
877 sctx->dmesg_timestamp = timestamp;
878 return fault;
879 }
880
881 void si_check_vm_faults(struct r600_common_context *ctx,
882 struct radeon_saved_cs *saved, enum ring_type ring)
883 {
884 struct si_context *sctx = (struct si_context *)ctx;
885 struct pipe_screen *screen = sctx->b.b.screen;
886 FILE *f;
887 uint32_t addr;
888 char cmd_line[4096];
889
890 if (!si_vm_fault_occured(sctx, &addr))
891 return;
892
893 f = dd_get_debug_file(false);
894 if (!f)
895 return;
896
897 fprintf(f, "VM fault report.\n\n");
898 if (os_get_command_line(cmd_line, sizeof(cmd_line)))
899 fprintf(f, "Command: %s\n", cmd_line);
900 fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
901 fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
902 fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
903 fprintf(f, "Failing VM page: 0x%08x\n\n", addr);
904
905 if (sctx->apitrace_call_number)
906 fprintf(f, "Last apitrace call: %u\n\n",
907 sctx->apitrace_call_number);
908
909 switch (ring) {
910 case RING_GFX:
911 si_dump_debug_state(&sctx->b.b, f,
912 PIPE_DUMP_CURRENT_STATES |
913 PIPE_DUMP_CURRENT_SHADERS |
914 PIPE_DUMP_LAST_COMMAND_BUFFER);
915 break;
916
917 case RING_DMA:
918 si_dump_dma(sctx, saved, f);
919 break;
920
921 default:
922 break;
923 }
924
925 fclose(f);
926
927 fprintf(stderr, "Detected a VM fault, exiting...\n");
928 exit(0);
929 }
930
931 void si_init_debug_functions(struct si_context *sctx)
932 {
933 sctx->b.b.dump_debug_state = si_dump_debug_state;
934 sctx->b.check_vm_faults = si_check_vm_faults;
935
936 /* Set the initial dmesg timestamp for this context, so that
937 * only new messages will be checked for VM faults.
938 */
939 if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
940 si_vm_fault_occured(sctx, NULL);
941 }