radeonsi: add si_shader::prolog2
[mesa.git] / src / gallium / drivers / radeonsi / si_debug.c
1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Marek Olšák <maraeo@gmail.com>
25 */
26
27 #include "si_pipe.h"
28 #include "sid.h"
29 #include "gfx9d.h"
30 #include "sid_tables.h"
31 #include "ddebug/dd_util.h"
32 #include "util/u_memory.h"
33 #include "ac_debug.h"
34
35 DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
36
37 static void si_dump_shader(struct si_screen *sscreen,
38 struct si_shader_ctx_state *state, FILE *f)
39 {
40 struct si_shader *current = state->current;
41
42 if (!state->cso || !current)
43 return;
44
45 if (current->shader_log)
46 fwrite(current->shader_log, current->shader_log_size, 1, f);
47 else
48 si_shader_dump(sscreen, state->current, NULL,
49 state->cso->info.processor, f, false);
50 }
51
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     number of the shader that is being compiled
 * \param binary  passed to ac_elf_read() together with the file contents
 * \return true if an entry for "num" was found and its file was read and
 *         handed to ac_elf_read(); false if the variable is unset, has no
 *         entry for "num", or the file could not be read.
 *
 * A malformed variable (entry without ':') terminates the process.
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Walk the "num:filename" entries until the one for "num" is found. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* If more entries follow, isolate this entry's filename in a copy. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* ELF objects are binary data; never open them in text mode. */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	/* An empty file cannot be an ELF object, and a zero-sized allocation
	 * may legitimately return NULL, which would be misreported as OOM.
	 */
	if (filesize == 0) {
		fprintf(stderr, "radeonsi: replacement shader file is empty\n");
		goto out_close;
	}

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	if (fread(buf, 1, filesize, f) != (size_t)filesize) {
		/* errno is only meaningful if the stream is in error state. */
		if (ferror(f))
			goto file_error;
		fprintf(stderr, "radeonsi: reading shader: unexpected end of file\n");
		goto out_close;
	}

	ac_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
143
144 /* Parsed IBs are difficult to read without colors. Use "less -R file" to
145 * read them, or use "aha -b -f file" to convert them to html.
146 */
147 #define COLOR_RESET "\033[0m"
148 #define COLOR_RED "\033[31m"
149 #define COLOR_GREEN "\033[1;32m"
150 #define COLOR_YELLOW "\033[1;33m"
151 #define COLOR_CYAN "\033[1;36m"
152
153 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
154 unsigned offset)
155 {
156 struct radeon_winsys *ws = sctx->b.ws;
157 uint32_t value;
158
159 if (ws->read_registers(ws, offset, 1, &value))
160 ac_dump_reg(f, offset, value, ~0);
161 }
162
163 static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
164 {
165 if (sctx->screen->b.info.drm_major == 2 &&
166 sctx->screen->b.info.drm_minor < 42)
167 return; /* no radeon support */
168
169 fprintf(f, "Memory-mapped registers:\n");
170 si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
171
172 /* No other registers can be read on DRM < 3.1.0. */
173 if (sctx->screen->b.info.drm_major < 3 ||
174 sctx->screen->b.info.drm_minor < 1) {
175 fprintf(f, "\n");
176 return;
177 }
178
179 si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
180 si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
181 si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
182 si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
183 si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
184 si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
185 si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
186 if (sctx->b.chip_class <= VI) {
187 si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
188 si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
189 si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
190 }
191 si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
192 si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
193 si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
194 si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
195 si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
196 si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
197 si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
198 si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
199 si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
200 si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
201 fprintf(f, "\n");
202 }
203
204 static void si_dump_last_ib(struct si_context *sctx, FILE *f)
205 {
206 int last_trace_id = -1;
207
208 if (!sctx->last_gfx.ib)
209 return;
210
211 if (sctx->last_trace_buf) {
212 /* We are expecting that the ddebug pipe has already
213 * waited for the context, so this buffer should be idle.
214 * If the GPU is hung, there is no point in waiting for it.
215 */
216 uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
217 NULL,
218 PIPE_TRANSFER_UNSYNCHRONIZED |
219 PIPE_TRANSFER_READ);
220 if (map)
221 last_trace_id = *map;
222 }
223
224 if (sctx->init_config)
225 ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
226 -1, "IB2: Init config", sctx->b.chip_class,
227 NULL, NULL);
228
229 if (sctx->init_config_gs_rings)
230 ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
231 sctx->init_config_gs_rings->ndw,
232 -1, "IB2: Init GS rings", sctx->b.chip_class,
233 NULL, NULL);
234
235 ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
236 last_trace_id, "IB", sctx->b.chip_class,
237 NULL, NULL);
238 }
239
240 static const char *priority_to_string(enum radeon_bo_priority priority)
241 {
242 #define ITEM(x) [RADEON_PRIO_##x] = #x
243 static const char *table[64] = {
244 ITEM(FENCE),
245 ITEM(TRACE),
246 ITEM(SO_FILLED_SIZE),
247 ITEM(QUERY),
248 ITEM(IB1),
249 ITEM(IB2),
250 ITEM(DRAW_INDIRECT),
251 ITEM(INDEX_BUFFER),
252 ITEM(VCE),
253 ITEM(UVD),
254 ITEM(SDMA_BUFFER),
255 ITEM(SDMA_TEXTURE),
256 ITEM(CP_DMA),
257 ITEM(CONST_BUFFER),
258 ITEM(DESCRIPTORS),
259 ITEM(BORDER_COLORS),
260 ITEM(SAMPLER_BUFFER),
261 ITEM(VERTEX_BUFFER),
262 ITEM(SHADER_RW_BUFFER),
263 ITEM(COMPUTE_GLOBAL),
264 ITEM(SAMPLER_TEXTURE),
265 ITEM(SHADER_RW_IMAGE),
266 ITEM(SAMPLER_TEXTURE_MSAA),
267 ITEM(COLOR_BUFFER),
268 ITEM(DEPTH_BUFFER),
269 ITEM(COLOR_BUFFER_MSAA),
270 ITEM(DEPTH_BUFFER_MSAA),
271 ITEM(CMASK),
272 ITEM(DCC),
273 ITEM(HTILE),
274 ITEM(SHADER_BINARY),
275 ITEM(SHADER_RINGS),
276 ITEM(SCRATCH_BUFFER),
277 };
278 #undef ITEM
279
280 assert(priority < ARRAY_SIZE(table));
281 return table[priority];
282 }
283
284 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
285 const struct radeon_bo_list_item *b)
286 {
287 return a->vm_address < b->vm_address ? -1 :
288 a->vm_address > b->vm_address ? 1 : 0;
289 }
290
291 static void si_dump_bo_list(struct si_context *sctx,
292 const struct radeon_saved_cs *saved, FILE *f)
293 {
294 unsigned i,j;
295
296 if (!saved->bo_list)
297 return;
298
299 /* Sort the list according to VM adddresses first. */
300 qsort(saved->bo_list, saved->bo_count,
301 sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);
302
303 fprintf(f, "Buffer list (in units of pages = 4kB):\n"
304 COLOR_YELLOW " Size VM start page "
305 "VM end page Usage" COLOR_RESET "\n");
306
307 for (i = 0; i < saved->bo_count; i++) {
308 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
309 const unsigned page_size = sctx->b.screen->info.gart_page_size;
310 uint64_t va = saved->bo_list[i].vm_address;
311 uint64_t size = saved->bo_list[i].bo_size;
312 bool hit = false;
313
314 /* If there's unused virtual memory between 2 buffers, print it. */
315 if (i) {
316 uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
317 saved->bo_list[i-1].bo_size;
318
319 if (va > previous_va_end) {
320 fprintf(f, " %10"PRIu64" -- hole --\n",
321 (va - previous_va_end) / page_size);
322 }
323 }
324
325 /* Print the buffer. */
326 fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ",
327 size / page_size, va / page_size, (va + size) / page_size);
328
329 /* Print the usage. */
330 for (j = 0; j < 64; j++) {
331 if (!(saved->bo_list[i].priority_usage & (1llu << j)))
332 continue;
333
334 fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
335 hit = true;
336 }
337 fprintf(f, "\n");
338 }
339 fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
340 " Other buffers can still be allocated there.\n\n");
341 }
342
343 static void si_dump_framebuffer(struct si_context *sctx, FILE *f)
344 {
345 struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
346 struct r600_texture *rtex;
347 int i;
348
349 for (i = 0; i < state->nr_cbufs; i++) {
350 if (!state->cbufs[i])
351 continue;
352
353 rtex = (struct r600_texture*)state->cbufs[i]->texture;
354 fprintf(f, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
355 r600_print_texture_info(sctx->b.screen, rtex, f);
356 fprintf(f, "\n");
357 }
358
359 if (state->zsbuf) {
360 rtex = (struct r600_texture*)state->zsbuf->texture;
361 fprintf(f, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
362 r600_print_texture_info(sctx->b.screen, rtex, f);
363 fprintf(f, "\n");
364 }
365 }
366
367 static void si_dump_descriptor_list(struct si_descriptors *desc,
368 const char *shader_name,
369 const char *elem_name,
370 unsigned num_elements,
371 FILE *f)
372 {
373 unsigned i, j;
374 uint32_t *cpu_list = desc->list;
375 uint32_t *gpu_list = desc->gpu_list;
376 const char *list_note = "GPU list";
377
378 if (!gpu_list) {
379 gpu_list = cpu_list;
380 list_note = "CPU list";
381 }
382
383 for (i = 0; i < num_elements; i++) {
384 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
385 shader_name, elem_name, i, list_note);
386
387 switch (desc->element_dw_size) {
388 case 4:
389 for (j = 0; j < 4; j++)
390 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
391 gpu_list[j], 0xffffffff);
392 break;
393 case 8:
394 for (j = 0; j < 8; j++)
395 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
396 gpu_list[j], 0xffffffff);
397
398 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
399 for (j = 0; j < 4; j++)
400 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
401 gpu_list[4+j], 0xffffffff);
402 break;
403 case 16:
404 for (j = 0; j < 8; j++)
405 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
406 gpu_list[j], 0xffffffff);
407
408 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
409 for (j = 0; j < 4; j++)
410 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
411 gpu_list[4+j], 0xffffffff);
412
413 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
414 for (j = 0; j < 8; j++)
415 ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
416 gpu_list[8+j], 0xffffffff);
417
418 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
419 for (j = 0; j < 4; j++)
420 ac_dump_reg(f, R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
421 gpu_list[12+j], 0xffffffff);
422 break;
423 }
424
425 if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4) != 0) {
426 fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
427 COLOR_RESET "\n");
428 }
429
430 fprintf(f, "\n");
431 gpu_list += desc->element_dw_size;
432 cpu_list += desc->element_dw_size;
433 }
434 }
435
436 static void si_dump_descriptors(struct si_context *sctx,
437 struct si_shader_ctx_state *state,
438 FILE *f)
439 {
440 if (!state->cso || !state->current)
441 return;
442
443 unsigned type = state->cso->type;
444 const struct tgsi_shader_info *info = &state->cso->info;
445 struct si_descriptors *descs =
446 &sctx->descriptors[SI_DESCS_FIRST_SHADER +
447 type * SI_NUM_SHADER_DESCS];
448 static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
449
450 static const char *elem_name[] = {
451 " - Constant buffer",
452 " - Shader buffer",
453 " - Sampler",
454 " - Image",
455 };
456 unsigned num_elements[] = {
457 util_last_bit(info->const_buffers_declared),
458 util_last_bit(info->shader_buffers_declared),
459 util_last_bit(info->samplers_declared),
460 util_last_bit(info->images_declared),
461 };
462
463 if (type == PIPE_SHADER_VERTEX) {
464 si_dump_descriptor_list(&sctx->vertex_buffers, shader_name[type],
465 " - Vertex buffer", info->num_inputs, f);
466 }
467
468 for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
469 si_dump_descriptor_list(descs, shader_name[type], elem_name[i],
470 num_elements[i], f);
471 }
472
/* One disassembled instruction together with its position in the shader. */
struct si_shader_inst {
	char text[160];  /* one disasm line */
	unsigned offset; /* instruction offset */
	unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * \param disasm        newline-terminated disassembly text; NULL is a no-op
 * \param start_addr    address added to each instruction offset to form the
 *                      PC printed in the annotation
 * \param num           in/out: number of valid entries in "instructions"
 * \param instructions  destination array; the caller must size it for every
 *                      instruction line contained in "disasm"
 *
 * Only lines containing ';' are treated as instructions; anything else
 * (e.g. labels) is skipped. Lines that do not fit in si_shader_inst::text
 * are truncated. Offsets continue from the last entry already in the array.
 */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct si_shader_inst *instructions)
{
	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	const char *next;

	if (!disasm)
		return;

	for (; (next = strchr(disasm, '\n')); disasm = next + 1) {
		size_t line_len = next - disasm;
		/* Search within this line only, so that a ';' on a later line
		 * is never attributed to the current one. */
		const char *semicolon = memchr(disasm, ';', line_len);

		/* Ignore everything that is not an instruction. */
		if (!semicolon)
			continue;

		struct si_shader_inst *inst = &instructions[*num];
		size_t len = line_len;

		/* Never write past the fixed-size text buffer. */
		if (len >= sizeof(inst->text))
			len = sizeof(inst->text) - 1;

		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
		inst->size = next - semicolon > 16 ? 8 : 4;

		snprintf(inst->text + len, sizeof(inst->text) - len,
			 " [PC=0x%" PRIx64 ", off=%u, size=%u]",
			 start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
	}
}
512
513 #define MAX_WAVES_PER_CHIP (64 * 40)
514
/* State of one wave as parsed from a line of "umr -wa" output. */
struct si_wave_info {
	unsigned se; /* shader engine */
	unsigned sh; /* shader array */
	unsigned cu; /* compute unit */
	unsigned simd;
	unsigned wave;
	uint32_t status;
	uint64_t pc; /* program counter */
	uint32_t inst_dw0;
	uint32_t inst_dw1;
	uint64_t exec;
	bool matched; /* whether the wave is used by a currently-bound shader */
};

/* qsort() comparator for si_wave_info: orders by PC first, then by
 * SE, SH, CU, SIMD and wave number. Returns -1, 0 or 1.
 */
static int compare_wave(const void *p1, const void *p2)
{
	struct si_wave_info *lhs = (struct si_wave_info *)p1;
	struct si_wave_info *rhs = (struct si_wave_info *)p2;

/* Decide on the first key that differs; fall through when equal. */
#define COMPARE_KEY(key) \
	do { \
		if (lhs->key != rhs->key) \
			return lhs->key < rhs->key ? -1 : 1; \
	} while (0)

	COMPARE_KEY(pc);
	COMPARE_KEY(se);
	COMPARE_KEY(sh);
	COMPARE_KEY(cu);
	COMPARE_KEY(simd);
	COMPARE_KEY(wave);

#undef COMPARE_KEY

	return 0;
}
562
563 /* Return wave information. "waves" should be a large enough array. */
564 static unsigned si_get_wave_info(struct si_wave_info waves[MAX_WAVES_PER_CHIP])
565 {
566 char line[2000];
567 unsigned num_waves = 0;
568
569 FILE *p = popen("umr -wa", "r");
570 if (!p)
571 return 0;
572
573 if (!fgets(line, sizeof(line), p) ||
574 strncmp(line, "SE", 2) != 0) {
575 pclose(p);
576 return 0;
577 }
578
579 while (fgets(line, sizeof(line), p)) {
580 struct si_wave_info *w;
581 uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
582
583 assert(num_waves < MAX_WAVES_PER_CHIP);
584 w = &waves[num_waves];
585
586 if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
587 &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
588 &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
589 &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
590 w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
591 w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
592 w->matched = false;
593 num_waves++;
594 }
595 }
596
597 qsort(waves, num_waves, sizeof(struct si_wave_info), compare_wave);
598
599 pclose(p);
600 return num_waves;
601 }
602
603 /* If the shader is being executed, print its asm instructions, and annotate
604 * those that are being executed right now with information about waves that
605 * execute them. This is most useful during a GPU hang.
606 */
607 static void si_print_annotated_shader(struct si_shader *shader,
608 struct si_wave_info *waves,
609 unsigned num_waves,
610 FILE *f)
611 {
612 if (!shader || !shader->binary.disasm_string)
613 return;
614
615 uint64_t start_addr = shader->bo->gpu_address;
616 uint64_t end_addr = start_addr + shader->bo->b.b.width0;
617 unsigned i;
618
619 /* See if any wave executes the shader. */
620 for (i = 0; i < num_waves; i++) {
621 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
622 break;
623 }
624 if (i == num_waves)
625 return; /* the shader is not being executed */
626
627 /* Remember the first found wave. The waves are sorted according to PC. */
628 waves = &waves[i];
629 num_waves -= i;
630
631 /* Get the list of instructions.
632 * Buffer size / 4 is the upper bound of the instruction count.
633 */
634 unsigned num_inst = 0;
635 struct si_shader_inst *instructions =
636 calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
637
638 if (shader->prolog) {
639 si_add_split_disasm(shader->prolog->binary.disasm_string,
640 start_addr, &num_inst, instructions);
641 }
642 if (shader->previous_stage) {
643 si_add_split_disasm(shader->previous_stage->binary.disasm_string,
644 start_addr, &num_inst, instructions);
645 }
646 if (shader->prolog2) {
647 si_add_split_disasm(shader->prolog2->binary.disasm_string,
648 start_addr, &num_inst, instructions);
649 }
650 si_add_split_disasm(shader->binary.disasm_string,
651 start_addr, &num_inst, instructions);
652 if (shader->epilog) {
653 si_add_split_disasm(shader->epilog->binary.disasm_string,
654 start_addr, &num_inst, instructions);
655 }
656
657 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
658 si_get_shader_name(shader, shader->selector->type));
659
660 /* Print instructions with annotations. */
661 for (i = 0; i < num_inst; i++) {
662 struct si_shader_inst *inst = &instructions[i];
663
664 fprintf(f, "%s\n", inst->text);
665
666 /* Print which waves execute the instruction right now. */
667 while (num_waves && start_addr + inst->offset == waves->pc) {
668 fprintf(f,
669 " " COLOR_GREEN "^ SE%u SH%u CU%u "
670 "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
671 waves->se, waves->sh, waves->cu, waves->simd,
672 waves->wave, waves->exec);
673
674 if (inst->size == 4) {
675 fprintf(f, "INST32=%08X" COLOR_RESET "\n",
676 waves->inst_dw0);
677 } else {
678 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
679 waves->inst_dw0, waves->inst_dw1);
680 }
681
682 waves->matched = true;
683 waves = &waves[1];
684 num_waves--;
685 }
686 }
687
688 fprintf(f, "\n\n");
689 free(instructions);
690 }
691
692 static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
693 {
694 struct si_wave_info waves[MAX_WAVES_PER_CHIP];
695 unsigned num_waves = si_get_wave_info(waves);
696
697 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
698 "\n\n", num_waves);
699
700 si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
701 si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
702 si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
703 si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
704 si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
705
706 /* Print waves executing shaders that are not currently bound. */
707 unsigned i;
708 bool found = false;
709 for (i = 0; i < num_waves; i++) {
710 if (waves[i].matched)
711 continue;
712
713 if (!found) {
714 fprintf(f, COLOR_CYAN
715 "Waves not executing currently-bound shaders:"
716 COLOR_RESET "\n");
717 found = true;
718 }
719 fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
720 " INST=%08X %08X PC=%"PRIx64"\n",
721 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
722 waves[i].wave, waves[i].exec, waves[i].inst_dw0,
723 waves[i].inst_dw1, waves[i].pc);
724 }
725 if (found)
726 fprintf(f, "\n\n");
727 }
728
729 static void si_dump_command(const char *title, const char *command, FILE *f)
730 {
731 char line[2000];
732
733 FILE *p = popen(command, "r");
734 if (!p)
735 return;
736
737 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
738 while (fgets(line, sizeof(line), p))
739 fputs(line, f);
740 fprintf(f, "\n\n");
741 pclose(p);
742 }
743
744 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
745 unsigned flags)
746 {
747 struct si_context *sctx = (struct si_context*)ctx;
748
749 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS)
750 si_dump_debug_registers(sctx, f);
751
752 if (flags & PIPE_DUMP_CURRENT_STATES)
753 si_dump_framebuffer(sctx, f);
754
755 if (flags & PIPE_DUMP_CURRENT_SHADERS) {
756 si_dump_shader(sctx->screen, &sctx->vs_shader, f);
757 si_dump_shader(sctx->screen, &sctx->tcs_shader, f);
758 si_dump_shader(sctx->screen, &sctx->tes_shader, f);
759 si_dump_shader(sctx->screen, &sctx->gs_shader, f);
760 si_dump_shader(sctx->screen, &sctx->ps_shader, f);
761
762 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
763 si_dump_annotated_shaders(sctx, f);
764 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
765 si_dump_command("Wave information", "umr -O bits -wa", f);
766 }
767
768 si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
769 "", "RW buffers", SI_NUM_RW_BUFFERS, f);
770 si_dump_descriptors(sctx, &sctx->vs_shader, f);
771 si_dump_descriptors(sctx, &sctx->tcs_shader, f);
772 si_dump_descriptors(sctx, &sctx->tes_shader, f);
773 si_dump_descriptors(sctx, &sctx->gs_shader, f);
774 si_dump_descriptors(sctx, &sctx->ps_shader, f);
775 }
776
777 if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
778 si_dump_bo_list(sctx, &sctx->last_gfx, f);
779 si_dump_last_ib(sctx, f);
780
781 fprintf(f, "Done.\n");
782
783 /* dump only once */
784 radeon_clear_saved_cs(&sctx->last_gfx);
785 r600_resource_reference(&sctx->last_trace_buf, NULL);
786 }
787 }
788
789 static void si_dump_dma(struct si_context *sctx,
790 struct radeon_saved_cs *saved, FILE *f)
791 {
792 static const char ib_name[] = "sDMA IB";
793 unsigned i;
794
795 si_dump_bo_list(sctx, saved, f);
796
797 fprintf(f, "------------------ %s begin ------------------\n", ib_name);
798
799 for (i = 0; i < saved->num_dw; ++i) {
800 fprintf(f, " %08x\n", saved->ib[i]);
801 }
802
803 fprintf(f, "------------------- %s end -------------------\n", ib_name);
804 fprintf(f, "\n");
805
806 fprintf(f, "SDMA Dump Done.\n");
807 }
808
809 static bool si_vm_fault_occured(struct si_context *sctx, uint32_t *out_addr)
810 {
811 char line[2000];
812 unsigned sec, usec;
813 int progress = 0;
814 uint64_t timestamp = 0;
815 bool fault = false;
816
817 FILE *p = popen("dmesg", "r");
818 if (!p)
819 return false;
820
821 while (fgets(line, sizeof(line), p)) {
822 char *msg, len;
823
824 if (!line[0] || line[0] == '\n')
825 continue;
826
827 /* Get the timestamp. */
828 if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
829 static bool hit = false;
830 if (!hit) {
831 fprintf(stderr, "%s: failed to parse line '%s'\n",
832 __func__, line);
833 hit = true;
834 }
835 continue;
836 }
837 timestamp = sec * 1000000llu + usec;
838
839 /* If just updating the timestamp. */
840 if (!out_addr)
841 continue;
842
843 /* Process messages only if the timestamp is newer. */
844 if (timestamp <= sctx->dmesg_timestamp)
845 continue;
846
847 /* Only process the first VM fault. */
848 if (fault)
849 continue;
850
851 /* Remove trailing \n */
852 len = strlen(line);
853 if (len && line[len-1] == '\n')
854 line[len-1] = 0;
855
856 /* Get the message part. */
857 msg = strchr(line, ']');
858 if (!msg) {
859 assert(0);
860 continue;
861 }
862 msg++;
863
864 switch (progress) {
865 case 0:
866 if (strstr(msg, "GPU fault detected:"))
867 progress = 1;
868 break;
869 case 1:
870 msg = strstr(msg, "VM_CONTEXT1_PROTECTION_FAULT_ADDR");
871 if (msg) {
872 msg = strstr(msg, "0x");
873 if (msg) {
874 msg += 2;
875 if (sscanf(msg, "%X", out_addr) == 1)
876 fault = true;
877 }
878 }
879 progress = 0;
880 break;
881 default:
882 progress = 0;
883 }
884 }
885 pclose(p);
886
887 if (timestamp > sctx->dmesg_timestamp)
888 sctx->dmesg_timestamp = timestamp;
889 return fault;
890 }
891
892 void si_check_vm_faults(struct r600_common_context *ctx,
893 struct radeon_saved_cs *saved, enum ring_type ring)
894 {
895 struct si_context *sctx = (struct si_context *)ctx;
896 struct pipe_screen *screen = sctx->b.b.screen;
897 FILE *f;
898 uint32_t addr;
899 char cmd_line[4096];
900
901 if (!si_vm_fault_occured(sctx, &addr))
902 return;
903
904 f = dd_get_debug_file(false);
905 if (!f)
906 return;
907
908 fprintf(f, "VM fault report.\n\n");
909 if (os_get_command_line(cmd_line, sizeof(cmd_line)))
910 fprintf(f, "Command: %s\n", cmd_line);
911 fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
912 fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
913 fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
914 fprintf(f, "Failing VM page: 0x%08x\n\n", addr);
915
916 if (sctx->apitrace_call_number)
917 fprintf(f, "Last apitrace call: %u\n\n",
918 sctx->apitrace_call_number);
919
920 switch (ring) {
921 case RING_GFX:
922 si_dump_debug_state(&sctx->b.b, f,
923 PIPE_DUMP_CURRENT_STATES |
924 PIPE_DUMP_CURRENT_SHADERS |
925 PIPE_DUMP_LAST_COMMAND_BUFFER);
926 break;
927
928 case RING_DMA:
929 si_dump_dma(sctx, saved, f);
930 break;
931
932 default:
933 break;
934 }
935
936 fclose(f);
937
938 fprintf(stderr, "Detected a VM fault, exiting...\n");
939 exit(0);
940 }
941
942 void si_init_debug_functions(struct si_context *sctx)
943 {
944 sctx->b.b.dump_debug_state = si_dump_debug_state;
945 sctx->b.check_vm_faults = si_check_vm_faults;
946
947 /* Set the initial dmesg timestamp for this context, so that
948 * only new messages will be checked for VM faults.
949 */
950 if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
951 si_vm_fault_occured(sctx, NULL);
952 }