2 * Copyright 2015 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Marek Olšák <maraeo@gmail.com>
28 #include "si_compute.h"
31 #include "sid_tables.h"
32 #include "ddebug/dd_util.h"
33 #include "util/u_memory.h"
36 DEBUG_GET_ONCE_OPTION(replace_shaders
, "RADEON_REPLACE_SHADERS", NULL
)
38 static void si_dump_shader(struct si_screen
*sscreen
,
39 enum pipe_shader_type processor
,
40 const struct si_shader
*shader
, FILE *f
)
42 if (shader
->shader_log
)
43 fwrite(shader
->shader_log
, shader
->shader_log_size
, 1, f
);
45 si_shader_dump(sscreen
, shader
, NULL
, processor
, f
, false);
48 static void si_dump_gfx_shader(struct si_screen
*sscreen
,
49 const struct si_shader_ctx_state
*state
, FILE *f
)
51 const struct si_shader
*current
= state
->current
;
53 if (!state
->cso
|| !current
)
56 si_dump_shader(sscreen
, state
->cso
->info
.processor
, current
, f
);
59 static void si_dump_compute_shader(struct si_screen
*sscreen
,
60 const struct si_cs_shader_state
*state
, FILE *f
)
62 if (!state
->program
|| state
->program
!= state
->emitted_program
)
65 si_dump_shader(sscreen
, PIPE_SHADER_COMPUTE
, &state
->program
->shader
, f
);
69 * Shader compiles can be overridden with arbitrary ELF objects by setting
70 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
72 bool si_replace_shader(unsigned num
, struct ac_shader_binary
*binary
)
74 const char *p
= debug_get_option_replace_shaders();
75 const char *semicolon
;
80 bool replaced
= false;
88 i
= strtoul(p
, &endp
, 0);
92 fprintf(stderr
, "RADEON_REPLACE_SHADERS formatted badly.\n");
108 semicolon
= strchr(p
, ';');
110 p
= copy
= strndup(p
, semicolon
- p
);
112 fprintf(stderr
, "out of memory\n");
117 fprintf(stderr
, "radeonsi: replace shader %u by %s\n", num
, p
);
121 perror("radeonsi: failed to open file");
125 if (fseek(f
, 0, SEEK_END
) != 0)
132 if (fseek(f
, 0, SEEK_SET
) != 0)
135 buf
= MALLOC(filesize
);
137 fprintf(stderr
, "out of memory\n");
141 nread
= fread(buf
, 1, filesize
, f
);
142 if (nread
!= filesize
)
145 ac_elf_read(buf
, filesize
, binary
);
156 perror("radeonsi: reading shader");
160 /* Parsed IBs are difficult to read without colors. Use "less -R file" to
161 * read them, or use "aha -b -f file" to convert them to html.
163 #define COLOR_RESET "\033[0m"
164 #define COLOR_RED "\033[31m"
165 #define COLOR_GREEN "\033[1;32m"
166 #define COLOR_YELLOW "\033[1;33m"
167 #define COLOR_CYAN "\033[1;36m"
169 static void si_dump_mmapped_reg(struct si_context
*sctx
, FILE *f
,
172 struct radeon_winsys
*ws
= sctx
->b
.ws
;
175 if (ws
->read_registers(ws
, offset
, 1, &value
))
176 ac_dump_reg(f
, offset
, value
, ~0);
179 static void si_dump_debug_registers(struct si_context
*sctx
, FILE *f
)
181 if (sctx
->screen
->b
.info
.drm_major
== 2 &&
182 sctx
->screen
->b
.info
.drm_minor
< 42)
183 return; /* no radeon support */
185 fprintf(f
, "Memory-mapped registers:\n");
186 si_dump_mmapped_reg(sctx
, f
, R_008010_GRBM_STATUS
);
188 /* No other registers can be read on DRM < 3.1.0. */
189 if (sctx
->screen
->b
.info
.drm_major
< 3 ||
190 sctx
->screen
->b
.info
.drm_minor
< 1) {
195 si_dump_mmapped_reg(sctx
, f
, R_008008_GRBM_STATUS2
);
196 si_dump_mmapped_reg(sctx
, f
, R_008014_GRBM_STATUS_SE0
);
197 si_dump_mmapped_reg(sctx
, f
, R_008018_GRBM_STATUS_SE1
);
198 si_dump_mmapped_reg(sctx
, f
, R_008038_GRBM_STATUS_SE2
);
199 si_dump_mmapped_reg(sctx
, f
, R_00803C_GRBM_STATUS_SE3
);
200 si_dump_mmapped_reg(sctx
, f
, R_00D034_SDMA0_STATUS_REG
);
201 si_dump_mmapped_reg(sctx
, f
, R_00D834_SDMA1_STATUS_REG
);
202 if (sctx
->b
.chip_class
<= VI
) {
203 si_dump_mmapped_reg(sctx
, f
, R_000E50_SRBM_STATUS
);
204 si_dump_mmapped_reg(sctx
, f
, R_000E4C_SRBM_STATUS2
);
205 si_dump_mmapped_reg(sctx
, f
, R_000E54_SRBM_STATUS3
);
207 si_dump_mmapped_reg(sctx
, f
, R_008680_CP_STAT
);
208 si_dump_mmapped_reg(sctx
, f
, R_008674_CP_STALLED_STAT1
);
209 si_dump_mmapped_reg(sctx
, f
, R_008678_CP_STALLED_STAT2
);
210 si_dump_mmapped_reg(sctx
, f
, R_008670_CP_STALLED_STAT3
);
211 si_dump_mmapped_reg(sctx
, f
, R_008210_CP_CPC_STATUS
);
212 si_dump_mmapped_reg(sctx
, f
, R_008214_CP_CPC_BUSY_STAT
);
213 si_dump_mmapped_reg(sctx
, f
, R_008218_CP_CPC_STALLED_STAT1
);
214 si_dump_mmapped_reg(sctx
, f
, R_00821C_CP_CPF_STATUS
);
215 si_dump_mmapped_reg(sctx
, f
, R_008220_CP_CPF_BUSY_STAT
);
216 si_dump_mmapped_reg(sctx
, f
, R_008224_CP_CPF_STALLED_STAT1
);
220 static void si_dump_last_ib(struct si_context
*sctx
, FILE *f
)
222 int last_trace_id
= -1;
224 if (!sctx
->last_gfx
.ib
)
227 if (sctx
->last_trace_buf
) {
228 /* We are expecting that the ddebug pipe has already
229 * waited for the context, so this buffer should be idle.
230 * If the GPU is hung, there is no point in waiting for it.
232 uint32_t *map
= sctx
->b
.ws
->buffer_map(sctx
->last_trace_buf
->buf
,
234 PIPE_TRANSFER_UNSYNCHRONIZED
|
237 last_trace_id
= *map
;
240 if (sctx
->init_config
)
241 ac_parse_ib(f
, sctx
->init_config
->pm4
, sctx
->init_config
->ndw
,
242 -1, "IB2: Init config", sctx
->b
.chip_class
,
245 if (sctx
->init_config_gs_rings
)
246 ac_parse_ib(f
, sctx
->init_config_gs_rings
->pm4
,
247 sctx
->init_config_gs_rings
->ndw
,
248 -1, "IB2: Init GS rings", sctx
->b
.chip_class
,
251 ac_parse_ib(f
, sctx
->last_gfx
.ib
, sctx
->last_gfx
.num_dw
,
252 last_trace_id
, "IB", sctx
->b
.chip_class
,
256 static const char *priority_to_string(enum radeon_bo_priority priority
)
258 #define ITEM(x) [RADEON_PRIO_##x] = #x
259 static const char *table
[64] = {
262 ITEM(SO_FILLED_SIZE
),
276 ITEM(SAMPLER_BUFFER
),
278 ITEM(SHADER_RW_BUFFER
),
279 ITEM(COMPUTE_GLOBAL
),
280 ITEM(SAMPLER_TEXTURE
),
281 ITEM(SHADER_RW_IMAGE
),
282 ITEM(SAMPLER_TEXTURE_MSAA
),
285 ITEM(COLOR_BUFFER_MSAA
),
286 ITEM(DEPTH_BUFFER_MSAA
),
292 ITEM(SCRATCH_BUFFER
),
296 assert(priority
< ARRAY_SIZE(table
));
297 return table
[priority
];
300 static int bo_list_compare_va(const struct radeon_bo_list_item
*a
,
301 const struct radeon_bo_list_item
*b
)
303 return a
->vm_address
< b
->vm_address
? -1 :
304 a
->vm_address
> b
->vm_address
? 1 : 0;
307 static void si_dump_bo_list(struct si_context
*sctx
,
308 const struct radeon_saved_cs
*saved
, FILE *f
)
315 /* Sort the list according to VM adddresses first. */
316 qsort(saved
->bo_list
, saved
->bo_count
,
317 sizeof(saved
->bo_list
[0]), (void*)bo_list_compare_va
);
319 fprintf(f
, "Buffer list (in units of pages = 4kB):\n"
320 COLOR_YELLOW
" Size VM start page "
321 "VM end page Usage" COLOR_RESET
"\n");
323 for (i
= 0; i
< saved
->bo_count
; i
++) {
324 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
325 const unsigned page_size
= sctx
->b
.screen
->info
.gart_page_size
;
326 uint64_t va
= saved
->bo_list
[i
].vm_address
;
327 uint64_t size
= saved
->bo_list
[i
].bo_size
;
330 /* If there's unused virtual memory between 2 buffers, print it. */
332 uint64_t previous_va_end
= saved
->bo_list
[i
-1].vm_address
+
333 saved
->bo_list
[i
-1].bo_size
;
335 if (va
> previous_va_end
) {
336 fprintf(f
, " %10"PRIu64
" -- hole --\n",
337 (va
- previous_va_end
) / page_size
);
341 /* Print the buffer. */
342 fprintf(f
, " %10"PRIu64
" 0x%013"PRIX64
" 0x%013"PRIX64
" ",
343 size
/ page_size
, va
/ page_size
, (va
+ size
) / page_size
);
345 /* Print the usage. */
346 for (j
= 0; j
< 64; j
++) {
347 if (!(saved
->bo_list
[i
].priority_usage
& (1llu << j
)))
350 fprintf(f
, "%s%s", !hit
? "" : ", ", priority_to_string(j
));
355 fprintf(f
, "\nNote: The holes represent memory not used by the IB.\n"
356 " Other buffers can still be allocated there.\n\n");
359 static void si_dump_framebuffer(struct si_context
*sctx
, FILE *f
)
361 struct pipe_framebuffer_state
*state
= &sctx
->framebuffer
.state
;
362 struct r600_texture
*rtex
;
365 for (i
= 0; i
< state
->nr_cbufs
; i
++) {
366 if (!state
->cbufs
[i
])
369 rtex
= (struct r600_texture
*)state
->cbufs
[i
]->texture
;
370 fprintf(f
, COLOR_YELLOW
"Color buffer %i:" COLOR_RESET
"\n", i
);
371 r600_print_texture_info(sctx
->b
.screen
, rtex
, f
);
376 rtex
= (struct r600_texture
*)state
->zsbuf
->texture
;
377 fprintf(f
, COLOR_YELLOW
"Depth-stencil buffer:" COLOR_RESET
"\n");
378 r600_print_texture_info(sctx
->b
.screen
, rtex
, f
);
383 typedef unsigned (*slot_remap_func
)(unsigned);
385 static void si_dump_descriptor_list(struct si_descriptors
*desc
,
386 const char *shader_name
,
387 const char *elem_name
,
388 unsigned num_elements
,
389 slot_remap_func slot_remap
,
394 for (i
= 0; i
< num_elements
; i
++) {
395 unsigned dw_offset
= slot_remap(i
) * desc
->element_dw_size
;
396 uint32_t *gpu_ptr
= desc
->gpu_list
? desc
->gpu_list
: desc
->list
;
397 const char *list_note
= desc
->gpu_list
? "GPU list" : "CPU list";
398 uint32_t *cpu_list
= desc
->list
+ dw_offset
;
399 uint32_t *gpu_list
= gpu_ptr
+ dw_offset
;
401 fprintf(f
, COLOR_GREEN
"%s%s slot %u (%s):" COLOR_RESET
"\n",
402 shader_name
, elem_name
, i
, list_note
);
404 switch (desc
->element_dw_size
) {
406 for (j
= 0; j
< 4; j
++)
407 ac_dump_reg(f
, R_008F00_SQ_BUF_RSRC_WORD0
+ j
*4,
408 gpu_list
[j
], 0xffffffff);
411 for (j
= 0; j
< 8; j
++)
412 ac_dump_reg(f
, R_008F10_SQ_IMG_RSRC_WORD0
+ j
*4,
413 gpu_list
[j
], 0xffffffff);
415 fprintf(f
, COLOR_CYAN
" Buffer:" COLOR_RESET
"\n");
416 for (j
= 0; j
< 4; j
++)
417 ac_dump_reg(f
, R_008F00_SQ_BUF_RSRC_WORD0
+ j
*4,
418 gpu_list
[4+j
], 0xffffffff);
421 for (j
= 0; j
< 8; j
++)
422 ac_dump_reg(f
, R_008F10_SQ_IMG_RSRC_WORD0
+ j
*4,
423 gpu_list
[j
], 0xffffffff);
425 fprintf(f
, COLOR_CYAN
" Buffer:" COLOR_RESET
"\n");
426 for (j
= 0; j
< 4; j
++)
427 ac_dump_reg(f
, R_008F00_SQ_BUF_RSRC_WORD0
+ j
*4,
428 gpu_list
[4+j
], 0xffffffff);
430 fprintf(f
, COLOR_CYAN
" FMASK:" COLOR_RESET
"\n");
431 for (j
= 0; j
< 8; j
++)
432 ac_dump_reg(f
, R_008F10_SQ_IMG_RSRC_WORD0
+ j
*4,
433 gpu_list
[8+j
], 0xffffffff);
435 fprintf(f
, COLOR_CYAN
" Sampler state:" COLOR_RESET
"\n");
436 for (j
= 0; j
< 4; j
++)
437 ac_dump_reg(f
, R_008F30_SQ_IMG_SAMP_WORD0
+ j
*4,
438 gpu_list
[12+j
], 0xffffffff);
442 if (memcmp(gpu_list
, cpu_list
, desc
->element_dw_size
* 4) != 0) {
443 fprintf(f
, COLOR_RED
"!!!!! This slot was corrupted in GPU memory !!!!!"
451 static unsigned si_identity(unsigned slot
)
456 static void si_dump_descriptors(struct si_context
*sctx
,
457 enum pipe_shader_type processor
,
458 const struct tgsi_shader_info
*info
, FILE *f
)
460 struct si_descriptors
*descs
=
461 &sctx
->descriptors
[SI_DESCS_FIRST_SHADER
+
462 processor
* SI_NUM_SHADER_DESCS
];
463 static const char *shader_name
[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
465 static const char *elem_name
[] = {
466 " - Constant buffer",
471 static const slot_remap_func remap_func
[] = {
472 si_get_constbuf_slot
,
473 si_get_shaderbuf_slot
,
477 unsigned enabled_slots
[] = {
478 sctx
->const_and_shader_buffers
[processor
].enabled_mask
>> SI_NUM_SHADER_BUFFERS
,
479 util_bitreverse(sctx
->const_and_shader_buffers
[processor
].enabled_mask
&
480 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS
)),
481 sctx
->samplers
[processor
].views
.enabled_mask
,
482 sctx
->images
[processor
].enabled_mask
,
484 unsigned required_slots
[] = {
485 info
? info
->const_buffers_declared
: 0,
486 info
? info
->shader_buffers_declared
: 0,
487 info
? info
->samplers_declared
: 0,
488 info
? info
->images_declared
: 0,
491 if (processor
== PIPE_SHADER_VERTEX
) {
492 assert(info
); /* only CS may not have an info struct */
494 si_dump_descriptor_list(&sctx
->vertex_buffers
, shader_name
[processor
],
495 " - Vertex buffer", info
->num_inputs
,
499 for (unsigned i
= 0; i
< SI_NUM_SHADER_DESCS
; ++i
, ++descs
)
500 si_dump_descriptor_list(descs
, shader_name
[processor
], elem_name
[i
],
501 util_last_bit(enabled_slots
[i
] | required_slots
[i
]),
505 static void si_dump_gfx_descriptors(struct si_context
*sctx
,
506 const struct si_shader_ctx_state
*state
,
509 if (!state
->cso
|| !state
->current
)
512 si_dump_descriptors(sctx
, state
->cso
->type
, &state
->cso
->info
, f
);
515 static void si_dump_compute_descriptors(struct si_context
*sctx
, FILE *f
)
517 if (!sctx
->cs_shader_state
.program
||
518 sctx
->cs_shader_state
.program
!= sctx
->cs_shader_state
.emitted_program
)
521 si_dump_descriptors(sctx
, PIPE_SHADER_COMPUTE
, NULL
, f
);
524 struct si_shader_inst
{
525 char text
[160]; /* one disasm line */
526 unsigned offset
; /* instruction offset */
527 unsigned size
; /* instruction size = 4 or 8 */
530 /* Split a disassembly string into lines and add them to the array pointed
531 * to by "instructions". */
532 static void si_add_split_disasm(const char *disasm
,
535 struct si_shader_inst
*instructions
)
537 struct si_shader_inst
*last_inst
= *num
? &instructions
[*num
- 1] : NULL
;
540 while ((next
= strchr(disasm
, '\n'))) {
541 struct si_shader_inst
*inst
= &instructions
[*num
];
542 unsigned len
= next
- disasm
;
544 assert(len
< ARRAY_SIZE(inst
->text
));
545 memcpy(inst
->text
, disasm
, len
);
547 inst
->offset
= last_inst
? last_inst
->offset
+ last_inst
->size
: 0;
549 const char *semicolon
= strchr(disasm
, ';');
551 /* More than 16 chars after ";" means the instruction is 8 bytes long. */
552 inst
->size
= next
- semicolon
> 16 ? 8 : 4;
554 snprintf(inst
->text
+ len
, ARRAY_SIZE(inst
->text
) - len
,
555 " [PC=0x%"PRIx64
", off=%u, size=%u]",
556 start_addr
+ inst
->offset
, inst
->offset
, inst
->size
);
564 #define MAX_WAVES_PER_CHIP (64 * 40)
566 struct si_wave_info
{
567 unsigned se
; /* shader engine */
568 unsigned sh
; /* shader array */
569 unsigned cu
; /* compute unit */
573 uint64_t pc
; /* program counter */
577 bool matched
; /* whether the wave is used by a currently-bound shader */
580 static int compare_wave(const void *p1
, const void *p2
)
582 struct si_wave_info
*w1
= (struct si_wave_info
*)p1
;
583 struct si_wave_info
*w2
= (struct si_wave_info
*)p2
;
585 /* Sort waves according to PC and then SE, SH, CU, etc. */
602 if (w1
->simd
< w2
->simd
)
604 if (w1
->simd
> w2
->simd
)
606 if (w1
->wave
< w2
->wave
)
608 if (w1
->wave
> w2
->wave
)
614 /* Return wave information. "waves" should be a large enough array. */
615 static unsigned si_get_wave_info(struct si_wave_info waves
[MAX_WAVES_PER_CHIP
])
618 unsigned num_waves
= 0;
620 FILE *p
= popen("umr -wa", "r");
624 if (!fgets(line
, sizeof(line
), p
) ||
625 strncmp(line
, "SE", 2) != 0) {
630 while (fgets(line
, sizeof(line
), p
)) {
631 struct si_wave_info
*w
;
632 uint32_t pc_hi
, pc_lo
, exec_hi
, exec_lo
;
634 assert(num_waves
< MAX_WAVES_PER_CHIP
);
635 w
= &waves
[num_waves
];
637 if (sscanf(line
, "%u %u %u %u %u %x %x %x %x %x %x %x",
638 &w
->se
, &w
->sh
, &w
->cu
, &w
->simd
, &w
->wave
,
639 &w
->status
, &pc_hi
, &pc_lo
, &w
->inst_dw0
,
640 &w
->inst_dw1
, &exec_hi
, &exec_lo
) == 12) {
641 w
->pc
= ((uint64_t)pc_hi
<< 32) | pc_lo
;
642 w
->exec
= ((uint64_t)exec_hi
<< 32) | exec_lo
;
648 qsort(waves
, num_waves
, sizeof(struct si_wave_info
), compare_wave
);
654 /* If the shader is being executed, print its asm instructions, and annotate
655 * those that are being executed right now with information about waves that
656 * execute them. This is most useful during a GPU hang.
658 static void si_print_annotated_shader(struct si_shader
*shader
,
659 struct si_wave_info
*waves
,
663 if (!shader
|| !shader
->binary
.disasm_string
)
666 uint64_t start_addr
= shader
->bo
->gpu_address
;
667 uint64_t end_addr
= start_addr
+ shader
->bo
->b
.b
.width0
;
670 /* See if any wave executes the shader. */
671 for (i
= 0; i
< num_waves
; i
++) {
672 if (start_addr
<= waves
[i
].pc
&& waves
[i
].pc
<= end_addr
)
676 return; /* the shader is not being executed */
678 /* Remember the first found wave. The waves are sorted according to PC. */
682 /* Get the list of instructions.
683 * Buffer size / 4 is the upper bound of the instruction count.
685 unsigned num_inst
= 0;
686 struct si_shader_inst
*instructions
=
687 calloc(shader
->bo
->b
.b
.width0
/ 4, sizeof(struct si_shader_inst
));
689 if (shader
->prolog
) {
690 si_add_split_disasm(shader
->prolog
->binary
.disasm_string
,
691 start_addr
, &num_inst
, instructions
);
693 if (shader
->previous_stage
) {
694 si_add_split_disasm(shader
->previous_stage
->binary
.disasm_string
,
695 start_addr
, &num_inst
, instructions
);
697 if (shader
->prolog2
) {
698 si_add_split_disasm(shader
->prolog2
->binary
.disasm_string
,
699 start_addr
, &num_inst
, instructions
);
701 si_add_split_disasm(shader
->binary
.disasm_string
,
702 start_addr
, &num_inst
, instructions
);
703 if (shader
->epilog
) {
704 si_add_split_disasm(shader
->epilog
->binary
.disasm_string
,
705 start_addr
, &num_inst
, instructions
);
708 fprintf(f
, COLOR_YELLOW
"%s - annotated disassembly:" COLOR_RESET
"\n",
709 si_get_shader_name(shader
, shader
->selector
->type
));
711 /* Print instructions with annotations. */
712 for (i
= 0; i
< num_inst
; i
++) {
713 struct si_shader_inst
*inst
= &instructions
[i
];
715 fprintf(f
, "%s\n", inst
->text
);
717 /* Print which waves execute the instruction right now. */
718 while (num_waves
&& start_addr
+ inst
->offset
== waves
->pc
) {
720 " " COLOR_GREEN
"^ SE%u SH%u CU%u "
721 "SIMD%u WAVE%u EXEC=%016"PRIx64
" ",
722 waves
->se
, waves
->sh
, waves
->cu
, waves
->simd
,
723 waves
->wave
, waves
->exec
);
725 if (inst
->size
== 4) {
726 fprintf(f
, "INST32=%08X" COLOR_RESET
"\n",
729 fprintf(f
, "INST64=%08X %08X" COLOR_RESET
"\n",
730 waves
->inst_dw0
, waves
->inst_dw1
);
733 waves
->matched
= true;
743 static void si_dump_annotated_shaders(struct si_context
*sctx
, FILE *f
)
745 struct si_wave_info waves
[MAX_WAVES_PER_CHIP
];
746 unsigned num_waves
= si_get_wave_info(waves
);
748 fprintf(f
, COLOR_CYAN
"The number of active waves = %u" COLOR_RESET
751 si_print_annotated_shader(sctx
->vs_shader
.current
, waves
, num_waves
, f
);
752 si_print_annotated_shader(sctx
->tcs_shader
.current
, waves
, num_waves
, f
);
753 si_print_annotated_shader(sctx
->tes_shader
.current
, waves
, num_waves
, f
);
754 si_print_annotated_shader(sctx
->gs_shader
.current
, waves
, num_waves
, f
);
755 si_print_annotated_shader(sctx
->ps_shader
.current
, waves
, num_waves
, f
);
757 /* Print waves executing shaders that are not currently bound. */
760 for (i
= 0; i
< num_waves
; i
++) {
761 if (waves
[i
].matched
)
765 fprintf(f
, COLOR_CYAN
766 "Waves not executing currently-bound shaders:"
770 fprintf(f
, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
771 " INST=%08X %08X PC=%"PRIx64
"\n",
772 waves
[i
].se
, waves
[i
].sh
, waves
[i
].cu
, waves
[i
].simd
,
773 waves
[i
].wave
, waves
[i
].exec
, waves
[i
].inst_dw0
,
774 waves
[i
].inst_dw1
, waves
[i
].pc
);
780 static void si_dump_command(const char *title
, const char *command
, FILE *f
)
784 FILE *p
= popen(command
, "r");
788 fprintf(f
, COLOR_YELLOW
"%s: " COLOR_RESET
"\n", title
);
789 while (fgets(line
, sizeof(line
), p
))
795 static void si_dump_debug_state(struct pipe_context
*ctx
, FILE *f
,
798 struct si_context
*sctx
= (struct si_context
*)ctx
;
800 if (flags
& PIPE_DUMP_DEVICE_STATUS_REGISTERS
)
801 si_dump_debug_registers(sctx
, f
);
803 if (flags
& PIPE_DUMP_CURRENT_STATES
)
804 si_dump_framebuffer(sctx
, f
);
806 if (flags
& PIPE_DUMP_CURRENT_SHADERS
) {
807 si_dump_gfx_shader(sctx
->screen
, &sctx
->vs_shader
, f
);
808 si_dump_gfx_shader(sctx
->screen
, &sctx
->tcs_shader
, f
);
809 si_dump_gfx_shader(sctx
->screen
, &sctx
->tes_shader
, f
);
810 si_dump_gfx_shader(sctx
->screen
, &sctx
->gs_shader
, f
);
811 si_dump_gfx_shader(sctx
->screen
, &sctx
->ps_shader
, f
);
812 si_dump_compute_shader(sctx
->screen
, &sctx
->cs_shader_state
, f
);
814 if (flags
& PIPE_DUMP_DEVICE_STATUS_REGISTERS
) {
815 si_dump_annotated_shaders(sctx
, f
);
816 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f
);
817 si_dump_command("Wave information", "umr -O bits -wa", f
);
820 si_dump_descriptor_list(&sctx
->descriptors
[SI_DESCS_RW_BUFFERS
],
821 "", "RW buffers", SI_NUM_RW_BUFFERS
,
823 si_dump_gfx_descriptors(sctx
, &sctx
->vs_shader
, f
);
824 si_dump_gfx_descriptors(sctx
, &sctx
->tcs_shader
, f
);
825 si_dump_gfx_descriptors(sctx
, &sctx
->tes_shader
, f
);
826 si_dump_gfx_descriptors(sctx
, &sctx
->gs_shader
, f
);
827 si_dump_gfx_descriptors(sctx
, &sctx
->ps_shader
, f
);
828 si_dump_compute_descriptors(sctx
, f
);
831 if (flags
& PIPE_DUMP_LAST_COMMAND_BUFFER
) {
832 si_dump_bo_list(sctx
, &sctx
->last_gfx
, f
);
833 si_dump_last_ib(sctx
, f
);
835 fprintf(f
, "Done.\n");
838 radeon_clear_saved_cs(&sctx
->last_gfx
);
839 r600_resource_reference(&sctx
->last_trace_buf
, NULL
);
843 static void si_dump_dma(struct si_context
*sctx
,
844 struct radeon_saved_cs
*saved
, FILE *f
)
846 static const char ib_name
[] = "sDMA IB";
849 si_dump_bo_list(sctx
, saved
, f
);
851 fprintf(f
, "------------------ %s begin ------------------\n", ib_name
);
853 for (i
= 0; i
< saved
->num_dw
; ++i
) {
854 fprintf(f
, " %08x\n", saved
->ib
[i
]);
857 fprintf(f
, "------------------- %s end -------------------\n", ib_name
);
860 fprintf(f
, "SDMA Dump Done.\n");
863 static bool si_vm_fault_occured(struct si_context
*sctx
, uint32_t *out_addr
)
868 uint64_t timestamp
= 0;
871 FILE *p
= popen("dmesg", "r");
875 while (fgets(line
, sizeof(line
), p
)) {
878 if (!line
[0] || line
[0] == '\n')
881 /* Get the timestamp. */
882 if (sscanf(line
, "[%u.%u]", &sec
, &usec
) != 2) {
883 static bool hit
= false;
885 fprintf(stderr
, "%s: failed to parse line '%s'\n",
891 timestamp
= sec
* 1000000llu + usec
;
893 /* If just updating the timestamp. */
897 /* Process messages only if the timestamp is newer. */
898 if (timestamp
<= sctx
->dmesg_timestamp
)
901 /* Only process the first VM fault. */
905 /* Remove trailing \n */
907 if (len
&& line
[len
-1] == '\n')
910 /* Get the message part. */
911 msg
= strchr(line
, ']');
920 if (strstr(msg
, "GPU fault detected:"))
924 msg
= strstr(msg
, "VM_CONTEXT1_PROTECTION_FAULT_ADDR");
926 msg
= strstr(msg
, "0x");
929 if (sscanf(msg
, "%X", out_addr
) == 1)
941 if (timestamp
> sctx
->dmesg_timestamp
)
942 sctx
->dmesg_timestamp
= timestamp
;
946 void si_check_vm_faults(struct r600_common_context
*ctx
,
947 struct radeon_saved_cs
*saved
, enum ring_type ring
)
949 struct si_context
*sctx
= (struct si_context
*)ctx
;
950 struct pipe_screen
*screen
= sctx
->b
.b
.screen
;
955 if (!si_vm_fault_occured(sctx
, &addr
))
958 f
= dd_get_debug_file(false);
962 fprintf(f
, "VM fault report.\n\n");
963 if (os_get_command_line(cmd_line
, sizeof(cmd_line
)))
964 fprintf(f
, "Command: %s\n", cmd_line
);
965 fprintf(f
, "Driver vendor: %s\n", screen
->get_vendor(screen
));
966 fprintf(f
, "Device vendor: %s\n", screen
->get_device_vendor(screen
));
967 fprintf(f
, "Device name: %s\n\n", screen
->get_name(screen
));
968 fprintf(f
, "Failing VM page: 0x%08x\n\n", addr
);
970 if (sctx
->apitrace_call_number
)
971 fprintf(f
, "Last apitrace call: %u\n\n",
972 sctx
->apitrace_call_number
);
976 si_dump_debug_state(&sctx
->b
.b
, f
,
977 PIPE_DUMP_CURRENT_STATES
|
978 PIPE_DUMP_CURRENT_SHADERS
|
979 PIPE_DUMP_LAST_COMMAND_BUFFER
);
983 si_dump_dma(sctx
, saved
, f
);
992 fprintf(stderr
, "Detected a VM fault, exiting...\n");
996 void si_init_debug_functions(struct si_context
*sctx
)
998 sctx
->b
.b
.dump_debug_state
= si_dump_debug_state
;
999 sctx
->b
.check_vm_faults
= si_check_vm_faults
;
1001 /* Set the initial dmesg timestamp for this context, so that
1002 * only new messages will be checked for VM faults.
1004 if (sctx
->screen
->b
.debug_flags
& DBG_CHECK_VM
)
1005 si_vm_fault_occured(sctx
, NULL
);