2 * Copyright 2015 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Marek Olšák <maraeo@gmail.com>
29 #include "sid_tables.h"
30 #include "radeon/radeon_elf_util.h"
31 #include "ddebug/dd_util.h"
32 #include "util/u_memory.h"
35 DEBUG_GET_ONCE_OPTION(replace_shaders
, "RADEON_REPLACE_SHADERS", NULL
)
37 static void si_dump_shader(struct si_screen
*sscreen
,
38 struct si_shader_ctx_state
*state
, FILE *f
)
40 struct si_shader
*current
= state
->current
;
42 if (!state
->cso
|| !current
)
45 if (current
->shader_log
)
46 fwrite(current
->shader_log
, current
->shader_log_size
, 1, f
);
48 si_shader_dump(sscreen
, state
->current
, NULL
,
49 state
->cso
->info
.processor
, f
, false);
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     number of the shader being compiled; it is matched against
 *                the numN entries of the option string
 * \param binary  destination that radeon_elf_read() fills from the file
 * \return true if a matching file was found, fully read and handed to
 *         radeon_elf_read(); false otherwise
 *
 * A malformed option string (number not followed by ':') terminates the
 * process.
 */
bool si_replace_shader(unsigned num, struct radeon_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize, nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Walk the "num:filename;" entries until the one for `num`. */
	while (*p) {
		unsigned long i;
		char *endp;

		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* If more entries follow, isolate this entry's file name. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	f = fopen(p, "r");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	/* Determine the file size by seeking to the end and back. */
	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	nread = fread(buf, 1, filesize, f);
	if (nread != filesize)
		goto file_error;

	radeon_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
 * read them, or use "aha -b -f file" to convert them to html.
 */
#define COLOR_RESET	"\033[0m"
#define COLOR_RED	"\033[31m"
#define COLOR_GREEN	"\033[1;32m"
#define COLOR_YELLOW	"\033[1;33m"
#define COLOR_CYAN	"\033[1;36m"
153 static void si_dump_mmapped_reg(struct si_context
*sctx
, FILE *f
,
156 struct radeon_winsys
*ws
= sctx
->b
.ws
;
159 if (ws
->read_registers(ws
, offset
, 1, &value
))
160 ac_dump_reg(f
, offset
, value
, ~0);
163 static void si_dump_debug_registers(struct si_context
*sctx
, FILE *f
)
165 if (sctx
->screen
->b
.info
.drm_major
== 2 &&
166 sctx
->screen
->b
.info
.drm_minor
< 42)
167 return; /* no radeon support */
169 fprintf(f
, "Memory-mapped registers:\n");
170 si_dump_mmapped_reg(sctx
, f
, R_008010_GRBM_STATUS
);
172 /* No other registers can be read on DRM < 3.1.0. */
173 if (sctx
->screen
->b
.info
.drm_major
< 3 ||
174 sctx
->screen
->b
.info
.drm_minor
< 1) {
179 si_dump_mmapped_reg(sctx
, f
, R_008008_GRBM_STATUS2
);
180 si_dump_mmapped_reg(sctx
, f
, R_008014_GRBM_STATUS_SE0
);
181 si_dump_mmapped_reg(sctx
, f
, R_008018_GRBM_STATUS_SE1
);
182 si_dump_mmapped_reg(sctx
, f
, R_008038_GRBM_STATUS_SE2
);
183 si_dump_mmapped_reg(sctx
, f
, R_00803C_GRBM_STATUS_SE3
);
184 si_dump_mmapped_reg(sctx
, f
, R_00D034_SDMA0_STATUS_REG
);
185 si_dump_mmapped_reg(sctx
, f
, R_00D834_SDMA1_STATUS_REG
);
186 si_dump_mmapped_reg(sctx
, f
, R_000E50_SRBM_STATUS
);
187 si_dump_mmapped_reg(sctx
, f
, R_000E4C_SRBM_STATUS2
);
188 si_dump_mmapped_reg(sctx
, f
, R_000E54_SRBM_STATUS3
);
189 si_dump_mmapped_reg(sctx
, f
, R_008680_CP_STAT
);
190 si_dump_mmapped_reg(sctx
, f
, R_008674_CP_STALLED_STAT1
);
191 si_dump_mmapped_reg(sctx
, f
, R_008678_CP_STALLED_STAT2
);
192 si_dump_mmapped_reg(sctx
, f
, R_008670_CP_STALLED_STAT3
);
193 si_dump_mmapped_reg(sctx
, f
, R_008210_CP_CPC_STATUS
);
194 si_dump_mmapped_reg(sctx
, f
, R_008214_CP_CPC_BUSY_STAT
);
195 si_dump_mmapped_reg(sctx
, f
, R_008218_CP_CPC_STALLED_STAT1
);
196 si_dump_mmapped_reg(sctx
, f
, R_00821C_CP_CPF_STATUS
);
197 si_dump_mmapped_reg(sctx
, f
, R_008220_CP_CPF_BUSY_STAT
);
198 si_dump_mmapped_reg(sctx
, f
, R_008224_CP_CPF_STALLED_STAT1
);
202 static void si_dump_last_ib(struct si_context
*sctx
, FILE *f
)
204 int last_trace_id
= -1;
206 if (!sctx
->last_gfx
.ib
)
209 if (sctx
->last_trace_buf
) {
210 /* We are expecting that the ddebug pipe has already
211 * waited for the context, so this buffer should be idle.
212 * If the GPU is hung, there is no point in waiting for it.
214 uint32_t *map
= sctx
->b
.ws
->buffer_map(sctx
->last_trace_buf
->buf
,
216 PIPE_TRANSFER_UNSYNCHRONIZED
|
219 last_trace_id
= *map
;
222 if (sctx
->init_config
)
223 ac_parse_ib(f
, sctx
->init_config
->pm4
, sctx
->init_config
->ndw
,
224 -1, "IB2: Init config", sctx
->b
.chip_class
);
226 if (sctx
->init_config_gs_rings
)
227 ac_parse_ib(f
, sctx
->init_config_gs_rings
->pm4
,
228 sctx
->init_config_gs_rings
->ndw
,
229 -1, "IB2: Init GS rings", sctx
->b
.chip_class
);
231 ac_parse_ib(f
, sctx
->last_gfx
.ib
, sctx
->last_gfx
.num_dw
,
232 last_trace_id
, "IB", sctx
->b
.chip_class
);
235 static const char *priority_to_string(enum radeon_bo_priority priority
)
237 #define ITEM(x) [RADEON_PRIO_##x] = #x
238 static const char *table
[64] = {
241 ITEM(SO_FILLED_SIZE
),
255 ITEM(SAMPLER_BUFFER
),
257 ITEM(SHADER_RW_BUFFER
),
258 ITEM(COMPUTE_GLOBAL
),
259 ITEM(SAMPLER_TEXTURE
),
260 ITEM(SHADER_RW_IMAGE
),
261 ITEM(SAMPLER_TEXTURE_MSAA
),
264 ITEM(COLOR_BUFFER_MSAA
),
265 ITEM(DEPTH_BUFFER_MSAA
),
271 ITEM(SCRATCH_BUFFER
),
275 assert(priority
< ARRAY_SIZE(table
));
276 return table
[priority
];
279 static int bo_list_compare_va(const struct radeon_bo_list_item
*a
,
280 const struct radeon_bo_list_item
*b
)
282 return a
->vm_address
< b
->vm_address
? -1 :
283 a
->vm_address
> b
->vm_address
? 1 : 0;
286 static void si_dump_bo_list(struct si_context
*sctx
,
287 const struct radeon_saved_cs
*saved
, FILE *f
)
294 /* Sort the list according to VM adddresses first. */
295 qsort(saved
->bo_list
, saved
->bo_count
,
296 sizeof(saved
->bo_list
[0]), (void*)bo_list_compare_va
);
298 fprintf(f
, "Buffer list (in units of pages = 4kB):\n"
299 COLOR_YELLOW
" Size VM start page "
300 "VM end page Usage" COLOR_RESET
"\n");
302 for (i
= 0; i
< saved
->bo_count
; i
++) {
303 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
304 const unsigned page_size
= sctx
->b
.screen
->info
.gart_page_size
;
305 uint64_t va
= saved
->bo_list
[i
].vm_address
;
306 uint64_t size
= saved
->bo_list
[i
].bo_size
;
309 /* If there's unused virtual memory between 2 buffers, print it. */
311 uint64_t previous_va_end
= saved
->bo_list
[i
-1].vm_address
+
312 saved
->bo_list
[i
-1].bo_size
;
314 if (va
> previous_va_end
) {
315 fprintf(f
, " %10"PRIu64
" -- hole --\n",
316 (va
- previous_va_end
) / page_size
);
320 /* Print the buffer. */
321 fprintf(f
, " %10"PRIu64
" 0x%013"PRIX64
" 0x%013"PRIX64
" ",
322 size
/ page_size
, va
/ page_size
, (va
+ size
) / page_size
);
324 /* Print the usage. */
325 for (j
= 0; j
< 64; j
++) {
326 if (!(saved
->bo_list
[i
].priority_usage
& (1llu << j
)))
329 fprintf(f
, "%s%s", !hit
? "" : ", ", priority_to_string(j
));
334 fprintf(f
, "\nNote: The holes represent memory not used by the IB.\n"
335 " Other buffers can still be allocated there.\n\n");
338 static void si_dump_framebuffer(struct si_context
*sctx
, FILE *f
)
340 struct pipe_framebuffer_state
*state
= &sctx
->framebuffer
.state
;
341 struct r600_texture
*rtex
;
344 for (i
= 0; i
< state
->nr_cbufs
; i
++) {
345 if (!state
->cbufs
[i
])
348 rtex
= (struct r600_texture
*)state
->cbufs
[i
]->texture
;
349 fprintf(f
, COLOR_YELLOW
"Color buffer %i:" COLOR_RESET
"\n", i
);
350 r600_print_texture_info(rtex
, f
);
355 rtex
= (struct r600_texture
*)state
->zsbuf
->texture
;
356 fprintf(f
, COLOR_YELLOW
"Depth-stencil buffer:" COLOR_RESET
"\n");
357 r600_print_texture_info(rtex
, f
);
362 static void si_dump_descriptor_list(struct si_descriptors
*desc
,
363 const char *shader_name
,
364 const char *elem_name
,
365 unsigned num_elements
,
369 uint32_t *cpu_list
= desc
->list
;
370 uint32_t *gpu_list
= desc
->gpu_list
;
371 const char *list_note
= "GPU list";
375 list_note
= "CPU list";
378 for (i
= 0; i
< num_elements
; i
++) {
379 fprintf(f
, COLOR_GREEN
"%s%s slot %u (%s):" COLOR_RESET
"\n",
380 shader_name
, elem_name
, i
, list_note
);
382 switch (desc
->element_dw_size
) {
384 for (j
= 0; j
< 4; j
++)
385 ac_dump_reg(f
, R_008F00_SQ_BUF_RSRC_WORD0
+ j
*4,
386 gpu_list
[j
], 0xffffffff);
389 for (j
= 0; j
< 8; j
++)
390 ac_dump_reg(f
, R_008F10_SQ_IMG_RSRC_WORD0
+ j
*4,
391 gpu_list
[j
], 0xffffffff);
393 fprintf(f
, COLOR_CYAN
" Buffer:" COLOR_RESET
"\n");
394 for (j
= 0; j
< 4; j
++)
395 ac_dump_reg(f
, R_008F00_SQ_BUF_RSRC_WORD0
+ j
*4,
396 gpu_list
[4+j
], 0xffffffff);
399 for (j
= 0; j
< 8; j
++)
400 ac_dump_reg(f
, R_008F10_SQ_IMG_RSRC_WORD0
+ j
*4,
401 gpu_list
[j
], 0xffffffff);
403 fprintf(f
, COLOR_CYAN
" Buffer:" COLOR_RESET
"\n");
404 for (j
= 0; j
< 4; j
++)
405 ac_dump_reg(f
, R_008F00_SQ_BUF_RSRC_WORD0
+ j
*4,
406 gpu_list
[4+j
], 0xffffffff);
408 fprintf(f
, COLOR_CYAN
" FMASK:" COLOR_RESET
"\n");
409 for (j
= 0; j
< 8; j
++)
410 ac_dump_reg(f
, R_008F10_SQ_IMG_RSRC_WORD0
+ j
*4,
411 gpu_list
[8+j
], 0xffffffff);
413 fprintf(f
, COLOR_CYAN
" Sampler state:" COLOR_RESET
"\n");
414 for (j
= 0; j
< 4; j
++)
415 ac_dump_reg(f
, R_008F30_SQ_IMG_SAMP_WORD0
+ j
*4,
416 gpu_list
[12+j
], 0xffffffff);
420 if (memcmp(gpu_list
, cpu_list
, desc
->element_dw_size
* 4) != 0) {
421 fprintf(f
, COLOR_RED
"!!!!! This slot was corrupted in GPU memory !!!!!"
426 gpu_list
+= desc
->element_dw_size
;
427 cpu_list
+= desc
->element_dw_size
;
431 static void si_dump_descriptors(struct si_context
*sctx
,
432 struct si_shader_ctx_state
*state
,
435 if (!state
->cso
|| !state
->current
)
438 unsigned type
= state
->cso
->type
;
439 const struct tgsi_shader_info
*info
= &state
->cso
->info
;
440 struct si_descriptors
*descs
=
441 &sctx
->descriptors
[SI_DESCS_FIRST_SHADER
+
442 type
* SI_NUM_SHADER_DESCS
];
443 static const char *shader_name
[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
445 static const char *elem_name
[] = {
446 " - Constant buffer",
451 unsigned num_elements
[] = {
452 util_last_bit(info
->const_buffers_declared
),
453 util_last_bit(info
->shader_buffers_declared
),
454 util_last_bit(info
->samplers_declared
),
455 util_last_bit(info
->images_declared
),
458 if (type
== PIPE_SHADER_VERTEX
) {
459 si_dump_descriptor_list(&sctx
->vertex_buffers
, shader_name
[type
],
460 " - Vertex buffer", info
->num_inputs
, f
);
463 for (unsigned i
= 0; i
< SI_NUM_SHADER_DESCS
; ++i
, ++descs
)
464 si_dump_descriptor_list(descs
, shader_name
[type
], elem_name
[i
],
468 static void si_dump_debug_state(struct pipe_context
*ctx
, FILE *f
,
471 struct si_context
*sctx
= (struct si_context
*)ctx
;
473 if (flags
& PIPE_DUMP_DEVICE_STATUS_REGISTERS
)
474 si_dump_debug_registers(sctx
, f
);
476 if (flags
& PIPE_DUMP_CURRENT_STATES
)
477 si_dump_framebuffer(sctx
, f
);
479 if (flags
& PIPE_DUMP_CURRENT_SHADERS
) {
480 si_dump_shader(sctx
->screen
, &sctx
->vs_shader
, f
);
481 si_dump_shader(sctx
->screen
, &sctx
->tcs_shader
, f
);
482 si_dump_shader(sctx
->screen
, &sctx
->tes_shader
, f
);
483 si_dump_shader(sctx
->screen
, &sctx
->gs_shader
, f
);
484 si_dump_shader(sctx
->screen
, &sctx
->ps_shader
, f
);
486 si_dump_descriptor_list(&sctx
->descriptors
[SI_DESCS_RW_BUFFERS
],
487 "", "RW buffers", SI_NUM_RW_BUFFERS
, f
);
488 si_dump_descriptors(sctx
, &sctx
->vs_shader
, f
);
489 si_dump_descriptors(sctx
, &sctx
->tcs_shader
, f
);
490 si_dump_descriptors(sctx
, &sctx
->tes_shader
, f
);
491 si_dump_descriptors(sctx
, &sctx
->gs_shader
, f
);
492 si_dump_descriptors(sctx
, &sctx
->ps_shader
, f
);
495 if (flags
& PIPE_DUMP_LAST_COMMAND_BUFFER
) {
496 si_dump_bo_list(sctx
, &sctx
->last_gfx
, f
);
497 si_dump_last_ib(sctx
, f
);
499 fprintf(f
, "Done.\n");
502 radeon_clear_saved_cs(&sctx
->last_gfx
);
503 r600_resource_reference(&sctx
->last_trace_buf
, NULL
);
507 static void si_dump_dma(struct si_context
*sctx
,
508 struct radeon_saved_cs
*saved
, FILE *f
)
510 static const char ib_name
[] = "sDMA IB";
513 si_dump_bo_list(sctx
, saved
, f
);
515 fprintf(f
, "------------------ %s begin ------------------\n", ib_name
);
517 for (i
= 0; i
< saved
->num_dw
; ++i
) {
518 fprintf(f
, " %08x\n", saved
->ib
[i
]);
521 fprintf(f
, "------------------- %s end -------------------\n", ib_name
);
524 fprintf(f
, "SDMA Dump Done.\n");
527 static bool si_vm_fault_occured(struct si_context
*sctx
, uint32_t *out_addr
)
532 uint64_t timestamp
= 0;
535 FILE *p
= popen("dmesg", "r");
539 while (fgets(line
, sizeof(line
), p
)) {
542 if (!line
[0] || line
[0] == '\n')
545 /* Get the timestamp. */
546 if (sscanf(line
, "[%u.%u]", &sec
, &usec
) != 2) {
547 static bool hit
= false;
549 fprintf(stderr
, "%s: failed to parse line '%s'\n",
555 timestamp
= sec
* 1000000llu + usec
;
557 /* If just updating the timestamp. */
561 /* Process messages only if the timestamp is newer. */
562 if (timestamp
<= sctx
->dmesg_timestamp
)
565 /* Only process the first VM fault. */
569 /* Remove trailing \n */
571 if (len
&& line
[len
-1] == '\n')
574 /* Get the message part. */
575 msg
= strchr(line
, ']');
584 if (strstr(msg
, "GPU fault detected:"))
588 msg
= strstr(msg
, "VM_CONTEXT1_PROTECTION_FAULT_ADDR");
590 msg
= strstr(msg
, "0x");
593 if (sscanf(msg
, "%X", out_addr
) == 1)
605 if (timestamp
> sctx
->dmesg_timestamp
)
606 sctx
->dmesg_timestamp
= timestamp
;
610 void si_check_vm_faults(struct r600_common_context
*ctx
,
611 struct radeon_saved_cs
*saved
, enum ring_type ring
)
613 struct si_context
*sctx
= (struct si_context
*)ctx
;
614 struct pipe_screen
*screen
= sctx
->b
.b
.screen
;
619 if (!si_vm_fault_occured(sctx
, &addr
))
622 f
= dd_get_debug_file(false);
626 fprintf(f
, "VM fault report.\n\n");
627 if (os_get_command_line(cmd_line
, sizeof(cmd_line
)))
628 fprintf(f
, "Command: %s\n", cmd_line
);
629 fprintf(f
, "Driver vendor: %s\n", screen
->get_vendor(screen
));
630 fprintf(f
, "Device vendor: %s\n", screen
->get_device_vendor(screen
));
631 fprintf(f
, "Device name: %s\n\n", screen
->get_name(screen
));
632 fprintf(f
, "Failing VM page: 0x%08x\n\n", addr
);
634 if (sctx
->apitrace_call_number
)
635 fprintf(f
, "Last apitrace call: %u\n\n",
636 sctx
->apitrace_call_number
);
640 si_dump_debug_state(&sctx
->b
.b
, f
,
641 PIPE_DUMP_CURRENT_STATES
|
642 PIPE_DUMP_CURRENT_SHADERS
|
643 PIPE_DUMP_LAST_COMMAND_BUFFER
);
647 si_dump_dma(sctx
, saved
, f
);
656 fprintf(stderr
, "Detected a VM fault, exiting...\n");
660 void si_init_debug_functions(struct si_context
*sctx
)
662 sctx
->b
.b
.dump_debug_state
= si_dump_debug_state
;
663 sctx
->b
.check_vm_faults
= si_check_vm_faults
;
665 /* Set the initial dmesg timestamp for this context, so that
666 * only new messages will be checked for VM faults.
668 if (sctx
->screen
->b
.debug_flags
& DBG_CHECK_VM
)
669 si_vm_fault_occured(sctx
, NULL
);