2 * Copyright 2015 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Marek Olšák <maraeo@gmail.com>
28 #include "si_compute.h"
31 #include "sid_tables.h"
32 #include "ddebug/dd_util.h"
33 #include "util/u_log.h"
34 #include "util/u_memory.h"
37 static void si_dump_bo_list(struct si_context
*sctx
,
38 const struct radeon_saved_cs
*saved
, FILE *f
);
40 DEBUG_GET_ONCE_OPTION(replace_shaders
, "RADEON_REPLACE_SHADERS", NULL
)
42 static void si_dump_shader(struct si_screen
*sscreen
,
43 enum pipe_shader_type processor
,
44 const struct si_shader
*shader
, FILE *f
)
46 if (shader
->shader_log
)
47 fwrite(shader
->shader_log
, shader
->shader_log_size
, 1, f
);
49 si_shader_dump(sscreen
, shader
, NULL
, processor
, f
, false);
52 struct si_log_chunk_shader
{
53 /* The shader destroy code assumes a current context for unlinking of
56 * While we should be able to destroy shaders without a context, doing
57 * so would happen only very rarely and be therefore likely to fail
58 * just when you're trying to debug something. Let's just remember the
59 * current context in the chunk.
61 struct si_context
*ctx
;
62 struct si_shader
*shader
;
63 enum pipe_shader_type processor
;
65 /* For keep-alive reference counts */
66 struct si_shader_selector
*sel
;
67 struct si_compute
*program
;
71 si_log_chunk_shader_destroy(void *data
)
73 struct si_log_chunk_shader
*chunk
= data
;
74 si_shader_selector_reference(chunk
->ctx
, &chunk
->sel
, NULL
);
75 si_compute_reference(&chunk
->program
, NULL
);
80 si_log_chunk_shader_print(void *data
, FILE *f
)
82 struct si_log_chunk_shader
*chunk
= data
;
83 struct si_screen
*sscreen
= chunk
->ctx
->screen
;
84 si_dump_shader(sscreen
, chunk
->processor
,
88 static struct u_log_chunk_type si_log_chunk_type_shader
= {
89 .destroy
= si_log_chunk_shader_destroy
,
90 .print
= si_log_chunk_shader_print
,
93 static void si_dump_gfx_shader(struct si_context
*ctx
,
94 const struct si_shader_ctx_state
*state
,
95 struct u_log_context
*log
)
97 struct si_shader
*current
= state
->current
;
99 if (!state
->cso
|| !current
)
102 struct si_log_chunk_shader
*chunk
= CALLOC_STRUCT(si_log_chunk_shader
);
104 chunk
->processor
= state
->cso
->info
.processor
;
105 chunk
->shader
= current
;
106 si_shader_selector_reference(ctx
, &chunk
->sel
, current
->selector
);
107 u_log_chunk(log
, &si_log_chunk_type_shader
, chunk
);
110 static void si_dump_compute_shader(struct si_context
*ctx
,
111 struct u_log_context
*log
)
113 const struct si_cs_shader_state
*state
= &ctx
->cs_shader_state
;
118 struct si_log_chunk_shader
*chunk
= CALLOC_STRUCT(si_log_chunk_shader
);
120 chunk
->processor
= PIPE_SHADER_COMPUTE
;
121 chunk
->shader
= &state
->program
->shader
;
122 si_compute_reference(&chunk
->program
, state
->program
);
123 u_log_chunk(log
, &si_log_chunk_type_shader
, chunk
);
127 * Shader compiles can be overridden with arbitrary ELF objects by setting
128 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
130 bool si_replace_shader(unsigned num
, struct ac_shader_binary
*binary
)
132 const char *p
= debug_get_option_replace_shaders();
133 const char *semicolon
;
136 long filesize
, nread
;
138 bool replaced
= false;
146 i
= strtoul(p
, &endp
, 0);
150 fprintf(stderr
, "RADEON_REPLACE_SHADERS formatted badly.\n");
166 semicolon
= strchr(p
, ';');
168 p
= copy
= strndup(p
, semicolon
- p
);
170 fprintf(stderr
, "out of memory\n");
175 fprintf(stderr
, "radeonsi: replace shader %u by %s\n", num
, p
);
179 perror("radeonsi: failed to open file");
183 if (fseek(f
, 0, SEEK_END
) != 0)
190 if (fseek(f
, 0, SEEK_SET
) != 0)
193 buf
= MALLOC(filesize
);
195 fprintf(stderr
, "out of memory\n");
199 nread
= fread(buf
, 1, filesize
, f
);
200 if (nread
!= filesize
)
203 ac_elf_read(buf
, filesize
, binary
);
214 perror("radeonsi: reading shader");
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
 * read them, or use "aha -b -f file" to convert them to html.
 *
 * The macros below are terminal escape sequences that are concatenated
 * directly into format strings.
 */
#define COLOR_RESET	"\033[0m"
#define COLOR_RED	"\033[31m"
#define COLOR_GREEN	"\033[1;32m"
#define COLOR_YELLOW	"\033[1;33m"
#define COLOR_CYAN	"\033[1;36m"
227 static void si_dump_mmapped_reg(struct si_context
*sctx
, FILE *f
,
230 struct radeon_winsys
*ws
= sctx
->b
.ws
;
233 if (ws
->read_registers(ws
, offset
, 1, &value
))
234 ac_dump_reg(f
, sctx
->b
.chip_class
, offset
, value
, ~0);
237 static void si_dump_debug_registers(struct si_context
*sctx
, FILE *f
)
239 if (sctx
->screen
->b
.info
.drm_major
== 2 &&
240 sctx
->screen
->b
.info
.drm_minor
< 42)
241 return; /* no radeon support */
243 fprintf(f
, "Memory-mapped registers:\n");
244 si_dump_mmapped_reg(sctx
, f
, R_008010_GRBM_STATUS
);
246 /* No other registers can be read on DRM < 3.1.0. */
247 if (sctx
->screen
->b
.info
.drm_major
< 3 ||
248 sctx
->screen
->b
.info
.drm_minor
< 1) {
253 si_dump_mmapped_reg(sctx
, f
, R_008008_GRBM_STATUS2
);
254 si_dump_mmapped_reg(sctx
, f
, R_008014_GRBM_STATUS_SE0
);
255 si_dump_mmapped_reg(sctx
, f
, R_008018_GRBM_STATUS_SE1
);
256 si_dump_mmapped_reg(sctx
, f
, R_008038_GRBM_STATUS_SE2
);
257 si_dump_mmapped_reg(sctx
, f
, R_00803C_GRBM_STATUS_SE3
);
258 si_dump_mmapped_reg(sctx
, f
, R_00D034_SDMA0_STATUS_REG
);
259 si_dump_mmapped_reg(sctx
, f
, R_00D834_SDMA1_STATUS_REG
);
260 if (sctx
->b
.chip_class
<= VI
) {
261 si_dump_mmapped_reg(sctx
, f
, R_000E50_SRBM_STATUS
);
262 si_dump_mmapped_reg(sctx
, f
, R_000E4C_SRBM_STATUS2
);
263 si_dump_mmapped_reg(sctx
, f
, R_000E54_SRBM_STATUS3
);
265 si_dump_mmapped_reg(sctx
, f
, R_008680_CP_STAT
);
266 si_dump_mmapped_reg(sctx
, f
, R_008674_CP_STALLED_STAT1
);
267 si_dump_mmapped_reg(sctx
, f
, R_008678_CP_STALLED_STAT2
);
268 si_dump_mmapped_reg(sctx
, f
, R_008670_CP_STALLED_STAT3
);
269 si_dump_mmapped_reg(sctx
, f
, R_008210_CP_CPC_STATUS
);
270 si_dump_mmapped_reg(sctx
, f
, R_008214_CP_CPC_BUSY_STAT
);
271 si_dump_mmapped_reg(sctx
, f
, R_008218_CP_CPC_STALLED_STAT1
);
272 si_dump_mmapped_reg(sctx
, f
, R_00821C_CP_CPF_STATUS
);
273 si_dump_mmapped_reg(sctx
, f
, R_008220_CP_CPF_BUSY_STAT
);
274 si_dump_mmapped_reg(sctx
, f
, R_008224_CP_CPF_STALLED_STAT1
);
/**
 * Payload of a log chunk that describes a range of the gfx command stream.
 */
struct si_log_chunk_cs {
	struct si_context *ctx;
	/* Saved CS the range belongs to; a reference is held by the chunk. */
	struct si_saved_cs *cs;
	/* Whether the buffer list is printed together with the chunk. */
	bool dump_bo_list;
	/* Dword range [gfx_begin, gfx_end) of the gfx IB covered by the chunk. */
	unsigned gfx_begin;
	unsigned gfx_end;
};
285 static void si_log_chunk_type_cs_destroy(void *data
)
287 struct si_log_chunk_cs
*chunk
= data
;
288 si_saved_cs_reference(&chunk
->cs
, NULL
);
292 static void si_parse_current_ib(FILE *f
, struct radeon_winsys_cs
*cs
,
293 unsigned begin
, unsigned end
,
294 int *last_trace_id
, unsigned trace_id_count
,
295 const char *name
, enum chip_class chip_class
)
297 unsigned orig_end
= end
;
299 assert(begin
<= end
);
301 fprintf(f
, "------------------ %s begin (dw = %u) ------------------\n",
304 for (unsigned prev_idx
= 0; prev_idx
< cs
->num_prev
; ++prev_idx
) {
305 struct radeon_winsys_cs_chunk
*chunk
= &cs
->prev
[prev_idx
];
307 if (begin
< chunk
->cdw
) {
308 ac_parse_ib_chunk(f
, chunk
->buf
+ begin
,
309 MIN2(end
, chunk
->cdw
) - begin
,
310 last_trace_id
, trace_id_count
,
311 chip_class
, NULL
, NULL
);
314 if (end
<= chunk
->cdw
)
317 if (begin
< chunk
->cdw
)
318 fprintf(f
, "\n---------- Next %s Chunk ----------\n\n",
321 begin
-= MIN2(begin
, chunk
->cdw
);
325 assert(end
<= cs
->current
.cdw
);
327 ac_parse_ib_chunk(f
, cs
->current
.buf
+ begin
, end
- begin
, last_trace_id
,
328 trace_id_count
, chip_class
, NULL
, NULL
);
330 fprintf(f
, "------------------- %s end (dw = %u) -------------------\n\n",
334 static void si_log_chunk_type_cs_print(void *data
, FILE *f
)
336 struct si_log_chunk_cs
*chunk
= data
;
337 struct si_context
*ctx
= chunk
->ctx
;
338 struct si_saved_cs
*scs
= chunk
->cs
;
339 int last_trace_id
= -1;
341 /* We are expecting that the ddebug pipe has already
342 * waited for the context, so this buffer should be idle.
343 * If the GPU is hung, there is no point in waiting for it.
345 uint32_t *map
= ctx
->b
.ws
->buffer_map(scs
->trace_buf
->buf
,
347 PIPE_TRANSFER_UNSYNCHRONIZED
|
350 last_trace_id
= map
[0];
352 if (chunk
->gfx_end
!= chunk
->gfx_begin
) {
353 if (chunk
->gfx_begin
== 0) {
354 if (ctx
->init_config
)
355 ac_parse_ib(f
, ctx
->init_config
->pm4
, ctx
->init_config
->ndw
,
356 NULL
, 0, "IB2: Init config", ctx
->b
.chip_class
,
359 if (ctx
->init_config_gs_rings
)
360 ac_parse_ib(f
, ctx
->init_config_gs_rings
->pm4
,
361 ctx
->init_config_gs_rings
->ndw
,
362 NULL
, 0, "IB2: Init GS rings", ctx
->b
.chip_class
,
367 ac_parse_ib(f
, scs
->gfx
.ib
+ chunk
->gfx_begin
,
368 chunk
->gfx_end
- chunk
->gfx_begin
,
369 &last_trace_id
, map
? 1 : 0, "IB", ctx
->b
.chip_class
,
372 si_parse_current_ib(f
, ctx
->b
.gfx
.cs
, chunk
->gfx_begin
,
373 chunk
->gfx_end
, &last_trace_id
, map
? 1 : 0,
374 "IB", ctx
->b
.chip_class
);
378 if (chunk
->dump_bo_list
) {
379 fprintf(f
, "Flushing.\n\n");
380 si_dump_bo_list(ctx
, &scs
->gfx
, f
);
384 static const struct u_log_chunk_type si_log_chunk_type_cs
= {
385 .destroy
= si_log_chunk_type_cs_destroy
,
386 .print
= si_log_chunk_type_cs_print
,
389 static void si_log_cs(struct si_context
*ctx
, struct u_log_context
*log
,
392 assert(ctx
->current_saved_cs
);
394 struct si_saved_cs
*scs
= ctx
->current_saved_cs
;
395 unsigned gfx_cur
= ctx
->b
.gfx
.cs
->prev_dw
+ ctx
->b
.gfx
.cs
->current
.cdw
;
398 gfx_cur
== scs
->gfx_last_dw
)
401 struct si_log_chunk_cs
*chunk
= calloc(1, sizeof(*chunk
));
404 si_saved_cs_reference(&chunk
->cs
, scs
);
405 chunk
->dump_bo_list
= dump_bo_list
;
407 chunk
->gfx_begin
= scs
->gfx_last_dw
;
408 chunk
->gfx_end
= gfx_cur
;
409 scs
->gfx_last_dw
= gfx_cur
;
411 u_log_chunk(log
, &si_log_chunk_type_cs
, chunk
);
/**
 * Logger callback: logs the gfx command stream emitted so far without
 * requesting a buffer-list dump.
 *
 * \param data  the struct si_context to log, passed as an opaque pointer
 * \param log   log context that receives the chunk
 */
void si_auto_log_cs(void *data, struct u_log_context *log)
{
	struct si_context *sctx = data;

	si_log_cs(sctx, log, false);
}
420 void si_log_hw_flush(struct si_context
*sctx
)
425 si_log_cs(sctx
, sctx
->b
.log
, true);
428 static const char *priority_to_string(enum radeon_bo_priority priority
)
430 #define ITEM(x) [RADEON_PRIO_##x] = #x
431 static const char *table
[64] = {
434 ITEM(SO_FILLED_SIZE
),
448 ITEM(SAMPLER_BUFFER
),
450 ITEM(SHADER_RW_BUFFER
),
451 ITEM(COMPUTE_GLOBAL
),
452 ITEM(SAMPLER_TEXTURE
),
453 ITEM(SHADER_RW_IMAGE
),
454 ITEM(SAMPLER_TEXTURE_MSAA
),
457 ITEM(COLOR_BUFFER_MSAA
),
458 ITEM(DEPTH_BUFFER_MSAA
),
464 ITEM(SCRATCH_BUFFER
),
468 assert(priority
< ARRAY_SIZE(table
));
469 return table
[priority
];
472 static int bo_list_compare_va(const struct radeon_bo_list_item
*a
,
473 const struct radeon_bo_list_item
*b
)
475 return a
->vm_address
< b
->vm_address
? -1 :
476 a
->vm_address
> b
->vm_address
? 1 : 0;
479 static void si_dump_bo_list(struct si_context
*sctx
,
480 const struct radeon_saved_cs
*saved
, FILE *f
)
487 /* Sort the list according to VM addresses first. */
488 qsort(saved
->bo_list
, saved
->bo_count
,
489 sizeof(saved
->bo_list
[0]), (void*)bo_list_compare_va
);
491 fprintf(f
, "Buffer list (in units of pages = 4kB):\n"
492 COLOR_YELLOW
" Size VM start page "
493 "VM end page Usage" COLOR_RESET
"\n");
495 for (i
= 0; i
< saved
->bo_count
; i
++) {
496 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
497 const unsigned page_size
= sctx
->b
.screen
->info
.gart_page_size
;
498 uint64_t va
= saved
->bo_list
[i
].vm_address
;
499 uint64_t size
= saved
->bo_list
[i
].bo_size
;
502 /* If there's unused virtual memory between 2 buffers, print it. */
504 uint64_t previous_va_end
= saved
->bo_list
[i
-1].vm_address
+
505 saved
->bo_list
[i
-1].bo_size
;
507 if (va
> previous_va_end
) {
508 fprintf(f
, " %10"PRIu64
" -- hole --\n",
509 (va
- previous_va_end
) / page_size
);
513 /* Print the buffer. */
514 fprintf(f
, " %10"PRIu64
" 0x%013"PRIX64
" 0x%013"PRIX64
" ",
515 size
/ page_size
, va
/ page_size
, (va
+ size
) / page_size
);
517 /* Print the usage. */
518 for (j
= 0; j
< 64; j
++) {
519 if (!(saved
->bo_list
[i
].priority_usage
& (1ull << j
)))
522 fprintf(f
, "%s%s", !hit
? "" : ", ", priority_to_string(j
));
527 fprintf(f
, "\nNote: The holes represent memory not used by the IB.\n"
528 " Other buffers can still be allocated there.\n\n");
531 static void si_dump_framebuffer(struct si_context
*sctx
, struct u_log_context
*log
)
533 struct pipe_framebuffer_state
*state
= &sctx
->framebuffer
.state
;
534 struct r600_texture
*rtex
;
537 for (i
= 0; i
< state
->nr_cbufs
; i
++) {
538 if (!state
->cbufs
[i
])
541 rtex
= (struct r600_texture
*)state
->cbufs
[i
]->texture
;
542 u_log_printf(log
, COLOR_YELLOW
"Color buffer %i:" COLOR_RESET
"\n", i
);
543 r600_print_texture_info(sctx
->b
.screen
, rtex
, log
);
544 u_log_printf(log
, "\n");
548 rtex
= (struct r600_texture
*)state
->zsbuf
->texture
;
549 u_log_printf(log
, COLOR_YELLOW
"Depth-stencil buffer:" COLOR_RESET
"\n");
550 r600_print_texture_info(sctx
->b
.screen
, rtex
, log
);
551 u_log_printf(log
, "\n");
555 typedef unsigned (*slot_remap_func
)(unsigned);
557 struct si_log_chunk_desc_list
{
558 /** Pointer to memory map of buffer where the list is uploaded */
560 /** Reference of buffer where the list is uploaded, so that gpu_list
562 struct r600_resource
*buf
;
564 const char *shader_name
;
565 const char *elem_name
;
566 slot_remap_func slot_remap
;
567 enum chip_class chip_class
;
568 unsigned element_dw_size
;
569 unsigned num_elements
;
575 si_log_chunk_desc_list_destroy(void *data
)
577 struct si_log_chunk_desc_list
*chunk
= data
;
578 r600_resource_reference(&chunk
->buf
, NULL
);
583 si_log_chunk_desc_list_print(void *data
, FILE *f
)
585 struct si_log_chunk_desc_list
*chunk
= data
;
587 for (unsigned i
= 0; i
< chunk
->num_elements
; i
++) {
588 unsigned cpu_dw_offset
= i
* chunk
->element_dw_size
;
589 unsigned gpu_dw_offset
= chunk
->slot_remap(i
) * chunk
->element_dw_size
;
590 const char *list_note
= chunk
->gpu_list
? "GPU list" : "CPU list";
591 uint32_t *cpu_list
= chunk
->list
+ cpu_dw_offset
;
592 uint32_t *gpu_list
= chunk
->gpu_list
? chunk
->gpu_list
+ gpu_dw_offset
: cpu_list
;
594 fprintf(f
, COLOR_GREEN
"%s%s slot %u (%s):" COLOR_RESET
"\n",
595 chunk
->shader_name
, chunk
->elem_name
, i
, list_note
);
597 switch (chunk
->element_dw_size
) {
599 for (unsigned j
= 0; j
< 4; j
++)
600 ac_dump_reg(f
, chunk
->chip_class
,
601 R_008F00_SQ_BUF_RSRC_WORD0
+ j
*4,
602 gpu_list
[j
], 0xffffffff);
605 for (unsigned j
= 0; j
< 8; j
++)
606 ac_dump_reg(f
, chunk
->chip_class
,
607 R_008F10_SQ_IMG_RSRC_WORD0
+ j
*4,
608 gpu_list
[j
], 0xffffffff);
610 fprintf(f
, COLOR_CYAN
" Buffer:" COLOR_RESET
"\n");
611 for (unsigned j
= 0; j
< 4; j
++)
612 ac_dump_reg(f
, chunk
->chip_class
,
613 R_008F00_SQ_BUF_RSRC_WORD0
+ j
*4,
614 gpu_list
[4+j
], 0xffffffff);
617 for (unsigned j
= 0; j
< 8; j
++)
618 ac_dump_reg(f
, chunk
->chip_class
,
619 R_008F10_SQ_IMG_RSRC_WORD0
+ j
*4,
620 gpu_list
[j
], 0xffffffff);
622 fprintf(f
, COLOR_CYAN
" Buffer:" COLOR_RESET
"\n");
623 for (unsigned j
= 0; j
< 4; j
++)
624 ac_dump_reg(f
, chunk
->chip_class
,
625 R_008F00_SQ_BUF_RSRC_WORD0
+ j
*4,
626 gpu_list
[4+j
], 0xffffffff);
628 fprintf(f
, COLOR_CYAN
" FMASK:" COLOR_RESET
"\n");
629 for (unsigned j
= 0; j
< 8; j
++)
630 ac_dump_reg(f
, chunk
->chip_class
,
631 R_008F10_SQ_IMG_RSRC_WORD0
+ j
*4,
632 gpu_list
[8+j
], 0xffffffff);
634 fprintf(f
, COLOR_CYAN
" Sampler state:" COLOR_RESET
"\n");
635 for (unsigned j
= 0; j
< 4; j
++)
636 ac_dump_reg(f
, chunk
->chip_class
,
637 R_008F30_SQ_IMG_SAMP_WORD0
+ j
*4,
638 gpu_list
[12+j
], 0xffffffff);
642 if (memcmp(gpu_list
, cpu_list
, chunk
->element_dw_size
* 4) != 0) {
643 fprintf(f
, COLOR_RED
"!!!!! This slot was corrupted in GPU memory !!!!!"
652 static const struct u_log_chunk_type si_log_chunk_type_descriptor_list
= {
653 .destroy
= si_log_chunk_desc_list_destroy
,
654 .print
= si_log_chunk_desc_list_print
,
657 static void si_dump_descriptor_list(struct si_screen
*screen
,
658 struct si_descriptors
*desc
,
659 const char *shader_name
,
660 const char *elem_name
,
661 unsigned element_dw_size
,
662 unsigned num_elements
,
663 slot_remap_func slot_remap
,
664 struct u_log_context
*log
)
669 /* In some cases, the caller doesn't know how many elements are really
670 * uploaded. Reduce num_elements to fit in the range of active slots. */
671 unsigned active_range_dw_begin
=
672 desc
->first_active_slot
* desc
->element_dw_size
;
673 unsigned active_range_dw_end
=
674 active_range_dw_begin
+ desc
->num_active_slots
* desc
->element_dw_size
;
676 while (num_elements
> 0) {
677 int i
= slot_remap(num_elements
- 1);
678 unsigned dw_begin
= i
* element_dw_size
;
679 unsigned dw_end
= dw_begin
+ element_dw_size
;
681 if (dw_begin
>= active_range_dw_begin
&& dw_end
<= active_range_dw_end
)
687 struct si_log_chunk_desc_list
*chunk
=
688 CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list
,
689 4 * element_dw_size
* num_elements
);
690 chunk
->shader_name
= shader_name
;
691 chunk
->elem_name
= elem_name
;
692 chunk
->element_dw_size
= element_dw_size
;
693 chunk
->num_elements
= num_elements
;
694 chunk
->slot_remap
= slot_remap
;
695 chunk
->chip_class
= screen
->b
.chip_class
;
697 r600_resource_reference(&chunk
->buf
, desc
->buffer
);
698 chunk
->gpu_list
= desc
->gpu_list
;
700 for (unsigned i
= 0; i
< num_elements
; ++i
) {
701 memcpy(&chunk
->list
[i
* element_dw_size
],
702 &desc
->list
[slot_remap(i
) * element_dw_size
],
703 4 * element_dw_size
);
706 u_log_chunk(log
, &si_log_chunk_type_descriptor_list
, chunk
);
/**
 * Slot remap function that maps every slot to itself. Used where a
 * slot_remap_func is required but the descriptor list needs no remapping.
 */
static unsigned si_identity(unsigned slot)
{
	return slot;
}
714 static void si_dump_descriptors(struct si_context
*sctx
,
715 enum pipe_shader_type processor
,
716 const struct tgsi_shader_info
*info
,
717 struct u_log_context
*log
)
719 struct si_descriptors
*descs
=
720 &sctx
->descriptors
[SI_DESCS_FIRST_SHADER
+
721 processor
* SI_NUM_SHADER_DESCS
];
722 static const char *shader_name
[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
723 const char *name
= shader_name
[processor
];
724 unsigned enabled_constbuf
, enabled_shaderbuf
, enabled_samplers
;
725 unsigned enabled_images
;
728 enabled_constbuf
= info
->const_buffers_declared
;
729 enabled_shaderbuf
= info
->shader_buffers_declared
;
730 enabled_samplers
= info
->samplers_declared
;
731 enabled_images
= info
->images_declared
;
733 enabled_constbuf
= sctx
->const_and_shader_buffers
[processor
].enabled_mask
>>
734 SI_NUM_SHADER_BUFFERS
;
735 enabled_shaderbuf
= sctx
->const_and_shader_buffers
[processor
].enabled_mask
&
736 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS
);
737 enabled_shaderbuf
= util_bitreverse(enabled_shaderbuf
) >>
738 (32 - SI_NUM_SHADER_BUFFERS
);
739 enabled_samplers
= sctx
->samplers
[processor
].views
.enabled_mask
;
740 enabled_images
= sctx
->images
[processor
].enabled_mask
;
743 if (processor
== PIPE_SHADER_VERTEX
) {
744 assert(info
); /* only CS may not have an info struct */
746 si_dump_descriptor_list(sctx
->screen
, &sctx
->vertex_buffers
, name
,
747 " - Vertex buffer", 4, info
->num_inputs
,
751 si_dump_descriptor_list(sctx
->screen
,
752 &descs
[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS
],
753 name
, " - Constant buffer", 4,
754 util_last_bit(enabled_constbuf
),
755 si_get_constbuf_slot
, log
);
756 si_dump_descriptor_list(sctx
->screen
,
757 &descs
[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS
],
758 name
, " - Shader buffer", 4,
759 util_last_bit(enabled_shaderbuf
),
760 si_get_shaderbuf_slot
, log
);
761 si_dump_descriptor_list(sctx
->screen
,
762 &descs
[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES
],
763 name
, " - Sampler", 16,
764 util_last_bit(enabled_samplers
),
765 si_get_sampler_slot
, log
);
766 si_dump_descriptor_list(sctx
->screen
,
767 &descs
[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES
],
769 util_last_bit(enabled_images
),
770 si_get_image_slot
, log
);
773 static void si_dump_gfx_descriptors(struct si_context
*sctx
,
774 const struct si_shader_ctx_state
*state
,
775 struct u_log_context
*log
)
777 if (!state
->cso
|| !state
->current
)
780 si_dump_descriptors(sctx
, state
->cso
->type
, &state
->cso
->info
, log
);
783 static void si_dump_compute_descriptors(struct si_context
*sctx
,
784 struct u_log_context
*log
)
786 if (!sctx
->cs_shader_state
.program
)
789 si_dump_descriptors(sctx
, PIPE_SHADER_COMPUTE
, NULL
, log
);
/**
 * One line of shader disassembly together with the position of the
 * instruction it describes.
 */
struct si_shader_inst {
	/* one disasm line */
	char text[160];
	/* instruction offset */
	unsigned offset;
	/* instruction size = 4 or 8 */
	unsigned size;
};
798 /* Split a disassembly string into lines and add them to the array pointed
799 * to by "instructions". */
800 static void si_add_split_disasm(const char *disasm
,
803 struct si_shader_inst
*instructions
)
805 struct si_shader_inst
*last_inst
= *num
? &instructions
[*num
- 1] : NULL
;
808 while ((next
= strchr(disasm
, '\n'))) {
809 struct si_shader_inst
*inst
= &instructions
[*num
];
810 unsigned len
= next
- disasm
;
812 assert(len
< ARRAY_SIZE(inst
->text
));
813 memcpy(inst
->text
, disasm
, len
);
815 inst
->offset
= last_inst
? last_inst
->offset
+ last_inst
->size
: 0;
817 const char *semicolon
= strchr(disasm
, ';');
819 /* More than 16 chars after ";" means the instruction is 8 bytes long. */
820 inst
->size
= next
- semicolon
> 16 ? 8 : 4;
822 snprintf(inst
->text
+ len
, ARRAY_SIZE(inst
->text
) - len
,
823 " [PC=0x%"PRIx64
", off=%u, size=%u]",
824 start_addr
+ inst
->offset
, inst
->offset
, inst
->size
);
/* Capacity of the wave arrays filled by si_get_wave_info(). */
#define MAX_WAVES_PER_CHIP (64 * 40)
834 struct si_wave_info
{
835 unsigned se
; /* shader engine */
836 unsigned sh
; /* shader array */
837 unsigned cu
; /* compute unit */
841 uint64_t pc
; /* program counter */
845 bool matched
; /* whether the wave is used by a currently-bound shader */
848 static int compare_wave(const void *p1
, const void *p2
)
850 struct si_wave_info
*w1
= (struct si_wave_info
*)p1
;
851 struct si_wave_info
*w2
= (struct si_wave_info
*)p2
;
853 /* Sort waves according to PC and then SE, SH, CU, etc. */
870 if (w1
->simd
< w2
->simd
)
872 if (w1
->simd
> w2
->simd
)
874 if (w1
->wave
< w2
->wave
)
876 if (w1
->wave
> w2
->wave
)
882 /* Return wave information. "waves" should be a large enough array. */
883 static unsigned si_get_wave_info(struct si_wave_info waves
[MAX_WAVES_PER_CHIP
])
886 unsigned num_waves
= 0;
888 FILE *p
= popen("umr -wa", "r");
892 if (!fgets(line
, sizeof(line
), p
) ||
893 strncmp(line
, "SE", 2) != 0) {
898 while (fgets(line
, sizeof(line
), p
)) {
899 struct si_wave_info
*w
;
900 uint32_t pc_hi
, pc_lo
, exec_hi
, exec_lo
;
902 assert(num_waves
< MAX_WAVES_PER_CHIP
);
903 w
= &waves
[num_waves
];
905 if (sscanf(line
, "%u %u %u %u %u %x %x %x %x %x %x %x",
906 &w
->se
, &w
->sh
, &w
->cu
, &w
->simd
, &w
->wave
,
907 &w
->status
, &pc_hi
, &pc_lo
, &w
->inst_dw0
,
908 &w
->inst_dw1
, &exec_hi
, &exec_lo
) == 12) {
909 w
->pc
= ((uint64_t)pc_hi
<< 32) | pc_lo
;
910 w
->exec
= ((uint64_t)exec_hi
<< 32) | exec_lo
;
916 qsort(waves
, num_waves
, sizeof(struct si_wave_info
), compare_wave
);
922 /* If the shader is being executed, print its asm instructions, and annotate
923 * those that are being executed right now with information about waves that
924 * execute them. This is most useful during a GPU hang.
926 static void si_print_annotated_shader(struct si_shader
*shader
,
927 struct si_wave_info
*waves
,
931 if (!shader
|| !shader
->binary
.disasm_string
)
934 uint64_t start_addr
= shader
->bo
->gpu_address
;
935 uint64_t end_addr
= start_addr
+ shader
->bo
->b
.b
.width0
;
938 /* See if any wave executes the shader. */
939 for (i
= 0; i
< num_waves
; i
++) {
940 if (start_addr
<= waves
[i
].pc
&& waves
[i
].pc
<= end_addr
)
944 return; /* the shader is not being executed */
946 /* Remember the first found wave. The waves are sorted according to PC. */
950 /* Get the list of instructions.
951 * Buffer size / 4 is the upper bound of the instruction count.
953 unsigned num_inst
= 0;
954 struct si_shader_inst
*instructions
=
955 calloc(shader
->bo
->b
.b
.width0
/ 4, sizeof(struct si_shader_inst
));
957 if (shader
->prolog
) {
958 si_add_split_disasm(shader
->prolog
->binary
.disasm_string
,
959 start_addr
, &num_inst
, instructions
);
961 if (shader
->previous_stage
) {
962 si_add_split_disasm(shader
->previous_stage
->binary
.disasm_string
,
963 start_addr
, &num_inst
, instructions
);
965 if (shader
->prolog2
) {
966 si_add_split_disasm(shader
->prolog2
->binary
.disasm_string
,
967 start_addr
, &num_inst
, instructions
);
969 si_add_split_disasm(shader
->binary
.disasm_string
,
970 start_addr
, &num_inst
, instructions
);
971 if (shader
->epilog
) {
972 si_add_split_disasm(shader
->epilog
->binary
.disasm_string
,
973 start_addr
, &num_inst
, instructions
);
976 fprintf(f
, COLOR_YELLOW
"%s - annotated disassembly:" COLOR_RESET
"\n",
977 si_get_shader_name(shader
, shader
->selector
->type
));
979 /* Print instructions with annotations. */
980 for (i
= 0; i
< num_inst
; i
++) {
981 struct si_shader_inst
*inst
= &instructions
[i
];
983 fprintf(f
, "%s\n", inst
->text
);
985 /* Print which waves execute the instruction right now. */
986 while (num_waves
&& start_addr
+ inst
->offset
== waves
->pc
) {
988 " " COLOR_GREEN
"^ SE%u SH%u CU%u "
989 "SIMD%u WAVE%u EXEC=%016"PRIx64
" ",
990 waves
->se
, waves
->sh
, waves
->cu
, waves
->simd
,
991 waves
->wave
, waves
->exec
);
993 if (inst
->size
== 4) {
994 fprintf(f
, "INST32=%08X" COLOR_RESET
"\n",
997 fprintf(f
, "INST64=%08X %08X" COLOR_RESET
"\n",
998 waves
->inst_dw0
, waves
->inst_dw1
);
1001 waves
->matched
= true;
1011 static void si_dump_annotated_shaders(struct si_context
*sctx
, FILE *f
)
1013 struct si_wave_info waves
[MAX_WAVES_PER_CHIP
];
1014 unsigned num_waves
= si_get_wave_info(waves
);
1016 fprintf(f
, COLOR_CYAN
"The number of active waves = %u" COLOR_RESET
1019 si_print_annotated_shader(sctx
->vs_shader
.current
, waves
, num_waves
, f
);
1020 si_print_annotated_shader(sctx
->tcs_shader
.current
, waves
, num_waves
, f
);
1021 si_print_annotated_shader(sctx
->tes_shader
.current
, waves
, num_waves
, f
);
1022 si_print_annotated_shader(sctx
->gs_shader
.current
, waves
, num_waves
, f
);
1023 si_print_annotated_shader(sctx
->ps_shader
.current
, waves
, num_waves
, f
);
1025 /* Print waves executing shaders that are not currently bound. */
1028 for (i
= 0; i
< num_waves
; i
++) {
1029 if (waves
[i
].matched
)
1033 fprintf(f
, COLOR_CYAN
1034 "Waves not executing currently-bound shaders:"
1038 fprintf(f
, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
1039 " INST=%08X %08X PC=%"PRIx64
"\n",
1040 waves
[i
].se
, waves
[i
].sh
, waves
[i
].cu
, waves
[i
].simd
,
1041 waves
[i
].wave
, waves
[i
].exec
, waves
[i
].inst_dw0
,
1042 waves
[i
].inst_dw1
, waves
[i
].pc
);
1048 static void si_dump_command(const char *title
, const char *command
, FILE *f
)
1052 FILE *p
= popen(command
, "r");
1056 fprintf(f
, COLOR_YELLOW
"%s: " COLOR_RESET
"\n", title
);
1057 while (fgets(line
, sizeof(line
), p
))
1063 static void si_dump_debug_state(struct pipe_context
*ctx
, FILE *f
,
1066 struct si_context
*sctx
= (struct si_context
*)ctx
;
1069 u_log_flush(sctx
->b
.log
);
1071 if (flags
& PIPE_DUMP_DEVICE_STATUS_REGISTERS
) {
1072 si_dump_debug_registers(sctx
, f
);
1074 si_dump_annotated_shaders(sctx
, f
);
1075 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f
);
1076 si_dump_command("Wave information", "umr -O bits -wa", f
);
1080 void si_log_draw_state(struct si_context
*sctx
, struct u_log_context
*log
)
1085 si_dump_framebuffer(sctx
, log
);
1087 si_dump_gfx_shader(sctx
, &sctx
->vs_shader
, log
);
1088 si_dump_gfx_shader(sctx
, &sctx
->tcs_shader
, log
);
1089 si_dump_gfx_shader(sctx
, &sctx
->tes_shader
, log
);
1090 si_dump_gfx_shader(sctx
, &sctx
->gs_shader
, log
);
1091 si_dump_gfx_shader(sctx
, &sctx
->ps_shader
, log
);
1093 si_dump_descriptor_list(sctx
->screen
,
1094 &sctx
->descriptors
[SI_DESCS_RW_BUFFERS
],
1095 "", "RW buffers", 4, SI_NUM_RW_BUFFERS
,
1097 si_dump_gfx_descriptors(sctx
, &sctx
->vs_shader
, log
);
1098 si_dump_gfx_descriptors(sctx
, &sctx
->tcs_shader
, log
);
1099 si_dump_gfx_descriptors(sctx
, &sctx
->tes_shader
, log
);
1100 si_dump_gfx_descriptors(sctx
, &sctx
->gs_shader
, log
);
1101 si_dump_gfx_descriptors(sctx
, &sctx
->ps_shader
, log
);
/**
 * Log the state relevant to a compute dispatch: the compute shader followed
 * by its descriptor lists.
 *
 * Does nothing when \p log is NULL.
 */
void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)
{
	if (log) {
		si_dump_compute_shader(sctx, log);
		si_dump_compute_descriptors(sctx, log);
	}
}
1113 static void si_dump_dma(struct si_context
*sctx
,
1114 struct radeon_saved_cs
*saved
, FILE *f
)
1116 static const char ib_name
[] = "sDMA IB";
1119 si_dump_bo_list(sctx
, saved
, f
);
1121 fprintf(f
, "------------------ %s begin ------------------\n", ib_name
);
1123 for (i
= 0; i
< saved
->num_dw
; ++i
) {
1124 fprintf(f
, " %08x\n", saved
->ib
[i
]);
1127 fprintf(f
, "------------------- %s end -------------------\n", ib_name
);
1130 fprintf(f
, "SDMA Dump Done.\n");
1133 void si_check_vm_faults(struct r600_common_context
*ctx
,
1134 struct radeon_saved_cs
*saved
, enum ring_type ring
)
1136 struct si_context
*sctx
= (struct si_context
*)ctx
;
1137 struct pipe_screen
*screen
= sctx
->b
.b
.screen
;
1140 char cmd_line
[4096];
1142 if (!ac_vm_fault_occured(sctx
->b
.chip_class
,
1143 &sctx
->dmesg_timestamp
, &addr
))
1146 f
= dd_get_debug_file(false);
1150 fprintf(f
, "VM fault report.\n\n");
1151 if (os_get_command_line(cmd_line
, sizeof(cmd_line
)))
1152 fprintf(f
, "Command: %s\n", cmd_line
);
1153 fprintf(f
, "Driver vendor: %s\n", screen
->get_vendor(screen
));
1154 fprintf(f
, "Device vendor: %s\n", screen
->get_device_vendor(screen
));
1155 fprintf(f
, "Device name: %s\n\n", screen
->get_name(screen
));
1156 fprintf(f
, "Failing VM page: 0x%08"PRIx64
"\n\n", addr
);
1158 if (sctx
->apitrace_call_number
)
1159 fprintf(f
, "Last apitrace call: %u\n\n",
1160 sctx
->apitrace_call_number
);
1164 struct u_log_context log
;
1165 u_log_context_init(&log
);
1167 si_log_draw_state(sctx
, &log
);
1168 si_log_compute_state(sctx
, &log
);
1170 u_log_new_page_print(&log
, f
);
1171 u_log_context_destroy(&log
);
1175 si_dump_dma(sctx
, saved
, f
);
1184 fprintf(stderr
, "Detected a VM fault, exiting...\n");
1188 void si_init_debug_functions(struct si_context
*sctx
)
1190 sctx
->b
.b
.dump_debug_state
= si_dump_debug_state
;
1191 sctx
->b
.check_vm_faults
= si_check_vm_faults
;
1193 /* Set the initial dmesg timestamp for this context, so that
1194 * only new messages will be checked for VM faults.
1196 if (sctx
->screen
->b
.debug_flags
& DBG_CHECK_VM
)
1197 ac_vm_fault_occured(sctx
->b
.chip_class
,
1198 &sctx
->dmesg_timestamp
, NULL
);