From 4c3f36ec6b59de2245257c819df7628aaa6bf57b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=83=C2=A4hnle?= Date: Fri, 4 Aug 2017 18:24:33 +0200 Subject: [PATCH] radeonsi: print saved CS to the log context MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Use the auto logger facility, so that CS chunks will be interleaved with other log info. v2: - fix some crashes when not using CE - fix skipping "previous" chunks of current (unflushed) IB - fix error handling in si_begin_cs_debug Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_debug.c | 218 +++++++++++++++---- src/gallium/drivers/radeonsi/si_hw_context.c | 67 ++++-- src/gallium/drivers/radeonsi/si_pipe.c | 7 +- src/gallium/drivers/radeonsi/si_pipe.h | 31 ++- src/gallium/drivers/radeonsi/si_state_draw.c | 28 +-- 5 files changed, 263 insertions(+), 88 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index e797f10f7f0..bf63f9f5216 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -34,6 +34,9 @@ #include "util/u_memory.h" #include "ac_debug.h" +static void si_dump_bo_list(struct si_context *sctx, + const struct radeon_saved_cs *saved, FILE *f); + DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL) static void si_dump_shader(struct si_screen *sscreen, @@ -266,51 +269,183 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f) fprintf(f, "\n"); } -static void si_dump_last_ib(struct si_context *sctx, FILE *f) +struct si_log_chunk_cs { + struct si_context *ctx; + struct si_saved_cs *cs; + bool dump_bo_list; + unsigned gfx_begin, gfx_end; + unsigned ce_begin, ce_end; +}; + +static void si_log_chunk_type_cs_destroy(void *data) +{ + struct si_log_chunk_cs *chunk = data; + si_saved_cs_reference(&chunk->cs, NULL); + free(chunk); +} + +static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs, + unsigned begin, unsigned end, + unsigned last_trace_id, const char *name, + enum chip_class chip_class) { + unsigned orig_end = end; + + assert(begin <= end); + + fprintf(f, "------------------ %s begin (dw = %u) ------------------\n", + name, begin); + + for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) { + struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx]; + + if (begin < chunk->cdw) { + ac_parse_ib_chunk(f, chunk->buf + begin, + MIN2(end, chunk->cdw) - begin, + last_trace_id, chip_class, NULL, NULL); + } + + if (end <= chunk->cdw) + return; + + if (begin < chunk->cdw) + fprintf(f, "\n---------- Next %s Chunk ----------\n\n", + name); + + begin -= MIN2(begin, chunk->cdw); + end -= chunk->cdw; + } + + assert(end <= cs->current.cdw); + + ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id, + chip_class, NULL, NULL); + + fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n", + name, orig_end); +} + +static void si_log_chunk_type_cs_print(void *data, FILE *f) +{ + struct si_log_chunk_cs *chunk = data; + struct si_context *ctx = chunk->ctx; + struct si_saved_cs *scs = chunk->cs; int last_trace_id = -1; int last_ce_trace_id = -1; - if (!sctx->last_gfx.ib) - return; + /* We are expecting that the ddebug pipe has already + * waited for the context, so this buffer should be idle. + * If the GPU is hung, there is no point in waiting for it. + */ + uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf, + NULL, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ); + if (map) { + last_trace_id = map[0]; + last_ce_trace_id = map[1]; + } + + if (chunk->gfx_end != chunk->gfx_begin) { + if (chunk->gfx_begin == 0) { + if (ctx->init_config) + ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw, + -1, "IB2: Init config", ctx->b.chip_class, + NULL, NULL); + + if (ctx->init_config_gs_rings) + ac_parse_ib(f, ctx->init_config_gs_rings->pm4, + ctx->init_config_gs_rings->ndw, + -1, "IB2: Init GS rings", ctx->b.chip_class, + NULL, NULL); + } + + if (scs->flushed) { + ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin, + chunk->gfx_end - chunk->gfx_begin, + last_trace_id, "IB", ctx->b.chip_class, + NULL, NULL); + } else { + si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin, + chunk->gfx_end, last_trace_id, "IB", + ctx->b.chip_class); + } + } + + if (chunk->ce_end != chunk->ce_begin) { + assert(ctx->ce_ib); - if (sctx->last_trace_buf) { - /* We are expecting that the ddebug pipe has already - * waited for the context, so this buffer should be idle. - * If the GPU is hung, there is no point in waiting for it. - */ - uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf, - NULL, - PIPE_TRANSFER_UNSYNCHRONIZED | - PIPE_TRANSFER_READ); - if (map) { - last_trace_id = map[0]; - last_ce_trace_id = map[1]; + if (scs->flushed) { + ac_parse_ib(f, scs->ce.ib + chunk->ce_begin, + chunk->ce_end - chunk->ce_begin, + last_ce_trace_id, "CE IB", ctx->b.chip_class, + NULL, NULL); + } else { + si_parse_current_ib(f, ctx->ce_ib, chunk->ce_begin, + chunk->ce_end, last_ce_trace_id, "CE IB", + ctx->b.chip_class); } } - if (sctx->init_config) - ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw, - -1, "IB2: Init config", sctx->b.chip_class, - NULL, NULL); - - if (sctx->init_config_gs_rings) - ac_parse_ib(f, sctx->init_config_gs_rings->pm4, - sctx->init_config_gs_rings->ndw, - -1, "IB2: Init GS rings", sctx->b.chip_class, - NULL, NULL); - - ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw, - last_trace_id, "IB", sctx->b.chip_class, - NULL, NULL); - - if (sctx->last_ce.ib) { - ac_parse_ib(f, sctx->last_ce.ib, sctx->last_ce.num_dw, - last_ce_trace_id, "CE IB", sctx->b.chip_class, - NULL, NULL); + if (chunk->dump_bo_list) { + fprintf(f, "Flushing.\n\n"); + si_dump_bo_list(ctx, &scs->gfx, f); } } +static const struct u_log_chunk_type si_log_chunk_type_cs = { + .destroy = si_log_chunk_type_cs_destroy, + .print = si_log_chunk_type_cs_print, +}; + +static void si_log_cs(struct si_context *ctx, struct u_log_context *log, + bool dump_bo_list) +{ + assert(ctx->current_saved_cs); + + struct si_saved_cs *scs = ctx->current_saved_cs; + unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw; + unsigned ce_cur = 0; + + if (ctx->ce_ib) + ce_cur = ctx->ce_ib->prev_dw + ctx->ce_ib->current.cdw; + + if (!dump_bo_list && + gfx_cur == scs->gfx_last_dw && + ce_cur == scs->ce_last_dw) + return; + + struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk)); + + chunk->ctx = ctx; + si_saved_cs_reference(&chunk->cs, scs); + chunk->dump_bo_list = dump_bo_list; + + chunk->gfx_begin = scs->gfx_last_dw; + chunk->gfx_end = gfx_cur; + scs->gfx_last_dw = gfx_cur; + + chunk->ce_begin = scs->ce_last_dw; + chunk->ce_end = ce_cur; + scs->ce_last_dw = ce_cur; + + u_log_chunk(log, &si_log_chunk_type_cs, chunk); +} + +void si_auto_log_cs(void *data, struct u_log_context *log) +{ + struct si_context *ctx = (struct si_context *)data; + si_log_cs(ctx, log, false); +} + +void si_log_hw_flush(struct si_context *sctx) +{ + if (!sctx->b.log) + return; + + si_log_cs(sctx, sctx->b.log, true); +} + static const char *priority_to_string(enum radeon_bo_priority priority) { #define ITEM(x) [RADEON_PRIO_##x] = #x @@ -920,6 +1055,9 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, { struct si_context *sctx = (struct si_context*)ctx; + if (sctx->b.log) + u_log_flush(sctx->b.log); + if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) { si_dump_debug_registers(sctx, f); @@ -957,18 +1095,6 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, u_log_new_page_print(&log, f); u_log_context_destroy(&log); - - if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) { - si_dump_bo_list(sctx, &sctx->last_gfx, f); - si_dump_last_ib(sctx, f); - - fprintf(f, "Done.\n"); - - /* dump only once */ - radeon_clear_saved_cs(&sctx->last_gfx); - radeon_clear_saved_cs(&sctx->last_ce); - r600_resource_reference(&sctx->last_trace_buf, NULL); - } } static void si_dump_dma(struct si_context *sctx, diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index d9170c30954..7481d013f3c 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -57,6 +57,14 @@ static unsigned si_ce_needed_cs_space(void) return space; } +void si_destroy_saved_cs(struct si_saved_cs *scs) +{ + radeon_clear_saved_cs(&scs->gfx); + radeon_clear_saved_cs(&scs->ce); + r600_resource_reference(&scs->trace_buf, NULL); + free(scs); +} + /* initialize */ void si_need_cs_space(struct si_context *ctx) { @@ -139,17 +147,15 @@ void si_context_gfx_flush(void *context, unsigned flags, si_emit_cache_flush(ctx); - if (ctx->trace_buf) + if (ctx->current_saved_cs) { si_trace_emit(ctx); + si_log_hw_flush(ctx); - if (ctx->is_debug) { /* Save the IB for debug contexts. */ - radeon_clear_saved_cs(&ctx->last_gfx); - radeon_save_cs(ws, cs, &ctx->last_gfx, true); - radeon_clear_saved_cs(&ctx->last_ce); - radeon_save_cs(ws, ctx->ce_ib, &ctx->last_ce, false); - r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf); - r600_resource_reference(&ctx->trace_buf, NULL); + radeon_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true); + if (ctx->ce_ib) + radeon_save_cs(ws, ctx->ce_ib, &ctx->current_saved_cs->ce, false); + ctx->current_saved_cs->flushed = true; } /* Flush the CS. */ @@ -165,31 +171,50 @@ void si_context_gfx_flush(void *context, unsigned flags, */ ctx->b.ws->fence_wait(ctx->b.ws, ctx->b.last_gfx_fence, 800*1000*1000); - si_check_vm_faults(&ctx->b, &ctx->last_gfx, RING_GFX); + si_check_vm_faults(&ctx->b, &ctx->current_saved_cs->gfx, RING_GFX); } + if (ctx->current_saved_cs) + si_saved_cs_reference(&ctx->current_saved_cs, NULL); + si_begin_new_cs(ctx); ctx->gfx_flush_in_progress = false; } -void si_begin_new_cs(struct si_context *ctx) +static void si_begin_cs_debug(struct si_context *ctx) { - if (ctx->is_debug) { - static const uint32_t zeros[2]; + static const uint32_t zeros[2]; + assert(!ctx->current_saved_cs); + + ctx->current_saved_cs = calloc(1, sizeof(*ctx->current_saved_cs)); + if (!ctx->current_saved_cs) + return; + + pipe_reference_init(&ctx->current_saved_cs->reference, 1); - /* Create a buffer used for writing trace IDs and initialize it to 0. */ - assert(!ctx->trace_buf); - ctx->trace_buf = (struct r600_resource*) + ctx->current_saved_cs->trace_buf = (struct r600_resource*) pipe_buffer_create(ctx->b.b.screen, 0, PIPE_USAGE_STAGING, 8); - if (ctx->trace_buf) - pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b, - 0, sizeof(zeros), zeros); - ctx->trace_id = 0; + if (!ctx->current_saved_cs->trace_buf) { + free(ctx->current_saved_cs); + ctx->current_saved_cs = NULL; + return; } - if (ctx->trace_buf) - si_trace_emit(ctx); + pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->current_saved_cs->trace_buf->b.b, + 0, sizeof(zeros), zeros); + ctx->current_saved_cs->trace_id = 0; + + si_trace_emit(ctx); + + radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, ctx->current_saved_cs->trace_buf, + RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); +} + +void si_begin_new_cs(struct si_context *ctx) +{ + if (ctx->is_debug) + si_begin_cs_debug(ctx); /* Flush read caches at the beginning of CS not flushed by the kernel. */ if (ctx->b.chip_class >= CIK) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 88fa97d3b0e..93d43e697da 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -96,9 +96,7 @@ static void si_destroy_context(struct pipe_context *context) LLVMDisposeTargetMachine(sctx->tm); - r600_resource_reference(&sctx->trace_buf, NULL); - r600_resource_reference(&sctx->last_trace_buf, NULL); - radeon_clear_saved_cs(&sctx->last_gfx); + si_saved_cs_reference(&sctx->current_saved_cs, NULL); pb_slabs_deinit(&sctx->bindless_descriptor_slabs); util_dynarray_fini(&sctx->bindless_descriptors); @@ -163,6 +161,9 @@ static void si_set_log_context(struct pipe_context *ctx, { struct si_context *sctx = (struct si_context *)ctx; sctx->b.log = log; + + if (log) + u_log_add_auto_logger(log, si_auto_log_cs, sctx); } static struct pipe_context *si_create_context(struct pipe_screen *screen, diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 8d822874d55..9ba1dfbab49 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -268,6 +268,19 @@ struct si_image_handle struct pipe_image_view view; }; +struct si_saved_cs { + struct pipe_reference reference; + struct si_context *ctx; + struct radeon_saved_cs gfx; + struct radeon_saved_cs ce; + struct r600_resource *trace_buf; + unsigned trace_id; + + unsigned gfx_last_dw; + unsigned ce_last_dw; + bool flushed; +}; + struct si_context { struct r600_common_context b; struct blitter_context *blitter; @@ -419,11 +432,7 @@ struct si_context { /* Debug state. */ bool is_debug; - struct radeon_saved_cs last_gfx; - struct radeon_saved_cs last_ce; - struct r600_resource *last_trace_buf; - struct r600_resource *trace_buf; - unsigned trace_id; + struct si_saved_cs *current_saved_cs; uint64_t dmesg_timestamp; unsigned apitrace_call_number; @@ -497,6 +506,8 @@ void cik_emit_prefetch_L2(struct si_context *sctx); void si_init_cp_dma_functions(struct si_context *sctx); /* si_debug.c */ +void si_auto_log_cs(void *data, struct u_log_context *log); +void si_log_hw_flush(struct si_context *sctx); void si_init_debug_functions(struct si_context *sctx); void si_check_vm_faults(struct r600_common_context *ctx, struct radeon_saved_cs *saved, enum ring_type ring); @@ -506,6 +517,7 @@ bool si_replace_shader(unsigned num, struct ac_shader_binary *binary); void si_init_dma_functions(struct si_context *sctx); /* si_hw_context.c */ +void si_destroy_saved_cs(struct si_saved_cs *scs); void si_context_gfx_flush(void *context, unsigned flags, struct pipe_fence_handle **fence); void si_begin_new_cs(struct si_context *ctx); @@ -603,4 +615,13 @@ si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size) return MIN2(alignment, tcc_cache_line_size); } +static inline void +si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src) +{ + if (pipe_reference(&(*dst)->reference, &src->reference)) + si_destroy_saved_cs(*dst); + + *dst = src; +} + #endif diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index f17f57051de..4505bc6ed62 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -30,6 +30,7 @@ #include "gfx9d.h" #include "util/u_index_modify.h" +#include "util/u_log.h" #include "util/u_upload_mgr.h" #include "util/u_prim.h" @@ -1434,7 +1435,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_ce_post_draw_synchronization(sctx); - if (sctx->trace_buf) + if (unlikely(sctx->current_saved_cs)) si_trace_emit(sctx); /* Workaround for a VGT hang when streamout is enabled. @@ -1464,20 +1465,18 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) void si_trace_emit(struct si_context *sctx) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; - - sctx->trace_id++; - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf, - RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); + uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address; + uint32_t trace_id = ++sctx->current_saved_cs->trace_id; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, sctx->trace_buf->gpu_address); - radeon_emit(cs, sctx->trace_buf->gpu_address >> 32); - radeon_emit(cs, sctx->trace_id); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, trace_id); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, AC_ENCODE_TRACE_POINT(sctx->trace_id)); + radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id)); if (sctx->ce_ib) { struct radeon_winsys_cs *ce = sctx->ce_ib; @@ -1486,10 +1485,13 @@ void si_trace_emit(struct si_context *sctx) radeon_emit(ce, S_370_DST_SEL(V_370_MEM_ASYNC) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_CE)); - radeon_emit(ce, sctx->trace_buf->gpu_address + 4); - radeon_emit(ce, (sctx->trace_buf->gpu_address + 4) >> 32); - radeon_emit(ce, sctx->trace_id); + radeon_emit(ce, va + 4); + radeon_emit(ce, (va + 4) >> 32); + radeon_emit(ce, trace_id); radeon_emit(ce, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(ce, AC_ENCODE_TRACE_POINT(sctx->trace_id)); + radeon_emit(ce, AC_ENCODE_TRACE_POINT(trace_id)); } + + if (sctx->b.log) + u_log_flush(sctx->b.log); } -- 2.30.2