From: Marek Olšák Date: Wed, 19 Aug 2015 09:53:25 +0000 (+0200) Subject: radeonsi: add IB tracing support for debug contexts X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2c14a6d3b1c53d5814414ce9e91fd8d24c90b787;p=mesa.git radeonsi: add IB tracing support for debug contexts This adds trace points to all IBs and the parser prints them and also prints which trace points were reached (executed) by the CP. This can help pinpoint a problematic packet, draw call, etc. Acked-by: Christian König Acked-by: Alex Deucher --- diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 72b7989e92c..cf09686c636 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -142,7 +142,8 @@ static void si_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count, si_dump_reg(f, reg + i*4, ib[2+i], ~0); } -static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) +static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw, + int trace_id) { unsigned count = PKT_COUNT_G(ib[0]); unsigned op = PKT3_IT_OPCODE_G(ib[0]); @@ -232,6 +233,36 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) if (ib[0] == 0xffff1000) { count = -1; /* One dword NOP. */ break; + } else if (count == 0 && SI_IS_TRACE_POINT(ib[1])) { + unsigned packet_id = SI_GET_TRACE_POINT_ID(ib[1]); + + print_spaces(f, INDENT_PKT); + fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id); + + if (trace_id == -1) + break; /* tracing was disabled */ + + print_spaces(f, INDENT_PKT); + if (packet_id < trace_id) + fprintf(f, COLOR_RED + "This trace point was reached by the CP." + COLOR_RESET "\n"); + else if (packet_id == trace_id) + fprintf(f, COLOR_RED + "!!!!! This is the last trace point that " + "was reached by the CP !!!!!" + COLOR_RESET "\n"); + else if (packet_id+1 == trace_id) + fprintf(f, COLOR_RED + "!!!!! This is the first trace point that " + "was NOT been reached by the CP !!!!!" + COLOR_RESET "\n"); + else + fprintf(f, COLOR_RED + "!!!!! This trace point was NOT reached " + "by the CP !!!!!" + COLOR_RESET "\n"); + break; } /* fall through, print all dwords */ default: @@ -246,7 +277,17 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) return ib; } -static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw) +/** + * Parse and print an IB into a file. + * + * \param f file + * \param ib IB + * \param num_dw size of the IB + * \param chip_class chip class + * \param trace_id the last trace ID that is known to have been reached + * and executed by the CP, typically read from a buffer + */ +static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id) { fprintf(f, "------------------ IB begin ------------------\n"); @@ -255,7 +296,7 @@ static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw) switch (type) { case 3: - ib = si_parse_packet3(f, ib, &num_dw); + ib = si_parse_packet3(f, ib, &num_dw, trace_id); break; case 2: /* type-2 nop */ @@ -342,9 +383,27 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, si_dump_shader(sctx->ps_shader, "Fragment", f); if (sctx->last_ib) { - si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size); + int last_trace_id = -1; + + if (sctx->last_trace_buf) { + /* We are expecting that the ddebug pipe has already + * waited for the context, so this buffer should be idle. + * If the GPU is hung, there is no point in waiting for it. + */ + uint32_t *map = + sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf, + NULL, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ); + if (map) + last_trace_id = *map; + } + + si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size, + last_trace_id); free(sctx->last_ib); /* dump only once */ sctx->last_ib = NULL; + r600_resource_reference(&sctx->last_trace_buf, NULL); } fprintf(f, "Done.\n"); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index b726eb3cdd8..110e3163021 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -89,7 +89,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw, num_dw += ctx->atoms.s.cache_flush->num_dw; if (ctx->screen->b.trace_bo) - num_dw += SI_TRACE_CS_DWORDS; + num_dw += SI_TRACE_CS_DWORDS * 2; /* Flush if there's not enough space. */ if (num_dw > cs->max_dw) { @@ -127,12 +127,17 @@ void si_context_gfx_flush(void *context, unsigned flags, /* force to keep tiling flags */ flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; + if (ctx->trace_buf) + si_trace_emit(ctx); + /* Save the IB for debug contexts. */ if (ctx->is_debug) { free(ctx->last_ib); ctx->last_ib_dw_size = cs->cdw; ctx->last_ib = malloc(cs->cdw * 4); memcpy(ctx->last_ib, cs->buf, cs->cdw * 4); + r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf); + r600_resource_reference(&ctx->trace_buf, NULL); } /* Flush the CS. */ @@ -148,6 +153,23 @@ void si_context_gfx_flush(void *context, unsigned flags, void si_begin_new_cs(struct si_context *ctx) { + if (ctx->is_debug) { + uint32_t zero = 0; + + /* Create a buffer used for writing trace IDs and initialize it to 0. */ + assert(!ctx->trace_buf); + ctx->trace_buf = (struct r600_resource*) + pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_STAGING, 4); + if (ctx->trace_buf) + pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b, + 0, sizeof(zero), &zero); + ctx->trace_id = 0; + } + + if (ctx->trace_buf) + si_trace_emit(ctx); + /* Flush read caches at the beginning of CS. */ ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | SI_CONTEXT_INV_TC_L1 | diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index e5900b74806..92c6ae3de2b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -81,6 +81,8 @@ static void si_destroy_context(struct pipe_context *context) LLVMDisposeTargetMachine(sctx->tm); #endif + r600_resource_reference(&sctx->trace_buf, NULL); + r600_resource_reference(&sctx->last_trace_buf, NULL); free(sctx->last_ib); FREE(sctx); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 09a21ceb618..52167f24a95 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -43,7 +43,7 @@ #define SI_RESTART_INDEX_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 -#define SI_TRACE_CS_DWORDS 6 +#define SI_TRACE_CS_DWORDS 7 #define SI_MAX_DRAW_CS_DWORDS \ (/*scratch:*/ 3 + /*derived prim state:*/ 3 + \ @@ -81,6 +81,10 @@ SI_CONTEXT_FLUSH_AND_INV_DB | \ SI_CONTEXT_FLUSH_AND_INV_DB_META) +#define SI_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff)) +#define SI_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000) +#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff) + struct si_compute; struct si_screen { @@ -247,6 +251,9 @@ struct si_context { bool is_debug; uint32_t *last_ib; unsigned last_ib_dw_size; + struct r600_resource *last_trace_buf; + struct r600_resource *trace_buf; + unsigned trace_id; }; /* cik_sdma.c */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e56c9e70eca..b1aba1290d6 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -835,7 +835,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_emit_draw_registers(sctx, info); si_emit_draw_packets(sctx, info, &ib); - if (sctx->screen->b.trace_bo) + if (sctx->trace_buf) si_trace_emit(sctx); /* Workaround for a VGT hang when streamout is enabled. @@ -873,19 +873,18 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) void si_trace_emit(struct si_context *sctx) { - struct si_screen *sscreen = sctx->screen; struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - uint64_t va; - va = sscreen->b.trace_bo->gpu_address; - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sscreen->b.trace_bo, + sctx->trace_id++; + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf, RADEON_USAGE_READWRITE, RADEON_PRIO_MIN); - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0)); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) | PKT3_WRITE_DATA_WR_CONFIRM | PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME)); - radeon_emit(cs, va & 0xFFFFFFFFUL); - radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL); - radeon_emit(cs, cs->cdw); - radeon_emit(cs, sscreen->b.cs_count); + radeon_emit(cs, sctx->trace_buf->gpu_address); + radeon_emit(cs, sctx->trace_buf->gpu_address >> 32); + radeon_emit(cs, sctx->trace_id); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, SI_ENCODE_TRACE_POINT(sctx->trace_id)); }