si_dump_reg(f, reg + i*4, ib[2+i], ~0);
}
-static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw)
+static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
+ int trace_id)
{
unsigned count = PKT_COUNT_G(ib[0]);
unsigned op = PKT3_IT_OPCODE_G(ib[0]);
if (ib[0] == 0xffff1000) {
count = -1; /* One dword NOP. */
break;
+ } else if (count == 0 && SI_IS_TRACE_POINT(ib[1])) {
+ unsigned packet_id = SI_GET_TRACE_POINT_ID(ib[1]);
+
+ print_spaces(f, INDENT_PKT);
+ fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id);
+
+ if (trace_id == -1)
+ break; /* tracing was disabled */
+
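+ /* trace_id is the ID of the last trace point whose WRITE_DATA
+ * the CP is known to have executed, so every trace point with
+ * an ID less than or equal to trace_id has been reached.
+ */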
+ print_spaces(f, INDENT_PKT);
+ if (packet_id < trace_id)
+ fprintf(f, COLOR_RED
+ "This trace point was reached by the CP."
+ COLOR_RESET "\n");
+ else if (packet_id == trace_id)
+ fprintf(f, COLOR_RED
+ "!!!!! This is the last trace point that "
+ "was reached by the CP !!!!!"
+ COLOR_RESET "\n");
+ else if (packet_id == trace_id + 1)
+ fprintf(f, COLOR_RED
+ "!!!!! This is the first trace point that "
+ "was NOT reached by the CP !!!!!"
+ COLOR_RESET "\n");
+ else
+ fprintf(f, COLOR_RED
+ "!!!!! This trace point was NOT reached "
+ "by the CP !!!!!"
+ COLOR_RESET "\n");
+ break;
}
/* fall through, print all dwords */
default:
return ib;
}
-static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw)
+/**
+ * Parse and print an IB into a file.
+ *
+ * \param f file
+ * \param ib IB
+ * \param num_dw size of the IB
+ * \param trace_id the last trace ID that is known to have been reached
+ * and executed by the CP, typically read from a buffer
+ */
+static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id)
{
fprintf(f, "------------------ IB begin ------------------\n");
switch (type) {
case 3:
- ib = si_parse_packet3(f, ib, &num_dw);
+ ib = si_parse_packet3(f, ib, &num_dw, trace_id);
break;
case 2:
/* type-2 nop */
si_dump_shader(sctx->ps_shader, "Fragment", f);
if (sctx->last_ib) {
- si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size);
+ int last_trace_id = -1;
+
+ if (sctx->last_trace_buf) {
+ /* We expect the ddebug pipe to have already waited for the
+ * context, so this buffer should be idle. If the GPU is hung,
+ * there is no point in waiting for it anyway.
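+ *
+ * The first dword of the buffer holds the last trace ID that
+ * the CP wrote back (see si_trace_emit).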
+ */
+ uint32_t *map =
+ sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
+ NULL,
+ PIPE_TRANSFER_UNSYNCHRONIZED |
+ PIPE_TRANSFER_READ);
+ if (map)
+ last_trace_id = *map;
+ }
+
+ si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
+ last_trace_id);
free(sctx->last_ib); /* dump only once */
sctx->last_ib = NULL;
+ r600_resource_reference(&sctx->last_trace_buf, NULL);
}
fprintf(f, "Done.\n");
num_dw += ctx->atoms.s.cache_flush->num_dw;
- if (ctx->screen->b.trace_bo)
+ if (ctx->trace_buf)
- num_dw += SI_TRACE_CS_DWORDS;
+ num_dw += SI_TRACE_CS_DWORDS * 2;
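+ /* (one trace point is emitted in si_begin_new_cs, a second one
+ * before flushing, hence twice SI_TRACE_CS_DWORDS) */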
/* Flush if there's not enough space. */
if (num_dw > cs->max_dw) {
/* force to keep tiling flags */
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
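+ /* Emit one last trace point to mark the end of this CS. */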
+ if (ctx->trace_buf)
+ si_trace_emit(ctx);
+
/* Save the IB for debug contexts. */
if (ctx->is_debug) {
free(ctx->last_ib);
ctx->last_ib_dw_size = cs->cdw;
ctx->last_ib = malloc(cs->cdw * 4);
memcpy(ctx->last_ib, cs->buf, cs->cdw * 4);
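+ /* Pair the trace buffer with the saved IB;
+ * si_begin_new_cs will allocate a fresh one for the next CS. */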
+ r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
+ r600_resource_reference(&ctx->trace_buf, NULL);
}
/* Flush the CS. */
void si_begin_new_cs(struct si_context *ctx)
{
+ if (ctx->is_debug) {
+ uint32_t zero = 0;
+
+ /* Create a buffer used for writing trace IDs and initialize it to 0. */
+ assert(!ctx->trace_buf);
+ ctx->trace_buf = (struct r600_resource*)
+ pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_STAGING, 4);
+ if (ctx->trace_buf)
+ pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
+ 0, sizeof(zero), &zero);
+ ctx->trace_id = 0;
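+ /* Trace IDs start at 1 (si_trace_emit increments before
+ * writing), so a value of 0 in the buffer means the CP did
+ * not reach any trace point. */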
+ }
+
+ if (ctx->trace_buf)
+ si_trace_emit(ctx);
+
/* Flush read caches at the beginning of CS. */
ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
SI_CONTEXT_INV_TC_L1 |
LLVMDisposeTargetMachine(sctx->tm);
#endif
+ r600_resource_reference(&sctx->trace_buf, NULL);
+ r600_resource_reference(&sctx->last_trace_buf, NULL);
free(sctx->last_ib);
FREE(sctx);
}
#define SI_RESTART_INDEX_UNKNOWN INT_MIN
#define SI_NUM_SMOOTH_AA_SAMPLES 8
-#define SI_TRACE_CS_DWORDS 6
+#define SI_TRACE_CS_DWORDS 7
#define SI_MAX_DRAW_CS_DWORDS \
(/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
SI_CONTEXT_FLUSH_AND_INV_DB | \
SI_CONTEXT_FLUSH_AND_INV_DB_META)
+#define SI_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff))
+#define SI_IS_TRACE_POINT(x) (((x) & 0xffff0000) == 0xcafe0000)
+#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff)
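+/* e.g. SI_ENCODE_TRACE_POINT(5) == 0xcafe0005, which
+ * SI_IS_TRACE_POINT accepts and SI_GET_TRACE_POINT_ID maps back to 5. */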
+
struct si_compute;
struct si_screen {
bool is_debug;
uint32_t *last_ib;
unsigned last_ib_dw_size;
+ struct r600_resource *last_trace_buf;
+ struct r600_resource *trace_buf;
+ unsigned trace_id;
};
/* cik_sdma.c */
si_emit_draw_registers(sctx, info);
si_emit_draw_packets(sctx, info, &ib);
- if (sctx->screen->b.trace_bo)
+ if (sctx->trace_buf)
si_trace_emit(sctx);
/* Workaround for a VGT hang when streamout is enabled.
void si_trace_emit(struct si_context *sctx)
{
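+ /* Emits SI_TRACE_CS_DWORDS (7) dwords: a 5-dword WRITE_DATA
+ * that stores the new trace ID in the trace buffer, followed by
+ * a 2-dword NOP that embeds the same ID in the IB. */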
- struct si_screen *sscreen = sctx->screen;
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint64_t va;
- va = sscreen->b.trace_bo->gpu_address;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sscreen->b.trace_bo,
+ sctx->trace_id++;
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
PKT3_WRITE_DATA_WR_CONFIRM |
PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
- radeon_emit(cs, va & 0xFFFFFFFFUL);
- radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL);
- radeon_emit(cs, cs->cdw);
- radeon_emit(cs, sscreen->b.cs_count);
+ radeon_emit(cs, sctx->trace_buf->gpu_address);
+ radeon_emit(cs, sctx->trace_buf->gpu_address >> 32);
+ radeon_emit(cs, sctx->trace_id);
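+ /* Embed the ID in the IB itself as a NOP payload so that
+ * si_parse_packet3 can match trace points against the buffer. */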
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, SI_ENCODE_TRACE_POINT(sctx->trace_id));
}