radeonsi: add IB tracing support for debug contexts
author: Marek Olšák <marek.olsak@amd.com>
author date: Wed, 19 Aug 2015 09:53:25 +0000 (11:53 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
commit date: Wed, 26 Aug 2015 17:25:19 +0000 (19:25 +0200)
This adds trace points to all IBs and the parser prints them and also
prints which trace points were reached (executed) by the CP.
This can help pinpoint a problematic packet, draw call, etc.

Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/radeonsi/si_debug.c
src/gallium/drivers/radeonsi/si_hw_context.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state_draw.c

index 72b7989e92c771320a09f211159353e9bb6ee95f..cf09686c636409161e5ea5ab49889273a65d52c3 100644 (file)
@@ -142,7 +142,8 @@ static void si_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count,
                si_dump_reg(f, reg + i*4, ib[2+i], ~0);
 }
 
-static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw)
+static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
+                                 int trace_id)
 {
        unsigned count = PKT_COUNT_G(ib[0]);
        unsigned op = PKT3_IT_OPCODE_G(ib[0]);
@@ -232,6 +233,36 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw)
                if (ib[0] == 0xffff1000) {
                        count = -1; /* One dword NOP. */
                        break;
+               } else if (count == 0 && SI_IS_TRACE_POINT(ib[1])) { /* trace point marker */
+                       unsigned packet_id = SI_GET_TRACE_POINT_ID(ib[1]);
+
+                       print_spaces(f, INDENT_PKT);
+                       fprintf(f, COLOR_RED "Trace point ID: %u" COLOR_RESET "\n", packet_id);
+
+                       if (trace_id == -1)
+                               break; /* tracing was disabled */
+
+                       print_spaces(f, INDENT_PKT); /* classify relative to the last ID the CP reached */
+                       if (packet_id < trace_id)
+                               fprintf(f, COLOR_RED
+                                       "This trace point was reached by the CP."
+                                       COLOR_RESET "\n");
+                       else if (packet_id == trace_id)
+                               fprintf(f, COLOR_RED
+                                       "!!!!! This is the last trace point that "
+                                       "was reached by the CP !!!!!"
+                                       COLOR_RESET "\n");
+                       else if (packet_id == (unsigned)trace_id + 1) /* one past the last reached ID */
+                               fprintf(f, COLOR_RED
+                                       "!!!!! This is the first trace point that "
+                                       "has NOT been reached by the CP !!!!!"
+                                       COLOR_RESET "\n");
+                       else
+                               fprintf(f, COLOR_RED
+                                       "!!!!! This trace point was NOT reached "
+                                       "by the CP !!!!!"
+                                       COLOR_RESET "\n");
+                       break;
                }
                /* fall through, print all dwords */
        default:
@@ -246,7 +277,17 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw)
        return ib;
 }
 
-static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw)
+/**
+ * Parse and print an IB into a file.
+ *
+ * \param f            file
+ * \param ib           IB
+ * \param num_dw       size of the IB
+ * \param trace_id     the last trace ID that is known to have been reached
+ *                     and executed by the CP, typically read from a buffer;
+ *                     -1 means tracing was disabled and no reached/not-reached
+ *                     status is printed for trace points
+static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id)
 {
        fprintf(f, "------------------ IB begin ------------------\n");
 
@@ -255,7 +296,7 @@ static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw)
 
                switch (type) {
                case 3:
-                       ib = si_parse_packet3(f, ib, &num_dw);
+                       ib = si_parse_packet3(f, ib, &num_dw, trace_id);
                        break;
                case 2:
                        /* type-2 nop */
@@ -342,9 +383,27 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
        si_dump_shader(sctx->ps_shader, "Fragment", f);
 
        if (sctx->last_ib) {
-               si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size);
+               int last_trace_id = -1;
+
+               if (sctx->last_trace_buf) {
+                       /* We are expecting that the ddebug pipe has already
+                        * waited for the context, so this buffer should be idle.
+                        * If the GPU is hung, there is no point in waiting for it.
+                        */
+                       uint32_t *map =
+                               sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
+                                                      NULL,
+                                                      PIPE_TRANSFER_UNSYNCHRONIZED |
+                                                      PIPE_TRANSFER_READ);
+                       if (map)
+                               last_trace_id = *map;
+               }
+
+               si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
+                           last_trace_id);
                free(sctx->last_ib); /* dump only once */
                sctx->last_ib = NULL;
+               r600_resource_reference(&sctx->last_trace_buf, NULL);
        }
 
        fprintf(f, "Done.\n");
index b726eb3cdd8a2f6b56d92aa028868a12c154193e..110e3163021044cc11de5a14882a854f0939e5ea 100644 (file)
@@ -89,7 +89,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
        num_dw += ctx->atoms.s.cache_flush->num_dw;
 
        if (ctx->screen->b.trace_bo)
-               num_dw += SI_TRACE_CS_DWORDS;
+               num_dw += SI_TRACE_CS_DWORDS * 2;
 
        /* Flush if there's not enough space. */
        if (num_dw > cs->max_dw) {
@@ -127,12 +127,17 @@ void si_context_gfx_flush(void *context, unsigned flags,
        /* force to keep tiling flags */
        flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
 
+       if (ctx->trace_buf)
+               si_trace_emit(ctx);
+
        /* Save the IB for debug contexts. */
        if (ctx->is_debug) {
                free(ctx->last_ib);
                ctx->last_ib_dw_size = cs->cdw;
                ctx->last_ib = malloc(cs->cdw * 4);
                memcpy(ctx->last_ib, cs->buf, cs->cdw * 4);
+               r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
+               r600_resource_reference(&ctx->trace_buf, NULL);
        }
 
        /* Flush the CS. */
@@ -148,6 +153,23 @@ void si_context_gfx_flush(void *context, unsigned flags,
 
 void si_begin_new_cs(struct si_context *ctx)
 {
+       if (ctx->is_debug) {
+               uint32_t zero = 0;
+
+               /* Create a buffer used for writing trace IDs and initialize it to 0. */
+               assert(!ctx->trace_buf);
+               ctx->trace_buf = (struct r600_resource*)
+                                pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM,
+                                                   PIPE_USAGE_STAGING, 4);
+               if (ctx->trace_buf)
+                       pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
+                                                   0, sizeof(zero), &zero);
+               ctx->trace_id = 0;
+       }
+
+       if (ctx->trace_buf)
+               si_trace_emit(ctx);
+
        /* Flush read caches at the beginning of CS. */
        ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
                        SI_CONTEXT_INV_TC_L1 |
index e5900b748067241ea16bb001345bcdbd133e618f..92c6ae3de2be1320366b6ab0d1f45fb139dfe774 100644 (file)
@@ -81,6 +81,8 @@ static void si_destroy_context(struct pipe_context *context)
        LLVMDisposeTargetMachine(sctx->tm);
 #endif
 
+       r600_resource_reference(&sctx->trace_buf, NULL);
+       r600_resource_reference(&sctx->last_trace_buf, NULL);
        free(sctx->last_ib);
        FREE(sctx);
 }
index 09a21ceb618bf7b0be818fd9b4e22ac96c735264..52167f24a9567c89fa655e268c1f269c93aca74b 100644 (file)
@@ -43,7 +43,7 @@
 #define SI_RESTART_INDEX_UNKNOWN INT_MIN
 #define SI_NUM_SMOOTH_AA_SAMPLES 8
 
-#define SI_TRACE_CS_DWORDS             6
+#define SI_TRACE_CS_DWORDS             7
 
 #define SI_MAX_DRAW_CS_DWORDS \
        (/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
                                              SI_CONTEXT_FLUSH_AND_INV_DB | \
                                              SI_CONTEXT_FLUSH_AND_INV_DB_META)
 
+#define SI_ENCODE_TRACE_POINT(id)      (0xcafe0000 | ((id) & 0xffff))
+#define SI_IS_TRACE_POINT(x)           (((x) & 0xcafe0000) == 0xcafe0000)
+#define SI_GET_TRACE_POINT_ID(x)       ((x) & 0xffff)
+
 struct si_compute;
 
 struct si_screen {
@@ -247,6 +251,9 @@ struct si_context {
        bool                    is_debug;
        uint32_t                *last_ib;
        unsigned                last_ib_dw_size;
+       struct r600_resource    *last_trace_buf;
+       struct r600_resource    *trace_buf;
+       unsigned                trace_id;
 };
 
 /* cik_sdma.c */
index e56c9e70ecac4febd13b630f4b533ae984038205..b1aba1290d67471d7dc119049ae46208daacd776 100644 (file)
@@ -835,7 +835,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
        si_emit_draw_registers(sctx, info);
        si_emit_draw_packets(sctx, info, &ib);
 
-       if (sctx->screen->b.trace_bo)
+       if (sctx->trace_buf)
                si_trace_emit(sctx);
 
        /* Workaround for a VGT hang when streamout is enabled.
@@ -873,19 +873,18 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 void si_trace_emit(struct si_context *sctx)
 {
-       struct si_screen *sscreen = sctx->screen;
        struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-       uint64_t va;
 
-       va = sscreen->b.trace_bo->gpu_address;
-       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sscreen->b.trace_bo,
+       sctx->trace_id++; /* advance first so every emitted trace point carries a new ID */
+       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
                              RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
-       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
+       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
        radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
                                PKT3_WRITE_DATA_WR_CONFIRM |
                                PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
-       radeon_emit(cs, va & 0xFFFFFFFFUL);
-       radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL);
-       radeon_emit(cs, cs->cdw);
-       radeon_emit(cs, sscreen->b.cs_count);
+       radeon_emit(cs, sctx->trace_buf->gpu_address); /* trace buffer address, low dword (presumably truncated by radeon_emit - confirm) */
+       radeon_emit(cs, sctx->trace_buf->gpu_address >> 32); /* trace buffer address, high dword */
+       radeon_emit(cs, sctx->trace_id); /* value written to the trace buffer; the debug dump reads it back as the last reached ID */
+       radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* NOP whose payload marks this position in the IB for the parser */
+       radeon_emit(cs, SI_ENCODE_TRACE_POINT(sctx->trace_id)); /* marker keeps only the low 16 bits of trace_id */
 }