radeonsi: add cs tracing v3
author Jerome Glisse <jglisse@redhat.com>
Mon, 25 Mar 2013 15:46:38 +0000 (11:46 -0400)
committer Jerome Glisse <jglisse@redhat.com>
Wed, 27 Mar 2013 15:38:02 +0000 (11:38 -0400)
Same as on r600, trace cs execution by writing the cs offset after each
state; this allows pinpointing a lockup inside the command stream and
narrowing down the scope of the lockup investigation.

v2: Use WRITE_DATA packet instead of WRITE_MEM
v3: Remove useless nop packet

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
src/gallium/drivers/radeonsi/r600_hw_context.c
src/gallium/drivers/radeonsi/radeonsi_pipe.c
src/gallium/drivers/radeonsi/radeonsi_pipe.h
src/gallium/drivers/radeonsi/radeonsi_pm4.c
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/sid.h

index bd348f9fe14766ca317704efadc4d135d4f4196f..0975a1fe978700f9403a68e551e62892ff023e3b 100644 (file)
@@ -142,6 +142,12 @@ void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
        /* Save 16 dwords for the fence mechanism. */
        num_dw += 16;
 
+#if R600_TRACE_CS
+       if (ctx->screen->trace_bo) {
+               num_dw += R600_TRACE_CS_DWORDS;
+       }
+#endif
+
        /* Flush if there's not enough space. */
        if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
                radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
@@ -206,9 +212,41 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
        /* force to keep tiling flags */
        flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
 
+#if R600_TRACE_CS
+       if (ctx->screen->trace_bo) {
+               struct r600_screen *rscreen = ctx->screen;
+               unsigned i;
+
+               for (i = 0; i < cs->cdw; i++) {
+                       fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
+               }
+               rscreen->cs_count++;
+       }
+#endif
+
        /* Flush the CS. */
        ctx->ws->cs_flush(ctx->cs, flags);
 
+#if R600_TRACE_CS
+       if (ctx->screen->trace_bo) {
+               struct r600_screen *rscreen = ctx->screen;
+               unsigned i;
+
+               for (i = 0; i < 10; i++) {
+                       usleep(5);
+                       if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
+                               break;
+                       }
+               }
+               if (i == 10) {
+                       fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
+                               rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
+               } else {
+                       fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
+               }
+       }
+#endif
+
        ctx->pm4_dirty_cdwords = 0;
        ctx->flags = 0;
 
@@ -665,3 +703,23 @@ void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_tar
        cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, RADEON_USAGE_READ);
 
 }
+
+#if R600_TRACE_CS
+/* Emit a WRITE_DATA packet storing { CS dword offset, cs_count } at trace_bo's GPU address. */
+void r600_trace_emit(struct r600_context *rctx)
+{
+       struct r600_screen *rscreen = rctx->screen;
+       struct radeon_winsys_cs *cs = rctx->cs;
+       uint64_t va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
+       r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
+       cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0); /* header + 5 dwords = R600_TRACE_CS_DWORDS */
+       cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
+                               PKT3_WRITE_DATA_WR_CONFIRM |
+                               PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
+       cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;           /* destination address, low 32 bits */
+       cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL; /* destination address, high 32 bits */
+       cs->buf[cs->cdw] = cs->cdw; /* offset of this dword; a cdw++ here would be unsequenced with the read */
+       cs->cdw++;
+       cs->buf[cs->cdw++] = rscreen->cs_count;           /* second data dword: running CS counter */
+}
+#endif
index 672017a4ec7a877842768309f09586cc12a4c4b7..0d24309783947e69c4f05bfe4ee8bb885e72f4ac 100644 (file)
@@ -522,6 +522,14 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
                rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
                si_resource_reference(&rscreen->fences.bo, NULL);
        }
+
+#if R600_TRACE_CS
+       if (rscreen->trace_bo) {
+               rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
+               pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
+       }
+#endif
+
        pipe_mutex_destroy(rscreen->fences.mutex);
 
        rscreen->ws->destroy(rscreen->ws);
@@ -724,5 +732,19 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
        LIST_INITHEAD(&rscreen->fences.blocks);
        pipe_mutex_init(rscreen->fences.mutex);
 
+#if R600_TRACE_CS
+       rscreen->cs_count = 0;
+       if (rscreen->info.drm_minor >= 28) {
+               rscreen->trace_bo = (struct si_resource*)pipe_buffer_create(&rscreen->screen,
+                                                                               PIPE_BIND_CUSTOM,
+                                                                               PIPE_USAGE_STAGING,
+                                                                               4096);
+               if (rscreen->trace_bo) {
+                       rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
+                                                                       PIPE_TRANSFER_UNSYNCHRONIZED);
+               }
+       }
+#endif
+
        return &rscreen->screen;
 }
index d0f04f405626646bba823b316b6db1d96d5973c9..40a5c8c3d907b771e3561c85b1b0e6d62d280781 100644 (file)
@@ -47,6 +47,9 @@
 #define R600_BIG_ENDIAN 0
 #endif
 
+#define R600_TRACE_CS 0
+#define R600_TRACE_CS_DWORDS           6
+
 struct r600_pipe_fences {
        struct si_resource              *bo;
        unsigned                        *data;
@@ -67,6 +70,11 @@ struct r600_screen {
        struct r600_tiling_info         tiling_info;
        struct util_slab_mempool        pool_buffers;
        struct r600_pipe_fences         fences;
+#if R600_TRACE_CS
+       struct si_resource              *trace_bo;
+       uint32_t                        *trace_ptr;
+       unsigned                        cs_count;
+#endif
 };
 
 struct si_pipe_sampler_view {
@@ -226,6 +234,10 @@ void r600_translate_index_buffer(struct r600_context *r600,
                                 struct pipe_index_buffer *ib,
                                 unsigned count);
 
+#if R600_TRACE_CS
+void r600_trace_emit(struct r600_context *rctx);
+#endif
+
 /*
  * common helpers
  */
index 79a2521f339c1b748859c9b40dc3308400ceb3c8..8e01738253ee9fb429d14f457d51b4589fb6716f 100644 (file)
@@ -199,6 +199,12 @@ unsigned si_pm4_dirty_dw(struct r600_context *rctx)
                        continue;
 
                count += state->ndw;
+#if R600_TRACE_CS
+               /* for tracing each states */
+               if (rctx->screen->trace_bo) {
+                       count += R600_TRACE_CS_DWORDS;
+               }
+#endif
        }
 
        return count;
@@ -219,6 +225,12 @@ void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state)
        }
 
        cs->cdw += state->ndw;
+
+#if R600_TRACE_CS
+       if (rctx->screen->trace_bo) {
+               r600_trace_emit(rctx);
+       }
+#endif
 }
 
 void si_pm4_emit_dirty(struct r600_context *rctx)
index a78751bf80244ad9d10c8c65f178604cbe829694..1e1d1cc6d6b03e36448d61ac94166873028e013e 100644 (file)
@@ -579,6 +579,12 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
        si_pm4_emit_dirty(rctx);
        rctx->pm4_dirty_cdwords = 0;
 
+#if R600_TRACE_CS
+       if (rctx->screen->trace_bo) {
+               r600_trace_emit(rctx);
+       }
+#endif
+
 #if 0
        /* Enable stream out if needed. */
        if (rctx->streamout_start) {
@@ -587,7 +593,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
        }
 #endif
 
-
        rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY;
 
        /* Set the depth buffer as dirty. */
index 57553a69be204afd61299f1988ac5b02667204ed..8528981ab792264bbdb17572f1d45076f2534213 100644 (file)
 #define PKT3_DRAW_INDEX_IMMD                   0x2E
 #define PKT3_NUM_INSTANCES                     0x2F
 #define PKT3_STRMOUT_BUFFER_UPDATE             0x34
+#define PKT3_WRITE_DATA                        0x37 /* fields of its control dword follow */
+#define     PKT3_WRITE_DATA_DST_SEL(x)             ((x) << 8)
+#define     PKT3_WRITE_DATA_DST_SEL_REG            0
+#define     PKT3_WRITE_DATA_DST_SEL_MEM_SYNC       1
+#define     PKT3_WRITE_DATA_DST_SEL_TC_OR_L2       2
+#define     PKT3_WRITE_DATA_DST_SEL_GDS            3
+#define     PKT3_WRITE_DATA_DST_SEL_RESERVED_4     4
+#define     PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC      5
+#define     PKT3_WR_ONE_ADDR                       (1 << 16)
+#define     PKT3_WRITE_DATA_WR_CONFIRM             (1 << 20)
+#define     PKT3_WRITE_DATA_ENGINE_SEL(x)          ((unsigned)(x) << 30) /* unsigned: 2 << 30 overflows int */
+#define     PKT3_WRITE_DATA_ENGINE_SEL_ME          0
+#define     PKT3_WRITE_DATA_ENGINE_SEL_PFP         1
+#define     PKT3_WRITE_DATA_ENGINE_SEL_CE          2
 #define PKT3_MEM_SEMAPHORE                     0x39
 #define PKT3_MPEG_INDEX                        0x3A
 #define PKT3_WAIT_REG_MEM                      0x3C