sizeof(struct pipe_transfer), 64,
UTIL_SLAB_SINGLETHREADED);
- r300->cs = rws->cs_create(rws, RING_GFX);
+ r300->cs = rws->cs_create(rws, RING_GFX, NULL);
if (r300->cs == NULL)
goto fail;
}
r300->flush_counter++;
- r300->rws->cs_flush(r300->cs, flags);
+ r300->rws->cs_flush(r300->cs, flags, 0);
r300->dirty_hw = 0;
/* New kitchen sink, baby. */
* and we cannot emit an empty CS. Let's write to some reg. */
CS_LOCALS(r300);
OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0);
- r300->rws->cs_flush(r300->cs, flags);
+ r300->rws->cs_flush(r300->cs, flags, 0);
} else {
/* Even if hw is not dirty, we should at least reset the CS in case
* the space checking failed for the first draw operation. */
- r300->rws->cs_flush(r300->cs, flags);
+ r300->rws->cs_flush(r300->cs, flags, 0);
}
}
flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
}
- ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags);
+ ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags, ctx->screen->cs_count++);
ctx->flags = 0;
for (i = 0; i < R600_NUM_ATOMS; i++) {
if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
num_dw += ctx->atoms[i]->num_dw;
-#if R600_TRACE_CS
if (ctx->screen->trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
-#endif
}
}
/* The upper-bound of how much space a draw command would take. */
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
-#if R600_TRACE_CS
if (ctx->screen->trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
-#endif
}
/* Count in queries_suspend. */
}
/* Flush the CS. */
-#if R600_TRACE_CS
- if (ctx->screen->trace_bo) {
- struct r600_screen *rscreen = ctx->screen;
- unsigned i;
-
- for (i = 0; i < cs->cdw; i++) {
- fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
- }
- rscreen->cs_count++;
- }
-#endif
- ctx->ws->cs_flush(ctx->rings.gfx.cs, flags);
-#if R600_TRACE_CS
- if (ctx->screen->trace_bo) {
- struct r600_screen *rscreen = ctx->screen;
- unsigned i;
-
- for (i = 0; i < 10; i++) {
- usleep(5);
- if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
- break;
- }
- }
- if (i == 10) {
- fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
- rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
- } else {
- fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
- }
- }
-#endif
+ ctx->ws->cs_flush(ctx->rings.gfx.cs, flags, ctx->screen->cs_count++);
}
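Each gfx submission is now tagged with a monotonically increasing id (ctx->screen->cs_count++), which the winsys carries alongside the submitted cs. A minimal sketch of how such an id pairs with the two trace dwords after a hang, assuming the layout used by the tracing code (dw0 = offset of the last traced dword, dw1 = id of the cs that wrote it); interpret_trace is a hypothetical helper, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: check whether a hang points at a given submission.
 * trace_ptr is the CPU mapping of the trace bo (rscreen->trace_ptr). */
static void interpret_trace(const uint32_t *trace_ptr, uint32_t submitted_id)
{
    uint32_t last_dw = trace_ptr[0]; /* last cs offset the GPU traced */
    uint32_t hung_id = trace_ptr[1]; /* cs_trace_id of that cs */

    if (hung_id == submitted_id)
        fprintf(stderr, "lockup in cs 0x%08x near dw 0x%08x\n", hung_id, last_dw);
}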
void r600_begin_new_cs(struct r600_context *ctx)
{ "texdepth", DBG_TEX_DEPTH, "Print texture depth info" },
{ "compute", DBG_COMPUTE, "Print compute info" },
{ "vm", DBG_VM, "Print virtual addresses when creating resources" },
+ { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
/* shaders */
{ "fs", DBG_FS, "Print fetch shaders" },
}
rctx->rings.dma.flushing = true;
- rctx->ws->cs_flush(cs, flags);
+ rctx->ws->cs_flush(cs, flags, 0);
rctx->rings.dma.flushing = false;
}
goto fail;
}
- rctx->rings.gfx.cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
+ if (rscreen->trace_bo) {
+ rctx->rings.gfx.cs = rctx->ws->cs_create(rctx->ws, RING_GFX, rscreen->trace_bo->cs_buf);
+ } else {
+ rctx->rings.gfx.cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL);
+ }
rctx->rings.gfx.flush = r600_flush_gfx_ring;
rctx->ws->cs_set_flush_callback(rctx->rings.gfx.cs, r600_flush_from_winsys, rctx);
rctx->rings.gfx.flushing = false;
rctx->rings.dma.cs = NULL;
if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
- rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA);
+ rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA, NULL);
rctx->rings.dma.flush = r600_flush_dma_ring;
rctx->ws->cs_set_flush_callback(rctx->rings.dma.cs, r600_flush_dma_from_winsys, rctx);
rctx->rings.dma.flushing = false;
rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
pipe_resource_reference((struct pipe_resource**)&rscreen->fences.bo, NULL);
}
-#if R600_TRACE_CS
if (rscreen->trace_bo) {
rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
}
-#endif
pipe_mutex_destroy(rscreen->fences.mutex);
rscreen->ws->destroy(rscreen->ws);
rscreen->global_pool = compute_memory_pool_new(rscreen);
-#if R600_TRACE_CS
rscreen->cs_count = 0;
- if (rscreen->info.drm_minor >= 28) {
+ if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) {
rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->screen,
PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING,
4096);
if (rscreen->trace_bo) {
rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
PIPE_TRANSFER_UNSYNCHRONIZED);
}
}
}
-#endif
/* Create the auxiliary context. */
pipe_mutex_init(rscreen->aux_context_lock);
#define R600_NUM_ATOMS 41
-#define R600_TRACE_CS 0
-
/* the number of CS dwords for flushing and drawing */
#define R600_MAX_FLUSH_CS_DWORDS 16
#define R600_MAX_DRAW_CS_DWORDS 34
/* logging */
#define DBG_TEX_DEPTH (1 << 0)
#define DBG_COMPUTE (1 << 1)
-#define DBG_VM (1 << 2)
+#define DBG_VM (1 << 2)
+#define DBG_TRACE_CS (1 << 3)
/* shaders */
#define DBG_FS (1 << 8)
#define DBG_VS (1 << 9)
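The DBG_TRACE_CS bit replaces the old compile-time R600_TRACE_CS switch, so tracing is now selected per run. Assuming r600g's usual debug-option plumbing (the exact table name is not shown in this excerpt), the new entry is reachable from the environment roughly like this:

/* Sketch: the "trace_cs" table entry added above is parsed from the R600_DEBUG
 * environment variable, e.g. R600_DEBUG=trace_cs, via gallium's
 * debug_get_flags_option(). */
rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", r600_debug_options, 0);
if (rscreen->debug_flags & DBG_TRACE_CS) {
    /* the trace bo is allocated and mapped at screen creation, see the
     * screen-creation hunk above */
}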
* XXX: Not sure if this is the best place for global_pool. Also,
* it's not thread safe, so it won't work with multiple contexts. */
struct compute_memory_pool *global_pool;
-#if R600_TRACE_CS
struct r600_resource *trace_bo;
uint32_t *trace_ptr;
unsigned cs_count;
-#endif
r600g_dma_blit_t dma_blit;
/* Auxiliary context. Mainly used to initialize resources.
cs->cdw += cb->num_dw;
}
-#if R600_TRACE_CS
void r600_trace_emit(struct r600_context *rctx);
-#endif
static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
atom->emit(rctx, atom);
atom->dirty = false;
-#if R600_TRACE_CS
if (rctx->screen->trace_bo) {
r600_trace_emit(rctx);
}
-#endif
}
static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso)
(info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
}
-#if R600_TRACE_CS
if (rctx->screen->trace_bo) {
r600_trace_emit(rctx);
}
-#endif
/* Set the depth buffer as dirty. */
if (rctx->framebuffer.state.zsbuf) {
rctx->context.draw_vbo = r600_draw_vbo;
}
-#if R600_TRACE_CS
void r600_trace_emit(struct r600_context *rctx)
{
struct r600_screen *rscreen = rctx->screen;
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
r600_write_value(cs, reloc);
}
-#endif
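The middle of r600_trace_emit is elided in this excerpt. Below is a hedged reconstruction of its shape, inferred from the fragments above and from how trace_ptr[0]/trace_ptr[1] are consumed on lockup; the packet encoding and the helper signatures (r600_resource_va, r600_context_bo_reloc) are assumptions from r600g code of that era, not verbatim from the patch:

/* Reconstruction sketch, not verbatim: a 64-bit MEM_WRITE into the trace bo,
 * low dword = current cs offset, high dword = id of this cs, followed by the
 * NOP+reloc pair shown above. */
void r600_trace_emit(struct r600_context *rctx)
{
    struct r600_screen *rscreen = rctx->screen;
    struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
    uint64_t va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
    unsigned reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx,
                                           rscreen->trace_bo, RADEON_USAGE_READWRITE);

    r600_write_value(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
    r600_write_value(cs, va & 0xFFFFFFFFUL);     /* address, low dword */
    r600_write_value(cs, (va >> 32UL) & 0xFFUL); /* address, high bits */
    r600_write_value(cs, cs->cdw);               /* read back as trace_ptr[0] */
    r600_write_value(cs, rscreen->cs_count);     /* read back as trace_ptr[1] */
    r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
    r600_write_value(cs, reloc);
}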
while(dec->cs->cdw % 16)
pm4[dec->cs->cdw++] = RUVD_PKT2();
- dec->ws->cs_flush(dec->cs, 0);
+ dec->ws->cs_flush(dec->cs, 0, 0);
}
/* add a new set register command to the IB */
dec->set_dtb = set_dtb;
dec->stream_handle = alloc_stream_handle();
dec->ws = ws;
- dec->cs = ws->cs_create(ws, RING_UVD);
+ dec->cs = ws->cs_create(ws, RING_UVD, NULL);
if (!dec->cs) {
RUVD_ERR("Can't get command submission context.\n");
goto error;
#endif
/* Flush the CS. */
- ctx->ws->cs_flush(ctx->cs, flags);
+ ctx->ws->cs_flush(ctx->cs, flags, 0);
#if R600_TRACE_CS
if (ctx->screen->trace_bo) {
}
#endif
- rctx->ws->cs_flush(rctx->cs, RADEON_FLUSH_COMPUTE);
+ rctx->ws->cs_flush(rctx->cs, RADEON_FLUSH_COMPUTE, 0);
rctx->ws->buffer_wait(program->shader.bo->buf, 0);
FREE(pm4);
case TAHITI:
si_init_state_functions(rctx);
LIST_INITHEAD(&rctx->active_query_list);
- rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
+ rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL);
rctx->max_db = 8;
si_init_config(rctx);
break;
}
-static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws, enum ring_type ring_type)
+static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
+ enum ring_type ring_type,
+ struct radeon_winsys_cs_handle *trace_buf)
{
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
struct radeon_drm_cs *cs;
pipe_semaphore_init(&cs->flush_completed, 0);
cs->ws = ws;
+ cs->trace_buf = (struct radeon_bo*)trace_buf;
if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
FREE(cs);
OUT_CS(&cs->base, index * RELOC_DWORDS);
}
-void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc)
+void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
unsigned i;
}
}
-#if RADEON_CS_DUMP_ON_LOCKUP
- radeon_dump_cs_on_lockup(csc);
-#endif
+ if (cs->trace_buf) {
+ radeon_dump_cs_on_lockup(cs, csc);
+ }
for (i = 0; i < csc->crelocs; i++)
p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
-static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
+static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
struct radeon_cs_context *tmp;
cs->csc = cs->cst;
cs->cst = tmp;
+ cs->cst->cs_trace_id = cs_trace_id;
+
/* If the CS is not empty or overflowed, emit it in a separate thread. */
if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
unsigned i, crelocs = cs->cst->crelocs;
}
}
pipe_mutex_unlock(cs->ws->cs_stack_lock);
- radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
+ radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
}
} else {
radeon_cs_context_cleanup(cs->cst);
#include "radeon_drm_bo.h"
#include <radeon_drm.h>
-#define RADEON_CS_DUMP_ON_LOCKUP 0
-
struct radeon_cs_context {
uint32_t buf[RADEON_MAX_CMDBUF_DWORDS];
uint64_t chunk_array[3];
uint32_t flags[2];
+ uint32_t cs_trace_id;
+
/* Relocs. */
unsigned nrelocs;
unsigned crelocs;
int flush_started;
pipe_semaphore flush_completed;
+ struct radeon_bo *trace_buf;
};
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs);
void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws);
-void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc);
+void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc);
-#if RADEON_CS_DUMP_ON_LOCKUP
-void radeon_dump_cs_on_lockup(struct radeon_cs_context *csc);
-#endif
+void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context *csc);
#endif
#include "radeon_drm_cs.h"
#include "radeon_drm_bo.h"
-#if RADEON_CS_DUMP_ON_LOCKUP
-static bool dumped = false;
+#define RADEON_CS_DUMP_AFTER_MS_TIMEOUT 500
-void radeon_dump_cs_on_lockup(struct radeon_cs_context *csc)
+void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
struct drm_radeon_gem_busy args;
FILE *dump;
unsigned i, lockup;
uint32_t *ptr;
+ char fname[32];
/* only dump the cs which triggered the lockup (matched via its trace id below) */
- if (!csc->crelocs || dumped) {
+ if (!csc->crelocs) {
/* we cannot determine whether a lockup occurred if the cs used no bo,
* and in that case most likely no lockup occurred
*/
memset(&args, 0, sizeof(args));
args.handle = csc->relocs_bo[0]->handle;
- for (i = 0; i < 10; i++) {
- usleep(5);
+ for (i = 0; i < RADEON_CS_DUMP_AFTER_MS_TIMEOUT; i++) {
+ usleep(1000); /* 1 ms per iteration, so the timeout really is in ms */
lockup = drmCommandWriteRead(csc->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args));
if (!lockup) {
break;
}
}
- if (!lockup || i < 10) {
+ if (!lockup || i < RADEON_CS_DUMP_AFTER_MS_TIMEOUT) {
+ return;
+ }
+
+ ptr = radeon_bo_do_map(cs->trace_buf);
+ fprintf(stderr, "timeout on cs lockup likely happen at cs 0x%08x dw 0x%08x\n", ptr[1], ptr[0]);
+
+ if (csc->cs_trace_id != ptr[1]) {
return;
}
/* ok, we are most likely facing a lockup; write the standalone replay file */
- dump = fopen("radeon_lockup.c", "w");
+ snprintf(fname, sizeof(fname), "rlockup_0x%08x.c", csc->cs_trace_id);
+ dump = fopen(fname, "w");
if (dump == NULL) {
return;
}
fprintf(dump, " * in same directory. You can find radeon_ctx.h in mesa tree :\n");
fprintf(dump, " * mesa/src/gallium/winsys/radeon/tools/radeon_ctx.h\n");
fprintf(dump, " * Build with :\n");
- fprintf(dump, " * gcc -O0 -g radeon_lockup.c -ldrm -o radeon_lockup -I/usr/include/libdrm\n");
+ fprintf(dump, " * gcc -O0 -g %s -ldrm -o rlockup_0x%08x -I/usr/include/libdrm\n", fname, csc->cs_trace_id);
fprintf(dump, " */\n");
+ fprintf(dump, " /* timeout on cs lockup likely happen at cs 0x%08x dw 0x%08x*/\n", ptr[1], ptr[0]);
fprintf(dump, "#include <stdio.h>\n");
fprintf(dump, "#include <stdint.h>\n");
fprintf(dump, "#include \"radeon_ctx.h\"\n");
}
fprintf(dump, "};\n\n");
+ fprintf(dump, "/* cs %d dw */\n", csc->chunks[0].length_dw);
fprintf(dump, "static uint32_t cs[] = {\n");
ptr = csc->buf;
for (i = 0; i < csc->chunks[0].length_dw; i++) {
fprintf(dump, " bo_wait(&ctx, bo[0]);\n");
fprintf(dump, "}\n");
fclose(dump);
- dumped = true;
}
-#endif
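For reference, the file this function writes is a standalone replay program. A schematic of its shape, reconstructed from the fprintf calls above (the bo setup code is elided in this excerpt, so the placeholders here are illustrative only):

/* rlockup_<csid>.c, schematic: */
#include <stdio.h>
#include <stdint.h>
#include "radeon_ctx.h" /* mesa/src/gallium/winsys/radeon/tools/radeon_ctx.h */

/* cs NNN dw */
static uint32_t cs[] = { 0 /* raw command-stream dwords dumped above */ };

int main(int argc, char *argv[])
{
    struct ctx ctx;
    struct bo *bo[1]; /* one entry per reloc'd buffer, illustrative size */

    /* ... recreate and upload each bo referenced by the cs ... */
    /* ... submit the cs ... */
    bo_wait(&ctx, bo[0]); /* hangs again here if the lockup reproduces */
    return 0;
}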
pipe_mutex_unlock(ws->cs_stack_lock);
if (cs) {
- radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
+ radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
pipe_mutex_lock(ws->cs_stack_lock);
for (i = 1; i < p_atomic_read(&ws->ncs); i++) {
* Create a command stream.
*
* \param ws The winsys this function is called from.
+ * \param ring_type The ring type (GFX, DMA, UVD)
+ * \param trace_buf Trace buffer when tracing is enabled
*/
- struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type);
+ struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
+ enum ring_type ring_type,
+ struct radeon_winsys_cs_handle *trace_buf);
/**
* Destroy a command stream.
/**
* Flush a command stream.
*
- * \param cs A command stream to flush.
- * \param flags, RADEON_FLUSH_ASYNC or 0.
+ * \param cs A command stream to flush.
+ * \param flags RADEON_FLUSH_ASYNC or 0.
+ * \param cs_trace_id A unique identifier for the cs.
*/
- void (*cs_flush)(struct radeon_winsys_cs *cs, unsigned flags);
+ void (*cs_flush)(struct radeon_winsys_cs *cs, unsigned flags, uint32_t cs_trace_id);
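Callers that do not trace simply pass NULL/0 through the widened interface (as the r300, UVD, compute, and radeonsi hunks above do), while r600g threads a real trace buffer and a per-screen counter through it. A minimal usage sketch under those assumptions (names are illustrative):

/* Sketch: driver-side use of the widened winsys interface. */
struct radeon_winsys_cs *cs =
    ws->cs_create(ws, RING_GFX, trace_bo ? trace_bo->cs_buf : NULL);
/* ... build the command stream ... */
ws->cs_flush(cs, RADEON_FLUSH_ASYNC, screen->cs_count++); /* id ends up in the trace bo */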
/**
* Set a flush callback which is called from winsys when flush is