#include "r600d.h"
#include "util/u_memory.h"
#include <errno.h>
+#include <unistd.h>
/* Get backends mask */
void r600_get_backend_mask(struct r600_context *ctx)
for (i = 0; i < R600_NUM_ATOMS; i++) {
if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
num_dw += ctx->atoms[i]->num_dw;
+#if R600_TRACE_CS
+ if (ctx->screen->trace_bo) {
+ num_dw += R600_TRACE_CS_DWORDS;
+ }
+#endif
}
}
/* The upper-bound of how much space a draw command would take. */
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
+#if R600_TRACE_CS
+ if (ctx->screen->trace_bo) {
+ num_dw += R600_TRACE_CS_DWORDS;
+ }
+#endif
}
/* Count in queries_suspend. */
}
/* Flush the CS. */
+#if R600_TRACE_CS
+	/* Tracing only: dump every dword of the command stream to stderr
+	 * before it is handed to the winsys, tagged with the CS sequence
+	 * number and the dword index, then advance the sequence number. */
+	if (ctx->screen->trace_bo) {
+		struct r600_screen *rscreen = ctx->screen;
+		unsigned i;
+
+		for (i = 0; i < cs->cdw; i++) {
+			/* cs_count and i are unsigned, so print them with %u. */
+			fprintf(stderr, "[%4u] [%5u] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
+		}
+		rscreen->cs_count++;
+	}
+#endif
ctx->ws->cs_flush(ctx->cs, flags);
+#if R600_TRACE_CS
+	/* Tracing only: after submitting the CS, poll the trace buffer a
+	 * bounded number of times. If it is still busy after the last poll
+	 * the CS is reported as a likely lockup, together with the last
+	 * marker found in the trace buffer (trace_ptr[0] is reported as the
+	 * dword offset, trace_ptr[1] as the CS number). */
+	if (ctx->screen->trace_bo) {
+		struct r600_screen *rscreen = ctx->screen;
+		const unsigned poll_interval_us = 5;
+		const unsigned max_polls = 10;
+		unsigned i;
+
+		for (i = 0; i < max_polls; i++) {
+			/* usleep() takes microseconds, not milliseconds. */
+			usleep(poll_interval_us);
+			if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
+				break;
+			}
+		}
+		if (i == max_polls) {
+			fprintf(stderr, "timeout on cs lockup likely happen at cs %u dw %u\n",
+				(unsigned)rscreen->trace_ptr[1], (unsigned)rscreen->trace_ptr[0]);
+		} else {
+			/* i + 1 sleeps have happened by the time the buffer is
+			 * seen idle; report the elapsed time in the unit that
+			 * was actually slept. */
+			fprintf(stderr, "cs %u executed in %uus\n",
+				(unsigned)rscreen->trace_ptr[1], (i + 1) * poll_interval_us);
+		}
+	}
+#endif
r600_begin_new_cs(ctx);
}
#include "r600_pipe.h"
/* the number of CS dwords for flushing and drawing */
-#define R600_MAX_FLUSH_CS_DWORDS 12
-#define R600_MAX_DRAW_CS_DWORDS 34
+#define R600_MAX_FLUSH_CS_DWORDS 12
+#define R600_MAX_DRAW_CS_DWORDS 34
+#define R600_TRACE_CS_DWORDS 7
/* these flags are used in register flags and added into block flags */
#define REG_FLAG_NEED_BO 1
rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
pipe_resource_reference((struct pipe_resource**)&rscreen->fences.bo, NULL);
}
+#if R600_TRACE_CS
+ if (rscreen->trace_bo) {
+ rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
+ pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
+ }
+#endif
pipe_mutex_destroy(rscreen->fences.mutex);
rscreen->ws->destroy(rscreen->ws);
rscreen->global_pool = compute_memory_pool_new(rscreen);
+#if R600_TRACE_CS
+	/* Tracing only: allocate and map a 4096-byte staging buffer used as
+	 * the trace target. Every user of the trace state tests trace_bo
+	 * and then dereferences trace_ptr, so the two must be either both
+	 * valid or both NULL. */
+	rscreen->cs_count = 0;
+	rscreen->trace_bo = NULL;
+	rscreen->trace_ptr = NULL;
+	if (rscreen->info.drm_minor >= 28) {
+		rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->screen,
+										PIPE_BIND_CUSTOM,
+										PIPE_USAGE_STAGING,
+										4096);
+		if (rscreen->trace_bo) {
+			rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
+									PIPE_TRANSFER_UNSYNCHRONIZED);
+			if (!rscreen->trace_ptr) {
+				/* Mapping failed: drop the buffer so tracing is
+				 * disabled instead of reading through a NULL
+				 * trace_ptr after a flush. */
+				pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
+			}
+		}
+	}
+#endif
+
return &rscreen->screen;
}
#define R600_NUM_ATOMS 37
+#define R600_TRACE_CS 0
+
#define R600_MAX_USER_CONST_BUFFERS 1
#define R600_MAX_DRIVER_CONST_BUFFERS 2
#define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
* XXX: Not sure if this is the best place for global_pool. Also,
* it's not thread safe, so it won't work with multiple contexts. */
struct compute_memory_pool *global_pool;
+#if R600_TRACE_CS
+ struct r600_resource *trace_bo;
+ uint32_t *trace_ptr;
+ unsigned cs_count;
+#endif
};
struct r600_pipe_sampler_view {
cs->cdw += cb->num_dw;
}
+#if R600_TRACE_CS
+void r600_trace_emit(struct r600_context *rctx);
+#endif
+
static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
atom->emit(rctx, atom);
atom->dirty = false;
+#if R600_TRACE_CS
+ if (rctx->screen->trace_bo) { /* trace_bo is only created at screen setup when drm_minor >= 28 */
+ r600_trace_emit(rctx); /* one trace marker after each emitted atom; the CS space check adds R600_TRACE_CS_DWORDS per dirty atom for it */
+ }
+#endif
}
static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso)
(info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
}
+#if R600_TRACE_CS
+ if (rctx->screen->trace_bo) {
+ r600_trace_emit(rctx);
+ }
+#endif
+
/* Set the depth buffer as dirty. */
if (rctx->framebuffer.state.zsbuf) {
struct pipe_surface *surf = rctx->framebuffer.state.zsbuf;
rctx->context.set_stream_output_targets = r600_set_so_targets;
rctx->context.draw_vbo = r600_draw_vbo;
}
+
+#if R600_TRACE_CS
+/* Append a trace marker to the current command stream.
+ *
+ * The marker is a PKT3_MEM_WRITE packet addressed at the screen's trace
+ * buffer, carrying the command stream's dword position at the time the
+ * payload is written followed by the screen's cs_count, and then a
+ * PKT3_NOP packet carrying the relocation for the trace buffer. Seven
+ * dwords are written in total, which is what R600_TRACE_CS_DWORDS
+ * reserves.
+ *
+ * Callers check rscreen->trace_bo before calling; this function does not.
+ */
+void r600_trace_emit(struct r600_context *rctx)
+{
+	struct r600_screen *screen = rctx->screen;
+	struct radeon_winsys_cs *cs = rctx->cs;
+	uint64_t trace_va = r600_resource_va(&screen->screen, (void*)screen->trace_bo);
+	uint32_t trace_reloc = r600_context_bo_reloc(rctx, screen->trace_bo, RADEON_USAGE_READWRITE);
+	uint32_t va_lo = trace_va & 0xFFFFFFFFUL;
+	uint32_t va_hi = (trace_va >> 32UL) & 0xFFUL;
+
+	/* MEM_WRITE: header, address low/high, two payload dwords. */
+	r600_write_value(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
+	r600_write_value(cs, va_lo);
+	r600_write_value(cs, va_hi);
+	/* cs->cdw must be read here, after the three writes above. */
+	r600_write_value(cs, cs->cdw);
+	r600_write_value(cs, screen->cs_count);
+
+	/* NOP carrying the relocation of the trace buffer. */
+	r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+	r600_write_value(cs, trace_reloc);
+}
+#endif