- if (sctx->screen->b.info.drm_major == 2 &&
- sctx->screen->b.info.drm_minor < 42)
- return; /* no radeon support */
-
- fprintf(f, "Memory-mapped registers:\n");
- si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
-
- /* No other registers can be read on DRM < 3.1.0. */
- if (sctx->screen->b.info.drm_major < 3 ||
- sctx->screen->b.info.drm_minor < 1) {
- fprintf(f, "\n");
- return;
- }
-
- si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
- si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
- si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
- si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
- si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
- si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
- si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
- si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
- si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
- si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
- si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
- si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
- si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
- si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
- si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
- si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
- si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
- si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
- si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
- si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
- fprintf(f, "\n");
-}
-
-static void si_dump_last_ib(struct si_context *sctx, FILE *f)
-{
- int last_trace_id = -1;
-
- if (!sctx->last_gfx.ib)
- return;
-
- if (sctx->last_trace_buf) {
- /* We are expecting that the ddebug pipe has already
- * waited for the context, so this buffer should be idle.
- * If the GPU is hung, there is no point in waiting for it.
- */
- uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
- NULL,
- PIPE_TRANSFER_UNSYNCHRONIZED |
- PIPE_TRANSFER_READ);
- if (map)
- last_trace_id = *map;
- }
-
- if (sctx->init_config)
- ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
- -1, "IB2: Init config", sctx->b.chip_class,
- NULL, NULL);
-
- if (sctx->init_config_gs_rings)
- ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
- sctx->init_config_gs_rings->ndw,
- -1, "IB2: Init GS rings", sctx->b.chip_class,
- NULL, NULL);
-
- ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
- last_trace_id, "IB", sctx->b.chip_class,
- NULL, NULL);
+ if (!sctx->screen->info.has_read_registers_query)
+ return;
+
+ fprintf(f, "Memory-mapped registers:\n");
+ si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
+
+ /* No other registers can be read on DRM < 3.1.0. */
+ if (!sctx->screen->info.is_amdgpu || sctx->screen->info.drm_minor < 1) {
+ fprintf(f, "\n");
+ return;
+ }
+
+ si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
+ si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
+ si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
+ si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
+ si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
+ si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
+ si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
+ if (sctx->chip_class <= GFX8) {
+ si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
+ si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
+ si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
+ }
+ si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
+ si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
+ si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
+ si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
+ si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
+ si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
+ si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
+ si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
+ si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
+ si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
+ fprintf(f, "\n");
+}
+
+/* Payload of one log chunk describing a slice of a saved command stream. */
+struct si_log_chunk_cs {
+   /* Context the chunk was recorded from; read again when printing. */
+   struct si_context *ctx;
+   /* Reference to the saved CS; released in si_log_chunk_type_cs_destroy(). */
+   struct si_saved_cs *cs;
+   /* When set, printing also emits the flush time and the BO list. */
+   bool dump_bo_list;
+   /* Dword range [gfx_begin, gfx_end) of the gfx IB covered by this chunk. */
+   unsigned gfx_begin;
+   unsigned gfx_end;
+   /* Dword range [compute_begin, compute_end) of the compute IB. */
+   unsigned compute_begin;
+   unsigned compute_end;
+};
+
+/* Log-chunk destroy hook: drops the saved-CS reference held by the chunk,
+ * then frees the chunk itself.
+ */
+static void si_log_chunk_type_cs_destroy(void *data)
+{
+   struct si_log_chunk_cs *cs_chunk = (struct si_log_chunk_cs *)data;
+
+   si_saved_cs_reference(&cs_chunk->cs, NULL);
+   free(cs_chunk);
+}
+
+/* Print the dwords [begin, end) of a command buffer, walking its previous
+ * chunks (cs->prev) first and then the current chunk (cs->current).
+ *
+ * begin and end are dword offsets counted from the start of the first
+ * previous chunk. Each piece of the range is handed to ac_parse_ib_chunk(),
+ * with a separator line written between chunks. The output is framed by a
+ * "begin" banner showing the original begin offset and an "end" banner
+ * showing the original end offset.
+ *
+ * The "end" banner is written on every path, including when the range ends
+ * inside one of the previous chunks, so that the banners are always paired.
+ */
+static void si_parse_current_ib(FILE *f, struct radeon_cmdbuf *cs, unsigned begin, unsigned end,
+                                int *last_trace_id, unsigned trace_id_count, const char *name,
+                                enum chip_class chip_class)
+{
+   /* end is rebased while walking the chunks; keep the caller's value for the footer. */
+   unsigned orig_end = end;
+
+   assert(begin <= end);
+
+   fprintf(f, "------------------ %s begin (dw = %u) ------------------\n", name, begin);
+
+   for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
+      struct radeon_cmdbuf_chunk *chunk = &cs->prev[prev_idx];
+
+      /* Part of the requested range lives in this chunk: print that part. */
+      if (begin < chunk->cdw) {
+         ac_parse_ib_chunk(f, chunk->buf + begin, MIN2(end, chunk->cdw) - begin, last_trace_id,
+                           trace_id_count, chip_class, NULL, NULL);
+      }
+
+      /* The range ends within this chunk; only the footer is left to print. */
+      if (end <= chunk->cdw)
+         goto done;
+
+      if (begin < chunk->cdw)
+         fprintf(f, "\n---------- Next %s Chunk ----------\n\n", name);
+
+      /* Rebase both offsets so they are relative to the next chunk. */
+      begin -= MIN2(begin, chunk->cdw);
+      end -= chunk->cdw;
+   }
+
+   assert(end <= cs->current.cdw);
+
+   ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id, trace_id_count,
+                     chip_class, NULL, NULL);
+
+done:
+   fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n", name, orig_end);
+}
+
+/* Log-chunk print hook: decodes the gfx and compute IB ranges recorded in the
+ * chunk into f and, when the chunk asks for it, appends the flush time and
+ * the buffer list of the gfx IB.
+ */
+static void si_log_chunk_type_cs_print(void *data, FILE *f)
+{
+   struct si_log_chunk_cs *chunk = data;
+   struct si_context *sctx = chunk->ctx;
+   struct si_saved_cs *saved = chunk->cs;
+   int gfx_trace_id = -1;
+   int compute_trace_id = -1;
+
+   /* The ddebug pipe is expected to have waited for the context already, so
+    * the trace buffer should be idle. If the GPU is hung, waiting for it
+    * would be pointless anyway, hence the unsynchronized mapping.
+    */
+   uint32_t *trace = sctx->ws->buffer_map(saved->trace_buf->buf, NULL,
+                                          PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_READ);
+   /* A trace ID is handed to the parser only when the mapping succeeded. */
+   const int trace_id_count = trace ? 1 : 0;
+
+   if (trace) {
+      gfx_trace_id = trace[0];
+      compute_trace_id = trace[1];
+   }
+
+   if (chunk->gfx_begin != chunk->gfx_end) {
+      /* A chunk starting at dword 0 also gets the preamble states printed. */
+      if (chunk->gfx_begin == 0) {
+         if (sctx->cs_preamble_state)
+            ac_parse_ib(f, sctx->cs_preamble_state->pm4, sctx->cs_preamble_state->ndw, NULL, 0,
+                        "IB2: Init config", sctx->chip_class, NULL, NULL);
+
+         if (sctx->cs_preamble_gs_rings)
+            ac_parse_ib(f, sctx->cs_preamble_gs_rings->pm4, sctx->cs_preamble_gs_rings->ndw,
+                        NULL, 0, "IB2: Init GS rings", sctx->chip_class, NULL, NULL);
+      }
+
+      /* Flushed: read the copy kept in the saved CS. Otherwise read the
+       * context's gfx command buffer directly.
+       */
+      if (!saved->flushed) {
+         si_parse_current_ib(f, sctx->gfx_cs, chunk->gfx_begin, chunk->gfx_end, &gfx_trace_id,
+                             trace_id_count, "IB", sctx->chip_class);
+      } else {
+         ac_parse_ib(f, saved->gfx.ib + chunk->gfx_begin, chunk->gfx_end - chunk->gfx_begin,
+                     &gfx_trace_id, trace_id_count, "IB", sctx->chip_class, NULL, NULL);
+      }
+   }
+
+   if (chunk->compute_begin != chunk->compute_end) {
+      assert(sctx->prim_discard_compute_cs);
+
+      if (!saved->flushed) {
+         si_parse_current_ib(f, sctx->prim_discard_compute_cs, chunk->compute_begin,
+                             chunk->compute_end, &compute_trace_id, trace_id_count,
+                             "Compute IB", sctx->chip_class);
+      } else {
+         ac_parse_ib(f, saved->compute.ib + chunk->compute_begin,
+                     chunk->compute_end - chunk->compute_begin, &compute_trace_id,
+                     trace_id_count, "Compute IB", sctx->chip_class, NULL, NULL);
+      }
+   }
+
+   if (!chunk->dump_bo_list)
+      return;
+
+   fprintf(f, "Flushing. Time: ");
+   util_dump_ns(f, saved->time_flush);
+   fprintf(f, "\n\n");
+   si_dump_bo_list(sctx, &saved->gfx, f);
+}
+
+/* Chunk type passed to u_log_chunk() for command-stream chunks: pairs the
+ * print and destroy hooks that operate on struct si_log_chunk_cs.
+ */
+static const struct u_log_chunk_type si_log_chunk_type_cs = {
+   .print = si_log_chunk_type_cs_print,
+   .destroy = si_log_chunk_type_cs_destroy,
+};
+
+/* Append a chunk to log covering everything written to the gfx command
+ * buffer and, if present, the primitive-discard compute command buffer since
+ * the positions last recorded in the current saved CS.
+ *
+ * Nothing is logged when neither buffer has grown and dump_bo_list is false.
+ * With dump_bo_list set, a chunk is always emitted so that printing it also
+ * outputs the flush time and buffer list.
+ *
+ * If the chunk cannot be allocated the call does nothing; the recorded
+ * positions are left untouched, so a later call still covers this range.
+ */
+static void si_log_cs(struct si_context *ctx, struct u_log_context *log, bool dump_bo_list)
+{
+   assert(ctx->current_saved_cs);
+
+   struct si_saved_cs *scs = ctx->current_saved_cs;
+   /* Current write position in dwords: finished chunks plus the open one. */
+   unsigned gfx_cur = ctx->gfx_cs->prev_dw + ctx->gfx_cs->current.cdw;
+   unsigned compute_cur = 0;
+
+   if (ctx->prim_discard_compute_cs)
+      compute_cur =
+         ctx->prim_discard_compute_cs->prev_dw + ctx->prim_discard_compute_cs->current.cdw;
+
+   if (!dump_bo_list && gfx_cur == scs->gfx_last_dw && compute_cur == scs->compute_last_dw)
+      return;
+
+   struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
+   if (!chunk)
+      return; /* Out of memory: drop this log entry instead of crashing. */
+
+   chunk->ctx = ctx;
+   /* The chunk keeps its own reference; the destroy hook releases it. */
+   si_saved_cs_reference(&chunk->cs, scs);
+   chunk->dump_bo_list = dump_bo_list;
+
+   chunk->gfx_begin = scs->gfx_last_dw;
+   chunk->gfx_end = gfx_cur;
+   scs->gfx_last_dw = gfx_cur;
+
+   chunk->compute_begin = scs->compute_last_dw;
+   chunk->compute_end = compute_cur;
+   scs->compute_last_dw = compute_cur;
+
+   u_log_chunk(log, &si_log_chunk_type_cs, chunk);
+}
+
+/* Logger callback taking the context as an opaque pointer: logs the command
+ * stream written so far, without requesting the buffer list.
+ */
+void si_auto_log_cs(void *data, struct u_log_context *log)
+{
+   si_log_cs((struct si_context *)data, log, false);
+}
+
+void si_log_hw_flush(struct si_context *sctx)
+{
+ if (!sctx->log)
+ return;
+
+ si_log_cs(sctx, sctx->log, true);
+
+ if (&sctx->b == sctx->screen->aux_context) {
+ /* The aux context isn't captured by the ddebug wrapper,
+ * so we dump it on a flush-by-flush basis here.
+ */
+ FILE *f = dd_get_debug_file(false);
+ if (!f) {
+ fprintf(stderr, "radeonsi: error opening aux context dump file.\n");
+ } else {
+ dd_write_header(f, &sctx->screen->b, 0);
+
+ fprintf(f, "Aux context dump:\n\n");
+ u_log_new_page_print(sctx->log, f);
+
+ fclose(f);
+ }
+ }