X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_debug.c;h=6eddfcbe8dcd74b918d21dfa0ec4ce6abdf2456a;hb=921c1d24d5ea17e5448c1f464416397e6fa9e418;hp=36cbb8866ed9dd219805be65dc65599a079dd811;hpb=a2e9d9b4c199ff1b53a625a345e566200016e2d9;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 36cbb8866ed..6eddfcbe8dc 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -25,14 +25,15 @@ #include "si_pipe.h" #include "si_compute.h" #include "sid.h" -#include "gfx9d.h" #include "sid_tables.h" +#include "tgsi/tgsi_from_mesa.h" #include "driver_ddebug/dd_util.h" #include "util/u_dump.h" #include "util/u_log.h" #include "util/u_memory.h" #include "util/u_string.h" #include "ac_debug.h" +#include "ac_rtld.h" static void si_dump_bo_list(struct si_context *sctx, const struct radeon_saved_cs *saved, FILE *f); @@ -43,7 +44,7 @@ DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL) * Store a linearized copy of all chunks of \p cs together with the buffer * list in \p saved. */ -void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, +void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_saved_cs *saved, bool get_buffer_list) { uint32_t *buf; @@ -93,18 +94,35 @@ void si_clear_saved_cs(struct radeon_saved_cs *saved) void si_destroy_saved_cs(struct si_saved_cs *scs) { si_clear_saved_cs(&scs->gfx); - r600_resource_reference(&scs->trace_buf, NULL); + si_resource_reference(&scs->trace_buf, NULL); free(scs); } static void si_dump_shader(struct si_screen *sscreen, - enum pipe_shader_type processor, - const struct si_shader *shader, FILE *f) + struct si_shader *shader, FILE *f) { if (shader->shader_log) fwrite(shader->shader_log, shader->shader_log_size, 1, f); else - si_shader_dump(sscreen, shader, NULL, processor, f, false); + si_shader_dump(sscreen, shader, NULL, f, false); + + if (shader->bo && sscreen->options.dump_shader_binary) { + unsigned size = shader->bo->b.b.width0; + fprintf(f, "BO: VA=%"PRIx64" Size=%u\n", shader->bo->gpu_address, size); + + const char *mapped = sscreen->ws->buffer_map(shader->bo->buf, NULL, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ | + RADEON_TRANSFER_TEMPORARY); + + for (unsigned i = 0; i < size; i += 4) { + fprintf(f, " %4x: %08x\n", i, *(uint32_t*)(mapped + i)); + } + + sscreen->ws->buffer_unmap(shader->bo->buf); + + fprintf(f, "\n"); + } } struct si_log_chunk_shader { @@ -118,7 +136,6 @@ struct si_log_chunk_shader { */ struct si_context *ctx; struct si_shader *shader; - enum pipe_shader_type processor; /* For keep-alive reference counts */ struct si_shader_selector *sel; @@ -139,8 +156,7 @@ si_log_chunk_shader_print(void *data, FILE *f) { struct si_log_chunk_shader *chunk = data; struct si_screen *sscreen = chunk->ctx->screen; - si_dump_shader(sscreen, chunk->processor, - chunk->shader, f); + si_dump_shader(sscreen, chunk->shader, f); } static struct u_log_chunk_type si_log_chunk_type_shader = { @@ -159,7 +175,6 @@ static void si_dump_gfx_shader(struct si_context *ctx, struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); chunk->ctx = ctx; - chunk->processor = state->cso->info.processor; chunk->shader = current; si_shader_selector_reference(ctx, &chunk->sel, current->selector); u_log_chunk(log, &si_log_chunk_type_shader, chunk); @@ -175,7 +190,6 @@ static void si_dump_compute_shader(struct si_context *ctx, struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); chunk->ctx = ctx; - chunk->processor = PIPE_SHADER_COMPUTE; chunk->shader = &state->program->shader; si_compute_reference(&chunk->program, state->program); u_log_chunk(log, &si_log_chunk_type_shader, chunk); @@ -184,15 +198,16 @@ static void si_dump_compute_shader(struct si_context *ctx, /** * Shader compiles can be overridden with arbitrary ELF objects by setting * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2] + * + * TODO: key this off some hash */ -bool si_replace_shader(unsigned num, struct ac_shader_binary *binary) +bool si_replace_shader(unsigned num, struct si_shader_binary *binary) { const char *p = debug_get_option_replace_shaders(); const char *semicolon; char *copy = NULL; FILE *f; long filesize, nread; - char *buf = NULL; bool replaced = false; if (!p) @@ -248,23 +263,25 @@ bool si_replace_shader(unsigned num, struct ac_shader_binary *binary) if (fseek(f, 0, SEEK_SET) != 0) goto file_error; - buf = MALLOC(filesize); - if (!buf) { + binary->elf_buffer = MALLOC(filesize); + if (!binary->elf_buffer) { fprintf(stderr, "out of memory\n"); goto out_close; } - nread = fread(buf, 1, filesize, f); - if (nread != filesize) + nread = fread((void*)binary->elf_buffer, 1, filesize, f); + if (nread != filesize) { + FREE((void*)binary->elf_buffer); + binary->elf_buffer = NULL; goto file_error; + } - ac_elf_read(buf, filesize, binary); + binary->elf_size = nread; replaced = true; out_close: fclose(f); out_free: - FREE(buf); free(copy); return replaced; @@ -301,7 +318,7 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f) si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS); /* No other registers can be read on DRM < 3.1.0. */ - if (sctx->screen->info.drm_major < 3 || + if (!sctx->screen->info.is_amdgpu || sctx->screen->info.drm_minor < 1) { fprintf(f, "\n"); return; @@ -314,7 +331,7 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f) si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3); si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG); si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG); - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS); si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2); si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3); @@ -337,6 +354,7 @@ struct si_log_chunk_cs { struct si_saved_cs *cs; bool dump_bo_list; unsigned gfx_begin, gfx_end; + unsigned compute_begin, compute_end; }; static void si_log_chunk_type_cs_destroy(void *data) @@ -346,7 +364,7 @@ static void si_log_chunk_type_cs_destroy(void *data) free(chunk); } -static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs, +static void si_parse_current_ib(FILE *f, struct radeon_cmdbuf *cs, unsigned begin, unsigned end, int *last_trace_id, unsigned trace_id_count, const char *name, enum chip_class chip_class) @@ -359,7 +377,7 @@ static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs, name, begin); for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) { - struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx]; + struct radeon_cmdbuf_chunk *chunk = &cs->prev[prev_idx]; if (begin < chunk->cdw) { ac_parse_ib_chunk(f, chunk->buf + begin, @@ -394,6 +412,7 @@ static void si_log_chunk_type_cs_print(void *data, FILE *f) struct si_context *ctx = chunk->ctx; struct si_saved_cs *scs = chunk->cs; int last_trace_id = -1; + int last_compute_trace_id = -1; /* We are expecting that the ddebug pipe has already * waited for the context, so this buffer should be idle. @@ -403,8 +422,10 @@ static void si_log_chunk_type_cs_print(void *data, FILE *f) NULL, PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_READ); - if (map) + if (map) { last_trace_id = map[0]; + last_compute_trace_id = map[1]; + } if (chunk->gfx_end != chunk->gfx_begin) { if (chunk->gfx_begin == 0) { @@ -432,6 +453,21 @@ static void si_log_chunk_type_cs_print(void *data, FILE *f) } } + if (chunk->compute_end != chunk->compute_begin) { + assert(ctx->prim_discard_compute_cs); + + if (scs->flushed) { + ac_parse_ib(f, scs->compute.ib + chunk->compute_begin, + chunk->compute_end - chunk->compute_begin, + &last_compute_trace_id, map ? 1 : 0, "Compute IB", ctx->chip_class, + NULL, NULL); + } else { + si_parse_current_ib(f, ctx->prim_discard_compute_cs, chunk->compute_begin, + chunk->compute_end, &last_compute_trace_id, + map ? 1 : 0, "Compute IB", ctx->chip_class); + } + } + if (chunk->dump_bo_list) { fprintf(f, "Flushing. Time: "); util_dump_ns(f, scs->time_flush); @@ -452,9 +488,14 @@ static void si_log_cs(struct si_context *ctx, struct u_log_context *log, struct si_saved_cs *scs = ctx->current_saved_cs; unsigned gfx_cur = ctx->gfx_cs->prev_dw + ctx->gfx_cs->current.cdw; + unsigned compute_cur = 0; + + if (ctx->prim_discard_compute_cs) + compute_cur = ctx->prim_discard_compute_cs->prev_dw + ctx->prim_discard_compute_cs->current.cdw; if (!dump_bo_list && - gfx_cur == scs->gfx_last_dw) + gfx_cur == scs->gfx_last_dw && + compute_cur == scs->compute_last_dw) return; struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk)); @@ -467,6 +508,10 @@ static void si_log_cs(struct si_context *ctx, struct u_log_context *log, chunk->gfx_end = gfx_cur; scs->gfx_last_dw = gfx_cur; + chunk->compute_begin = scs->compute_last_dw; + chunk->compute_end = compute_cur; + scs->compute_last_dw = compute_cur; + u_log_chunk(log, &si_log_chunk_type_cs, chunk); } @@ -482,6 +527,23 @@ void si_log_hw_flush(struct si_context *sctx) return; si_log_cs(sctx, sctx->log, true); + + if (&sctx->b == sctx->screen->aux_context) { + /* The aux context isn't captured by the ddebug wrapper, + * so we dump it on a flush-by-flush basis here. + */ + FILE *f = dd_get_debug_file(false); + if (!f) { + fprintf(stderr, "radeonsi: error opening aux context dump file.\n"); + } else { + dd_write_header(f, &sctx->screen->b, 0); + + fprintf(f, "Aux context dump:\n\n"); + u_log_new_page_print(sctx->log, f); + + fclose(f); + } + } } static const char *priority_to_string(enum radeon_bo_priority priority) @@ -496,10 +558,6 @@ static const char *priority_to_string(enum radeon_bo_priority priority) ITEM(IB2), ITEM(DRAW_INDIRECT), ITEM(INDEX_BUFFER), - ITEM(VCE), - ITEM(UVD), - ITEM(SDMA_BUFFER), - ITEM(SDMA_TEXTURE), ITEM(CP_DMA), ITEM(CONST_BUFFER), ITEM(DESCRIPTORS), @@ -515,9 +573,7 @@ static const char *priority_to_string(enum radeon_bo_priority priority) ITEM(DEPTH_BUFFER), ITEM(COLOR_BUFFER_MSAA), ITEM(DEPTH_BUFFER_MSAA), - ITEM(CMASK), - ITEM(DCC), - ITEM(HTILE), + ITEM(SEPARATE_META), ITEM(SHADER_BINARY), ITEM(SHADER_RINGS), ITEM(SCRATCH_BUFFER), @@ -574,8 +630,8 @@ static void si_dump_bo_list(struct si_context *sctx, size / page_size, va / page_size, (va + size) / page_size); /* Print the usage. */ - for (j = 0; j < 64; j++) { - if (!(saved->bo_list[i].priority_usage & (1ull << j))) + for (j = 0; j < 32; j++) { + if (!(saved->bo_list[i].priority_usage & (1u << j))) continue; fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j)); @@ -590,23 +646,23 @@ static void si_dump_bo_list(struct si_context *sctx, static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log) { struct pipe_framebuffer_state *state = &sctx->framebuffer.state; - struct r600_texture *rtex; + struct si_texture *tex; int i; for (i = 0; i < state->nr_cbufs; i++) { if (!state->cbufs[i]) continue; - rtex = (struct r600_texture*)state->cbufs[i]->texture; + tex = (struct si_texture*)state->cbufs[i]->texture; u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i); - si_print_texture_info(sctx->screen, rtex, log); + si_print_texture_info(sctx->screen, tex, log); u_log_printf(log, "\n"); } if (state->zsbuf) { - rtex = (struct r600_texture*)state->zsbuf->texture; + tex = (struct si_texture*)state->zsbuf->texture; u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n"); - si_print_texture_info(sctx->screen, rtex, log); + si_print_texture_info(sctx->screen, tex, log); u_log_printf(log, "\n"); } } @@ -618,7 +674,7 @@ struct si_log_chunk_desc_list { uint32_t *gpu_list; /** Reference of buffer where the list is uploaded, so that gpu_list * is kept live. */ - struct r600_resource *buf; + struct si_resource *buf; const char *shader_name; const char *elem_name; @@ -634,7 +690,7 @@ static void si_log_chunk_desc_list_destroy(void *data) { struct si_log_chunk_desc_list *chunk = data; - r600_resource_reference(&chunk->buf, NULL); + si_resource_reference(&chunk->buf, NULL); FREE(chunk); } @@ -642,6 +698,8 @@ static void si_log_chunk_desc_list_print(void *data, FILE *f) { struct si_log_chunk_desc_list *chunk = data; + unsigned sq_img_rsrc_word0 = chunk->chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 + : R_008F10_SQ_IMG_RSRC_WORD0; for (unsigned i = 0; i < chunk->num_elements; i++) { unsigned cpu_dw_offset = i * chunk->element_dw_size; @@ -663,7 +721,7 @@ si_log_chunk_desc_list_print(void *data, FILE *f) case 8: for (unsigned j = 0; j < 8; j++) ac_dump_reg(f, chunk->chip_class, - R_008F10_SQ_IMG_RSRC_WORD0 + j*4, + sq_img_rsrc_word0 + j*4, gpu_list[j], 0xffffffff); fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); @@ -675,7 +733,7 @@ si_log_chunk_desc_list_print(void *data, FILE *f) case 16: for (unsigned j = 0; j < 8; j++) ac_dump_reg(f, chunk->chip_class, - R_008F10_SQ_IMG_RSRC_WORD0 + j*4, + sq_img_rsrc_word0 + j*4, gpu_list[j], 0xffffffff); fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); @@ -687,7 +745,7 @@ si_log_chunk_desc_list_print(void *data, FILE *f) fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); for (unsigned j = 0; j < 8; j++) ac_dump_reg(f, chunk->chip_class, - R_008F10_SQ_IMG_RSRC_WORD0 + j*4, + sq_img_rsrc_word0 + j*4, gpu_list[8+j], 0xffffffff); fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); @@ -753,7 +811,7 @@ static void si_dump_descriptor_list(struct si_screen *screen, chunk->slot_remap = slot_remap; chunk->chip_class = screen->info.chip_class; - r600_resource_reference(&chunk->buf, desc->buffer); + si_resource_reference(&chunk->buf, desc->buffer); chunk->gpu_list = desc->gpu_list; for (unsigned i = 0; i < num_elements; ++i) { @@ -866,33 +924,60 @@ struct si_shader_inst { }; /** - * Split a disassembly string into instructions and add them to the array - * pointed to by \p instructions. + * Open the given \p binary as \p rtld_binary and split the contained + * disassembly string into instructions and add them to the array + * pointed to by \p instructions, which must be sufficiently large. * * Labels are considered to be part of the following instruction. + * + * The caller must keep \p rtld_binary alive as long as \p instructions are + * used and then close it afterwards. */ -static void si_add_split_disasm(const char *disasm, +static void si_add_split_disasm(struct si_screen *screen, + struct ac_rtld_binary *rtld_binary, + struct si_shader_binary *binary, uint64_t *addr, unsigned *num, - struct si_shader_inst *instructions) + struct si_shader_inst *instructions, + enum pipe_shader_type shader_type) { - const char *semicolon; + if (!ac_rtld_open(rtld_binary, (struct ac_rtld_open_info){ + .info = &screen->info, + .shader_type = tgsi_processor_to_shader_stage(shader_type), + .wave_size = 64, + .num_parts = 1, + .elf_ptrs = &binary->elf_buffer, + .elf_sizes = &binary->elf_size })) + return; + + const char *disasm; + size_t nbytes; + if (!ac_rtld_get_section_by_name(rtld_binary, ".AMDGPU.disasm", + &disasm, &nbytes)) + return; + + const char *end = disasm + nbytes; + while (disasm < end) { + const char *semicolon = memchr(disasm, ';', end - disasm); + if (!semicolon) + break; - while ((semicolon = strchr(disasm, ';'))) { struct si_shader_inst *inst = &instructions[(*num)++]; - const char *end = util_strchrnul(semicolon, '\n'); + const char *inst_end = memchr(semicolon + 1, '\n', end - semicolon - 1); + if (!inst_end) + inst_end = end; inst->text = disasm; - inst->textlen = end - disasm; + inst->textlen = inst_end - disasm; inst->addr = *addr; /* More than 16 chars after ";" means the instruction is 8 bytes long. */ - inst->size = end - semicolon > 16 ? 8 : 4; + inst->size = inst_end - semicolon > 16 ? 8 : 4; *addr += inst->size; - if (!(*end)) + if (inst_end == end) break; - disasm = end + 1; + disasm = inst_end + 1; } } @@ -905,9 +990,11 @@ static void si_print_annotated_shader(struct si_shader *shader, unsigned num_waves, FILE *f) { - if (!shader || !shader->binary.disasm_string) + if (!shader) return; + struct si_screen *screen = shader->selector->screen; + enum pipe_shader_type shader_type = shader->selector->type; uint64_t start_addr = shader->bo->gpu_address; uint64_t end_addr = start_addr + shader->bo->b.b.width0; unsigned i; @@ -929,30 +1016,31 @@ static void si_print_annotated_shader(struct si_shader *shader, */ unsigned num_inst = 0; uint64_t inst_addr = start_addr; + struct ac_rtld_binary rtld_binaries[5] = {}; struct si_shader_inst *instructions = calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst)); if (shader->prolog) { - si_add_split_disasm(shader->prolog->binary.disasm_string, - &inst_addr, &num_inst, instructions); + si_add_split_disasm(screen, &rtld_binaries[0], &shader->prolog->binary, + &inst_addr, &num_inst, instructions, shader_type); } if (shader->previous_stage) { - si_add_split_disasm(shader->previous_stage->binary.disasm_string, - &inst_addr, &num_inst, instructions); + si_add_split_disasm(screen, &rtld_binaries[1], &shader->previous_stage->binary, + &inst_addr, &num_inst, instructions, shader_type); } if (shader->prolog2) { - si_add_split_disasm(shader->prolog2->binary.disasm_string, - &inst_addr, &num_inst, instructions); + si_add_split_disasm(screen, &rtld_binaries[2], &shader->prolog2->binary, + &inst_addr, &num_inst, instructions, shader_type); } - si_add_split_disasm(shader->binary.disasm_string, - &inst_addr, &num_inst, instructions); + si_add_split_disasm(screen, &rtld_binaries[3], &shader->binary, + &inst_addr, &num_inst, instructions, shader_type); if (shader->epilog) { - si_add_split_disasm(shader->epilog->binary.disasm_string, - &inst_addr, &num_inst, instructions); + si_add_split_disasm(screen, &rtld_binaries[4], &shader->epilog->binary, + &inst_addr, &num_inst, instructions, shader_type); } fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", - si_get_shader_name(shader, shader->selector->type)); + si_get_shader_name(shader)); /* Print instructions with annotations. */ for (i = 0; i < num_inst; i++) { @@ -985,12 +1073,14 @@ static void si_print_annotated_shader(struct si_shader *shader, fprintf(f, "\n\n"); free(instructions); + for (unsigned i = 0; i < ARRAY_SIZE(rtld_binaries); ++i) + ac_rtld_close(&rtld_binaries[i]); } static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f) { struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; - unsigned num_waves = ac_get_wave_info(waves); + unsigned num_waves = ac_get_wave_info(sctx->chip_class, waves); fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); @@ -1058,23 +1148,30 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, void si_log_draw_state(struct si_context *sctx, struct u_log_context *log) { + struct si_shader_ctx_state *tcs_shader; + if (!log) return; + tcs_shader = &sctx->tcs_shader; + if (sctx->tes_shader.cso && !sctx->tcs_shader.cso) + tcs_shader = &sctx->fixed_func_tcs_shader; + si_dump_framebuffer(sctx, log); si_dump_gfx_shader(sctx, &sctx->vs_shader, log); - si_dump_gfx_shader(sctx, &sctx->tcs_shader, log); + si_dump_gfx_shader(sctx, tcs_shader, log); si_dump_gfx_shader(sctx, &sctx->tes_shader, log); si_dump_gfx_shader(sctx, &sctx->gs_shader, log); si_dump_gfx_shader(sctx, &sctx->ps_shader, log); si_dump_descriptor_list(sctx->screen, &sctx->descriptors[SI_DESCS_RW_BUFFERS], - "", "RW buffers", 4, SI_NUM_RW_BUFFERS, + "", "RW buffers", 4, + sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots, si_identity, log); si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log); - si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log); + si_dump_gfx_descriptors(sctx, tcs_shader, log); si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log); si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log); si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log);