#include "si_pipe.h"
#include "si_compute.h"
#include "sid.h"
-#include "gfx9d.h"
#include "sid_tables.h"
#include "driver_ddebug/dd_util.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "ac_debug.h"
+#include "ac_rtld.h"
static void si_dump_bo_list(struct si_context *sctx,
const struct radeon_saved_cs *saved, FILE *f);
+/**
+ * Write a dump of \p shader to \p f.
+ *
+ * If the shader carries a saved log (shader_log), that log is written
+ * verbatim; otherwise si_shader_dump() is called to produce the dump.
+ * When the shader has a buffer and the dump_shader_binary option is set,
+ * the buffer's address and size are printed, followed by its contents as
+ * one 32-bit hex word per line.
+ *
+ * \param sscreen    screen whose winsys and options are used
+ * \param processor  shader stage, forwarded to si_shader_dump()
+ * \param shader     shader to dump
+ * \param f          stream the dump is written to
+ */
static void si_dump_shader(struct si_screen *sscreen,
enum pipe_shader_type processor,
- const struct si_shader *shader, FILE *f)
+ struct si_shader *shader, FILE *f)
{
if (shader->shader_log)
fwrite(shader->shader_log, shader->shader_log_size, 1, f);
else
si_shader_dump(sscreen, shader, NULL, processor, f, false);
+
+	if (shader->bo && sscreen->options.dump_shader_binary) {
+		unsigned size = shader->bo->b.b.width0;
+		fprintf(f, "BO: VA=%"PRIx64" Size=%u\n", shader->bo->gpu_address, size);
+
+		const uint32_t *mapped = sscreen->ws->buffer_map(shader->bo->buf, NULL,
+								 PIPE_TRANSFER_UNSYNCHRONIZED |
+								 PIPE_TRANSFER_READ |
+								 RADEON_TRANSFER_TEMPORARY);
+
+		/* Don't dereference the mapping if the winsys did not provide
+		 * one; this is a debug path and must not crash on its own. */
+		if (mapped) {
+			for (unsigned i = 0; i < size; i += 4)
+				fprintf(f, " %4x: %08x\n", i, mapped[i / 4]);
+
+			sscreen->ws->buffer_unmap(shader->bo->buf);
+		} else {
+			fprintf(f, " (failed to map the shader buffer)\n");
+		}
+
+		fprintf(f, "\n");
+	}
}
struct si_log_chunk_shader {
/**
* Shader compiles can be overridden with arbitrary ELF objects by setting
* the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
+ *
+ * TODO: key this off some hash
*/
-bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
+bool si_replace_shader(unsigned num, struct si_shader_binary *binary)
{
const char *p = debug_get_option_replace_shaders();
const char *semicolon;
char *copy = NULL;
FILE *f;
long filesize, nread;
- char *buf = NULL;
bool replaced = false;
if (!p)
if (fseek(f, 0, SEEK_SET) != 0)
goto file_error;
- buf = MALLOC(filesize);
- if (!buf) {
+ binary->elf_buffer = MALLOC(filesize);
+ if (!binary->elf_buffer) {
fprintf(stderr, "out of memory\n");
goto out_close;
}
- nread = fread(buf, 1, filesize, f);
- if (nread != filesize)
+ nread = fread((void*)binary->elf_buffer, 1, filesize, f);
+ if (nread != filesize) {
+ FREE((void*)binary->elf_buffer);
+ binary->elf_buffer = NULL;
goto file_error;
+ }
- ac_elf_read(buf, filesize, binary);
+ binary->elf_size = nread;
replaced = true;
out_close:
fclose(f);
out_free:
- FREE(buf);
free(copy);
return replaced;
si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
/* No other registers can be read on DRM < 3.1.0. */
- if (sctx->screen->info.drm_major < 3 ||
+ if (!sctx->screen->info.is_amdgpu ||
sctx->screen->info.drm_minor < 1) {
fprintf(f, "\n");
return;
si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
- if (sctx->chip_class <= VI) {
+ if (sctx->chip_class <= GFX8) {
si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
struct si_saved_cs *cs;
bool dump_bo_list;
unsigned gfx_begin, gfx_end;
+ unsigned compute_begin, compute_end;
};
static void si_log_chunk_type_cs_destroy(void *data)
struct si_context *ctx = chunk->ctx;
struct si_saved_cs *scs = chunk->cs;
int last_trace_id = -1;
+ int last_compute_trace_id = -1;
/* We are expecting that the ddebug pipe has already
* waited for the context, so this buffer should be idle.
NULL,
PIPE_TRANSFER_UNSYNCHRONIZED |
PIPE_TRANSFER_READ);
- if (map)
+ if (map) {
last_trace_id = map[0];
+ last_compute_trace_id = map[1];
+ }
if (chunk->gfx_end != chunk->gfx_begin) {
if (chunk->gfx_begin == 0) {
}
}
+ if (chunk->compute_end != chunk->compute_begin) {
+ assert(ctx->prim_discard_compute_cs);
+
+ if (scs->flushed) {
+ ac_parse_ib(f, scs->compute.ib + chunk->compute_begin,
+ chunk->compute_end - chunk->compute_begin,
+ &last_compute_trace_id, map ? 1 : 0, "Compute IB", ctx->chip_class,
+ NULL, NULL);
+ } else {
+ si_parse_current_ib(f, ctx->prim_discard_compute_cs, chunk->compute_begin,
+ chunk->compute_end, &last_compute_trace_id,
+ map ? 1 : 0, "Compute IB", ctx->chip_class);
+ }
+ }
+
if (chunk->dump_bo_list) {
fprintf(f, "Flushing. Time: ");
util_dump_ns(f, scs->time_flush);
struct si_saved_cs *scs = ctx->current_saved_cs;
unsigned gfx_cur = ctx->gfx_cs->prev_dw + ctx->gfx_cs->current.cdw;
+ unsigned compute_cur = 0;
+
+ if (ctx->prim_discard_compute_cs)
+ compute_cur = ctx->prim_discard_compute_cs->prev_dw + ctx->prim_discard_compute_cs->current.cdw;
if (!dump_bo_list &&
- gfx_cur == scs->gfx_last_dw)
+ gfx_cur == scs->gfx_last_dw &&
+ compute_cur == scs->compute_last_dw)
return;
struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
chunk->gfx_end = gfx_cur;
scs->gfx_last_dw = gfx_cur;
+ chunk->compute_begin = scs->compute_last_dw;
+ chunk->compute_end = compute_cur;
+ scs->compute_last_dw = compute_cur;
+
u_log_chunk(log, &si_log_chunk_type_cs, chunk);
}
return;
si_log_cs(sctx, sctx->log, true);
+
+ if (&sctx->b == sctx->screen->aux_context) {
+ /* The aux context isn't captured by the ddebug wrapper,
+ * so we dump it on a flush-by-flush basis here.
+ */
+ FILE *f = dd_get_debug_file(false);
+ if (!f) {
+ fprintf(stderr, "radeonsi: error opening aux context dump file.\n");
+ } else {
+ dd_write_header(f, &sctx->screen->b, 0);
+
+ fprintf(f, "Aux context dump:\n\n");
+ u_log_new_page_print(sctx->log, f);
+
+ fclose(f);
+ }
+ }
}
static const char *priority_to_string(enum radeon_bo_priority priority)
si_log_chunk_desc_list_print(void *data, FILE *f)
{
struct si_log_chunk_desc_list *chunk = data;
+ unsigned sq_img_rsrc_word0 = chunk->chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0
+ : R_008F10_SQ_IMG_RSRC_WORD0;
for (unsigned i = 0; i < chunk->num_elements; i++) {
unsigned cpu_dw_offset = i * chunk->element_dw_size;
case 8:
for (unsigned j = 0; j < 8; j++)
ac_dump_reg(f, chunk->chip_class,
- R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
+ sq_img_rsrc_word0 + j*4,
gpu_list[j], 0xffffffff);
fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
case 16:
for (unsigned j = 0; j < 8; j++)
ac_dump_reg(f, chunk->chip_class,
- R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
+ sq_img_rsrc_word0 + j*4,
gpu_list[j], 0xffffffff);
fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
for (unsigned j = 0; j < 8; j++)
ac_dump_reg(f, chunk->chip_class,
- R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
+ sq_img_rsrc_word0 + j*4,
gpu_list[8+j], 0xffffffff);
fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
};
/**
- * Split a disassembly string into instructions and add them to the array
- * pointed to by \p instructions.
+ * Open the ELF held by \p binary as \p rtld_binary, look up its
+ * ".AMDGPU.disasm" section, split the contained disassembly text into
+ * instructions and add them to the array pointed to by \p instructions,
+ * which must be sufficiently large (no capacity is passed in or checked).
*
* Labels are considered to be part of the following instruction.
+ *
+ * \param screen        screen whose info is handed to ac_rtld_open()
+ * \param rtld_binary   storage for the opened binary
+ * \param binary        shader binary providing elf_buffer / elf_size
+ * \param addr          in/out: address assigned to the next instruction;
+ *                      advanced by the size of each instruction added
+ * \param num           in/out: number of entries used in \p instructions
+ * \param instructions  output array; each entry's text points into the
+ *                      section data rather than owning a copy
+ *
+ * If the binary cannot be opened or has no disassembly section, nothing
+ * is added and \p addr and \p num are left unchanged.
+ *
+ * The caller must keep \p rtld_binary alive as long as \p instructions are
+ * used and then close it afterwards.
*/
-static void si_add_split_disasm(const char *disasm,
+static void si_add_split_disasm(struct si_screen *screen,
+ struct ac_rtld_binary *rtld_binary,
+ struct si_shader_binary *binary,
uint64_t *addr,
unsigned *num,
struct si_shader_inst *instructions)
{
- const char *semicolon;
+ if (!ac_rtld_open(rtld_binary, (struct ac_rtld_open_info){
+ .info = &screen->info,
+ .num_parts = 1,
+ .elf_ptrs = &binary->elf_buffer,
+ .elf_sizes = &binary->elf_size }))
+ return; /* could not open the ELF: nothing is added */
+
+ const char *disasm;
+ size_t nbytes;
+ if (!ac_rtld_get_section_by_name(rtld_binary, ".AMDGPU.disasm",
+ &disasm, &nbytes))
+ return; /* no disassembly section; rtld_binary is left for the caller to close */
+
+ const char *end = disasm + nbytes; /* one past the last byte of the section */
+ while (disasm < end) {
+ const char *semicolon = memchr(disasm, ';', end - disasm);
+ if (!semicolon)
+ break; /* no further ';' in the section: stop */
- while ((semicolon = strchr(disasm, ';'))) {
struct si_shader_inst *inst = &instructions[(*num)++];
- const char *end = util_strchrnul(semicolon, '\n');
+ const char *inst_end = memchr(semicolon + 1, '\n', end - semicolon - 1);
+ if (!inst_end)
+ inst_end = end; /* final line without a newline runs to the section end */
inst->text = disasm;
- inst->textlen = end - disasm;
+ inst->textlen = inst_end - disasm;
inst->addr = *addr;
/* More than 16 chars after ";" means the instruction is 8 bytes long. */
- inst->size = end - semicolon > 16 ? 8 : 4;
+ inst->size = inst_end - semicolon > 16 ? 8 : 4;
*addr += inst->size;
- if (!(*end))
+ if (inst_end == end)
break;
- disasm = end + 1;
+ disasm = inst_end + 1; /* continue after the newline */
}
}
unsigned num_waves,
FILE *f)
{
- if (!shader || !shader->binary.disasm_string)
+ if (!shader)
return;
+ struct si_screen *screen = shader->selector->screen;
uint64_t start_addr = shader->bo->gpu_address;
uint64_t end_addr = start_addr + shader->bo->b.b.width0;
unsigned i;
*/
unsigned num_inst = 0;
uint64_t inst_addr = start_addr;
+ struct ac_rtld_binary rtld_binaries[5] = {};
struct si_shader_inst *instructions =
calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
if (shader->prolog) {
- si_add_split_disasm(shader->prolog->binary.disasm_string,
+ si_add_split_disasm(screen, &rtld_binaries[0], &shader->prolog->binary,
&inst_addr, &num_inst, instructions);
}
if (shader->previous_stage) {
- si_add_split_disasm(shader->previous_stage->binary.disasm_string,
+ si_add_split_disasm(screen, &rtld_binaries[1], &shader->previous_stage->binary,
&inst_addr, &num_inst, instructions);
}
if (shader->prolog2) {
- si_add_split_disasm(shader->prolog2->binary.disasm_string,
+ si_add_split_disasm(screen, &rtld_binaries[2], &shader->prolog2->binary,
&inst_addr, &num_inst, instructions);
}
- si_add_split_disasm(shader->binary.disasm_string,
+ si_add_split_disasm(screen, &rtld_binaries[3], &shader->binary,
&inst_addr, &num_inst, instructions);
if (shader->epilog) {
- si_add_split_disasm(shader->epilog->binary.disasm_string,
+ si_add_split_disasm(screen, &rtld_binaries[4], &shader->epilog->binary,
&inst_addr, &num_inst, instructions);
}
fprintf(f, "\n\n");
free(instructions);
+ for (unsigned i = 0; i < ARRAY_SIZE(rtld_binaries); ++i)
+ ac_rtld_close(&rtld_binaries[i]);
}
static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)