#include "si_pipe.h"
#include "si_compute.h"
#include "sid.h"
-#include "gfx9d.h"
#include "sid_tables.h"
#include "driver_ddebug/dd_util.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "ac_debug.h"
+#include "ac_rtld.h"
static void si_dump_bo_list(struct si_context *sctx,
const struct radeon_saved_cs *saved, FILE *f);
+/**
+ * Write the textual dump of \p shader to \p f.
+ *
+ * If the shader carries a saved log (\c shader_log), that log is written
+ * verbatim; otherwise si_shader_dump() is called to produce the dump.
+ *
+ * When the shader has a buffer object and the \c dump_shader_binary screen
+ * option is set, the contents of that buffer are appended as one 32-bit
+ * word per line, preceded by the buffer's GPU address and size.
+ *
+ * \param sscreen   screen providing the winsys and the debug options
+ * \param processor shader stage, forwarded to si_shader_dump()
+ * \param shader    shader to dump
+ * \param f         output stream
+ */
static void si_dump_shader(struct si_screen *sscreen,
enum pipe_shader_type processor,
- const struct si_shader *shader, FILE *f)
+ struct si_shader *shader, FILE *f)
{
if (shader->shader_log)
fwrite(shader->shader_log, shader->shader_log_size, 1, f);
else
si_shader_dump(sscreen, shader, NULL, processor, f, false);
+
+ if (shader->bo && sscreen->options.dump_shader_binary) {
+ unsigned size = shader->bo->b.b.width0;
+ fprintf(f, "BO: VA=%"PRIx64" Size=%u\n", shader->bo->gpu_address, size);
+
+ const uint32_t *mapped = sscreen->ws->buffer_map(shader->bo->buf, NULL,
+ PIPE_TRANSFER_UNSYNCHRONIZED |
+ PIPE_TRANSFER_READ |
+ RADEON_TRANSFER_TEMPORARY);
+
+ /* Mapping can fail; never dereference or unmap a buffer that
+ * was not mapped. */
+ if (mapped) {
+ /* Only whole dwords are printed so that a size that is not
+ * a multiple of 4 cannot cause a read past the buffer. */
+ for (unsigned i = 0; i < size / 4; i++) {
+ fprintf(f, " %4x: %08x\n", i * 4, mapped[i]);
+ }
+
+ sscreen->ws->buffer_unmap(shader->bo->buf);
+ } else {
+ fprintf(f, " (failed to map the shader buffer)\n");
+ }
+
+ fprintf(f, "\n");
+ }
}
struct si_log_chunk_shader {
/**
* Shader compiles can be overridden with arbitrary ELF objects by setting
* the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
+ *
+ * TODO: key this off some hash
*/
-bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
+bool si_replace_shader(unsigned num, struct si_shader_binary *binary)
{
const char *p = debug_get_option_replace_shaders();
const char *semicolon;
char *copy = NULL;
FILE *f;
long filesize, nread;
- char *buf = NULL;
bool replaced = false;
if (!p)
if (fseek(f, 0, SEEK_SET) != 0)
goto file_error;
- buf = MALLOC(filesize);
- if (!buf) {
+ binary->elf_buffer = MALLOC(filesize);
+ if (!binary->elf_buffer) {
fprintf(stderr, "out of memory\n");
goto out_close;
}
- nread = fread(buf, 1, filesize, f);
- if (nread != filesize)
+ nread = fread((void*)binary->elf_buffer, 1, filesize, f);
+ if (nread != filesize) {
+ FREE((void*)binary->elf_buffer);
+ binary->elf_buffer = NULL;
goto file_error;
+ }
- ac_elf_read(buf, filesize, binary);
+ binary->elf_size = nread;
replaced = true;
out_close:
fclose(f);
out_free:
- FREE(buf);
free(copy);
return replaced;
si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
/* No other registers can be read unless the kernel driver is amdgpu with DRM >= 3.1.0. */
- if (sctx->screen->info.drm_major < 3 ||
+ if (!sctx->screen->info.is_amdgpu ||
sctx->screen->info.drm_minor < 1) {
fprintf(f, "\n");
return;
si_log_chunk_desc_list_print(void *data, FILE *f)
{
struct si_log_chunk_desc_list *chunk = data;
+ unsigned sq_img_rsrc_word0 = chunk->chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0
+ : R_008F10_SQ_IMG_RSRC_WORD0;
for (unsigned i = 0; i < chunk->num_elements; i++) {
unsigned cpu_dw_offset = i * chunk->element_dw_size;
case 8:
for (unsigned j = 0; j < 8; j++)
ac_dump_reg(f, chunk->chip_class,
- R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
+ sq_img_rsrc_word0 + j*4,
gpu_list[j], 0xffffffff);
fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
case 16:
for (unsigned j = 0; j < 8; j++)
ac_dump_reg(f, chunk->chip_class,
- R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
+ sq_img_rsrc_word0 + j*4,
gpu_list[j], 0xffffffff);
fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
for (unsigned j = 0; j < 8; j++)
ac_dump_reg(f, chunk->chip_class,
- R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
+ sq_img_rsrc_word0 + j*4,
gpu_list[8+j], 0xffffffff);
fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
};
/**
- * Split a disassembly string into instructions and add them to the array
- * pointed to by \p instructions.
+ * Open the given \p binary as \p rtld_binary and split the contained
+ * disassembly string into instructions and add them to the array
+ * pointed to by \p instructions, which must be sufficiently large.
*
* Labels are considered to be part of the following instruction.
+ *
+ * The caller must keep \p rtld_binary alive as long as \p instructions are
+ * used and then close it afterwards.
+ *
+ * The disassembly is taken from the ".AMDGPU.disasm" section of the ELF.
+ * If the ELF cannot be opened or the section lookup fails, nothing is
+ * appended and \p addr and \p num are left unchanged.
+ *
+ * \param screen       screen whose \c info is handed to ac_rtld_open()
+ * \param rtld_binary  rtld object that is opened on the ELF of \p binary
+ * \param binary       shader binary providing \c elf_buffer and \c elf_size
+ * \param addr         in/out: address given to the next instruction; it is
+ *                     advanced by the size of every instruction appended
+ * \param num          in/out: number of entries used in \p instructions
+ * \param instructions output array; each entry references the section text
+ *                     by pointer (\c text) and length (\c textlen)
*/
-static void si_add_split_disasm(const char *disasm,
+static void si_add_split_disasm(struct si_screen *screen,
+ struct ac_rtld_binary *rtld_binary,
+ struct si_shader_binary *binary,
uint64_t *addr,
unsigned *num,
struct si_shader_inst *instructions)
{
- const char *semicolon;
+ if (!ac_rtld_open(rtld_binary, (struct ac_rtld_open_info){
+ .info = &screen->info,
+ .num_parts = 1,
+ .elf_ptrs = &binary->elf_buffer,
+ .elf_sizes = &binary->elf_size }))
+ return; /* the ELF could not be opened: append nothing */
+
+ const char *disasm;
+ size_t nbytes;
+ if (!ac_rtld_get_section_by_name(rtld_binary, ".AMDGPU.disasm",
+ &disasm, &nbytes))
+ return; /* section lookup failed: append nothing */
+
+ const char *end = disasm + nbytes; /* one past the last byte of the section */
+ while (disasm < end) {
+ const char *semicolon = memchr(disasm, ';', end - disasm);
+ if (!semicolon)
+ break; /* no ';' in the remaining text: nothing more to split */
- while ((semicolon = strchr(disasm, ';'))) {
struct si_shader_inst *inst = &instructions[(*num)++]; /* no bounds check; see the header comment */
- const char *end = util_strchrnul(semicolon, '\n');
+ const char *inst_end = memchr(semicolon + 1, '\n', end - semicolon - 1); /* the entry ends at the first newline after the ';' */
+ if (!inst_end)
+ inst_end = end; /* no newline left: the entry runs to the end of the section */
inst->text = disasm;
- inst->textlen = end - disasm;
+ inst->textlen = inst_end - disasm;
inst->addr = *addr;
/* More than 16 chars after ";" means the instruction is 8 bytes long. */
- inst->size = end - semicolon > 16 ? 8 : 4;
+ inst->size = inst_end - semicolon > 16 ? 8 : 4;
*addr += inst->size;
- if (!(*end))
+ if (inst_end == end)
break;
- disasm = end + 1;
+ disasm = inst_end + 1; /* continue after the newline */
}
}
unsigned num_waves,
FILE *f)
{
- if (!shader || !shader->binary.disasm_string)
+ if (!shader)
return;
+ struct si_screen *screen = shader->selector->screen;
uint64_t start_addr = shader->bo->gpu_address;
uint64_t end_addr = start_addr + shader->bo->b.b.width0;
unsigned i;
*/
unsigned num_inst = 0;
uint64_t inst_addr = start_addr;
+ struct ac_rtld_binary rtld_binaries[5] = {};
struct si_shader_inst *instructions =
calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
if (shader->prolog) {
- si_add_split_disasm(shader->prolog->binary.disasm_string,
+ si_add_split_disasm(screen, &rtld_binaries[0], &shader->prolog->binary,
&inst_addr, &num_inst, instructions);
}
if (shader->previous_stage) {
- si_add_split_disasm(shader->previous_stage->binary.disasm_string,
+ si_add_split_disasm(screen, &rtld_binaries[1], &shader->previous_stage->binary,
&inst_addr, &num_inst, instructions);
}
if (shader->prolog2) {
- si_add_split_disasm(shader->prolog2->binary.disasm_string,
+ si_add_split_disasm(screen, &rtld_binaries[2], &shader->prolog2->binary,
&inst_addr, &num_inst, instructions);
}
- si_add_split_disasm(shader->binary.disasm_string,
+ si_add_split_disasm(screen, &rtld_binaries[3], &shader->binary,
&inst_addr, &num_inst, instructions);
if (shader->epilog) {
- si_add_split_disasm(shader->epilog->binary.disasm_string,
+ si_add_split_disasm(screen, &rtld_binaries[4], &shader->epilog->binary,
&inst_addr, &num_inst, instructions);
}
fprintf(f, "\n\n");
free(instructions);
+ for (unsigned i = 0; i < ARRAY_SIZE(rtld_binaries); ++i)
+ ac_rtld_close(&rtld_binaries[i]);
}
static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)