radeonsi: extract TGSI memory/texture opcode handling into its own file
[mesa.git] / src / gallium / drivers / radeonsi / si_debug.c
index 43fc9c1f3e03a626c879e18de4e336fb912cb11d..d1159ad5e27c6fe206c81d47d816d86070a49fdf 100644 (file)
  */
 
 #include "si_pipe.h"
+#include "si_compute.h"
 #include "sid.h"
+#include "gfx9d.h"
 #include "sid_tables.h"
-#include "radeon/radeon_elf_util.h"
 #include "ddebug/dd_util.h"
 #include "util/u_memory.h"
 #include "ac_debug.h"
 DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
 
 static void si_dump_shader(struct si_screen *sscreen,
-                          struct si_shader_ctx_state *state, FILE *f)
+                          enum pipe_shader_type processor,
+                          const struct si_shader *shader, FILE *f)
 {
-       struct si_shader *current = state->current;
+       if (shader->shader_log)
+               fwrite(shader->shader_log, shader->shader_log_size, 1, f);
+       else
+               si_shader_dump(sscreen, shader, NULL, processor, f, false);
+}
+
+static void si_dump_gfx_shader(struct si_screen *sscreen,
+                              const struct si_shader_ctx_state *state, FILE *f)
+{
+       const struct si_shader *current = state->current;
 
        if (!state->cso || !current)
                return;
 
-       if (current->shader_log)
-               fwrite(current->shader_log, current->shader_log_size, 1, f);
-       else
-               si_shader_dump(sscreen, state->current, NULL,
-                              state->cso->info.processor, f, false);
+       si_dump_shader(sscreen, state->cso->info.processor, current, f);
+}
+
+static void si_dump_compute_shader(struct si_screen *sscreen,
+                                  const struct si_cs_shader_state *state, FILE *f)
+{
+       if (!state->program || state->program != state->emitted_program)
+               return;
+
+       si_dump_shader(sscreen, PIPE_SHADER_COMPUTE, &state->program->shader, f);
 }
 
 /**
  * Shader compiles can be overridden with arbitrary ELF objects by setting
  * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
  */
-bool si_replace_shader(unsigned num, struct radeon_shader_binary *binary)
+bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
 {
        const char *p = debug_get_option_replace_shaders();
        const char *semicolon;
@@ -126,7 +142,7 @@ bool si_replace_shader(unsigned num, struct radeon_shader_binary *binary)
        if (nread != filesize)
                goto file_error;
 
-       radeon_elf_read(buf, filesize, binary);
+       ac_elf_read(buf, filesize, binary);
        replaced = true;
 
 out_close:
@@ -183,9 +199,11 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
        si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
        si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
        si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
-       si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
-       si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
-       si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
+       if (sctx->b.chip_class <= VI) {
+               si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
+               si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
+               si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
+       }
        si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
        si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
        si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
@@ -221,15 +239,18 @@ static void si_dump_last_ib(struct si_context *sctx, FILE *f)
 
        if (sctx->init_config)
                ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
-                           -1, "IB2: Init config", sctx->b.chip_class);
+                           -1, "IB2: Init config", sctx->b.chip_class,
+                           NULL, NULL);
 
        if (sctx->init_config_gs_rings)
                ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
                            sctx->init_config_gs_rings->ndw,
-                           -1, "IB2: Init GS rings", sctx->b.chip_class);
+                           -1, "IB2: Init GS rings", sctx->b.chip_class,
+                           NULL, NULL);
 
        ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
-                   last_trace_id, "IB", sctx->b.chip_class);
+                   last_trace_id, "IB", sctx->b.chip_class,
+                    NULL, NULL);
 }
 
 static const char *priority_to_string(enum radeon_bo_priority priority)
@@ -347,14 +368,14 @@ static void si_dump_framebuffer(struct si_context *sctx, FILE *f)
 
                rtex = (struct r600_texture*)state->cbufs[i]->texture;
                fprintf(f, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
-               r600_print_texture_info(rtex, f);
+               r600_print_texture_info(sctx->b.screen, rtex, f);
                fprintf(f, "\n");
        }
 
        if (state->zsbuf) {
                rtex = (struct r600_texture*)state->zsbuf->texture;
                fprintf(f, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
-               r600_print_texture_info(rtex, f);
+               r600_print_texture_info(sctx->b.screen, rtex, f);
                fprintf(f, "\n");
        }
 }
@@ -429,17 +450,12 @@ static void si_dump_descriptor_list(struct si_descriptors *desc,
 }
 
 static void si_dump_descriptors(struct si_context *sctx,
-                               struct si_shader_ctx_state *state,
-                               FILE *f)
+                               enum pipe_shader_type processor,
+                               const struct tgsi_shader_info *info, FILE *f)
 {
-       if (!state->cso || !state->current)
-               return;
-
-       unsigned type = state->cso->type;
-       const struct tgsi_shader_info *info = &state->cso->info;
        struct si_descriptors *descs =
                &sctx->descriptors[SI_DESCS_FIRST_SHADER +
-                                  type * SI_NUM_SHADER_DESCS];
+                                  processor * SI_NUM_SHADER_DESCS];
        static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
 
        static const char *elem_name[] = {
@@ -448,21 +464,319 @@ static void si_dump_descriptors(struct si_context *sctx,
                " - Sampler",
                " - Image",
        };
-       unsigned num_elements[] = {
-               util_last_bit(info->const_buffers_declared),
-               util_last_bit(info->shader_buffers_declared),
-               util_last_bit(info->samplers_declared),
-               util_last_bit(info->images_declared),
+       unsigned enabled_slots[] = {
+               sctx->const_buffers[processor].enabled_mask,
+               sctx->shader_buffers[processor].enabled_mask,
+               sctx->samplers[processor].views.enabled_mask,
+               sctx->images[processor].enabled_mask,
+       };
+       unsigned required_slots[] = {
+               info ? info->const_buffers_declared : 0,
+               info ? info->shader_buffers_declared : 0,
+               info ? info->samplers_declared : 0,
+               info ? info->images_declared : 0,
        };
 
-       if (type == PIPE_SHADER_VERTEX) {
-               si_dump_descriptor_list(&sctx->vertex_buffers, shader_name[type],
+       if (processor == PIPE_SHADER_VERTEX) {
+               assert(info); /* only CS may not have an info struct */
+
+               si_dump_descriptor_list(&sctx->vertex_buffers, shader_name[processor],
                                        " - Vertex buffer", info->num_inputs, f);
        }
 
        for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
-               si_dump_descriptor_list(descs, shader_name[type], elem_name[i],
-                                       num_elements[i], f);
+               si_dump_descriptor_list(descs, shader_name[processor], elem_name[i],
+                                       util_last_bit(enabled_slots[i] | required_slots[i]), f);
+}
+
+static void si_dump_gfx_descriptors(struct si_context *sctx,
+                                   const struct si_shader_ctx_state *state,
+                                   FILE *f)
+{
+       if (!state->cso || !state->current)
+               return;
+
+       si_dump_descriptors(sctx, state->cso->type, &state->cso->info, f);
+}
+
+static void si_dump_compute_descriptors(struct si_context *sctx, FILE *f)
+{
+       if (!sctx->cs_shader_state.program ||
+           sctx->cs_shader_state.program != sctx->cs_shader_state.emitted_program)
+               return;
+
+       si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, f);
+}
+
+struct si_shader_inst {
+       char text[160];  /* one disasm line */
+       unsigned offset; /* instruction offset */
+       unsigned size;   /* instruction size = 4 or 8 */
+};
+
+/* Split a disassembly string into lines and add them to the array pointed
+ * to by "instructions". */
+static void si_add_split_disasm(const char *disasm,
+                               uint64_t start_addr,
+                               unsigned *num,
+                               struct si_shader_inst *instructions)
+{
+       struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
+       char *next;
+
+       while ((next = strchr(disasm, '\n'))) {
+               struct si_shader_inst *inst = &instructions[*num];
+               unsigned len = next - disasm;
+
+               assert(len < ARRAY_SIZE(inst->text));
+               memcpy(inst->text, disasm, len);
+               inst->text[len] = 0;
+               inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
+
+               const char *semicolon = strchr(disasm, ';');
+               assert(semicolon);
+               /* More than 16 chars after ";" means the instruction is 8 bytes long. */
+               inst->size = next - semicolon > 16 ? 8 : 4;
+
+               snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
+                       " [PC=0x%"PRIx64", off=%u, size=%u]",
+                       start_addr + inst->offset, inst->offset, inst->size);
+
+               last_inst = inst;
+               (*num)++;
+               disasm = next + 1;
+       }
+}
+
+#define MAX_WAVES_PER_CHIP (64 * 40)
+
+struct si_wave_info {
+       unsigned se; /* shader engine */
+       unsigned sh; /* shader array */
+       unsigned cu; /* compute unit */
+       unsigned simd;
+       unsigned wave;
+       uint32_t status;
+       uint64_t pc; /* program counter */
+       uint32_t inst_dw0;
+       uint32_t inst_dw1;
+       uint64_t exec;
+       bool matched; /* whether the wave is used by a currently-bound shader */
+};
+
+static int compare_wave(const void *p1, const void *p2)
+{
+       struct si_wave_info *w1 = (struct si_wave_info *)p1;
+       struct si_wave_info *w2 = (struct si_wave_info *)p2;
+
+       /* Sort waves according to PC and then SE, SH, CU, etc. */
+       if (w1->pc < w2->pc)
+               return -1;
+       if (w1->pc > w2->pc)
+               return 1;
+       if (w1->se < w2->se)
+               return -1;
+       if (w1->se > w2->se)
+               return 1;
+       if (w1->sh < w2->sh)
+               return -1;
+       if (w1->sh > w2->sh)
+               return 1;
+       if (w1->cu < w2->cu)
+               return -1;
+       if (w1->cu > w2->cu)
+               return 1;
+       if (w1->simd < w2->simd)
+               return -1;
+       if (w1->simd > w2->simd)
+               return 1;
+       if (w1->wave < w2->wave)
+               return -1;
+       if (w1->wave > w2->wave)
+               return 1;
+
+       return 0;
+}
+
+/* Return wave information. "waves" should be a large enough array. */
+static unsigned si_get_wave_info(struct si_wave_info waves[MAX_WAVES_PER_CHIP])
+{
+       char line[2000];
+       unsigned num_waves = 0;
+
+       FILE *p = popen("umr -wa", "r");
+       if (!p)
+               return 0;
+
+       if (!fgets(line, sizeof(line), p) ||
+           strncmp(line, "SE", 2) != 0) {
+               pclose(p);
+               return 0;
+       }
+
+       while (fgets(line, sizeof(line), p)) {
+               struct si_wave_info *w;
+               uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
+
+               assert(num_waves < MAX_WAVES_PER_CHIP);
+               w = &waves[num_waves];
+
+               if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
+                          &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
+                          &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
+                          &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
+                       w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
+                       w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
+                       w->matched = false;
+                       num_waves++;
+               }
+       }
+
+       qsort(waves, num_waves, sizeof(struct si_wave_info), compare_wave);
+
+       pclose(p);
+       return num_waves;
+}
+
+/* If the shader is being executed, print its asm instructions, and annotate
+ * those that are being executed right now with information about waves that
+ * execute them. This is most useful during a GPU hang.
+ */
+static void si_print_annotated_shader(struct si_shader *shader,
+                                     struct si_wave_info *waves,
+                                     unsigned num_waves,
+                                     FILE *f)
+{
+       if (!shader || !shader->binary.disasm_string)
+               return;
+
+       uint64_t start_addr = shader->bo->gpu_address;
+       uint64_t end_addr = start_addr + shader->bo->b.b.width0;
+       unsigned i;
+
+       /* See if any wave executes the shader. */
+       for (i = 0; i < num_waves; i++) {
+               if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
+                       break;
+       }
+       if (i == num_waves)
+               return; /* the shader is not being executed */
+
+       /* Remember the first found wave. The waves are sorted according to PC. */
+       waves = &waves[i];
+       num_waves -= i;
+
+       /* Get the list of instructions.
+        * Buffer size / 4 is the upper bound of the instruction count.
+        */
+       unsigned num_inst = 0;
+       struct si_shader_inst *instructions =
+               calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
+
+       if (shader->prolog) {
+               si_add_split_disasm(shader->prolog->binary.disasm_string,
+                                   start_addr, &num_inst, instructions);
+       }
+       if (shader->previous_stage) {
+               si_add_split_disasm(shader->previous_stage->binary.disasm_string,
+                                   start_addr, &num_inst, instructions);
+       }
+       if (shader->prolog2) {
+               si_add_split_disasm(shader->prolog2->binary.disasm_string,
+                                   start_addr, &num_inst, instructions);
+       }
+       si_add_split_disasm(shader->binary.disasm_string,
+                           start_addr, &num_inst, instructions);
+       if (shader->epilog) {
+               si_add_split_disasm(shader->epilog->binary.disasm_string,
+                                   start_addr, &num_inst, instructions);
+       }
+
+       fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
+               si_get_shader_name(shader, shader->selector->type));
+
+       /* Print instructions with annotations. */
+       for (i = 0; i < num_inst; i++) {
+               struct si_shader_inst *inst = &instructions[i];
+
+               fprintf(f, "%s\n", inst->text);
+
+               /* Print which waves execute the instruction right now. */
+               while (num_waves && start_addr + inst->offset == waves->pc) {
+                       fprintf(f,
+                               "          " COLOR_GREEN "^ SE%u SH%u CU%u "
+                               "SIMD%u WAVE%u  EXEC=%016"PRIx64 "  ",
+                               waves->se, waves->sh, waves->cu, waves->simd,
+                               waves->wave, waves->exec);
+
+                       if (inst->size == 4) {
+                               fprintf(f, "INST32=%08X" COLOR_RESET "\n",
+                                       waves->inst_dw0);
+                       } else {
+                               fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
+                                       waves->inst_dw0, waves->inst_dw1);
+                       }
+
+                       waves->matched = true;
+                       waves = &waves[1];
+                       num_waves--;
+               }
+       }
+
+       fprintf(f, "\n\n");
+       free(instructions);
+}
+
+static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
+{
+       struct si_wave_info waves[MAX_WAVES_PER_CHIP];
+       unsigned num_waves = si_get_wave_info(waves);
+
+       fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
+               "\n\n", num_waves);
+
+       si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
+       si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
+       si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
+       si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
+       si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
+
+       /* Print waves executing shaders that are not currently bound. */
+       unsigned i;
+       bool found = false;
+       for (i = 0; i < num_waves; i++) {
+               if (waves[i].matched)
+                       continue;
+
+               if (!found) {
+                       fprintf(f, COLOR_CYAN
+                               "Waves not executing currently-bound shaders:"
+                               COLOR_RESET "\n");
+                       found = true;
+               }
+               fprintf(f, "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016"PRIx64
+                       "  INST=%08X %08X  PC=%"PRIx64"\n",
+                       waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
+                       waves[i].wave, waves[i].exec, waves[i].inst_dw0,
+                       waves[i].inst_dw1, waves[i].pc);
+       }
+       if (found)
+               fprintf(f, "\n\n");
+}
+
+static void si_dump_command(const char *title, const char *command, FILE *f)
+{
+       char line[2000];
+
+       FILE *p = popen(command, "r");
+       if (!p)
+               return;
+
+       fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
+       while (fgets(line, sizeof(line), p))
+               fputs(line, f);
+       fprintf(f, "\n\n");
+       pclose(p);
 }
 
 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
@@ -477,19 +791,27 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
                si_dump_framebuffer(sctx, f);
 
        if (flags & PIPE_DUMP_CURRENT_SHADERS) {
-               si_dump_shader(sctx->screen, &sctx->vs_shader, f);
-               si_dump_shader(sctx->screen, &sctx->tcs_shader, f);
-               si_dump_shader(sctx->screen, &sctx->tes_shader, f);
-               si_dump_shader(sctx->screen, &sctx->gs_shader, f);
-               si_dump_shader(sctx->screen, &sctx->ps_shader, f);
+               si_dump_gfx_shader(sctx->screen, &sctx->vs_shader, f);
+               si_dump_gfx_shader(sctx->screen, &sctx->tcs_shader, f);
+               si_dump_gfx_shader(sctx->screen, &sctx->tes_shader, f);
+               si_dump_gfx_shader(sctx->screen, &sctx->gs_shader, f);
+               si_dump_gfx_shader(sctx->screen, &sctx->ps_shader, f);
+               si_dump_compute_shader(sctx->screen, &sctx->cs_shader_state, f);
+
+               if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
+                       si_dump_annotated_shaders(sctx, f);
+                       si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
+                       si_dump_command("Wave information", "umr -O bits -wa", f);
+               }
 
                si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
                                        "", "RW buffers", SI_NUM_RW_BUFFERS, f);
-               si_dump_descriptors(sctx, &sctx->vs_shader, f);
-               si_dump_descriptors(sctx, &sctx->tcs_shader, f);
-               si_dump_descriptors(sctx, &sctx->tes_shader, f);
-               si_dump_descriptors(sctx, &sctx->gs_shader, f);
-               si_dump_descriptors(sctx, &sctx->ps_shader, f);
+               si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f);
+               si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f);
+               si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f);
+               si_dump_gfx_descriptors(sctx, &sctx->gs_shader, f);
+               si_dump_gfx_descriptors(sctx, &sctx->ps_shader, f);
+               si_dump_compute_descriptors(sctx, f);
        }
 
        if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {