X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Ffreedreno%2Fir3%2Fdisasm-a3xx.c;h=ea3a9609c6b23cc9a3d49ef45d83f299576aeb4a;hp=d1e77b08913a050bb19da0f0bf3a329c3109426f;hb=ce335dcb19297d04f3fb6ce0d290ff99130d09f7;hpb=f4e64e9f530c22e779ef5747c2a927bdd5b6c47d diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index d1e77b08913..ea3a9609c6b 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -30,14 +30,9 @@ #include +#include "disasm.h" #include "instr-a3xx.h" - -/* bitmask of debug flags */ -enum debug_t { - PRINT_RAW = 0x1, /* dump raw hexdump */ - PRINT_VERBOSE = 0x2, - EXPAND_REPEAT = 0x4, -}; +#include "regmask.h" static enum debug_t debug; @@ -80,12 +75,28 @@ struct disasm_ctx { int level; unsigned gpu_id; + struct shader_stats *stats; + + /* we have to process the dst register after src to avoid tripping up + * the read-before-write detection + */ + unsigned last_dst; + bool last_dst_full; + bool last_dst_valid; + /* current instruction repeat flag: */ unsigned repeat; /* current instruction repeat indx/offset (for --expand): */ unsigned repeatidx; - unsigned instructions; + /* tracking for register usage */ + struct { + regmask_t used; + regmask_t used_merged; + regmask_t rbw; /* read before write */ + regmask_t war; /* write after read */ + unsigned max_const; + } regs; }; static const char *float_imms[] = { @@ -147,9 +158,34 @@ static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, fprintf(ctx->out, "p0.%c", component[reg.comp]); } else { fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]); + if (0 && full && !c) { + reg_t hr0 = reg; + hr0.iim_val *= 2; + reg_t hr1 = hr0; + hr1.iim_val += 1; + fprintf(ctx->out, " (hr%d.%c,hr%d.%c)", hr0.num, component[hr0.comp], hr1.num, component[hr1.comp]); + } } } +static void regmask_set(regmask_t *regmask, unsigned num, bool full) +{ + ir3_assert(num < MAX_REG); + __regmask_set(regmask, !full, num); +} + +static void regmask_clear(regmask_t *regmask, unsigned num, bool full) +{ + ir3_assert(num < MAX_REG); + __regmask_clear(regmask, !full, num); +} + +static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full) +{ + ir3_assert(num < MAX_REG); + return __regmask_get(regmask, !full, num); +} + static unsigned regidx(reg_t reg) { return (4 * reg.num) + reg.comp; @@ -163,8 +199,138 @@ static reg_t idxreg(unsigned idx) }; } +static void print_sequence(struct disasm_ctx *ctx, int first, int last) +{ + if (first != MAX_REG) { + if (first == last) { + fprintf(ctx->out, " %d", first); + } else { + fprintf(ctx->out, " %d-%d", first, last); + } + } +} + +static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full) +{ + int num, max = 0, cnt = 0; + int first, last; + + first = last = MAX_REG; + + for (num = 0; num < MAX_REG; num++) { + if (regmask_get(regmask, num, full)) { + if (num != (last + 1)) { + print_sequence(ctx, first, last); + first = num; + } + last = num; + if (num < (48*4)) + max = num; + cnt++; + } + } + + print_sequence(ctx, first, last); + + fprintf(ctx->out, " (cnt=%d, max=%d)", cnt, max); + + return max; +} + +static void print_reg_stats(struct disasm_ctx *ctx) +{ + int fullreg, halfreg; + + fprintf(ctx->out, "%sRegister Stats:\n", levels[ctx->level]); + fprintf(ctx->out, "%s- used (half):", levels[ctx->level]); + halfreg = print_regs(ctx, &ctx->regs.used, false); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- used (full):", levels[ctx->level]); + fullreg = print_regs(ctx, &ctx->regs.used, true); + fprintf(ctx->out, "\n"); + if (ctx->gpu_id >= 600) { + fprintf(ctx->out, "%s- used (merged):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.used_merged, false); + fprintf(ctx->out, "\n"); + } + fprintf(ctx->out, "%s- input (half):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.rbw, false); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- input (full):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.rbw, true); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- max const: %u\n", levels[ctx->level], ctx->regs.max_const); + fprintf(ctx->out, "\n"); + fprintf(ctx->out, "%s- output (half):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.war, false); + fprintf(ctx->out, " (estimated)\n"); + fprintf(ctx->out, "%s- output (full):", levels[ctx->level]); + print_regs(ctx, &ctx->regs.war, true); + fprintf(ctx->out, " (estimated)\n"); + + /* convert to vec4, which is the granularity that registers are + * assigned to shader: + */ + fullreg = (fullreg + 3) / 4; + halfreg = ctx->regs.used.mergedregs ? 0 : (halfreg + 3) / 4; + + // Note this count of instructions includes rptN, which matches + // up to how mesa prints this: + fprintf(ctx->out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, " + "(%d instlen), %u last-baryf, %d half, %d full\n", + levels[ctx->level], ctx->stats->instructions, ctx->stats->nops, + ctx->stats->instructions - ctx->stats->nops, ctx->stats->instlen, + ctx->stats->last_baryf, halfreg, fullreg); + fprintf(ctx->out, "%s- shaderdb: %u cat0, %u cat1, %u cat2, %u cat3, " + "%u cat4, %u cat5, %u cat6, %u cat7\n", + levels[ctx->level], + ctx->stats->instrs_per_cat[0], + ctx->stats->instrs_per_cat[1], + ctx->stats->instrs_per_cat[2], + ctx->stats->instrs_per_cat[3], + ctx->stats->instrs_per_cat[4], + ctx->stats->instrs_per_cat[5], + ctx->stats->instrs_per_cat[6], + ctx->stats->instrs_per_cat[7]); + fprintf(ctx->out, "%s- shaderdb: %d (ss), %d (sy)\n", levels[ctx->level], + ctx->stats->ss, ctx->stats->sy); +} + +static void process_reg_dst(struct disasm_ctx *ctx) +{ + if (!ctx->last_dst_valid) + return; + + /* ignore dummy writes (ie. r63.x): */ + if (!VALIDREG(ctx->last_dst)) + return; + + for (unsigned i = 0; i <= ctx->repeat; i++) { + unsigned dst = ctx->last_dst + i; + + regmask_set(&ctx->regs.war, dst, ctx->last_dst_full); + regmask_set(&ctx->regs.used, dst, ctx->last_dst_full); + + if (ctx->gpu_id >= 600) { + if (ctx->last_dst_full) { + regmask_set(&ctx->regs.used_merged, (dst*2)+0, false); + regmask_set(&ctx->regs.used_merged, (dst*2)+1, false); + } else { + regmask_set(&ctx->regs.used_merged, dst, false); + } + } + } + + ctx->last_dst_valid = false; +} static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel) { + /* presumably the special registers a0.c and p0.c don't count.. */ + if (!(addr_rel || (reg.num == REG_A0) || (reg.num == REG_P0))) { + ctx->last_dst = regidx(reg); + ctx->last_dst_full = full; + ctx->last_dst_valid = true; + } reg = idxreg(regidx(reg) + ctx->repeatidx); print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel); } @@ -189,6 +355,45 @@ static void print_src(struct disasm_ctx *ctx, struct reginfo *info) { reg_t reg = info->reg; + /* presumably the special registers a0.c and p0.c don't count.. */ + if (!(info->addr_rel || info->c || info->im || + (reg.num == REG_A0) || (reg.num == REG_P0))) { + int i, num = regidx(reg); + for (i = 0; i <= ctx->repeat; i++) { + unsigned src = num + i; + + if (!regmask_get(&ctx->regs.used, src, info->full)) + regmask_set(&ctx->regs.rbw, src, info->full); + + regmask_clear(&ctx->regs.war, src, info->full); + regmask_set(&ctx->regs.used, src, info->full); + + if (info->full) { + regmask_set(&ctx->regs.used_merged, (src*2)+0, false); + regmask_set(&ctx->regs.used_merged, (src*2)+1, false); + } else { + regmask_set(&ctx->regs.used_merged, src, false); + } + + if (!info->r) + break; + } + } else if (info->c) { + int i, num = regidx(reg); + for (i = 0; i <= ctx->repeat; i++) { + unsigned src = num + i; + + ctx->regs.max_const = MAX2(ctx->regs.max_const, src); + + if (!info->r) + break; + } + + unsigned max = (num + ctx->repeat + 1 + 3) / 4; + if (max > ctx->stats->constlen) + ctx->stats->constlen = max; + } + if (info->r) reg = idxreg(regidx(info->reg) + ctx->repeatidx); @@ -674,7 +879,7 @@ static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr) char sd = 0, ss = 0; /* dst/src address space */ bool nodst = false; struct reginfo dst, src1, src2, ssbo; - int src1off = 0, dstoff = 0; + int src1off = 0; memset(&dst, 0, sizeof(dst)); memset(&src1, 0, sizeof(src1)); @@ -929,12 +1134,6 @@ static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr) return; } - if (cat6->dst_off) { - dst.reg = (reg_t)(cat6->c.dst); - dstoff = cat6->c.off; - } else { - dst.reg = (reg_t)(cat6->d.dst); - } if (cat6->src_off) { src1.reg = (reg_t)(cat6->a.src1); @@ -953,16 +1152,23 @@ static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr) if (sd) fprintf(ctx->out, "%c[", sd); /* note: dst might actually be a src (ie. address to store to) */ - print_src(ctx, &dst); - if (cat6->dst_off && cat6->g) { - struct reginfo dstoff_reg = { - .reg = (reg_t) cat6->c.off, - .full = true - }; - fprintf(ctx->out, "+"); - print_src(ctx, &dstoff_reg); - } else if (dstoff) - fprintf(ctx->out, "%+d", dstoff); + if (cat6->dst_off) { + dst.reg = (reg_t)(cat6->c.dst); + print_src(ctx, &dst); + if (cat6->g) { + struct reginfo dstoff_reg = { + .reg = (reg_t) cat6->c.off, + .full = true + }; + fprintf(ctx->out, "+"); + print_src(ctx, &dstoff_reg); + } else if (cat6->c.off || cat6->c.off_high) { + fprintf(ctx->out, "%+d", ((uint32_t)cat6->c.off_high << 8) | cat6->c.off); + } + } else { + dst.reg = (reg_t)(cat6->d.dst); + print_src(ctx, &dst); + } if (sd) fprintf(ctx->out, "]"); fprintf(ctx->out, ", "); @@ -1256,6 +1462,9 @@ static const struct opc_info { OPC(5, OPC_DSYPP_1, dsypp.1), OPC(5, OPC_RGETPOS, rgetpos), OPC(5, OPC_RGETINFO, rgetinfo), + /* macros are needed here for ir3_print */ + OPC(5, OPC_DSXPP_MACRO, dsxpp.macro), + OPC(5, OPC_DSYPP_MACRO, dsypp.macro), /* category 6: */ @@ -1298,12 +1507,10 @@ static const struct opc_info { #define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)])) -// XXX hack.. probably should move this table somewhere common: -#include "ir3.h" -const char *ir3_instr_name(struct ir3_instruction *instr) +const char *disasm_a3xx_instr_name(opc_t opc) { - if (opc_cat(instr->opc) == -1) return "??meta??"; - return opcs[instr->opc].name; + if (opc_cat(opc) == -1) return "??meta??"; + return opcs[opc].name; } static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr) @@ -1333,28 +1540,34 @@ static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr) static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) { instr_t *instr = (instr_t *)dwords; - uint32_t opc = instr_opc(instr, ctx->gpu_id); + opc_t opc = _OPC(instr->opc_cat, instr_opc(instr, ctx->gpu_id)); unsigned nop = 0; - unsigned cycles = ctx->instructions; + unsigned cycles = ctx->stats->instructions; - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s%04d:%04d[%08xx_%08xx] ", levels[ctx->level], - n, cycles++, dwords[1], dwords[0]); + if (debug & PRINT_RAW) { + fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level], + instr->opc_cat, n, cycles++, dwords[1], dwords[0]); } + if (opc == OPC_BARY_F) + ctx->stats->last_baryf = ctx->stats->instructions; + + ctx->repeat = instr_repeat(instr); + ctx->stats->instructions += 1 + ctx->repeat; + ctx->stats->instlen++; + /* NOTE: order flags are printed is a bit fugly.. but for now I * try to match the order in llvm-a3xx disassembler for easy * diff'ing.. */ - ctx->repeat = instr_repeat(instr); - ctx->instructions += 1 + ctx->repeat; - if (instr->sync) { fprintf(ctx->out, "(sy)"); + ctx->stats->sy++; } if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) { fprintf(ctx->out, "(ss)"); + ctx->stats->ss++; } if (instr->jmp_tgt) fprintf(ctx->out, "(jp)"); @@ -1368,30 +1581,49 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r; else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r)) nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r; - ctx->instructions += nop; if (nop) fprintf(ctx->out, "(nop%d) ", nop); if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) fprintf(ctx->out, "(ul)"); + ctx->stats->instructions += nop; + ctx->stats->nops += nop; + if (opc == OPC_NOP) { + ctx->stats->nops += 1 + ctx->repeat; + ctx->stats->instrs_per_cat[0] += 1 + ctx->repeat; + } else { + ctx->stats->instrs_per_cat[instr->opc_cat] += 1 + ctx->repeat; + ctx->stats->instrs_per_cat[0] += nop; + } + + if (opc == OPC_MOV) { + if (instr->cat1.src_type == instr->cat1.dst_type) { + ctx->stats->mov_count += 1 + ctx->repeat; + } else { + ctx->stats->cov_count += 1 + ctx->repeat; + } + } + print_single_instr(ctx, instr); fprintf(ctx->out, "\n"); + process_reg_dst(ctx); + if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) { int i; for (i = 0; i < nop; i++) { if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s%04d:%04d[ ] ", - levels[ctx->level], n, cycles++); + fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ", + levels[ctx->level], instr->opc_cat, n, cycles++); } fprintf(ctx->out, "nop\n"); } for (i = 0; i < ctx->repeat; i++) { ctx->repeatidx = i + 1; if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s%04d:%04d[ ] ", - levels[ctx->level], n, cycles++); + fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ", + levels[ctx->level], instr->opc_cat, n, cycles++); } print_single_instr(ctx, instr); fprintf(ctx->out, "\n"); @@ -1399,24 +1631,31 @@ static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) ctx->repeatidx = 0; } - return (instr->opc_cat == 0) && (opc == OPC_END); + return (instr->opc_cat == 0) && + ((opc == OPC_END) || (opc == OPC_CHSH)); } -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id) +int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out, + unsigned gpu_id, struct shader_stats *stats) { struct disasm_ctx ctx; int i; int nop_count = 0; + bool has_end = false; - assert((sizedwords % 2) == 0); + ir3_assert((sizedwords % 2) == 0); memset(&ctx, 0, sizeof(ctx)); ctx.out = out; ctx.level = level; ctx.gpu_id = gpu_id; + ctx.stats = stats; + memset(ctx.stats, 0, sizeof(*ctx.stats)); for (i = 0; i < sizedwords; i += 2) { - print_instr(&ctx, &dwords[i], i/2); + has_end |= print_instr(&ctx, &dwords[i], i/2); + if (!has_end) + continue; if (dwords[i] == 0 && dwords[i + 1] == 0) nop_count++; else @@ -1425,5 +1664,52 @@ int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned break; } + if (debug & PRINT_STATS) + print_reg_stats(&ctx); + return 0; } + +void disasm_a3xx_set_debug(enum debug_t d) +{ + debug = d; +} + +#include + +static bool jmp_env_valid; +static jmp_buf jmp_env; + +void +ir3_assert_handler(const char *expr, const char *file, int line, + const char *func) +{ + fprintf(stdout, "\n%s:%u: %s: Assertion `%s' failed.\n", file, line, func, expr); + if (jmp_env_valid) + longjmp(jmp_env, 1); + abort(); +} + +#define TRY(x) do { \ + assert(!jmp_env_valid); \ + if (setjmp(jmp_env) == 0) { \ + jmp_env_valid = true; \ + x; \ + } \ + jmp_env_valid = false; \ + } while (0) + + +int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id) +{ + struct shader_stats stats; + return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats); +} + +int try_disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id) +{ + struct shader_stats stats; + int ret = -1; + TRY(ret = disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats)); + return ret; +}