src/gallium/drivers/vc4/vc4_qpu_disasm.c

   1 /*
   2  * Copyright © 2014 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <stdbool.h>
  25 #include <stdio.h>
  26
  27 #include "vc4_qpu.h"
  28 #include "vc4_qpu_defines.h"
  29
  30 static const char *qpu_add_opcodes[] = {
  31         [QPU_A_NOP] = "nop",
  32         [QPU_A_FADD] = "fadd",
  33         [QPU_A_FSUB] = "fsub",
  34         [QPU_A_FMIN] = "fmin",
  35         [QPU_A_FMAX] = "fmax",
  36         [QPU_A_FMINABS] = "fminabs",
  37         [QPU_A_FMAXABS] = "fmaxabs",
  38         [QPU_A_FTOI] = "ftoi",
  39         [QPU_A_ITOF] = "itof",
  40         [QPU_A_ADD] = "add",
  41         [QPU_A_SUB] = "sub",
  42         [QPU_A_SHR] = "shr",
  43         [QPU_A_ASR] = "asr",
  44         [QPU_A_ROR] = "ror",
  45         [QPU_A_SHL] = "shl",
  46         [QPU_A_MIN] = "min",
  47         [QPU_A_MAX] = "max",
  48         [QPU_A_AND] = "and",
  49         [QPU_A_OR] = "or",
  50         [QPU_A_XOR] = "xor",
  51         [QPU_A_NOT] = "not",
  52         [QPU_A_CLZ] = "clz",
  53         [QPU_A_V8ADDS] = "v8adds",
  54         [QPU_A_V8SUBS] = "v8subs",
  55 };
  56
  57 static const char *qpu_mul_opcodes[] = {
  58         [QPU_M_NOP] = "nop",
  59         [QPU_M_FMUL] = "fmul",
  60         [QPU_M_MUL24] = "mul24",
  61         [QPU_M_V8MULD] = "v8muld",
  62         [QPU_M_V8MIN] = "v8min",
  63         [QPU_M_V8MAX] = "v8max",
  64         [QPU_M_V8ADDS] = "v8adds",
  65         [QPU_M_V8SUBS] = "v8subs",
  66 };
  67
  68 static const char *qpu_sig[] = {
  69         [QPU_SIG_SW_BREAKPOINT] = "sig_brk",
  70         [QPU_SIG_NONE] = "",
  71         [QPU_SIG_THREAD_SWITCH] = "sig_switch",
  72         [QPU_SIG_PROG_END] = "sig_end",
  73         [QPU_SIG_WAIT_FOR_SCOREBOARD] = "sig_wait_score",
  74         [QPU_SIG_SCOREBOARD_UNLOCK] = "sig_unlock_score",
  75         [QPU_SIG_LAST_THREAD_SWITCH] = "sig_thread_switch",
  76         [QPU_SIG_COVERAGE_LOAD] = "sig_coverage_load",
  77         [QPU_SIG_COLOR_LOAD] = "sig_color_load",
  78         [QPU_SIG_COLOR_LOAD_END] = "sig_color_load_end",
  79         [QPU_SIG_LOAD_TMU0] = "load_tmu0",
  80         [QPU_SIG_LOAD_TMU1] = "load_tmu1",
  81         [QPU_SIG_ALPHA_MASK_LOAD] = "sig_alpha_mask_load",
  82         [QPU_SIG_SMALL_IMM] = "sig_small_imm",
  83         [QPU_SIG_LOAD_IMM] = "sig_load_imm",
  84         [QPU_SIG_BRANCH] = "sig_branch",
  85 };
  86
  87 static const char *qpu_pack_mul[] = {
  88         [QPU_PACK_MUL_NOP] = "",
  89         [QPU_PACK_MUL_8888] = "8888",
  90         [QPU_PACK_MUL_8A] = "8a",
  91         [QPU_PACK_MUL_8B] = "8b",
  92         [QPU_PACK_MUL_8C] = "8c",
  93         [QPU_PACK_MUL_8D] = "8d",
  94 };
  95
  96 /* The QPU unpack for A and R4 files can be described the same, it's just that
  97  * the R4 variants are convert-to-float only, with no int support.
  98  */
  99 static const char *qpu_unpack[] = {
 100         [QPU_UNPACK_NOP] = "",
 101         [QPU_UNPACK_16A] = "16a",
 102         [QPU_UNPACK_16B] = "16b",
 103         [QPU_UNPACK_8D_REP] = "8d_rep",
 104         [QPU_UNPACK_8A] = "8a",
 105         [QPU_UNPACK_8B] = "8b",
 106         [QPU_UNPACK_8C] = "8c",
 107         [QPU_UNPACK_8D] = "8d",
 108 };
 109
 110 static const char *special_read_a[] = {
 111         "uni",
 112         NULL,
 113         NULL,
 114         "vary",
 115         NULL,
 116         NULL,
 117         "elem",
 118         "nop",
 119         NULL,
 120         "x_pix",
 121         "ms_flags",
 122         NULL,
 123         NULL,
 124         NULL,
 125         NULL,
 126         NULL,
 127         "vpm_read",
 128         "vpm_ld_busy",
 129         "vpm_ld_wait",
 130         "mutex_acq"
 131 };
 132
 133 static const char *special_read_b[] = {
 134         "uni",
 135         NULL,
 136         NULL,
 137         "vary",
 138         NULL,
 139         NULL,
 140         "qpu",
 141         "nop",
 142         NULL,
 143         "y_pix",
 144         "rev_flag",
 145         NULL,
 146         NULL,
 147         NULL,
 148         NULL,
 149         NULL,
 150         "vpm_read",
 151         "vpm_st_busy",
 152         "vpm_st_wait",
 153         "mutex_acq"
 154 };
 155
 156 /**
 157  * This has the B-file descriptions for register writes.
 158  *
 159  * Since only a couple of regs are different between A and B, the A overrides
 160  * are in get_special_write_desc().
 161  */
 162 static const char *special_write[] = {
 163         [QPU_W_ACC0] = "r0",
 164         [QPU_W_ACC1] = "r1",
 165         [QPU_W_ACC2] = "r2",
 166         [QPU_W_ACC3] = "r3",
 167         [QPU_W_TMU_NOSWAP] = "tmu_noswap",
 168         [QPU_W_ACC5] = "r5",
 169         [QPU_W_HOST_INT] = "host_int",
 170         [QPU_W_NOP] = "nop",
 171         [QPU_W_UNIFORMS_ADDRESS] = "uniforms_addr",
 172         [QPU_W_QUAD_XY] = "quad_y",
 173         [QPU_W_MS_FLAGS] = "ms_flags",
 174         [QPU_W_TLB_STENCIL_SETUP] = "tlb_stencil_setup",
 175         [QPU_W_TLB_Z] = "tlb_z",
 176         [QPU_W_TLB_COLOR_MS] = "tlb_color_ms",
 177         [QPU_W_TLB_COLOR_ALL] = "tlb_color_all",
 178         [QPU_W_VPM] = "vpm",
 179         [QPU_W_VPMVCD_SETUP] = "vw_setup",
 180         [QPU_W_VPM_ADDR] = "vw_addr",
 181         [QPU_W_MUTEX_RELEASE] = "mutex_release",
 182         [QPU_W_SFU_RECIP] = "sfu_recip",
 183         [QPU_W_SFU_RECIPSQRT] = "sfu_recipsqrt",
 184         [QPU_W_SFU_EXP] = "sfu_exp",
 185         [QPU_W_SFU_LOG] = "sfu_log",
 186         [QPU_W_TMU0_S] = "tmu0_s",
 187         [QPU_W_TMU0_T] = "tmu0_t",
 188         [QPU_W_TMU0_R] = "tmu0_r",
 189         [QPU_W_TMU0_B] = "tmu0_b",
 190         [QPU_W_TMU1_S] = "tmu1_s",
 191         [QPU_W_TMU1_T] = "tmu1_t",
 192         [QPU_W_TMU1_R] = "tmu1_r",
 193         [QPU_W_TMU1_B] = "tmu1_b",
 194 };
 195
 196 static const char *qpu_pack_a[] = {
 197         [QPU_PACK_A_NOP] = "",
 198         [QPU_PACK_A_16A] = ".16a",
 199         [QPU_PACK_A_16B] = ".16b",
 200         [QPU_PACK_A_8888] = ".8888",
 201         [QPU_PACK_A_8A] = ".8a",
 202         [QPU_PACK_A_8B] = ".8b",
 203         [QPU_PACK_A_8C] = ".8c",
 204         [QPU_PACK_A_8D] = ".8d",
 205
 206         [QPU_PACK_A_32_SAT] = ".sat",
 207         [QPU_PACK_A_16A_SAT] = ".16a.sat",
 208         [QPU_PACK_A_16B_SAT] = ".16b.sat",
 209         [QPU_PACK_A_8888_SAT] = ".8888.sat",
 210         [QPU_PACK_A_8A_SAT] = ".8a.sat",
 211         [QPU_PACK_A_8B_SAT] = ".8b.sat",
 212         [QPU_PACK_A_8C_SAT] = ".8c.sat",
 213         [QPU_PACK_A_8D_SAT] = ".8d.sat",
 214 };
 215
 216 static const char *qpu_condflags[] = {
 217         [QPU_COND_NEVER] = ".never",
 218         [QPU_COND_ALWAYS] = "",
 219         [QPU_COND_ZS] = ".zs",
 220         [QPU_COND_ZC] = ".zc",
 221         [QPU_COND_NS] = ".ns",
 222         [QPU_COND_NC] = ".nc",
 223         [QPU_COND_CS] = ".cs",
 224         [QPU_COND_CC] = ".cc",
 225 };
 226
 227 #define DESC(array, index)                                        \
 228         ((index >= ARRAY_SIZE(array) || !(array)[index]) ?         \
 229          "???" : (array)[index])
 230
 231 static const char *
 232 get_special_write_desc(int reg, bool is_a)
 233 {
 234         if (is_a) {
 235                 switch (reg) {
 236                 case QPU_W_QUAD_XY:
 237                         return "quad_x";
 238                 case QPU_W_VPMVCD_SETUP:
 239                         return "vr_setup";
 240                 case QPU_W_VPM_ADDR:
 241                         return "vr_addr";
 242                 }
 243         }
 244
 245         return special_write[reg];
 246 }
 247
 248 void
 249 vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack)
 250 {
 251         fprintf(out, ".%s", DESC(qpu_pack_mul, pack));
 252 }
 253
 254 void
 255 vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack)
 256 {
 257         fprintf(out, "%s", DESC(qpu_pack_a, pack));
 258 }
 259
 260 void
 261 vc4_qpu_disasm_unpack(FILE *out, uint32_t unpack)
 262 {
 263         if (unpack != QPU_UNPACK_NOP)
 264                 fprintf(out, ".%s", DESC(qpu_unpack, unpack));
 265 }
 266
 267 static void
 268 print_alu_dst(uint64_t inst, bool is_mul)
 269 {
 270         bool is_a = is_mul == ((inst & QPU_WS) != 0);
 271         uint32_t waddr = (is_mul ?
 272                           QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
 273                           QPU_GET_FIELD(inst, QPU_WADDR_ADD));
 274         const char *file = is_a ? "a" : "b";
 275         uint32_t pack = QPU_GET_FIELD(inst, QPU_PACK);
 276
 277         if (waddr <= 31)
 278                 fprintf(stderr, "r%s%d", file, waddr);
 279         else if (get_special_write_desc(waddr, is_a))
 280                 fprintf(stderr, "%s", get_special_write_desc(waddr, is_a));
 281         else
 282                 fprintf(stderr, "%s%d?", file, waddr);
 283
 284         if (is_mul && (inst & QPU_PM)) {
 285                 vc4_qpu_disasm_pack_mul(stderr, pack);
 286         } else if (is_a && !(inst & QPU_PM)) {
 287                 vc4_qpu_disasm_pack_a(stderr, pack);
 288         }
 289 }
 290
 291 static void
 292 print_alu_src(uint64_t inst, uint32_t mux)
 293 {
 294         bool is_a = mux != QPU_MUX_B;
 295         const char *file = is_a ? "a" : "b";
 296         uint32_t raddr = (is_a ?
 297                           QPU_GET_FIELD(inst, QPU_RADDR_A) :
 298                           QPU_GET_FIELD(inst, QPU_RADDR_B));
 299         uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK);
 300
 301         if (mux <= QPU_MUX_R5)
 302                 fprintf(stderr, "r%d", mux);
 303         else if (!is_a &&
 304                  QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
 305                 uint32_t si = QPU_GET_FIELD(inst, QPU_SMALL_IMM);
 306                 if (si <= 15)
 307                         fprintf(stderr, "%d", si);
 308                 else if (si <= 31)
 309                         fprintf(stderr, "%d", -16 + (si - 16));
 310                 else if (si <= 39)
 311                         fprintf(stderr, "%.1f", (float)(1 << (si - 32)));
 312                 else if (si <= 47)
 313                         fprintf(stderr, "%f", 1.0f / (1 << (48 - si)));
 314                 else
 315                         fprintf(stderr, "<bad imm %d>", si);
 316         } else if (raddr <= 31)
 317                 fprintf(stderr, "r%s%d", file, raddr);
 318         else {
 319                 if (is_a)
 320                         fprintf(stderr, "%s", DESC(special_read_a, raddr - 32));
 321                 else
 322                         fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
 323         }
 324
 325         if (((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
 326              (mux == QPU_MUX_R4 && (inst & QPU_PM)))) {
 327                 vc4_qpu_disasm_unpack(stderr, unpack);
 328         }
 329 }
 330
 331 static void
 332 print_add_op(uint64_t inst)
 333 {
 334         uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
 335         uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_ADD);
 336         bool is_mov = (op_add == QPU_A_OR &&
 337                        QPU_GET_FIELD(inst, QPU_ADD_A) ==
 338                        QPU_GET_FIELD(inst, QPU_ADD_B));
 339
 340         fprintf(stderr, "%s%s%s ",
 341                 is_mov ? "mov" : DESC(qpu_add_opcodes, op_add),
 342                 ((inst & QPU_SF) && op_add != QPU_A_NOP) ? ".sf" : "",
 343                 op_add != QPU_A_NOP ? DESC(qpu_condflags, cond) : "");
 344
 345         print_alu_dst(inst, false);
 346         fprintf(stderr, ", ");
 347
 348         print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_A));
 349
 350         if (!is_mov) {
 351                 fprintf(stderr, ", ");
 352
 353                 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_B));
 354         }
 355 }
 356
 357 static void
 358 print_mul_op(uint64_t inst)
 359 {
 360         uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
 361         uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL);
 362         uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_MUL);
 363         bool is_mov = (op_mul == QPU_M_V8MIN &&
 364                        QPU_GET_FIELD(inst, QPU_MUL_A) ==
 365                        QPU_GET_FIELD(inst, QPU_MUL_B));
 366
 367         fprintf(stderr, "%s%s%s ",
 368                 is_mov ? "mov" : DESC(qpu_mul_opcodes, op_mul),
 369                 ((inst & QPU_SF) && op_add == QPU_A_NOP) ? ".sf" : "",
 370                 op_mul != QPU_M_NOP ? DESC(qpu_condflags, cond) : "");
 371
 372         print_alu_dst(inst, true);
 373         fprintf(stderr, ", ");
 374
 375         print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_A));
 376
 377         if (!is_mov) {
 378                 fprintf(stderr, ", ");
 379                 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_B));
 380         }
 381 }
 382
 383 static void
 384 print_load_imm(uint64_t inst)
 385 {
 386         uint32_t imm = inst;
 387         uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
 388         uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
 389         uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
 390         uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
 391
 392         fprintf(stderr, "load_imm ");
 393         print_alu_dst(inst, false);
 394         fprintf(stderr, "%s, ", (waddr_add != QPU_W_NOP ?
 395                                  DESC(qpu_condflags, cond_add) : ""));
 396         print_alu_dst(inst, true);
 397         fprintf(stderr, "%s, ", (waddr_mul != QPU_W_NOP ?
 398                                  DESC(qpu_condflags, cond_mul) : ""));
 399         fprintf(stderr, "0x%08x (%f)", imm, uif(imm));
 400 }
 401
 402 void
 403 vc4_qpu_disasm(const uint64_t *instructions, int num_instructions)
 404 {
 405         for (int i = 0; i < num_instructions; i++) {
 406                 uint64_t inst = instructions[i];
 407                 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
 408
 409                 switch (sig) {
 410                 case QPU_SIG_BRANCH:
 411                         fprintf(stderr, "branch");
 412                         break;
 413                 case QPU_SIG_LOAD_IMM:
 414                         print_load_imm(inst);
 415                         break;
 416                 default:
 417                         if (sig != QPU_SIG_NONE)
 418                                 fprintf(stderr, "%s ", DESC(qpu_sig, sig));
 419                         print_add_op(inst);
 420                         fprintf(stderr, " ; ");
 421                         print_mul_op(inst);
 422                         break;
 423                 }
 424
 425                 if (num_instructions != 1)
 426                         fprintf(stderr, "\n");
 427         }
 428 }