v3d: Do uniform rematerialization spilling before dropping threadcount
[mesa.git] / src / broadcom / compiler / vir_to_qpu.c
/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"

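/* Helpers for building qpu_reg values: qpu_reg() names a physical register
 * file entry, qpu_magic() names a magic waddr, and qpu_acc() maps an
 * accumulator number onto the r0-r5 magic range.
 */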
static inline struct qpu_reg
qpu_reg(int index)
{
        struct qpu_reg reg = {
                .magic = false,
                .index = index,
        };
        return reg;
}

static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)
{
        struct qpu_reg reg = {
                .magic = true,
                .index = waddr,
        };
        return reg;
}

static inline struct qpu_reg
qpu_acc(int acc)
{
        return qpu_magic(V3D_QPU_WADDR_R0 + acc);
}

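/* Returns a QPU instruction with both the add and mul ALU ops set to NOP
 * writing the NOP magic register, i.e. a full no-op.
 */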
struct v3d_qpu_instr
v3d_qpu_nop(void)
{
        struct v3d_qpu_instr instr = {
                .type = V3D_QPU_INSTR_TYPE_ALU,
                .alu = {
                        .add = {
                                .op = V3D_QPU_A_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                        .mul = {
                                .op = V3D_QPU_M_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                }
        };

        return instr;
}

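/* Builds a VIR-level NOP, used below as a carrier for ldunif/ldvpm
 * signals.
 */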
static struct qinst *
vir_nop(void)
{
        struct qreg undef = vir_nop_reg();
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
}

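/* Inserts a fresh NOP immediately before inst in its block's instruction
 * list and returns it.
 */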
static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
        struct qinst *q = vir_nop();

        list_addtail(&q->link, &inst->link);

        return q;
}

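/* Emits a NOP with the ldunif signal before inst, so that the uniform for
 * src i has been loaded into r5 by the time inst reads it.
 */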
static void
new_ldunif_instr(struct qinst *inst, int i)
{
        struct qinst *ldunif = new_qpu_nop_before(inst);

        ldunif->qpu.sig.ldunif = true;
        assert(inst->src[i].file == QFILE_UNIF);
        ldunif->uniform = inst->src[i].index;
}

/**
 * Allocates the src register (accumulator or register file) into the raddr
 * fields of the instruction, setting the corresponding input mux.
 */
static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
        if (src.smimm) {
                assert(instr->sig.small_imm);
                *mux = V3D_QPU_MUX_B;
                return;
        }

        if (src.magic) {
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

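        /* Try the raddr A slot first: it is free as long as no other
         * source in this instruction has claimed mux A.  Otherwise fall
         * back to raddr B, asserting that we don't clobber a different
         * register already being read through it.
         */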
        if (instr->alu.add.a != V3D_QPU_MUX_A &&
            instr->alu.add.b != V3D_QPU_MUX_A &&
            instr->alu.mul.a != V3D_QPU_MUX_A &&
            instr->alu.mul.b != V3D_QPU_MUX_A) {
                instr->raddr_a = src.index;
                *mux = V3D_QPU_MUX_A;
        } else {
                if (instr->raddr_a == src.index) {
                        *mux = V3D_QPU_MUX_A;
                } else {
                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
                                 instr->alu.add.b == V3D_QPU_MUX_B &&
                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
                               src.index == instr->raddr_b);

                        instr->raddr_b = src.index;
                        *mux = V3D_QPU_MUX_B;
                }
        }
}

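/* Returns true if qinst is a MOV from a location to itself with no pack,
 * unpack, or flag side effects, so it can be removed outright.
 */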
static bool
is_no_op_mov(struct qinst *qinst)
{
        static const struct v3d_qpu_sig no_sig = {0};

        /* Make sure it's just a lone MOV. */
        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
                return false;
        }

        /* Check if it's a MOV from a register to itself. */
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
                        return false;

                if (qinst->qpu.alu.mul.a !=
                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
                        return false;
                }
        } else {
                int raddr;

                switch (qinst->qpu.alu.mul.a) {
                case V3D_QPU_MUX_A:
                        raddr = qinst->qpu.raddr_a;
                        break;
                case V3D_QPU_MUX_B:
                        raddr = qinst->qpu.raddr_b;
                        break;
                default:
                        return false;
                }
                if (raddr != waddr)
                        return false;
        }

        /* No unpacking, packing, or flag updates allowed: any of those
         * would give the MOV an observable effect, so the instruction
         * would need to be executed.
         */
        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}

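/* Lowers the VIR instructions of a block to their QPU encodings: sources
 * and destinations are resolved to physical registers, accumulators, or
 * magic waddrs, uniform and VPM reads get ldunif/ldvpm NOPs emitted in
 * front of them, and no-op self-MOVs are dropped.
 */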
static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        int last_vpm_read_index = -1;

        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                struct qinst *temp;

                if (vir_has_implicit_uniform(qinst)) {
                        int src = vir_get_implicit_uniform_src(qinst);
                        assert(qinst->src[src].file == QFILE_UNIF);
                        qinst->uniform = qinst->src[src].index;
                        c->num_uniforms++;
                }

                int nsrc = vir_get_non_sideband_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                bool emitted_ldunif = false;
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                        case QFILE_LOAD_IMM:
                                src[i] = qpu_acc(0);
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_UNIF:
                                /* XXX perf: If the last ldunif we emitted was
                                 * the same uniform value, skip it.  Common
                                 * for multop/umul24 sequences.
                                 */
                                if (!emitted_ldunif) {
                                        new_ldunif_instr(qinst, i);
                                        c->num_uniforms++;
                                        emitted_ldunif = true;
                                }

                                src[i] = qpu_acc(5);
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].smimm = true;
                                break;

                        case QFILE_VPM:
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;

                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvpm = true;

                                src[i] = qpu_acc(3);
                                break;

                        case QFILE_TLB:
                        case QFILE_TLBU:
                                unreachable("bad vir src file");
                        }
                }

                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_VPM:
                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
                        break;

                case QFILE_TLB:
                        dst = qpu_magic(V3D_QPU_WADDR_TLB);
                        break;

                case QFILE_TLBU:
                        dst = qpu_magic(V3D_QPU_WADDR_TLBU);
                        break;

                case QFILE_UNIF:
                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

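                /* With the sources resolved, route them into the add or
                 * mul ALU mux fields and apply the write address, or the
                 * sig address for signal-writing instructions.
                 */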
                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (v3d_qpu_sig_writes_address(c->devinfo,
                                                       &qinst->qpu.sig)) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b, src[1]);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b, src[1]);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                if (is_no_op_mov(qinst)) {
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}

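/* Returns whether a packed QPU instruction consumes a value from the
 * uniform stream, either through a signal (ldunif, ldunifarf, wrtmuc), a
 * branch, or a magic waddr write that triggers a uniform load.
 */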
static bool
reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
{
        struct v3d_qpu_instr qpu;
        MAYBE_UNUSED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
        assert(ok);

        if (qpu.sig.ldunif ||
            qpu.sig.ldunifarf ||
            qpu.sig.wrtmuc) {
                return true;
        }

        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
                return true;

        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                if (qpu.alu.add.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
                        return true;
                }

                if (qpu.alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

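/* Dumps the packed QPU code, annotating each uniform-reading instruction
 * with the uniform it will consume.
 */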
static void
v3d_dump_qpu(struct v3d_compile *c)
{
        fprintf(stderr, "%s prog %d/%d QPU:\n",
                vir_get_stage_name(c),
                c->program_id, c->variant_id);

        int next_uniform = 0;
        for (int i = 0; i < c->qpu_inst_count; i++) {
                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);

                /* We can only do this on 4.x, because we're not tracking TMU
                 * implicit uniforms here on 3.x.
                 */
                if (c->devinfo->ver >= 40 &&
                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
                        fprintf(stderr, " (");
                        vir_dump_uniform(c->uniform_contents[next_uniform],
                                         c->uniform_data[next_uniform]);
                        fprintf(stderr, ")");
                        next_uniform++;
                }
                fprintf(stderr, "\n");
                ralloc_free((void *)str);
        }

        /* Make sure our dumping lined up. */
        if (c->devinfo->ver >= 40)
                assert(next_uniform == c->num_uniforms);

        fprintf(stderr, "\n");
}

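/* Top-level entry point of the pass: lowers each block from VIR to QPU
 * encodings, schedules the instructions, and packs them into the 64-bit
 * c->qpu_insts array, then dumps and validates the result.
 */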
void
v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
{
        /* Reset the uniform count to how many will actually be loaded by
         * the generated QPU code.
         */
        c->num_uniforms = 0;

        vir_for_each_block(block, c)
                v3d_generate_code_block(c, block, temp_registers);

        uint32_t cycles = v3d_qpu_schedule_instructions(c);

        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
        int i = 0;
        vir_for_each_inst_inorder(inst, c) {
                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
                                             &c->qpu_insts[i++]);
                if (!ok) {
                        fprintf(stderr, "Failed to pack instruction:\n");
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        c->failed = true;
                        return;
                }
        }
        assert(i == c->qpu_inst_count);

        if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d instructions\n",
                        vir_get_stage_name(c),
                        c->program_id, c->variant_id,
                        c->qpu_inst_count);
        }

        /* The QPU cycle estimates are pretty broken (see waddr_latency()),
         * so don't report them for now.
         */
        if (false) {
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
                        vir_get_stage_name(c),
                        c->program_id, c->variant_id,
                        cycles);
        }

        if (V3D_DEBUG & (V3D_DEBUG_QPU |
                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                v3d_dump_qpu(c);
        }

        qpu_validate(c);

        free(temp_registers);
}