/* src/panfrost/bifrost/bifrost_compile.c */
/*
 * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "compiler/nir/nir_builder.h"
#include "bifrost_compile.h"
#include "bifrost_opts.h"
#include "bifrost_sched.h"
#include "compiler_defines.h"
#include "disassemble.h"
#include "bifrost_print.h"

#define BI_DEBUG

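/*
 * NIR -> Bifrost MIR translation. NIR is first run through a small
 * optimisation loop, then each function body is lowered block by block
 * into bifrost_instructions, which are scheduled and (when BI_DEBUG is
 * set) disassembled at the end of bifrost_compile_shader_nir().
 */
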
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}

static void
optimize_nir(nir_shader *nir)
{
        bool progress;

        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);

        do {
                progress = false;

                NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0);

                NIR_PASS(progress, nir, nir_lower_var_copies);
                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

                NIR_PASS(progress, nir, nir_copy_prop);
                NIR_PASS(progress, nir, nir_opt_constant_folding);

                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
                NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
                NIR_PASS(progress, nir, nir_opt_if, true);

        } while (progress);

        NIR_PASS(progress, nir, nir_copy_prop);
        NIR_PASS(progress, nir, nir_opt_dce);
}

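/*
 * NIR SSA defs and NIR registers are folded into a single flat index
 * space: SSA defs keep their own index, while registers are placed
 * after all SSA defs by offsetting them with impl->ssa_alloc.
 */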
static unsigned
nir_src_index(compiler_context *ctx, nir_src *src)
{
        if (src->is_ssa)
                return src->ssa->index;
        else
                return ctx->func->impl->ssa_alloc + src->reg.reg->index;
}

static unsigned
nir_dest_index(compiler_context *ctx, nir_dest *dst)
{
        if (dst->is_ssa)
                return dst->ssa.index;
        else
                return ctx->func->impl->ssa_alloc + dst->reg.reg->index;
}

static unsigned
nir_alu_src_index(compiler_context *ctx, nir_alu_src *src)
{
        return nir_src_index(ctx, &src->src);
}

struct bifrost_instruction *
mir_alloc_ins(struct bifrost_instruction instr)
{
        struct bifrost_instruction *heap_ins = malloc(sizeof(instr));
        memcpy(heap_ins, &instr, sizeof(instr));
        return heap_ins;
}

static void
emit_mir_instruction(struct compiler_context *ctx, struct bifrost_instruction instr)
{
        list_addtail(&(mir_alloc_ins(instr))->link, &ctx->current_block->instructions);
}

static void
bifrost_block_add_successor(bifrost_block *block, bifrost_block *successor)
{
        assert(block->num_successors < ARRAY_SIZE(block->successors));
        block->successors[block->num_successors++] = successor;
}

static void
emit_load_const(struct compiler_context *ctx, nir_load_const_instr *instr)
{
        nir_ssa_def def = instr->def;

        /* Allocate one float per component so nir_const_value_to_array()
         * doesn't write out of bounds for vector constants. */
        float *v = ralloc_array(NULL, float, instr->def.num_components);
        nir_const_value_to_array(v, instr->value, instr->def.num_components, f32);
        _mesa_hash_table_u64_insert(ctx->ssa_constants, def.index + 1, v);
}

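/*
 * MIR temporaries are allocated from a separate counter and wrapped with
 * SSA_TEMP_VALUE() so that compiler-generated values stay distinct from
 * the NIR SSA and register indices produced above.
 */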
static uint32_t
alloc_mir_temp(struct compiler_context *ctx)
{
        return SSA_TEMP_VALUE(ctx->mir_temp++);
}

static uint32_t
emit_ld_vary_addr_constant(struct compiler_context *ctx, uint32_t location)
{
        // LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12
        // ...
        // ST_VAR.v4 T1, R12, R13, R14, R4

        // R61-R62 are filled with the information needed for varying interpolation
        // This loads a vec3 with the information that ST_VAR needs to work

        uint32_t mir_temp_location = alloc_mir_temp(ctx);
        // This instruction loads a vec3 starting from the initial register
        struct bifrost_instruction instr = {
                .op = op_ld_var_addr,
                .dest_components = 3,
                .ssa_args = {
                        .dest = mir_temp_location,
                        .src0 = SSA_FIXED_REGISTER(61),
                        .src1 = SSA_FIXED_REGISTER(62),
                        .src2 = SSA_INVALID_VALUE,
                        .src3 = SSA_INVALID_VALUE,
                },
                .literal_args[0] = location,
        };
        emit_mir_instruction(ctx, instr);

        return mir_temp_location;
}

// XXX: Doesn't support duplicated values in the components!
// RA WILL fail!
static void
emit_create_vector(struct compiler_context *ctx, unsigned dest, unsigned num_comps, uint32_t *comps)
{
        assert(num_comps <= 4 && "Can't make a vector larger than 4 components");

        // This instruction gathers up to four scalar sources into one vector destination
        struct bifrost_instruction instr = {
                .op = op_create_vector,
                .dest_components = num_comps,
                .ssa_args = {
                        .dest = dest,
                }
        };

        uint32_t *srcs[4] = {
                &instr.ssa_args.src0,
                &instr.ssa_args.src1,
                &instr.ssa_args.src2,
                &instr.ssa_args.src3,
        };

        for (unsigned i = 0; i < 4; ++i) {
                if (i < num_comps)
                        *srcs[i] = comps[i];
                else
                        *srcs[i] = SSA_INVALID_VALUE;
        }
        emit_mir_instruction(ctx, instr);
}

static uint32_t
emit_extract_vector_element(struct compiler_context *ctx, unsigned ssa_vector, unsigned element)
{
        uint32_t mir_temp_location = alloc_mir_temp(ctx);
        // This instruction extracts a single element from a vector source
        struct bifrost_instruction instr = {
                .op = op_extract_element,
                .dest_components = 1,
                .ssa_args = {
                        .dest = mir_temp_location,
                        .src0 = ssa_vector,
                        .src1 = SSA_INVALID_VALUE,
                        .src2 = SSA_INVALID_VALUE,
                        .src3 = SSA_INVALID_VALUE,
                },
                .literal_args[0] = element,
        };
        emit_mir_instruction(ctx, instr);

        return mir_temp_location;
}

static uint32_t
emit_movi(struct compiler_context *ctx, uint32_t literal)
{
        uint32_t mir_temp_location = alloc_mir_temp(ctx);
        // This instruction moves an immediate literal into a scalar temporary
        struct bifrost_instruction instr = {
                .op = op_movi,
                .dest_components = 1,
                .ssa_args = {
                        .dest = mir_temp_location,
                        .src0 = SSA_INVALID_VALUE,
                        .src1 = SSA_INVALID_VALUE,
                        .src2 = SSA_INVALID_VALUE,
                        .src3 = SSA_INVALID_VALUE,
                },
                .literal_args[0] = literal,
        };
        emit_mir_instruction(ctx, instr);

        return mir_temp_location;
}

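/*
 * Example: for an ALU source written as ssa_7.y, the first channel used
 * has swizzle[c] == 1, so we emit extract_element(ssa_7, 1) and hand the
 * resulting scalar temporary to the ALU op.
 */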
static unsigned
nir_alu_src_index_scalar(compiler_context *ctx, nir_alu_instr *nir_instr, unsigned src)
{
        // NIR uses a combination of single channels plus swizzles to determine which component is pulled out of a source
        for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
                if (!nir_alu_instr_channel_used(nir_instr, src, c))
                        continue;
                // Pull the swizzle from this element that is active and use it as the source
                unsigned element = nir_instr->src[src].swizzle[c];

                // Create an op that extracts an element from a vector
                return emit_extract_vector_element(ctx, nir_alu_src_index(ctx, &nir_instr->src[src]), element);
        }
        assert(0);
        return 0;
}

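/*
 * Lower the NIR intrinsics the backend currently understands: UBO loads,
 * SSBO stores, uniform and attribute loads, and output stores. Anything
 * else is reported and skipped.
 */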
static void
emit_intrinsic(struct compiler_context *ctx, nir_intrinsic_instr *nir_instr)
{
        nir_const_value *const_offset;
        unsigned offset, reg;

        switch (nir_instr->intrinsic) {
        case nir_intrinsic_load_ubo: {
                nir_const_value *location = nir_src_as_const_value(nir_instr->src[0]);
                const_offset = nir_src_as_const_value(nir_instr->src[1]);
                assert (location && "no indirect ubo selection");
                assert (const_offset && "no indirect inputs");

                enum bifrost_ir_ops op;

                // load_ubo <UBO binding>, <byte offset>
                // ld_ubo <byte offset>, <UBO binding>
                switch (nir_dest_num_components(nir_instr->dest)) {
                case 1:
                        op = op_ld_ubo_v1;
                        break;
                case 2:
                        op = op_ld_ubo_v2;
                        break;
                case 3:
                        op = op_ld_ubo_v3;
                        break;
                case 4:
                        op = op_ld_ubo_v4;
                        break;
                default:
                        assert(0);
                        break;
                }

                reg = nir_dest_index(ctx, &nir_instr->dest);
                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = nir_dest_num_components(nir_instr->dest),
                        .ssa_args = {
                                .dest = reg,
                                .src0 = SSA_INVALID_VALUE,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .literal_args[0] = nir_src_as_uint(nir_instr->src[1]),
                        .literal_args[1] = nir_src_as_uint(nir_instr->src[0]),
                };

                emit_mir_instruction(ctx, instr);
                break;
        }
        case nir_intrinsic_store_ssbo: {
                nir_const_value *location = nir_src_as_const_value(nir_instr->src[1]);
                const_offset = nir_src_as_const_value(nir_instr->src[2]);
                assert (location && "no indirect ssbo selection");
                assert (const_offset && "no indirect inputs");

                // store_ssbo <Value>, <binding>, <offset>
                // store_vN <Addr>, <Value>
                reg = nir_src_index(ctx, &nir_instr->src[0]);

                enum bifrost_ir_ops op;
                switch (nir_src_num_components(nir_instr->src[0])) {
                case 1:
                        op = op_store_v1;
                        break;
                case 2:
                        op = op_store_v2;
                        break;
                case 3:
                        op = op_store_v3;
                        break;
                case 4:
                        op = op_store_v4;
                        break;
                default:
                        assert(0);
                        break;
                }

                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = 0,
                        .ssa_args = {
                                .dest = SSA_INVALID_VALUE,
                                .src0 = reg,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .literal_args[0] = nir_src_as_uint(nir_instr->src[2]),
                };
                emit_mir_instruction(ctx, instr);
                break;
        }
        case nir_intrinsic_load_uniform:
                offset = nir_intrinsic_base(nir_instr);

                if (nir_src_is_const(nir_instr->src[0])) {
                        offset += nir_src_as_uint(nir_instr->src[0]);
                } else {
                        assert(0 && "Can't handle indirect load_uniform");
                }

                reg = nir_dest_index(ctx, &nir_instr->dest);

                unsigned num_components = nir_dest_num_components(nir_instr->dest);
                if (num_components == 1) {
                        struct bifrost_instruction instr = {
                                .op = op_mov,
                                .dest_components = 1,
                                .ssa_args = {
                                        .dest = reg,
                                        .src0 = SSA_FIXED_UREGISTER(offset),
                                        .src1 = SSA_INVALID_VALUE,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                },
                        };
                        emit_mir_instruction(ctx, instr);
                } else {
                        uint32_t comps[4];

                        for (unsigned i = 0; i < nir_dest_num_components(nir_instr->dest); ++i) {
                                uint32_t temp_dest = alloc_mir_temp(ctx);
                                comps[i] = temp_dest;
                                struct bifrost_instruction instr = {
                                        .op = op_mov,
                                        .dest_components = 1,
                                        .ssa_args = {
                                                .dest = temp_dest,
                                                .src0 = SSA_FIXED_UREGISTER(offset + (i * 4)),
                                                .src1 = SSA_INVALID_VALUE,
                                                .src2 = SSA_INVALID_VALUE,
                                                .src3 = SSA_INVALID_VALUE,
                                        },
                                };
                                emit_mir_instruction(ctx, instr);
                        }

                        emit_create_vector(ctx, reg, num_components, comps);
                }
                break;

        case nir_intrinsic_load_input: {
                const_offset = nir_src_as_const_value(nir_instr->src[0]);
                assert (const_offset && "no indirect inputs");

                offset = nir_intrinsic_base(nir_instr) + nir_src_as_uint(nir_instr->src[0]);

                reg = nir_dest_index(ctx, &nir_instr->dest);

                enum bifrost_ir_ops op;
                switch (nir_dest_num_components(nir_instr->dest)) {
                case 1:
                        op = op_ld_attr_v1;
                        break;
                case 2:
                        op = op_ld_attr_v2;
                        break;
                case 3:
                        op = op_ld_attr_v3;
                        break;
                case 4:
                        op = op_ld_attr_v4;
                        break;
                default:
                        assert(0);
                        break;
                }

                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = nir_dest_num_components(nir_instr->dest),
                        .ssa_args = {
                                .dest = reg,
                                .src0 = offset,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        }
                };

                emit_mir_instruction(ctx, instr);
                break;
        }
        case nir_intrinsic_store_output: {
                const_offset = nir_src_as_const_value(nir_instr->src[1]);
                assert(const_offset && "no indirect outputs");

                offset = nir_intrinsic_base(nir_instr);
                if (ctx->stage == MESA_SHADER_FRAGMENT) {
                        int comp = nir_intrinsic_component(nir_instr);
                        offset += comp;
                        // XXX: Once we support more than colour output, this will need to change
                        void *entry = _mesa_hash_table_u64_search(ctx->outputs_nir_to_bi, offset + FRAG_RESULT_DATA0 + 1);

                        if (!entry) {
                                printf("WARNING: skipping fragment output\n");
                                break;
                        }

                        offset = (uintptr_t) (entry) - 1;
                        reg = nir_src_index(ctx, &nir_instr->src[0]);

                        enum bifrost_ir_ops op;
                        switch (nir_src_num_components(nir_instr->src[0])) {
                        case 1:
                                op = op_store_v1;
                                break;
                        case 2:
                                op = op_store_v2;
                                break;
                        case 3:
                                op = op_store_v3;
                                break;
                        case 4:
                                op = op_store_v4;
                                break;
                        default:
                                assert(0);
                                break;
                        }

                        // XXX: Not all offsets are vec4 aligned, so this will need adjusting in the future
                        // XXX: This needs to offset correctly into memory so the blend step can pick it up
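                        // The slot index is scaled by 16 here, presumably to turn a
                        // vec4 output slot into a byte offset (4 components x 4 bytes).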
                        uint32_t movi = emit_movi(ctx, offset * 16);
                        uint32_t movi2 = emit_movi(ctx, 0);

                        uint32_t comps[2] = {
                                movi, movi2,
                        };
                        uint32_t offset_val = alloc_mir_temp(ctx);
                        emit_create_vector(ctx, offset_val, 2, comps);

                        struct bifrost_instruction instr = {
                                .op = op,
                                .dest_components = 0,
                                .ssa_args = {
                                        .dest = SSA_INVALID_VALUE,
                                        .src0 = offset_val,
                                        .src1 = reg,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                }
                        };
                        emit_mir_instruction(ctx, instr);
                } else if (ctx->stage == MESA_SHADER_VERTEX) {
                        int comp = nir_intrinsic_component(nir_instr);
                        offset += comp;
                        void *entry = _mesa_hash_table_u64_search(ctx->varying_nir_to_bi, offset + 2);

                        if (!entry) {
                                printf("WARNING: skipping varying\n");
                                break;
                        }

                        offset = (uintptr_t) (entry) - 1;

                        reg = nir_src_index(ctx, &nir_instr->src[0]);
                        // LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12
                        // ...
                        // ST_VAR.v4 T1, R12, R13, R14, R4

                        offset = emit_ld_vary_addr_constant(ctx, offset);
                        enum bifrost_ir_ops op;
                        switch (nir_src_num_components(nir_instr->src[0])) {
                        case 1:
                                op = op_st_vary_v1;
                                break;
                        case 2:
                                op = op_st_vary_v2;
                                break;
                        case 3:
                                op = op_st_vary_v3;
                                break;
                        case 4:
                                op = op_st_vary_v4;
                                break;
                        default:
                                assert(0);
                                break;
                        }

                        struct bifrost_instruction instr = {
                                .op = op,
                                .dest_components = 0,
                                .ssa_args = {
                                        .dest = SSA_INVALID_VALUE,
                                        .src0 = offset,
                                        .src1 = reg,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                }
                        };
                        emit_mir_instruction(ctx, instr);
                } else {
                        assert(0 && "Unknown store_output stage");
                }
                break;
        }
        default:
                printf ("Unhandled intrinsic %s\n", nir_intrinsic_infos[nir_instr->intrinsic].name);
                break;
        }
}

#define ALU_CASE(arguments, nir, name) \
        case nir_op_##nir: \
                argument_count = arguments; \
                op = op_##name; \
                break
#define ALU_CASE_MOD(arguments, nir, name, modifiers) \
        case nir_op_##nir: \
                argument_count = arguments; \
                op = op_##name; \
                src_modifiers = modifiers; \
                break

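/*
 * For example, ALU_CASE(2, fmul, fmul_f32) expands to:
 *
 *     case nir_op_fmul:
 *             argument_count = 2;
 *             op = op_fmul_f32;
 *             break;
 */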
static void
emit_alu(struct compiler_context *ctx, nir_alu_instr *nir_instr)
{
        unsigned dest = nir_dest_index(ctx, &nir_instr->dest.dest);
        unsigned op = ~0U, argument_count;
        unsigned src_modifiers = 0;

        switch (nir_instr->op) {
        ALU_CASE(2, fmul, fmul_f32);
        ALU_CASE(2, fadd, fadd_f32);
        ALU_CASE_MOD(2, fsub, fadd_f32, SOURCE_MODIFIER(1, SRC_MOD_NEG));
        ALU_CASE(1, ftrunc, trunc);
        ALU_CASE(1, fceil, ceil);
        ALU_CASE(1, ffloor, floor);
        ALU_CASE(1, fround_even, roundeven);
        ALU_CASE(1, frcp, frcp_fast_f32);
        ALU_CASE(2, fmax, max_f32);
        ALU_CASE(2, fmin, min_f32);
        ALU_CASE(2, iadd, add_i32);
        ALU_CASE(2, isub, sub_i32);
        ALU_CASE(2, imul, mul_i32);
        ALU_CASE(2, iand, and_i32);
        ALU_CASE(2, ior, or_i32);
        ALU_CASE(2, ixor, xor_i32);
        ALU_CASE(2, ishl, lshift_i32);
        ALU_CASE(2, ushr, rshift_i32);
        ALU_CASE(2, ishr, arshift_i32);
        case nir_op_ineg: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                printf("ineg 0x%08x\n", src0);
                struct bifrost_instruction instr = {
                        .op = op_sub_i32,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = SSA_FIXED_CONST_0,
                                .src1 = src0,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                };

                emit_mir_instruction(ctx, instr);
                return;

        }
        case nir_op_vec2: {
                uint32_t comps[2] = {
                        nir_alu_src_index(ctx, &nir_instr->src[0]),
                        nir_alu_src_index(ctx, &nir_instr->src[1]),
                };
                emit_create_vector(ctx, dest, 2, comps);
                return;
                break;
        }
        case nir_op_vec3: {
                uint32_t comps[3] = {
                        nir_alu_src_index(ctx, &nir_instr->src[0]),
                        nir_alu_src_index(ctx, &nir_instr->src[1]),
                        nir_alu_src_index(ctx, &nir_instr->src[2]),
                };
                emit_create_vector(ctx, dest, 3, comps);
                return;
                break;
        }
        case nir_op_vec4: {
                uint32_t comps[4] = {
                        nir_alu_src_index(ctx, &nir_instr->src[0]),
                        nir_alu_src_index(ctx, &nir_instr->src[1]),
                        nir_alu_src_index(ctx, &nir_instr->src[2]),
                        nir_alu_src_index(ctx, &nir_instr->src[3]),
                };
                emit_create_vector(ctx, dest, 4, comps);
                return;
                break;
        }
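        // fdiv has no direct opcode here: it is lowered to a fast reciprocal
        // of the divisor followed by a multiply, i.e. dest = src0 * (1 / src1).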
        case nir_op_fdiv: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                uint32_t mir_temp_location = alloc_mir_temp(ctx);
                {
                        struct bifrost_instruction instr = {
                                .op = op_frcp_fast_f32,
                                .dest_components = 1,
                                .ssa_args = {
                                        .dest = mir_temp_location,
                                        .src0 = src1,
                                        .src1 = SSA_INVALID_VALUE,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                },
                        };
                        emit_mir_instruction(ctx, instr);
                }

                // Multiply src0 by the reciprocal of src1 computed above
                struct bifrost_instruction instr = {
                        .op = op_fmul_f32,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = mir_temp_location,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .src_modifiers = src_modifiers,
                };

                emit_mir_instruction(ctx, instr);
                return;
                break;
        }
        case nir_op_umin:
        case nir_op_imin:
        case nir_op_umax:
        case nir_op_imax: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                struct bifrost_instruction instr = {
                        .op = op_csel_i32,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = src1,
                                .src2 = src0,
                                .src3 = src1,
                        },
                        .src_modifiers = src_modifiers,
                        .literal_args[0] = 0, /* XXX: Comparison operator */
                };

                emit_mir_instruction(ctx, instr);
                return;
                break;
        }
        case nir_op_umin3:
        case nir_op_imin3:
        case nir_op_umax3:
        case nir_op_imax3: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                unsigned src2 = nir_alu_src_index_scalar(ctx, nir_instr, 2);

                unsigned op = 0;
                if (nir_instr->op == nir_op_umin3)
                        op = op_umin3_i32;
                else if (nir_instr->op == nir_op_imin3)
                        op = op_imin3_i32;
                else if (nir_instr->op == nir_op_umax3)
                        op = op_umax3_i32;
                else if (nir_instr->op == nir_op_imax3)
                        op = op_imax3_i32;
                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = src1,
                                .src2 = src2,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .src_modifiers = src_modifiers,
                };

                emit_mir_instruction(ctx, instr);

                return;
                break;
        }
        case nir_op_ine: {
                uint32_t movi = emit_movi(ctx, ~0U);
                unsigned src0 = nir_alu_src_index(ctx, &nir_instr->src[0]);
                unsigned src1 = nir_alu_src_index(ctx, &nir_instr->src[1]);
                struct bifrost_instruction instr = {
                        .op = op_csel_i32,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = src1,
                                .src2 = movi,
                                .src3 = SSA_FIXED_CONST_0,
                        },
                        .src_modifiers = src_modifiers,
                        .literal_args[0] = CSEL_IEQ, /* XXX: Comparison operator */
                };

                emit_mir_instruction(ctx, instr);
                return;
                break;
        }
        default:
                printf("Unhandled ALU op %s\n", nir_op_infos[nir_instr->op].name);
                return;
        }

        unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
        unsigned src1 = argument_count >= 2 ? nir_alu_src_index_scalar(ctx, nir_instr, 1) : SSA_INVALID_VALUE;
        unsigned src2 = argument_count >= 3 ? nir_alu_src_index_scalar(ctx, nir_instr, 2) : SSA_INVALID_VALUE;
        unsigned src3 = argument_count >= 4 ? nir_alu_src_index_scalar(ctx, nir_instr, 3) : SSA_INVALID_VALUE;

        struct bifrost_instruction instr = {
                .op = op,
                .dest_components = 1,
                .ssa_args = {
                        .dest = dest,
                        .src0 = src0,
                        .src1 = src1,
                        .src2 = src2,
                        .src3 = src3,
                },
                .src_modifiers = src_modifiers,
        };

        emit_mir_instruction(ctx, instr);
}

static void
emit_instr(struct compiler_context *ctx, struct nir_instr *instr)
{
        switch (instr->type) {
        case nir_instr_type_load_const:
                emit_load_const(ctx, nir_instr_as_load_const(instr));
                break;
        case nir_instr_type_intrinsic:
                emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
                break;
        case nir_instr_type_alu:
                emit_alu(ctx, nir_instr_as_alu(instr));
                break;
        case nir_instr_type_tex:
                printf("Unhandled NIR inst tex\n");
                break;
        case nir_instr_type_jump:
                printf("Unhandled NIR inst jump\n");
                break;
        case nir_instr_type_ssa_undef:
                printf("Unhandled NIR inst ssa_undef\n");
                break;
        default:
                printf("Unhandled instruction type\n");
                break;
        }

}

static bifrost_block *
emit_block(struct compiler_context *ctx, nir_block *block)
{
        bifrost_block *this_block = calloc(sizeof(bifrost_block), 1);
        list_addtail(&this_block->link, &ctx->blocks);

        ++ctx->block_count;

        /* Add this block to be a successor to the previous block */
        if (ctx->current_block)
                bifrost_block_add_successor(ctx->current_block, this_block);

        /* Set up current block */
        list_inithead(&this_block->instructions);
        ctx->current_block = this_block;

        nir_foreach_instr(instr, block) {
                emit_instr(ctx, instr);
                ++ctx->instruction_count;
        }

#ifdef BI_DEBUG
        print_mir_block(this_block, false);
#endif
        return this_block;
}

void
emit_if(struct compiler_context *ctx, nir_if *nir_inst);

static struct bifrost_block *
emit_cf_list(struct compiler_context *ctx, struct exec_list *list)
{
        struct bifrost_block *start_block = NULL;
        foreach_list_typed(nir_cf_node, node, node, list) {
                switch (node->type) {
                case nir_cf_node_block: {
                        bifrost_block *block = emit_block(ctx, nir_cf_node_as_block(node));

                        if (!start_block)
                                start_block = block;

                        break;
                }

                case nir_cf_node_if:
                        emit_if(ctx, nir_cf_node_as_if(node));
                        break;

                default:
                case nir_cf_node_loop:
                case nir_cf_node_function:
                        assert(0);
                        break;
                }
        }

        return start_block;
}

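/*
 * Lower a NIR if: a conditional branch is emitted before the then-block,
 * an unconditional branch at the end of the then-block jumps over the
 * else-block, and both branch targets are patched once the block indices
 * of the else-block and the if-footer are known.
 */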
void
emit_if(struct compiler_context *ctx, nir_if *nir_inst)
{

        // XXX: Conditional branch instruction can do a variety of comparisons with the sources
        // Merge the source instruction `ine` with our conditional branch
        {
                uint32_t movi = emit_movi(ctx, ~0U);
                struct bifrost_instruction instr = {
                        .op = op_branch,
                        .dest_components = 0,
                        .ssa_args = {
                                .dest = SSA_INVALID_VALUE,
                                .src0 = nir_src_index(ctx, &nir_inst->condition),
                                .src1 = movi,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .src_modifiers = 0,
                        .literal_args[0] = BR_COND_EQ, /* XXX: Comparison Arg type */
                        .literal_args[1] = 0, /* XXX: Branch target */
                };

                emit_mir_instruction(ctx, instr);
        }

        bifrost_instruction *true_branch = mir_last_instr_in_block(ctx->current_block);

        bifrost_block *true_block = emit_cf_list(ctx, &nir_inst->then_list);

        {
                struct bifrost_instruction instr = {
                        .op = op_branch,
                        .dest_components = 0,
                        .ssa_args = {
                                .dest = SSA_INVALID_VALUE,
                                .src0 = SSA_INVALID_VALUE,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .src_modifiers = 0,
                        .literal_args[0] = BR_ALWAYS, /* XXX: ALWAYS */
                        .literal_args[1] = 0, /* XXX: Branch target */
                };

                emit_mir_instruction(ctx, instr);
        }
        bifrost_instruction *true_exit_branch = mir_last_instr_in_block(ctx->current_block);

        unsigned false_idx = ctx->block_count;
        unsigned inst_count = ctx->instruction_count;

        bifrost_block *false_block = emit_cf_list(ctx, &nir_inst->else_list);

        unsigned if_footer_idx = ctx->block_count;
        assert(true_block);
        assert(false_block);

        if (ctx->instruction_count == inst_count) {
                // If the else branch didn't have anything in it then we can remove the dead jump
                mir_remove_instr(true_exit_branch);
        } else {
                true_exit_branch->literal_args[1] = if_footer_idx;
        }

        true_branch->literal_args[1] = false_idx;
}

int
bifrost_compile_shader_nir(nir_shader *nir, struct bifrost_program *program)
{
        struct compiler_context ictx = {
                .nir = nir,
                .stage = nir->info.stage,
        };

        struct compiler_context *ctx = &ictx;

        ctx->mir_temp = 0;

        /* Initialize the hash tables that live at a global (not per-block) level */
        ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
        ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);

        /* Assign actual uniform locations, skipping over samplers */
        ctx->uniform_nir_to_bi = _mesa_hash_table_u64_create(NULL);

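        /* Hash table values are stored as id + 1 so that a stored id of 0 can
         * be told apart from a missing entry; lookups subtract 1 again. */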
        nir_foreach_variable(var, &nir->uniforms) {
                if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;

                for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
                        int id = ctx->uniform_count++;
                        _mesa_hash_table_u64_insert(ctx->uniform_nir_to_bi, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
                }
        }

        if (ctx->stage == MESA_SHADER_VERTEX) {
                ctx->varying_nir_to_bi = _mesa_hash_table_u64_create(NULL);
                nir_foreach_variable(var, &nir->outputs) {
                        if (var->data.location < VARYING_SLOT_VAR0) {
                                if (var->data.location == VARYING_SLOT_POS)
                                        ctx->varying_count++;
                                _mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + 1, (void *) ((uintptr_t) (1)));

                                continue;
                        }

                        for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
                                for (int comp = 0; comp < 4; ++comp) {
                                        int id = comp + ctx->varying_count++;
                                        _mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + col + comp + 1, (void *) ((uintptr_t) (id + 1)));
                                }
                        }
                }

        } else if (ctx->stage == MESA_SHADER_FRAGMENT) {
                ctx->outputs_nir_to_bi = _mesa_hash_table_u64_create(NULL);
                nir_foreach_variable(var, &nir->outputs) {
                        if (var->data.location >= FRAG_RESULT_DATA0 && var->data.location <= FRAG_RESULT_DATA7) {
                                int id = ctx->outputs_count++;
                                printf("Driver location: %d with id %d\n", var->data.location + 1, id);
                                _mesa_hash_table_u64_insert(ctx->outputs_nir_to_bi, var->data.location + 1, (void *) ((uintptr_t) (id + 1)));
                        }
                }
        }

        /* Optimisation passes */
        optimize_nir(nir);

#ifdef BI_DEBUG
        nir_print_shader(nir, stdout);
#endif

        /* Generate machine IR for shader */
        nir_foreach_function(func, nir) {
                nir_builder _b;
                ctx->b = &_b;
                nir_builder_init(ctx->b, func->impl);

                list_inithead(&ctx->blocks);
                ctx->block_count = 0;
                ctx->func = func;

                emit_cf_list(ctx, &func->impl->body);

                break; // XXX: Only the first function is compiled until multi-function shaders are supported
        }

        util_dynarray_init(&program->compiled, NULL);

        // MIR pre-RA optimizations

        bool progress = false;

        do {
                progress = false;
                mir_foreach_block(ctx, block) {
                        // XXX: Not yet working
                        // progress |= bifrost_opt_branch_fusion(ctx, block);
                }
        } while (progress);

        schedule_program(ctx);

#ifdef BI_DEBUG
        nir_print_shader(nir, stdout);
        disassemble_bifrost(program->compiled.data, program->compiled.size, false);
#endif
        return 0;
}