/*
 * Copyright (c) 2019 Zodiac Inflight Innovations
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

#include "etnaviv_asm.h"
#include "etnaviv_context.h"
#include "etnaviv_compiler_nir.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "util/register_allocate.h"

#define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3])
#define SRC_DISABLE ((hw_src){})
#define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s})
#define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s})

#define emit(type, args...) etna_emit_##type(state->c, args)

typedef struct etna_inst_dst hw_dst;
typedef struct etna_inst_src hw_src;

struct state {
   struct etna_compile *c;

   unsigned const_count;

   nir_shader *shader;
   nir_function_impl *impl;

   /* register allocation state */
   struct ra_graph *g;
   struct ra_regs *regs;
   unsigned *live_map;
   unsigned num_nodes;
};
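
/* Compose an extra swizzle onto a source operand. Immediate sources are
 * skipped: their swizzle bits are part of the encoded immediate value, so
 * composing a swizzle would corrupt it.
 */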
static inline hw_src
src_swizzle(hw_src src, unsigned swizzle)
{
   if (src.rgroup != INST_RGROUP_IMMEDIATE)
      src.swiz = inst_swiz_compose(src.swiz, swizzle);

   return src;
}

/* constants are represented as 64-bit ints
 * 32-bit for the value and 32-bit for the type (imm, uniform, etc)
 */

#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
#define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
#define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
#define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
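
/* Add a 64-bit constant to a 4-component constant vector c, reusing a
 * component that already holds the same value or claiming an unused (zero)
 * slot. Returns the component index, or -1 if the vector is full.
 */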
static int
const_add(uint64_t *c, uint64_t value)
{
   for (unsigned i = 0; i < 4; i++) {
      if (c[i] == value || !c[i]) {
         c[i] = value;
         return i;
      }
   }
   return -1;
}
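
/* Build a hardware source operand for a constant. On HALTI2+ a single
 * 32-bit constant may be encoded as an inline immediate; otherwise the
 * components are packed into the uniform constant pool (c->consts) and
 * addressed with a swizzle.
 */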
static hw_src
const_src(struct state *state, nir_const_value *value, unsigned num_components)
{
   /* use inline immediates if possible */
   if (state->c->specs->halti >= 2 && num_components == 1 &&
       value[0].u64 >> 32 == ETNA_IMMEDIATE_CONSTANT) {
      uint32_t bits = value[0].u32;

      /* "float" - shifted by 12 */
      if ((bits & 0xfff) == 0)
         return etna_immediate_src(0, bits >> 12);

      /* "unsigned" - raw 20 bit value */
      if (bits < (1 << 20))
         return etna_immediate_src(2, bits);

      /* "signed" - sign extended 20-bit (sign included) value */
      if (bits >= 0xfff80000)
         return etna_immediate_src(1, bits);
   }

   unsigned i;
   int swiz = -1;
   for (i = 0; swiz < 0; i++) {
      uint64_t *a = &state->c->consts[i * 4];
      uint64_t save[4];
      memcpy(save, a, sizeof(save));
      swiz = 0;
      for (unsigned j = 0; j < num_components; j++) {
         int c = const_add(a, value[j].u64);
         if (c < 0) {
            /* component didn't fit in this vec4 - restore and try the next */
            memcpy(a, save, sizeof(save));
            swiz = -1;
            break;
         }
         swiz |= c << j * 2;
      }
   }

   assert(i <= ETNA_MAX_IMM / 4);
   state->const_count = MAX2(state->const_count, i);

   return SRC_CONST(i - 1, swiz);
}

/* Swizzles and write masks can be used to layer virtual non-interfering
 * registers on top of the real VEC4 registers. For example, the virtual
 * VEC3_XYZ register and the virtual SCALAR_W register that use the same
 * physical VEC4 base register do not interfere.
 */
enum {
   REG_CLASS_VIRT_SCALAR,
   REG_CLASS_VIRT_VEC2,
   REG_CLASS_VIRT_VEC3,
   REG_CLASS_VEC4,
   /* special vec2 class for fast transcendentals, limited to XY or ZW */
   REG_CLASS_VIRT_VEC2T,
   /* special classes for LOAD - contiguous components */
   REG_CLASS_VIRT_VEC2C,
   REG_CLASS_VIRT_VEC3C,
   NUM_REG_CLASSES,
};

enum {
   REG_TYPE_VEC4,
   REG_TYPE_VIRT_VEC3_XYZ,
   REG_TYPE_VIRT_VEC3_XYW,
   REG_TYPE_VIRT_VEC3_XZW,
   REG_TYPE_VIRT_VEC3_YZW,
   REG_TYPE_VIRT_VEC2_XY,
   REG_TYPE_VIRT_VEC2_XZ,
   REG_TYPE_VIRT_VEC2_XW,
   REG_TYPE_VIRT_VEC2_YZ,
   REG_TYPE_VIRT_VEC2_YW,
   REG_TYPE_VIRT_VEC2_ZW,
   REG_TYPE_VIRT_SCALAR_X,
   REG_TYPE_VIRT_SCALAR_Y,
   REG_TYPE_VIRT_SCALAR_Z,
   REG_TYPE_VIRT_SCALAR_W,
   REG_TYPE_VIRT_VEC2T_XY,
   REG_TYPE_VIRT_VEC2T_ZW,
   REG_TYPE_VIRT_VEC2C_XY,
   REG_TYPE_VIRT_VEC2C_YZ,
   REG_TYPE_VIRT_VEC2C_ZW,
   REG_TYPE_VIRT_VEC3C_XYZ,
   REG_TYPE_VIRT_VEC3C_YZW,
   NUM_REG_TYPES,
};

/* writemask when used as dest */
static const uint8_t
reg_writemask[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = 0xf,
   [REG_TYPE_VIRT_SCALAR_X] = 0x1,
   [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
   [REG_TYPE_VIRT_VEC2_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
   [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
   [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
   [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
   [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
   [REG_TYPE_VIRT_SCALAR_W] = 0x8,
   [REG_TYPE_VIRT_VEC2_XW] = 0x9,
   [REG_TYPE_VIRT_VEC2_YW] = 0xa,
   [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
   [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
   [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
   [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
};

/* how to swizzle when used as a src */
static const uint8_t
reg_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
};

/* how to swizzle when used as a dest */
static const uint8_t
reg_dst_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
};

static inline int reg_get_type(int virt_reg)
{
   return virt_reg % NUM_REG_TYPES;
}

static inline int reg_get_base(struct state *state, int virt_reg)
{
   /* offset by 1 to avoid reserved position register */
   if (state->shader->info.stage == MESA_SHADER_FRAGMENT)
      return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
   return virt_reg / NUM_REG_TYPES;
}

/* use "r63.z" for depth reg, it will wrap around to r0.z by reg_get_base
 * (fs registers are offset by 1 to avoid reserving r0)
 */
#define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)

static inline int reg_get_class(int virt_reg)
{
   switch (reg_get_type(virt_reg)) {
   case REG_TYPE_VEC4:
      return REG_CLASS_VEC4;
   case REG_TYPE_VIRT_VEC3_XYZ:
   case REG_TYPE_VIRT_VEC3_XYW:
   case REG_TYPE_VIRT_VEC3_XZW:
   case REG_TYPE_VIRT_VEC3_YZW:
      return REG_CLASS_VIRT_VEC3;
   case REG_TYPE_VIRT_VEC2_XY:
   case REG_TYPE_VIRT_VEC2_XZ:
   case REG_TYPE_VIRT_VEC2_XW:
   case REG_TYPE_VIRT_VEC2_YZ:
   case REG_TYPE_VIRT_VEC2_YW:
   case REG_TYPE_VIRT_VEC2_ZW:
      return REG_CLASS_VIRT_VEC2;
   case REG_TYPE_VIRT_SCALAR_X:
   case REG_TYPE_VIRT_SCALAR_Y:
   case REG_TYPE_VIRT_SCALAR_Z:
   case REG_TYPE_VIRT_SCALAR_W:
      return REG_CLASS_VIRT_SCALAR;
   case REG_TYPE_VIRT_VEC2T_XY:
   case REG_TYPE_VIRT_VEC2T_ZW:
      return REG_CLASS_VIRT_VEC2T;
   case REG_TYPE_VIRT_VEC2C_XY:
   case REG_TYPE_VIRT_VEC2C_YZ:
   case REG_TYPE_VIRT_VEC2C_ZW:
      return REG_CLASS_VIRT_VEC2C;
   case REG_TYPE_VIRT_VEC3C_XYZ:
   case REG_TYPE_VIRT_VEC3C_YZW:
      return REG_CLASS_VIRT_VEC3C;
   }

   assert(false);
   return 0;
}

/* nir_src to allocated register */
static hw_src
ra_src(struct state *state, nir_src *src)
{
   unsigned reg = ra_get_node_reg(state->g, state->live_map[src_index(state->impl, src)]);
   return SRC_REG(reg_get_base(state, reg), reg_swiz[reg_get_type(reg)]);
}
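
/* Resolve a nir_src to a hardware source operand. Bypassed movs (pass_flags
 * & BYPASS_SRC) are followed through to their own source, constants go
 * through const_src, and everything else reads the register assigned by RA.
 */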
static hw_src
get_src(struct state *state, nir_src *src)
{
   if (!src->is_ssa)
      return ra_src(state, src);

   nir_instr *instr = src->ssa->parent_instr;

   if (instr->pass_flags & BYPASS_SRC) {
      assert(instr->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      assert(alu->op == nir_op_mov);
      return src_swizzle(get_src(state, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
   }

   switch (instr->type) {
   case nir_instr_type_load_const:
      return const_src(state, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      switch (intr->intrinsic) {
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_instance_id:
      case nir_intrinsic_load_uniform:
      case nir_intrinsic_load_ubo:
         return ra_src(state, src);
      case nir_intrinsic_load_front_face:
         return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
      case nir_intrinsic_load_frag_coord:
         return SRC_REG(0, INST_SWIZ_IDENTITY);
      default:
         compile_error(state->c, "Unhandled NIR intrinsic type: %s\n",
                       nir_intrinsic_infos[intr->intrinsic].name);
         break;
      }
   } break;
   case nir_instr_type_alu:
   case nir_instr_type_tex:
      return ra_src(state, src);
   case nir_instr_type_ssa_undef: {
      /* return zero to deal with broken Blur demo */
      nir_const_value value = CONST(0);
      return src_swizzle(const_src(state, &value, 1), SWIZZLE(X,X,X,X));
   }
   default:
      compile_error(state->c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }

   return SRC_DISABLE;
}
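
/* Check whether the components of ssa written by the vecN land in different
 * positions than they occupy in ssa itself. Conservatively returns true when
 * ssa has users other than mov/vecN ALU instructions, to avoid reasoning
 * about bypassed mov chains.
 */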
static bool
vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
         continue;

      if (vec->src[i].swizzle[0] != i)
         return true;
   }

   /* don't deal with possible bypassed vec/mov chain */
   nir_foreach_use(use_src, ssa) {
      nir_instr *instr = use_src->parent_instr;
      if (instr->type != nir_instr_type_alu)
         return true;

      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_mov:
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         break;
      default:
         return true;
      }
   }
   return false;
}

/* get allocated dest register for nir_dest
 * *p_swiz tells how the components need to be placed into register
 */
static hw_dst
ra_dest(struct state *state, nir_dest *dest, unsigned *p_swiz)
{
   unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
   dest = real_dest(dest, &swiz, &mask);

   unsigned r = ra_get_node_reg(state->g, state->live_map[dest_index(state->impl, dest)]);
   unsigned t = reg_get_type(r);

   *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);

   return (hw_dst) {
      .use = 1,
      .reg = reg_get_base(state, r),
      .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
   };
}

/* precomputed by register_allocate */
static unsigned int *q_values[] = {
   (unsigned int[]) {1, 2, 3, 4, 2, 2, 3},
   (unsigned int[]) {3, 5, 6, 6, 5, 5, 6},
   (unsigned int[]) {3, 4, 4, 4, 4, 4, 4},
   (unsigned int[]) {1, 1, 1, 1, 1, 1, 1},
   (unsigned int[]) {1, 2, 2, 2, 1, 2, 2},
   (unsigned int[]) {2, 3, 3, 3, 2, 3, 3},
   (unsigned int[]) {2, 2, 2, 2, 2, 2, 2},
};
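
/* Set up the register set and interference graph, pin special values
 * (inputs, instance id, fragdepth) to fixed registers, and run the
 * register allocator. The resulting graph and live map are stored in
 * state for ra_src/ra_dest to query.
 */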
static void
ra_assign(struct state *state, nir_shader *shader)
{
   struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
                                           NUM_REG_TYPES, false);

   /* classes are always created from index 0, so the index is equal to the
    * class enum, which represents a register with (c+1) components
    */
   for (int c = 0; c < NUM_REG_CLASSES; c++)
      ra_alloc_reg_class(regs);
   /* add each register of each class */
   for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
      ra_class_add_reg(regs, reg_get_class(r), r);
   /* register types with overlapping writemasks on the same base register
    * conflict with each other
    */
   for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
      for (int i = 0; i < NUM_REG_TYPES; i++) {
         for (int j = 0; j < i; j++) {
            if (reg_writemask[i] & reg_writemask[j]) {
               ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
                                         NUM_REG_TYPES * r + j);
            }
         }
      }
   }
   ra_set_finalize(regs, q_values);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   /* liveness and interference */

   nir_index_blocks(impl);
   nir_index_ssa_defs(impl);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->pass_flags = 0;
   }

   /* this gives an approximation/upper limit on how many nodes are needed
    * (some ssa values do not represent an allocated register)
    */
   unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
   unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
   memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
   struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);

   unsigned num_nodes = etna_live_defs(impl, defs, live_map);
   struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);

   /* set classes from num_components */
   for (unsigned i = 0; i < num_nodes; i++) {
      nir_instr *instr = defs[i].instr;
      nir_dest *dest = defs[i].dest;
      unsigned c = nir_dest_num_components(*dest) - 1;

      if (instr->type == nir_instr_type_alu &&
          state->c->specs->has_new_transcendentals) {
         switch (nir_instr_as_alu(instr)->op) {
         case nir_op_fdiv:
         case nir_op_flog2:
         case nir_op_fsin:
         case nir_op_fcos:
            assert(dest->is_ssa);
            c = REG_CLASS_VIRT_VEC2T;
            break;
         default:
            break;
         }
      }

      if (instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         /* can't have dst swizzle or sparse writemask on UBO loads */
         if (intr->intrinsic == nir_intrinsic_load_ubo) {
            assert(dest == &intr->dest);
            if (dest->ssa.num_components == 2)
               c = REG_CLASS_VIRT_VEC2C;
            if (dest->ssa.num_components == 3)
               c = REG_CLASS_VIRT_VEC3C;
         }
      }

      ra_set_node_class(g, i, c);
   }

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_dest *dest = dest_for_instr(instr);
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         unsigned reg;

         switch (intr->intrinsic) {
         case nir_intrinsic_store_deref: {
            /* don't want outputs to be swizzled
             * TODO: better would be to set the type to X/XY/XYZ/XYZW
             * TODO: what if fragcoord.z is read after writing fragdepth?
             */
            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            unsigned index = live_map[src_index(impl, &intr->src[1])];

            if (shader->info.stage == MESA_SHADER_FRAGMENT &&
                deref->var->data.location == FRAG_RESULT_DEPTH) {
               ra_set_node_reg(g, index, REG_FRAG_DEPTH);
            } else {
               ra_set_node_class(g, index, REG_CLASS_VEC4);
            }
         } continue;
         case nir_intrinsic_load_input:
            reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
               REG_TYPE_VIRT_SCALAR_X,
               REG_TYPE_VIRT_VEC2_XY,
               REG_TYPE_VIRT_VEC3_XYZ,
               REG_TYPE_VEC4,
            }[nir_dest_num_components(*dest) - 1];
            break;
         case nir_intrinsic_load_instance_id:
            reg = state->c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
            break;
         default:
            continue;
         }

         ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
      }
   }

   /* add interference for intersecting live ranges */
   for (unsigned i = 0; i < num_nodes; i++) {
      assert(defs[i].live_start < defs[i].live_end);
      for (unsigned j = 0; j < i; j++) {
         if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
            continue;
         ra_add_node_interference(g, i, j);
      }
   }

   ralloc_free(defs);

   /* Allocate registers */
   ASSERTED bool ok = ra_allocate(g);
   assert(ok);

   state->g = g;
   state->regs = regs;
   state->live_map = live_map;
   state->num_nodes = num_nodes;
}

static unsigned
ra_finish(struct state *state)
{
   /* TODO: better way to get number of registers used? */
   unsigned j = 0;
   for (unsigned i = 0; i < state->num_nodes; i++) {
      j = MAX2(j, reg_get_base(state, ra_get_node_reg(state->g, i)) + 1);
   }

   ralloc_free(state->g);
   ralloc_free(state->regs);
   ralloc_free(state->live_map);

   return j;
}
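
/* Emit a single ALU instruction, composing the NIR swizzles/write mask with
 * the ones imposed by the virtual register type chosen by RA.
 */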
static void
emit_alu(struct state *state, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   /* marked as dead instruction (vecN and other bypassed instr) */
   if (alu->instr.pass_flags)
      return;

   assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));

   unsigned dst_swiz;
   hw_dst dst = ra_dest(state, &alu->dest.dest, &dst_swiz);

   /* compose alu write_mask with RA write mask */
   if (!alu->dest.dest.is_ssa)
      dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);

   switch (alu->op) {
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
      /* not per-component - don't compose dst_swiz */
      dst_swiz = INST_SWIZ_IDENTITY;
      break;
   default:
      break;
   }

   hw_src srcs[3];

   for (int i = 0; i < info->num_inputs; i++) {
      nir_alu_src *asrc = &alu->src[i];
      hw_src src;

      src = src_swizzle(get_src(state, &asrc->src), ALU_SWIZ(asrc));
      src = src_swizzle(src, dst_swiz);

      if (src.rgroup != INST_RGROUP_IMMEDIATE) {
         src.neg = asrc->negate || (alu->op == nir_op_fneg);
         src.abs = asrc->abs || (alu->op == nir_op_fabs);
      } else {
         assert(!asrc->negate && alu->op != nir_op_fneg);
         assert(!asrc->abs && alu->op != nir_op_fabs);
      }

      srcs[i] = src;
   }

   emit(alu, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
}

static void
emit_tex(struct state *state, nir_tex_instr *tex)
{
   unsigned dst_swiz;
   hw_dst dst = ra_dest(state, &tex->dest, &dst_swiz);
   nir_src *coord = NULL, *lod_bias = NULL, *compare = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
         coord = &tex->src[i].src;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         assert(!lod_bias);
         lod_bias = &tex->src[i].src;
         break;
      case nir_tex_src_comparator:
         compare = &tex->src[i].src;
         break;
      default:
         compile_error(state->c, "Unhandled NIR tex src type: %d\n",
                       tex->src[i].src_type);
         break;
      }
   }

   emit(tex, tex->op, tex->sampler_index, dst_swiz, dst, get_src(state, coord),
        lod_bias ? get_src(state, lod_bias) : SRC_DISABLE,
        compare ? get_src(state, compare) : SRC_DISABLE);
}
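
/* Emit an intrinsic. Indirect uniform loads need a MOVAR to set the address
 * register first; UBO loads become a LOAD instruction with the UBO base
 * address as second source.
 */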
static void
emit_intrinsic(struct state *state, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_store_deref:
      emit(output, nir_src_as_deref(intr->src[0])->var, get_src(state, &intr->src[1]));
      break;
   case nir_intrinsic_discard_if:
      emit(discard, get_src(state, &intr->src[0]));
      break;
   case nir_intrinsic_discard:
      emit(discard, SRC_DISABLE);
      break;
   case nir_intrinsic_load_uniform: {
      unsigned dst_swiz;
      struct etna_inst_dst dst = ra_dest(state, &intr->dest, &dst_swiz);

      /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
      emit_inst(state->c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOVAR,
         .dst.write_mask = 0x1,
         .src[2] = get_src(state, &intr->src[0]),
      });
      emit_inst(state->c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOV,
         .dst = dst,
         .src[2] = {
            .use = 1,
            .rgroup = INST_RGROUP_UNIFORM_0,
            .reg = nir_intrinsic_base(intr),
            .swiz = dst_swiz,
            .amode = INST_AMODE_ADD_A_X,
         },
      });
   } break;
   case nir_intrinsic_load_ubo: {
      /* TODO: if offset is of the form (x + C) then add C to the base instead */
      unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
      unsigned dst_swiz;
      emit_inst(state->c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOAD,
         .type = INST_TYPE_U32,
         .dst = ra_dest(state, &intr->dest, &dst_swiz),
         .src[0] = get_src(state, &intr->src[1]),
         .src[1] = const_src(state, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
      });
   } break;
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_frag_coord:
      assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
      break;
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_instance_id:
      break;
   default:
      compile_error(state->c, "Unhandled NIR intrinsic type: %s\n",
                    nir_intrinsic_infos[intr->intrinsic].name);
   }
}

static void
emit_instr(struct state *state, nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      emit_alu(state, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_tex:
      emit_tex(state, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_intrinsic:
      emit_intrinsic(state, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_jump:
      assert(nir_instr_is_last(instr));
      /* fallthrough - jumps are emitted by emit_block */
   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_deref:
      break;
   default:
      compile_error(state->c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }
}

static void
emit_block(struct state *state, nir_block *block)
{
   emit(block_start, block->index);

   nir_foreach_instr(instr, block)
      emit_instr(state, instr);

   /* succs->index < block->index is for the loop case */
   nir_block *succs = block->successors[0];
   if (nir_block_ends_in_jump(block) || succs->index < block->index)
      emit(jump, succs->index, SRC_DISABLE);
}

static void
emit_cf_list(struct state *state, struct exec_list *list);

static void
emit_if(struct state *state, nir_if *nif)
{
   emit(jump, nir_if_first_else_block(nif)->index, get_src(state, &nif->condition));
   emit_cf_list(state, &nif->then_list);

   /* jump at end of then_list to skip else_list
    * not needed if then_list already ends with a jump or else_list is empty
    */
   if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
       !nir_cf_list_is_empty_block(&nif->else_list))
      emit(jump, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);

   emit_cf_list(state, &nif->else_list);
}

static void
emit_cf_list(struct state *state, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         emit_block(state, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         emit_if(state, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         emit_cf_list(state, &nir_cf_node_as_loop(node)->body);
         break;
      default:
         compile_error(state->c, "Unknown NIR node type\n");
         break;
      }
   }
}
/* based on nir_lower_vec_to_movs */
static unsigned
insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
{
   assert(start_idx < nir_op_infos[vec->op].num_inputs);
   unsigned write_mask = (1u << start_idx);

   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);

   mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
   mov->src[0].negate = vec->src[start_idx].negate;
   mov->src[0].abs = vec->src[start_idx].abs;

   unsigned num_components = 1;

   for (unsigned i = start_idx + 1; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)))
         continue;

      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
          vec->src[i].negate == vec->src[start_idx].negate &&
          vec->src[i].abs == vec->src[start_idx].abs) {
         write_mask |= (1 << i);
         mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
         num_components++;
      }
   }

   mov->dest.write_mask = (1 << num_components) - 1;
   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);

   /* replace vec srcs with inserted mov */
   for (unsigned i = 0, j = 0; i < 4; i++) {
      if (!(write_mask & (1 << i)))
         continue;

      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
      vec->src[i].swizzle[0] = j++;
   }

   nir_instr_insert_before(&vec->instr, &mov->instr);

   return write_mask;
}

/*
 * for vecN instructions:
 * -merge constant sources into a single src
 * -insert movs (nir_lower_vec_to_movs equivalent)
 * for non-vecN instructions:
 * -try to merge constants as single constant
 * -insert movs for multiple constants (pre-HALTI5)
 */
static void
lower_alu(struct state *state, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   nir_builder b;
   nir_builder_init(&b, state->impl);
   b.cursor = nir_before_instr(&alu->instr);

   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      break;
   default:
      /* pre-GC7000L can only have 1 uniform src per instruction */
      if (state->c->specs->halti >= 5)
         return;

      nir_const_value value[4] = {};
      uint8_t swizzle[4][4] = {};
      unsigned swiz_max = 0, num_const = 0;

      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
         for (unsigned j = 0; j < num_components; j++) {
            int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
            swizzle[i][j] = idx;
            swiz_max = MAX2(swiz_max, (unsigned) idx);
         }
         num_const++;
      }

      /* nothing to do if there are fewer than two constant sources */
      if (num_const <= 1)
         return;

      /* resolve with single combined const src */
      if (swiz_max < 4) {
         nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);

         for (unsigned i = 0; i < info->num_inputs; i++) {
            nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
            if (!cv)
               continue;

            nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));

            for (unsigned j = 0; j < 4; j++)
               alu->src[i].swizzle[j] = swizzle[i][j];
         }
         return;
      }

      /* resolve with movs */
      num_const = 0;
      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         /* the first constant source can stay inline */
         num_const++;
         if (num_const == 1)
            continue;

         nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
      }
      return;
   }

   nir_const_value value[4];
   unsigned num_components = 0;

   for (unsigned i = 0; i < info->num_inputs; i++) {
      nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
      if (cv)
         value[num_components++] = cv[alu->src[i].swizzle[0]];
   }

   /* if there is more than one constant source to the vecN, combine them
    * into a single load_const (removing the vecN completely if all components
    * are constant)
    */
   if (num_components > 1) {
      nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);

      if (num_components == info->num_inputs) {
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(def));
         nir_instr_remove(&alu->instr);
         return;
      }

      for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
         alu->src[i].swizzle[0] = j++;
      }
   }

   unsigned finished_write_mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (!(alu->dest.write_mask & (1 << i)))
         continue;

      nir_ssa_def *ssa = alu->src[i].src.ssa;

      /* check that vecN instruction is only user of this */
      bool need_mov = list_length(&ssa->if_uses) != 0;
      nir_foreach_use(use_src, ssa) {
         if (use_src->parent_instr != &alu->instr)
            need_mov = true;
      }

      nir_instr *instr = ssa->parent_instr;
      switch (instr->type) {
      case nir_instr_type_alu:
      case nir_instr_type_tex:
         break;
      case nir_instr_type_intrinsic:
         if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
            need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
            break;
         }
         /* fallthrough */
      default:
         need_mov = true;
      }

      if (need_mov && !(finished_write_mask & (1 << i)))
         finished_write_mask |= insert_vec_mov(alu, i, state->shader);
   }
}
static void
emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
{
   nir_shader *shader = c->nir;

   struct state state = {
      .c = c,
      .shader = shader,
      .impl = nir_shader_get_entrypoint(shader),
   };
   bool have_indirect_uniform = false;
   unsigned indirect_max = 0;

   nir_builder b;
   nir_builder_init(&b, state.impl);

   /* convert non-dynamic uniform loads to constants, etc */
   nir_foreach_block(block, state.impl) {
      nir_foreach_instr_safe(instr, block) {
         switch(instr->type) {
         case nir_instr_type_alu:
            /* deals with vecN and const srcs */
            lower_alu(&state, nir_instr_as_alu(instr));
            break;
         case nir_instr_type_load_const: {
            nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
            for (unsigned i = 0; i < load_const->def.num_components; i++)
               load_const->value[i] = CONST(load_const->value[i].u32);
         } break;
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            /* TODO: load_ubo can also become a constant in some cases
             * (at the moment it can end up emitting a LOAD with two
             * uniform sources, which could be a problem on HALTI2)
             */
            if (intr->intrinsic != nir_intrinsic_load_uniform)
               break;
            nir_const_value *off = nir_src_as_const_value(intr->src[0]);
            if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
               have_indirect_uniform = true;
               indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
               break;
            }

            unsigned base = nir_intrinsic_base(intr);
            /* pre halti2 uniform offset will be float */
            if (c->specs->halti < 2)
               base += (unsigned) off[0].f32;
            else
               base += off[0].u32;
            nir_const_value value[4];

            for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
               if (nir_intrinsic_base(intr) < 0)
                  value[i] = TEXSCALE(~nir_intrinsic_base(intr), i);
               else
                  value[i] = UNIFORM(base * 4 + i);
            }

            b.cursor = nir_after_instr(instr);
            nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);

            nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
            nir_instr_remove(instr);
         } break;
         default:
            break;
         }
      }
   }

   /* TODO: only emit required indirect uniform ranges */
   if (have_indirect_uniform) {
      for (unsigned i = 0; i < indirect_max * 4; i++)
         c->consts[i] = UNIFORM(i).u64;
      state.const_count = indirect_max;
   }

   /* add mov for any store output using sysval/const */
   nir_foreach_block(block, state.impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

         switch (intr->intrinsic) {
         case nir_intrinsic_store_deref: {
            nir_src *src = &intr->src[1];
            if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) {
               b.cursor = nir_before_instr(instr);
               nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
            }
         } break;
         default:
            break;
         }
      }
   }

   /* call directly to avoid validation (load_const don't pass validation at this point) */
   nir_convert_from_ssa(shader, true);
   nir_opt_dce(shader);

   ra_assign(&state, shader);

   emit_cf_list(&state, &nir_shader_get_entrypoint(shader)->body);

   *num_temps = ra_finish(&state);
   *num_consts = state.const_count;
}