etnaviv: drop emit macro
[mesa.git] src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h
1 /*
2 * Copyright (c) 2019 Zodiac Inflight Innovations
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jonathan Marek <jonathan@marek.ca>
25 */
26
27 #include "etnaviv_asm.h"
28 #include "etnaviv_context.h"
29 #include "etnaviv_compiler_nir.h"
30
31 #include "compiler/nir/nir.h"
32 #include "compiler/nir/nir_builder.h"
33
34 #define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3])
35 #define SRC_DISABLE ((hw_src){})
36 #define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s})
37 #define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s})
38
39 typedef struct etna_inst_dst hw_dst;
40 typedef struct etna_inst_src hw_src;
41
42 static inline hw_src
43 src_swizzle(hw_src src, unsigned swizzle)
44 {
45 if (src.rgroup != INST_RGROUP_IMMEDIATE)
46 src.swiz = inst_swiz_compose(src.swiz, swizzle);
47
48 return src;
49 }
50
51 /* constants are represented as 64-bit ints:
52 * the low 32 bits hold the value, the high 32 bits the type (imm, uniform, etc)
53 */
54
55 #define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
56 #define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
57 #define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
58 #define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
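/* For illustration: CONST(0x3f800000) packs ETNA_IMMEDIATE_CONSTANT into the
 * high 32 bits and the raw bit pattern of 1.0f into the low 32 bits, while
 * UNIFORM(7) marks the value as a reference to uniform component 7. The type
 * half is what const_src() and emit_shader() later inspect to decide how to
 * materialize the value.
 */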
59
60 static int
61 const_add(uint64_t *c, uint64_t value)
62 {
63 for (unsigned i = 0; i < 4; i++) {
64 if (c[i] == value || !c[i]) {
65 c[i] = value;
66 return i;
67 }
68 }
69 return -1;
70 }
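/* Example (values are hypothetical): starting from an empty vec4 slot
 * {0, 0, 0, 0}, const_add(c, A) returns 0, a following const_add(c, B)
 * returns 1, and a repeated const_add(c, A) returns 0 again because equal
 * values are deduplicated; once all four components hold distinct non-zero
 * values the function returns -1 and the caller moves on to the next slot.
 */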
71
72 static hw_src
73 const_src(struct etna_compile *c, nir_const_value *value, unsigned num_components)
74 {
75 /* use inline immediates if possible */
76 if (c->specs->halti >= 2 && num_components == 1 &&
77 value[0].u64 >> 32 == ETNA_IMMEDIATE_CONSTANT) {
78 uint32_t bits = value[0].u32;
79
80 /* "float" - shifted by 12 */
81 if ((bits & 0xfff) == 0)
82 return etna_immediate_src(0, bits >> 12);
83
84 /* "unsigned" - raw 20 bit value */
85 if (bits < (1 << 20))
86 return etna_immediate_src(2, bits);
87
88 /* "signed" - negative value that fits in 20 bits including the sign bit */
89 if (bits >= 0xfff80000)
90 return etna_immediate_src(1, bits);
91 }
92
93 unsigned i;
94 int swiz = -1;
95 for (i = 0; swiz < 0; i++) {
96 uint64_t *a = &c->consts[i*4];
97 uint64_t save[4];
98 memcpy(save, a, sizeof(save));
99 swiz = 0;
100 for (unsigned j = 0; j < num_components; j++) {
101 int c = const_add(a, value[j].u64);
102 if (c < 0) {
103 memcpy(a, save, sizeof(save));
104 swiz = -1;
105 break;
106 }
107 swiz |= c << j * 2;
108 }
109 }
110
111 assert(i <= ETNA_MAX_IMM / 4);
112 c->const_count = MAX2(c->const_count, i);
113
114 return SRC_CONST(i - 1, swiz);
115 }
116
117 /* Swizzles and write masks can be used to layer virtual non-interfering
118 * registers on top of the real VEC4 registers. For example, the virtual
119 * VEC3_XYZ register and the virtual SCALAR_W register that use the same
120 * physical VEC4 base register do not interfere.
121 */
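/* For example (illustrative): on the same physical base register, the
 * REG_TYPE_VIRT_VEC3_XYZ type (writemask 0x7) and the REG_TYPE_VIRT_SCALAR_W
 * type (writemask 0x8) touch disjoint components, so ra_assign() adds no
 * conflict between them and both can be live at the same time.
 */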
122 enum reg_class {
123 REG_CLASS_VIRT_SCALAR,
124 REG_CLASS_VIRT_VEC2,
125 REG_CLASS_VIRT_VEC3,
126 REG_CLASS_VEC4,
127 /* special vec2 class for fast transcendentals, limited to XY or ZW */
128 REG_CLASS_VIRT_VEC2T,
129 /* special classes for LOAD - contiguous components */
130 REG_CLASS_VIRT_VEC2C,
131 REG_CLASS_VIRT_VEC3C,
132 NUM_REG_CLASSES,
133 };
134
135 enum reg_type {
136 REG_TYPE_VEC4,
137 REG_TYPE_VIRT_VEC3_XYZ,
138 REG_TYPE_VIRT_VEC3_XYW,
139 REG_TYPE_VIRT_VEC3_XZW,
140 REG_TYPE_VIRT_VEC3_YZW,
141 REG_TYPE_VIRT_VEC2_XY,
142 REG_TYPE_VIRT_VEC2_XZ,
143 REG_TYPE_VIRT_VEC2_XW,
144 REG_TYPE_VIRT_VEC2_YZ,
145 REG_TYPE_VIRT_VEC2_YW,
146 REG_TYPE_VIRT_VEC2_ZW,
147 REG_TYPE_VIRT_SCALAR_X,
148 REG_TYPE_VIRT_SCALAR_Y,
149 REG_TYPE_VIRT_SCALAR_Z,
150 REG_TYPE_VIRT_SCALAR_W,
151 REG_TYPE_VIRT_VEC2T_XY,
152 REG_TYPE_VIRT_VEC2T_ZW,
153 REG_TYPE_VIRT_VEC2C_XY,
154 REG_TYPE_VIRT_VEC2C_YZ,
155 REG_TYPE_VIRT_VEC2C_ZW,
156 REG_TYPE_VIRT_VEC3C_XYZ,
157 REG_TYPE_VIRT_VEC3C_YZW,
158 NUM_REG_TYPES,
159 };
160
161 /* writemask when used as dest */
162 static const uint8_t
163 reg_writemask[NUM_REG_TYPES] = {
164 [REG_TYPE_VEC4] = 0xf,
165 [REG_TYPE_VIRT_SCALAR_X] = 0x1,
166 [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
167 [REG_TYPE_VIRT_VEC2_XY] = 0x3,
168 [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
169 [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
170 [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
171 [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
172 [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
173 [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
174 [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
175 [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
176 [REG_TYPE_VIRT_SCALAR_W] = 0x8,
177 [REG_TYPE_VIRT_VEC2_XW] = 0x9,
178 [REG_TYPE_VIRT_VEC2_YW] = 0xa,
179 [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
180 [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
181 [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
182 [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
183 [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
184 [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
185 [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
186 };
187
188 /* how to swizzle when used as a src */
189 static const uint8_t
190 reg_swiz[NUM_REG_TYPES] = {
191 [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
192 [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
193 [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
194 [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
195 [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
196 [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
197 [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
198 [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
199 [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
200 [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
201 [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
202 [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
203 [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
204 [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
205 [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
206 [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
207 [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
208 [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
209 [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
210 [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
211 [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
212 [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
213 };
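/* Reading a virtual register as a source applies the swizzle above; e.g. a
 * value allocated to REG_TYPE_VIRT_VEC2_ZW lives in the z/w components of its
 * base register, so sources read it through SWIZZLE(Z, W, Z, W) to present
 * the data in the first two channels of the consuming instruction.
 */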
214
215 /* how to swizzle when used as a dest */
216 static const uint8_t
217 reg_dst_swiz[NUM_REG_TYPES] = {
218 [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
219 [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
220 [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
221 [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
222 [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
223 [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
224 [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
225 [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
226 [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
227 [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
228 [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
229 [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
230 [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
231 [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
232 [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
233 [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
234 [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
235 [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
236 [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
237 [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
238 [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
239 [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
240 };
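/* Writing works the other way around; e.g. for REG_TYPE_VIRT_VEC2_ZW the
 * instruction's first two result components have to land in z and w, so the
 * destination swizzle SWIZZLE(X, X, X, Y) combined with writemask 0xc routes
 * component 0 to z and component 1 to w (x and y are masked off).
 */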
241
242 static inline int reg_get_type(int virt_reg)
243 {
244 return virt_reg % NUM_REG_TYPES;
245 }
246
247 static inline int reg_get_base(struct etna_compile *c, int virt_reg)
248 {
249 /* offset by 1 to avoid reserved position register */
250 if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
251 return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
252 return virt_reg / NUM_REG_TYPES;
253 }
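/* A virtual register number is encoded as base * NUM_REG_TYPES + type, so
 * e.g. virt_reg = 2 * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_W means "the w
 * component of temp r2" (r3 in fragment shaders because of the +1 offset
 * above).
 */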
254
255 /* use "r63.z" for the depth reg; reg_get_base will wrap it around to r0.z
256 * (fs registers are offset by 1 to avoid reserving r0)
257 */
258 #define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)
259
260 static inline int reg_get_class(int virt_reg)
261 {
262 switch (reg_get_type(virt_reg)) {
263 case REG_TYPE_VEC4:
264 return REG_CLASS_VEC4;
265 case REG_TYPE_VIRT_VEC3_XYZ:
266 case REG_TYPE_VIRT_VEC3_XYW:
267 case REG_TYPE_VIRT_VEC3_XZW:
268 case REG_TYPE_VIRT_VEC3_YZW:
269 return REG_CLASS_VIRT_VEC3;
270 case REG_TYPE_VIRT_VEC2_XY:
271 case REG_TYPE_VIRT_VEC2_XZ:
272 case REG_TYPE_VIRT_VEC2_XW:
273 case REG_TYPE_VIRT_VEC2_YZ:
274 case REG_TYPE_VIRT_VEC2_YW:
275 case REG_TYPE_VIRT_VEC2_ZW:
276 return REG_CLASS_VIRT_VEC2;
277 case REG_TYPE_VIRT_SCALAR_X:
278 case REG_TYPE_VIRT_SCALAR_Y:
279 case REG_TYPE_VIRT_SCALAR_Z:
280 case REG_TYPE_VIRT_SCALAR_W:
281 return REG_CLASS_VIRT_SCALAR;
282 case REG_TYPE_VIRT_VEC2T_XY:
283 case REG_TYPE_VIRT_VEC2T_ZW:
284 return REG_CLASS_VIRT_VEC2T;
285 case REG_TYPE_VIRT_VEC2C_XY:
286 case REG_TYPE_VIRT_VEC2C_YZ:
287 case REG_TYPE_VIRT_VEC2C_ZW:
288 return REG_CLASS_VIRT_VEC2C;
289 case REG_TYPE_VIRT_VEC3C_XYZ:
290 case REG_TYPE_VIRT_VEC3C_YZW:
291 return REG_CLASS_VIRT_VEC3C;
292 }
293
294 assert(false);
295 return 0;
296 }
297
298 /* nir_src to allocated register */
299 static hw_src
300 ra_src(struct etna_compile *c, nir_src *src)
301 {
302 unsigned reg = ra_get_node_reg(c->g, c->live_map[src_index(c->impl, src)]);
303 return SRC_REG(reg_get_base(c, reg), reg_swiz[reg_get_type(reg)]);
304 }
305
306 static hw_src
307 get_src(struct etna_compile *c, nir_src *src)
308 {
309 if (!src->is_ssa)
310 return ra_src(c, src);
311
312 nir_instr *instr = src->ssa->parent_instr;
313
314 if (instr->pass_flags & BYPASS_SRC) {
315 assert(instr->type == nir_instr_type_alu);
316 nir_alu_instr *alu = nir_instr_as_alu(instr);
317 assert(alu->op == nir_op_mov);
318 return src_swizzle(get_src(c, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
319 }
320
321 switch (instr->type) {
322 case nir_instr_type_load_const:
323 return const_src(c, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
324 case nir_instr_type_intrinsic: {
325 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
326 switch (intr->intrinsic) {
327 case nir_intrinsic_load_input:
328 case nir_intrinsic_load_instance_id:
329 case nir_intrinsic_load_uniform:
330 case nir_intrinsic_load_ubo:
331 return ra_src(c, src);
332 case nir_intrinsic_load_front_face:
333 return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
334 case nir_intrinsic_load_frag_coord:
335 return SRC_REG(0, INST_SWIZ_IDENTITY);
336 default:
337 compile_error(c, "Unhandled NIR intrinsic type: %s\n",
338 nir_intrinsic_infos[intr->intrinsic].name);
339 break;
340 }
341 } break;
342 case nir_instr_type_alu:
343 case nir_instr_type_tex:
344 return ra_src(c, src);
345 case nir_instr_type_ssa_undef: {
346 /* return zero to deal with broken Blur demo */
347 nir_const_value value = CONST(0);
348 return src_swizzle(const_src(c, &value, 1), SWIZZLE(X,X,X,X));
349 }
350 default:
351 compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
352 break;
353 }
354
355 return SRC_DISABLE;
356 }
357
358 static bool
359 vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
360 {
361 for (unsigned i = 0; i < 4; i++) {
362 if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
363 continue;
364
365 if (vec->src[i].swizzle[0] != i)
366 return true;
367 }
368
369 /* don't deal with possible bypassed vec/mov chain */
370 nir_foreach_use(use_src, ssa) {
371 nir_instr *instr = use_src->parent_instr;
372 if (instr->type != nir_instr_type_alu)
373 continue;
374
375 nir_alu_instr *alu = nir_instr_as_alu(instr);
376
377 switch (alu->op) {
378 case nir_op_mov:
379 case nir_op_vec2:
380 case nir_op_vec3:
381 case nir_op_vec4:
382 return true;
383 default:
384 break;
385 }
386 }
387 return false;
388 }
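/* Illustrative example: in vec4(a.x, a.y, b.y, b.x) the third operand reads
 * b.y into position z (swizzle 1 at position 2), so vec_dest_has_swizzle(vec,
 * b) returns true and the producer of b cannot write its result directly into
 * the vec's destination register.
 */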
389
390 /* get allocated dest register for nir_dest
391 * *p_swiz tells how the components need to be placed into the register
392 */
393 static hw_dst
394 ra_dest(struct etna_compile *c, nir_dest *dest, unsigned *p_swiz)
395 {
396 unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
397 dest = real_dest(dest, &swiz, &mask);
398
399 unsigned r = ra_get_node_reg(c->g, c->live_map[dest_index(c->impl, dest)]);
400 unsigned t = reg_get_type(r);
401
402 *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);
403
404 return (hw_dst) {
405 .use = 1,
406 .reg = reg_get_base(c, r),
407 .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
408 };
409 }
410
411 /* precomputed by register_allocate */
412 static unsigned int *q_values[] = {
413 (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
414 (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
415 (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
416 (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
417 (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
418 (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
419 (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
420 };
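/* These are the q values expected by ra_set_finalize(): roughly, for each
 * pair of classes, how many registers of one class a single register of the
 * other class can conflict with. They were computed offline for this class
 * setup and must be regenerated if the register classes above change.
 */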
421
422 static void
423 ra_assign(struct etna_compile *c, nir_shader *shader)
424 {
425 struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
426 NUM_REG_TYPES, false);
427
428 /* classes are always created starting from index 0, so the class index is
429 * equal to the reg_class enum, which represents a register with (c+1) components
430 */
431 for (int c = 0; c < NUM_REG_CLASSES; c++)
432 ra_alloc_reg_class(regs);
433 /* add each register of each class */
434 for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
435 ra_class_add_reg(regs, reg_get_class(r), r);
436 /* set conflicts */
437 for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
438 for (int i = 0; i < NUM_REG_TYPES; i++) {
439 for (int j = 0; j < i; j++) {
440 if (reg_writemask[i] & reg_writemask[j]) {
441 ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
442 NUM_REG_TYPES * r + j);
443 }
444 }
445 }
446 }
447 ra_set_finalize(regs, q_values);
448
449 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
450
451 /* liveness and interference */
452
453 nir_index_blocks(impl);
454 nir_index_ssa_defs(impl);
455 nir_foreach_block(block, impl) {
456 nir_foreach_instr(instr, block)
457 instr->pass_flags = 0;
458 }
459
460 /* this gives an approximation/upper limit on how many nodes are needed
461 * (some ssa values do not represent an allocated register)
462 */
463 unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
464 unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
465 memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
466 struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);
467
468 unsigned num_nodes = etna_live_defs(impl, defs, live_map);
469 struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);
470
471 /* set classes from num_components */
472 for (unsigned i = 0; i < num_nodes; i++) {
473 nir_instr *instr = defs[i].instr;
474 nir_dest *dest = defs[i].dest;
475 unsigned comp = nir_dest_num_components(*dest) - 1;
476
477 if (instr->type == nir_instr_type_alu &&
478 c->specs->has_new_transcendentals) {
479 switch (nir_instr_as_alu(instr)->op) {
480 case nir_op_fdiv:
481 case nir_op_flog2:
482 case nir_op_fsin:
483 case nir_op_fcos:
484 assert(dest->is_ssa);
485 comp = REG_CLASS_VIRT_VEC2T;
486 default:
487 break;
488 }
489 }
490
491 if (instr->type == nir_instr_type_intrinsic) {
492 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
493 /* can't have dst swizzle or sparse writemask on UBO loads */
494 if (intr->intrinsic == nir_intrinsic_load_ubo) {
495 assert(dest == &intr->dest);
496 if (dest->ssa.num_components == 2)
497 comp = REG_CLASS_VIRT_VEC2C;
498 if (dest->ssa.num_components == 3)
499 comp = REG_CLASS_VIRT_VEC3C;
500 }
501 }
502
503 ra_set_node_class(g, i, comp);
504 }
505
506 nir_foreach_block(block, impl) {
507 nir_foreach_instr(instr, block) {
508 if (instr->type != nir_instr_type_intrinsic)
509 continue;
510
511 nir_dest *dest = dest_for_instr(instr);
512 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
513 unsigned reg;
514
515 switch (intr->intrinsic) {
516 case nir_intrinsic_store_deref: {
517 /* don't want outputs to be swizzled
518 * TODO: better would be to set the type to X/XY/XYZ/XYZW
519 * TODO: what if fragcoord.z is read after writing fragdepth?
520 */
521 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
522 unsigned index = live_map[src_index(impl, &intr->src[1])];
523
524 if (shader->info.stage == MESA_SHADER_FRAGMENT &&
525 deref->var->data.location == FRAG_RESULT_DEPTH) {
526 ra_set_node_reg(g, index, REG_FRAG_DEPTH);
527 } else {
528 ra_set_node_class(g, index, REG_CLASS_VEC4);
529 }
530 } continue;
531 case nir_intrinsic_load_input:
532 reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
533 REG_TYPE_VIRT_SCALAR_X,
534 REG_TYPE_VIRT_VEC2_XY,
535 REG_TYPE_VIRT_VEC3_XYZ,
536 REG_TYPE_VEC4,
537 }[nir_dest_num_components(*dest) - 1];
538 break;
539 case nir_intrinsic_load_instance_id:
540 reg = c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
541 break;
542 default:
543 continue;
544 }
545
546 ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
547 }
548 }
549
550 /* add interference for intersecting live ranges */
551 for (unsigned i = 0; i < num_nodes; i++) {
552 assert(defs[i].live_start < defs[i].live_end);
553 for (unsigned j = 0; j < i; j++) {
554 if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
555 continue;
556 ra_add_node_interference(g, i, j);
557 }
558 }
559
560 ralloc_free(defs);
561
562 /* Allocate registers */
563 ASSERTED bool ok = ra_allocate(g);
564 assert(ok);
565
566 c->g = g;
567 c->regs = regs;
568 c->live_map = live_map;
569 c->num_nodes = num_nodes;
570 }
571
572 static unsigned
573 ra_finish(struct etna_compile *c)
574 {
575 /* TODO: better way to get number of registers used? */
576 unsigned j = 0;
577 for (unsigned i = 0; i < c->num_nodes; i++) {
578 j = MAX2(j, reg_get_base(c, ra_get_node_reg(c->g, i)) + 1);
579 }
580
581 ralloc_free(c->g);
582 ralloc_free(c->regs);
583 ralloc_free(c->live_map);
584
585 return j;
586 }
587
588 static void
589 emit_alu(struct etna_compile *c, nir_alu_instr * alu)
590 {
591 const nir_op_info *info = &nir_op_infos[alu->op];
592
593 /* skip instructions marked as dead (vecN and other bypassed instrs) */
594 if (alu->instr.pass_flags)
595 return;
596
597 assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));
598
599 unsigned dst_swiz;
600 hw_dst dst = ra_dest(c, &alu->dest.dest, &dst_swiz);
601
602 /* compose alu write_mask with RA write mask */
603 if (!alu->dest.dest.is_ssa)
604 dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);
605
606 switch (alu->op) {
607 case nir_op_fdot2:
608 case nir_op_fdot3:
609 case nir_op_fdot4:
610 /* not per-component - don't compose dst_swiz */
611 dst_swiz = INST_SWIZ_IDENTITY;
612 break;
613 default:
614 break;
615 }
616
617 hw_src srcs[3];
618
619 for (int i = 0; i < info->num_inputs; i++) {
620 nir_alu_src *asrc = &alu->src[i];
621 hw_src src;
622
623 src = src_swizzle(get_src(c, &asrc->src), ALU_SWIZ(asrc));
624 src = src_swizzle(src, dst_swiz);
625
626 if (src.rgroup != INST_RGROUP_IMMEDIATE) {
627 src.neg = asrc->negate || (alu->op == nir_op_fneg);
628 src.abs = asrc->abs || (alu->op == nir_op_fabs);
629 } else {
630 assert(!asrc->negate && alu->op != nir_op_fneg);
631 assert(!asrc->abs && alu->op != nir_op_fabs);
632 }
633
634 srcs[i] = src;
635 }
636
637 etna_emit_alu(c, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
638 }
639
640 static void
641 emit_tex(struct etna_compile *c, nir_tex_instr * tex)
642 {
643 unsigned dst_swiz;
644 hw_dst dst = ra_dest(c, &tex->dest, &dst_swiz);
645 nir_src *coord = NULL, *lod_bias = NULL, *compare = NULL;
646
647 for (unsigned i = 0; i < tex->num_srcs; i++) {
648 switch (tex->src[i].src_type) {
649 case nir_tex_src_coord:
650 coord = &tex->src[i].src;
651 break;
652 case nir_tex_src_bias:
653 case nir_tex_src_lod:
654 assert(!lod_bias);
655 lod_bias = &tex->src[i].src;
656 break;
657 case nir_tex_src_comparator:
658 compare = &tex->src[i].src;
659 break;
660 default:
661 compile_error(c, "Unhandled NIR tex src type: %d\n",
662 tex->src[i].src_type);
663 break;
664 }
665 }
666
667 etna_emit_tex(c, tex->op, tex->sampler_index, dst_swiz, dst, get_src(c, coord),
668 lod_bias ? get_src(c, lod_bias) : SRC_DISABLE,
669 compare ? get_src(c, compare) : SRC_DISABLE);
670 }
671
672 static void
673 emit_intrinsic(struct etna_compile *c, nir_intrinsic_instr * intr)
674 {
675 switch (intr->intrinsic) {
676 case nir_intrinsic_store_deref:
677 etna_emit_output(c, nir_src_as_deref(intr->src[0])->var, get_src(c, &intr->src[1]));
678 break;
679 case nir_intrinsic_discard_if:
680 etna_emit_discard(c, get_src(c, &intr->src[0]));
681 break;
682 case nir_intrinsic_discard:
683 etna_emit_discard(c, SRC_DISABLE);
684 break;
685 case nir_intrinsic_load_uniform: {
686 unsigned dst_swiz;
687 struct etna_inst_dst dst = ra_dest(c, &intr->dest, &dst_swiz);
688
689 /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
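/* Emits, roughly: MOVAR a0.x, <offset src>; MOV dst, u[base + a0.x].
 * The first instruction loads the address register, the second reads the
 * uniform with relative addressing (INST_AMODE_ADD_A_X).
 */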
690 emit_inst(c, &(struct etna_inst) {
691 .opcode = INST_OPCODE_MOVAR,
692 .dst.write_mask = 0x1,
693 .src[2] = get_src(c, &intr->src[0]),
694 });
695 emit_inst(c, &(struct etna_inst) {
696 .opcode = INST_OPCODE_MOV,
697 .dst = dst,
698 .src[2] = {
699 .use = 1,
700 .rgroup = INST_RGROUP_UNIFORM_0,
701 .reg = nir_intrinsic_base(intr),
702 .swiz = dst_swiz,
703 .amode = INST_AMODE_ADD_A_X,
704 },
705 });
706 } break;
707 case nir_intrinsic_load_ubo: {
708 /* TODO: if offset is of the form (x + C) then add C to the base instead */
709 unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
710 unsigned dst_swiz;
711 emit_inst(c, &(struct etna_inst) {
712 .opcode = INST_OPCODE_LOAD,
713 .type = INST_TYPE_U32,
714 .dst = ra_dest(c, &intr->dest, &dst_swiz),
715 .src[0] = get_src(c, &intr->src[1]),
716 .src[1] = const_src(c, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
717 });
718 } break;
719 case nir_intrinsic_load_front_face:
720 case nir_intrinsic_load_frag_coord:
721 assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
722 break;
723 case nir_intrinsic_load_input:
724 case nir_intrinsic_load_instance_id:
725 break;
726 default:
727 compile_error(c, "Unhandled NIR intrinsic type: %s\n",
728 nir_intrinsic_infos[intr->intrinsic].name);
729 }
730 }
731
732 static void
733 emit_instr(struct etna_compile *c, nir_instr * instr)
734 {
735 switch (instr->type) {
736 case nir_instr_type_alu:
737 emit_alu(c, nir_instr_as_alu(instr));
738 break;
739 case nir_instr_type_tex:
740 emit_tex(c, nir_instr_as_tex(instr));
741 break;
742 case nir_instr_type_intrinsic:
743 emit_intrinsic(c, nir_instr_as_intrinsic(instr));
744 break;
745 case nir_instr_type_jump:
746 assert(nir_instr_is_last(instr));
747 case nir_instr_type_load_const:
748 case nir_instr_type_ssa_undef:
749 case nir_instr_type_deref:
750 break;
751 default:
752 compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
753 break;
754 }
755 }
756
757 static void
758 emit_block(struct etna_compile *c, nir_block * block)
759 {
760 etna_emit_block_start(c, block->index);
761
762 nir_foreach_instr(instr, block)
763 emit_instr(c, instr);
764
765 /* succs->index < block->index is for the loop case */
766 nir_block *succs = block->successors[0];
767 if (nir_block_ends_in_jump(block) || succs->index < block->index)
768 etna_emit_jump(c, succs->index, SRC_DISABLE);
769 }
770
771 static void
772 emit_cf_list(struct etna_compile *c, struct exec_list *list);
773
774 static void
775 emit_if(struct etna_compile *c, nir_if * nif)
776 {
777 etna_emit_jump(c, nir_if_first_else_block(nif)->index, get_src(c, &nif->condition));
778 emit_cf_list(c, &nif->then_list);
779
780 /* jump at end of then_list to skip else_list
781 * not needed if then_list already ends with a jump or else_list is empty
782 */
783 if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
784 !nir_cf_list_is_empty_block(&nif->else_list))
785 etna_emit_jump(c, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);
786
787 emit_cf_list(c, &nif->else_list);
788 }
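/* The generated layout is: a conditional jump to the else block, the then
 * body, an optional unconditional jump over the else body, then the else
 * body. Loops need no special handling here because emit_block() emits the
 * back-edge jump when a successor has a lower block index.
 */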
789
790 static void
791 emit_cf_list(struct etna_compile *c, struct exec_list *list)
792 {
793 foreach_list_typed(nir_cf_node, node, node, list) {
794 switch (node->type) {
795 case nir_cf_node_block:
796 emit_block(c, nir_cf_node_as_block(node));
797 break;
798 case nir_cf_node_if:
799 emit_if(c, nir_cf_node_as_if(node));
800 break;
801 case nir_cf_node_loop:
802 emit_cf_list(c, &nir_cf_node_as_loop(node)->body);
803 break;
804 default:
805 compile_error(c, "Unknown NIR node type\n");
806 break;
807 }
808 }
809 }
810
811 /* based on nir_lower_vec_to_movs */
812 static unsigned
813 insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
814 {
815 assert(start_idx < nir_op_infos[vec->op].num_inputs);
816 unsigned write_mask = (1u << start_idx);
817
818 nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
819 nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
820
821 mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
822 mov->src[0].negate = vec->src[start_idx].negate;
823 mov->src[0].abs = vec->src[start_idx].abs;
824
825 unsigned num_components = 1;
826
827 for (unsigned i = start_idx + 1; i < 4; i++) {
828 if (!(vec->dest.write_mask & (1 << i)))
829 continue;
830
831 if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
832 vec->src[i].negate == vec->src[start_idx].negate &&
833 vec->src[i].abs == vec->src[start_idx].abs) {
834 write_mask |= (1 << i);
835 mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
836 num_components++;
837 }
838 }
839
840 mov->dest.write_mask = (1 << num_components) - 1;
841 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);
842
843 /* replace vec srcs with inserted mov */
844 for (unsigned i = 0, j = 0; i < 4; i++) {
845 if (!(write_mask & (1 << i)))
846 continue;
847
848 nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
849 vec->src[i].swizzle[0] = j++;
850 }
851
852 nir_instr_insert_before(&vec->instr, &mov->instr);
853
854 return write_mask;
855 }
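/* Worked example (hypothetical values): for vec4(a.x, a.y, b.x, a.z) with
 * start_idx == 0, the a-based components at positions 0, 1 and 3 are grouped
 * into a single "mov tmp.xyz, a.xyz"-style instruction, those vec sources are
 * rewritten to tmp with swizzles 0, 1, 2, and the returned write_mask is 0xb;
 * the b component at position 2 is left for a later call.
 */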
856
857 /*
858 * for vecN instructions:
859 * -merge constant sources into a single src
860 * -insert movs (nir_lower_vec_to_movs equivalent)
861 * for non-vecN instructions:
862 * -try to merge constants as single constant
863 * -insert movs for multiple constants (pre-HALTI5)
864 */
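/* Hypothetical example of the non-vecN constant merge: for something like
 * ffma(#2.0, x, #0.5) on pre-HALTI5 hardware, the two constant sources are
 * packed into one immediate vector {2.0, 0.5} and both sources are rewritten
 * to point at it with .x and .y swizzles, so the instruction ends up with a
 * single uniform/constant operand.
 */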
865 static void
866 lower_alu(struct etna_compile *c, nir_alu_instr *alu)
867 {
868 const nir_op_info *info = &nir_op_infos[alu->op];
869
870 nir_builder b;
871 nir_builder_init(&b, c->impl);
872 b.cursor = nir_before_instr(&alu->instr);
873
874 switch (alu->op) {
875 case nir_op_vec2:
876 case nir_op_vec3:
877 case nir_op_vec4:
878 break;
879 default:
880 /* pre-GC7000L can only have 1 uniform src per instruction */
881 if (c->specs->halti >= 5)
882 return;
883
884 nir_const_value value[4] = {};
885 uint8_t swizzle[4][4] = {};
886 unsigned swiz_max = 0, num_const = 0;
887
888 for (unsigned i = 0; i < info->num_inputs; i++) {
889 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
890 if (!cv)
891 continue;
892
893 unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
894 for (unsigned j = 0; j < num_components; j++) {
895 int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
896 swizzle[i][j] = idx;
897 swiz_max = MAX2(swiz_max, (unsigned) idx);
898 }
899 num_const++;
900 }
901
902 /* nothing to do */
903 if (num_const <= 1)
904 return;
905
906 /* resolve with single combined const src */
907 if (swiz_max < 4) {
908 nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);
909
910 for (unsigned i = 0; i < info->num_inputs; i++) {
911 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
912 if (!cv)
913 continue;
914
915 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
916
917 for (unsigned j = 0; j < 4; j++)
918 alu->src[i].swizzle[j] = swizzle[i][j];
919 }
920 return;
921 }
922
923 /* resolve with movs */
924 num_const = 0;
925 for (unsigned i = 0; i < info->num_inputs; i++) {
926 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
927 if (!cv)
928 continue;
929
930 num_const++;
931 if (num_const == 1)
932 continue;
933
934 nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
935 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
936 }
937 return;
938 }
939
940 nir_const_value value[4];
941 unsigned num_components = 0;
942
943 for (unsigned i = 0; i < info->num_inputs; i++) {
944 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
945 if (cv)
946 value[num_components++] = cv[alu->src[i].swizzle[0]];
947 }
948
949 /* if there is more than one constant source to the vecN, combine them
950 * into a single load_const (removing the vecN completely if all components
951 * are constant)
952 */
953 if (num_components > 1) {
954 nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);
955
956 if (num_components == info->num_inputs) {
957 nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(def));
958 nir_instr_remove(&alu->instr);
959 return;
960 }
961
962 for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
963 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
964 if (!cv)
965 continue;
966
967 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
968 alu->src[i].swizzle[0] = j++;
969 }
970 }
971
972 unsigned finished_write_mask = 0;
973 for (unsigned i = 0; i < 4; i++) {
974 if (!(alu->dest.write_mask & (1 << i)))
975 continue;
976
977 nir_ssa_def *ssa = alu->src[i].src.ssa;
978
979 /* check whether this vecN instruction is the only user of the ssa def */
980 bool need_mov = list_length(&ssa->if_uses) != 0;
981 nir_foreach_use(use_src, ssa) {
982 if (use_src->parent_instr != &alu->instr)
983 need_mov = true;
984 }
985
986 nir_instr *instr = ssa->parent_instr;
987 switch (instr->type) {
988 case nir_instr_type_alu:
989 case nir_instr_type_tex:
990 break;
991 case nir_instr_type_intrinsic:
992 if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
993 need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
994 break;
995 }
996 default:
997 need_mov = true;
998 }
999
1000 if (need_mov && !(finished_write_mask & (1 << i)))
1001 finished_write_mask |= insert_vec_mov(alu, i, c->nir);
1002 }
1003 }
1004
1005 static bool
1006 emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
1007 {
1008 nir_shader *shader = c->nir;
1009 c->impl = nir_shader_get_entrypoint(shader);
1010
1011 bool have_indirect_uniform = false;
1012 unsigned indirect_max = 0;
1013
1014 nir_builder b;
1015 nir_builder_init(&b, c->impl);
1016
1017 /* convert non-dynamic uniform loads to constants, etc */
1018 nir_foreach_block(block, c->impl) {
1019 nir_foreach_instr_safe(instr, block) {
1020 switch(instr->type) {
1021 case nir_instr_type_alu:
1022 /* deals with vecN and const srcs */
1023 lower_alu(c, nir_instr_as_alu(instr));
1024 break;
1025 case nir_instr_type_load_const: {
1026 nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
1027 for (unsigned i = 0; i < load_const->def.num_components; i++)
1028 load_const->value[i] = CONST(load_const->value[i].u32);
1029 } break;
1030 case nir_instr_type_intrinsic: {
1031 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1032 /* TODO: load_ubo can also become a constant in some cases
1033 * (at the moment it can end up emitting a LOAD with two
1034 * uniform sources, which could be a problem on HALTI2)
1035 */
1036 if (intr->intrinsic != nir_intrinsic_load_uniform)
1037 break;
1038 nir_const_value *off = nir_src_as_const_value(intr->src[0]);
1039 if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
1040 have_indirect_uniform = true;
1041 indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
1042 break;
1043 }
1044
1045 unsigned base = nir_intrinsic_base(intr);
1046 /* pre-HALTI2 the uniform offset will be a float */
1047 if (c->specs->halti < 2)
1048 base += (unsigned) off[0].f32;
1049 else
1050 base += off[0].u32;
1051 nir_const_value value[4];
1052
1053 for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
1054 if (nir_intrinsic_base(intr) < 0)
1055 value[i] = TEXSCALE(~nir_intrinsic_base(intr), i);
1056 else
1057 value[i] = UNIFORM(base * 4 + i);
1058 }
1059
1060 b.cursor = nir_after_instr(instr);
1061 nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);
1062
1063 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
1064 nir_instr_remove(instr);
1065 } break;
1066 default:
1067 break;
1068 }
1069 }
1070 }
1071
1072 /* TODO: only emit required indirect uniform ranges */
1073 if (have_indirect_uniform) {
1074 for (unsigned i = 0; i < indirect_max * 4; i++)
1075 c->consts[i] = UNIFORM(i).u64;
1076 c->const_count = indirect_max;
1077 }
1078
1079 /* add mov for any store output using sysval/const */
1080 nir_foreach_block(block, c->impl) {
1081 nir_foreach_instr_safe(instr, block) {
1082 if (instr->type != nir_instr_type_intrinsic)
1083 continue;
1084
1085 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1086
1087 switch (intr->intrinsic) {
1088 case nir_intrinsic_store_deref: {
1089 nir_src *src = &intr->src[1];
1090 if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) {
1091 b.cursor = nir_before_instr(instr);
1092 nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
1093 }
1094 } break;
1095 default:
1096 break;
1097 }
1098 }
1099 }
1100
1101 /* call directly to avoid validation (load_const doesn't pass validation at this point) */
1102 nir_convert_from_ssa(shader, true);
1103 nir_opt_dce(shader);
1104
1105 ra_assign(c, shader);
1106
1107 emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body);
1108
1109 *num_temps = ra_finish(c);
1110 *num_consts = c->const_count;
1111 return true;
1112 }