src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

   1 /*
   2  * Copyright 2017 Red Hat Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: Karol Herbst <kherbst@redhat.com>
  23  */
  24
  25 #include "compiler/nir/nir.h"
  26
  27 #include "util/u_debug.h"
  28
  29 #include "codegen/nv50_ir.h"
  30 #include "codegen/nv50_ir_from_common.h"
  31 #include "codegen/nv50_ir_lowering_helper.h"
  32 #include "codegen/nv50_ir_util.h"
  33
  34 #if __cplusplus >= 201103L
  35 #include <unordered_map>
  36 #else
  37 #include <tr1/unordered_map>
  38 #endif
  39 #include <cstring>
  40 #include <list>
  41 #include <vector>
  42
  43 namespace {
  44
  45 #if __cplusplus >= 201103L
  46 using std::hash;
  47 using std::unordered_map;
  48 #else
  49 using std::tr1::hash;
  50 using std::tr1::unordered_map;
  51 #endif
  52
  53 using namespace nv50_ir;
  54
  55 int
  56 type_size(const struct glsl_type *type, bool bindless)
  57 {
  58    return glsl_count_attribute_slots(type, false);
  59 }
  60
  61 class Converter : public ConverterCommon
  62 {
  63 public:
  64    Converter(Program *, nir_shader *, nv50_ir_prog_info *);
  65
  66    bool run();
  67 private:
  68    typedef std::vector<LValue*> LValues;
  69    typedef unordered_map<unsigned, LValues> NirDefMap;
  70    typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
  71    typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
  72    typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
  73
  74    CacheMode convert(enum gl_access_qualifier);
  75    TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
  76    LValues& convert(nir_alu_dest *);
  77    BasicBlock* convert(nir_block *);
  78    LValues& convert(nir_dest *);
  79    SVSemantic convert(nir_intrinsic_op);
  80    Value* convert(nir_load_const_instr*, uint8_t);
  81    LValues& convert(nir_register *);
  82    LValues& convert(nir_ssa_def *);
  83
  84    Value* getSrc(nir_alu_src *, uint8_t component = 0);
  85    Value* getSrc(nir_register *, uint8_t);
  86    Value* getSrc(nir_src *, uint8_t, bool indirect = false);
  87    Value* getSrc(nir_ssa_def *, uint8_t);
  88
  89    // returned value is the constant part of the given source (either the
  90    // nir_src or the selected source component of an intrinsic). Even though
  91    // this is mostly an optimization to be able to skip indirects in a few
  92    // cases, sometimes we require immediate values or set some fileds on
  93    // instructions (e.g. tex) in order for codegen to consume those.
  94    // If the found value has not a constant part, the Value gets returned
  95    // through the Value parameter.
  96    uint32_t getIndirect(nir_src *, uint8_t, Value *&);
  97    // isScalar indicates that the addressing is scalar, vec4 addressing is
  98    // assumed otherwise
  99    uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
 100                         bool isScalar = false);
 101
 102    uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
 103
 104    void setInterpolate(nv50_ir_varying *,
 105                        uint8_t,
 106                        bool centroid,
 107                        unsigned semantics);
 108
 109    Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
 110                          uint8_t c, Value *indirect0 = NULL,
 111                          Value *indirect1 = NULL, bool patch = false);
 112    void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
 113                 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
 114                 Value *indirect1 = NULL);
 115
 116    bool isFloatType(nir_alu_type);
 117    bool isSignedType(nir_alu_type);
 118    bool isResultFloat(nir_op);
 119    bool isResultSigned(nir_op);
 120
 121    DataType getDType(nir_alu_instr *);
 122    DataType getDType(nir_intrinsic_instr *);
 123    DataType getDType(nir_intrinsic_instr *, bool isSigned);
 124    DataType getDType(nir_op, uint8_t);
 125
 126    std::vector<DataType> getSTypes(nir_alu_instr *);
 127    DataType getSType(nir_src &, bool isFloat, bool isSigned);
 128
 129    operation getOperation(nir_intrinsic_op);
 130    operation getOperation(nir_op);
 131    operation getOperation(nir_texop);
 132    operation preOperationNeeded(nir_op);
 133
 134    int getSubOp(nir_intrinsic_op);
 135    int getSubOp(nir_op);
 136
 137    CondCode getCondCode(nir_op);
 138
 139    bool assignSlots();
 140    bool parseNIR();
 141
 142    bool visit(nir_alu_instr *);
 143    bool visit(nir_block *);
 144    bool visit(nir_cf_node *);
 145    bool visit(nir_deref_instr *);
 146    bool visit(nir_function *);
 147    bool visit(nir_if *);
 148    bool visit(nir_instr *);
 149    bool visit(nir_intrinsic_instr *);
 150    bool visit(nir_jump_instr *);
 151    bool visit(nir_load_const_instr*);
 152    bool visit(nir_loop *);
 153    bool visit(nir_ssa_undef_instr *);
 154    bool visit(nir_tex_instr *);
 155
 156    // tex stuff
 157    Value* applyProjection(Value *src, Value *proj);
 158    unsigned int getNIRArgCount(TexInstruction::Target&);
 159
 160    // image stuff
 161    uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
 162    CacheMode getCacheModeFromVar(const nir_variable *);
 163
 164    nir_shader *nir;
 165
 166    NirDefMap ssaDefs;
 167    NirDefMap regDefs;
 168    ImmediateMap immediates;
 169    NirArrayLMemOffsets regToLmemOffset;
 170    NirBlockMap blocks;
 171    unsigned int curLoopDepth;
 172
 173    BasicBlock *exit;
 174    Value *zero;
 175    Instruction *immInsertPos;
 176
 177    int clipVertexOutput;
 178
 179    union {
 180       struct {
 181          Value *position;
 182       } fp;
 183    };
 184 };
 185
 186 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
 187    : ConverterCommon(prog, info),
 188      nir(nir),
 189      curLoopDepth(0),
 190      clipVertexOutput(-1)
 191 {
 192    zero = mkImm((uint32_t)0);
 193 }
 194
 195 BasicBlock *
 196 Converter::convert(nir_block *block)
 197 {
 198    NirBlockMap::iterator it = blocks.find(block->index);
 199    if (it != blocks.end())
 200       return it->second;
 201
 202    BasicBlock *bb = new BasicBlock(func);
 203    blocks[block->index] = bb;
 204    return bb;
 205 }
 206
 207 bool
 208 Converter::isFloatType(nir_alu_type type)
 209 {
 210    return nir_alu_type_get_base_type(type) == nir_type_float;
 211 }
 212
 213 bool
 214 Converter::isSignedType(nir_alu_type type)
 215 {
 216    return nir_alu_type_get_base_type(type) == nir_type_int;
 217 }
 218
 219 bool
 220 Converter::isResultFloat(nir_op op)
 221 {
 222    const nir_op_info &info = nir_op_infos[op];
 223    if (info.output_type != nir_type_invalid)
 224       return isFloatType(info.output_type);
 225
 226    ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
 227    assert(false);
 228    return true;
 229 }
 230
 231 bool
 232 Converter::isResultSigned(nir_op op)
 233 {
 234    switch (op) {
 235    // there is no umul and we get wrong results if we treat all muls as signed
 236    case nir_op_imul:
 237    case nir_op_inot:
 238       return false;
 239    default:
 240       const nir_op_info &info = nir_op_infos[op];
 241       if (info.output_type != nir_type_invalid)
 242          return isSignedType(info.output_type);
 243       ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
 244       assert(false);
 245       return true;
 246    }
 247 }
 248
 249 DataType
 250 Converter::getDType(nir_alu_instr *insn)
 251 {
 252    if (insn->dest.dest.is_ssa)
 253       return getDType(insn->op, insn->dest.dest.ssa.bit_size);
 254    else
 255       return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
 256 }
 257
 258 DataType
 259 Converter::getDType(nir_intrinsic_instr *insn)
 260 {
 261    bool isSigned;
 262    switch (insn->intrinsic) {
 263    case nir_intrinsic_shared_atomic_imax:
 264    case nir_intrinsic_shared_atomic_imin:
 265    case nir_intrinsic_ssbo_atomic_imax:
 266    case nir_intrinsic_ssbo_atomic_imin:
 267       isSigned = true;
 268       break;
 269    default:
 270       isSigned = false;
 271       break;
 272    }
 273
 274    return getDType(insn, isSigned);
 275 }
 276
 277 DataType
 278 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
 279 {
 280    if (insn->dest.is_ssa)
 281       return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
 282    else
 283       return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
 284 }
 285
 286 DataType
 287 Converter::getDType(nir_op op, uint8_t bitSize)
 288 {
 289    DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
 290    if (ty == TYPE_NONE) {
 291       ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
 292       assert(false);
 293    }
 294    return ty;
 295 }
 296
 297 std::vector<DataType>
 298 Converter::getSTypes(nir_alu_instr *insn)
 299 {
 300    const nir_op_info &info = nir_op_infos[insn->op];
 301    std::vector<DataType> res(info.num_inputs);
 302
 303    for (uint8_t i = 0; i < info.num_inputs; ++i) {
 304       if (info.input_types[i] != nir_type_invalid) {
 305          res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
 306       } else {
 307          ERROR("getSType not implemented for %s idx %u\n", info.name, i);
 308          assert(false);
 309          res[i] = TYPE_NONE;
 310          break;
 311       }
 312    }
 313
 314    return res;
 315 }
 316
 317 DataType
 318 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
 319 {
 320    uint8_t bitSize;
 321    if (src.is_ssa)
 322       bitSize = src.ssa->bit_size;
 323    else
 324       bitSize = src.reg.reg->bit_size;
 325
 326    DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
 327    if (ty == TYPE_NONE) {
 328       const char *str;
 329       if (isFloat)
 330          str = "float";
 331       else if (isSigned)
 332          str = "int";
 333       else
 334          str = "uint";
 335       ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
 336       assert(false);
 337    }
 338    return ty;
 339 }
 340
 341 operation
 342 Converter::getOperation(nir_op op)
 343 {
 344    switch (op) {
 345    // basic ops with float and int variants
 346    case nir_op_fabs:
 347    case nir_op_iabs:
 348       return OP_ABS;
 349    case nir_op_fadd:
 350    case nir_op_iadd:
 351       return OP_ADD;
 352    case nir_op_iand:
 353       return OP_AND;
 354    case nir_op_ifind_msb:
 355    case nir_op_ufind_msb:
 356       return OP_BFIND;
 357    case nir_op_fceil:
 358       return OP_CEIL;
 359    case nir_op_fcos:
 360       return OP_COS;
 361    case nir_op_f2f32:
 362    case nir_op_f2f64:
 363    case nir_op_f2i32:
 364    case nir_op_f2i64:
 365    case nir_op_f2u32:
 366    case nir_op_f2u64:
 367    case nir_op_i2f32:
 368    case nir_op_i2f64:
 369    case nir_op_i2i32:
 370    case nir_op_i2i64:
 371    case nir_op_u2f32:
 372    case nir_op_u2f64:
 373    case nir_op_u2u32:
 374    case nir_op_u2u64:
 375       return OP_CVT;
 376    case nir_op_fddx:
 377    case nir_op_fddx_coarse:
 378    case nir_op_fddx_fine:
 379       return OP_DFDX;
 380    case nir_op_fddy:
 381    case nir_op_fddy_coarse:
 382    case nir_op_fddy_fine:
 383       return OP_DFDY;
 384    case nir_op_fdiv:
 385    case nir_op_idiv:
 386    case nir_op_udiv:
 387       return OP_DIV;
 388    case nir_op_fexp2:
 389       return OP_EX2;
 390    case nir_op_ffloor:
 391       return OP_FLOOR;
 392    case nir_op_ffma:
 393       return OP_FMA;
 394    case nir_op_flog2:
 395       return OP_LG2;
 396    case nir_op_fmax:
 397    case nir_op_imax:
 398    case nir_op_umax:
 399       return OP_MAX;
 400    case nir_op_pack_64_2x32_split:
 401       return OP_MERGE;
 402    case nir_op_fmin:
 403    case nir_op_imin:
 404    case nir_op_umin:
 405       return OP_MIN;
 406    case nir_op_fmod:
 407    case nir_op_imod:
 408    case nir_op_umod:
 409    case nir_op_frem:
 410    case nir_op_irem:
 411       return OP_MOD;
 412    case nir_op_fmul:
 413    case nir_op_imul:
 414    case nir_op_imul_high:
 415    case nir_op_umul_high:
 416       return OP_MUL;
 417    case nir_op_fneg:
 418    case nir_op_ineg:
 419       return OP_NEG;
 420    case nir_op_inot:
 421       return OP_NOT;
 422    case nir_op_ior:
 423       return OP_OR;
 424    case nir_op_fpow:
 425       return OP_POW;
 426    case nir_op_frcp:
 427       return OP_RCP;
 428    case nir_op_frsq:
 429       return OP_RSQ;
 430    case nir_op_fsat:
 431       return OP_SAT;
 432    case nir_op_feq32:
 433    case nir_op_ieq32:
 434    case nir_op_fge32:
 435    case nir_op_ige32:
 436    case nir_op_uge32:
 437    case nir_op_flt32:
 438    case nir_op_ilt32:
 439    case nir_op_ult32:
 440    case nir_op_fne32:
 441    case nir_op_ine32:
 442       return OP_SET;
 443    case nir_op_ishl:
 444       return OP_SHL;
 445    case nir_op_ishr:
 446    case nir_op_ushr:
 447       return OP_SHR;
 448    case nir_op_fsin:
 449       return OP_SIN;
 450    case nir_op_fsqrt:
 451       return OP_SQRT;
 452    case nir_op_ftrunc:
 453       return OP_TRUNC;
 454    case nir_op_ixor:
 455       return OP_XOR;
 456    default:
 457       ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
 458       assert(false);
 459       return OP_NOP;
 460    }
 461 }
 462
 463 operation
 464 Converter::getOperation(nir_texop op)
 465 {
 466    switch (op) {
 467    case nir_texop_tex:
 468       return OP_TEX;
 469    case nir_texop_lod:
 470       return OP_TXLQ;
 471    case nir_texop_txb:
 472       return OP_TXB;
 473    case nir_texop_txd:
 474       return OP_TXD;
 475    case nir_texop_txf:
 476    case nir_texop_txf_ms:
 477       return OP_TXF;
 478    case nir_texop_tg4:
 479       return OP_TXG;
 480    case nir_texop_txl:
 481       return OP_TXL;
 482    case nir_texop_query_levels:
 483    case nir_texop_texture_samples:
 484    case nir_texop_txs:
 485       return OP_TXQ;
 486    default:
 487       ERROR("couldn't get operation for nir_texop %u\n", op);
 488       assert(false);
 489       return OP_NOP;
 490    }
 491 }
 492
 493 operation
 494 Converter::getOperation(nir_intrinsic_op op)
 495 {
 496    switch (op) {
 497    case nir_intrinsic_emit_vertex:
 498       return OP_EMIT;
 499    case nir_intrinsic_end_primitive:
 500       return OP_RESTART;
 501    case nir_intrinsic_bindless_image_atomic_add:
 502    case nir_intrinsic_image_atomic_add:
 503    case nir_intrinsic_image_deref_atomic_add:
 504    case nir_intrinsic_bindless_image_atomic_and:
 505    case nir_intrinsic_image_atomic_and:
 506    case nir_intrinsic_image_deref_atomic_and:
 507    case nir_intrinsic_bindless_image_atomic_comp_swap:
 508    case nir_intrinsic_image_atomic_comp_swap:
 509    case nir_intrinsic_image_deref_atomic_comp_swap:
 510    case nir_intrinsic_bindless_image_atomic_exchange:
 511    case nir_intrinsic_image_atomic_exchange:
 512    case nir_intrinsic_image_deref_atomic_exchange:
 513    case nir_intrinsic_bindless_image_atomic_imax:
 514    case nir_intrinsic_image_atomic_imax:
 515    case nir_intrinsic_image_deref_atomic_imax:
 516    case nir_intrinsic_bindless_image_atomic_umax:
 517    case nir_intrinsic_image_atomic_umax:
 518    case nir_intrinsic_image_deref_atomic_umax:
 519    case nir_intrinsic_bindless_image_atomic_imin:
 520    case nir_intrinsic_image_atomic_imin:
 521    case nir_intrinsic_image_deref_atomic_imin:
 522    case nir_intrinsic_bindless_image_atomic_umin:
 523    case nir_intrinsic_image_atomic_umin:
 524    case nir_intrinsic_image_deref_atomic_umin:
 525    case nir_intrinsic_bindless_image_atomic_or:
 526    case nir_intrinsic_image_atomic_or:
 527    case nir_intrinsic_image_deref_atomic_or:
 528    case nir_intrinsic_bindless_image_atomic_xor:
 529    case nir_intrinsic_image_atomic_xor:
 530    case nir_intrinsic_image_deref_atomic_xor:
 531       return OP_SUREDP;
 532    case nir_intrinsic_bindless_image_load:
 533    case nir_intrinsic_image_load:
 534    case nir_intrinsic_image_deref_load:
 535       return OP_SULDP;
 536    case nir_intrinsic_bindless_image_samples:
 537    case nir_intrinsic_image_samples:
 538    case nir_intrinsic_image_deref_samples:
 539    case nir_intrinsic_bindless_image_size:
 540    case nir_intrinsic_image_size:
 541    case nir_intrinsic_image_deref_size:
 542       return OP_SUQ;
 543    case nir_intrinsic_bindless_image_store:
 544    case nir_intrinsic_image_store:
 545    case nir_intrinsic_image_deref_store:
 546       return OP_SUSTP;
 547    default:
 548       ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
 549       assert(false);
 550       return OP_NOP;
 551    }
 552 }
 553
 554 operation
 555 Converter::preOperationNeeded(nir_op op)
 556 {
 557    switch (op) {
 558    case nir_op_fcos:
 559    case nir_op_fsin:
 560       return OP_PRESIN;
 561    default:
 562       return OP_NOP;
 563    }
 564 }
 565
 566 int
 567 Converter::getSubOp(nir_op op)
 568 {
 569    switch (op) {
 570    case nir_op_imul_high:
 571    case nir_op_umul_high:
 572       return NV50_IR_SUBOP_MUL_HIGH;
 573    default:
 574       return 0;
 575    }
 576 }
 577
 578 int
 579 Converter::getSubOp(nir_intrinsic_op op)
 580 {
 581    switch (op) {
 582    case nir_intrinsic_bindless_image_atomic_add:
 583    case nir_intrinsic_global_atomic_add:
 584    case nir_intrinsic_image_atomic_add:
 585    case nir_intrinsic_image_deref_atomic_add:
 586    case nir_intrinsic_shared_atomic_add:
 587    case nir_intrinsic_ssbo_atomic_add:
 588       return  NV50_IR_SUBOP_ATOM_ADD;
 589    case nir_intrinsic_bindless_image_atomic_and:
 590    case nir_intrinsic_global_atomic_and:
 591    case nir_intrinsic_image_atomic_and:
 592    case nir_intrinsic_image_deref_atomic_and:
 593    case nir_intrinsic_shared_atomic_and:
 594    case nir_intrinsic_ssbo_atomic_and:
 595       return  NV50_IR_SUBOP_ATOM_AND;
 596    case nir_intrinsic_bindless_image_atomic_comp_swap:
 597    case nir_intrinsic_global_atomic_comp_swap:
 598    case nir_intrinsic_image_atomic_comp_swap:
 599    case nir_intrinsic_image_deref_atomic_comp_swap:
 600    case nir_intrinsic_shared_atomic_comp_swap:
 601    case nir_intrinsic_ssbo_atomic_comp_swap:
 602       return  NV50_IR_SUBOP_ATOM_CAS;
 603    case nir_intrinsic_bindless_image_atomic_exchange:
 604    case nir_intrinsic_global_atomic_exchange:
 605    case nir_intrinsic_image_atomic_exchange:
 606    case nir_intrinsic_image_deref_atomic_exchange:
 607    case nir_intrinsic_shared_atomic_exchange:
 608    case nir_intrinsic_ssbo_atomic_exchange:
 609       return  NV50_IR_SUBOP_ATOM_EXCH;
 610    case nir_intrinsic_bindless_image_atomic_or:
 611    case nir_intrinsic_global_atomic_or:
 612    case nir_intrinsic_image_atomic_or:
 613    case nir_intrinsic_image_deref_atomic_or:
 614    case nir_intrinsic_shared_atomic_or:
 615    case nir_intrinsic_ssbo_atomic_or:
 616       return  NV50_IR_SUBOP_ATOM_OR;
 617    case nir_intrinsic_bindless_image_atomic_imax:
 618    case nir_intrinsic_bindless_image_atomic_umax:
 619    case nir_intrinsic_global_atomic_imax:
 620    case nir_intrinsic_global_atomic_umax:
 621    case nir_intrinsic_image_atomic_imax:
 622    case nir_intrinsic_image_atomic_umax:
 623    case nir_intrinsic_image_deref_atomic_imax:
 624    case nir_intrinsic_image_deref_atomic_umax:
 625    case nir_intrinsic_shared_atomic_imax:
 626    case nir_intrinsic_shared_atomic_umax:
 627    case nir_intrinsic_ssbo_atomic_imax:
 628    case nir_intrinsic_ssbo_atomic_umax:
 629       return  NV50_IR_SUBOP_ATOM_MAX;
 630    case nir_intrinsic_bindless_image_atomic_imin:
 631    case nir_intrinsic_bindless_image_atomic_umin:
 632    case nir_intrinsic_global_atomic_imin:
 633    case nir_intrinsic_global_atomic_umin:
 634    case nir_intrinsic_image_atomic_imin:
 635    case nir_intrinsic_image_atomic_umin:
 636    case nir_intrinsic_image_deref_atomic_imin:
 637    case nir_intrinsic_image_deref_atomic_umin:
 638    case nir_intrinsic_shared_atomic_imin:
 639    case nir_intrinsic_shared_atomic_umin:
 640    case nir_intrinsic_ssbo_atomic_imin:
 641    case nir_intrinsic_ssbo_atomic_umin:
 642       return  NV50_IR_SUBOP_ATOM_MIN;
 643    case nir_intrinsic_bindless_image_atomic_xor:
 644    case nir_intrinsic_global_atomic_xor:
 645    case nir_intrinsic_image_atomic_xor:
 646    case nir_intrinsic_image_deref_atomic_xor:
 647    case nir_intrinsic_shared_atomic_xor:
 648    case nir_intrinsic_ssbo_atomic_xor:
 649       return  NV50_IR_SUBOP_ATOM_XOR;
 650
 651    case nir_intrinsic_group_memory_barrier:
 652    case nir_intrinsic_memory_barrier:
 653    case nir_intrinsic_memory_barrier_buffer:
 654    case nir_intrinsic_memory_barrier_image:
 655       return NV50_IR_SUBOP_MEMBAR(M, GL);
 656    case nir_intrinsic_memory_barrier_shared:
 657       return NV50_IR_SUBOP_MEMBAR(M, CTA);
 658
 659    case nir_intrinsic_vote_all:
 660       return NV50_IR_SUBOP_VOTE_ALL;
 661    case nir_intrinsic_vote_any:
 662       return NV50_IR_SUBOP_VOTE_ANY;
 663    case nir_intrinsic_vote_ieq:
 664       return NV50_IR_SUBOP_VOTE_UNI;
 665    default:
 666       return 0;
 667    }
 668 }
 669
 670 CondCode
 671 Converter::getCondCode(nir_op op)
 672 {
 673    switch (op) {
 674    case nir_op_feq32:
 675    case nir_op_ieq32:
 676       return CC_EQ;
 677    case nir_op_fge32:
 678    case nir_op_ige32:
 679    case nir_op_uge32:
 680       return CC_GE;
 681    case nir_op_flt32:
 682    case nir_op_ilt32:
 683    case nir_op_ult32:
 684       return CC_LT;
 685    case nir_op_fne32:
 686       return CC_NEU;
 687    case nir_op_ine32:
 688       return CC_NE;
 689    default:
 690       ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
 691       assert(false);
 692       return CC_FL;
 693    }
 694 }
 695
 696 Converter::LValues&
 697 Converter::convert(nir_alu_dest *dest)
 698 {
 699    return convert(&dest->dest);
 700 }
 701
 702 Converter::LValues&
 703 Converter::convert(nir_dest *dest)
 704 {
 705    if (dest->is_ssa)
 706       return convert(&dest->ssa);
 707    if (dest->reg.indirect) {
 708       ERROR("no support for indirects.");
 709       assert(false);
 710    }
 711    return convert(dest->reg.reg);
 712 }
 713
 714 Converter::LValues&
 715 Converter::convert(nir_register *reg)
 716 {
 717    NirDefMap::iterator it = regDefs.find(reg->index);
 718    if (it != regDefs.end())
 719       return it->second;
 720
 721    LValues newDef(reg->num_components);
 722    for (uint8_t i = 0; i < reg->num_components; i++)
 723       newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
 724    return regDefs[reg->index] = newDef;
 725 }
 726
 727 Converter::LValues&
 728 Converter::convert(nir_ssa_def *def)
 729 {
 730    NirDefMap::iterator it = ssaDefs.find(def->index);
 731    if (it != ssaDefs.end())
 732       return it->second;
 733
 734    LValues newDef(def->num_components);
 735    for (uint8_t i = 0; i < def->num_components; i++)
 736       newDef[i] = getSSA(std::max(4, def->bit_size / 8));
 737    return ssaDefs[def->index] = newDef;
 738 }
 739
 740 Value*
 741 Converter::getSrc(nir_alu_src *src, uint8_t component)
 742 {
 743    if (src->abs || src->negate) {
 744       ERROR("modifiers currently not supported on nir_alu_src\n");
 745       assert(false);
 746    }
 747    return getSrc(&src->src, src->swizzle[component]);
 748 }
 749
 750 Value*
 751 Converter::getSrc(nir_register *reg, uint8_t idx)
 752 {
 753    NirDefMap::iterator it = regDefs.find(reg->index);
 754    if (it == regDefs.end())
 755       return convert(reg)[idx];
 756    return it->second[idx];
 757 }
 758
 759 Value*
 760 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
 761 {
 762    if (src->is_ssa)
 763       return getSrc(src->ssa, idx);
 764
 765    if (src->reg.indirect) {
 766       if (indirect)
 767          return getSrc(src->reg.indirect, idx);
 768       ERROR("no support for indirects.");
 769       assert(false);
 770       return NULL;
 771    }
 772
 773    return getSrc(src->reg.reg, idx);
 774 }
 775
 776 Value*
 777 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
 778 {
 779    ImmediateMap::iterator iit = immediates.find(src->index);
 780    if (iit != immediates.end())
 781       return convert((*iit).second, idx);
 782
 783    NirDefMap::iterator it = ssaDefs.find(src->index);
 784    if (it == ssaDefs.end()) {
 785       ERROR("SSA value %u not found\n", src->index);
 786       assert(false);
 787       return NULL;
 788    }
 789    return it->second[idx];
 790 }
 791
 792 uint32_t
 793 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
 794 {
 795    nir_const_value *offset = nir_src_as_const_value(*src);
 796
 797    if (offset) {
 798       indirect = NULL;
 799       return offset[0].u32;
 800    }
 801
 802    indirect = getSrc(src, idx, true);
 803    return 0;
 804 }
 805
 806 uint32_t
 807 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
 808 {
 809    int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
 810    if (indirect && !isScalar)
 811       indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
 812    return idx;
 813 }
 814
 815 static void
 816 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
 817 {
 818    assert(name && index);
 819
 820    if (slot >= VERT_ATTRIB_MAX) {
 821       ERROR("invalid varying slot %u\n", slot);
 822       assert(false);
 823       return;
 824    }
 825
 826    if (slot >= VERT_ATTRIB_GENERIC0 &&
 827        slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
 828       *name = TGSI_SEMANTIC_GENERIC;
 829       *index = slot - VERT_ATTRIB_GENERIC0;
 830       return;
 831    }
 832
 833    if (slot >= VERT_ATTRIB_TEX0 &&
 834        slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
 835       *name = TGSI_SEMANTIC_TEXCOORD;
 836       *index = slot - VERT_ATTRIB_TEX0;
 837       return;
 838    }
 839
 840    switch (slot) {
 841    case VERT_ATTRIB_COLOR0:
 842       *name = TGSI_SEMANTIC_COLOR;
 843       *index = 0;
 844       break;
 845    case VERT_ATTRIB_COLOR1:
 846       *name = TGSI_SEMANTIC_COLOR;
 847       *index = 1;
 848       break;
 849    case VERT_ATTRIB_EDGEFLAG:
 850       *name = TGSI_SEMANTIC_EDGEFLAG;
 851       *index = 0;
 852       break;
 853    case VERT_ATTRIB_FOG:
 854       *name = TGSI_SEMANTIC_FOG;
 855       *index = 0;
 856       break;
 857    case VERT_ATTRIB_NORMAL:
 858       *name = TGSI_SEMANTIC_NORMAL;
 859       *index = 0;
 860       break;
 861    case VERT_ATTRIB_POS:
 862       *name = TGSI_SEMANTIC_POSITION;
 863       *index = 0;
 864       break;
 865    case VERT_ATTRIB_POINT_SIZE:
 866       *name = TGSI_SEMANTIC_PSIZE;
 867       *index = 0;
 868       break;
 869    default:
 870       ERROR("unknown vert attrib slot %u\n", slot);
 871       assert(false);
 872       break;
 873    }
 874 }
 875
 876 static void
 877 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
 878 {
 879    assert(name && index);
 880
 881    if (slot >= VARYING_SLOT_TESS_MAX) {
 882       ERROR("invalid varying slot %u\n", slot);
 883       assert(false);
 884       return;
 885    }
 886
 887    if (slot >= VARYING_SLOT_PATCH0) {
 888       *name = TGSI_SEMANTIC_PATCH;
 889       *index = slot - VARYING_SLOT_PATCH0;
 890       return;
 891    }
 892
 893    if (slot >= VARYING_SLOT_VAR0) {
 894       *name = TGSI_SEMANTIC_GENERIC;
 895       *index = slot - VARYING_SLOT_VAR0;
 896       return;
 897    }
 898
 899    if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
 900       *name = TGSI_SEMANTIC_TEXCOORD;
 901       *index = slot - VARYING_SLOT_TEX0;
 902       return;
 903    }
 904
 905    switch (slot) {
 906    case VARYING_SLOT_BFC0:
 907       *name = TGSI_SEMANTIC_BCOLOR;
 908       *index = 0;
 909       break;
 910    case VARYING_SLOT_BFC1:
 911       *name = TGSI_SEMANTIC_BCOLOR;
 912       *index = 1;
 913       break;
 914    case VARYING_SLOT_CLIP_DIST0:
 915       *name = TGSI_SEMANTIC_CLIPDIST;
 916       *index = 0;
 917       break;
 918    case VARYING_SLOT_CLIP_DIST1:
 919       *name = TGSI_SEMANTIC_CLIPDIST;
 920       *index = 1;
 921       break;
 922    case VARYING_SLOT_CLIP_VERTEX:
 923       *name = TGSI_SEMANTIC_CLIPVERTEX;
 924       *index = 0;
 925       break;
 926    case VARYING_SLOT_COL0:
 927       *name = TGSI_SEMANTIC_COLOR;
 928       *index = 0;
 929       break;
 930    case VARYING_SLOT_COL1:
 931       *name = TGSI_SEMANTIC_COLOR;
 932       *index = 1;
 933       break;
 934    case VARYING_SLOT_EDGE:
 935       *name = TGSI_SEMANTIC_EDGEFLAG;
 936       *index = 0;
 937       break;
 938    case VARYING_SLOT_FACE:
 939       *name = TGSI_SEMANTIC_FACE;
 940       *index = 0;
 941       break;
 942    case VARYING_SLOT_FOGC:
 943       *name = TGSI_SEMANTIC_FOG;
 944       *index = 0;
 945       break;
 946    case VARYING_SLOT_LAYER:
 947       *name = TGSI_SEMANTIC_LAYER;
 948       *index = 0;
 949       break;
 950    case VARYING_SLOT_PNTC:
 951       *name = TGSI_SEMANTIC_PCOORD;
 952       *index = 0;
 953       break;
 954    case VARYING_SLOT_POS:
 955       *name = TGSI_SEMANTIC_POSITION;
 956       *index = 0;
 957       break;
 958    case VARYING_SLOT_PRIMITIVE_ID:
 959       *name = TGSI_SEMANTIC_PRIMID;
 960       *index = 0;
 961       break;
 962    case VARYING_SLOT_PSIZ:
 963       *name = TGSI_SEMANTIC_PSIZE;
 964       *index = 0;
 965       break;
 966    case VARYING_SLOT_TESS_LEVEL_INNER:
 967       *name = TGSI_SEMANTIC_TESSINNER;
 968       *index = 0;
 969       break;
 970    case VARYING_SLOT_TESS_LEVEL_OUTER:
 971       *name = TGSI_SEMANTIC_TESSOUTER;
 972       *index = 0;
 973       break;
 974    case VARYING_SLOT_VIEWPORT:
 975       *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
 976       *index = 0;
 977       break;
 978    default:
 979       ERROR("unknown varying slot %u\n", slot);
 980       assert(false);
 981       break;
 982    }
 983 }
 984
 985 static void
 986 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
 987 {
 988    if (slot >= FRAG_RESULT_DATA0) {
 989       *name = TGSI_SEMANTIC_COLOR;
 990       *index = slot - FRAG_RESULT_COLOR - 2; // intentional
 991       return;
 992    }
 993
 994    switch (slot) {
 995    case FRAG_RESULT_COLOR:
 996       *name = TGSI_SEMANTIC_COLOR;
 997       *index = 0;
 998       break;
 999    case FRAG_RESULT_DEPTH:
1000       *name = TGSI_SEMANTIC_POSITION;
1001       *index = 0;
1002       break;
1003    case FRAG_RESULT_SAMPLE_MASK:
1004       *name = TGSI_SEMANTIC_SAMPLEMASK;
1005       *index = 0;
1006       break;
1007    default:
1008       ERROR("unknown frag result slot %u\n", slot);
1009       assert(false);
1010       break;
1011    }
1012 }
1013
1014 // copy of _mesa_sysval_to_semantic
1015 static void
1016 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
1017 {
1018    *index = 0;
1019    switch (val) {
1020    // Vertex shader
1021    case SYSTEM_VALUE_VERTEX_ID:
1022       *name = TGSI_SEMANTIC_VERTEXID;
1023       break;
1024    case SYSTEM_VALUE_INSTANCE_ID:
1025       *name = TGSI_SEMANTIC_INSTANCEID;
1026       break;
1027    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
1028       *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
1029       break;
1030    case SYSTEM_VALUE_BASE_VERTEX:
1031       *name = TGSI_SEMANTIC_BASEVERTEX;
1032       break;
1033    case SYSTEM_VALUE_BASE_INSTANCE:
1034       *name = TGSI_SEMANTIC_BASEINSTANCE;
1035       break;
1036    case SYSTEM_VALUE_DRAW_ID:
1037       *name = TGSI_SEMANTIC_DRAWID;
1038       break;
1039
1040    // Geometry shader
1041    case SYSTEM_VALUE_INVOCATION_ID:
1042       *name = TGSI_SEMANTIC_INVOCATIONID;
1043       break;
1044
1045    // Fragment shader
1046    case SYSTEM_VALUE_FRAG_COORD:
1047       *name = TGSI_SEMANTIC_POSITION;
1048       break;
1049    case SYSTEM_VALUE_FRONT_FACE:
1050       *name = TGSI_SEMANTIC_FACE;
1051       break;
1052    case SYSTEM_VALUE_SAMPLE_ID:
1053       *name = TGSI_SEMANTIC_SAMPLEID;
1054       break;
1055    case SYSTEM_VALUE_SAMPLE_POS:
1056       *name = TGSI_SEMANTIC_SAMPLEPOS;
1057       break;
1058    case SYSTEM_VALUE_SAMPLE_MASK_IN:
1059       *name = TGSI_SEMANTIC_SAMPLEMASK;
1060       break;
1061    case SYSTEM_VALUE_HELPER_INVOCATION:
1062       *name = TGSI_SEMANTIC_HELPER_INVOCATION;
1063       break;
1064
1065    // Tessellation shader
1066    case SYSTEM_VALUE_TESS_COORD:
1067       *name = TGSI_SEMANTIC_TESSCOORD;
1068       break;
1069    case SYSTEM_VALUE_VERTICES_IN:
1070       *name = TGSI_SEMANTIC_VERTICESIN;
1071       break;
1072    case SYSTEM_VALUE_PRIMITIVE_ID:
1073       *name = TGSI_SEMANTIC_PRIMID;
1074       break;
1075    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1076       *name = TGSI_SEMANTIC_TESSOUTER;
1077       break;
1078    case SYSTEM_VALUE_TESS_LEVEL_INNER:
1079       *name = TGSI_SEMANTIC_TESSINNER;
1080       break;
1081
1082    // Compute shader
1083    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1084       *name = TGSI_SEMANTIC_THREAD_ID;
1085       break;
1086    case SYSTEM_VALUE_WORK_GROUP_ID:
1087       *name = TGSI_SEMANTIC_BLOCK_ID;
1088       break;
1089    case SYSTEM_VALUE_NUM_WORK_GROUPS:
1090       *name = TGSI_SEMANTIC_GRID_SIZE;
1091       break;
1092    case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1093       *name = TGSI_SEMANTIC_BLOCK_SIZE;
1094       break;
1095
1096    // ARB_shader_ballot
1097    case SYSTEM_VALUE_SUBGROUP_SIZE:
1098       *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
1099       break;
1100    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1101       *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
1102       break;
1103    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1104       *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
1105       break;
1106    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1107       *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
1108       break;
1109    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1110       *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
1111       break;
1112    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1113       *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
1114       break;
1115    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1116       *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
1117       break;
1118
1119    default:
1120       ERROR("unknown system value %u\n", val);
1121       assert(false);
1122       break;
1123    }
1124 }
1125
1126 void
1127 Converter::setInterpolate(nv50_ir_varying *var,
1128                           uint8_t mode,
1129                           bool centroid,
1130                           unsigned semantic)
1131 {
1132    switch (mode) {
1133    case INTERP_MODE_FLAT:
1134       var->flat = 1;
1135       break;
1136    case INTERP_MODE_NONE:
1137       if (semantic == TGSI_SEMANTIC_COLOR)
1138          var->sc = 1;
1139       else if (semantic == TGSI_SEMANTIC_POSITION)
1140          var->linear = 1;
1141       break;
1142    case INTERP_MODE_NOPERSPECTIVE:
1143       var->linear = 1;
1144       break;
1145    case INTERP_MODE_SMOOTH:
1146       break;
1147    }
1148    var->centroid = centroid;
1149 }
1150
1151 static uint16_t
1152 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
1153           bool input, const nir_variable *var)
1154 {
1155    if (!type->is_array())
1156       return type->count_attribute_slots(false);
1157
1158    uint16_t slots;
1159    switch (stage) {
1160    case Program::TYPE_GEOMETRY:
1161       slots = type->uniform_locations();
1162       if (input)
1163          slots /= info.gs.vertices_in;
1164       break;
1165    case Program::TYPE_TESSELLATION_CONTROL:
1166    case Program::TYPE_TESSELLATION_EVAL:
1167       // remove first dimension
1168       if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1169          slots = type->uniform_locations();
1170       else
1171          slots = type->fields.array->uniform_locations();
1172       break;
1173    default:
1174       slots = type->count_attribute_slots(false);
1175       break;
1176    }
1177
1178    return slots;
1179 }
1180
1181 bool Converter::assignSlots() {
1182    unsigned name;
1183    unsigned index;
1184
1185    info->io.viewportId = -1;
1186    info->numInputs = 0;
1187    info->numOutputs = 0;
1188
1189    // we have to fixup the uniform locations for arrays
1190    unsigned numImages = 0;
1191    nir_foreach_variable(var, &nir->uniforms) {
1192       const glsl_type *type = var->type;
1193       if (!type->without_array()->is_image())
1194          continue;
1195       var->data.driver_location = numImages;
1196       numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1197    }
1198
1199    info->numSysVals = 0;
1200    for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
1201       if (!(nir->info.system_values_read & 1ull << i))
1202          continue;
1203
1204       system_val_to_tgsi_semantic(i, &name, &index);
1205       info->sv[info->numSysVals].sn = name;
1206       info->sv[info->numSysVals].si = index;
1207       info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1208
1209       switch (i) {
1210       case SYSTEM_VALUE_INSTANCE_ID:
1211          info->io.instanceId = info->numSysVals;
1212          break;
1213       case SYSTEM_VALUE_TESS_LEVEL_INNER:
1214       case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1215          info->sv[info->numSysVals].patch = 1;
1216          break;
1217       case SYSTEM_VALUE_VERTEX_ID:
1218          info->io.vertexId = info->numSysVals;
1219          break;
1220       default:
1221          break;
1222       }
1223
1224       info->numSysVals += 1;
1225    }
1226
1227    if (prog->getType() == Program::TYPE_COMPUTE)
1228       return true;
1229
1230    nir_foreach_variable(var, &nir->inputs) {
1231       const glsl_type *type = var->type;
1232       int slot = var->data.location;
1233       uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1234       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1235                                        : type->component_slots();
1236       uint32_t frac = var->data.location_frac;
1237       uint32_t vary = var->data.driver_location;
1238
1239       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1240          if (comp > 2)
1241             slots *= 2;
1242       }
1243
1244       assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1245
1246       switch(prog->getType()) {
1247       case Program::TYPE_FRAGMENT:
1248          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1249          for (uint16_t i = 0; i < slots; ++i) {
1250             setInterpolate(&info->in[vary + i], var->data.interpolation,
1251                            var->data.centroid | var->data.sample, name);
1252          }
1253          break;
1254       case Program::TYPE_GEOMETRY:
1255          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1256          break;
1257       case Program::TYPE_TESSELLATION_CONTROL:
1258       case Program::TYPE_TESSELLATION_EVAL:
1259          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1260          if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1261             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1262          break;
1263       case Program::TYPE_VERTEX:
1264          vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1265          switch (name) {
1266          case TGSI_SEMANTIC_EDGEFLAG:
1267             info->io.edgeFlagIn = vary;
1268             break;
1269          default:
1270             break;
1271          }
1272          break;
1273       default:
1274          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1275          return false;
1276       }
1277
1278       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1279          info->in[vary].id = vary;
1280          info->in[vary].patch = var->data.patch;
1281          info->in[vary].sn = name;
1282          info->in[vary].si = index + i;
1283          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1284             if (i & 0x1)
1285                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1286             else
1287                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1288          else
1289             info->in[vary].mask |= ((1 << comp) - 1) << frac;
1290       }
1291       info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1292    }
1293
1294    nir_foreach_variable(var, &nir->outputs) {
1295       const glsl_type *type = var->type;
1296       int slot = var->data.location;
1297       uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1298       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1299                                        : type->component_slots();
1300       uint32_t frac = var->data.location_frac;
1301       uint32_t vary = var->data.driver_location;
1302
1303       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1304          if (comp > 2)
1305             slots *= 2;
1306       }
1307
1308       assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1309
1310       switch(prog->getType()) {
1311       case Program::TYPE_FRAGMENT:
1312          frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1313          switch (name) {
1314          case TGSI_SEMANTIC_COLOR:
1315             if (!var->data.fb_fetch_output)
1316                info->prop.fp.numColourResults++;
1317             info->prop.fp.separateFragData = true;
1318             // sometimes we get FRAG_RESULT_DATAX with data.index 0
1319             // sometimes we get FRAG_RESULT_DATA0 with data.index X
1320             index = index == 0 ? var->data.index : index;
1321             break;
1322          case TGSI_SEMANTIC_POSITION:
1323             info->io.fragDepth = vary;
1324             info->prop.fp.writesDepth = true;
1325             break;
1326          case TGSI_SEMANTIC_SAMPLEMASK:
1327             info->io.sampleMask = vary;
1328             break;
1329          default:
1330             break;
1331          }
1332          break;
1333       case Program::TYPE_GEOMETRY:
1334       case Program::TYPE_TESSELLATION_CONTROL:
1335       case Program::TYPE_TESSELLATION_EVAL:
1336       case Program::TYPE_VERTEX:
1337          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1338
1339          if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1340              name != TGSI_SEMANTIC_TESSOUTER)
1341             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1342
1343          switch (name) {
1344          case TGSI_SEMANTIC_CLIPDIST:
1345             info->io.genUserClip = -1;
1346             break;
1347          case TGSI_SEMANTIC_CLIPVERTEX:
1348             clipVertexOutput = vary;
1349             break;
1350          case TGSI_SEMANTIC_EDGEFLAG:
1351             info->io.edgeFlagOut = vary;
1352             break;
1353          case TGSI_SEMANTIC_POSITION:
1354             if (clipVertexOutput < 0)
1355                clipVertexOutput = vary;
1356             break;
1357          default:
1358             break;
1359          }
1360          break;
1361       default:
1362          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1363          return false;
1364       }
1365
1366       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1367          info->out[vary].id = vary;
1368          info->out[vary].patch = var->data.patch;
1369          info->out[vary].sn = name;
1370          info->out[vary].si = index + i;
1371          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1372             if (i & 0x1)
1373                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1374             else
1375                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1376          else
1377             info->out[vary].mask |= ((1 << comp) - 1) << frac;
1378
1379          if (nir->info.outputs_read & 1ull << slot)
1380             info->out[vary].oread = 1;
1381       }
1382       info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1383    }
1384
1385    if (info->io.genUserClip > 0) {
1386       info->io.clipDistances = info->io.genUserClip;
1387
1388       const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1389
1390       for (unsigned int n = 0; n < nOut; ++n) {
1391          unsigned int i = info->numOutputs++;
1392          info->out[i].id = i;
1393          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1394          info->out[i].si = n;
1395          info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1396       }
1397    }
1398
1399    return info->assignSlots(info) == 0;
1400 }
1401
1402 uint32_t
1403 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1404 {
1405    DataType ty;
1406    int offset = nir_intrinsic_component(insn);
1407    bool input;
1408
1409    if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1410       ty = getDType(insn);
1411    else
1412       ty = getSType(insn->src[0], false, false);
1413
1414    switch (insn->intrinsic) {
1415    case nir_intrinsic_load_input:
1416    case nir_intrinsic_load_interpolated_input:
1417    case nir_intrinsic_load_per_vertex_input:
1418       input = true;
1419       break;
1420    case nir_intrinsic_load_output:
1421    case nir_intrinsic_load_per_vertex_output:
1422    case nir_intrinsic_store_output:
1423    case nir_intrinsic_store_per_vertex_output:
1424       input = false;
1425       break;
1426    default:
1427       ERROR("unknown intrinsic in getSlotAddress %s",
1428             nir_intrinsic_infos[insn->intrinsic].name);
1429       input = false;
1430       assert(false);
1431       break;
1432    }
1433
1434    if (typeSizeof(ty) == 8) {
1435       slot *= 2;
1436       slot += offset;
1437       if (slot >= 4) {
1438          idx += 1;
1439          slot -= 4;
1440       }
1441    } else {
1442       slot += offset;
1443    }
1444
1445    assert(slot < 4);
1446    assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1447    assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1448
1449    const nv50_ir_varying *vary = input ? info->in : info->out;
1450    return vary[idx].slot[slot] * 4;
1451 }
1452
1453 Instruction *
1454 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1455                     uint32_t base, uint8_t c, Value *indirect0,
1456                     Value *indirect1, bool patch)
1457 {
1458    unsigned int tySize = typeSizeof(ty);
1459
1460    if (tySize == 8 &&
1461        (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1462       Value *lo = getSSA();
1463       Value *hi = getSSA();
1464
1465       Instruction *loi =
1466          mkLoad(TYPE_U32, lo,
1467                 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1468                 indirect0);
1469       loi->setIndirect(0, 1, indirect1);
1470       loi->perPatch = patch;
1471
1472       Instruction *hii =
1473          mkLoad(TYPE_U32, hi,
1474                 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1475                 indirect0);
1476       hii->setIndirect(0, 1, indirect1);
1477       hii->perPatch = patch;
1478
1479       return mkOp2(OP_MERGE, ty, def, lo, hi);
1480    } else {
1481       Instruction *ld =
1482          mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1483       ld->setIndirect(0, 1, indirect1);
1484       ld->perPatch = patch;
1485       return ld;
1486    }
1487 }
1488
1489 void
1490 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1491                    DataType ty, Value *src, uint8_t idx, uint8_t c,
1492                    Value *indirect0, Value *indirect1)
1493 {
1494    uint8_t size = typeSizeof(ty);
1495    uint32_t address = getSlotAddress(insn, idx, c);
1496
1497    if (size == 8 && indirect0) {
1498       Value *split[2];
1499       mkSplit(split, 4, src);
1500
1501       if (op == OP_EXPORT) {
1502          split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1503          split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1504       }
1505
1506       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1507               split[0])->perPatch = info->out[idx].patch;
1508       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1509               split[1])->perPatch = info->out[idx].patch;
1510    } else {
1511       if (op == OP_EXPORT)
1512          src = mkMov(getSSA(size), src, ty)->getDef(0);
1513       mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1514               src)->perPatch = info->out[idx].patch;
1515    }
1516 }
1517
1518 bool
1519 Converter::parseNIR()
1520 {
1521    info->bin.tlsSpace = 0;
1522    info->io.clipDistances = nir->info.clip_distance_array_size;
1523    info->io.cullDistances = nir->info.cull_distance_array_size;
1524
1525    switch(prog->getType()) {
1526    case Program::TYPE_COMPUTE:
1527       info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1528       info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1529       info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1530       info->bin.smemSize = nir->info.cs.shared_size;
1531       break;
1532    case Program::TYPE_FRAGMENT:
1533       info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1534       info->prop.fp.persampleInvocation =
1535          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1536          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1537       info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1538       info->prop.fp.readsSampleLocations =
1539          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1540       info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1541       info->prop.fp.usesSampleMaskIn =
1542          !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1543       break;
1544    case Program::TYPE_GEOMETRY:
1545       info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1546       info->prop.gp.instanceCount = nir->info.gs.invocations;
1547       info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1548       info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1549       break;
1550    case Program::TYPE_TESSELLATION_CONTROL:
1551    case Program::TYPE_TESSELLATION_EVAL:
1552       if (nir->info.tess.primitive_mode == GL_ISOLINES)
1553          info->prop.tp.domain = GL_LINES;
1554       else
1555          info->prop.tp.domain = nir->info.tess.primitive_mode;
1556       info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1557       info->prop.tp.outputPrim =
1558          nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1559       info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1560       info->prop.tp.winding = !nir->info.tess.ccw;
1561       break;
1562    case Program::TYPE_VERTEX:
1563       info->prop.vp.usesDrawParameters =
1564          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1565          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1566          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1567       break;
1568    default:
1569       break;
1570    }
1571
1572    return true;
1573 }
1574
1575 bool
1576 Converter::visit(nir_function *function)
1577 {
1578    assert(function->impl);
1579
1580    // usually the blocks will set everything up, but main is special
1581    BasicBlock *entry = new BasicBlock(prog->main);
1582    exit = new BasicBlock(prog->main);
1583    blocks[nir_start_block(function->impl)->index] = entry;
1584    prog->main->setEntry(entry);
1585    prog->main->setExit(exit);
1586
1587    setPosition(entry, true);
1588
1589    if (info->io.genUserClip > 0) {
1590       for (int c = 0; c < 4; ++c)
1591          clipVtx[c] = getScratch();
1592    }
1593
1594    switch (prog->getType()) {
1595    case Program::TYPE_TESSELLATION_CONTROL:
1596       outBase = mkOp2v(
1597          OP_SUB, TYPE_U32, getSSA(),
1598          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1599          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1600       break;
1601    case Program::TYPE_FRAGMENT: {
1602       Symbol *sv = mkSysVal(SV_POSITION, 3);
1603       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1604       fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1605       break;
1606    }
1607    default:
1608       break;
1609    }
1610
1611    nir_foreach_register(reg, &function->impl->registers) {
1612       if (reg->num_array_elems) {
1613          // TODO: packed variables would be nice, but MemoryOpt fails
1614          // replace 4 with reg->num_components
1615          uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1616          regToLmemOffset[reg->index] = info->bin.tlsSpace;
1617          info->bin.tlsSpace += size;
1618       }
1619    }
1620
1621    nir_index_ssa_defs(function->impl);
1622    foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1623       if (!visit(node))
1624          return false;
1625    }
1626
1627    bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1628    setPosition(exit, true);
1629
1630    if ((prog->getType() == Program::TYPE_VERTEX ||
1631         prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1632        && info->io.genUserClip > 0)
1633       handleUserClipPlanes();
1634
1635    // TODO: for non main function this needs to be a OP_RETURN
1636    mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1637    return true;
1638 }
1639
1640 bool
1641 Converter::visit(nir_cf_node *node)
1642 {
1643    switch (node->type) {
1644    case nir_cf_node_block:
1645       return visit(nir_cf_node_as_block(node));
1646    case nir_cf_node_if:
1647       return visit(nir_cf_node_as_if(node));
1648    case nir_cf_node_loop:
1649       return visit(nir_cf_node_as_loop(node));
1650    default:
1651       ERROR("unknown nir_cf_node type %u\n", node->type);
1652       return false;
1653    }
1654 }
1655
1656 bool
1657 Converter::visit(nir_block *block)
1658 {
1659    if (!block->predecessors->entries && block->instr_list.is_empty())
1660       return true;
1661
1662    BasicBlock *bb = convert(block);
1663
1664    setPosition(bb, true);
1665    nir_foreach_instr(insn, block) {
1666       if (!visit(insn))
1667          return false;
1668    }
1669    return true;
1670 }
1671
1672 bool
1673 Converter::visit(nir_if *nif)
1674 {
1675    DataType sType = getSType(nif->condition, false, false);
1676    Value *src = getSrc(&nif->condition, 0);
1677
1678    nir_block *lastThen = nir_if_last_then_block(nif);
1679    nir_block *lastElse = nir_if_last_else_block(nif);
1680
1681    assert(!lastThen->successors[1]);
1682    assert(!lastElse->successors[1]);
1683
1684    BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1685    BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1686
1687    bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1688    bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1689
1690    // we only insert joinats, if both nodes end up at the end of the if again.
1691    // the reason for this to not happens are breaks/continues/ret/... which
1692    // have their own handling
1693    if (lastThen->successors[0] == lastElse->successors[0])
1694       bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1695                           CC_ALWAYS, NULL);
1696
1697    mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1698
1699    foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1700       if (!visit(node))
1701          return false;
1702    }
1703    setPosition(convert(lastThen), true);
1704    if (!bb->getExit() ||
1705        !bb->getExit()->asFlow() ||
1706         bb->getExit()->asFlow()->op == OP_JOIN) {
1707       BasicBlock *tailBB = convert(lastThen->successors[0]);
1708       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1709       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1710    }
1711
1712    foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1713       if (!visit(node))
1714          return false;
1715    }
1716    setPosition(convert(lastElse), true);
1717    if (!bb->getExit() ||
1718        !bb->getExit()->asFlow() ||
1719         bb->getExit()->asFlow()->op == OP_JOIN) {
1720       BasicBlock *tailBB = convert(lastElse->successors[0]);
1721       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1722       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1723    }
1724
1725    if (lastThen->successors[0] == lastElse->successors[0]) {
1726       setPosition(convert(lastThen->successors[0]), true);
1727       mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1728    }
1729
1730    return true;
1731 }
1732
1733 bool
1734 Converter::visit(nir_loop *loop)
1735 {
1736    curLoopDepth += 1;
1737    func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1738
1739    BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1740    BasicBlock *tailBB =
1741       convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1742    bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1743
1744    mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1745    setPosition(loopBB, false);
1746    mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1747
1748    foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1749       if (!visit(node))
1750          return false;
1751    }
1752    Instruction *insn = bb->getExit();
1753    if (bb->cfg.incidentCount() != 0) {
1754       if (!insn || !insn->asFlow()) {
1755          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1756          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1757       } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1758                  tailBB->cfg.incidentCount() == 0) {
1759          // RA doesn't like having blocks around with no incident edge,
1760          // so we create a fake one to make it happy
1761          bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1762       }
1763    }
1764
1765    curLoopDepth -= 1;
1766
1767    return true;
1768 }
1769
1770 bool
1771 Converter::visit(nir_instr *insn)
1772 {
1773    // we need an insertion point for on the fly generated immediate loads
1774    immInsertPos = bb->getExit();
1775    switch (insn->type) {
1776    case nir_instr_type_alu:
1777       return visit(nir_instr_as_alu(insn));
1778    case nir_instr_type_deref:
1779       return visit(nir_instr_as_deref(insn));
1780    case nir_instr_type_intrinsic:
1781       return visit(nir_instr_as_intrinsic(insn));
1782    case nir_instr_type_jump:
1783       return visit(nir_instr_as_jump(insn));
1784    case nir_instr_type_load_const:
1785       return visit(nir_instr_as_load_const(insn));
1786    case nir_instr_type_ssa_undef:
1787       return visit(nir_instr_as_ssa_undef(insn));
1788    case nir_instr_type_tex:
1789       return visit(nir_instr_as_tex(insn));
1790    default:
1791       ERROR("unknown nir_instr type %u\n", insn->type);
1792       return false;
1793    }
1794    return true;
1795 }
1796
1797 SVSemantic
1798 Converter::convert(nir_intrinsic_op intr)
1799 {
1800    switch (intr) {
1801    case nir_intrinsic_load_base_vertex:
1802       return SV_BASEVERTEX;
1803    case nir_intrinsic_load_base_instance:
1804       return SV_BASEINSTANCE;
1805    case nir_intrinsic_load_draw_id:
1806       return SV_DRAWID;
1807    case nir_intrinsic_load_front_face:
1808       return SV_FACE;
1809    case nir_intrinsic_load_helper_invocation:
1810       return SV_THREAD_KILL;
1811    case nir_intrinsic_load_instance_id:
1812       return SV_INSTANCE_ID;
1813    case nir_intrinsic_load_invocation_id:
1814       return SV_INVOCATION_ID;
1815    case nir_intrinsic_load_local_group_size:
1816       return SV_NTID;
1817    case nir_intrinsic_load_local_invocation_id:
1818       return SV_TID;
1819    case nir_intrinsic_load_num_work_groups:
1820       return SV_NCTAID;
1821    case nir_intrinsic_load_patch_vertices_in:
1822       return SV_VERTEX_COUNT;
1823    case nir_intrinsic_load_primitive_id:
1824       return SV_PRIMITIVE_ID;
1825    case nir_intrinsic_load_sample_id:
1826       return SV_SAMPLE_INDEX;
1827    case nir_intrinsic_load_sample_mask_in:
1828       return SV_SAMPLE_MASK;
1829    case nir_intrinsic_load_sample_pos:
1830       return SV_SAMPLE_POS;
1831    case nir_intrinsic_load_subgroup_eq_mask:
1832       return SV_LANEMASK_EQ;
1833    case nir_intrinsic_load_subgroup_ge_mask:
1834       return SV_LANEMASK_GE;
1835    case nir_intrinsic_load_subgroup_gt_mask:
1836       return SV_LANEMASK_GT;
1837    case nir_intrinsic_load_subgroup_le_mask:
1838       return SV_LANEMASK_LE;
1839    case nir_intrinsic_load_subgroup_lt_mask:
1840       return SV_LANEMASK_LT;
1841    case nir_intrinsic_load_subgroup_invocation:
1842       return SV_LANEID;
1843    case nir_intrinsic_load_tess_coord:
1844       return SV_TESS_COORD;
1845    case nir_intrinsic_load_tess_level_inner:
1846       return SV_TESS_INNER;
1847    case nir_intrinsic_load_tess_level_outer:
1848       return SV_TESS_OUTER;
1849    case nir_intrinsic_load_vertex_id:
1850       return SV_VERTEX_ID;
1851    case nir_intrinsic_load_work_group_id:
1852       return SV_CTAID;
1853    default:
1854       ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1855             nir_intrinsic_infos[intr].name);
1856       assert(false);
1857       return SV_LAST;
1858    }
1859 }
1860
1861 bool
1862 Converter::visit(nir_intrinsic_instr *insn)
1863 {
1864    nir_intrinsic_op op = insn->intrinsic;
1865    const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1866
1867    switch (op) {
1868    case nir_intrinsic_load_uniform: {
1869       LValues &newDefs = convert(&insn->dest);
1870       const DataType dType = getDType(insn);
1871       Value *indirect;
1872       uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1873       for (uint8_t i = 0; i < insn->num_components; ++i) {
1874          loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1875       }
1876       break;
1877    }
1878    case nir_intrinsic_store_output:
1879    case nir_intrinsic_store_per_vertex_output: {
1880       Value *indirect;
1881       DataType dType = getSType(insn->src[0], false, false);
1882       uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1883
1884       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1885          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1886             continue;
1887
1888          uint8_t offset = 0;
1889          Value *src = getSrc(&insn->src[0], i);
1890          switch (prog->getType()) {
1891          case Program::TYPE_FRAGMENT: {
1892             if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1893                // TGSI uses a different interface than NIR, TGSI stores that
1894                // value in the z component, NIR in X
1895                offset += 2;
1896                src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1897             }
1898             break;
1899          }
1900          case Program::TYPE_GEOMETRY:
1901          case Program::TYPE_VERTEX: {
1902             if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
1903                mkMov(clipVtx[i], src);
1904                src = clipVtx[i];
1905             }
1906             break;
1907          }
1908          default:
1909             break;
1910          }
1911
1912          storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1913       }
1914       break;
1915    }
1916    case nir_intrinsic_load_input:
1917    case nir_intrinsic_load_interpolated_input:
1918    case nir_intrinsic_load_output: {
1919       LValues &newDefs = convert(&insn->dest);
1920
1921       // FBFetch
1922       if (prog->getType() == Program::TYPE_FRAGMENT &&
1923           op == nir_intrinsic_load_output) {
1924          std::vector<Value*> defs, srcs;
1925          uint8_t mask = 0;
1926
1927          srcs.push_back(getSSA());
1928          srcs.push_back(getSSA());
1929          Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1930          Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1931          mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1932          mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1933
1934          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1935          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1936
1937          for (uint8_t i = 0u; i < insn->num_components; ++i) {
1938             defs.push_back(newDefs[i]);
1939             mask |= 1 << i;
1940          }
1941
1942          TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1943          texi->tex.levelZero = 1;
1944          texi->tex.mask = mask;
1945          texi->tex.useOffsets = 0;
1946          texi->tex.r = 0xffff;
1947          texi->tex.s = 0xffff;
1948
1949          info->prop.fp.readsFramebuffer = true;
1950          break;
1951       }
1952
1953       const DataType dType = getDType(insn);
1954       Value *indirect;
1955       bool input = op != nir_intrinsic_load_output;
1956       operation nvirOp;
1957       uint32_t mode = 0;
1958
1959       uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1960       nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1961
1962       // see load_barycentric_* handling
1963       if (prog->getType() == Program::TYPE_FRAGMENT) {
1964          mode = translateInterpMode(&vary, nvirOp);
1965          if (op == nir_intrinsic_load_interpolated_input) {
1966             ImmediateValue immMode;
1967             if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1968                mode |= immMode.reg.data.u32;
1969          }
1970       }
1971
1972       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1973          uint32_t address = getSlotAddress(insn, idx, i);
1974          Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1975          if (prog->getType() == Program::TYPE_FRAGMENT) {
1976             int s = 1;
1977             if (typeSizeof(dType) == 8) {
1978                Value *lo = getSSA();
1979                Value *hi = getSSA();
1980                Instruction *interp;
1981
1982                interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1983                if (nvirOp == OP_PINTERP)
1984                   interp->setSrc(s++, fp.position);
1985                if (mode & NV50_IR_INTERP_OFFSET)
1986                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1987                interp->setInterpolate(mode);
1988                interp->setIndirect(0, 0, indirect);
1989
1990                Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1991                interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1992                if (nvirOp == OP_PINTERP)
1993                   interp->setSrc(s++, fp.position);
1994                if (mode & NV50_IR_INTERP_OFFSET)
1995                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1996                interp->setInterpolate(mode);
1997                interp->setIndirect(0, 0, indirect);
1998
1999                mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2000             } else {
2001                Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2002                if (nvirOp == OP_PINTERP)
2003                   interp->setSrc(s++, fp.position);
2004                if (mode & NV50_IR_INTERP_OFFSET)
2005                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
2006                interp->setInterpolate(mode);
2007                interp->setIndirect(0, 0, indirect);
2008             }
2009          } else {
2010             mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2011          }
2012       }
2013       break;
2014    }
2015    case nir_intrinsic_load_kernel_input: {
2016       assert(prog->getType() == Program::TYPE_COMPUTE);
2017       assert(insn->num_components == 1);
2018
2019       LValues &newDefs = convert(&insn->dest);
2020       const DataType dType = getDType(insn);
2021       Value *indirect;
2022       uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
2023
2024       mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
2025       break;
2026    }
2027    case nir_intrinsic_load_barycentric_at_offset:
2028    case nir_intrinsic_load_barycentric_at_sample:
2029    case nir_intrinsic_load_barycentric_centroid:
2030    case nir_intrinsic_load_barycentric_pixel:
2031    case nir_intrinsic_load_barycentric_sample: {
2032       LValues &newDefs = convert(&insn->dest);
2033       uint32_t mode;
2034
2035       if (op == nir_intrinsic_load_barycentric_centroid ||
2036           op == nir_intrinsic_load_barycentric_sample) {
2037          mode = NV50_IR_INTERP_CENTROID;
2038       } else if (op == nir_intrinsic_load_barycentric_at_offset) {
2039          Value *offs[2];
2040          for (uint8_t c = 0; c < 2; c++) {
2041             offs[c] = getScratch();
2042             mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2043             mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2044             mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2045             mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2046          }
2047          mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
2048
2049          mode = NV50_IR_INTERP_OFFSET;
2050       } else if (op == nir_intrinsic_load_barycentric_pixel) {
2051          mode = NV50_IR_INTERP_DEFAULT;
2052       } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2053          info->prop.fp.readsSampleLocations = true;
2054          mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2055          mode = NV50_IR_INTERP_OFFSET;
2056       } else {
2057          unreachable("all intrinsics already handled above");
2058       }
2059
2060       loadImm(newDefs[1], mode);
2061       break;
2062    }
2063    case nir_intrinsic_discard:
2064       mkOp(OP_DISCARD, TYPE_NONE, NULL);
2065       break;
2066    case nir_intrinsic_discard_if: {
2067       Value *pred = getSSA(1, FILE_PREDICATE);
2068       if (insn->num_components > 1) {
2069          ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
2070          assert(false);
2071          return false;
2072       }
2073       mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2074       mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2075       break;
2076    }
2077    case nir_intrinsic_load_base_vertex:
2078    case nir_intrinsic_load_base_instance:
2079    case nir_intrinsic_load_draw_id:
2080    case nir_intrinsic_load_front_face:
2081    case nir_intrinsic_load_helper_invocation:
2082    case nir_intrinsic_load_instance_id:
2083    case nir_intrinsic_load_invocation_id:
2084    case nir_intrinsic_load_local_group_size:
2085    case nir_intrinsic_load_local_invocation_id:
2086    case nir_intrinsic_load_num_work_groups:
2087    case nir_intrinsic_load_patch_vertices_in:
2088    case nir_intrinsic_load_primitive_id:
2089    case nir_intrinsic_load_sample_id:
2090    case nir_intrinsic_load_sample_mask_in:
2091    case nir_intrinsic_load_sample_pos:
2092    case nir_intrinsic_load_subgroup_eq_mask:
2093    case nir_intrinsic_load_subgroup_ge_mask:
2094    case nir_intrinsic_load_subgroup_gt_mask:
2095    case nir_intrinsic_load_subgroup_le_mask:
2096    case nir_intrinsic_load_subgroup_lt_mask:
2097    case nir_intrinsic_load_subgroup_invocation:
2098    case nir_intrinsic_load_tess_coord:
2099    case nir_intrinsic_load_tess_level_inner:
2100    case nir_intrinsic_load_tess_level_outer:
2101    case nir_intrinsic_load_vertex_id:
2102    case nir_intrinsic_load_work_group_id: {
2103       const DataType dType = getDType(insn);
2104       SVSemantic sv = convert(op);
2105       LValues &newDefs = convert(&insn->dest);
2106
2107       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2108          Value *def;
2109          if (typeSizeof(dType) == 8)
2110             def = getSSA();
2111          else
2112             def = newDefs[i];
2113
2114          if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2115             loadImm(def, 0u);
2116          } else {
2117             Symbol *sym = mkSysVal(sv, i);
2118             Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2119             if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2120                rdsv->perPatch = 1;
2121          }
2122
2123          if (typeSizeof(dType) == 8)
2124             mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2125       }
2126       break;
2127    }
2128    // constants
2129    case nir_intrinsic_load_subgroup_size: {
2130       LValues &newDefs = convert(&insn->dest);
2131       loadImm(newDefs[0], 32u);
2132       break;
2133    }
2134    case nir_intrinsic_vote_all:
2135    case nir_intrinsic_vote_any:
2136    case nir_intrinsic_vote_ieq: {
2137       LValues &newDefs = convert(&insn->dest);
2138       Value *pred = getScratch(1, FILE_PREDICATE);
2139       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2140       mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2141       mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2142       break;
2143    }
2144    case nir_intrinsic_ballot: {
2145       LValues &newDefs = convert(&insn->dest);
2146       Value *pred = getSSA(1, FILE_PREDICATE);
2147       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2148       mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2149       break;
2150    }
2151    case nir_intrinsic_read_first_invocation:
2152    case nir_intrinsic_read_invocation: {
2153       LValues &newDefs = convert(&insn->dest);
2154       const DataType dType = getDType(insn);
2155       Value *tmp = getScratch();
2156
2157       if (op == nir_intrinsic_read_first_invocation) {
2158          mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2159          mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2160          mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2161       } else
2162          tmp = getSrc(&insn->src[1], 0);
2163
2164       for (uint8_t i = 0; i < insn->num_components; ++i) {
2165          mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2166             ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2167       }
2168       break;
2169    }
2170    case nir_intrinsic_load_per_vertex_input: {
2171       const DataType dType = getDType(insn);
2172       LValues &newDefs = convert(&insn->dest);
2173       Value *indirectVertex;
2174       Value *indirectOffset;
2175       uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2176       uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2177
2178       Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2179                               mkImm(baseVertex), indirectVertex);
2180       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2181          uint32_t address = getSlotAddress(insn, idx, i);
2182          loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2183                   indirectOffset, vtxBase, info->in[idx].patch);
2184       }
2185       break;
2186    }
2187    case nir_intrinsic_load_per_vertex_output: {
2188       const DataType dType = getDType(insn);
2189       LValues &newDefs = convert(&insn->dest);
2190       Value *indirectVertex;
2191       Value *indirectOffset;
2192       uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2193       uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2194       Value *vtxBase = NULL;
2195
2196       if (indirectVertex)
2197          vtxBase = indirectVertex;
2198       else
2199          vtxBase = loadImm(NULL, baseVertex);
2200
2201       vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2202
2203       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2204          uint32_t address = getSlotAddress(insn, idx, i);
2205          loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2206                   indirectOffset, vtxBase, info->in[idx].patch);
2207       }
2208       break;
2209    }
2210    case nir_intrinsic_emit_vertex:
2211       if (info->io.genUserClip > 0)
2212          handleUserClipPlanes();
2213       // fallthrough
2214    case nir_intrinsic_end_primitive: {
2215       uint32_t idx = nir_intrinsic_stream_id(insn);
2216       mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2217       break;
2218    }
2219    case nir_intrinsic_load_ubo: {
2220       const DataType dType = getDType(insn);
2221       LValues &newDefs = convert(&insn->dest);
2222       Value *indirectIndex;
2223       Value *indirectOffset;
2224       uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2225       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2226
2227       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2228          loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2229                   indirectOffset, indirectIndex);
2230       }
2231       break;
2232    }
2233    case nir_intrinsic_get_buffer_size: {
2234       LValues &newDefs = convert(&insn->dest);
2235       const DataType dType = getDType(insn);
2236       Value *indirectBuffer;
2237       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2238
2239       Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2240       mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2241       break;
2242    }
2243    case nir_intrinsic_store_ssbo: {
2244       DataType sType = getSType(insn->src[0], false, false);
2245       Value *indirectBuffer;
2246       Value *indirectOffset;
2247       uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2248       uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2249
2250       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2251          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2252             continue;
2253          Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2254                                 offset + i * typeSizeof(sType));
2255          mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2256             ->setIndirect(0, 1, indirectBuffer);
2257       }
2258       info->io.globalAccess |= 0x2;
2259       break;
2260    }
2261    case nir_intrinsic_load_ssbo: {
2262       const DataType dType = getDType(insn);
2263       LValues &newDefs = convert(&insn->dest);
2264       Value *indirectBuffer;
2265       Value *indirectOffset;
2266       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2267       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2268
2269       for (uint8_t i = 0u; i < insn->num_components; ++i)
2270          loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2271                   indirectOffset, indirectBuffer);
2272
2273       info->io.globalAccess |= 0x1;
2274       break;
2275    }
2276    case nir_intrinsic_shared_atomic_add:
2277    case nir_intrinsic_shared_atomic_and:
2278    case nir_intrinsic_shared_atomic_comp_swap:
2279    case nir_intrinsic_shared_atomic_exchange:
2280    case nir_intrinsic_shared_atomic_or:
2281    case nir_intrinsic_shared_atomic_imax:
2282    case nir_intrinsic_shared_atomic_imin:
2283    case nir_intrinsic_shared_atomic_umax:
2284    case nir_intrinsic_shared_atomic_umin:
2285    case nir_intrinsic_shared_atomic_xor: {
2286       const DataType dType = getDType(insn);
2287       LValues &newDefs = convert(&insn->dest);
2288       Value *indirectOffset;
2289       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2290       Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2291       Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2292       if (op == nir_intrinsic_shared_atomic_comp_swap)
2293          atom->setSrc(2, getSrc(&insn->src[2], 0));
2294       atom->setIndirect(0, 0, indirectOffset);
2295       atom->subOp = getSubOp(op);
2296       break;
2297    }
2298    case nir_intrinsic_ssbo_atomic_add:
2299    case nir_intrinsic_ssbo_atomic_and:
2300    case nir_intrinsic_ssbo_atomic_comp_swap:
2301    case nir_intrinsic_ssbo_atomic_exchange:
2302    case nir_intrinsic_ssbo_atomic_or:
2303    case nir_intrinsic_ssbo_atomic_imax:
2304    case nir_intrinsic_ssbo_atomic_imin:
2305    case nir_intrinsic_ssbo_atomic_umax:
2306    case nir_intrinsic_ssbo_atomic_umin:
2307    case nir_intrinsic_ssbo_atomic_xor: {
2308       const DataType dType = getDType(insn);
2309       LValues &newDefs = convert(&insn->dest);
2310       Value *indirectBuffer;
2311       Value *indirectOffset;
2312       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2313       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2314
2315       Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2316       Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2317                                 getSrc(&insn->src[2], 0));
2318       if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2319          atom->setSrc(2, getSrc(&insn->src[3], 0));
2320       atom->setIndirect(0, 0, indirectOffset);
2321       atom->setIndirect(0, 1, indirectBuffer);
2322       atom->subOp = getSubOp(op);
2323
2324       info->io.globalAccess |= 0x2;
2325       break;
2326    }
2327    case nir_intrinsic_global_atomic_add:
2328    case nir_intrinsic_global_atomic_and:
2329    case nir_intrinsic_global_atomic_comp_swap:
2330    case nir_intrinsic_global_atomic_exchange:
2331    case nir_intrinsic_global_atomic_or:
2332    case nir_intrinsic_global_atomic_imax:
2333    case nir_intrinsic_global_atomic_imin:
2334    case nir_intrinsic_global_atomic_umax:
2335    case nir_intrinsic_global_atomic_umin:
2336    case nir_intrinsic_global_atomic_xor: {
2337       const DataType dType = getDType(insn);
2338       LValues &newDefs = convert(&insn->dest);
2339       Value *address;
2340       uint32_t offset = getIndirect(&insn->src[0], 0, address);
2341
2342       Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2343       Instruction *atom =
2344          mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2345       atom->setIndirect(0, 0, address);
2346       atom->subOp = getSubOp(op);
2347
2348       info->io.globalAccess |= 0x2;
2349       break;
2350    }
2351    case nir_intrinsic_bindless_image_atomic_add:
2352    case nir_intrinsic_bindless_image_atomic_and:
2353    case nir_intrinsic_bindless_image_atomic_comp_swap:
2354    case nir_intrinsic_bindless_image_atomic_exchange:
2355    case nir_intrinsic_bindless_image_atomic_imax:
2356    case nir_intrinsic_bindless_image_atomic_umax:
2357    case nir_intrinsic_bindless_image_atomic_imin:
2358    case nir_intrinsic_bindless_image_atomic_umin:
2359    case nir_intrinsic_bindless_image_atomic_or:
2360    case nir_intrinsic_bindless_image_atomic_xor:
2361    case nir_intrinsic_bindless_image_load:
2362    case nir_intrinsic_bindless_image_samples:
2363    case nir_intrinsic_bindless_image_size:
2364    case nir_intrinsic_bindless_image_store: {
2365       std::vector<Value*> srcs, defs;
2366       Value *indirect = getSrc(&insn->src[0], 0);
2367       DataType ty;
2368
2369       uint32_t mask = 0;
2370       TexInstruction::Target target =
2371          convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2372       unsigned int argCount = getNIRArgCount(target);
2373       uint16_t location = 0;
2374
2375       if (opInfo.has_dest) {
2376          LValues &newDefs = convert(&insn->dest);
2377          for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2378             defs.push_back(newDefs[i]);
2379             mask |= 1 << i;
2380          }
2381       }
2382
2383       switch (op) {
2384       case nir_intrinsic_bindless_image_atomic_add:
2385       case nir_intrinsic_bindless_image_atomic_and:
2386       case nir_intrinsic_bindless_image_atomic_comp_swap:
2387       case nir_intrinsic_bindless_image_atomic_exchange:
2388       case nir_intrinsic_bindless_image_atomic_imax:
2389       case nir_intrinsic_bindless_image_atomic_umax:
2390       case nir_intrinsic_bindless_image_atomic_imin:
2391       case nir_intrinsic_bindless_image_atomic_umin:
2392       case nir_intrinsic_bindless_image_atomic_or:
2393       case nir_intrinsic_bindless_image_atomic_xor:
2394          ty = getDType(insn);
2395          mask = 0x1;
2396          info->io.globalAccess |= 0x2;
2397          break;
2398       case nir_intrinsic_bindless_image_load:
2399          ty = TYPE_U32;
2400          info->io.globalAccess |= 0x1;
2401          break;
2402       case nir_intrinsic_bindless_image_store:
2403          ty = TYPE_U32;
2404          mask = 0xf;
2405          info->io.globalAccess |= 0x2;
2406          break;
2407       case nir_intrinsic_bindless_image_samples:
2408          mask = 0x8;
2409          ty = TYPE_U32;
2410          break;
2411       case nir_intrinsic_bindless_image_size:
2412          ty = TYPE_U32;
2413          break;
2414       default:
2415          unreachable("unhandled image opcode");
2416          break;
2417       }
2418
2419       // coords
2420       if (opInfo.num_srcs >= 2)
2421          for (unsigned int i = 0u; i < argCount; ++i)
2422             srcs.push_back(getSrc(&insn->src[1], i));
2423
2424       // the sampler is just another src added after coords
2425       if (opInfo.num_srcs >= 3 && target.isMS())
2426          srcs.push_back(getSrc(&insn->src[2], 0));
2427
2428       if (opInfo.num_srcs >= 4) {
2429          unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2430          for (uint8_t i = 0u; i < components; ++i)
2431             srcs.push_back(getSrc(&insn->src[3], i));
2432       }
2433
2434       if (opInfo.num_srcs >= 5)
2435          // 1 for aotmic swap
2436          for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2437             srcs.push_back(getSrc(&insn->src[4], i));
2438
2439       TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2440       texi->tex.bindless = false;
2441       texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
2442       texi->tex.mask = mask;
2443       texi->tex.bindless = true;
2444       texi->cache = convert(nir_intrinsic_access(insn));
2445       texi->setType(ty);
2446       texi->subOp = getSubOp(op);
2447
2448       if (indirect)
2449          texi->setIndirectR(indirect);
2450
2451       break;
2452    }
2453    case nir_intrinsic_image_deref_atomic_add:
2454    case nir_intrinsic_image_deref_atomic_and:
2455    case nir_intrinsic_image_deref_atomic_comp_swap:
2456    case nir_intrinsic_image_deref_atomic_exchange:
2457    case nir_intrinsic_image_deref_atomic_imax:
2458    case nir_intrinsic_image_deref_atomic_umax:
2459    case nir_intrinsic_image_deref_atomic_imin:
2460    case nir_intrinsic_image_deref_atomic_umin:
2461    case nir_intrinsic_image_deref_atomic_or:
2462    case nir_intrinsic_image_deref_atomic_xor:
2463    case nir_intrinsic_image_deref_load:
2464    case nir_intrinsic_image_deref_samples:
2465    case nir_intrinsic_image_deref_size:
2466    case nir_intrinsic_image_deref_store: {
2467       const nir_variable *tex;
2468       std::vector<Value*> srcs, defs;
2469       Value *indirect;
2470       DataType ty;
2471
2472       uint32_t mask = 0;
2473       nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2474       const glsl_type *type = deref->type;
2475       TexInstruction::Target target =
2476          convert((glsl_sampler_dim)type->sampler_dimensionality,
2477                  type->sampler_array, type->sampler_shadow);
2478       unsigned int argCount = getNIRArgCount(target);
2479       uint16_t location = handleDeref(deref, indirect, tex);
2480
2481       if (opInfo.has_dest) {
2482          LValues &newDefs = convert(&insn->dest);
2483          for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2484             defs.push_back(newDefs[i]);
2485             mask |= 1 << i;
2486          }
2487       }
2488
2489       switch (op) {
2490       case nir_intrinsic_image_deref_atomic_add:
2491       case nir_intrinsic_image_deref_atomic_and:
2492       case nir_intrinsic_image_deref_atomic_comp_swap:
2493       case nir_intrinsic_image_deref_atomic_exchange:
2494       case nir_intrinsic_image_deref_atomic_imax:
2495       case nir_intrinsic_image_deref_atomic_umax:
2496       case nir_intrinsic_image_deref_atomic_imin:
2497       case nir_intrinsic_image_deref_atomic_umin:
2498       case nir_intrinsic_image_deref_atomic_or:
2499       case nir_intrinsic_image_deref_atomic_xor:
2500          ty = getDType(insn);
2501          mask = 0x1;
2502          info->io.globalAccess |= 0x2;
2503          break;
2504       case nir_intrinsic_image_deref_load:
2505          ty = TYPE_U32;
2506          info->io.globalAccess |= 0x1;
2507          break;
2508       case nir_intrinsic_image_deref_store:
2509          ty = TYPE_U32;
2510          mask = 0xf;
2511          info->io.globalAccess |= 0x2;
2512          break;
2513       case nir_intrinsic_image_deref_samples:
2514          mask = 0x8;
2515          ty = TYPE_U32;
2516          break;
2517       case nir_intrinsic_image_deref_size:
2518          ty = TYPE_U32;
2519          break;
2520       default:
2521          unreachable("unhandled image opcode");
2522          break;
2523       }
2524
2525       // coords
2526       if (opInfo.num_srcs >= 2)
2527          for (unsigned int i = 0u; i < argCount; ++i)
2528             srcs.push_back(getSrc(&insn->src[1], i));
2529
2530       // the sampler is just another src added after coords
2531       if (opInfo.num_srcs >= 3 && target.isMS())
2532          srcs.push_back(getSrc(&insn->src[2], 0));
2533
2534       if (opInfo.num_srcs >= 4) {
2535          unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2536          for (uint8_t i = 0u; i < components; ++i)
2537             srcs.push_back(getSrc(&insn->src[3], i));
2538       }
2539
2540       if (opInfo.num_srcs >= 5)
2541          // 1 for aotmic swap
2542          for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2543             srcs.push_back(getSrc(&insn->src[4], i));
2544
2545       TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2546       texi->tex.bindless = false;
2547       texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(tex->data.image.format);
2548       texi->tex.mask = mask;
2549       texi->cache = getCacheModeFromVar(tex);
2550       texi->setType(ty);
2551       texi->subOp = getSubOp(op);
2552
2553       if (indirect)
2554          texi->setIndirectR(indirect);
2555
2556       break;
2557    }
2558    case nir_intrinsic_store_shared: {
2559       DataType sType = getSType(insn->src[0], false, false);
2560       Value *indirectOffset;
2561       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2562
2563       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2564          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2565             continue;
2566          Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2567          mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2568       }
2569       break;
2570    }
2571    case nir_intrinsic_load_shared: {
2572       const DataType dType = getDType(insn);
2573       LValues &newDefs = convert(&insn->dest);
2574       Value *indirectOffset;
2575       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2576
2577       for (uint8_t i = 0u; i < insn->num_components; ++i)
2578          loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2579
2580       break;
2581    }
2582    case nir_intrinsic_control_barrier: {
2583       // TODO: add flag to shader_info
2584       info->numBarriers = 1;
2585       Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2586       bar->fixed = 1;
2587       bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2588       break;
2589    }
2590    case nir_intrinsic_group_memory_barrier:
2591    case nir_intrinsic_memory_barrier:
2592    case nir_intrinsic_memory_barrier_buffer:
2593    case nir_intrinsic_memory_barrier_image:
2594    case nir_intrinsic_memory_barrier_shared: {
2595       Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2596       bar->fixed = 1;
2597       bar->subOp = getSubOp(op);
2598       break;
2599    }
2600    case nir_intrinsic_memory_barrier_tcs_patch:
2601       break;
2602    case nir_intrinsic_shader_clock: {
2603       const DataType dType = getDType(insn);
2604       LValues &newDefs = convert(&insn->dest);
2605
2606       loadImm(newDefs[0], 0u);
2607       mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2608       break;
2609    }
2610    case nir_intrinsic_load_global: {
2611       const DataType dType = getDType(insn);
2612       LValues &newDefs = convert(&insn->dest);
2613       Value *indirectOffset;
2614       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2615
2616       for (auto i = 0u; i < insn->num_components; ++i)
2617          loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2618
2619       info->io.globalAccess |= 0x1;
2620       break;
2621    }
2622    case nir_intrinsic_store_global: {
2623       DataType sType = getSType(insn->src[0], false, false);
2624
2625       for (auto i = 0u; i < insn->num_components; ++i) {
2626          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2627             continue;
2628          if (typeSizeof(sType) == 8) {
2629             Value *split[2];
2630             mkSplit(split, 4, getSrc(&insn->src[0], i));
2631
2632             Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2633             mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2634
2635             sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2636             mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2637          } else {
2638             Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2639             mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2640          }
2641       }
2642
2643       info->io.globalAccess |= 0x2;
2644       break;
2645    }
2646    default:
2647       ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2648       return false;
2649    }
2650
2651    return true;
2652 }
2653
2654 bool
2655 Converter::visit(nir_jump_instr *insn)
2656 {
2657    switch (insn->type) {
2658    case nir_jump_return:
2659       // TODO: this only works in the main function
2660       mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2661       bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2662       break;
2663    case nir_jump_break:
2664    case nir_jump_continue: {
2665       bool isBreak = insn->type == nir_jump_break;
2666       nir_block *block = insn->instr.block;
2667       assert(!block->successors[1]);
2668       BasicBlock *target = convert(block->successors[0]);
2669       mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2670       bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2671       break;
2672    }
2673    default:
2674       ERROR("unknown nir_jump_type %u\n", insn->type);
2675       return false;
2676    }
2677
2678    return true;
2679 }
2680
2681 Value*
2682 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2683 {
2684    Value *val;
2685
2686    if (immInsertPos)
2687       setPosition(immInsertPos, true);
2688    else
2689       setPosition(bb, false);
2690
2691    switch (insn->def.bit_size) {
2692    case 64:
2693       val = loadImm(getSSA(8), insn->value[idx].u64);
2694       break;
2695    case 32:
2696       val = loadImm(getSSA(4), insn->value[idx].u32);
2697       break;
2698    case 16:
2699       val = loadImm(getSSA(2), insn->value[idx].u16);
2700       break;
2701    case 8:
2702       val = loadImm(getSSA(1), insn->value[idx].u8);
2703       break;
2704    default:
2705       unreachable("unhandled bit size!\n");
2706    }
2707    setPosition(bb, true);
2708    return val;
2709 }
2710
2711 bool
2712 Converter::visit(nir_load_const_instr *insn)
2713 {
2714    assert(insn->def.bit_size <= 64);
2715    immediates[insn->def.index] = insn;
2716    return true;
2717 }
2718
2719 #define DEFAULT_CHECKS \
2720       if (insn->dest.dest.ssa.num_components > 1) { \
2721          ERROR("nir_alu_instr only supported with 1 component!\n"); \
2722          return false; \
2723       } \
2724       if (insn->dest.write_mask != 1) { \
2725          ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2726          return false; \
2727       }
2728 bool
2729 Converter::visit(nir_alu_instr *insn)
2730 {
2731    const nir_op op = insn->op;
2732    const nir_op_info &info = nir_op_infos[op];
2733    DataType dType = getDType(insn);
2734    const std::vector<DataType> sTypes = getSTypes(insn);
2735
2736    Instruction *oldPos = this->bb->getExit();
2737
2738    switch (op) {
2739    case nir_op_fabs:
2740    case nir_op_iabs:
2741    case nir_op_fadd:
2742    case nir_op_iadd:
2743    case nir_op_iand:
2744    case nir_op_fceil:
2745    case nir_op_fcos:
2746    case nir_op_fddx:
2747    case nir_op_fddx_coarse:
2748    case nir_op_fddx_fine:
2749    case nir_op_fddy:
2750    case nir_op_fddy_coarse:
2751    case nir_op_fddy_fine:
2752    case nir_op_fdiv:
2753    case nir_op_idiv:
2754    case nir_op_udiv:
2755    case nir_op_fexp2:
2756    case nir_op_ffloor:
2757    case nir_op_ffma:
2758    case nir_op_flog2:
2759    case nir_op_fmax:
2760    case nir_op_imax:
2761    case nir_op_umax:
2762    case nir_op_fmin:
2763    case nir_op_imin:
2764    case nir_op_umin:
2765    case nir_op_fmod:
2766    case nir_op_imod:
2767    case nir_op_umod:
2768    case nir_op_fmul:
2769    case nir_op_imul:
2770    case nir_op_imul_high:
2771    case nir_op_umul_high:
2772    case nir_op_fneg:
2773    case nir_op_ineg:
2774    case nir_op_inot:
2775    case nir_op_ior:
2776    case nir_op_pack_64_2x32_split:
2777    case nir_op_fpow:
2778    case nir_op_frcp:
2779    case nir_op_frem:
2780    case nir_op_irem:
2781    case nir_op_frsq:
2782    case nir_op_fsat:
2783    case nir_op_ishr:
2784    case nir_op_ushr:
2785    case nir_op_fsin:
2786    case nir_op_fsqrt:
2787    case nir_op_ftrunc:
2788    case nir_op_ishl:
2789    case nir_op_ixor: {
2790       DEFAULT_CHECKS;
2791       LValues &newDefs = convert(&insn->dest);
2792       operation preOp = preOperationNeeded(op);
2793       if (preOp != OP_NOP) {
2794          assert(info.num_inputs < 2);
2795          Value *tmp = getSSA(typeSizeof(dType));
2796          Instruction *i0 = mkOp(preOp, dType, tmp);
2797          Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2798          if (info.num_inputs) {
2799             i0->setSrc(0, getSrc(&insn->src[0]));
2800             i1->setSrc(0, tmp);
2801          }
2802          i1->subOp = getSubOp(op);
2803       } else {
2804          Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2805          for (unsigned s = 0u; s < info.num_inputs; ++s) {
2806             i->setSrc(s, getSrc(&insn->src[s]));
2807          }
2808          i->subOp = getSubOp(op);
2809       }
2810       break;
2811    }
2812    case nir_op_ifind_msb:
2813    case nir_op_ufind_msb: {
2814       DEFAULT_CHECKS;
2815       LValues &newDefs = convert(&insn->dest);
2816       dType = sTypes[0];
2817       mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2818       break;
2819    }
2820    case nir_op_fround_even: {
2821       DEFAULT_CHECKS;
2822       LValues &newDefs = convert(&insn->dest);
2823       mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2824       break;
2825    }
2826    // convert instructions
2827    case nir_op_f2f32:
2828    case nir_op_f2i32:
2829    case nir_op_f2u32:
2830    case nir_op_i2f32:
2831    case nir_op_i2i32:
2832    case nir_op_u2f32:
2833    case nir_op_u2u32:
2834    case nir_op_f2f64:
2835    case nir_op_f2i64:
2836    case nir_op_f2u64:
2837    case nir_op_i2f64:
2838    case nir_op_i2i64:
2839    case nir_op_u2f64:
2840    case nir_op_u2u64: {
2841       DEFAULT_CHECKS;
2842       LValues &newDefs = convert(&insn->dest);
2843       Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2844       if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2845          i->rnd = ROUND_Z;
2846       i->sType = sTypes[0];
2847       break;
2848    }
2849    // compare instructions
2850    case nir_op_feq32:
2851    case nir_op_ieq32:
2852    case nir_op_fge32:
2853    case nir_op_ige32:
2854    case nir_op_uge32:
2855    case nir_op_flt32:
2856    case nir_op_ilt32:
2857    case nir_op_ult32:
2858    case nir_op_fne32:
2859    case nir_op_ine32: {
2860       DEFAULT_CHECKS;
2861       LValues &newDefs = convert(&insn->dest);
2862       Instruction *i = mkCmp(getOperation(op),
2863                              getCondCode(op),
2864                              dType,
2865                              newDefs[0],
2866                              dType,
2867                              getSrc(&insn->src[0]),
2868                              getSrc(&insn->src[1]));
2869       if (info.num_inputs == 3)
2870          i->setSrc(2, getSrc(&insn->src[2]));
2871       i->sType = sTypes[0];
2872       break;
2873    }
2874    // those are weird ALU ops and need special handling, because
2875    //   1. they are always componend based
2876    //   2. they basically just merge multiple values into one data type
2877    case nir_op_mov:
2878       if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2879          nir_reg_dest& reg = insn->dest.dest.reg;
2880          uint32_t goffset = regToLmemOffset[reg.reg->index];
2881          uint8_t comps = reg.reg->num_components;
2882          uint8_t size = reg.reg->bit_size / 8;
2883          uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2884          uint32_t aoffset = csize * reg.base_offset;
2885          Value *indirect = NULL;
2886
2887          if (reg.indirect)
2888             indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2889                               getSrc(reg.indirect, 0), mkImm(csize));
2890
2891          for (uint8_t i = 0u; i < comps; ++i) {
2892             if (!((1u << i) & insn->dest.write_mask))
2893                continue;
2894
2895             Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2896             mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2897          }
2898          break;
2899       } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2900          LValues &newDefs = convert(&insn->dest);
2901          nir_reg_src& reg = insn->src[0].src.reg;
2902          uint32_t goffset = regToLmemOffset[reg.reg->index];
2903          // uint8_t comps = reg.reg->num_components;
2904          uint8_t size = reg.reg->bit_size / 8;
2905          uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2906          uint32_t aoffset = csize * reg.base_offset;
2907          Value *indirect = NULL;
2908
2909          if (reg.indirect)
2910             indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2911
2912          for (uint8_t i = 0u; i < newDefs.size(); ++i)
2913             loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2914
2915          break;
2916       } else {
2917          LValues &newDefs = convert(&insn->dest);
2918          for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2919             mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2920          }
2921       }
2922       break;
2923    case nir_op_vec2:
2924    case nir_op_vec3:
2925    case nir_op_vec4:
2926    case nir_op_vec8:
2927    case nir_op_vec16: {
2928       LValues &newDefs = convert(&insn->dest);
2929       for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2930          mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2931       }
2932       break;
2933    }
2934    // (un)pack
2935    case nir_op_pack_64_2x32: {
2936       LValues &newDefs = convert(&insn->dest);
2937       Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2938       merge->setSrc(0, getSrc(&insn->src[0], 0));
2939       merge->setSrc(1, getSrc(&insn->src[0], 1));
2940       break;
2941    }
2942    case nir_op_pack_half_2x16_split: {
2943       LValues &newDefs = convert(&insn->dest);
2944       Value *tmpH = getSSA();
2945       Value *tmpL = getSSA();
2946
2947       mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2948       mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2949       mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2950       break;
2951    }
2952    case nir_op_unpack_half_2x16_split_x:
2953    case nir_op_unpack_half_2x16_split_y: {
2954       LValues &newDefs = convert(&insn->dest);
2955       Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2956       if (op == nir_op_unpack_half_2x16_split_y)
2957          cvt->subOp = 1;
2958       break;
2959    }
2960    case nir_op_unpack_64_2x32: {
2961       LValues &newDefs = convert(&insn->dest);
2962       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2963       break;
2964    }
2965    case nir_op_unpack_64_2x32_split_x: {
2966       LValues &newDefs = convert(&insn->dest);
2967       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2968       break;
2969    }
2970    case nir_op_unpack_64_2x32_split_y: {
2971       LValues &newDefs = convert(&insn->dest);
2972       mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2973       break;
2974    }
2975    // special instructions
2976    case nir_op_fsign:
2977    case nir_op_isign: {
2978       DEFAULT_CHECKS;
2979       DataType iType;
2980       if (::isFloatType(dType))
2981          iType = TYPE_F32;
2982       else
2983          iType = TYPE_S32;
2984
2985       LValues &newDefs = convert(&insn->dest);
2986       LValue *val0 = getScratch();
2987       LValue *val1 = getScratch();
2988       mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2989       mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2990
2991       if (dType == TYPE_F64) {
2992          mkOp2(OP_SUB, iType, val0, val0, val1);
2993          mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2994       } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2995          mkOp2(OP_SUB, iType, val0, val1, val0);
2996          mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2997          mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2998       } else if (::isFloatType(dType))
2999          mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
3000       else
3001          mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
3002       break;
3003    }
3004    case nir_op_fcsel:
3005    case nir_op_b32csel: {
3006       DEFAULT_CHECKS;
3007       LValues &newDefs = convert(&insn->dest);
3008       mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
3009       break;
3010    }
3011    case nir_op_ibitfield_extract:
3012    case nir_op_ubitfield_extract: {
3013       DEFAULT_CHECKS;
3014       Value *tmp = getSSA();
3015       LValues &newDefs = convert(&insn->dest);
3016       mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3017       mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
3018       break;
3019    }
3020    case nir_op_bfm: {
3021       DEFAULT_CHECKS;
3022       LValues &newDefs = convert(&insn->dest);
3023       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3024       break;
3025    }
3026    case nir_op_bitfield_insert: {
3027       DEFAULT_CHECKS;
3028       LValues &newDefs = convert(&insn->dest);
3029       LValue *temp = getSSA();
3030       mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
3031       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
3032       break;
3033    }
3034    case nir_op_bit_count: {
3035       DEFAULT_CHECKS;
3036       LValues &newDefs = convert(&insn->dest);
3037       mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
3038       break;
3039    }
3040    case nir_op_bitfield_reverse: {
3041       DEFAULT_CHECKS;
3042       LValues &newDefs = convert(&insn->dest);
3043       mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3044       break;
3045    }
3046    case nir_op_find_lsb: {
3047       DEFAULT_CHECKS;
3048       LValues &newDefs = convert(&insn->dest);
3049       Value *tmp = getSSA();
3050       mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3051       mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3052       break;
3053    }
3054    // boolean conversions
3055    case nir_op_b2f32: {
3056       DEFAULT_CHECKS;
3057       LValues &newDefs = convert(&insn->dest);
3058       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
3059       break;
3060    }
3061    case nir_op_b2f64: {
3062       DEFAULT_CHECKS;
3063       LValues &newDefs = convert(&insn->dest);
3064       Value *tmp = getSSA(4);
3065       mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
3066       mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
3067       break;
3068    }
3069    case nir_op_f2b32:
3070    case nir_op_i2b32: {
3071       DEFAULT_CHECKS;
3072       LValues &newDefs = convert(&insn->dest);
3073       Value *src1;
3074       if (typeSizeof(sTypes[0]) == 8) {
3075          src1 = loadImm(getSSA(8), 0.0);
3076       } else {
3077          src1 = zero;
3078       }
3079       CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
3080       mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
3081       break;
3082    }
3083    case nir_op_b2i32: {
3084       DEFAULT_CHECKS;
3085       LValues &newDefs = convert(&insn->dest);
3086       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
3087       break;
3088    }
3089    case nir_op_b2i64: {
3090       DEFAULT_CHECKS;
3091       LValues &newDefs = convert(&insn->dest);
3092       LValue *def = getScratch();
3093       mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
3094       mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
3095       break;
3096    }
3097    default:
3098       ERROR("unknown nir_op %s\n", info.name);
3099       return false;
3100    }
3101
3102    if (!oldPos) {
3103       oldPos = this->bb->getEntry();
3104       oldPos->precise = insn->exact;
3105    }
3106
3107    if (unlikely(!oldPos))
3108       return true;
3109
3110    while (oldPos->next) {
3111       oldPos = oldPos->next;
3112       oldPos->precise = insn->exact;
3113    }
3114    oldPos->saturate = insn->dest.saturate;
3115
3116    return true;
3117 }
3118 #undef DEFAULT_CHECKS
3119
3120 bool
3121 Converter::visit(nir_ssa_undef_instr *insn)
3122 {
3123    LValues &newDefs = convert(&insn->def);
3124    for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
3125       mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
3126    }
3127    return true;
3128 }
3129
3130 #define CASE_SAMPLER(ty) \
3131    case GLSL_SAMPLER_DIM_ ## ty : \
3132       if (isArray && !isShadow) \
3133          return TEX_TARGET_ ## ty ## _ARRAY; \
3134       else if (!isArray && isShadow) \
3135          return TEX_TARGET_## ty ## _SHADOW; \
3136       else if (isArray && isShadow) \
3137          return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
3138       else \
3139          return TEX_TARGET_ ## ty
3140
3141 TexTarget
3142 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
3143 {
3144    switch (dim) {
3145    CASE_SAMPLER(1D);
3146    CASE_SAMPLER(2D);
3147    CASE_SAMPLER(CUBE);
3148    case GLSL_SAMPLER_DIM_3D:
3149       return TEX_TARGET_3D;
3150    case GLSL_SAMPLER_DIM_MS:
3151       if (isArray)
3152          return TEX_TARGET_2D_MS_ARRAY;
3153       return TEX_TARGET_2D_MS;
3154    case GLSL_SAMPLER_DIM_RECT:
3155       if (isShadow)
3156          return TEX_TARGET_RECT_SHADOW;
3157       return TEX_TARGET_RECT;
3158    case GLSL_SAMPLER_DIM_BUF:
3159       return TEX_TARGET_BUFFER;
3160    case GLSL_SAMPLER_DIM_EXTERNAL:
3161       return TEX_TARGET_2D;
3162    default:
3163       ERROR("unknown glsl_sampler_dim %u\n", dim);
3164       assert(false);
3165       return TEX_TARGET_COUNT;
3166    }
3167 }
3168 #undef CASE_SAMPLER
3169
3170 Value*
3171 Converter::applyProjection(Value *src, Value *proj)
3172 {
3173    if (!proj)
3174       return src;
3175    return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3176 }
3177
3178 unsigned int
3179 Converter::getNIRArgCount(TexInstruction::Target& target)
3180 {
3181    unsigned int result = target.getArgCount();
3182    if (target.isCube() && target.isArray())
3183       result--;
3184    if (target.isMS())
3185       result--;
3186    return result;
3187 }
3188
3189 uint16_t
3190 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
3191 {
3192    typedef std::pair<uint32_t,Value*> DerefPair;
3193    std::list<DerefPair> derefs;
3194
3195    uint16_t result = 0;
3196    while (deref->deref_type != nir_deref_type_var) {
3197       switch (deref->deref_type) {
3198       case nir_deref_type_array: {
3199          Value *indirect;
3200          uint8_t size = type_size(deref->type, true);
3201          result += size * getIndirect(&deref->arr.index, 0, indirect);
3202
3203          if (indirect) {
3204             derefs.push_front(std::make_pair(size, indirect));
3205          }
3206
3207          break;
3208       }
3209       case nir_deref_type_struct: {
3210          result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
3211          break;
3212       }
3213       case nir_deref_type_var:
3214       default:
3215          unreachable("nir_deref_type_var reached in handleDeref!");
3216          break;
3217       }
3218       deref = nir_deref_instr_parent(deref);
3219    }
3220
3221    indirect = NULL;
3222    for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
3223       Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
3224       if (indirect)
3225          indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
3226       else
3227          indirect = offset;
3228    }
3229
3230    tex = nir_deref_instr_get_variable(deref);
3231    assert(tex);
3232
3233    return result + tex->data.driver_location;
3234 }
3235
3236 CacheMode
3237 Converter::convert(enum gl_access_qualifier access)
3238 {
3239    switch (access) {
3240    case ACCESS_VOLATILE:
3241       return CACHE_CV;
3242    case ACCESS_COHERENT:
3243       return CACHE_CG;
3244    default:
3245       return CACHE_CA;
3246    }
3247 }
3248
3249 CacheMode
3250 Converter::getCacheModeFromVar(const nir_variable *var)
3251 {
3252    return convert(var->data.access);
3253 }
3254
3255 bool
3256 Converter::visit(nir_tex_instr *insn)
3257 {
3258    switch (insn->op) {
3259    case nir_texop_lod:
3260    case nir_texop_query_levels:
3261    case nir_texop_tex:
3262    case nir_texop_texture_samples:
3263    case nir_texop_tg4:
3264    case nir_texop_txb:
3265    case nir_texop_txd:
3266    case nir_texop_txf:
3267    case nir_texop_txf_ms:
3268    case nir_texop_txl:
3269    case nir_texop_txs: {
3270       LValues &newDefs = convert(&insn->dest);
3271       std::vector<Value*> srcs;
3272       std::vector<Value*> defs;
3273       std::vector<nir_src*> offsets;
3274       uint8_t mask = 0;
3275       bool lz = false;
3276       Value *proj = NULL;
3277       TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3278       operation op = getOperation(insn->op);
3279
3280       int r, s;
3281       int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3282       int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3283       int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3284       int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3285       int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3286       int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3287       int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3288       int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3289       int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3290       int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3291       int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3292       int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3293       int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3294
3295       bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3296       assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3297
3298       if (projIdx != -1)
3299          proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3300
3301       srcs.resize(insn->coord_components);
3302       for (uint8_t i = 0u; i < insn->coord_components; ++i)
3303          srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3304
3305       // sometimes we get less args than target.getArgCount, but codegen expects the latter
3306       if (insn->coord_components) {
3307          uint32_t argCount = target.getArgCount();
3308
3309          if (target.isMS())
3310             argCount -= 1;
3311
3312          for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3313             srcs.push_back(getSSA());
3314       }
3315
3316       if (insn->op == nir_texop_texture_samples)
3317          srcs.push_back(zero);
3318       else if (!insn->num_srcs)
3319          srcs.push_back(loadImm(NULL, 0));
3320       if (biasIdx != -1)
3321          srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3322       if (lodIdx != -1)
3323          srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3324       else if (op == OP_TXF)
3325          lz = true;
3326       if (msIdx != -1)
3327          srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3328       if (offsetIdx != -1)
3329          offsets.push_back(&insn->src[offsetIdx].src);
3330       if (compIdx != -1)
3331          srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3332       if (texOffIdx != -1) {
3333          srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3334          texOffIdx = srcs.size() - 1;
3335       }
3336       if (sampOffIdx != -1) {
3337          srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3338          sampOffIdx = srcs.size() - 1;
3339       }
3340       if (bindless) {
3341          // currently we use the lower bits
3342          Value *split[2];
3343          Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3344
3345          mkSplit(split, 4, handle);
3346
3347          srcs.push_back(split[0]);
3348          texOffIdx = srcs.size() - 1;
3349       }
3350
3351       r = bindless ? 0xff : insn->texture_index;
3352       s = bindless ? 0x1f : insn->sampler_index;
3353
3354       defs.resize(newDefs.size());
3355       for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3356          defs[d] = newDefs[d];
3357          mask |= 1 << d;
3358       }
3359       if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3360          lz = true;
3361
3362       TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3363       texi->tex.levelZero = lz;
3364       texi->tex.mask = mask;
3365       texi->tex.bindless = bindless;
3366
3367       if (texOffIdx != -1)
3368          texi->tex.rIndirectSrc = texOffIdx;
3369       if (sampOffIdx != -1)
3370          texi->tex.sIndirectSrc = sampOffIdx;
3371
3372       switch (insn->op) {
3373       case nir_texop_tg4:
3374          if (!target.isShadow())
3375             texi->tex.gatherComp = insn->component;
3376          break;
3377       case nir_texop_txs:
3378          texi->tex.query = TXQ_DIMS;
3379          break;
3380       case nir_texop_texture_samples:
3381          texi->tex.mask = 0x4;
3382          texi->tex.query = TXQ_TYPE;
3383          break;
3384       case nir_texop_query_levels:
3385          texi->tex.mask = 0x8;
3386          texi->tex.query = TXQ_DIMS;
3387          break;
3388       default:
3389          break;
3390       }
3391
3392       texi->tex.useOffsets = offsets.size();
3393       if (texi->tex.useOffsets) {
3394          for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3395             for (uint32_t c = 0u; c < 3; ++c) {
3396                uint8_t s2 = std::min(c, target.getDim() - 1);
3397                texi->offset[s][c].set(getSrc(offsets[s], s2));
3398                texi->offset[s][c].setInsn(texi);
3399             }
3400          }
3401       }
3402
3403       if (op == OP_TXG && offsetIdx == -1) {
3404          if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3405             texi->tex.useOffsets = 4;
3406             setPosition(texi, false);
3407             for (uint8_t i = 0; i < 4; ++i) {
3408                for (uint8_t j = 0; j < 2; ++j) {
3409                   texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3410                   texi->offset[i][j].setInsn(texi);
3411                }
3412             }
3413             setPosition(texi, true);
3414          }
3415       }
3416
3417       if (ddxIdx != -1 && ddyIdx != -1) {
3418          for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3419             texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3420             texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3421          }
3422       }
3423
3424       break;
3425    }
3426    default:
3427       ERROR("unknown nir_texop %u\n", insn->op);
3428       return false;
3429    }
3430    return true;
3431 }
3432
3433 bool
3434 Converter::visit(nir_deref_instr *deref)
3435 {
3436    // we just ignore those, because images intrinsics are the only place where
3437    // we should end up with deref sources and those have to backtrack anyway
3438    // to get the nir_variable. This code just exists to handle some special
3439    // cases.
3440    switch (deref->deref_type) {
3441    case nir_deref_type_array:
3442    case nir_deref_type_struct:
3443    case nir_deref_type_var:
3444       break;
3445    default:
3446       ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3447       return false;
3448    }
3449    return true;
3450 }
3451
3452 bool
3453 Converter::run()
3454 {
3455    bool progress;
3456
3457    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3458       nir_print_shader(nir, stderr);
3459
3460    struct nir_lower_subgroups_options subgroup_options = {
3461       .subgroup_size = 32,
3462       .ballot_bit_size = 32,
3463    };
3464
3465    NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3466    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3467    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3468    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3469    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3470    NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3471    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3472
3473    do {
3474       progress = false;
3475       NIR_PASS(progress, nir, nir_copy_prop);
3476       NIR_PASS(progress, nir, nir_opt_remove_phis);
3477       NIR_PASS(progress, nir, nir_opt_trivial_continues);
3478       NIR_PASS(progress, nir, nir_opt_cse);
3479       NIR_PASS(progress, nir, nir_opt_algebraic);
3480       NIR_PASS(progress, nir, nir_opt_constant_folding);
3481       NIR_PASS(progress, nir, nir_copy_prop);
3482       NIR_PASS(progress, nir, nir_opt_dce);
3483       NIR_PASS(progress, nir, nir_opt_dead_cf);
3484    } while (progress);
3485
3486    NIR_PASS_V(nir, nir_lower_bool_to_int32);
3487    NIR_PASS_V(nir, nir_lower_locals_to_regs);
3488    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3489    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3490
3491    // Garbage collect dead instructions
3492    nir_sweep(nir);
3493
3494    if (!parseNIR()) {
3495       ERROR("Couldn't prase NIR!\n");
3496       return false;
3497    }
3498
3499    if (!assignSlots()) {
3500       ERROR("Couldn't assign slots!\n");
3501       return false;
3502    }
3503
3504    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3505       nir_print_shader(nir, stderr);
3506
3507    nir_foreach_function(function, nir) {
3508       if (!visit(function))
3509          return false;
3510    }
3511
3512    return true;
3513 }
3514
3515 } // unnamed namespace
3516
3517 namespace nv50_ir {
3518
3519 bool
3520 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3521 {
3522    nir_shader *nir = (nir_shader*)info->bin.source;
3523    Converter converter(this, nir, info);
3524    bool result = converter.run();
3525    if (!result)
3526       return result;
3527    LoweringHelper lowering;
3528    lowering.run(this);
3529    tlsSize = info->bin.tlsSpace;
3530    return result;
3531 }
3532
3533 } // namespace nv50_ir