src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

   1 /*
   2  * Copyright 2017 Red Hat Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: Karol Herbst <kherbst@redhat.com>
  23  */
  24
  25 #include "compiler/nir/nir.h"
  26
  27 #include "util/u_debug.h"
  28
  29 #include "codegen/nv50_ir.h"
  30 #include "codegen/nv50_ir_from_common.h"
  31 #include "codegen/nv50_ir_lowering_helper.h"
  32 #include "codegen/nv50_ir_util.h"
  33
  34 #if __cplusplus >= 201103L
  35 #include <unordered_map>
  36 #else
  37 #include <tr1/unordered_map>
  38 #endif
  39 #include <cstring>
  40 #include <list>
  41 #include <vector>
  42
  43 namespace {
  44
  45 #if __cplusplus >= 201103L
  46 using std::hash;
  47 using std::unordered_map;
  48 #else
  49 using std::tr1::hash;
  50 using std::tr1::unordered_map;
  51 #endif
  52
  53 using namespace nv50_ir;
  54
  55 int
  56 type_size(const struct glsl_type *type, bool bindless)
  57 {
  58    return glsl_count_attribute_slots(type, false);
  59 }
  60
  61 class Converter : public ConverterCommon
  62 {
  63 public:
  64    Converter(Program *, nir_shader *, nv50_ir_prog_info *);
  65
  66    bool run();
  67 private:
  68    typedef std::vector<LValue*> LValues;
  69    typedef unordered_map<unsigned, LValues> NirDefMap;
  70    typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
  71    typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
  72    typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
  73
  74    CacheMode convert(enum gl_access_qualifier);
  75    TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
  76    LValues& convert(nir_alu_dest *);
  77    BasicBlock* convert(nir_block *);
  78    LValues& convert(nir_dest *);
  79    SVSemantic convert(nir_intrinsic_op);
  80    Value* convert(nir_load_const_instr*, uint8_t);
  81    LValues& convert(nir_register *);
  82    LValues& convert(nir_ssa_def *);
  83
  84    ImgFormat convertGLImgFormat(GLuint);
  85
  86    Value* getSrc(nir_alu_src *, uint8_t component = 0);
  87    Value* getSrc(nir_register *, uint8_t);
  88    Value* getSrc(nir_src *, uint8_t, bool indirect = false);
  89    Value* getSrc(nir_ssa_def *, uint8_t);
  90
  91    // returned value is the constant part of the given source (either the
  92    // nir_src or the selected source component of an intrinsic). Even though
  93    // this is mostly an optimization to be able to skip indirects in a few
  94    // cases, sometimes we require immediate values or set some fileds on
  95    // instructions (e.g. tex) in order for codegen to consume those.
  96    // If the found value has not a constant part, the Value gets returned
  97    // through the Value parameter.
  98    uint32_t getIndirect(nir_src *, uint8_t, Value *&);
  99    // isScalar indicates that the addressing is scalar, vec4 addressing is
 100    // assumed otherwise
 101    uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
 102                         bool isScalar = false);
 103
 104    uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
 105
 106    void setInterpolate(nv50_ir_varying *,
 107                        uint8_t,
 108                        bool centroid,
 109                        unsigned semantics);
 110
 111    Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
 112                          uint8_t c, Value *indirect0 = NULL,
 113                          Value *indirect1 = NULL, bool patch = false);
 114    void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
 115                 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
 116                 Value *indirect1 = NULL);
 117
 118    bool isFloatType(nir_alu_type);
 119    bool isSignedType(nir_alu_type);
 120    bool isResultFloat(nir_op);
 121    bool isResultSigned(nir_op);
 122
 123    DataType getDType(nir_alu_instr *);
 124    DataType getDType(nir_intrinsic_instr *);
 125    DataType getDType(nir_intrinsic_instr *, bool isSigned);
 126    DataType getDType(nir_op, uint8_t);
 127
 128    std::vector<DataType> getSTypes(nir_alu_instr *);
 129    DataType getSType(nir_src &, bool isFloat, bool isSigned);
 130
 131    operation getOperation(nir_intrinsic_op);
 132    operation getOperation(nir_op);
 133    operation getOperation(nir_texop);
 134    operation preOperationNeeded(nir_op);
 135
 136    int getSubOp(nir_intrinsic_op);
 137    int getSubOp(nir_op);
 138
 139    CondCode getCondCode(nir_op);
 140
 141    bool assignSlots();
 142    bool parseNIR();
 143
 144    bool visit(nir_alu_instr *);
 145    bool visit(nir_block *);
 146    bool visit(nir_cf_node *);
 147    bool visit(nir_deref_instr *);
 148    bool visit(nir_function *);
 149    bool visit(nir_if *);
 150    bool visit(nir_instr *);
 151    bool visit(nir_intrinsic_instr *);
 152    bool visit(nir_jump_instr *);
 153    bool visit(nir_load_const_instr*);
 154    bool visit(nir_loop *);
 155    bool visit(nir_ssa_undef_instr *);
 156    bool visit(nir_tex_instr *);
 157
 158    // tex stuff
 159    Value* applyProjection(Value *src, Value *proj);
 160    unsigned int getNIRArgCount(TexInstruction::Target&);
 161
 162    // image stuff
 163    uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
 164    CacheMode getCacheModeFromVar(const nir_variable *);
 165
 166    nir_shader *nir;
 167
 168    NirDefMap ssaDefs;
 169    NirDefMap regDefs;
 170    ImmediateMap immediates;
 171    NirArrayLMemOffsets regToLmemOffset;
 172    NirBlockMap blocks;
 173    unsigned int curLoopDepth;
 174
 175    BasicBlock *exit;
 176    Value *zero;
 177    Instruction *immInsertPos;
 178
 179    int clipVertexOutput;
 180
 181    union {
 182       struct {
 183          Value *position;
 184       } fp;
 185    };
 186 };
 187
 188 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
 189    : ConverterCommon(prog, info),
 190      nir(nir),
 191      curLoopDepth(0),
 192      clipVertexOutput(-1)
 193 {
 194    zero = mkImm((uint32_t)0);
 195 }
 196
 197 BasicBlock *
 198 Converter::convert(nir_block *block)
 199 {
 200    NirBlockMap::iterator it = blocks.find(block->index);
 201    if (it != blocks.end())
 202       return it->second;
 203
 204    BasicBlock *bb = new BasicBlock(func);
 205    blocks[block->index] = bb;
 206    return bb;
 207 }
 208
 209 bool
 210 Converter::isFloatType(nir_alu_type type)
 211 {
 212    return nir_alu_type_get_base_type(type) == nir_type_float;
 213 }
 214
 215 bool
 216 Converter::isSignedType(nir_alu_type type)
 217 {
 218    return nir_alu_type_get_base_type(type) == nir_type_int;
 219 }
 220
 221 bool
 222 Converter::isResultFloat(nir_op op)
 223 {
 224    const nir_op_info &info = nir_op_infos[op];
 225    if (info.output_type != nir_type_invalid)
 226       return isFloatType(info.output_type);
 227
 228    ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
 229    assert(false);
 230    return true;
 231 }
 232
 233 bool
 234 Converter::isResultSigned(nir_op op)
 235 {
 236    switch (op) {
 237    // there is no umul and we get wrong results if we treat all muls as signed
 238    case nir_op_imul:
 239    case nir_op_inot:
 240       return false;
 241    default:
 242       const nir_op_info &info = nir_op_infos[op];
 243       if (info.output_type != nir_type_invalid)
 244          return isSignedType(info.output_type);
 245       ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
 246       assert(false);
 247       return true;
 248    }
 249 }
 250
 251 DataType
 252 Converter::getDType(nir_alu_instr *insn)
 253 {
 254    if (insn->dest.dest.is_ssa)
 255       return getDType(insn->op, insn->dest.dest.ssa.bit_size);
 256    else
 257       return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
 258 }
 259
 260 DataType
 261 Converter::getDType(nir_intrinsic_instr *insn)
 262 {
 263    bool isSigned;
 264    switch (insn->intrinsic) {
 265    case nir_intrinsic_shared_atomic_imax:
 266    case nir_intrinsic_shared_atomic_imin:
 267    case nir_intrinsic_ssbo_atomic_imax:
 268    case nir_intrinsic_ssbo_atomic_imin:
 269       isSigned = true;
 270       break;
 271    default:
 272       isSigned = false;
 273       break;
 274    }
 275
 276    return getDType(insn, isSigned);
 277 }
 278
 279 DataType
 280 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
 281 {
 282    if (insn->dest.is_ssa)
 283       return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
 284    else
 285       return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
 286 }
 287
 288 DataType
 289 Converter::getDType(nir_op op, uint8_t bitSize)
 290 {
 291    DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
 292    if (ty == TYPE_NONE) {
 293       ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
 294       assert(false);
 295    }
 296    return ty;
 297 }
 298
 299 std::vector<DataType>
 300 Converter::getSTypes(nir_alu_instr *insn)
 301 {
 302    const nir_op_info &info = nir_op_infos[insn->op];
 303    std::vector<DataType> res(info.num_inputs);
 304
 305    for (uint8_t i = 0; i < info.num_inputs; ++i) {
 306       if (info.input_types[i] != nir_type_invalid) {
 307          res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
 308       } else {
 309          ERROR("getSType not implemented for %s idx %u\n", info.name, i);
 310          assert(false);
 311          res[i] = TYPE_NONE;
 312          break;
 313       }
 314    }
 315
 316    return res;
 317 }
 318
 319 DataType
 320 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
 321 {
 322    uint8_t bitSize;
 323    if (src.is_ssa)
 324       bitSize = src.ssa->bit_size;
 325    else
 326       bitSize = src.reg.reg->bit_size;
 327
 328    DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
 329    if (ty == TYPE_NONE) {
 330       const char *str;
 331       if (isFloat)
 332          str = "float";
 333       else if (isSigned)
 334          str = "int";
 335       else
 336          str = "uint";
 337       ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
 338       assert(false);
 339    }
 340    return ty;
 341 }
 342
 343 operation
 344 Converter::getOperation(nir_op op)
 345 {
 346    switch (op) {
 347    // basic ops with float and int variants
 348    case nir_op_fabs:
 349    case nir_op_iabs:
 350       return OP_ABS;
 351    case nir_op_fadd:
 352    case nir_op_iadd:
 353       return OP_ADD;
 354    case nir_op_iand:
 355       return OP_AND;
 356    case nir_op_ifind_msb:
 357    case nir_op_ufind_msb:
 358       return OP_BFIND;
 359    case nir_op_fceil:
 360       return OP_CEIL;
 361    case nir_op_fcos:
 362       return OP_COS;
 363    case nir_op_f2f32:
 364    case nir_op_f2f64:
 365    case nir_op_f2i32:
 366    case nir_op_f2i64:
 367    case nir_op_f2u32:
 368    case nir_op_f2u64:
 369    case nir_op_i2f32:
 370    case nir_op_i2f64:
 371    case nir_op_i2i32:
 372    case nir_op_i2i64:
 373    case nir_op_u2f32:
 374    case nir_op_u2f64:
 375    case nir_op_u2u32:
 376    case nir_op_u2u64:
 377       return OP_CVT;
 378    case nir_op_fddx:
 379    case nir_op_fddx_coarse:
 380    case nir_op_fddx_fine:
 381       return OP_DFDX;
 382    case nir_op_fddy:
 383    case nir_op_fddy_coarse:
 384    case nir_op_fddy_fine:
 385       return OP_DFDY;
 386    case nir_op_fdiv:
 387    case nir_op_idiv:
 388    case nir_op_udiv:
 389       return OP_DIV;
 390    case nir_op_fexp2:
 391       return OP_EX2;
 392    case nir_op_ffloor:
 393       return OP_FLOOR;
 394    case nir_op_ffma:
 395       return OP_FMA;
 396    case nir_op_flog2:
 397       return OP_LG2;
 398    case nir_op_fmax:
 399    case nir_op_imax:
 400    case nir_op_umax:
 401       return OP_MAX;
 402    case nir_op_pack_64_2x32_split:
 403       return OP_MERGE;
 404    case nir_op_fmin:
 405    case nir_op_imin:
 406    case nir_op_umin:
 407       return OP_MIN;
 408    case nir_op_fmod:
 409    case nir_op_imod:
 410    case nir_op_umod:
 411    case nir_op_frem:
 412    case nir_op_irem:
 413       return OP_MOD;
 414    case nir_op_fmul:
 415    case nir_op_imul:
 416    case nir_op_imul_high:
 417    case nir_op_umul_high:
 418       return OP_MUL;
 419    case nir_op_fneg:
 420    case nir_op_ineg:
 421       return OP_NEG;
 422    case nir_op_inot:
 423       return OP_NOT;
 424    case nir_op_ior:
 425       return OP_OR;
 426    case nir_op_fpow:
 427       return OP_POW;
 428    case nir_op_frcp:
 429       return OP_RCP;
 430    case nir_op_frsq:
 431       return OP_RSQ;
 432    case nir_op_fsat:
 433       return OP_SAT;
 434    case nir_op_feq32:
 435    case nir_op_ieq32:
 436    case nir_op_fge32:
 437    case nir_op_ige32:
 438    case nir_op_uge32:
 439    case nir_op_flt32:
 440    case nir_op_ilt32:
 441    case nir_op_ult32:
 442    case nir_op_fne32:
 443    case nir_op_ine32:
 444       return OP_SET;
 445    case nir_op_ishl:
 446       return OP_SHL;
 447    case nir_op_ishr:
 448    case nir_op_ushr:
 449       return OP_SHR;
 450    case nir_op_fsin:
 451       return OP_SIN;
 452    case nir_op_fsqrt:
 453       return OP_SQRT;
 454    case nir_op_fsub:
 455    case nir_op_isub:
 456       return OP_SUB;
 457    case nir_op_ftrunc:
 458       return OP_TRUNC;
 459    case nir_op_ixor:
 460       return OP_XOR;
 461    default:
 462       ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
 463       assert(false);
 464       return OP_NOP;
 465    }
 466 }
 467
 468 operation
 469 Converter::getOperation(nir_texop op)
 470 {
 471    switch (op) {
 472    case nir_texop_tex:
 473       return OP_TEX;
 474    case nir_texop_lod:
 475       return OP_TXLQ;
 476    case nir_texop_txb:
 477       return OP_TXB;
 478    case nir_texop_txd:
 479       return OP_TXD;
 480    case nir_texop_txf:
 481    case nir_texop_txf_ms:
 482       return OP_TXF;
 483    case nir_texop_tg4:
 484       return OP_TXG;
 485    case nir_texop_txl:
 486       return OP_TXL;
 487    case nir_texop_query_levels:
 488    case nir_texop_texture_samples:
 489    case nir_texop_txs:
 490       return OP_TXQ;
 491    default:
 492       ERROR("couldn't get operation for nir_texop %u\n", op);
 493       assert(false);
 494       return OP_NOP;
 495    }
 496 }
 497
 498 operation
 499 Converter::getOperation(nir_intrinsic_op op)
 500 {
 501    switch (op) {
 502    case nir_intrinsic_emit_vertex:
 503       return OP_EMIT;
 504    case nir_intrinsic_end_primitive:
 505       return OP_RESTART;
 506    case nir_intrinsic_bindless_image_atomic_add:
 507    case nir_intrinsic_image_atomic_add:
 508    case nir_intrinsic_image_deref_atomic_add:
 509    case nir_intrinsic_bindless_image_atomic_and:
 510    case nir_intrinsic_image_atomic_and:
 511    case nir_intrinsic_image_deref_atomic_and:
 512    case nir_intrinsic_bindless_image_atomic_comp_swap:
 513    case nir_intrinsic_image_atomic_comp_swap:
 514    case nir_intrinsic_image_deref_atomic_comp_swap:
 515    case nir_intrinsic_bindless_image_atomic_exchange:
 516    case nir_intrinsic_image_atomic_exchange:
 517    case nir_intrinsic_image_deref_atomic_exchange:
 518    case nir_intrinsic_bindless_image_atomic_imax:
 519    case nir_intrinsic_image_atomic_imax:
 520    case nir_intrinsic_image_deref_atomic_imax:
 521    case nir_intrinsic_bindless_image_atomic_umax:
 522    case nir_intrinsic_image_atomic_umax:
 523    case nir_intrinsic_image_deref_atomic_umax:
 524    case nir_intrinsic_bindless_image_atomic_imin:
 525    case nir_intrinsic_image_atomic_imin:
 526    case nir_intrinsic_image_deref_atomic_imin:
 527    case nir_intrinsic_bindless_image_atomic_umin:
 528    case nir_intrinsic_image_atomic_umin:
 529    case nir_intrinsic_image_deref_atomic_umin:
 530    case nir_intrinsic_bindless_image_atomic_or:
 531    case nir_intrinsic_image_atomic_or:
 532    case nir_intrinsic_image_deref_atomic_or:
 533    case nir_intrinsic_bindless_image_atomic_xor:
 534    case nir_intrinsic_image_atomic_xor:
 535    case nir_intrinsic_image_deref_atomic_xor:
 536       return OP_SUREDP;
 537    case nir_intrinsic_bindless_image_load:
 538    case nir_intrinsic_image_load:
 539    case nir_intrinsic_image_deref_load:
 540       return OP_SULDP;
 541    case nir_intrinsic_bindless_image_samples:
 542    case nir_intrinsic_image_samples:
 543    case nir_intrinsic_image_deref_samples:
 544    case nir_intrinsic_bindless_image_size:
 545    case nir_intrinsic_image_size:
 546    case nir_intrinsic_image_deref_size:
 547       return OP_SUQ;
 548    case nir_intrinsic_bindless_image_store:
 549    case nir_intrinsic_image_store:
 550    case nir_intrinsic_image_deref_store:
 551       return OP_SUSTP;
 552    default:
 553       ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
 554       assert(false);
 555       return OP_NOP;
 556    }
 557 }
 558
 559 operation
 560 Converter::preOperationNeeded(nir_op op)
 561 {
 562    switch (op) {
 563    case nir_op_fcos:
 564    case nir_op_fsin:
 565       return OP_PRESIN;
 566    default:
 567       return OP_NOP;
 568    }
 569 }
 570
 571 int
 572 Converter::getSubOp(nir_op op)
 573 {
 574    switch (op) {
 575    case nir_op_imul_high:
 576    case nir_op_umul_high:
 577       return NV50_IR_SUBOP_MUL_HIGH;
 578    default:
 579       return 0;
 580    }
 581 }
 582
 583 int
 584 Converter::getSubOp(nir_intrinsic_op op)
 585 {
 586    switch (op) {
 587    case nir_intrinsic_bindless_image_atomic_add:
 588    case nir_intrinsic_image_atomic_add:
 589    case nir_intrinsic_image_deref_atomic_add:
 590    case nir_intrinsic_shared_atomic_add:
 591    case nir_intrinsic_ssbo_atomic_add:
 592       return  NV50_IR_SUBOP_ATOM_ADD;
 593    case nir_intrinsic_bindless_image_atomic_and:
 594    case nir_intrinsic_image_atomic_and:
 595    case nir_intrinsic_image_deref_atomic_and:
 596    case nir_intrinsic_shared_atomic_and:
 597    case nir_intrinsic_ssbo_atomic_and:
 598       return  NV50_IR_SUBOP_ATOM_AND;
 599    case nir_intrinsic_bindless_image_atomic_comp_swap:
 600    case nir_intrinsic_image_atomic_comp_swap:
 601    case nir_intrinsic_image_deref_atomic_comp_swap:
 602    case nir_intrinsic_shared_atomic_comp_swap:
 603    case nir_intrinsic_ssbo_atomic_comp_swap:
 604       return  NV50_IR_SUBOP_ATOM_CAS;
 605    case nir_intrinsic_bindless_image_atomic_exchange:
 606    case nir_intrinsic_image_atomic_exchange:
 607    case nir_intrinsic_image_deref_atomic_exchange:
 608    case nir_intrinsic_shared_atomic_exchange:
 609    case nir_intrinsic_ssbo_atomic_exchange:
 610       return  NV50_IR_SUBOP_ATOM_EXCH;
 611    case nir_intrinsic_bindless_image_atomic_or:
 612    case nir_intrinsic_image_atomic_or:
 613    case nir_intrinsic_image_deref_atomic_or:
 614    case nir_intrinsic_shared_atomic_or:
 615    case nir_intrinsic_ssbo_atomic_or:
 616       return  NV50_IR_SUBOP_ATOM_OR;
 617    case nir_intrinsic_bindless_image_atomic_imax:
 618    case nir_intrinsic_image_atomic_imax:
 619    case nir_intrinsic_image_deref_atomic_imax:
 620    case nir_intrinsic_bindless_image_atomic_umax:
 621    case nir_intrinsic_image_atomic_umax:
 622    case nir_intrinsic_image_deref_atomic_umax:
 623    case nir_intrinsic_shared_atomic_imax:
 624    case nir_intrinsic_shared_atomic_umax:
 625    case nir_intrinsic_ssbo_atomic_imax:
 626    case nir_intrinsic_ssbo_atomic_umax:
 627       return  NV50_IR_SUBOP_ATOM_MAX;
 628    case nir_intrinsic_bindless_image_atomic_imin:
 629    case nir_intrinsic_image_atomic_imin:
 630    case nir_intrinsic_image_deref_atomic_imin:
 631    case nir_intrinsic_bindless_image_atomic_umin:
 632    case nir_intrinsic_image_atomic_umin:
 633    case nir_intrinsic_image_deref_atomic_umin:
 634    case nir_intrinsic_shared_atomic_imin:
 635    case nir_intrinsic_shared_atomic_umin:
 636    case nir_intrinsic_ssbo_atomic_imin:
 637    case nir_intrinsic_ssbo_atomic_umin:
 638       return  NV50_IR_SUBOP_ATOM_MIN;
 639    case nir_intrinsic_bindless_image_atomic_xor:
 640    case nir_intrinsic_image_atomic_xor:
 641    case nir_intrinsic_image_deref_atomic_xor:
 642    case nir_intrinsic_shared_atomic_xor:
 643    case nir_intrinsic_ssbo_atomic_xor:
 644       return  NV50_IR_SUBOP_ATOM_XOR;
 645
 646    case nir_intrinsic_group_memory_barrier:
 647    case nir_intrinsic_memory_barrier:
 648    case nir_intrinsic_memory_barrier_atomic_counter:
 649    case nir_intrinsic_memory_barrier_buffer:
 650    case nir_intrinsic_memory_barrier_image:
 651       return NV50_IR_SUBOP_MEMBAR(M, GL);
 652    case nir_intrinsic_memory_barrier_shared:
 653       return NV50_IR_SUBOP_MEMBAR(M, CTA);
 654
 655    case nir_intrinsic_vote_all:
 656       return NV50_IR_SUBOP_VOTE_ALL;
 657    case nir_intrinsic_vote_any:
 658       return NV50_IR_SUBOP_VOTE_ANY;
 659    case nir_intrinsic_vote_ieq:
 660       return NV50_IR_SUBOP_VOTE_UNI;
 661    default:
 662       return 0;
 663    }
 664 }
 665
 666 CondCode
 667 Converter::getCondCode(nir_op op)
 668 {
 669    switch (op) {
 670    case nir_op_feq32:
 671    case nir_op_ieq32:
 672       return CC_EQ;
 673    case nir_op_fge32:
 674    case nir_op_ige32:
 675    case nir_op_uge32:
 676       return CC_GE;
 677    case nir_op_flt32:
 678    case nir_op_ilt32:
 679    case nir_op_ult32:
 680       return CC_LT;
 681    case nir_op_fne32:
 682       return CC_NEU;
 683    case nir_op_ine32:
 684       return CC_NE;
 685    default:
 686       ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
 687       assert(false);
 688       return CC_FL;
 689    }
 690 }
 691
 692 Converter::LValues&
 693 Converter::convert(nir_alu_dest *dest)
 694 {
 695    return convert(&dest->dest);
 696 }
 697
 698 Converter::LValues&
 699 Converter::convert(nir_dest *dest)
 700 {
 701    if (dest->is_ssa)
 702       return convert(&dest->ssa);
 703    if (dest->reg.indirect) {
 704       ERROR("no support for indirects.");
 705       assert(false);
 706    }
 707    return convert(dest->reg.reg);
 708 }
 709
 710 Converter::LValues&
 711 Converter::convert(nir_register *reg)
 712 {
 713    NirDefMap::iterator it = regDefs.find(reg->index);
 714    if (it != regDefs.end())
 715       return it->second;
 716
 717    LValues newDef(reg->num_components);
 718    for (uint8_t i = 0; i < reg->num_components; i++)
 719       newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
 720    return regDefs[reg->index] = newDef;
 721 }
 722
 723 Converter::LValues&
 724 Converter::convert(nir_ssa_def *def)
 725 {
 726    NirDefMap::iterator it = ssaDefs.find(def->index);
 727    if (it != ssaDefs.end())
 728       return it->second;
 729
 730    LValues newDef(def->num_components);
 731    for (uint8_t i = 0; i < def->num_components; i++)
 732       newDef[i] = getSSA(std::max(4, def->bit_size / 8));
 733    return ssaDefs[def->index] = newDef;
 734 }
 735
 736 Value*
 737 Converter::getSrc(nir_alu_src *src, uint8_t component)
 738 {
 739    if (src->abs || src->negate) {
 740       ERROR("modifiers currently not supported on nir_alu_src\n");
 741       assert(false);
 742    }
 743    return getSrc(&src->src, src->swizzle[component]);
 744 }
 745
 746 Value*
 747 Converter::getSrc(nir_register *reg, uint8_t idx)
 748 {
 749    NirDefMap::iterator it = regDefs.find(reg->index);
 750    if (it == regDefs.end())
 751       return convert(reg)[idx];
 752    return it->second[idx];
 753 }
 754
 755 Value*
 756 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
 757 {
 758    if (src->is_ssa)
 759       return getSrc(src->ssa, idx);
 760
 761    if (src->reg.indirect) {
 762       if (indirect)
 763          return getSrc(src->reg.indirect, idx);
 764       ERROR("no support for indirects.");
 765       assert(false);
 766       return NULL;
 767    }
 768
 769    return getSrc(src->reg.reg, idx);
 770 }
 771
 772 Value*
 773 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
 774 {
 775    ImmediateMap::iterator iit = immediates.find(src->index);
 776    if (iit != immediates.end())
 777       return convert((*iit).second, idx);
 778
 779    NirDefMap::iterator it = ssaDefs.find(src->index);
 780    if (it == ssaDefs.end()) {
 781       ERROR("SSA value %u not found\n", src->index);
 782       assert(false);
 783       return NULL;
 784    }
 785    return it->second[idx];
 786 }
 787
 788 uint32_t
 789 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
 790 {
 791    nir_const_value *offset = nir_src_as_const_value(*src);
 792
 793    if (offset) {
 794       indirect = NULL;
 795       return offset[0].u32;
 796    }
 797
 798    indirect = getSrc(src, idx, true);
 799    return 0;
 800 }
 801
 802 uint32_t
 803 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
 804 {
 805    int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
 806    if (indirect && !isScalar)
 807       indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
 808    return idx;
 809 }
 810
 811 static void
 812 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
 813 {
 814    assert(name && index);
 815
 816    if (slot >= VERT_ATTRIB_MAX) {
 817       ERROR("invalid varying slot %u\n", slot);
 818       assert(false);
 819       return;
 820    }
 821
 822    if (slot >= VERT_ATTRIB_GENERIC0 &&
 823        slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
 824       *name = TGSI_SEMANTIC_GENERIC;
 825       *index = slot - VERT_ATTRIB_GENERIC0;
 826       return;
 827    }
 828
 829    if (slot >= VERT_ATTRIB_TEX0 &&
 830        slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
 831       *name = TGSI_SEMANTIC_TEXCOORD;
 832       *index = slot - VERT_ATTRIB_TEX0;
 833       return;
 834    }
 835
 836    switch (slot) {
 837    case VERT_ATTRIB_COLOR0:
 838       *name = TGSI_SEMANTIC_COLOR;
 839       *index = 0;
 840       break;
 841    case VERT_ATTRIB_COLOR1:
 842       *name = TGSI_SEMANTIC_COLOR;
 843       *index = 1;
 844       break;
 845    case VERT_ATTRIB_EDGEFLAG:
 846       *name = TGSI_SEMANTIC_EDGEFLAG;
 847       *index = 0;
 848       break;
 849    case VERT_ATTRIB_FOG:
 850       *name = TGSI_SEMANTIC_FOG;
 851       *index = 0;
 852       break;
 853    case VERT_ATTRIB_NORMAL:
 854       *name = TGSI_SEMANTIC_NORMAL;
 855       *index = 0;
 856       break;
 857    case VERT_ATTRIB_POS:
 858       *name = TGSI_SEMANTIC_POSITION;
 859       *index = 0;
 860       break;
 861    case VERT_ATTRIB_POINT_SIZE:
 862       *name = TGSI_SEMANTIC_PSIZE;
 863       *index = 0;
 864       break;
 865    default:
 866       ERROR("unknown vert attrib slot %u\n", slot);
 867       assert(false);
 868       break;
 869    }
 870 }
 871
 872 static void
 873 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
 874 {
 875    assert(name && index);
 876
 877    if (slot >= VARYING_SLOT_TESS_MAX) {
 878       ERROR("invalid varying slot %u\n", slot);
 879       assert(false);
 880       return;
 881    }
 882
 883    if (slot >= VARYING_SLOT_PATCH0) {
 884       *name = TGSI_SEMANTIC_PATCH;
 885       *index = slot - VARYING_SLOT_PATCH0;
 886       return;
 887    }
 888
 889    if (slot >= VARYING_SLOT_VAR0) {
 890       *name = TGSI_SEMANTIC_GENERIC;
 891       *index = slot - VARYING_SLOT_VAR0;
 892       return;
 893    }
 894
 895    if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
 896       *name = TGSI_SEMANTIC_TEXCOORD;
 897       *index = slot - VARYING_SLOT_TEX0;
 898       return;
 899    }
 900
 901    switch (slot) {
 902    case VARYING_SLOT_BFC0:
 903       *name = TGSI_SEMANTIC_BCOLOR;
 904       *index = 0;
 905       break;
 906    case VARYING_SLOT_BFC1:
 907       *name = TGSI_SEMANTIC_BCOLOR;
 908       *index = 1;
 909       break;
 910    case VARYING_SLOT_CLIP_DIST0:
 911       *name = TGSI_SEMANTIC_CLIPDIST;
 912       *index = 0;
 913       break;
 914    case VARYING_SLOT_CLIP_DIST1:
 915       *name = TGSI_SEMANTIC_CLIPDIST;
 916       *index = 1;
 917       break;
 918    case VARYING_SLOT_CLIP_VERTEX:
 919       *name = TGSI_SEMANTIC_CLIPVERTEX;
 920       *index = 0;
 921       break;
 922    case VARYING_SLOT_COL0:
 923       *name = TGSI_SEMANTIC_COLOR;
 924       *index = 0;
 925       break;
 926    case VARYING_SLOT_COL1:
 927       *name = TGSI_SEMANTIC_COLOR;
 928       *index = 1;
 929       break;
 930    case VARYING_SLOT_EDGE:
 931       *name = TGSI_SEMANTIC_EDGEFLAG;
 932       *index = 0;
 933       break;
 934    case VARYING_SLOT_FACE:
 935       *name = TGSI_SEMANTIC_FACE;
 936       *index = 0;
 937       break;
 938    case VARYING_SLOT_FOGC:
 939       *name = TGSI_SEMANTIC_FOG;
 940       *index = 0;
 941       break;
 942    case VARYING_SLOT_LAYER:
 943       *name = TGSI_SEMANTIC_LAYER;
 944       *index = 0;
 945       break;
 946    case VARYING_SLOT_PNTC:
 947       *name = TGSI_SEMANTIC_PCOORD;
 948       *index = 0;
 949       break;
 950    case VARYING_SLOT_POS:
 951       *name = TGSI_SEMANTIC_POSITION;
 952       *index = 0;
 953       break;
 954    case VARYING_SLOT_PRIMITIVE_ID:
 955       *name = TGSI_SEMANTIC_PRIMID;
 956       *index = 0;
 957       break;
 958    case VARYING_SLOT_PSIZ:
 959       *name = TGSI_SEMANTIC_PSIZE;
 960       *index = 0;
 961       break;
 962    case VARYING_SLOT_TESS_LEVEL_INNER:
 963       *name = TGSI_SEMANTIC_TESSINNER;
 964       *index = 0;
 965       break;
 966    case VARYING_SLOT_TESS_LEVEL_OUTER:
 967       *name = TGSI_SEMANTIC_TESSOUTER;
 968       *index = 0;
 969       break;
 970    case VARYING_SLOT_VIEWPORT:
 971       *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
 972       *index = 0;
 973       break;
 974    default:
 975       ERROR("unknown varying slot %u\n", slot);
 976       assert(false);
 977       break;
 978    }
 979 }
 980
 981 static void
 982 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
 983 {
 984    if (slot >= FRAG_RESULT_DATA0) {
 985       *name = TGSI_SEMANTIC_COLOR;
 986       *index = slot - FRAG_RESULT_COLOR - 2; // intentional
 987       return;
 988    }
 989
 990    switch (slot) {
 991    case FRAG_RESULT_COLOR:
 992       *name = TGSI_SEMANTIC_COLOR;
 993       *index = 0;
 994       break;
 995    case FRAG_RESULT_DEPTH:
 996       *name = TGSI_SEMANTIC_POSITION;
 997       *index = 0;
 998       break;
 999    case FRAG_RESULT_SAMPLE_MASK:
1000       *name = TGSI_SEMANTIC_SAMPLEMASK;
1001       *index = 0;
1002       break;
1003    default:
1004       ERROR("unknown frag result slot %u\n", slot);
1005       assert(false);
1006       break;
1007    }
1008 }
1009
1010 // copy of _mesa_sysval_to_semantic
1011 static void
1012 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
1013 {
1014    *index = 0;
1015    switch (val) {
1016    // Vertex shader
1017    case SYSTEM_VALUE_VERTEX_ID:
1018       *name = TGSI_SEMANTIC_VERTEXID;
1019       break;
1020    case SYSTEM_VALUE_INSTANCE_ID:
1021       *name = TGSI_SEMANTIC_INSTANCEID;
1022       break;
1023    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
1024       *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
1025       break;
1026    case SYSTEM_VALUE_BASE_VERTEX:
1027       *name = TGSI_SEMANTIC_BASEVERTEX;
1028       break;
1029    case SYSTEM_VALUE_BASE_INSTANCE:
1030       *name = TGSI_SEMANTIC_BASEINSTANCE;
1031       break;
1032    case SYSTEM_VALUE_DRAW_ID:
1033       *name = TGSI_SEMANTIC_DRAWID;
1034       break;
1035
1036    // Geometry shader
1037    case SYSTEM_VALUE_INVOCATION_ID:
1038       *name = TGSI_SEMANTIC_INVOCATIONID;
1039       break;
1040
1041    // Fragment shader
1042    case SYSTEM_VALUE_FRAG_COORD:
1043       *name = TGSI_SEMANTIC_POSITION;
1044       break;
1045    case SYSTEM_VALUE_FRONT_FACE:
1046       *name = TGSI_SEMANTIC_FACE;
1047       break;
1048    case SYSTEM_VALUE_SAMPLE_ID:
1049       *name = TGSI_SEMANTIC_SAMPLEID;
1050       break;
1051    case SYSTEM_VALUE_SAMPLE_POS:
1052       *name = TGSI_SEMANTIC_SAMPLEPOS;
1053       break;
1054    case SYSTEM_VALUE_SAMPLE_MASK_IN:
1055       *name = TGSI_SEMANTIC_SAMPLEMASK;
1056       break;
1057    case SYSTEM_VALUE_HELPER_INVOCATION:
1058       *name = TGSI_SEMANTIC_HELPER_INVOCATION;
1059       break;
1060
1061    // Tessellation shader
1062    case SYSTEM_VALUE_TESS_COORD:
1063       *name = TGSI_SEMANTIC_TESSCOORD;
1064       break;
1065    case SYSTEM_VALUE_VERTICES_IN:
1066       *name = TGSI_SEMANTIC_VERTICESIN;
1067       break;
1068    case SYSTEM_VALUE_PRIMITIVE_ID:
1069       *name = TGSI_SEMANTIC_PRIMID;
1070       break;
1071    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1072       *name = TGSI_SEMANTIC_TESSOUTER;
1073       break;
1074    case SYSTEM_VALUE_TESS_LEVEL_INNER:
1075       *name = TGSI_SEMANTIC_TESSINNER;
1076       break;
1077
1078    // Compute shader
1079    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1080       *name = TGSI_SEMANTIC_THREAD_ID;
1081       break;
1082    case SYSTEM_VALUE_WORK_GROUP_ID:
1083       *name = TGSI_SEMANTIC_BLOCK_ID;
1084       break;
1085    case SYSTEM_VALUE_NUM_WORK_GROUPS:
1086       *name = TGSI_SEMANTIC_GRID_SIZE;
1087       break;
1088    case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1089       *name = TGSI_SEMANTIC_BLOCK_SIZE;
1090       break;
1091
1092    // ARB_shader_ballot
1093    case SYSTEM_VALUE_SUBGROUP_SIZE:
1094       *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
1095       break;
1096    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1097       *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
1098       break;
1099    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1100       *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
1101       break;
1102    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1103       *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
1104       break;
1105    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1106       *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
1107       break;
1108    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1109       *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
1110       break;
1111    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1112       *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
1113       break;
1114
1115    default:
1116       ERROR("unknown system value %u\n", val);
1117       assert(false);
1118       break;
1119    }
1120 }
1121
1122 void
1123 Converter::setInterpolate(nv50_ir_varying *var,
1124                           uint8_t mode,
1125                           bool centroid,
1126                           unsigned semantic)
1127 {
1128    switch (mode) {
1129    case INTERP_MODE_FLAT:
1130       var->flat = 1;
1131       break;
1132    case INTERP_MODE_NONE:
1133       if (semantic == TGSI_SEMANTIC_COLOR)
1134          var->sc = 1;
1135       else if (semantic == TGSI_SEMANTIC_POSITION)
1136          var->linear = 1;
1137       break;
1138    case INTERP_MODE_NOPERSPECTIVE:
1139       var->linear = 1;
1140       break;
1141    case INTERP_MODE_SMOOTH:
1142       break;
1143    }
1144    var->centroid = centroid;
1145 }
1146
1147 static uint16_t
1148 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
1149           bool input, const nir_variable *var)
1150 {
1151    if (!type->is_array())
1152       return type->count_attribute_slots(false);
1153
1154    uint16_t slots;
1155    switch (stage) {
1156    case Program::TYPE_GEOMETRY:
1157       slots = type->uniform_locations();
1158       if (input)
1159          slots /= info.gs.vertices_in;
1160       break;
1161    case Program::TYPE_TESSELLATION_CONTROL:
1162    case Program::TYPE_TESSELLATION_EVAL:
1163       // remove first dimension
1164       if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1165          slots = type->uniform_locations();
1166       else
1167          slots = type->fields.array->uniform_locations();
1168       break;
1169    default:
1170       slots = type->count_attribute_slots(false);
1171       break;
1172    }
1173
1174    return slots;
1175 }
1176
1177 bool Converter::assignSlots() {
1178    unsigned name;
1179    unsigned index;
1180
1181    info->io.viewportId = -1;
1182    info->numInputs = 0;
1183    info->numOutputs = 0;
1184
1185    // we have to fixup the uniform locations for arrays
1186    unsigned numImages = 0;
1187    nir_foreach_variable(var, &nir->uniforms) {
1188       const glsl_type *type = var->type;
1189       if (!type->without_array()->is_image())
1190          continue;
1191       var->data.driver_location = numImages;
1192       numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1193    }
1194
1195    info->numSysVals = 0;
1196    for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
1197       if (!(nir->info.system_values_read & 1ull << i))
1198          continue;
1199
1200       system_val_to_tgsi_semantic(i, &name, &index);
1201       info->sv[info->numSysVals].sn = name;
1202       info->sv[info->numSysVals].si = index;
1203       info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1204
1205       switch (i) {
1206       case SYSTEM_VALUE_INSTANCE_ID:
1207          info->io.instanceId = info->numSysVals;
1208          break;
1209       case SYSTEM_VALUE_TESS_LEVEL_INNER:
1210       case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1211          info->sv[info->numSysVals].patch = 1;
1212          break;
1213       case SYSTEM_VALUE_VERTEX_ID:
1214          info->io.vertexId = info->numSysVals;
1215          break;
1216       default:
1217          break;
1218       }
1219
1220       info->numSysVals += 1;
1221    }
1222
1223    if (prog->getType() == Program::TYPE_COMPUTE)
1224       return true;
1225
1226    nir_foreach_variable(var, &nir->inputs) {
1227       const glsl_type *type = var->type;
1228       int slot = var->data.location;
1229       uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1230       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1231                                        : type->component_slots();
1232       uint32_t frac = var->data.location_frac;
1233       uint32_t vary = var->data.driver_location;
1234
1235       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1236          if (comp > 2)
1237             slots *= 2;
1238       }
1239
1240       assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1241
1242       switch(prog->getType()) {
1243       case Program::TYPE_FRAGMENT:
1244          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1245          for (uint16_t i = 0; i < slots; ++i) {
1246             setInterpolate(&info->in[vary + i], var->data.interpolation,
1247                            var->data.centroid | var->data.sample, name);
1248          }
1249          break;
1250       case Program::TYPE_GEOMETRY:
1251          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1252          break;
1253       case Program::TYPE_TESSELLATION_CONTROL:
1254       case Program::TYPE_TESSELLATION_EVAL:
1255          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1256          if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1257             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1258          break;
1259       case Program::TYPE_VERTEX:
1260          vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1261          switch (name) {
1262          case TGSI_SEMANTIC_EDGEFLAG:
1263             info->io.edgeFlagIn = vary;
1264             break;
1265          default:
1266             break;
1267          }
1268          break;
1269       default:
1270          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1271          return false;
1272       }
1273
1274       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1275          info->in[vary].id = vary;
1276          info->in[vary].patch = var->data.patch;
1277          info->in[vary].sn = name;
1278          info->in[vary].si = index + i;
1279          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1280             if (i & 0x1)
1281                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1282             else
1283                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1284          else
1285             info->in[vary].mask |= ((1 << comp) - 1) << frac;
1286       }
1287       info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1288    }
1289
1290    nir_foreach_variable(var, &nir->outputs) {
1291       const glsl_type *type = var->type;
1292       int slot = var->data.location;
1293       uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1294       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1295                                        : type->component_slots();
1296       uint32_t frac = var->data.location_frac;
1297       uint32_t vary = var->data.driver_location;
1298
1299       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1300          if (comp > 2)
1301             slots *= 2;
1302       }
1303
1304       assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1305
1306       switch(prog->getType()) {
1307       case Program::TYPE_FRAGMENT:
1308          frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1309          switch (name) {
1310          case TGSI_SEMANTIC_COLOR:
1311             if (!var->data.fb_fetch_output)
1312                info->prop.fp.numColourResults++;
1313             info->prop.fp.separateFragData = true;
1314             // sometimes we get FRAG_RESULT_DATAX with data.index 0
1315             // sometimes we get FRAG_RESULT_DATA0 with data.index X
1316             index = index == 0 ? var->data.index : index;
1317             break;
1318          case TGSI_SEMANTIC_POSITION:
1319             info->io.fragDepth = vary;
1320             info->prop.fp.writesDepth = true;
1321             break;
1322          case TGSI_SEMANTIC_SAMPLEMASK:
1323             info->io.sampleMask = vary;
1324             break;
1325          default:
1326             break;
1327          }
1328          break;
1329       case Program::TYPE_GEOMETRY:
1330       case Program::TYPE_TESSELLATION_CONTROL:
1331       case Program::TYPE_TESSELLATION_EVAL:
1332       case Program::TYPE_VERTEX:
1333          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1334
1335          if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1336              name != TGSI_SEMANTIC_TESSOUTER)
1337             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1338
1339          switch (name) {
1340          case TGSI_SEMANTIC_CLIPDIST:
1341             info->io.genUserClip = -1;
1342             break;
1343          case TGSI_SEMANTIC_CLIPVERTEX:
1344             clipVertexOutput = vary;
1345             break;
1346          case TGSI_SEMANTIC_EDGEFLAG:
1347             info->io.edgeFlagOut = vary;
1348             break;
1349          case TGSI_SEMANTIC_POSITION:
1350             if (clipVertexOutput < 0)
1351                clipVertexOutput = vary;
1352             break;
1353          default:
1354             break;
1355          }
1356          break;
1357       default:
1358          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1359          return false;
1360       }
1361
1362       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1363          info->out[vary].id = vary;
1364          info->out[vary].patch = var->data.patch;
1365          info->out[vary].sn = name;
1366          info->out[vary].si = index + i;
1367          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1368             if (i & 0x1)
1369                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1370             else
1371                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1372          else
1373             info->out[vary].mask |= ((1 << comp) - 1) << frac;
1374
1375          if (nir->info.outputs_read & 1ull << slot)
1376             info->out[vary].oread = 1;
1377       }
1378       info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1379    }
1380
1381    if (info->io.genUserClip > 0) {
1382       info->io.clipDistances = info->io.genUserClip;
1383
1384       const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1385
1386       for (unsigned int n = 0; n < nOut; ++n) {
1387          unsigned int i = info->numOutputs++;
1388          info->out[i].id = i;
1389          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1390          info->out[i].si = n;
1391          info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1392       }
1393    }
1394
1395    return info->assignSlots(info) == 0;
1396 }
1397
1398 uint32_t
1399 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1400 {
1401    DataType ty;
1402    int offset = nir_intrinsic_component(insn);
1403    bool input;
1404
1405    if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1406       ty = getDType(insn);
1407    else
1408       ty = getSType(insn->src[0], false, false);
1409
1410    switch (insn->intrinsic) {
1411    case nir_intrinsic_load_input:
1412    case nir_intrinsic_load_interpolated_input:
1413    case nir_intrinsic_load_per_vertex_input:
1414       input = true;
1415       break;
1416    case nir_intrinsic_load_output:
1417    case nir_intrinsic_load_per_vertex_output:
1418    case nir_intrinsic_store_output:
1419    case nir_intrinsic_store_per_vertex_output:
1420       input = false;
1421       break;
1422    default:
1423       ERROR("unknown intrinsic in getSlotAddress %s",
1424             nir_intrinsic_infos[insn->intrinsic].name);
1425       input = false;
1426       assert(false);
1427       break;
1428    }
1429
1430    if (typeSizeof(ty) == 8) {
1431       slot *= 2;
1432       slot += offset;
1433       if (slot >= 4) {
1434          idx += 1;
1435          slot -= 4;
1436       }
1437    } else {
1438       slot += offset;
1439    }
1440
1441    assert(slot < 4);
1442    assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1443    assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1444
1445    const nv50_ir_varying *vary = input ? info->in : info->out;
1446    return vary[idx].slot[slot] * 4;
1447 }
1448
1449 Instruction *
1450 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1451                     uint32_t base, uint8_t c, Value *indirect0,
1452                     Value *indirect1, bool patch)
1453 {
1454    unsigned int tySize = typeSizeof(ty);
1455
1456    if (tySize == 8 &&
1457        (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1458       Value *lo = getSSA();
1459       Value *hi = getSSA();
1460
1461       Instruction *loi =
1462          mkLoad(TYPE_U32, lo,
1463                 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1464                 indirect0);
1465       loi->setIndirect(0, 1, indirect1);
1466       loi->perPatch = patch;
1467
1468       Instruction *hii =
1469          mkLoad(TYPE_U32, hi,
1470                 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1471                 indirect0);
1472       hii->setIndirect(0, 1, indirect1);
1473       hii->perPatch = patch;
1474
1475       return mkOp2(OP_MERGE, ty, def, lo, hi);
1476    } else {
1477       Instruction *ld =
1478          mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1479       ld->setIndirect(0, 1, indirect1);
1480       ld->perPatch = patch;
1481       return ld;
1482    }
1483 }
1484
1485 void
1486 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1487                    DataType ty, Value *src, uint8_t idx, uint8_t c,
1488                    Value *indirect0, Value *indirect1)
1489 {
1490    uint8_t size = typeSizeof(ty);
1491    uint32_t address = getSlotAddress(insn, idx, c);
1492
1493    if (size == 8 && indirect0) {
1494       Value *split[2];
1495       mkSplit(split, 4, src);
1496
1497       if (op == OP_EXPORT) {
1498          split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1499          split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1500       }
1501
1502       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1503               split[0])->perPatch = info->out[idx].patch;
1504       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1505               split[1])->perPatch = info->out[idx].patch;
1506    } else {
1507       if (op == OP_EXPORT)
1508          src = mkMov(getSSA(size), src, ty)->getDef(0);
1509       mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1510               src)->perPatch = info->out[idx].patch;
1511    }
1512 }
1513
1514 bool
1515 Converter::parseNIR()
1516 {
1517    info->bin.tlsSpace = 0;
1518    info->io.clipDistances = nir->info.clip_distance_array_size;
1519    info->io.cullDistances = nir->info.cull_distance_array_size;
1520
1521    switch(prog->getType()) {
1522    case Program::TYPE_COMPUTE:
1523       info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1524       info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1525       info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1526       info->bin.smemSize = nir->info.cs.shared_size;
1527       break;
1528    case Program::TYPE_FRAGMENT:
1529       info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1530       info->prop.fp.persampleInvocation =
1531          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1532          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1533       info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1534       info->prop.fp.readsSampleLocations =
1535          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1536       info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1537       info->prop.fp.usesSampleMaskIn =
1538          !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1539       break;
1540    case Program::TYPE_GEOMETRY:
1541       info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1542       info->prop.gp.instanceCount = nir->info.gs.invocations;
1543       info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1544       info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1545       break;
1546    case Program::TYPE_TESSELLATION_CONTROL:
1547    case Program::TYPE_TESSELLATION_EVAL:
1548       if (nir->info.tess.primitive_mode == GL_ISOLINES)
1549          info->prop.tp.domain = GL_LINES;
1550       else
1551          info->prop.tp.domain = nir->info.tess.primitive_mode;
1552       info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1553       info->prop.tp.outputPrim =
1554          nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1555       info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1556       info->prop.tp.winding = !nir->info.tess.ccw;
1557       break;
1558    case Program::TYPE_VERTEX:
1559       info->prop.vp.usesDrawParameters =
1560          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1561          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1562          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1563       break;
1564    default:
1565       break;
1566    }
1567
1568    return true;
1569 }
1570
1571 bool
1572 Converter::visit(nir_function *function)
1573 {
1574    assert(function->impl);
1575
1576    // usually the blocks will set everything up, but main is special
1577    BasicBlock *entry = new BasicBlock(prog->main);
1578    exit = new BasicBlock(prog->main);
1579    blocks[nir_start_block(function->impl)->index] = entry;
1580    prog->main->setEntry(entry);
1581    prog->main->setExit(exit);
1582
1583    setPosition(entry, true);
1584
1585    if (info->io.genUserClip > 0) {
1586       for (int c = 0; c < 4; ++c)
1587          clipVtx[c] = getScratch();
1588    }
1589
1590    switch (prog->getType()) {
1591    case Program::TYPE_TESSELLATION_CONTROL:
1592       outBase = mkOp2v(
1593          OP_SUB, TYPE_U32, getSSA(),
1594          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1595          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1596       break;
1597    case Program::TYPE_FRAGMENT: {
1598       Symbol *sv = mkSysVal(SV_POSITION, 3);
1599       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1600       fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1601       break;
1602    }
1603    default:
1604       break;
1605    }
1606
1607    nir_foreach_register(reg, &function->impl->registers) {
1608       if (reg->num_array_elems) {
1609          // TODO: packed variables would be nice, but MemoryOpt fails
1610          // replace 4 with reg->num_components
1611          uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1612          regToLmemOffset[reg->index] = info->bin.tlsSpace;
1613          info->bin.tlsSpace += size;
1614       }
1615    }
1616
1617    nir_index_ssa_defs(function->impl);
1618    foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1619       if (!visit(node))
1620          return false;
1621    }
1622
1623    bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1624    setPosition(exit, true);
1625
1626    if ((prog->getType() == Program::TYPE_VERTEX ||
1627         prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1628        && info->io.genUserClip > 0)
1629       handleUserClipPlanes();
1630
1631    // TODO: for non main function this needs to be a OP_RETURN
1632    mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1633    return true;
1634 }
1635
1636 bool
1637 Converter::visit(nir_cf_node *node)
1638 {
1639    switch (node->type) {
1640    case nir_cf_node_block:
1641       return visit(nir_cf_node_as_block(node));
1642    case nir_cf_node_if:
1643       return visit(nir_cf_node_as_if(node));
1644    case nir_cf_node_loop:
1645       return visit(nir_cf_node_as_loop(node));
1646    default:
1647       ERROR("unknown nir_cf_node type %u\n", node->type);
1648       return false;
1649    }
1650 }
1651
1652 bool
1653 Converter::visit(nir_block *block)
1654 {
1655    if (!block->predecessors->entries && block->instr_list.is_empty())
1656       return true;
1657
1658    BasicBlock *bb = convert(block);
1659
1660    setPosition(bb, true);
1661    nir_foreach_instr(insn, block) {
1662       if (!visit(insn))
1663          return false;
1664    }
1665    return true;
1666 }
1667
1668 bool
1669 Converter::visit(nir_if *nif)
1670 {
1671    DataType sType = getSType(nif->condition, false, false);
1672    Value *src = getSrc(&nif->condition, 0);
1673
1674    nir_block *lastThen = nir_if_last_then_block(nif);
1675    nir_block *lastElse = nir_if_last_else_block(nif);
1676
1677    assert(!lastThen->successors[1]);
1678    assert(!lastElse->successors[1]);
1679
1680    BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1681    BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1682
1683    bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1684    bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1685
1686    // we only insert joinats, if both nodes end up at the end of the if again.
1687    // the reason for this to not happens are breaks/continues/ret/... which
1688    // have their own handling
1689    if (lastThen->successors[0] == lastElse->successors[0])
1690       bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1691                           CC_ALWAYS, NULL);
1692
1693    mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1694
1695    foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1696       if (!visit(node))
1697          return false;
1698    }
1699    setPosition(convert(lastThen), true);
1700    if (!bb->getExit() ||
1701        !bb->getExit()->asFlow() ||
1702         bb->getExit()->asFlow()->op == OP_JOIN) {
1703       BasicBlock *tailBB = convert(lastThen->successors[0]);
1704       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1705       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1706    }
1707
1708    foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1709       if (!visit(node))
1710          return false;
1711    }
1712    setPosition(convert(lastElse), true);
1713    if (!bb->getExit() ||
1714        !bb->getExit()->asFlow() ||
1715         bb->getExit()->asFlow()->op == OP_JOIN) {
1716       BasicBlock *tailBB = convert(lastElse->successors[0]);
1717       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1718       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1719    }
1720
1721    if (lastThen->successors[0] == lastElse->successors[0]) {
1722       setPosition(convert(lastThen->successors[0]), true);
1723       mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1724    }
1725
1726    return true;
1727 }
1728
1729 bool
1730 Converter::visit(nir_loop *loop)
1731 {
1732    curLoopDepth += 1;
1733    func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1734
1735    BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1736    BasicBlock *tailBB =
1737       convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1738    bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1739
1740    mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1741    setPosition(loopBB, false);
1742    mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1743
1744    foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1745       if (!visit(node))
1746          return false;
1747    }
1748    Instruction *insn = bb->getExit();
1749    if (bb->cfg.incidentCount() != 0) {
1750       if (!insn || !insn->asFlow()) {
1751          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1752          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1753       } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1754                  tailBB->cfg.incidentCount() == 0) {
1755          // RA doesn't like having blocks around with no incident edge,
1756          // so we create a fake one to make it happy
1757          bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1758       }
1759    }
1760
1761    curLoopDepth -= 1;
1762
1763    return true;
1764 }
1765
1766 bool
1767 Converter::visit(nir_instr *insn)
1768 {
1769    // we need an insertion point for on the fly generated immediate loads
1770    immInsertPos = bb->getExit();
1771    switch (insn->type) {
1772    case nir_instr_type_alu:
1773       return visit(nir_instr_as_alu(insn));
1774    case nir_instr_type_deref:
1775       return visit(nir_instr_as_deref(insn));
1776    case nir_instr_type_intrinsic:
1777       return visit(nir_instr_as_intrinsic(insn));
1778    case nir_instr_type_jump:
1779       return visit(nir_instr_as_jump(insn));
1780    case nir_instr_type_load_const:
1781       return visit(nir_instr_as_load_const(insn));
1782    case nir_instr_type_ssa_undef:
1783       return visit(nir_instr_as_ssa_undef(insn));
1784    case nir_instr_type_tex:
1785       return visit(nir_instr_as_tex(insn));
1786    default:
1787       ERROR("unknown nir_instr type %u\n", insn->type);
1788       return false;
1789    }
1790    return true;
1791 }
1792
1793 SVSemantic
1794 Converter::convert(nir_intrinsic_op intr)
1795 {
1796    switch (intr) {
1797    case nir_intrinsic_load_base_vertex:
1798       return SV_BASEVERTEX;
1799    case nir_intrinsic_load_base_instance:
1800       return SV_BASEINSTANCE;
1801    case nir_intrinsic_load_draw_id:
1802       return SV_DRAWID;
1803    case nir_intrinsic_load_front_face:
1804       return SV_FACE;
1805    case nir_intrinsic_load_helper_invocation:
1806       return SV_THREAD_KILL;
1807    case nir_intrinsic_load_instance_id:
1808       return SV_INSTANCE_ID;
1809    case nir_intrinsic_load_invocation_id:
1810       return SV_INVOCATION_ID;
1811    case nir_intrinsic_load_local_group_size:
1812       return SV_NTID;
1813    case nir_intrinsic_load_local_invocation_id:
1814       return SV_TID;
1815    case nir_intrinsic_load_num_work_groups:
1816       return SV_NCTAID;
1817    case nir_intrinsic_load_patch_vertices_in:
1818       return SV_VERTEX_COUNT;
1819    case nir_intrinsic_load_primitive_id:
1820       return SV_PRIMITIVE_ID;
1821    case nir_intrinsic_load_sample_id:
1822       return SV_SAMPLE_INDEX;
1823    case nir_intrinsic_load_sample_mask_in:
1824       return SV_SAMPLE_MASK;
1825    case nir_intrinsic_load_sample_pos:
1826       return SV_SAMPLE_POS;
1827    case nir_intrinsic_load_subgroup_eq_mask:
1828       return SV_LANEMASK_EQ;
1829    case nir_intrinsic_load_subgroup_ge_mask:
1830       return SV_LANEMASK_GE;
1831    case nir_intrinsic_load_subgroup_gt_mask:
1832       return SV_LANEMASK_GT;
1833    case nir_intrinsic_load_subgroup_le_mask:
1834       return SV_LANEMASK_LE;
1835    case nir_intrinsic_load_subgroup_lt_mask:
1836       return SV_LANEMASK_LT;
1837    case nir_intrinsic_load_subgroup_invocation:
1838       return SV_LANEID;
1839    case nir_intrinsic_load_tess_coord:
1840       return SV_TESS_COORD;
1841    case nir_intrinsic_load_tess_level_inner:
1842       return SV_TESS_INNER;
1843    case nir_intrinsic_load_tess_level_outer:
1844       return SV_TESS_OUTER;
1845    case nir_intrinsic_load_vertex_id:
1846       return SV_VERTEX_ID;
1847    case nir_intrinsic_load_work_group_id:
1848       return SV_CTAID;
1849    default:
1850       ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1851             nir_intrinsic_infos[intr].name);
1852       assert(false);
1853       return SV_LAST;
1854    }
1855 }
1856
// Maps a GL image format enum (GL_*) to the matching nv50_ir::FMT_* value.
//
// format: one of the GL_* constants listed in the switch below; GL_NONE maps
//         to FMT_NONE.
// Returns the corresponding ImgFormat. A format without a case is reported
// through ERROR, hits assert(false), and FMT_NONE is returned if execution
// continues past the assert (presumably builds with asserts disabled).
1857 ImgFormat
1858 Converter::convertGLImgFormat(GLuint format)
1859 {
     // FMT_CASE(a, b) expands to `case GL_a: return nv50_ir::FMT_b`
1860 #define FMT_CASE(a, b) \
1861   case GL_ ## a: return nv50_ir::FMT_ ## b
1862
1863    switch (format) {
1864    FMT_CASE(NONE, NONE);
1865
        // F-suffixed formats
1866    FMT_CASE(RGBA32F, RGBA32F);
1867    FMT_CASE(RGBA16F, RGBA16F);
1868    FMT_CASE(RG32F, RG32F);
1869    FMT_CASE(RG16F, RG16F);
1870    FMT_CASE(R11F_G11F_B10F, R11G11B10F);
1871    FMT_CASE(R32F, R32F);
1872    FMT_CASE(R16F, R16F);
1873
        // UI-suffixed formats
1874    FMT_CASE(RGBA32UI, RGBA32UI);
1875    FMT_CASE(RGBA16UI, RGBA16UI);
1876    FMT_CASE(RGB10_A2UI, RGB10A2UI);
1877    FMT_CASE(RGBA8UI, RGBA8UI);
1878    FMT_CASE(RG32UI, RG32UI);
1879    FMT_CASE(RG16UI, RG16UI);
1880    FMT_CASE(RG8UI, RG8UI);
1881    FMT_CASE(R32UI, R32UI);
1882    FMT_CASE(R16UI, R16UI);
1883    FMT_CASE(R8UI, R8UI);
1884
        // I-suffixed formats
1885    FMT_CASE(RGBA32I, RGBA32I);
1886    FMT_CASE(RGBA16I, RGBA16I);
1887    FMT_CASE(RGBA8I, RGBA8I);
1888    FMT_CASE(RG32I, RG32I);
1889    FMT_CASE(RG16I, RG16I);
1890    FMT_CASE(RG8I, RG8I);
1891    FMT_CASE(R32I, R32I);
1892    FMT_CASE(R16I, R16I);
1893    FMT_CASE(R8I, R8I);
1894
        // formats without a type suffix
1895    FMT_CASE(RGBA16, RGBA16);
1896    FMT_CASE(RGB10_A2, RGB10A2);
1897    FMT_CASE(RGBA8, RGBA8);
1898    FMT_CASE(RG16, RG16);
1899    FMT_CASE(RG8, RG8);
1900    FMT_CASE(R16, R16);
1901    FMT_CASE(R8, R8);
1902
        // _SNORM formats
1903    FMT_CASE(RGBA16_SNORM, RGBA16_SNORM);
1904    FMT_CASE(RGBA8_SNORM, RGBA8_SNORM);
1905    FMT_CASE(RG16_SNORM, RG16_SNORM);
1906    FMT_CASE(RG8_SNORM, RG8_SNORM);
1907    FMT_CASE(R16_SNORM, R16_SNORM);
1908    FMT_CASE(R8_SNORM, R8_SNORM);
1909
        // the one entry whose GL name and nv50_ir name are different words
1910    FMT_CASE(BGRA_INTEGER, BGRA8);
1911    default:
1912       ERROR("unknown format %x\n", format);
1913       assert(false);
1914       return nv50_ir::FMT_NONE;
1915    }
     // the helper macro is only meant for the switch above
1916 #undef FMT_CASE
1917 }
1918
// Translates a single NIR intrinsic instruction into nv50 IR.
//
// The body is one switch over insn->intrinsic. Each case converts the
// destination (when there is one) via convert(&insn->dest), fetches operands
// with getSrc()/getIndirect(), and emits instructions through the builder
// helpers (mkOp*, mkLoad, mkStore, mkTex, loadFrom, storeTo, ...). Several
// cases also record facts in `info` (io.globalAccess, numBarriers,
// prop.fp.readsFramebuffer, prop.fp.readsSampleLocations).
//
// Returns true when the intrinsic was translated. Returns false after an
// ERROR message for an intrinsic that has no case here, and for a discard_if
// with more than one component.
1919 bool
1920 Converter::visit(nir_intrinsic_instr *insn)
1921 {
1922    nir_intrinsic_op op = insn->intrinsic;
1923    const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1924
1925    switch (op) {
1926    case nir_intrinsic_load_uniform: {
1927       LValues &newDefs = convert(&insn->dest);
1928       const DataType dType = getDType(insn);
1929       Value *indirect;
1930       uint32_t coffset = getIndirect(insn, 0, 0, indirect);
           // one load per component from const file index 0, based at 16 * coffset
1931       for (uint8_t i = 0; i < insn->num_components; ++i) {
1932          loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1933       }
1934       break;
1935    }
1936    case nir_intrinsic_store_output:
1937    case nir_intrinsic_store_per_vertex_output: {
1938       Value *indirect;
1939       DataType dType = getSType(insn->src[0], false, false);
           // getIndirect gets 1 for store_output and 2 for the per-vertex variant
           // (presumably the index of the offset source — confirm against getIndirect)
1940       uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1941
1942       for (uint8_t i = 0u; i < insn->num_components; ++i) {
              // skip components that are not enabled in the write mask
1943          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1944             continue;
1945
1946          uint8_t offset = 0;
1947          Value *src = getSrc(&insn->src[0], i);
1948          switch (prog->getType()) {
1949          case Program::TYPE_FRAGMENT: {
1950             if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1951                // TGSI uses a different interface than NIR, TGSI stores that
1952                // value in the z component, NIR in X
1953                offset += 2;
                    // the exported value goes through OP_SAT first
1954                src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1955             }
1956             break;
1957          }
1958          case Program::TYPE_GEOMETRY:
1959          case Program::TYPE_VERTEX: {
                 // with generated user clip planes, the clip-vertex output is also
                 // copied into clipVtx[i] and the export reads from that copy
1960             if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1961                mkMov(clipVtx[i], src);
1962                src = clipVtx[i];
1963             }
1964             break;
1965          }
1966          default:
1967             break;
1968          }
1969
1970          storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1971       }
1972       break;
1973    }
1974    case nir_intrinsic_load_input:
1975    case nir_intrinsic_load_interpolated_input:
1976    case nir_intrinsic_load_output: {
1977       LValues &newDefs = convert(&insn->dest);
1978
1979       // FBFetch
           // a fragment-shader load_output is emitted as an OP_TXF on a 2D MS array
           // target, addressed by (x, y, layer, sample index) of the fragment
1980       if (prog->getType() == Program::TYPE_FRAGMENT &&
1981           op == nir_intrinsic_load_output) {
1982          std::vector<Value*> defs, srcs;
1983          uint8_t mask = 0;
1984
1985          srcs.push_back(getSSA());
1986          srcs.push_back(getSSA());
              // SV_POSITION is read as F32 and converted to U32 with ROUND_Z
1987          Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1988          Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1989          mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1990          mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1991
1992          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1993          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1994
              // one def and one mask bit per requested component
1995          for (uint8_t i = 0u; i < insn->num_components; ++i) {
1996             defs.push_back(newDefs[i]);
1997             mask |= 1 << i;
1998          }
1999
2000          TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
2001          texi->tex.levelZero = 1;
2002          texi->tex.mask = mask;
2003          texi->tex.useOffsets = 0;
2004          texi->tex.r = 0xffff;
2005          texi->tex.s = 0xffff;
2006
2007          info->prop.fp.readsFramebuffer = true;
2008          break;
2009       }
2010
2011       const DataType dType = getDType(insn);
2012       Value *indirect;
2013       bool input = op != nir_intrinsic_load_output;
2014       operation nvirOp;
2015       uint32_t mode = 0;
2016
2017       uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
2018       nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
2019
2020       // see load_barycentric_* handling
           // for load_interpolated_input, component 1 of src[0] is the mode
           // immediate written by the load_barycentric_* case; it is OR'ed in here
2021       if (prog->getType() == Program::TYPE_FRAGMENT) {
2022          mode = translateInterpMode(&vary, nvirOp);
2023          if (op == nir_intrinsic_load_interpolated_input) {
2024             ImmediateValue immMode;
2025             if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
2026                mode |= immMode.reg.data.u32;
2027          }
2028       }
2029
2030       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2031          uint32_t address = getSlotAddress(insn, idx, i);
2032          Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
2033          if (prog->getType() == Program::TYPE_FRAGMENT) {
                 // s is the next source index handed to setSrc on the interp
                 // instruction; it starts at 1 because sym is passed to mkOp1
2034             int s = 1;
2035             if (typeSizeof(dType) == 8) {
                    // 8-byte values: interpolate two U32 halves (address and
                    // address + 4) and combine them with OP_MERGE
2036                Value *lo = getSSA();
2037                Value *hi = getSSA();
2038                Instruction *interp;
2039
2040                interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
2041                if (nvirOp == OP_PINTERP)
2042                   interp->setSrc(s++, fp.position);
2043                if (mode & NV50_IR_INTERP_OFFSET)
2044                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
2045                interp->setInterpolate(mode);
2046                interp->setIndirect(0, 0, indirect);
2047
                    // NOTE(review): s is not reset to 1 for the second instruction, so
                    // when either branch above ran, the hi half gets its extra
                    // sources at higher indices than the lo half — confirm intent
2048                Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
2049                interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
2050                if (nvirOp == OP_PINTERP)
2051                   interp->setSrc(s++, fp.position);
2052                if (mode & NV50_IR_INTERP_OFFSET)
2053                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
2054                interp->setInterpolate(mode);
2055                interp->setIndirect(0, 0, indirect);
2056
2057                mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2058             } else {
2059                Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2060                if (nvirOp == OP_PINTERP)
2061                   interp->setSrc(s++, fp.position);
2062                if (mode & NV50_IR_INTERP_OFFSET)
2063                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
2064                interp->setInterpolate(mode);
2065                interp->setIndirect(0, 0, indirect);
2066             }
2067          } else {
                 // non-fragment stages use a plain load, tagged with the varying's
                 // patch flag
2068             mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2069          }
2070       }
2071       break;
2072    }
2073    case nir_intrinsic_load_kernel_input: {
2074       assert(prog->getType() == Program::TYPE_COMPUTE);
2075       assert(insn->num_components == 1);
2076
2077       LValues &newDefs = convert(&insn->dest);
2078       const DataType dType = getDType(insn);
2079       Value *indirect;
2080       uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
2081
2082       mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
2083       break;
2084    }
2085    case nir_intrinsic_load_barycentric_at_offset:
2086    case nir_intrinsic_load_barycentric_at_sample:
2087    case nir_intrinsic_load_barycentric_centroid:
2088    case nir_intrinsic_load_barycentric_pixel:
2089    case nir_intrinsic_load_barycentric_sample: {
           // newDefs[1] always receives the interpolation mode as an immediate;
           // newDefs[0] is only written by the at_offset and at_sample variants
2090       LValues &newDefs = convert(&insn->dest);
2091       uint32_t mode;
2092
2093       if (op == nir_intrinsic_load_barycentric_centroid ||
2094           op == nir_intrinsic_load_barycentric_sample) {
2095          mode = NV50_IR_INTERP_CENTROID;
2096       } else if (op == nir_intrinsic_load_barycentric_at_offset) {
2097          Value *offs[2];
              // per axis: clamp to [-0.5, 0.4375], multiply by 4096, convert to S32
2098          for (uint8_t c = 0; c < 2; c++) {
2099             offs[c] = getScratch();
2100             mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2101             mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2102             mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2103             mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2104          }
              // both axes are combined into one value with OP_INSBF
2105          mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
2106
2107          mode = NV50_IR_INTERP_OFFSET;
2108       } else if (op == nir_intrinsic_load_barycentric_pixel) {
2109          mode = NV50_IR_INTERP_DEFAULT;
2110       } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2111          info->prop.fp.readsSampleLocations = true;
2112          mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2113          mode = NV50_IR_INTERP_OFFSET;
2114       } else {
2115          unreachable("all intrinsics already handled above");
2116       }
2117
2118       loadImm(newDefs[1], mode);
2119       break;
2120    }
2121    case nir_intrinsic_discard:
2122       mkOp(OP_DISCARD, TYPE_NONE, NULL);
2123       break;
2124    case nir_intrinsic_discard_if: {
2125       Value *pred = getSSA(1, FILE_PREDICATE);
2126       if (insn->num_components > 1) {
2127          ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
2128          assert(false);
2129          return false;
2130       }
           // pred = (src != 0); the discard is predicated on it
2131       mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2132       mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2133       break;
2134    }
        // intrinsics that are answered by reading a system value (OP_RDSV);
        // convert(op) picks the SVSemantic
2135    case nir_intrinsic_load_base_vertex:
2136    case nir_intrinsic_load_base_instance:
2137    case nir_intrinsic_load_draw_id:
2138    case nir_intrinsic_load_front_face:
2139    case nir_intrinsic_load_helper_invocation:
2140    case nir_intrinsic_load_instance_id:
2141    case nir_intrinsic_load_invocation_id:
2142    case nir_intrinsic_load_local_group_size:
2143    case nir_intrinsic_load_local_invocation_id:
2144    case nir_intrinsic_load_num_work_groups:
2145    case nir_intrinsic_load_patch_vertices_in:
2146    case nir_intrinsic_load_primitive_id:
2147    case nir_intrinsic_load_sample_id:
2148    case nir_intrinsic_load_sample_mask_in:
2149    case nir_intrinsic_load_sample_pos:
2150    case nir_intrinsic_load_subgroup_eq_mask:
2151    case nir_intrinsic_load_subgroup_ge_mask:
2152    case nir_intrinsic_load_subgroup_gt_mask:
2153    case nir_intrinsic_load_subgroup_le_mask:
2154    case nir_intrinsic_load_subgroup_lt_mask:
2155    case nir_intrinsic_load_subgroup_invocation:
2156    case nir_intrinsic_load_tess_coord:
2157    case nir_intrinsic_load_tess_level_inner:
2158    case nir_intrinsic_load_tess_level_outer:
2159    case nir_intrinsic_load_vertex_id:
2160    case nir_intrinsic_load_work_group_id: {
2161       const DataType dType = getDType(insn);
2162       SVSemantic sv = convert(op);
2163       LValues &newDefs = convert(&insn->dest);
2164
2165       for (uint8_t i = 0u; i < insn->num_components; ++i) {
              // for 8-byte destinations the U32 read goes into a temporary that is
              // merged with a zero upper half below
2166          Value *def;
2167          if (typeSizeof(dType) == 8)
2168             def = getSSA();
2169          else
2170             def = newDefs[i];
2171
              // SV_TID in a dimension whose thread count is 1 is replaced by the
              // constant 0 instead of a sysval read
2172          if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2173             loadImm(def, 0u);
2174          } else {
2175             Symbol *sym = mkSysVal(sv, i);
2176             Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2177             if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2178                rdsv->perPatch = 1;
2179          }
2180
2181          if (typeSizeof(dType) == 8)
2182             mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2183       }
2184       break;
2185    }
2186    // constants
2187    case nir_intrinsic_load_subgroup_size: {
2188       LValues &newDefs = convert(&insn->dest);
2189       loadImm(newDefs[0], 32u);
2190       break;
2191    }
2192    case nir_intrinsic_vote_all:
2193    case nir_intrinsic_vote_any:
2194    case nir_intrinsic_vote_ieq: {
2195       LValues &newDefs = convert(&insn->dest);
2196       Value *pred = getScratch(1, FILE_PREDICATE);
           // pred = (src != 0), passed through OP_VOTE with the subOp chosen by
           // getSubOp(op), then converted to the U32 result
2197       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2198       mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2199       mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2200       break;
2201    }
2202    case nir_intrinsic_ballot: {
2203       LValues &newDefs = convert(&insn->dest);
2204       Value *pred = getSSA(1, FILE_PREDICATE);
           // the U32 result of OP_VOTE (VOTE_ANY) on pred = (src != 0) is the ballot
2205       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2206       mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2207       break;
2208    }
2209    case nir_intrinsic_read_first_invocation:
2210    case nir_intrinsic_read_invocation: {
2211       LValues &newDefs = convert(&insn->dest);
2212       const DataType dType = getDType(insn);
2213       Value *tmp = getScratch();
2214
           // tmp is the lane operand of the OP_SHFL below: computed with
           // VOTE_ANY / EXTBF_REV / BFIND_SAMT for read_first_invocation
           // (presumably the first active lane — confirm), src[1] otherwise
2215       if (op == nir_intrinsic_read_first_invocation) {
2216          mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2217          mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2218          mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2219       } else
2220          tmp = getSrc(&insn->src[1], 0);
2221
2222       for (uint8_t i = 0; i < insn->num_components; ++i) {
2223          mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2224             ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2225       }
2226       break;
2227    }
2228    case nir_intrinsic_load_per_vertex_input: {
2229       const DataType dType = getDType(insn);
2230       LValues &newDefs = convert(&insn->dest);
2231       Value *indirectVertex;
2232       Value *indirectOffset;
2233       uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2234       uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2235
           // OP_PFETCH produces the per-vertex base that is passed to loadFrom
2236       Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2237                               mkImm(baseVertex), indirectVertex);
2238       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2239          uint32_t address = getSlotAddress(insn, idx, i);
2240          loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2241                   indirectOffset, vtxBase, info->in[idx].patch);
2242       }
2243       break;
2244    }
2245    case nir_intrinsic_load_per_vertex_output: {
2246       const DataType dType = getDType(insn);
2247       LValues &newDefs = convert(&insn->dest);
2248       Value *indirectVertex;
2249       Value *indirectOffset;
2250       uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2251       uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2252       Value *vtxBase = NULL;
2253
           // vertex index: the indirect value if there is one, else the constant
2254       if (indirectVertex)
2255          vtxBase = indirectVertex;
2256       else
2257          vtxBase = loadImm(NULL, baseVertex);
2258
           // the per-vertex base is outBase + vertex index
2259       vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2260
           // NOTE(review): the patch flag is taken from info->in[idx] although the
           // load is from FILE_SHADER_OUTPUT — confirm whether info->out is meant
2261       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2262          uint32_t address = getSlotAddress(insn, idx, i);
2263          loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2264                   indirectOffset, vtxBase, info->in[idx].patch);
2265       }
2266       break;
2267    }
2268    case nir_intrinsic_emit_vertex:
           // user clip planes are handled before the vertex is emitted
2269       if (info->io.genUserClip > 0)
2270          handleUserClipPlanes();
2271       // fallthrough
2272    case nir_intrinsic_end_primitive: {
2273       uint32_t idx = nir_intrinsic_stream_id(insn);
2274       mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2275       break;
2276    }
2277    case nir_intrinsic_load_ubo: {
2278       const DataType dType = getDType(insn);
2279       LValues &newDefs = convert(&insn->dest);
2280       Value *indirectIndex;
2281       Value *indirectOffset;
           // the buffer index is shifted by 1; const file index 0 is the one the
           // load_uniform case reads from
2282       uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2283       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2284
2285       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2286          loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2287                   indirectOffset, indirectIndex);
2288       }
2289       break;
2290    }
2291    case nir_intrinsic_get_buffer_size: {
2292       LValues &newDefs = convert(&insn->dest);
2293       const DataType dType = getDType(insn);
2294       Value *indirectBuffer;
2295       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2296
2297       Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2298       mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2299       break;
2300    }
2301    case nir_intrinsic_store_ssbo: {
2302       DataType sType = getSType(insn->src[0], false, false);
2303       Value *indirectBuffer;
2304       Value *indirectOffset;
2305       uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2306       uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2307
           // one store per write-mask-enabled component, typeSizeof(sType) apart
2308       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2309          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2310             continue;
2311          Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2312                                 offset + i * typeSizeof(sType));
2313          mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2314             ->setIndirect(0, 1, indirectBuffer);
2315       }
           // globalAccess: 0x2 is set by the buffer/image/global store and atomic
           // cases in this function, 0x1 by the corresponding load cases
2316       info->io.globalAccess |= 0x2;
2317       break;
2318    }
2319    case nir_intrinsic_load_ssbo: {
2320       const DataType dType = getDType(insn);
2321       LValues &newDefs = convert(&insn->dest);
2322       Value *indirectBuffer;
2323       Value *indirectOffset;
2324       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2325       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2326
2327       for (uint8_t i = 0u; i < insn->num_components; ++i)
2328          loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2329                   indirectOffset, indirectBuffer);
2330
2331       info->io.globalAccess |= 0x1;
2332       break;
2333    }
2334    case nir_intrinsic_shared_atomic_add:
2335    case nir_intrinsic_shared_atomic_and:
2336    case nir_intrinsic_shared_atomic_comp_swap:
2337    case nir_intrinsic_shared_atomic_exchange:
2338    case nir_intrinsic_shared_atomic_or:
2339    case nir_intrinsic_shared_atomic_imax:
2340    case nir_intrinsic_shared_atomic_imin:
2341    case nir_intrinsic_shared_atomic_umax:
2342    case nir_intrinsic_shared_atomic_umin:
2343    case nir_intrinsic_shared_atomic_xor: {
2344       const DataType dType = getDType(insn);
2345       LValues &newDefs = convert(&insn->dest);
2346       Value *indirectOffset;
2347       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2348       Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2349       Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
           // comp_swap has one more data operand, taken from src[2]
2350       if (op == nir_intrinsic_shared_atomic_comp_swap)
2351          atom->setSrc(2, getSrc(&insn->src[2], 0));
2352       atom->setIndirect(0, 0, indirectOffset);
2353       atom->subOp = getSubOp(op);
2354       break;
2355    }
2356    case nir_intrinsic_ssbo_atomic_add:
2357    case nir_intrinsic_ssbo_atomic_and:
2358    case nir_intrinsic_ssbo_atomic_comp_swap:
2359    case nir_intrinsic_ssbo_atomic_exchange:
2360    case nir_intrinsic_ssbo_atomic_or:
2361    case nir_intrinsic_ssbo_atomic_imax:
2362    case nir_intrinsic_ssbo_atomic_imin:
2363    case nir_intrinsic_ssbo_atomic_umax:
2364    case nir_intrinsic_ssbo_atomic_umin:
2365    case nir_intrinsic_ssbo_atomic_xor: {
2366       const DataType dType = getDType(insn);
2367       LValues &newDefs = convert(&insn->dest);
2368       Value *indirectBuffer;
2369       Value *indirectOffset;
2370       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2371       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2372
2373       Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2374       Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2375                                 getSrc(&insn->src[2], 0));
           // comp_swap has one more data operand, taken from src[3]
2376       if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2377          atom->setSrc(2, getSrc(&insn->src[3], 0));
2378       atom->setIndirect(0, 0, indirectOffset);
2379       atom->setIndirect(0, 1, indirectBuffer);
2380       atom->subOp = getSubOp(op);
2381
2382       info->io.globalAccess |= 0x2;
2383       break;
2384    }
2385    case nir_intrinsic_bindless_image_atomic_add:
2386    case nir_intrinsic_bindless_image_atomic_and:
2387    case nir_intrinsic_bindless_image_atomic_comp_swap:
2388    case nir_intrinsic_bindless_image_atomic_exchange:
2389    case nir_intrinsic_bindless_image_atomic_imax:
2390    case nir_intrinsic_bindless_image_atomic_umax:
2391    case nir_intrinsic_bindless_image_atomic_imin:
2392    case nir_intrinsic_bindless_image_atomic_umin:
2393    case nir_intrinsic_bindless_image_atomic_or:
2394    case nir_intrinsic_bindless_image_atomic_xor:
2395    case nir_intrinsic_bindless_image_load:
2396    case nir_intrinsic_bindless_image_samples:
2397    case nir_intrinsic_bindless_image_size:
2398    case nir_intrinsic_bindless_image_store: {
           // bindless variant: src[0] is passed on through setIndirectR, the
           // location is fixed at 0, and dim/array/format/access come from the
           // intrinsic's own indices rather than from a variable
2399       std::vector<Value*> srcs, defs;
2400       Value *indirect = getSrc(&insn->src[0], 0);
2401       DataType ty;
2402
2403       uint32_t mask = 0;
2404       TexInstruction::Target target =
2405          convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2406       unsigned int argCount = getNIRArgCount(target);
2407       uint16_t location = 0;
2408
           // default mask: one bit per destination component
2409       if (opInfo.has_dest) {
2410          LValues &newDefs = convert(&insn->dest);
2411          for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2412             defs.push_back(newDefs[i]);
2413             mask |= 1 << i;
2414          }
2415       }
2416
           // per-opcode type, mask override (0x1 atomics, 0xf store, 0x8 samples)
           // and globalAccess bookkeeping
2417       switch (op) {
2418       case nir_intrinsic_bindless_image_atomic_add:
2419       case nir_intrinsic_bindless_image_atomic_and:
2420       case nir_intrinsic_bindless_image_atomic_comp_swap:
2421       case nir_intrinsic_bindless_image_atomic_exchange:
2422       case nir_intrinsic_bindless_image_atomic_imax:
2423       case nir_intrinsic_bindless_image_atomic_umax:
2424       case nir_intrinsic_bindless_image_atomic_imin:
2425       case nir_intrinsic_bindless_image_atomic_umin:
2426       case nir_intrinsic_bindless_image_atomic_or:
2427       case nir_intrinsic_bindless_image_atomic_xor:
2428          ty = getDType(insn);
2429          mask = 0x1;
2430          info->io.globalAccess |= 0x2;
2431          break;
2432       case nir_intrinsic_bindless_image_load:
2433          ty = TYPE_U32;
2434          info->io.globalAccess |= 0x1;
2435          break;
2436       case nir_intrinsic_bindless_image_store:
2437          ty = TYPE_U32;
2438          mask = 0xf;
2439          info->io.globalAccess |= 0x2;
2440          break;
2441       case nir_intrinsic_bindless_image_samples:
2442          mask = 0x8;
2443          ty = TYPE_U32;
2444          break;
2445       case nir_intrinsic_bindless_image_size:
2446          ty = TYPE_U32;
2447          break;
2448       default:
2449          unreachable("unhandled image opcode");
2450          break;
2451       }
2452
2453       // coords
2454       if (opInfo.num_srcs >= 2)
2455          for (unsigned int i = 0u; i < argCount; ++i)
2456             srcs.push_back(getSrc(&insn->src[1], i));
2457
2458       // the sampler is just another src added after coords
2459       if (opInfo.num_srcs >= 3 && target.isMS())
2460          srcs.push_back(getSrc(&insn->src[2], 0));
2461
           // src[3]: use the component count declared for the intrinsic, or
           // insn->num_components when that declared count is 0
2462       if (opInfo.num_srcs >= 4) {
2463          unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2464          for (uint8_t i = 0u; i < components; ++i)
2465             srcs.push_back(getSrc(&insn->src[3], i));
2466       }
2467
2468       if (opInfo.num_srcs >= 5)
2469          // 1 for atomic swap
2470          for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2471             srcs.push_back(getSrc(&insn->src[4], i));
2472
2473       TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
           // NOTE(review): tex.bindless is assigned false here and true three lines
           // below; the value that remains is true
2474       texi->tex.bindless = false;
2475       texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(nir_intrinsic_format(insn))];
2476       texi->tex.mask = mask;
2477       texi->tex.bindless = true;
2478       texi->cache = convert(nir_intrinsic_access(insn));
2479       texi->setType(ty);
2480       texi->subOp = getSubOp(op);
2481
2482       if (indirect)
2483          texi->setIndirectR(indirect);
2484
2485       break;
2486    }
2487    case nir_intrinsic_image_deref_atomic_add:
2488    case nir_intrinsic_image_deref_atomic_and:
2489    case nir_intrinsic_image_deref_atomic_comp_swap:
2490    case nir_intrinsic_image_deref_atomic_exchange:
2491    case nir_intrinsic_image_deref_atomic_imax:
2492    case nir_intrinsic_image_deref_atomic_umax:
2493    case nir_intrinsic_image_deref_atomic_imin:
2494    case nir_intrinsic_image_deref_atomic_umin:
2495    case nir_intrinsic_image_deref_atomic_or:
2496    case nir_intrinsic_image_deref_atomic_xor:
2497    case nir_intrinsic_image_deref_load:
2498    case nir_intrinsic_image_deref_samples:
2499    case nir_intrinsic_image_deref_size:
2500    case nir_intrinsic_image_deref_store: {
           // deref variant: target comes from the deref's glsl type; location,
           // indirect and the variable `tex` are filled in by handleDeref, and
           // format and cache mode are taken from that variable
2501       const nir_variable *tex;
2502       std::vector<Value*> srcs, defs;
2503       Value *indirect;
2504       DataType ty;
2505
2506       uint32_t mask = 0;
2507       nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2508       const glsl_type *type = deref->type;
2509       TexInstruction::Target target =
2510          convert((glsl_sampler_dim)type->sampler_dimensionality,
2511                  type->sampler_array, type->sampler_shadow);
2512       unsigned int argCount = getNIRArgCount(target);
2513       uint16_t location = handleDeref(deref, indirect, tex);
2514
           // default mask: one bit per destination component
2515       if (opInfo.has_dest) {
2516          LValues &newDefs = convert(&insn->dest);
2517          for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2518             defs.push_back(newDefs[i]);
2519             mask |= 1 << i;
2520          }
2521       }
2522
           // per-opcode type, mask override (0x1 atomics, 0xf store, 0x8 samples)
           // and globalAccess bookkeeping
2523       switch (op) {
2524       case nir_intrinsic_image_deref_atomic_add:
2525       case nir_intrinsic_image_deref_atomic_and:
2526       case nir_intrinsic_image_deref_atomic_comp_swap:
2527       case nir_intrinsic_image_deref_atomic_exchange:
2528       case nir_intrinsic_image_deref_atomic_imax:
2529       case nir_intrinsic_image_deref_atomic_umax:
2530       case nir_intrinsic_image_deref_atomic_imin:
2531       case nir_intrinsic_image_deref_atomic_umin:
2532       case nir_intrinsic_image_deref_atomic_or:
2533       case nir_intrinsic_image_deref_atomic_xor:
2534          ty = getDType(insn);
2535          mask = 0x1;
2536          info->io.globalAccess |= 0x2;
2537          break;
2538       case nir_intrinsic_image_deref_load:
2539          ty = TYPE_U32;
2540          info->io.globalAccess |= 0x1;
2541          break;
2542       case nir_intrinsic_image_deref_store:
2543          ty = TYPE_U32;
2544          mask = 0xf;
2545          info->io.globalAccess |= 0x2;
2546          break;
2547       case nir_intrinsic_image_deref_samples:
2548          mask = 0x8;
2549          ty = TYPE_U32;
2550          break;
2551       case nir_intrinsic_image_deref_size:
2552          ty = TYPE_U32;
2553          break;
2554       default:
2555          unreachable("unhandled image opcode");
2556          break;
2557       }
2558
2559       // coords
2560       if (opInfo.num_srcs >= 2)
2561          for (unsigned int i = 0u; i < argCount; ++i)
2562             srcs.push_back(getSrc(&insn->src[1], i));
2563
2564       // the sampler is just another src added after coords
2565       if (opInfo.num_srcs >= 3 && target.isMS())
2566          srcs.push_back(getSrc(&insn->src[2], 0));
2567
           // src[3]: use the component count declared for the intrinsic, or
           // insn->num_components when that declared count is 0
2568       if (opInfo.num_srcs >= 4) {
2569          unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2570          for (uint8_t i = 0u; i < components; ++i)
2571             srcs.push_back(getSrc(&insn->src[3], i));
2572       }
2573
2574       if (opInfo.num_srcs >= 5)
2575          // 1 for atomic swap
2576          for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2577             srcs.push_back(getSrc(&insn->src[4], i));
2578
2579       TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2580       texi->tex.bindless = false;
2581       texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)];
2582       texi->tex.mask = mask;
2583       texi->cache = getCacheModeFromVar(tex);
2584       texi->setType(ty);
2585       texi->subOp = getSubOp(op);
2586
2587       if (indirect)
2588          texi->setIndirectR(indirect);
2589
2590       break;
2591    }
2592    case nir_intrinsic_store_shared: {
2593       DataType sType = getSType(insn->src[0], false, false);
2594       Value *indirectOffset;
2595       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2596
           // one store per write-mask-enabled component, typeSizeof(sType) apart
2597       for (uint8_t i = 0u; i < insn->num_components; ++i) {
2598          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2599             continue;
2600          Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2601          mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2602       }
2603       break;
2604    }
2605    case nir_intrinsic_load_shared: {
2606       const DataType dType = getDType(insn);
2607       LValues &newDefs = convert(&insn->dest);
2608       Value *indirectOffset;
2609       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2610
2611       for (uint8_t i = 0u; i < insn->num_components; ++i)
2612          loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2613
2614       break;
2615    }
2616    case nir_intrinsic_barrier: {
2617       // TODO: add flag to shader_info
2618       info->numBarriers = 1;
2619       Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2620       bar->fixed = 1;
2621       bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2622       break;
2623    }
        // every memory-barrier flavour becomes one OP_MEMBAR; getSubOp(op)
        // supplies the subOp
2624    case nir_intrinsic_group_memory_barrier:
2625    case nir_intrinsic_memory_barrier:
2626    case nir_intrinsic_memory_barrier_atomic_counter:
2627    case nir_intrinsic_memory_barrier_buffer:
2628    case nir_intrinsic_memory_barrier_image:
2629    case nir_intrinsic_memory_barrier_shared: {
2630       Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2631       bar->fixed = 1;
2632       bar->subOp = getSubOp(op);
2633       break;
2634    }
2635    case nir_intrinsic_shader_clock: {
2636       const DataType dType = getDType(insn);
2637       LValues &newDefs = convert(&insn->dest);
2638
           // component 0 is the constant 0, component 1 is read from SV_CLOCK
2639       loadImm(newDefs[0], 0u);
2640       mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2641       break;
2642    }
2643    case nir_intrinsic_load_global: {
2644       const DataType dType = getDType(insn);
2645       LValues &newDefs = convert(&insn->dest);
2646       Value *indirectOffset;
2647       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2648
2649       for (auto i = 0u; i < insn->num_components; ++i)
2650          loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2651
2652       info->io.globalAccess |= 0x1;
2653       break;
2654    }
2655    case nir_intrinsic_store_global: {
2656       DataType sType = getSType(insn->src[0], false, false);
2657
           // src[1] is passed to mkStore as the indirect address; the symbol only
           // carries the per-component offset
2658       for (auto i = 0u; i < insn->num_components; ++i) {
2659          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2660             continue;
2661          if (typeSizeof(sType) == 8) {
                 // 8-byte values are split and written as two U32 stores, the
                 // second one 4 further along
2662             Value *split[2];
2663             mkSplit(split, 4, getSrc(&insn->src[0], i));
2664
2665             Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2666             mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2667
2668             sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2669             mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2670          } else {
2671             Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2672             mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2673          }
2674       }
2675
2676       info->io.globalAccess |= 0x2;
2677       break;
2678    }
        // any intrinsic without a case above is reported and rejected
2679    default:
2680       ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2681       return false;
2682    }
2683
2684    return true;
2685 }
2686
2687 bool
2688 Converter::visit(nir_jump_instr *insn)
2689 {
2690    switch (insn->type) {
2691    case nir_jump_return:
2692       // TODO: this only works in the main function
2693       mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2694       bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2695       break;
2696    case nir_jump_break:
2697    case nir_jump_continue: {
2698       bool isBreak = insn->type == nir_jump_break;
2699       nir_block *block = insn->instr.block;
2700       assert(!block->successors[1]);
2701       BasicBlock *target = convert(block->successors[0]);
2702       mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2703       bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2704       break;
2705    }
2706    default:
2707       ERROR("unknown nir_jump_type %u\n", insn->type);
2708       return false;
2709    }
2710
2711    return true;
2712 }
2713
2714 Value*
2715 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2716 {
2717    Value *val;
2718
2719    if (immInsertPos)
2720       setPosition(immInsertPos, true);
2721    else
2722       setPosition(bb, false);
2723
2724    switch (insn->def.bit_size) {
2725    case 64:
2726       val = loadImm(getSSA(8), insn->value[idx].u64);
2727       break;
2728    case 32:
2729       val = loadImm(getSSA(4), insn->value[idx].u32);
2730       break;
2731    case 16:
2732       val = loadImm(getSSA(2), insn->value[idx].u16);
2733       break;
2734    case 8:
2735       val = loadImm(getSSA(1), insn->value[idx].u8);
2736       break;
2737    default:
2738       unreachable("unhandled bit size!\n");
2739    }
2740    setPosition(bb, true);
2741    return val;
2742 }
2743
2744 bool
2745 Converter::visit(nir_load_const_instr *insn)
2746 {
2747    assert(insn->def.bit_size <= 64);
2748    immediates[insn->def.index] = insn;
2749    return true;
2750 }
2751
2752 #define DEFAULT_CHECKS \
2753       if (insn->dest.dest.ssa.num_components > 1) { \
2754          ERROR("nir_alu_instr only supported with 1 component!\n"); \
2755          return false; \
2756       } \
2757       if (insn->dest.write_mask != 1) { \
2758          ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2759          return false; \
2760       }
2761 bool
2762 Converter::visit(nir_alu_instr *insn)
2763 {
2764    const nir_op op = insn->op;
2765    const nir_op_info &info = nir_op_infos[op];
2766    DataType dType = getDType(insn);
2767    const std::vector<DataType> sTypes = getSTypes(insn);
2768
2769    Instruction *oldPos = this->bb->getExit();
2770
2771    switch (op) {
2772    case nir_op_fabs:
2773    case nir_op_iabs:
2774    case nir_op_fadd:
2775    case nir_op_iadd:
2776    case nir_op_iand:
2777    case nir_op_fceil:
2778    case nir_op_fcos:
2779    case nir_op_fddx:
2780    case nir_op_fddx_coarse:
2781    case nir_op_fddx_fine:
2782    case nir_op_fddy:
2783    case nir_op_fddy_coarse:
2784    case nir_op_fddy_fine:
2785    case nir_op_fdiv:
2786    case nir_op_idiv:
2787    case nir_op_udiv:
2788    case nir_op_fexp2:
2789    case nir_op_ffloor:
2790    case nir_op_ffma:
2791    case nir_op_flog2:
2792    case nir_op_fmax:
2793    case nir_op_imax:
2794    case nir_op_umax:
2795    case nir_op_fmin:
2796    case nir_op_imin:
2797    case nir_op_umin:
2798    case nir_op_fmod:
2799    case nir_op_imod:
2800    case nir_op_umod:
2801    case nir_op_fmul:
2802    case nir_op_imul:
2803    case nir_op_imul_high:
2804    case nir_op_umul_high:
2805    case nir_op_fneg:
2806    case nir_op_ineg:
2807    case nir_op_inot:
2808    case nir_op_ior:
2809    case nir_op_pack_64_2x32_split:
2810    case nir_op_fpow:
2811    case nir_op_frcp:
2812    case nir_op_frem:
2813    case nir_op_irem:
2814    case nir_op_frsq:
2815    case nir_op_fsat:
2816    case nir_op_ishr:
2817    case nir_op_ushr:
2818    case nir_op_fsin:
2819    case nir_op_fsqrt:
2820    case nir_op_fsub:
2821    case nir_op_isub:
2822    case nir_op_ftrunc:
2823    case nir_op_ishl:
2824    case nir_op_ixor: {
2825       DEFAULT_CHECKS;
2826       LValues &newDefs = convert(&insn->dest);
2827       operation preOp = preOperationNeeded(op);
2828       if (preOp != OP_NOP) {
2829          assert(info.num_inputs < 2);
2830          Value *tmp = getSSA(typeSizeof(dType));
2831          Instruction *i0 = mkOp(preOp, dType, tmp);
2832          Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2833          if (info.num_inputs) {
2834             i0->setSrc(0, getSrc(&insn->src[0]));
2835             i1->setSrc(0, tmp);
2836          }
2837          i1->subOp = getSubOp(op);
2838       } else {
2839          Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2840          for (unsigned s = 0u; s < info.num_inputs; ++s) {
2841             i->setSrc(s, getSrc(&insn->src[s]));
2842          }
2843          i->subOp = getSubOp(op);
2844       }
2845       break;
2846    }
2847    case nir_op_ifind_msb:
2848    case nir_op_ufind_msb: {
2849       DEFAULT_CHECKS;
2850       LValues &newDefs = convert(&insn->dest);
2851       dType = sTypes[0];
2852       mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2853       break;
2854    }
2855    case nir_op_fround_even: {
2856       DEFAULT_CHECKS;
2857       LValues &newDefs = convert(&insn->dest);
2858       mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2859       break;
2860    }
2861    // convert instructions
2862    case nir_op_f2f32:
2863    case nir_op_f2i32:
2864    case nir_op_f2u32:
2865    case nir_op_i2f32:
2866    case nir_op_i2i32:
2867    case nir_op_u2f32:
2868    case nir_op_u2u32:
2869    case nir_op_f2f64:
2870    case nir_op_f2i64:
2871    case nir_op_f2u64:
2872    case nir_op_i2f64:
2873    case nir_op_i2i64:
2874    case nir_op_u2f64:
2875    case nir_op_u2u64: {
2876       DEFAULT_CHECKS;
2877       LValues &newDefs = convert(&insn->dest);
2878       Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2879       if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2880          i->rnd = ROUND_Z;
2881       i->sType = sTypes[0];
2882       break;
2883    }
2884    // compare instructions
2885    case nir_op_feq32:
2886    case nir_op_ieq32:
2887    case nir_op_fge32:
2888    case nir_op_ige32:
2889    case nir_op_uge32:
2890    case nir_op_flt32:
2891    case nir_op_ilt32:
2892    case nir_op_ult32:
2893    case nir_op_fne32:
2894    case nir_op_ine32: {
2895       DEFAULT_CHECKS;
2896       LValues &newDefs = convert(&insn->dest);
2897       Instruction *i = mkCmp(getOperation(op),
2898                              getCondCode(op),
2899                              dType,
2900                              newDefs[0],
2901                              dType,
2902                              getSrc(&insn->src[0]),
2903                              getSrc(&insn->src[1]));
2904       if (info.num_inputs == 3)
2905          i->setSrc(2, getSrc(&insn->src[2]));
2906       i->sType = sTypes[0];
2907       break;
2908    }
2909    // those are weird ALU ops and need special handling, because
2910    //   1. they are always componend based
2911    //   2. they basically just merge multiple values into one data type
2912    case nir_op_mov:
2913       if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2914          nir_reg_dest& reg = insn->dest.dest.reg;
2915          uint32_t goffset = regToLmemOffset[reg.reg->index];
2916          uint8_t comps = reg.reg->num_components;
2917          uint8_t size = reg.reg->bit_size / 8;
2918          uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2919          uint32_t aoffset = csize * reg.base_offset;
2920          Value *indirect = NULL;
2921
2922          if (reg.indirect)
2923             indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2924                               getSrc(reg.indirect, 0), mkImm(csize));
2925
2926          for (uint8_t i = 0u; i < comps; ++i) {
2927             if (!((1u << i) & insn->dest.write_mask))
2928                continue;
2929
2930             Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2931             mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2932          }
2933          break;
2934       } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2935          LValues &newDefs = convert(&insn->dest);
2936          nir_reg_src& reg = insn->src[0].src.reg;
2937          uint32_t goffset = regToLmemOffset[reg.reg->index];
2938          // uint8_t comps = reg.reg->num_components;
2939          uint8_t size = reg.reg->bit_size / 8;
2940          uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2941          uint32_t aoffset = csize * reg.base_offset;
2942          Value *indirect = NULL;
2943
2944          if (reg.indirect)
2945             indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2946
2947          for (uint8_t i = 0u; i < newDefs.size(); ++i)
2948             loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2949
2950          break;
2951       } else {
2952          LValues &newDefs = convert(&insn->dest);
2953          for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2954             mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2955          }
2956       }
2957       break;
2958    case nir_op_vec2:
2959    case nir_op_vec3:
2960    case nir_op_vec4: {
2961       LValues &newDefs = convert(&insn->dest);
2962       for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2963          mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2964       }
2965       break;
2966    }
2967    // (un)pack
2968    case nir_op_pack_64_2x32: {
2969       LValues &newDefs = convert(&insn->dest);
2970       Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2971       merge->setSrc(0, getSrc(&insn->src[0], 0));
2972       merge->setSrc(1, getSrc(&insn->src[0], 1));
2973       break;
2974    }
2975    case nir_op_pack_half_2x16_split: {
2976       LValues &newDefs = convert(&insn->dest);
2977       Value *tmpH = getSSA();
2978       Value *tmpL = getSSA();
2979
2980       mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2981       mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2982       mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2983       break;
2984    }
2985    case nir_op_unpack_half_2x16_split_x:
2986    case nir_op_unpack_half_2x16_split_y: {
2987       LValues &newDefs = convert(&insn->dest);
2988       Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2989       if (op == nir_op_unpack_half_2x16_split_y)
2990          cvt->subOp = 1;
2991       break;
2992    }
2993    case nir_op_unpack_64_2x32: {
2994       LValues &newDefs = convert(&insn->dest);
2995       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2996       break;
2997    }
2998    case nir_op_unpack_64_2x32_split_x: {
2999       LValues &newDefs = convert(&insn->dest);
3000       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
3001       break;
3002    }
3003    case nir_op_unpack_64_2x32_split_y: {
3004       LValues &newDefs = convert(&insn->dest);
3005       mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
3006       break;
3007    }
3008    // special instructions
3009    case nir_op_fsign:
3010    case nir_op_isign: {
3011       DEFAULT_CHECKS;
3012       DataType iType;
3013       if (::isFloatType(dType))
3014          iType = TYPE_F32;
3015       else
3016          iType = TYPE_S32;
3017
3018       LValues &newDefs = convert(&insn->dest);
3019       LValue *val0 = getScratch();
3020       LValue *val1 = getScratch();
3021       mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
3022       mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
3023
3024       if (dType == TYPE_F64) {
3025          mkOp2(OP_SUB, iType, val0, val0, val1);
3026          mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
3027       } else if (dType == TYPE_S64 || dType == TYPE_U64) {
3028          mkOp2(OP_SUB, iType, val0, val1, val0);
3029          mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
3030          mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
3031       } else if (::isFloatType(dType))
3032          mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
3033       else
3034          mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
3035       break;
3036    }
3037    case nir_op_fcsel:
3038    case nir_op_b32csel: {
3039       DEFAULT_CHECKS;
3040       LValues &newDefs = convert(&insn->dest);
3041       mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
3042       break;
3043    }
3044    case nir_op_ibitfield_extract:
3045    case nir_op_ubitfield_extract: {
3046       DEFAULT_CHECKS;
3047       Value *tmp = getSSA();
3048       LValues &newDefs = convert(&insn->dest);
3049       mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3050       mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
3051       break;
3052    }
3053    case nir_op_bfm: {
3054       DEFAULT_CHECKS;
3055       LValues &newDefs = convert(&insn->dest);
3056       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3057       break;
3058    }
3059    case nir_op_bitfield_insert: {
3060       DEFAULT_CHECKS;
3061       LValues &newDefs = convert(&insn->dest);
3062       LValue *temp = getSSA();
3063       mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
3064       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
3065       break;
3066    }
3067    case nir_op_bit_count: {
3068       DEFAULT_CHECKS;
3069       LValues &newDefs = convert(&insn->dest);
3070       mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
3071       break;
3072    }
3073    case nir_op_bitfield_reverse: {
3074       DEFAULT_CHECKS;
3075       LValues &newDefs = convert(&insn->dest);
3076       mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3077       break;
3078    }
3079    case nir_op_find_lsb: {
3080       DEFAULT_CHECKS;
3081       LValues &newDefs = convert(&insn->dest);
3082       Value *tmp = getSSA();
3083       mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3084       mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3085       break;
3086    }
3087    // boolean conversions
3088    case nir_op_b2f32: {
3089       DEFAULT_CHECKS;
3090       LValues &newDefs = convert(&insn->dest);
3091       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
3092       break;
3093    }
3094    case nir_op_b2f64: {
3095       DEFAULT_CHECKS;
3096       LValues &newDefs = convert(&insn->dest);
3097       Value *tmp = getSSA(4);
3098       mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
3099       mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
3100       break;
3101    }
3102    case nir_op_f2b32:
3103    case nir_op_i2b32: {
3104       DEFAULT_CHECKS;
3105       LValues &newDefs = convert(&insn->dest);
3106       Value *src1;
3107       if (typeSizeof(sTypes[0]) == 8) {
3108          src1 = loadImm(getSSA(8), 0.0);
3109       } else {
3110          src1 = zero;
3111       }
3112       CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
3113       mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
3114       break;
3115    }
3116    case nir_op_b2i32: {
3117       DEFAULT_CHECKS;
3118       LValues &newDefs = convert(&insn->dest);
3119       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
3120       break;
3121    }
3122    case nir_op_b2i64: {
3123       DEFAULT_CHECKS;
3124       LValues &newDefs = convert(&insn->dest);
3125       LValue *def = getScratch();
3126       mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
3127       mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
3128       break;
3129    }
3130    default:
3131       ERROR("unknown nir_op %s\n", info.name);
3132       return false;
3133    }
3134
3135    if (!oldPos) {
3136       oldPos = this->bb->getEntry();
3137       oldPos->precise = insn->exact;
3138    }
3139
3140    if (unlikely(!oldPos))
3141       return true;
3142
3143    while (oldPos->next) {
3144       oldPos = oldPos->next;
3145       oldPos->precise = insn->exact;
3146    }
3147    oldPos->saturate = insn->dest.saturate;
3148
3149    return true;
3150 }
3151 #undef DEFAULT_CHECKS
3152
3153 bool
3154 Converter::visit(nir_ssa_undef_instr *insn)
3155 {
3156    LValues &newDefs = convert(&insn->def);
3157    for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
3158       mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
3159    }
3160    return true;
3161 }
3162
3163 #define CASE_SAMPLER(ty) \
3164    case GLSL_SAMPLER_DIM_ ## ty : \
3165       if (isArray && !isShadow) \
3166          return TEX_TARGET_ ## ty ## _ARRAY; \
3167       else if (!isArray && isShadow) \
3168          return TEX_TARGET_## ty ## _SHADOW; \
3169       else if (isArray && isShadow) \
3170          return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
3171       else \
3172          return TEX_TARGET_ ## ty
3173
3174 TexTarget
3175 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
3176 {
3177    switch (dim) {
3178    CASE_SAMPLER(1D);
3179    CASE_SAMPLER(2D);
3180    CASE_SAMPLER(CUBE);
3181    case GLSL_SAMPLER_DIM_3D:
3182       return TEX_TARGET_3D;
3183    case GLSL_SAMPLER_DIM_MS:
3184       if (isArray)
3185          return TEX_TARGET_2D_MS_ARRAY;
3186       return TEX_TARGET_2D_MS;
3187    case GLSL_SAMPLER_DIM_RECT:
3188       if (isShadow)
3189          return TEX_TARGET_RECT_SHADOW;
3190       return TEX_TARGET_RECT;
3191    case GLSL_SAMPLER_DIM_BUF:
3192       return TEX_TARGET_BUFFER;
3193    case GLSL_SAMPLER_DIM_EXTERNAL:
3194       return TEX_TARGET_2D;
3195    default:
3196       ERROR("unknown glsl_sampler_dim %u\n", dim);
3197       assert(false);
3198       return TEX_TARGET_COUNT;
3199    }
3200 }
3201 #undef CASE_SAMPLER
3202
3203 Value*
3204 Converter::applyProjection(Value *src, Value *proj)
3205 {
3206    if (!proj)
3207       return src;
3208    return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3209 }
3210
3211 unsigned int
3212 Converter::getNIRArgCount(TexInstruction::Target& target)
3213 {
3214    unsigned int result = target.getArgCount();
3215    if (target.isCube() && target.isArray())
3216       result--;
3217    if (target.isMS())
3218       result--;
3219    return result;
3220 }
3221
3222 uint16_t
3223 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
3224 {
3225    typedef std::pair<uint32_t,Value*> DerefPair;
3226    std::list<DerefPair> derefs;
3227
3228    uint16_t result = 0;
3229    while (deref->deref_type != nir_deref_type_var) {
3230       switch (deref->deref_type) {
3231       case nir_deref_type_array: {
3232          Value *indirect;
3233          uint8_t size = type_size(deref->type, true);
3234          result += size * getIndirect(&deref->arr.index, 0, indirect);
3235
3236          if (indirect) {
3237             derefs.push_front(std::make_pair(size, indirect));
3238          }
3239
3240          break;
3241       }
3242       case nir_deref_type_struct: {
3243          result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
3244          break;
3245       }
3246       case nir_deref_type_var:
3247       default:
3248          unreachable("nir_deref_type_var reached in handleDeref!");
3249          break;
3250       }
3251       deref = nir_deref_instr_parent(deref);
3252    }
3253
3254    indirect = NULL;
3255    for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
3256       Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
3257       if (indirect)
3258          indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
3259       else
3260          indirect = offset;
3261    }
3262
3263    tex = nir_deref_instr_get_variable(deref);
3264    assert(tex);
3265
3266    return result + tex->data.driver_location;
3267 }
3268
3269 CacheMode
3270 Converter::convert(enum gl_access_qualifier access)
3271 {
3272    switch (access) {
3273    case ACCESS_VOLATILE:
3274       return CACHE_CV;
3275    case ACCESS_COHERENT:
3276       return CACHE_CG;
3277    default:
3278       return CACHE_CA;
3279    }
3280 }
3281
3282 CacheMode
3283 Converter::getCacheModeFromVar(const nir_variable *var)
3284 {
3285    return convert(var->data.image.access);
3286 }
3287
3288 bool
3289 Converter::visit(nir_tex_instr *insn)
3290 {
3291    switch (insn->op) {
3292    case nir_texop_lod:
3293    case nir_texop_query_levels:
3294    case nir_texop_tex:
3295    case nir_texop_texture_samples:
3296    case nir_texop_tg4:
3297    case nir_texop_txb:
3298    case nir_texop_txd:
3299    case nir_texop_txf:
3300    case nir_texop_txf_ms:
3301    case nir_texop_txl:
3302    case nir_texop_txs: {
3303       LValues &newDefs = convert(&insn->dest);
3304       std::vector<Value*> srcs;
3305       std::vector<Value*> defs;
3306       std::vector<nir_src*> offsets;
3307       uint8_t mask = 0;
3308       bool lz = false;
3309       Value *proj = NULL;
3310       TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3311       operation op = getOperation(insn->op);
3312
3313       int r, s;
3314       int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3315       int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3316       int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3317       int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3318       int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3319       int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3320       int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3321       int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3322       int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3323       int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3324       int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3325       int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3326       int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3327
3328       bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3329       assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3330
3331       if (projIdx != -1)
3332          proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3333
3334       srcs.resize(insn->coord_components);
3335       for (uint8_t i = 0u; i < insn->coord_components; ++i)
3336          srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3337
3338       // sometimes we get less args than target.getArgCount, but codegen expects the latter
3339       if (insn->coord_components) {
3340          uint32_t argCount = target.getArgCount();
3341
3342          if (target.isMS())
3343             argCount -= 1;
3344
3345          for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3346             srcs.push_back(getSSA());
3347       }
3348
3349       if (insn->op == nir_texop_texture_samples)
3350          srcs.push_back(zero);
3351       else if (!insn->num_srcs)
3352          srcs.push_back(loadImm(NULL, 0));
3353       if (biasIdx != -1)
3354          srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3355       if (lodIdx != -1)
3356          srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3357       else if (op == OP_TXF)
3358          lz = true;
3359       if (msIdx != -1)
3360          srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3361       if (offsetIdx != -1)
3362          offsets.push_back(&insn->src[offsetIdx].src);
3363       if (compIdx != -1)
3364          srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3365       if (texOffIdx != -1) {
3366          srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3367          texOffIdx = srcs.size() - 1;
3368       }
3369       if (sampOffIdx != -1) {
3370          srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3371          sampOffIdx = srcs.size() - 1;
3372       }
3373       if (bindless) {
3374          // currently we use the lower bits
3375          Value *split[2];
3376          Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3377
3378          mkSplit(split, 4, handle);
3379
3380          srcs.push_back(split[0]);
3381          texOffIdx = srcs.size() - 1;
3382       }
3383
3384       r = bindless ? 0xff : insn->texture_index;
3385       s = bindless ? 0x1f : insn->sampler_index;
3386
3387       defs.resize(newDefs.size());
3388       for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3389          defs[d] = newDefs[d];
3390          mask |= 1 << d;
3391       }
3392       if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3393          lz = true;
3394
3395       TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3396       texi->tex.levelZero = lz;
3397       texi->tex.mask = mask;
3398       texi->tex.bindless = bindless;
3399
3400       if (texOffIdx != -1)
3401          texi->tex.rIndirectSrc = texOffIdx;
3402       if (sampOffIdx != -1)
3403          texi->tex.sIndirectSrc = sampOffIdx;
3404
3405       switch (insn->op) {
3406       case nir_texop_tg4:
3407          if (!target.isShadow())
3408             texi->tex.gatherComp = insn->component;
3409          break;
3410       case nir_texop_txs:
3411          texi->tex.query = TXQ_DIMS;
3412          break;
3413       case nir_texop_texture_samples:
3414          texi->tex.mask = 0x4;
3415          texi->tex.query = TXQ_TYPE;
3416          break;
3417       case nir_texop_query_levels:
3418          texi->tex.mask = 0x8;
3419          texi->tex.query = TXQ_DIMS;
3420          break;
3421       default:
3422          break;
3423       }
3424
3425       texi->tex.useOffsets = offsets.size();
3426       if (texi->tex.useOffsets) {
3427          for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3428             for (uint32_t c = 0u; c < 3; ++c) {
3429                uint8_t s2 = std::min(c, target.getDim() - 1);
3430                texi->offset[s][c].set(getSrc(offsets[s], s2));
3431                texi->offset[s][c].setInsn(texi);
3432             }
3433          }
3434       }
3435
3436       if (op == OP_TXG && offsetIdx == -1) {
3437          if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3438             texi->tex.useOffsets = 4;
3439             setPosition(texi, false);
3440             for (uint8_t i = 0; i < 4; ++i) {
3441                for (uint8_t j = 0; j < 2; ++j) {
3442                   texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3443                   texi->offset[i][j].setInsn(texi);
3444                }
3445             }
3446             setPosition(texi, true);
3447          }
3448       }
3449
3450       if (ddxIdx != -1 && ddyIdx != -1) {
3451          for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3452             texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3453             texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3454          }
3455       }
3456
3457       break;
3458    }
3459    default:
3460       ERROR("unknown nir_texop %u\n", insn->op);
3461       return false;
3462    }
3463    return true;
3464 }
3465
3466 bool
3467 Converter::visit(nir_deref_instr *deref)
3468 {
3469    // we just ignore those, because images intrinsics are the only place where
3470    // we should end up with deref sources and those have to backtrack anyway
3471    // to get the nir_variable. This code just exists to handle some special
3472    // cases.
3473    switch (deref->deref_type) {
3474    case nir_deref_type_array:
3475    case nir_deref_type_struct:
3476    case nir_deref_type_var:
3477       break;
3478    default:
3479       ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3480       return false;
3481    }
3482    return true;
3483 }
3484
3485 bool
3486 Converter::run()
3487 {
3488    bool progress;
3489
3490    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3491       nir_print_shader(nir, stderr);
3492
3493    struct nir_lower_subgroups_options subgroup_options = {
3494       .subgroup_size = 32,
3495       .ballot_bit_size = 32,
3496    };
3497
3498    NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3499    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3500    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3501    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3502    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3503    NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3504    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3505
3506    do {
3507       progress = false;
3508       NIR_PASS(progress, nir, nir_copy_prop);
3509       NIR_PASS(progress, nir, nir_opt_remove_phis);
3510       NIR_PASS(progress, nir, nir_opt_trivial_continues);
3511       NIR_PASS(progress, nir, nir_opt_cse);
3512       NIR_PASS(progress, nir, nir_opt_algebraic);
3513       NIR_PASS(progress, nir, nir_opt_constant_folding);
3514       NIR_PASS(progress, nir, nir_copy_prop);
3515       NIR_PASS(progress, nir, nir_opt_dce);
3516       NIR_PASS(progress, nir, nir_opt_dead_cf);
3517    } while (progress);
3518
3519    NIR_PASS_V(nir, nir_lower_bool_to_int32);
3520    NIR_PASS_V(nir, nir_lower_locals_to_regs);
3521    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3522    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3523
3524    // Garbage collect dead instructions
3525    nir_sweep(nir);
3526
3527    if (!parseNIR()) {
3528       ERROR("Couldn't prase NIR!\n");
3529       return false;
3530    }
3531
3532    if (!assignSlots()) {
3533       ERROR("Couldn't assign slots!\n");
3534       return false;
3535    }
3536
3537    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3538       nir_print_shader(nir, stderr);
3539
3540    nir_foreach_function(function, nir) {
3541       if (!visit(function))
3542          return false;
3543    }
3544
3545    return true;
3546 }
3547
3548 } // unnamed namespace
3549
3550 namespace nv50_ir {
3551
3552 bool
3553 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3554 {
3555    nir_shader *nir = (nir_shader*)info->bin.source;
3556    Converter converter(this, nir, info);
3557    bool result = converter.run();
3558    if (!result)
3559       return result;
3560    LoweringHelper lowering;
3561    lowering.run(this);
3562    tlsSize = info->bin.tlsSpace;
3563    return result;
3564 }
3565
3566 } // namespace nv50_ir