nv50ir/nir: move immediates before use
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <list>
40 #include <vector>
41
42 namespace {
43
44 #if __cplusplus >= 201103L
45 using std::hash;
46 using std::unordered_map;
47 #else
48 using std::tr1::hash;
49 using std::tr1::unordered_map;
50 #endif
51
52 using namespace nv50_ir;
53
54 int
55 type_size(const struct glsl_type *type)
56 {
57 return glsl_count_attribute_slots(type, false);
58 }
59
60 class Converter : public ConverterCommon
61 {
62 public:
63 Converter(Program *, nir_shader *, nv50_ir_prog_info *);
64
65 bool run();
66 private:
67 typedef std::vector<LValue*> LValues;
68 typedef unordered_map<unsigned, LValues> NirDefMap;
69 typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
70 typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
71 typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
72
73 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
74 LValues& convert(nir_alu_dest *);
75 BasicBlock* convert(nir_block *);
76 LValues& convert(nir_dest *);
77 SVSemantic convert(nir_intrinsic_op);
78 Value* convert(nir_load_const_instr*, uint8_t);
79 LValues& convert(nir_register *);
80 LValues& convert(nir_ssa_def *);
81
82 ImgFormat convertGLImgFormat(GLuint);
83
84 Value* getSrc(nir_alu_src *, uint8_t component = 0);
85 Value* getSrc(nir_register *, uint8_t);
86 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
87 Value* getSrc(nir_ssa_def *, uint8_t);
88
89 // The returned value is the constant part of the given source (either the
90 // nir_src or the selected source component of an intrinsic). Even though
91 // this is mostly an optimization to be able to skip indirects in a few
92 // cases, sometimes we require immediate values or have to set some fields
93 // on instructions (e.g. tex) in order for codegen to consume those.
94 // If the found value has no constant part, the value is returned
95 // through the Value parameter instead.
96 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
97 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);
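// Illustrative usage sketch for the nir_src overload (not part of the
// original file):
//
//   Value *indirect;
//   uint32_t base = getIndirect(&insn->src[0], 0, indirect);
//   if (indirect)
//      ; // dynamic index: the returned constant part is 0
//   else
//      ; // fully constant index: indirect is NULL, base holds the value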
98
99 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
100
101 void setInterpolate(nv50_ir_varying *,
102 uint8_t,
103 bool centroid,
104 unsigned semantics);
105
106 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
107 uint8_t c, Value *indirect0 = NULL,
108 Value *indirect1 = NULL, bool patch = false);
109 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
110 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
111 Value *indirect1 = NULL);
112
113 bool isFloatType(nir_alu_type);
114 bool isSignedType(nir_alu_type);
115 bool isResultFloat(nir_op);
116 bool isResultSigned(nir_op);
117
118 DataType getDType(nir_alu_instr *);
119 DataType getDType(nir_intrinsic_instr *);
120 DataType getDType(nir_intrinsic_instr *, bool isSigned);
121 DataType getDType(nir_op, uint8_t);
122
123 std::vector<DataType> getSTypes(nir_alu_instr *);
124 DataType getSType(nir_src &, bool isFloat, bool isSigned);
125
126 operation getOperation(nir_intrinsic_op);
127 operation getOperation(nir_op);
128 operation getOperation(nir_texop);
129 operation preOperationNeeded(nir_op);
130
131 int getSubOp(nir_intrinsic_op);
132 int getSubOp(nir_op);
133
134 CondCode getCondCode(nir_op);
135
136 bool assignSlots();
137 bool parseNIR();
138
139 bool visit(nir_alu_instr *);
140 bool visit(nir_block *);
141 bool visit(nir_cf_node *);
142 bool visit(nir_deref_instr *);
143 bool visit(nir_function *);
144 bool visit(nir_if *);
145 bool visit(nir_instr *);
146 bool visit(nir_intrinsic_instr *);
147 bool visit(nir_jump_instr *);
148 bool visit(nir_load_const_instr*);
149 bool visit(nir_loop *);
150 bool visit(nir_ssa_undef_instr *);
151 bool visit(nir_tex_instr *);
152
153 // tex stuff
154 Value* applyProjection(Value *src, Value *proj);
155 unsigned int getNIRArgCount(TexInstruction::Target&);
156
157 // image stuff
158 uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
159 CacheMode getCacheModeFromVar(const nir_variable *);
160
161 nir_shader *nir;
162
163 NirDefMap ssaDefs;
164 NirDefMap regDefs;
165 ImmediateMap immediates;
166 NirArrayLMemOffsets regToLmemOffset;
167 NirBlockMap blocks;
168 unsigned int curLoopDepth;
169
170 BasicBlock *exit;
171 Value *zero;
172 Instruction *immInsertPos;
173
174 int clipVertexOutput;
175
176 union {
177 struct {
178 Value *position;
179 } fp;
180 };
181 };
182
183 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
184 : ConverterCommon(prog, info),
185 nir(nir),
186 curLoopDepth(0),
187 clipVertexOutput(-1)
188 {
189 zero = mkImm((uint32_t)0);
190 }
191
192 BasicBlock *
193 Converter::convert(nir_block *block)
194 {
195 NirBlockMap::iterator it = blocks.find(block->index);
196 if (it != blocks.end())
197 return it->second;
198
199 BasicBlock *bb = new BasicBlock(func);
200 blocks[block->index] = bb;
201 return bb;
202 }
203
204 bool
205 Converter::isFloatType(nir_alu_type type)
206 {
207 return nir_alu_type_get_base_type(type) == nir_type_float;
208 }
209
210 bool
211 Converter::isSignedType(nir_alu_type type)
212 {
213 return nir_alu_type_get_base_type(type) == nir_type_int;
214 }
215
216 bool
217 Converter::isResultFloat(nir_op op)
218 {
219 const nir_op_info &info = nir_op_infos[op];
220 if (info.output_type != nir_type_invalid)
221 return isFloatType(info.output_type);
222
223 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
224 assert(false);
225 return true;
226 }
227
228 bool
229 Converter::isResultSigned(nir_op op)
230 {
231 switch (op) {
232 // there is no umul and we get wrong results if we treat all muls as signed
233 case nir_op_imul:
234 case nir_op_inot:
235 return false;
236 default:
237 const nir_op_info &info = nir_op_infos[op];
238 if (info.output_type != nir_type_invalid)
239 return isSignedType(info.output_type);
240 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
241 assert(false);
242 return true;
243 }
244 }
245
246 DataType
247 Converter::getDType(nir_alu_instr *insn)
248 {
249 if (insn->dest.dest.is_ssa)
250 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
251 else
252 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
253 }
254
255 DataType
256 Converter::getDType(nir_intrinsic_instr *insn)
257 {
258 bool isSigned;
259 switch (insn->intrinsic) {
260 case nir_intrinsic_shared_atomic_imax:
261 case nir_intrinsic_shared_atomic_imin:
262 case nir_intrinsic_ssbo_atomic_imax:
263 case nir_intrinsic_ssbo_atomic_imin:
264 isSigned = true;
265 break;
266 default:
267 isSigned = false;
268 break;
269 }
270
271 return getDType(insn, isSigned);
272 }
273
274 DataType
275 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
276 {
277 if (insn->dest.is_ssa)
278 return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
279 else
280 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
281 }
282
283 DataType
284 Converter::getDType(nir_op op, uint8_t bitSize)
285 {
286 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
287 if (ty == TYPE_NONE) {
288 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
289 assert(false);
290 }
291 return ty;
292 }
293
294 std::vector<DataType>
295 Converter::getSTypes(nir_alu_instr *insn)
296 {
297 const nir_op_info &info = nir_op_infos[insn->op];
298 std::vector<DataType> res(info.num_inputs);
299
300 for (uint8_t i = 0; i < info.num_inputs; ++i) {
301 if (info.input_types[i] != nir_type_invalid) {
302 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
303 } else {
304 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
305 assert(false);
306 res[i] = TYPE_NONE;
307 break;
308 }
309 }
310
311 return res;
312 }
313
314 DataType
315 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
316 {
317 uint8_t bitSize;
318 if (src.is_ssa)
319 bitSize = src.ssa->bit_size;
320 else
321 bitSize = src.reg.reg->bit_size;
322
323 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
324 if (ty == TYPE_NONE) {
325 const char *str;
326 if (isFloat)
327 str = "float";
328 else if (isSigned)
329 str = "int";
330 else
331 str = "uint";
332 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
333 assert(false);
334 }
335 return ty;
336 }
337
338 operation
339 Converter::getOperation(nir_op op)
340 {
341 switch (op) {
342 // basic ops with float and int variants
343 case nir_op_fabs:
344 case nir_op_iabs:
345 return OP_ABS;
346 case nir_op_fadd:
347 case nir_op_iadd:
348 return OP_ADD;
349 case nir_op_fand:
350 case nir_op_iand:
351 return OP_AND;
352 case nir_op_ifind_msb:
353 case nir_op_ufind_msb:
354 return OP_BFIND;
355 case nir_op_fceil:
356 return OP_CEIL;
357 case nir_op_fcos:
358 return OP_COS;
359 case nir_op_f2f32:
360 case nir_op_f2f64:
361 case nir_op_f2i32:
362 case nir_op_f2i64:
363 case nir_op_f2u32:
364 case nir_op_f2u64:
365 case nir_op_i2f32:
366 case nir_op_i2f64:
367 case nir_op_i2i32:
368 case nir_op_i2i64:
369 case nir_op_u2f32:
370 case nir_op_u2f64:
371 case nir_op_u2u32:
372 case nir_op_u2u64:
373 return OP_CVT;
374 case nir_op_fddx:
375 case nir_op_fddx_coarse:
376 case nir_op_fddx_fine:
377 return OP_DFDX;
378 case nir_op_fddy:
379 case nir_op_fddy_coarse:
380 case nir_op_fddy_fine:
381 return OP_DFDY;
382 case nir_op_fdiv:
383 case nir_op_idiv:
384 case nir_op_udiv:
385 return OP_DIV;
386 case nir_op_fexp2:
387 return OP_EX2;
388 case nir_op_ffloor:
389 return OP_FLOOR;
390 case nir_op_ffma:
391 return OP_FMA;
392 case nir_op_flog2:
393 return OP_LG2;
394 case nir_op_fmax:
395 case nir_op_imax:
396 case nir_op_umax:
397 return OP_MAX;
398 case nir_op_pack_64_2x32_split:
399 return OP_MERGE;
400 case nir_op_fmin:
401 case nir_op_imin:
402 case nir_op_umin:
403 return OP_MIN;
404 case nir_op_fmod:
405 case nir_op_imod:
406 case nir_op_umod:
407 case nir_op_frem:
408 case nir_op_irem:
409 return OP_MOD;
410 case nir_op_fmul:
411 case nir_op_imul:
412 case nir_op_imul_high:
413 case nir_op_umul_high:
414 return OP_MUL;
415 case nir_op_fneg:
416 case nir_op_ineg:
417 return OP_NEG;
418 case nir_op_fnot:
419 case nir_op_inot:
420 return OP_NOT;
421 case nir_op_for:
422 case nir_op_ior:
423 return OP_OR;
424 case nir_op_fpow:
425 return OP_POW;
426 case nir_op_frcp:
427 return OP_RCP;
428 case nir_op_frsq:
429 return OP_RSQ;
430 case nir_op_fsat:
431 return OP_SAT;
432 case nir_op_feq32:
433 case nir_op_ieq32:
434 case nir_op_fge32:
435 case nir_op_ige32:
436 case nir_op_uge32:
437 case nir_op_flt32:
438 case nir_op_ilt32:
439 case nir_op_ult32:
440 case nir_op_fne32:
441 case nir_op_ine32:
442 return OP_SET;
443 case nir_op_ishl:
444 return OP_SHL;
445 case nir_op_ishr:
446 case nir_op_ushr:
447 return OP_SHR;
448 case nir_op_fsin:
449 return OP_SIN;
450 case nir_op_fsqrt:
451 return OP_SQRT;
452 case nir_op_fsub:
453 case nir_op_isub:
454 return OP_SUB;
455 case nir_op_ftrunc:
456 return OP_TRUNC;
457 case nir_op_fxor:
458 case nir_op_ixor:
459 return OP_XOR;
460 default:
461 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
462 assert(false);
463 return OP_NOP;
464 }
465 }
466
467 operation
468 Converter::getOperation(nir_texop op)
469 {
470 switch (op) {
471 case nir_texop_tex:
472 return OP_TEX;
473 case nir_texop_lod:
474 return OP_TXLQ;
475 case nir_texop_txb:
476 return OP_TXB;
477 case nir_texop_txd:
478 return OP_TXD;
479 case nir_texop_txf:
480 case nir_texop_txf_ms:
481 return OP_TXF;
482 case nir_texop_tg4:
483 return OP_TXG;
484 case nir_texop_txl:
485 return OP_TXL;
486 case nir_texop_query_levels:
487 case nir_texop_texture_samples:
488 case nir_texop_txs:
489 return OP_TXQ;
490 default:
491 ERROR("couldn't get operation for nir_texop %u\n", op);
492 assert(false);
493 return OP_NOP;
494 }
495 }
496
497 operation
498 Converter::getOperation(nir_intrinsic_op op)
499 {
500 switch (op) {
501 case nir_intrinsic_emit_vertex:
502 return OP_EMIT;
503 case nir_intrinsic_end_primitive:
504 return OP_RESTART;
505 case nir_intrinsic_image_deref_atomic_add:
506 case nir_intrinsic_image_deref_atomic_and:
507 case nir_intrinsic_image_deref_atomic_comp_swap:
508 case nir_intrinsic_image_deref_atomic_exchange:
509 case nir_intrinsic_image_deref_atomic_max:
510 case nir_intrinsic_image_deref_atomic_min:
511 case nir_intrinsic_image_deref_atomic_or:
512 case nir_intrinsic_image_deref_atomic_xor:
513 return OP_SUREDP;
514 case nir_intrinsic_image_deref_load:
515 return OP_SULDP;
516 case nir_intrinsic_image_deref_samples:
517 case nir_intrinsic_image_deref_size:
518 return OP_SUQ;
519 case nir_intrinsic_image_deref_store:
520 return OP_SUSTP;
521 default:
522 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
523 assert(false);
524 return OP_NOP;
525 }
526 }
527
528 operation
529 Converter::preOperationNeeded(nir_op op)
530 {
531 switch (op) {
532 case nir_op_fcos:
533 case nir_op_fsin:
534 return OP_PRESIN;
535 default:
536 return OP_NOP;
537 }
538 }
539
540 int
541 Converter::getSubOp(nir_op op)
542 {
543 switch (op) {
544 case nir_op_imul_high:
545 case nir_op_umul_high:
546 return NV50_IR_SUBOP_MUL_HIGH;
547 default:
548 return 0;
549 }
550 }
551
552 int
553 Converter::getSubOp(nir_intrinsic_op op)
554 {
555 switch (op) {
556 case nir_intrinsic_image_deref_atomic_add:
557 case nir_intrinsic_shared_atomic_add:
558 case nir_intrinsic_ssbo_atomic_add:
559 return NV50_IR_SUBOP_ATOM_ADD;
560 case nir_intrinsic_image_deref_atomic_and:
561 case nir_intrinsic_shared_atomic_and:
562 case nir_intrinsic_ssbo_atomic_and:
563 return NV50_IR_SUBOP_ATOM_AND;
564 case nir_intrinsic_image_deref_atomic_comp_swap:
565 case nir_intrinsic_shared_atomic_comp_swap:
566 case nir_intrinsic_ssbo_atomic_comp_swap:
567 return NV50_IR_SUBOP_ATOM_CAS;
568 case nir_intrinsic_image_deref_atomic_exchange:
569 case nir_intrinsic_shared_atomic_exchange:
570 case nir_intrinsic_ssbo_atomic_exchange:
571 return NV50_IR_SUBOP_ATOM_EXCH;
572 case nir_intrinsic_image_deref_atomic_or:
573 case nir_intrinsic_shared_atomic_or:
574 case nir_intrinsic_ssbo_atomic_or:
575 return NV50_IR_SUBOP_ATOM_OR;
576 case nir_intrinsic_image_deref_atomic_max:
577 case nir_intrinsic_shared_atomic_imax:
578 case nir_intrinsic_shared_atomic_umax:
579 case nir_intrinsic_ssbo_atomic_imax:
580 case nir_intrinsic_ssbo_atomic_umax:
581 return NV50_IR_SUBOP_ATOM_MAX;
582 case nir_intrinsic_image_deref_atomic_min:
583 case nir_intrinsic_shared_atomic_imin:
584 case nir_intrinsic_shared_atomic_umin:
585 case nir_intrinsic_ssbo_atomic_imin:
586 case nir_intrinsic_ssbo_atomic_umin:
587 return NV50_IR_SUBOP_ATOM_MIN;
588 case nir_intrinsic_image_deref_atomic_xor:
589 case nir_intrinsic_shared_atomic_xor:
590 case nir_intrinsic_ssbo_atomic_xor:
591 return NV50_IR_SUBOP_ATOM_XOR;
592
593 case nir_intrinsic_group_memory_barrier:
594 case nir_intrinsic_memory_barrier:
595 case nir_intrinsic_memory_barrier_atomic_counter:
596 case nir_intrinsic_memory_barrier_buffer:
597 case nir_intrinsic_memory_barrier_image:
598 return NV50_IR_SUBOP_MEMBAR(M, GL);
599 case nir_intrinsic_memory_barrier_shared:
600 return NV50_IR_SUBOP_MEMBAR(M, CTA);
601
602 case nir_intrinsic_vote_all:
603 return NV50_IR_SUBOP_VOTE_ALL;
604 case nir_intrinsic_vote_any:
605 return NV50_IR_SUBOP_VOTE_ANY;
606 case nir_intrinsic_vote_ieq:
607 return NV50_IR_SUBOP_VOTE_UNI;
608 default:
609 return 0;
610 }
611 }
612
613 CondCode
614 Converter::getCondCode(nir_op op)
615 {
616 switch (op) {
617 case nir_op_feq32:
618 case nir_op_ieq32:
619 return CC_EQ;
620 case nir_op_fge32:
621 case nir_op_ige32:
622 case nir_op_uge32:
623 return CC_GE;
624 case nir_op_flt32:
625 case nir_op_ilt32:
626 case nir_op_ult32:
627 return CC_LT;
628 case nir_op_fne32:
629 return CC_NEU;
630 case nir_op_ine32:
631 return CC_NE;
632 default:
633 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
634 assert(false);
635 return CC_FL;
636 }
637 }
638
639 Converter::LValues&
640 Converter::convert(nir_alu_dest *dest)
641 {
642 return convert(&dest->dest);
643 }
644
645 Converter::LValues&
646 Converter::convert(nir_dest *dest)
647 {
648 if (dest->is_ssa)
649 return convert(&dest->ssa);
650 if (dest->reg.indirect) {
651 ERROR("no support for indirects.");
652 assert(false);
653 }
654 return convert(dest->reg.reg);
655 }
656
657 Converter::LValues&
658 Converter::convert(nir_register *reg)
659 {
660 NirDefMap::iterator it = regDefs.find(reg->index);
661 if (it != regDefs.end())
662 return it->second;
663
664 LValues newDef(reg->num_components);
665 for (uint8_t i = 0; i < reg->num_components; i++)
666 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
667 return regDefs[reg->index] = newDef;
668 }
669
670 Converter::LValues&
671 Converter::convert(nir_ssa_def *def)
672 {
673 NirDefMap::iterator it = ssaDefs.find(def->index);
674 if (it != ssaDefs.end())
675 return it->second;
676
677 LValues newDef(def->num_components);
678 for (uint8_t i = 0; i < def->num_components; i++)
679 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
680 return ssaDefs[def->index] = newDef;
681 }
682
683 Value*
684 Converter::getSrc(nir_alu_src *src, uint8_t component)
685 {
686 if (src->abs || src->negate) {
687 ERROR("modifiers currently not supported on nir_alu_src\n");
688 assert(false);
689 }
690 return getSrc(&src->src, src->swizzle[component]);
691 }
692
693 Value*
694 Converter::getSrc(nir_register *reg, uint8_t idx)
695 {
696 NirDefMap::iterator it = regDefs.find(reg->index);
697 if (it == regDefs.end())
698 return convert(reg)[idx];
699 return it->second[idx];
700 }
701
702 Value*
703 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
704 {
705 if (src->is_ssa)
706 return getSrc(src->ssa, idx);
707
708 if (src->reg.indirect) {
709 if (indirect)
710 return getSrc(src->reg.indirect, idx);
711 ERROR("no support for indirects.");
712 assert(false);
713 return NULL;
714 }
715
716 return getSrc(src->reg.reg, idx);
717 }
718
719 Value*
720 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
721 {
722 ImmediateMap::iterator iit = immediates.find(src->index);
723 if (iit != immediates.end())
724 return convert((*iit).second, idx);
725
726 NirDefMap::iterator it = ssaDefs.find(src->index);
727 if (it == ssaDefs.end()) {
728 ERROR("SSA value %u not found\n", src->index);
729 assert(false);
730 return NULL;
731 }
732 return it->second[idx];
733 }
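// Note on the immediates lookup above: load_const instructions are not
// materialized when first visited; they are recorded in the "immediates"
// map and only converted into real Values here, at their point of use
// (the convert(nir_load_const_instr*, uint8_t) overload, defined outside
// this excerpt, presumably emits the load at immInsertPos).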
734
735 uint32_t
736 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
737 {
738 nir_const_value *offset = nir_src_as_const_value(*src);
739
740 if (offset) {
741 indirect = NULL;
742 return offset->u32[0];
743 }
744
745 indirect = getSrc(src, idx, true);
746 return 0;
747 }
748
749 uint32_t
750 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
751 {
752 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
753 if (indirect)
754 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
755 return idx;
756 }
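// The SHL by 4 above scales a dynamic index to a byte offset: one vec4 I/O
// slot is 16 bytes, which also matches the constant "16 * coffset" scaling
// used by the load_uniform handling further down.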
757
758 static void
759 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
760 {
761 assert(name && index);
762
763 if (slot >= VERT_ATTRIB_MAX) {
764 ERROR("invalid varying slot %u\n", slot);
765 assert(false);
766 return;
767 }
768
769 if (slot >= VERT_ATTRIB_GENERIC0 &&
770 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
771 *name = TGSI_SEMANTIC_GENERIC;
772 *index = slot - VERT_ATTRIB_GENERIC0;
773 return;
774 }
775
776 if (slot >= VERT_ATTRIB_TEX0 &&
777 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
778 *name = TGSI_SEMANTIC_TEXCOORD;
779 *index = slot - VERT_ATTRIB_TEX0;
780 return;
781 }
782
783 switch (slot) {
784 case VERT_ATTRIB_COLOR0:
785 *name = TGSI_SEMANTIC_COLOR;
786 *index = 0;
787 break;
788 case VERT_ATTRIB_COLOR1:
789 *name = TGSI_SEMANTIC_COLOR;
790 *index = 1;
791 break;
792 case VERT_ATTRIB_EDGEFLAG:
793 *name = TGSI_SEMANTIC_EDGEFLAG;
794 *index = 0;
795 break;
796 case VERT_ATTRIB_FOG:
797 *name = TGSI_SEMANTIC_FOG;
798 *index = 0;
799 break;
800 case VERT_ATTRIB_NORMAL:
801 *name = TGSI_SEMANTIC_NORMAL;
802 *index = 0;
803 break;
804 case VERT_ATTRIB_POS:
805 *name = TGSI_SEMANTIC_POSITION;
806 *index = 0;
807 break;
808 case VERT_ATTRIB_POINT_SIZE:
809 *name = TGSI_SEMANTIC_PSIZE;
810 *index = 0;
811 break;
812 default:
813 ERROR("unknown vert attrib slot %u\n", slot);
814 assert(false);
815 break;
816 }
817 }
818
819 static void
820 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
821 {
822 assert(name && index);
823
824 if (slot >= VARYING_SLOT_TESS_MAX) {
825 ERROR("invalid varying slot %u\n", slot);
826 assert(false);
827 return;
828 }
829
830 if (slot >= VARYING_SLOT_PATCH0) {
831 *name = TGSI_SEMANTIC_PATCH;
832 *index = slot - VARYING_SLOT_PATCH0;
833 return;
834 }
835
836 if (slot >= VARYING_SLOT_VAR0) {
837 *name = TGSI_SEMANTIC_GENERIC;
838 *index = slot - VARYING_SLOT_VAR0;
839 return;
840 }
841
842 if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
843 *name = TGSI_SEMANTIC_TEXCOORD;
844 *index = slot - VARYING_SLOT_TEX0;
845 return;
846 }
847
848 switch (slot) {
849 case VARYING_SLOT_BFC0:
850 *name = TGSI_SEMANTIC_BCOLOR;
851 *index = 0;
852 break;
853 case VARYING_SLOT_BFC1:
854 *name = TGSI_SEMANTIC_BCOLOR;
855 *index = 1;
856 break;
857 case VARYING_SLOT_CLIP_DIST0:
858 *name = TGSI_SEMANTIC_CLIPDIST;
859 *index = 0;
860 break;
861 case VARYING_SLOT_CLIP_DIST1:
862 *name = TGSI_SEMANTIC_CLIPDIST;
863 *index = 1;
864 break;
865 case VARYING_SLOT_CLIP_VERTEX:
866 *name = TGSI_SEMANTIC_CLIPVERTEX;
867 *index = 0;
868 break;
869 case VARYING_SLOT_COL0:
870 *name = TGSI_SEMANTIC_COLOR;
871 *index = 0;
872 break;
873 case VARYING_SLOT_COL1:
874 *name = TGSI_SEMANTIC_COLOR;
875 *index = 1;
876 break;
877 case VARYING_SLOT_EDGE:
878 *name = TGSI_SEMANTIC_EDGEFLAG;
879 *index = 0;
880 break;
881 case VARYING_SLOT_FACE:
882 *name = TGSI_SEMANTIC_FACE;
883 *index = 0;
884 break;
885 case VARYING_SLOT_FOGC:
886 *name = TGSI_SEMANTIC_FOG;
887 *index = 0;
888 break;
889 case VARYING_SLOT_LAYER:
890 *name = TGSI_SEMANTIC_LAYER;
891 *index = 0;
892 break;
893 case VARYING_SLOT_PNTC:
894 *name = TGSI_SEMANTIC_PCOORD;
895 *index = 0;
896 break;
897 case VARYING_SLOT_POS:
898 *name = TGSI_SEMANTIC_POSITION;
899 *index = 0;
900 break;
901 case VARYING_SLOT_PRIMITIVE_ID:
902 *name = TGSI_SEMANTIC_PRIMID;
903 *index = 0;
904 break;
905 case VARYING_SLOT_PSIZ:
906 *name = TGSI_SEMANTIC_PSIZE;
907 *index = 0;
908 break;
909 case VARYING_SLOT_TESS_LEVEL_INNER:
910 *name = TGSI_SEMANTIC_TESSINNER;
911 *index = 0;
912 break;
913 case VARYING_SLOT_TESS_LEVEL_OUTER:
914 *name = TGSI_SEMANTIC_TESSOUTER;
915 *index = 0;
916 break;
917 case VARYING_SLOT_VIEWPORT:
918 *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
919 *index = 0;
920 break;
921 default:
922 ERROR("unknown varying slot %u\n", slot);
923 assert(false);
924 break;
925 }
926 }
927
928 static void
929 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
930 {
931 if (slot >= FRAG_RESULT_DATA0) {
932 *name = TGSI_SEMANTIC_COLOR;
933 *index = slot - FRAG_RESULT_COLOR - 2; // intentional: equals slot - FRAG_RESULT_DATA0
934 return;
935 }
936
937 switch (slot) {
938 case FRAG_RESULT_COLOR:
939 *name = TGSI_SEMANTIC_COLOR;
940 *index = 0;
941 break;
942 case FRAG_RESULT_DEPTH:
943 *name = TGSI_SEMANTIC_POSITION;
944 *index = 0;
945 break;
946 case FRAG_RESULT_SAMPLE_MASK:
947 *name = TGSI_SEMANTIC_SAMPLEMASK;
948 *index = 0;
949 break;
950 default:
951 ERROR("unknown frag result slot %u\n", slot);
952 assert(false);
953 break;
954 }
955 }
956
957 // copy of _mesa_sysval_to_semantic
958 static void
959 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
960 {
961 *index = 0;
962 switch (val) {
963 // Vertex shader
964 case SYSTEM_VALUE_VERTEX_ID:
965 *name = TGSI_SEMANTIC_VERTEXID;
966 break;
967 case SYSTEM_VALUE_INSTANCE_ID:
968 *name = TGSI_SEMANTIC_INSTANCEID;
969 break;
970 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
971 *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
972 break;
973 case SYSTEM_VALUE_BASE_VERTEX:
974 *name = TGSI_SEMANTIC_BASEVERTEX;
975 break;
976 case SYSTEM_VALUE_BASE_INSTANCE:
977 *name = TGSI_SEMANTIC_BASEINSTANCE;
978 break;
979 case SYSTEM_VALUE_DRAW_ID:
980 *name = TGSI_SEMANTIC_DRAWID;
981 break;
982
983 // Geometry shader
984 case SYSTEM_VALUE_INVOCATION_ID:
985 *name = TGSI_SEMANTIC_INVOCATIONID;
986 break;
987
988 // Fragment shader
989 case SYSTEM_VALUE_FRAG_COORD:
990 *name = TGSI_SEMANTIC_POSITION;
991 break;
992 case SYSTEM_VALUE_FRONT_FACE:
993 *name = TGSI_SEMANTIC_FACE;
994 break;
995 case SYSTEM_VALUE_SAMPLE_ID:
996 *name = TGSI_SEMANTIC_SAMPLEID;
997 break;
998 case SYSTEM_VALUE_SAMPLE_POS:
999 *name = TGSI_SEMANTIC_SAMPLEPOS;
1000 break;
1001 case SYSTEM_VALUE_SAMPLE_MASK_IN:
1002 *name = TGSI_SEMANTIC_SAMPLEMASK;
1003 break;
1004 case SYSTEM_VALUE_HELPER_INVOCATION:
1005 *name = TGSI_SEMANTIC_HELPER_INVOCATION;
1006 break;
1007
1008 // Tessellation shader
1009 case SYSTEM_VALUE_TESS_COORD:
1010 *name = TGSI_SEMANTIC_TESSCOORD;
1011 break;
1012 case SYSTEM_VALUE_VERTICES_IN:
1013 *name = TGSI_SEMANTIC_VERTICESIN;
1014 break;
1015 case SYSTEM_VALUE_PRIMITIVE_ID:
1016 *name = TGSI_SEMANTIC_PRIMID;
1017 break;
1018 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1019 *name = TGSI_SEMANTIC_TESSOUTER;
1020 break;
1021 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1022 *name = TGSI_SEMANTIC_TESSINNER;
1023 break;
1024
1025 // Compute shader
1026 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1027 *name = TGSI_SEMANTIC_THREAD_ID;
1028 break;
1029 case SYSTEM_VALUE_WORK_GROUP_ID:
1030 *name = TGSI_SEMANTIC_BLOCK_ID;
1031 break;
1032 case SYSTEM_VALUE_NUM_WORK_GROUPS:
1033 *name = TGSI_SEMANTIC_GRID_SIZE;
1034 break;
1035 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1036 *name = TGSI_SEMANTIC_BLOCK_SIZE;
1037 break;
1038
1039 // ARB_shader_ballot
1040 case SYSTEM_VALUE_SUBGROUP_SIZE:
1041 *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
1042 break;
1043 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1044 *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
1045 break;
1046 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1047 *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
1048 break;
1049 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1050 *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
1051 break;
1052 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1053 *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
1054 break;
1055 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1056 *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
1057 break;
1058 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1059 *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
1060 break;
1061
1062 default:
1063 ERROR("unknown system value %u\n", val);
1064 assert(false);
1065 break;
1066 }
1067 }
1068
1069 void
1070 Converter::setInterpolate(nv50_ir_varying *var,
1071 uint8_t mode,
1072 bool centroid,
1073 unsigned semantic)
1074 {
1075 switch (mode) {
1076 case INTERP_MODE_FLAT:
1077 var->flat = 1;
1078 break;
1079 case INTERP_MODE_NONE:
1080 if (semantic == TGSI_SEMANTIC_COLOR)
1081 var->sc = 1;
1082 else if (semantic == TGSI_SEMANTIC_POSITION)
1083 var->linear = 1;
1084 break;
1085 case INTERP_MODE_NOPERSPECTIVE:
1086 var->linear = 1;
1087 break;
1088 case INTERP_MODE_SMOOTH:
1089 break;
1090 }
1091 var->centroid = centroid;
1092 }
1093
1094 static uint16_t
1095 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
1096 bool input, const nir_variable *var)
1097 {
1098 if (!type->is_array())
1099 return type->count_attribute_slots(false);
1100
1101 uint16_t slots;
1102 switch (stage) {
1103 case Program::TYPE_GEOMETRY:
1104 slots = type->uniform_locations();
1105 if (input)
1106 slots /= info.gs.vertices_in;
1107 break;
1108 case Program::TYPE_TESSELLATION_CONTROL:
1109 case Program::TYPE_TESSELLATION_EVAL:
1110 // remove first dimension
1111 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1112 slots = type->uniform_locations();
1113 else
1114 slots = type->fields.array->uniform_locations();
1115 break;
1116 default:
1117 slots = type->count_attribute_slots(false);
1118 break;
1119 }
1120
1121 return slots;
1122 }
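// Example (an assumption for illustration, not from the original file): for
// a geometry shader input declared as "in vec4 data[3][2]" with triangle
// input (3 vertices), uniform_locations() counts all 6 vec4 locations, so
// dividing by info.gs.vertices_in == 3 yields the 2 slots owned per vertex.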
1123
1124 bool Converter::assignSlots() {
1125 unsigned name;
1126 unsigned index;
1127
1128 info->io.viewportId = -1;
1129 info->numInputs = 0;
1130
1131 // we have to fix up the uniform locations for arrays
1132 unsigned numImages = 0;
1133 nir_foreach_variable(var, &nir->uniforms) {
1134 const glsl_type *type = var->type;
1135 if (!type->without_array()->is_image())
1136 continue;
1137 var->data.driver_location = numImages;
1138 numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1139 }
1140
1141 nir_foreach_variable(var, &nir->inputs) {
1142 const glsl_type *type = var->type;
1143 int slot = var->data.location;
1144 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1145 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1146 : type->component_slots();
1147 uint32_t frac = var->data.location_frac;
1148 uint32_t vary = var->data.driver_location;
1149
1150 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1151 if (comp > 2)
1152 slots *= 2;
1153 }
1154
1155 assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1156
1157 switch(prog->getType()) {
1158 case Program::TYPE_FRAGMENT:
1159 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1160 for (uint16_t i = 0; i < slots; ++i) {
1161 setInterpolate(&info->in[vary + i], var->data.interpolation,
1162 var->data.centroid | var->data.sample, name);
1163 }
1164 break;
1165 case Program::TYPE_GEOMETRY:
1166 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1167 break;
1168 case Program::TYPE_TESSELLATION_CONTROL:
1169 case Program::TYPE_TESSELLATION_EVAL:
1170 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1171 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1172 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1173 break;
1174 case Program::TYPE_VERTEX:
1175 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1176 switch (name) {
1177 case TGSI_SEMANTIC_EDGEFLAG:
1178 info->io.edgeFlagIn = vary;
1179 break;
1180 default:
1181 break;
1182 }
1183 break;
1184 default:
1185 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1186 return false;
1187 }
1188
1189 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1190 info->in[vary].id = vary;
1191 info->in[vary].patch = var->data.patch;
1192 info->in[vary].sn = name;
1193 info->in[vary].si = index + i;
1194 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1195 if (i & 0x1)
1196 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1197 else
1198 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1199 else
1200 info->in[vary].mask |= ((1 << comp) - 1) << frac;
1201 }
1202 info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1203 }
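// Worked example for the 64-bit input mask above (illustrative): a dvec3
// (comp == 3, frac == 0) has the full mask (1 << 6) - 1 == 0x3f; the even
// slot keeps the low nibble (0x3f & 0xf == 0xf) and the odd slot keeps the
// remaining high bits shifted down (0x3f >> 4 == 0x3).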
1204
1205 info->numOutputs = 0;
1206 nir_foreach_variable(var, &nir->outputs) {
1207 const glsl_type *type = var->type;
1208 int slot = var->data.location;
1209 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1210 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1211 : type->component_slots();
1212 uint32_t frac = var->data.location_frac;
1213 uint32_t vary = var->data.driver_location;
1214
1215 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1216 if (comp > 2)
1217 slots *= 2;
1218 }
1219
1220 assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1221
1222 switch(prog->getType()) {
1223 case Program::TYPE_FRAGMENT:
1224 frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1225 switch (name) {
1226 case TGSI_SEMANTIC_COLOR:
1227 if (!var->data.fb_fetch_output)
1228 info->prop.fp.numColourResults++;
1229 info->prop.fp.separateFragData = true;
1230 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1231 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1232 index = index == 0 ? var->data.index : index;
1233 break;
1234 case TGSI_SEMANTIC_POSITION:
1235 info->io.fragDepth = vary;
1236 info->prop.fp.writesDepth = true;
1237 break;
1238 case TGSI_SEMANTIC_SAMPLEMASK:
1239 info->io.sampleMask = vary;
1240 break;
1241 default:
1242 break;
1243 }
1244 break;
1245 case Program::TYPE_GEOMETRY:
1246 case Program::TYPE_TESSELLATION_CONTROL:
1247 case Program::TYPE_TESSELLATION_EVAL:
1248 case Program::TYPE_VERTEX:
1249 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1250
1251 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1252 name != TGSI_SEMANTIC_TESSOUTER)
1253 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1254
1255 switch (name) {
1256 case TGSI_SEMANTIC_CLIPDIST:
1257 info->io.genUserClip = -1;
1258 break;
1259 case TGSI_SEMANTIC_CLIPVERTEX:
1260 clipVertexOutput = vary;
1261 break;
1262 case TGSI_SEMANTIC_EDGEFLAG:
1263 info->io.edgeFlagOut = vary;
1264 break;
1265 case TGSI_SEMANTIC_POSITION:
1266 if (clipVertexOutput < 0)
1267 clipVertexOutput = vary;
1268 break;
1269 default:
1270 break;
1271 }
1272 break;
1273 default:
1274 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1275 return false;
1276 }
1277
1278 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1279 info->out[vary].id = vary;
1280 info->out[vary].patch = var->data.patch;
1281 info->out[vary].sn = name;
1282 info->out[vary].si = index + i;
1283 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1284 if (i & 0x1)
1285 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1286 else
1287 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1288 else
1289 info->out[vary].mask |= ((1 << comp) - 1) << frac;
1290
1291 if (nir->info.outputs_read & 1ll << slot)
1292 info->out[vary].oread = 1;
1293 }
1294 info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1295 }
1296
1297 info->numSysVals = 0;
1298 for (uint8_t i = 0; i < 64; ++i) {
1299 if (!(nir->info.system_values_read & 1ll << i))
1300 continue;
1301
1302 system_val_to_tgsi_semantic(i, &name, &index);
1303 info->sv[info->numSysVals].sn = name;
1304 info->sv[info->numSysVals].si = index;
1305 info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1306
1307 switch (i) {
1308 case SYSTEM_VALUE_INSTANCE_ID:
1309 info->io.instanceId = info->numSysVals;
1310 break;
1311 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1312 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1313 info->sv[info->numSysVals].patch = 1;
1314 break;
1315 case SYSTEM_VALUE_VERTEX_ID:
1316 info->io.vertexId = info->numSysVals;
1317 break;
1318 default:
1319 break;
1320 }
1321
1322 info->numSysVals += 1;
1323 }
1324
1325 if (info->io.genUserClip > 0) {
1326 info->io.clipDistances = info->io.genUserClip;
1327
1328 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1329
1330 for (unsigned int n = 0; n < nOut; ++n) {
1331 unsigned int i = info->numOutputs++;
1332 info->out[i].id = i;
1333 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1334 info->out[i].si = n;
1335 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1336 }
1337 }
1338
1339 return info->assignSlots(info) == 0;
1340 }
1341
1342 uint32_t
1343 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1344 {
1345 DataType ty;
1346 int offset = nir_intrinsic_component(insn);
1347 bool input;
1348
1349 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1350 ty = getDType(insn);
1351 else
1352 ty = getSType(insn->src[0], false, false);
1353
1354 switch (insn->intrinsic) {
1355 case nir_intrinsic_load_input:
1356 case nir_intrinsic_load_interpolated_input:
1357 case nir_intrinsic_load_per_vertex_input:
1358 input = true;
1359 break;
1360 case nir_intrinsic_load_output:
1361 case nir_intrinsic_load_per_vertex_output:
1362 case nir_intrinsic_store_output:
1363 case nir_intrinsic_store_per_vertex_output:
1364 input = false;
1365 break;
1366 default:
1367 ERROR("unknown intrinsic in getSlotAddress %s",
1368 nir_intrinsic_infos[insn->intrinsic].name);
1369 input = false;
1370 assert(false);
1371 break;
1372 }
1373
1374 if (typeSizeof(ty) == 8) {
1375 slot *= 2;
1376 slot += offset;
1377 if (slot >= 4) {
1378 idx += 1;
1379 slot -= 4;
1380 }
1381 } else {
1382 slot += offset;
1383 }
1384
1385 assert(slot < 4);
1386 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1387 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1388
1389 const nv50_ir_varying *vary = input ? info->in : info->out;
1390 return vary[idx].slot[slot] * 4;
1391 }
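// Illustrative example of the 64-bit adjustment above: for an 8-byte type,
// component slot 3 maps to slot 3 * 2 == 6, which overflows the 4
// components of one location, so idx is bumped by one and the slot wraps
// to 2 (i.e. the value lives in the .zw half of the next location).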
1392
1393 Instruction *
1394 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1395 uint32_t base, uint8_t c, Value *indirect0,
1396 Value *indirect1, bool patch)
1397 {
1398 unsigned int tySize = typeSizeof(ty);
1399
1400 if (tySize == 8 &&
1401 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1402 Value *lo = getSSA();
1403 Value *hi = getSSA();
1404
1405 Instruction *loi =
1406 mkLoad(TYPE_U32, lo,
1407 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1408 indirect0);
1409 loi->setIndirect(0, 1, indirect1);
1410 loi->perPatch = patch;
1411
1412 Instruction *hii =
1413 mkLoad(TYPE_U32, hi,
1414 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1415 indirect0);
1416 hii->setIndirect(0, 1, indirect1);
1417 hii->perPatch = patch;
1418
1419 return mkOp2(OP_MERGE, ty, def, lo, hi);
1420 } else {
1421 Instruction *ld =
1422 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1423 ld->setIndirect(0, 1, indirect1);
1424 ld->perPatch = patch;
1425 return ld;
1426 }
1427 }
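// In other words, a 64-bit load from constant/buffer memory (or any
// indirect one) is emitted roughly as (pseudo-IR for illustration; the
// exact printed form differs):
//
//   ld u32 $lo c0[base + c * 8]
//   ld u32 $hi c0[base + c * 8 + 4]
//   merge u64 $def $lo $hi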
1428
1429 void
1430 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1431 DataType ty, Value *src, uint8_t idx, uint8_t c,
1432 Value *indirect0, Value *indirect1)
1433 {
1434 uint8_t size = typeSizeof(ty);
1435 uint32_t address = getSlotAddress(insn, idx, c);
1436
1437 if (size == 8 && indirect0) {
1438 Value *split[2];
1439 mkSplit(split, 4, src);
1440
1441 if (op == OP_EXPORT) {
1442 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1443 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1444 }
1445
1446 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1447 split[0])->perPatch = info->out[idx].patch;
1448 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1449 split[1])->perPatch = info->out[idx].patch;
1450 } else {
1451 if (op == OP_EXPORT)
1452 src = mkMov(getSSA(size), src, ty)->getDef(0);
1453 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1454 src)->perPatch = info->out[idx].patch;
1455 }
1456 }
1457
1458 bool
1459 Converter::parseNIR()
1460 {
1461 info->bin.tlsSpace = 0;
1462 info->io.clipDistances = nir->info.clip_distance_array_size;
1463 info->io.cullDistances = nir->info.cull_distance_array_size;
1464
1465 switch(prog->getType()) {
1466 case Program::TYPE_COMPUTE:
1467 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1468 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1469 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1470 info->bin.smemSize = nir->info.cs.shared_size;
1471 break;
1472 case Program::TYPE_FRAGMENT:
1473 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1474 info->prop.fp.persampleInvocation =
1475 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1476 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1477 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1478 info->prop.fp.readsSampleLocations =
1479 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1480 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1481 info->prop.fp.usesSampleMaskIn =
1482 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1483 break;
1484 case Program::TYPE_GEOMETRY:
1485 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1486 info->prop.gp.instanceCount = nir->info.gs.invocations;
1487 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1488 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1489 break;
1490 case Program::TYPE_TESSELLATION_CONTROL:
1491 case Program::TYPE_TESSELLATION_EVAL:
1492 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1493 info->prop.tp.domain = GL_LINES;
1494 else
1495 info->prop.tp.domain = nir->info.tess.primitive_mode;
1496 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1497 info->prop.tp.outputPrim =
1498 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1499 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1500 info->prop.tp.winding = !nir->info.tess.ccw;
1501 break;
1502 case Program::TYPE_VERTEX:
1503 info->prop.vp.usesDrawParameters =
1504 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1505 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1506 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1507 break;
1508 default:
1509 break;
1510 }
1511
1512 return true;
1513 }
1514
1515 bool
1516 Converter::visit(nir_function *function)
1517 {
1518 // we only support emitting the main function for now
1519 assert(!strcmp(function->name, "main"));
1520 assert(function->impl);
1521
1522 // usually the blocks will set everything up, but main is special
1523 BasicBlock *entry = new BasicBlock(prog->main);
1524 exit = new BasicBlock(prog->main);
1525 blocks[nir_start_block(function->impl)->index] = entry;
1526 prog->main->setEntry(entry);
1527 prog->main->setExit(exit);
1528
1529 setPosition(entry, true);
1530
1531 if (info->io.genUserClip > 0) {
1532 for (int c = 0; c < 4; ++c)
1533 clipVtx[c] = getScratch();
1534 }
1535
1536 switch (prog->getType()) {
1537 case Program::TYPE_TESSELLATION_CONTROL:
1538 outBase = mkOp2v(
1539 OP_SUB, TYPE_U32, getSSA(),
1540 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1541 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1542 break;
1543 case Program::TYPE_FRAGMENT: {
1544 Symbol *sv = mkSysVal(SV_POSITION, 3);
1545 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1546 fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1547 break;
1548 }
1549 default:
1550 break;
1551 }
1552
1553 nir_foreach_register(reg, &function->impl->registers) {
1554 if (reg->num_array_elems) {
1555 // TODO: packed variables would be nice, but MemoryOpt fails;
1556 // once that is fixed, replace 4 with reg->num_components
1557 uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1558 regToLmemOffset[reg->index] = info->bin.tlsSpace;
1559 info->bin.tlsSpace += size;
1560 }
1561 }
1562
1563 nir_index_ssa_defs(function->impl);
1564 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1565 if (!visit(node))
1566 return false;
1567 }
1568
1569 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1570 setPosition(exit, true);
1571
1572 if ((prog->getType() == Program::TYPE_VERTEX ||
1573 prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1574 && info->io.genUserClip > 0)
1575 handleUserClipPlanes();
1576
1577 // TODO: for functions other than main this needs to be an OP_RETURN
1578 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1579 return true;
1580 }
1581
1582 bool
1583 Converter::visit(nir_cf_node *node)
1584 {
1585 switch (node->type) {
1586 case nir_cf_node_block:
1587 return visit(nir_cf_node_as_block(node));
1588 case nir_cf_node_if:
1589 return visit(nir_cf_node_as_if(node));
1590 case nir_cf_node_loop:
1591 return visit(nir_cf_node_as_loop(node));
1592 default:
1593 ERROR("unknown nir_cf_node type %u\n", node->type);
1594 return false;
1595 }
1596 }
1597
1598 bool
1599 Converter::visit(nir_block *block)
1600 {
1601 if (!block->predecessors->entries && block->instr_list.is_empty())
1602 return true;
1603
1604 BasicBlock *bb = convert(block);
1605
1606 setPosition(bb, true);
1607 nir_foreach_instr(insn, block) {
1608 if (!visit(insn))
1609 return false;
1610 }
1611 return true;
1612 }
1613
1614 bool
1615 Converter::visit(nir_if *nif)
1616 {
1617 DataType sType = getSType(nif->condition, false, false);
1618 Value *src = getSrc(&nif->condition, 0);
1619
1620 nir_block *lastThen = nir_if_last_then_block(nif);
1621 nir_block *lastElse = nir_if_last_else_block(nif);
1622
1623 assert(!lastThen->successors[1]);
1624 assert(!lastElse->successors[1]);
1625
1626 BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1627 BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1628
1629 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1630 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1631
1632 // we only insert joinats if both branches end up at the end of the if
1633 // again. the reasons for this not to happen are breaks/continues/ret/...,
1634 // which have their own handling (see the flow sketch after this function)
1635 if (lastThen->successors[0] == lastElse->successors[0])
1636 bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1637 CC_ALWAYS, NULL);
1638
1639 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1640
1641 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1642 if (!visit(node))
1643 return false;
1644 }
1645 setPosition(convert(lastThen), true);
1646 if (!bb->getExit() ||
1647 !bb->getExit()->asFlow() ||
1648 bb->getExit()->asFlow()->op == OP_JOIN) {
1649 BasicBlock *tailBB = convert(lastThen->successors[0]);
1650 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1651 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1652 }
1653
1654 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1655 if (!visit(node))
1656 return false;
1657 }
1658 setPosition(convert(lastElse), true);
1659 if (!bb->getExit() ||
1660 !bb->getExit()->asFlow() ||
1661 bb->getExit()->asFlow()->op == OP_JOIN) {
1662 BasicBlock *tailBB = convert(lastElse->successors[0]);
1663 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1664 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1665 }
1666
1667 if (lastThen->successors[0] == lastElse->successors[0]) {
1668 setPosition(convert(lastThen->successors[0]), true);
1669 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1670 }
1671
1672 return true;
1673 }
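// Flow sketch for "if (c) { T } else { E } tail" as emitted above
// (illustrative pseudo-IR, assuming both branches fall through to tail):
//
//   bb:     joinat tail; bra (c == 0) elseBB
//   thenBB: T ...; bra tail
//   elseBB: E ...; bra tail
//   tail:   join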
1674
1675 bool
1676 Converter::visit(nir_loop *loop)
1677 {
1678 curLoopDepth += 1;
1679 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1680
1681 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1682 BasicBlock *tailBB =
1683 convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1684 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1685
1686 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1687 setPosition(loopBB, false);
1688 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1689
1690 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1691 if (!visit(node))
1692 return false;
1693 }
1694 Instruction *insn = bb->getExit();
1695 if (bb->cfg.incidentCount() != 0) {
1696 if (!insn || !insn->asFlow()) {
1697 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1698 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1699 } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1700 tailBB->cfg.incidentCount() == 0) {
1701 // RA doesn't like having blocks around with no incident edge,
1702 // so we create a fake one to make it happy
1703 bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1704 }
1705 }
1706
1707 curLoopDepth -= 1;
1708
1709 return true;
1710 }
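// Flow sketch of the loop structure built above (illustrative):
//
//   bb:     prebreak tail
//   loopBB: precont loopBB
//           <body>
//           cont loopBB   // added only if the body does not end in a flow op
//   tail:   ...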
1711
1712 bool
1713 Converter::visit(nir_instr *insn)
1714 {
1715 // we need an insertion point for immediate loads generated on the fly
1716 immInsertPos = bb->getExit();
1717 switch (insn->type) {
1718 case nir_instr_type_alu:
1719 return visit(nir_instr_as_alu(insn));
1720 case nir_instr_type_deref:
1721 return visit(nir_instr_as_deref(insn));
1722 case nir_instr_type_intrinsic:
1723 return visit(nir_instr_as_intrinsic(insn));
1724 case nir_instr_type_jump:
1725 return visit(nir_instr_as_jump(insn));
1726 case nir_instr_type_load_const:
1727 return visit(nir_instr_as_load_const(insn));
1728 case nir_instr_type_ssa_undef:
1729 return visit(nir_instr_as_ssa_undef(insn));
1730 case nir_instr_type_tex:
1731 return visit(nir_instr_as_tex(insn));
1732 default:
1733 ERROR("unknown nir_instr type %u\n", insn->type);
1734 return false;
1735 }
1736 return true;
1737 }
1738
1739 SVSemantic
1740 Converter::convert(nir_intrinsic_op intr)
1741 {
1742 switch (intr) {
1743 case nir_intrinsic_load_base_vertex:
1744 return SV_BASEVERTEX;
1745 case nir_intrinsic_load_base_instance:
1746 return SV_BASEINSTANCE;
1747 case nir_intrinsic_load_draw_id:
1748 return SV_DRAWID;
1749 case nir_intrinsic_load_front_face:
1750 return SV_FACE;
1751 case nir_intrinsic_load_helper_invocation:
1752 return SV_THREAD_KILL;
1753 case nir_intrinsic_load_instance_id:
1754 return SV_INSTANCE_ID;
1755 case nir_intrinsic_load_invocation_id:
1756 return SV_INVOCATION_ID;
1757 case nir_intrinsic_load_local_group_size:
1758 return SV_NTID;
1759 case nir_intrinsic_load_local_invocation_id:
1760 return SV_TID;
1761 case nir_intrinsic_load_num_work_groups:
1762 return SV_NCTAID;
1763 case nir_intrinsic_load_patch_vertices_in:
1764 return SV_VERTEX_COUNT;
1765 case nir_intrinsic_load_primitive_id:
1766 return SV_PRIMITIVE_ID;
1767 case nir_intrinsic_load_sample_id:
1768 return SV_SAMPLE_INDEX;
1769 case nir_intrinsic_load_sample_mask_in:
1770 return SV_SAMPLE_MASK;
1771 case nir_intrinsic_load_sample_pos:
1772 return SV_SAMPLE_POS;
1773 case nir_intrinsic_load_subgroup_eq_mask:
1774 return SV_LANEMASK_EQ;
1775 case nir_intrinsic_load_subgroup_ge_mask:
1776 return SV_LANEMASK_GE;
1777 case nir_intrinsic_load_subgroup_gt_mask:
1778 return SV_LANEMASK_GT;
1779 case nir_intrinsic_load_subgroup_le_mask:
1780 return SV_LANEMASK_LE;
1781 case nir_intrinsic_load_subgroup_lt_mask:
1782 return SV_LANEMASK_LT;
1783 case nir_intrinsic_load_subgroup_invocation:
1784 return SV_LANEID;
1785 case nir_intrinsic_load_tess_coord:
1786 return SV_TESS_COORD;
1787 case nir_intrinsic_load_tess_level_inner:
1788 return SV_TESS_INNER;
1789 case nir_intrinsic_load_tess_level_outer:
1790 return SV_TESS_OUTER;
1791 case nir_intrinsic_load_vertex_id:
1792 return SV_VERTEX_ID;
1793 case nir_intrinsic_load_work_group_id:
1794 return SV_CTAID;
1795 default:
1796 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1797 nir_intrinsic_infos[intr].name);
1798 assert(false);
1799 return SV_LAST;
1800 }
1801 }
1802
1803 ImgFormat
1804 Converter::convertGLImgFormat(GLuint format)
1805 {
1806 #define FMT_CASE(a, b) \
1807 case GL_ ## a: return nv50_ir::FMT_ ## b
1808
1809 switch (format) {
1810 FMT_CASE(NONE, NONE);
1811
1812 FMT_CASE(RGBA32F, RGBA32F);
1813 FMT_CASE(RGBA16F, RGBA16F);
1814 FMT_CASE(RG32F, RG32F);
1815 FMT_CASE(RG16F, RG16F);
1816 FMT_CASE(R11F_G11F_B10F, R11G11B10F);
1817 FMT_CASE(R32F, R32F);
1818 FMT_CASE(R16F, R16F);
1819
1820 FMT_CASE(RGBA32UI, RGBA32UI);
1821 FMT_CASE(RGBA16UI, RGBA16UI);
1822 FMT_CASE(RGB10_A2UI, RGB10A2UI);
1823 FMT_CASE(RGBA8UI, RGBA8UI);
1824 FMT_CASE(RG32UI, RG32UI);
1825 FMT_CASE(RG16UI, RG16UI);
1826 FMT_CASE(RG8UI, RG8UI);
1827 FMT_CASE(R32UI, R32UI);
1828 FMT_CASE(R16UI, R16UI);
1829 FMT_CASE(R8UI, R8UI);
1830
1831 FMT_CASE(RGBA32I, RGBA32I);
1832 FMT_CASE(RGBA16I, RGBA16I);
1833 FMT_CASE(RGBA8I, RGBA8I);
1834 FMT_CASE(RG32I, RG32I);
1835 FMT_CASE(RG16I, RG16I);
1836 FMT_CASE(RG8I, RG8I);
1837 FMT_CASE(R32I, R32I);
1838 FMT_CASE(R16I, R16I);
1839 FMT_CASE(R8I, R8I);
1840
1841 FMT_CASE(RGBA16, RGBA16);
1842 FMT_CASE(RGB10_A2, RGB10A2);
1843 FMT_CASE(RGBA8, RGBA8);
1844 FMT_CASE(RG16, RG16);
1845 FMT_CASE(RG8, RG8);
1846 FMT_CASE(R16, R16);
1847 FMT_CASE(R8, R8);
1848
1849 FMT_CASE(RGBA16_SNORM, RGBA16_SNORM);
1850 FMT_CASE(RGBA8_SNORM, RGBA8_SNORM);
1851 FMT_CASE(RG16_SNORM, RG16_SNORM);
1852 FMT_CASE(RG8_SNORM, RG8_SNORM);
1853 FMT_CASE(R16_SNORM, R16_SNORM);
1854 FMT_CASE(R8_SNORM, R8_SNORM);
1855
1856 FMT_CASE(BGRA_INTEGER, BGRA8);
1857 default:
1858 ERROR("unknown format %x\n", format);
1859 assert(false);
1860 return nv50_ir::FMT_NONE;
1861 }
1862 #undef FMT_CASE
1863 }
1864
1865 bool
1866 Converter::visit(nir_intrinsic_instr *insn)
1867 {
1868 nir_intrinsic_op op = insn->intrinsic;
1869 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1870
1871 switch (op) {
1872 case nir_intrinsic_load_uniform: {
1873 LValues &newDefs = convert(&insn->dest);
1874 const DataType dType = getDType(insn);
1875 Value *indirect;
1876 uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1877 for (uint8_t i = 0; i < insn->num_components; ++i) {
1878 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1879 }
1880 break;
1881 }
1882 case nir_intrinsic_store_output:
1883 case nir_intrinsic_store_per_vertex_output: {
1884 Value *indirect;
1885 DataType dType = getSType(insn->src[0], false, false);
1886 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1887
1888 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1889 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1890 continue;
1891
1892 uint8_t offset = 0;
1893 Value *src = getSrc(&insn->src[0], i);
1894 switch (prog->getType()) {
1895 case Program::TYPE_FRAGMENT: {
1896 if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1897 // TGSI uses a different interface than NIR: TGSI stores the
1898 // value in the z component, NIR in x
1899 offset += 2;
1900 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1901 }
1902 break;
1903 }
1904 case Program::TYPE_GEOMETRY:
1905 case Program::TYPE_VERTEX: {
1906 if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1907 mkMov(clipVtx[i], src);
1908 src = clipVtx[i];
1909 }
1910 break;
1911 }
1912 default:
1913 break;
1914 }
1915
1916 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1917 }
1918 break;
1919 }
1920 case nir_intrinsic_load_input:
1921 case nir_intrinsic_load_interpolated_input:
1922 case nir_intrinsic_load_output: {
1923 LValues &newDefs = convert(&insn->dest);
1924
1925 // FBFetch
1926 if (prog->getType() == Program::TYPE_FRAGMENT &&
1927 op == nir_intrinsic_load_output) {
1928 std::vector<Value*> defs, srcs;
1929 uint8_t mask = 0;
1930
1931 srcs.push_back(getSSA());
1932 srcs.push_back(getSSA());
1933 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1934 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1935 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1936 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1937
1938 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1939 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1940
1941 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1942 defs.push_back(newDefs[i]);
1943 mask |= 1 << i;
1944 }
1945
1946 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1947 texi->tex.levelZero = 1;
1948 texi->tex.mask = mask;
1949 texi->tex.useOffsets = 0;
1950 texi->tex.r = 0xffff;
1951 texi->tex.s = 0xffff;
1952
1953 info->prop.fp.readsFramebuffer = true;
1954 break;
1955 }
1956
1957 const DataType dType = getDType(insn);
1958 Value *indirect;
1959 bool input = op != nir_intrinsic_load_output;
1960 operation nvirOp;
1961 uint32_t mode = 0;
1962
1963 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1964 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1965
1966 // see load_barycentric_* handling
1967 if (prog->getType() == Program::TYPE_FRAGMENT) {
1968 mode = translateInterpMode(&vary, nvirOp);
1969 if (op == nir_intrinsic_load_interpolated_input) {
1970 ImmediateValue immMode;
1971 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1972 mode |= immMode.reg.data.u32;
1973 }
1974 }
1975
1976 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1977 uint32_t address = getSlotAddress(insn, idx, i);
1978 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1979 if (prog->getType() == Program::TYPE_FRAGMENT) {
1980 int s = 1;
1981 if (typeSizeof(dType) == 8) {
1982 Value *lo = getSSA();
1983 Value *hi = getSSA();
1984 Instruction *interp;
1985
1986 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1987 if (nvirOp == OP_PINTERP)
1988 interp->setSrc(s++, fp.position);
1989 if (mode & NV50_IR_INTERP_OFFSET)
1990 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1991 interp->setInterpolate(mode);
1992 interp->setIndirect(0, 0, indirect);
1993
1994 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1995 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1996 if (nvirOp == OP_PINTERP)
1997 interp->setSrc(s++, fp.position);
1998 if (mode & NV50_IR_INTERP_OFFSET)
1999 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2000 interp->setInterpolate(mode);
2001 interp->setIndirect(0, 0, indirect);
2002
2003 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2004 } else {
2005 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2006 if (nvirOp == OP_PINTERP)
2007 interp->setSrc(s++, fp.position);
2008 if (mode & NV50_IR_INTERP_OFFSET)
2009 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2010 interp->setInterpolate(mode);
2011 interp->setIndirect(0, 0, indirect);
2012 }
2013 } else {
2014 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2015 }
2016 }
2017 break;
2018 }
2019 case nir_intrinsic_load_barycentric_at_offset:
2020 case nir_intrinsic_load_barycentric_at_sample:
2021 case nir_intrinsic_load_barycentric_centroid:
2022 case nir_intrinsic_load_barycentric_pixel:
2023 case nir_intrinsic_load_barycentric_sample: {
2024 LValues &newDefs = convert(&insn->dest);
2025 uint32_t mode;
2026
2027 if (op == nir_intrinsic_load_barycentric_centroid ||
2028 op == nir_intrinsic_load_barycentric_sample) {
2029 mode = NV50_IR_INTERP_CENTROID;
2030 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
2031 Value *offs[2];
2032 for (uint8_t c = 0; c < 2; c++) {
2033 offs[c] = getScratch();
2034 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2035 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2036 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2037 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2038 }
2039 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
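// a worked example of the packing above (values picked for illustration):
// for an offset of (0.25, -0.5),
//   x: clamp to [-0.5, 0.4375] ->  0.25, * 4096 ->  1024 = 0x0400
//   y: clamp to [-0.5, 0.4375] -> -0.5,  * 4096 -> -2048 = 0xf800 (low 16 bits)
// the INSBF with 0x1010 inserts 16 bits of offs[1] at bit 16, i.e.
//   newDefs[0] = (offs[1] << 16) | (offs[0] & 0xffff) = 0xf8000400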
2040
2041 mode = NV50_IR_INTERP_OFFSET;
2042 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2043 mode = NV50_IR_INTERP_DEFAULT;
2044 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2045 info->prop.fp.readsSampleLocations = true;
2046 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2047 mode = NV50_IR_INTERP_OFFSET;
2048 } else {
2049 unreachable("all intrinsics already handled above");
2050 }
2051
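// every load_barycentric_* variant yields two components: component 0
// carries the (possibly packed) offset, component 1 the interpolation
// mode, which the load_interpolated_input handling above reads back as an
// immediate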
2052 loadImm(newDefs[1], mode);
2053 break;
2054 }
2055 case nir_intrinsic_discard:
2056 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2057 break;
2058 case nir_intrinsic_discard_if: {
2059 Value *pred = getSSA(1, FILE_PREDICATE);
2060 if (insn->num_components > 1) {
2061 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
2062 assert(false);
2063 return false;
2064 }
2065 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2066 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2067 break;
2068 }
2069 case nir_intrinsic_load_base_vertex:
2070 case nir_intrinsic_load_base_instance:
2071 case nir_intrinsic_load_draw_id:
2072 case nir_intrinsic_load_front_face:
2073 case nir_intrinsic_load_helper_invocation:
2074 case nir_intrinsic_load_instance_id:
2075 case nir_intrinsic_load_invocation_id:
2076 case nir_intrinsic_load_local_group_size:
2077 case nir_intrinsic_load_local_invocation_id:
2078 case nir_intrinsic_load_num_work_groups:
2079 case nir_intrinsic_load_patch_vertices_in:
2080 case nir_intrinsic_load_primitive_id:
2081 case nir_intrinsic_load_sample_id:
2082 case nir_intrinsic_load_sample_mask_in:
2083 case nir_intrinsic_load_sample_pos:
2084 case nir_intrinsic_load_subgroup_eq_mask:
2085 case nir_intrinsic_load_subgroup_ge_mask:
2086 case nir_intrinsic_load_subgroup_gt_mask:
2087 case nir_intrinsic_load_subgroup_le_mask:
2088 case nir_intrinsic_load_subgroup_lt_mask:
2089 case nir_intrinsic_load_subgroup_invocation:
2090 case nir_intrinsic_load_tess_coord:
2091 case nir_intrinsic_load_tess_level_inner:
2092 case nir_intrinsic_load_tess_level_outer:
2093 case nir_intrinsic_load_vertex_id:
2094 case nir_intrinsic_load_work_group_id: {
2095 const DataType dType = getDType(insn);
2096 SVSemantic sv = convert(op);
2097 LValues &newDefs = convert(&insn->dest);
2098
2099 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2100 Value *def;
2101 if (typeSizeof(dType) == 8)
2102 def = getSSA();
2103 else
2104 def = newDefs[i];
2105
2106 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2107 loadImm(def, 0u);
2108 } else {
2109 Symbol *sym = mkSysVal(sv, i);
2110 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2111 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2112 rdsv->perPatch = 1;
2113 }
2114
2115 if (typeSizeof(dType) == 8)
2116 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
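// system values are read as 32 bit, so 64 bit destinations are
// zero-extended by merging in a zero high word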
2117 }
2118 break;
2119 }
2120 // constants
2121 case nir_intrinsic_load_subgroup_size: {
2122 LValues &newDefs = convert(&insn->dest);
2123 loadImm(newDefs[0], 32u);
2124 break;
2125 }
2126 case nir_intrinsic_vote_all:
2127 case nir_intrinsic_vote_any:
2128 case nir_intrinsic_vote_ieq: {
2129 LValues &newDefs = convert(&insn->dest);
2130 Value *pred = getScratch(1, FILE_PREDICATE);
2131 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2132 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2133 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2134 break;
2135 }
2136 case nir_intrinsic_ballot: {
2137 LValues &newDefs = convert(&insn->dest);
2138 Value *pred = getSSA(1, FILE_PREDICATE);
2139 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2140 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2141 break;
2142 }
2143 case nir_intrinsic_read_first_invocation:
2144 case nir_intrinsic_read_invocation: {
2145 LValues &newDefs = convert(&insn->dest);
2146 const DataType dType = getDType(insn);
2147 Value *tmp = getScratch();
2148
2149 if (op == nir_intrinsic_read_first_invocation) {
2150 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2151 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2152 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
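// how the lowest active lane is found here: OP_VOTE ANY with an immediate
// 1 yields the mask of active lanes, EXTBF_REV mirrors the 32 bits, and
// BFIND in shift-amount mode returns 31 - msb; e.g. an active mask of
// 0x000000f0 reverses to 0x0f000000, msb 27, so lane 31 - 27 = 4 is picked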
2153 } else
2154 tmp = getSrc(&insn->src[1], 0);
2155
2156 for (uint8_t i = 0; i < insn->num_components; ++i) {
2157 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2158 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2159 }
2160 break;
2161 }
2162 case nir_intrinsic_load_per_vertex_input: {
2163 const DataType dType = getDType(insn);
2164 LValues &newDefs = convert(&insn->dest);
2165 Value *indirectVertex;
2166 Value *indirectOffset;
2167 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2168 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2169
2170 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2171 mkImm(baseVertex), indirectVertex);
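// OP_PFETCH resolves the (base + indirect) vertex index against the
// current input primitive, yielding the attribute base the per-component
// loads below are relative to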
2172 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2173 uint32_t address = getSlotAddress(insn, idx, i);
2174 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2175 indirectOffset, vtxBase, info->in[idx].patch);
2176 }
2177 break;
2178 }
2179 case nir_intrinsic_load_per_vertex_output: {
2180 const DataType dType = getDType(insn);
2181 LValues &newDefs = convert(&insn->dest);
2182 Value *indirectVertex;
2183 Value *indirectOffset;
2184 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2185 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2186 Value *vtxBase = NULL;
2187
2188 if (indirectVertex)
2189 vtxBase = indirectVertex;
2190 else
2191 vtxBase = loadImm(NULL, baseVertex);
2192
2193 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2194
2195 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2196 uint32_t address = getSlotAddress(insn, idx, i);
2197 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2198 indirectOffset, vtxBase, info->out[idx].patch);
2199 }
2200 break;
2201 }
2202 case nir_intrinsic_emit_vertex:
2203 if (info->io.genUserClip > 0)
2204 handleUserClipPlanes();
2205 // fallthrough
2206 case nir_intrinsic_end_primitive: {
2207 uint32_t idx = nir_intrinsic_stream_id(insn);
2208 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2209 break;
2210 }
2211 case nir_intrinsic_load_ubo: {
2212 const DataType dType = getDType(insn);
2213 LValues &newDefs = convert(&insn->dest);
2214 Value *indirectIndex;
2215 Value *indirectOffset;
2216 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
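// the + 1 skips c0[], which is reserved for the default uniform buffer,
// so UBO bindings start at c1[]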
2217 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2218
2219 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2220 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2221 indirectOffset, indirectIndex);
2222 }
2223 break;
2224 }
2225 case nir_intrinsic_get_buffer_size: {
2226 LValues &newDefs = convert(&insn->dest);
2227 const DataType dType = getDType(insn);
2228 Value *indirectBuffer;
2229 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2230
2231 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2232 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2233 break;
2234 }
2235 case nir_intrinsic_store_ssbo: {
2236 DataType sType = getSType(insn->src[0], false, false);
2237 Value *indirectBuffer;
2238 Value *indirectOffset;
2239 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2240 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2241
2242 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2243 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2244 continue;
2245 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2246 offset + i * typeSizeof(sType));
2247 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2248 ->setIndirect(0, 1, indirectBuffer);
2249 }
2250 info->io.globalAccess |= 0x2;
2251 break;
2252 }
2253 case nir_intrinsic_load_ssbo: {
2254 const DataType dType = getDType(insn);
2255 LValues &newDefs = convert(&insn->dest);
2256 Value *indirectBuffer;
2257 Value *indirectOffset;
2258 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2259 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2260
2261 for (uint8_t i = 0u; i < insn->num_components; ++i)
2262 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2263 indirectOffset, indirectBuffer);
2264
2265 info->io.globalAccess |= 0x1;
2266 break;
2267 }
2268 case nir_intrinsic_shared_atomic_add:
2269 case nir_intrinsic_shared_atomic_and:
2270 case nir_intrinsic_shared_atomic_comp_swap:
2271 case nir_intrinsic_shared_atomic_exchange:
2272 case nir_intrinsic_shared_atomic_or:
2273 case nir_intrinsic_shared_atomic_imax:
2274 case nir_intrinsic_shared_atomic_imin:
2275 case nir_intrinsic_shared_atomic_umax:
2276 case nir_intrinsic_shared_atomic_umin:
2277 case nir_intrinsic_shared_atomic_xor: {
2278 const DataType dType = getDType(insn);
2279 LValues &newDefs = convert(&insn->dest);
2280 Value *indirectOffset;
2281 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2282 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2283 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2284 if (op == nir_intrinsic_shared_atomic_comp_swap)
2285 atom->setSrc(2, getSrc(&insn->src[2], 0));
2286 atom->setIndirect(0, 0, indirectOffset);
2287 atom->subOp = getSubOp(op);
2288 break;
2289 }
2290 case nir_intrinsic_ssbo_atomic_add:
2291 case nir_intrinsic_ssbo_atomic_and:
2292 case nir_intrinsic_ssbo_atomic_comp_swap:
2293 case nir_intrinsic_ssbo_atomic_exchange:
2294 case nir_intrinsic_ssbo_atomic_or:
2295 case nir_intrinsic_ssbo_atomic_imax:
2296 case nir_intrinsic_ssbo_atomic_imin:
2297 case nir_intrinsic_ssbo_atomic_umax:
2298 case nir_intrinsic_ssbo_atomic_umin:
2299 case nir_intrinsic_ssbo_atomic_xor: {
2300 const DataType dType = getDType(insn);
2301 LValues &newDefs = convert(&insn->dest);
2302 Value *indirectBuffer;
2303 Value *indirectOffset;
2304 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2305 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2306
2307 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2308 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2309 getSrc(&insn->src[2], 0));
2310 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2311 atom->setSrc(2, getSrc(&insn->src[3], 0));
2312 atom->setIndirect(0, 0, indirectOffset);
2313 atom->setIndirect(0, 1, indirectBuffer);
2314 atom->subOp = getSubOp(op);
2315
2316 info->io.globalAccess |= 0x2;
2317 break;
2318 }
2319 case nir_intrinsic_image_deref_atomic_add:
2320 case nir_intrinsic_image_deref_atomic_and:
2321 case nir_intrinsic_image_deref_atomic_comp_swap:
2322 case nir_intrinsic_image_deref_atomic_exchange:
2323 case nir_intrinsic_image_deref_atomic_max:
2324 case nir_intrinsic_image_deref_atomic_min:
2325 case nir_intrinsic_image_deref_atomic_or:
2326 case nir_intrinsic_image_deref_atomic_xor:
2327 case nir_intrinsic_image_deref_load:
2328 case nir_intrinsic_image_deref_samples:
2329 case nir_intrinsic_image_deref_size:
2330 case nir_intrinsic_image_deref_store: {
2331 const nir_variable *tex;
2332 std::vector<Value*> srcs, defs;
2333 Value *indirect;
2334 DataType ty;
2335
2336 uint32_t mask = 0;
2337 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2338 const glsl_type *type = deref->type;
2339 TexInstruction::Target target =
2340 convert((glsl_sampler_dim)type->sampler_dimensionality,
2341 type->sampler_array, type->sampler_shadow);
2342 unsigned int argCount = getNIRArgCount(target);
2343 uint16_t location = handleDeref(deref, indirect, tex);
2344
2345 if (opInfo.has_dest) {
2346 LValues &newDefs = convert(&insn->dest);
2347 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2348 defs.push_back(newDefs[i]);
2349 mask |= 1 << i;
2350 }
2351 }
2352
2353 switch (op) {
2354 case nir_intrinsic_image_deref_atomic_add:
2355 case nir_intrinsic_image_deref_atomic_and:
2356 case nir_intrinsic_image_deref_atomic_comp_swap:
2357 case nir_intrinsic_image_deref_atomic_exchange:
2358 case nir_intrinsic_image_deref_atomic_max:
2359 case nir_intrinsic_image_deref_atomic_min:
2360 case nir_intrinsic_image_deref_atomic_or:
2361 case nir_intrinsic_image_deref_atomic_xor:
2362 ty = getDType(insn);
2363 mask = 0x1;
2364 info->io.globalAccess |= 0x2;
2365 break;
2366 case nir_intrinsic_image_deref_load:
2367 ty = TYPE_U32;
2368 info->io.globalAccess |= 0x1;
2369 break;
2370 case nir_intrinsic_image_deref_store:
2371 ty = TYPE_U32;
2372 mask = 0xf;
2373 info->io.globalAccess |= 0x2;
2374 break;
2375 case nir_intrinsic_image_deref_samples:
2376 mask = 0x8;
2377 ty = TYPE_U32;
2378 break;
2379 case nir_intrinsic_image_deref_size:
2380 ty = TYPE_U32;
2381 break;
2382 default:
2383 unreachable("unhandled image opcode");
2384 break;
2385 }
2386
2387 // coords
2388 if (opInfo.num_srcs >= 2)
2389 for (unsigned int i = 0u; i < argCount; ++i)
2390 srcs.push_back(getSrc(&insn->src[1], i));
2391
2392 // the sample index is just another src added after the coords
2393 if (opInfo.num_srcs >= 3 && target.isMS())
2394 srcs.push_back(getSrc(&insn->src[2], 0));
2395
2396 if (opInfo.num_srcs >= 4) {
2397 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2398 for (uint8_t i = 0u; i < components; ++i)
2399 srcs.push_back(getSrc(&insn->src[3], i));
2400 }
2401
2402 if (opInfo.num_srcs >= 5)
2403 // 1 extra source for the atomic compare-and-swap value
2404 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2405 srcs.push_back(getSrc(&insn->src[4], i));
2406
2407 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2408 texi->tex.bindless = false;
2409 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)];
2410 texi->tex.mask = mask;
2411 texi->cache = getCacheModeFromVar(tex);
2412 texi->setType(ty);
2413 texi->subOp = getSubOp(op);
2414
2415 if (indirect)
2416 texi->setIndirectR(indirect);
2417
2418 break;
2419 }
2420 case nir_intrinsic_store_shared: {
2421 DataType sType = getSType(insn->src[0], false, false);
2422 Value *indirectOffset;
2423 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2424
2425 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2426 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2427 continue;
2428 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2429 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2430 }
2431 break;
2432 }
2433 case nir_intrinsic_load_shared: {
2434 const DataType dType = getDType(insn);
2435 LValues &newDefs = convert(&insn->dest);
2436 Value *indirectOffset;
2437 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2438
2439 for (uint8_t i = 0u; i < insn->num_components; ++i)
2440 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2441
2442 break;
2443 }
2444 case nir_intrinsic_barrier: {
2445 // TODO: add flag to shader_info
2446 info->numBarriers = 1;
2447 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2448 bar->fixed = 1;
2449 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2450 break;
2451 }
2452 case nir_intrinsic_group_memory_barrier:
2453 case nir_intrinsic_memory_barrier:
2454 case nir_intrinsic_memory_barrier_atomic_counter:
2455 case nir_intrinsic_memory_barrier_buffer:
2456 case nir_intrinsic_memory_barrier_image:
2457 case nir_intrinsic_memory_barrier_shared: {
2458 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2459 bar->fixed = 1;
2460 bar->subOp = getSubOp(op);
2461 break;
2462 }
2463 case nir_intrinsic_shader_clock: {
2464 const DataType dType = getDType(insn);
2465 LValues &newDefs = convert(&insn->dest);
2466
2467 loadImm(newDefs[0], 0u);
2468 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2469 break;
2470 }
2471 default:
2472 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2473 return false;
2474 }
2475
2476 return true;
2477 }
2478
2479 bool
2480 Converter::visit(nir_jump_instr *insn)
2481 {
2482 switch (insn->type) {
2483 case nir_jump_return:
2484 // TODO: this only works in the main function
2485 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2486 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2487 break;
2488 case nir_jump_break:
2489 case nir_jump_continue: {
2490 bool isBreak = insn->type == nir_jump_break;
2491 nir_block *block = insn->instr.block;
2492 assert(!block->successors[1]);
2493 BasicBlock *target = convert(block->successors[0]);
2494 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2495 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2496 break;
2497 }
2498 default:
2499 ERROR("unknown nir_jump_type %u\n", insn->type);
2500 return false;
2501 }
2502
2503 return true;
2504 }
2505
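// immediates are not emitted where their nir_load_const_instr appears,
// but at their first use (see visit(nir_load_const_instr*) below): the
// builder is temporarily moved to immInsertPos, or to the top of the
// current block, so the materialized value dominates all of its uses,
// and is then restored to the end of the block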
2506 Value*
2507 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2508 {
2509 Value *val;
2510
2511 if (immInsertPos)
2512 setPosition(immInsertPos, true);
2513 else
2514 setPosition(bb, false);
2515
2516 switch (insn->def.bit_size) {
2517 case 64:
2518 val = loadImm(getSSA(8), insn->value.u64[idx]);
2519 break;
2520 case 32:
2521 val = loadImm(getSSA(4), insn->value.u32[idx]);
2522 break;
2523 case 16:
2524 val = loadImm(getSSA(2), insn->value.u16[idx]);
2525 break;
2526 case 8:
2527 val = loadImm(getSSA(1), insn->value.u8[idx]);
2528 break;
2529 default:
2530 unreachable("unhandled bit size!\n");
2531 }
2532 setPosition(bb, true);
2533 return val;
2534 }
2535
2536 bool
2537 Converter::visit(nir_load_const_instr *insn)
2538 {
2539 assert(insn->def.bit_size <= 64);
2540 immediates[insn->def.index] = insn;
2541 return true;
2542 }
2543
2544 #define DEFAULT_CHECKS \
2545 if (insn->dest.dest.ssa.num_components > 1) { \
2546 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2547 return false; \
2548 } \
2549 if (insn->dest.write_mask != 1) { \
2550 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2551 return false; \
2552 }
2553 bool
2554 Converter::visit(nir_alu_instr *insn)
2555 {
2556 const nir_op op = insn->op;
2557 const nir_op_info &info = nir_op_infos[op];
2558 DataType dType = getDType(insn);
2559 const std::vector<DataType> sTypes = getSTypes(insn);
2560
2561 Instruction *oldPos = this->bb->getExit();
2562
2563 switch (op) {
2564 case nir_op_fabs:
2565 case nir_op_iabs:
2566 case nir_op_fadd:
2567 case nir_op_iadd:
2568 case nir_op_fand:
2569 case nir_op_iand:
2570 case nir_op_fceil:
2571 case nir_op_fcos:
2572 case nir_op_fddx:
2573 case nir_op_fddx_coarse:
2574 case nir_op_fddx_fine:
2575 case nir_op_fddy:
2576 case nir_op_fddy_coarse:
2577 case nir_op_fddy_fine:
2578 case nir_op_fdiv:
2579 case nir_op_idiv:
2580 case nir_op_udiv:
2581 case nir_op_fexp2:
2582 case nir_op_ffloor:
2583 case nir_op_ffma:
2584 case nir_op_flog2:
2585 case nir_op_fmax:
2586 case nir_op_imax:
2587 case nir_op_umax:
2588 case nir_op_fmin:
2589 case nir_op_imin:
2590 case nir_op_umin:
2591 case nir_op_fmod:
2592 case nir_op_imod:
2593 case nir_op_umod:
2594 case nir_op_fmul:
2595 case nir_op_imul:
2596 case nir_op_imul_high:
2597 case nir_op_umul_high:
2598 case nir_op_fneg:
2599 case nir_op_ineg:
2600 case nir_op_fnot:
2601 case nir_op_inot:
2602 case nir_op_for:
2603 case nir_op_ior:
2604 case nir_op_pack_64_2x32_split:
2605 case nir_op_fpow:
2606 case nir_op_frcp:
2607 case nir_op_frem:
2608 case nir_op_irem:
2609 case nir_op_frsq:
2610 case nir_op_fsat:
2611 case nir_op_ishr:
2612 case nir_op_ushr:
2613 case nir_op_fsin:
2614 case nir_op_fsqrt:
2615 case nir_op_fsub:
2616 case nir_op_isub:
2617 case nir_op_ftrunc:
2618 case nir_op_ishl:
2619 case nir_op_fxor:
2620 case nir_op_ixor: {
2621 DEFAULT_CHECKS;
2622 LValues &newDefs = convert(&insn->dest);
2623 operation preOp = preOperationNeeded(op);
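// some source operands need a fixup instruction first; preOperationNeeded
// reports e.g. OP_PRESIN for the trigonometric ops, which is emitted into
// a temporary feeding the real operation below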
2624 if (preOp != OP_NOP) {
2625 assert(info.num_inputs < 2);
2626 Value *tmp = getSSA(typeSizeof(dType));
2627 Instruction *i0 = mkOp(preOp, dType, tmp);
2628 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2629 if (info.num_inputs) {
2630 i0->setSrc(0, getSrc(&insn->src[0]));
2631 i1->setSrc(0, tmp);
2632 }
2633 i1->subOp = getSubOp(op);
2634 } else {
2635 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2636 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2637 i->setSrc(s, getSrc(&insn->src[s]));
2638 }
2639 i->subOp = getSubOp(op);
2640 }
2641 break;
2642 }
2643 case nir_op_ifind_msb:
2644 case nir_op_ufind_msb: {
2645 DEFAULT_CHECKS;
2646 LValues &newDefs = convert(&insn->dest);
2647 dType = sTypes[0];
2648 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2649 break;
2650 }
2651 case nir_op_fround_even: {
2652 DEFAULT_CHECKS;
2653 LValues &newDefs = convert(&insn->dest);
2654 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2655 break;
2656 }
2657 // convert instructions
2658 case nir_op_f2f32:
2659 case nir_op_f2i32:
2660 case nir_op_f2u32:
2661 case nir_op_i2f32:
2662 case nir_op_i2i32:
2663 case nir_op_u2f32:
2664 case nir_op_u2u32:
2665 case nir_op_f2f64:
2666 case nir_op_f2i64:
2667 case nir_op_f2u64:
2668 case nir_op_i2f64:
2669 case nir_op_i2i64:
2670 case nir_op_u2f64:
2671 case nir_op_u2u64: {
2672 DEFAULT_CHECKS;
2673 LValues &newDefs = convert(&insn->dest);
2674 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2675 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2676 i->rnd = ROUND_Z;
2677 i->sType = sTypes[0];
2678 break;
2679 }
2680 // compare instructions
2681 case nir_op_feq32:
2682 case nir_op_ieq32:
2683 case nir_op_fge32:
2684 case nir_op_ige32:
2685 case nir_op_uge32:
2686 case nir_op_flt32:
2687 case nir_op_ilt32:
2688 case nir_op_ult32:
2689 case nir_op_fne32:
2690 case nir_op_ine32: {
2691 DEFAULT_CHECKS;
2692 LValues &newDefs = convert(&insn->dest);
2693 Instruction *i = mkCmp(getOperation(op),
2694 getCondCode(op),
2695 dType,
2696 newDefs[0],
2697 dType,
2698 getSrc(&insn->src[0]),
2699 getSrc(&insn->src[1]));
2700 if (info.num_inputs == 3)
2701 i->setSrc(2, getSrc(&insn->src[2]));
2702 i->sType = sTypes[0];
2703 break;
2704 }
2705 // these are weird ALU ops and need special handling, because
2706 // 1. they are always component based
2707 // 2. they basically just merge multiple values into one data type
2708 case nir_op_imov:
2709 case nir_op_fmov:
2710 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2711 nir_reg_dest& reg = insn->dest.dest.reg;
2712 uint32_t goffset = regToLmemOffset[reg.reg->index];
2713 uint8_t comps = reg.reg->num_components;
2714 uint8_t size = reg.reg->bit_size / 8;
2715 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2716 uint32_t aoffset = csize * reg.base_offset;
2717 Value *indirect = NULL;
2718
2719 if (reg.indirect)
2720 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2721 getSrc(reg.indirect, 0), mkImm(csize));
2722
2723 for (uint8_t i = 0u; i < comps; ++i) {
2724 if (!((1u << i) & insn->dest.write_mask))
2725 continue;
2726
2727 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2728 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2729 }
2730 break;
2731 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2732 LValues &newDefs = convert(&insn->dest);
2733 nir_reg_src& reg = insn->src[0].src.reg;
2734 uint32_t goffset = regToLmemOffset[reg.reg->index];
2735 // uint8_t comps = reg.reg->num_components;
2736 uint8_t size = reg.reg->bit_size / 8;
2737 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2738 uint32_t aoffset = csize * reg.base_offset;
2739 Value *indirect = NULL;
2740
2741 if (reg.indirect)
2742 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2743
2744 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2745 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2746
2747 break;
2748 } else {
2749 LValues &newDefs = convert(&insn->dest);
2750 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2751 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2752 }
2753 }
2754 break;
2755 case nir_op_vec2:
2756 case nir_op_vec3:
2757 case nir_op_vec4: {
2758 LValues &newDefs = convert(&insn->dest);
2759 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2760 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2761 }
2762 break;
2763 }
2764 // (un)pack
2765 case nir_op_pack_64_2x32: {
2766 LValues &newDefs = convert(&insn->dest);
2767 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2768 merge->setSrc(0, getSrc(&insn->src[0], 0));
2769 merge->setSrc(1, getSrc(&insn->src[0], 1));
2770 break;
2771 }
2772 case nir_op_pack_half_2x16_split: {
2773 LValues &newDefs = convert(&insn->dest);
2774 Value *tmpH = getSSA();
2775 Value *tmpL = getSSA();
2776
2777 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2778 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2779 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2780 break;
2781 }
2782 case nir_op_unpack_half_2x16_split_x:
2783 case nir_op_unpack_half_2x16_split_y: {
2784 LValues &newDefs = convert(&insn->dest);
2785 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2786 if (op == nir_op_unpack_half_2x16_split_y)
2787 cvt->subOp = 1;
2788 break;
2789 }
2790 case nir_op_unpack_64_2x32: {
2791 LValues &newDefs = convert(&insn->dest);
2792 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2793 break;
2794 }
2795 case nir_op_unpack_64_2x32_split_x: {
2796 LValues &newDefs = convert(&insn->dest);
2797 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2798 break;
2799 }
2800 case nir_op_unpack_64_2x32_split_y: {
2801 LValues &newDefs = convert(&insn->dest);
2802 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2803 break;
2804 }
2805 // special instructions
2806 case nir_op_fsign:
2807 case nir_op_isign: {
2808 DEFAULT_CHECKS;
2809 DataType iType;
2810 if (::isFloatType(dType))
2811 iType = TYPE_F32;
2812 else
2813 iType = TYPE_S32;
2814
2815 LValues &newDefs = convert(&insn->dest);
2816 LValue *val0 = getScratch();
2817 LValue *val1 = getScratch();
2818 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2819 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2820
2821 if (dType == TYPE_F64) {
2822 mkOp2(OP_SUB, iType, val0, val0, val1);
2823 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2824 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2825 mkOp2(OP_SUB, iType, val0, val1, val0);
2826 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2827 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2828 } else if (::isFloatType(dType))
2829 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2830 else
2831 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2832 break;
2833 }
2834 case nir_op_fcsel:
2835 case nir_op_b32csel: {
2836 DEFAULT_CHECKS;
2837 LValues &newDefs = convert(&insn->dest);
2838 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2839 break;
2840 }
2841 case nir_op_ibitfield_extract:
2842 case nir_op_ubitfield_extract: {
2843 DEFAULT_CHECKS;
2844 Value *tmp = getSSA();
2845 LValues &newDefs = convert(&insn->dest);
2846 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2847 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
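// OP_EXTBF takes a packed field descriptor: offset in bits 0-7, field
// size in bits 8-15; the INSBF with 0x808 above assembles exactly that,
//   tmp = (bits << 8) | (offset & 0xff)
// from the two separate NIR sources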
2848 break;
2849 }
2850 case nir_op_bfm: {
2851 DEFAULT_CHECKS;
2852 LValues &newDefs = convert(&insn->dest);
2853 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2854 break;
2855 }
2856 case nir_op_bitfield_insert: {
2857 DEFAULT_CHECKS;
2858 LValues &newDefs = convert(&insn->dest);
2859 LValue *temp = getSSA();
2860 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2861 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2862 break;
2863 }
2864 case nir_op_bit_count: {
2865 DEFAULT_CHECKS;
2866 LValues &newDefs = convert(&insn->dest);
2867 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2868 break;
2869 }
2870 case nir_op_bitfield_reverse: {
2871 DEFAULT_CHECKS;
2872 LValues &newDefs = convert(&insn->dest);
2873 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2874 break;
2875 }
2876 case nir_op_find_lsb: {
2877 DEFAULT_CHECKS;
2878 LValues &newDefs = convert(&insn->dest);
2879 Value *tmp = getSSA();
2880 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2881 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2882 break;
2883 }
2884 // boolean conversions
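// NIR represents a true 32 bit boolean as ~0, so converting to a numeric
// type only needs an AND with the bit pattern of "one", e.g. for b2f32:
//   0xffffffff & 0x3f800000 (1.0f) -> 1.0f
//   0x00000000 & 0x3f800000        -> 0.0f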
2885 case nir_op_b2f32: {
2886 DEFAULT_CHECKS;
2887 LValues &newDefs = convert(&insn->dest);
2888 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2889 break;
2890 }
2891 case nir_op_b2f64: {
2892 DEFAULT_CHECKS;
2893 LValues &newDefs = convert(&insn->dest);
2894 Value *tmp = getSSA(4);
2895 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2896 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2897 break;
2898 }
2899 case nir_op_f2b32:
2900 case nir_op_i2b32: {
2901 DEFAULT_CHECKS;
2902 LValues &newDefs = convert(&insn->dest);
2903 Value *src1;
2904 if (typeSizeof(sTypes[0]) == 8) {
2905 src1 = loadImm(getSSA(8), 0.0);
2906 } else {
2907 src1 = zero;
2908 }
2909 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2910 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2911 break;
2912 }
2913 case nir_op_b2i32: {
2914 DEFAULT_CHECKS;
2915 LValues &newDefs = convert(&insn->dest);
2916 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2917 break;
2918 }
2919 case nir_op_b2i64: {
2920 DEFAULT_CHECKS;
2921 LValues &newDefs = convert(&insn->dest);
2922 LValue *def = getScratch();
2923 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2924 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2925 break;
2926 }
2927 default:
2928 ERROR("unknown nir_op %s\n", info.name);
2929 return false;
2930 }
2931
2932 if (!oldPos) {
2933 oldPos = this->bb->getEntry();
2934 // guard against an empty block before touching the entry
2935 if (unlikely(!oldPos))
2936 return true;
2937 oldPos->precise = insn->exact;
2938 }
2939
2940 while (oldPos->next) {
2941 oldPos = oldPos->next;
2942 oldPos->precise = insn->exact;
2943 }
2944 oldPos->saturate = insn->dest.saturate;
2945
2946 return true;
2947 }
2948 #undef DEFAULT_CHECKS
2949
2950 bool
2951 Converter::visit(nir_ssa_undef_instr *insn)
2952 {
2953 LValues &newDefs = convert(&insn->def);
2954 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2955 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2956 }
2957 return true;
2958 }
2959
2960 #define CASE_SAMPLER(ty) \
2961 case GLSL_SAMPLER_DIM_ ## ty : \
2962 if (isArray && !isShadow) \
2963 return TEX_TARGET_ ## ty ## _ARRAY; \
2964 else if (!isArray && isShadow) \
2965 return TEX_TARGET_## ty ## _SHADOW; \
2966 else if (isArray && isShadow) \
2967 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2968 else \
2969 return TEX_TARGET_ ## ty
2970
2971 TexTarget
2972 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2973 {
2974 switch (dim) {
2975 CASE_SAMPLER(1D);
2976 CASE_SAMPLER(2D);
2977 CASE_SAMPLER(CUBE);
2978 case GLSL_SAMPLER_DIM_3D:
2979 return TEX_TARGET_3D;
2980 case GLSL_SAMPLER_DIM_MS:
2981 if (isArray)
2982 return TEX_TARGET_2D_MS_ARRAY;
2983 return TEX_TARGET_2D_MS;
2984 case GLSL_SAMPLER_DIM_RECT:
2985 if (isShadow)
2986 return TEX_TARGET_RECT_SHADOW;
2987 return TEX_TARGET_RECT;
2988 case GLSL_SAMPLER_DIM_BUF:
2989 return TEX_TARGET_BUFFER;
2990 case GLSL_SAMPLER_DIM_EXTERNAL:
2991 return TEX_TARGET_2D;
2992 default:
2993 ERROR("unknown glsl_sampler_dim %u\n", dim);
2994 assert(false);
2995 return TEX_TARGET_COUNT;
2996 }
2997 }
2998 #undef CASE_SAMPLER
2999
3000 Value*
3001 Converter::applyProjection(Value *src, Value *proj)
3002 {
3003 if (!proj)
3004 return src;
3005 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3006 }
3007
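// NIR supplies fewer coordinate components than codegen's getArgCount()
// reports for some targets (the MS sample index, for one, comes in as a
// separate intrinsic source), so correct for that here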
3008 unsigned int
3009 Converter::getNIRArgCount(TexInstruction::Target& target)
3010 {
3011 unsigned int result = target.getArgCount();
3012 if (target.isCube() && target.isArray())
3013 result--;
3014 if (target.isMS())
3015 result--;
3016 return result;
3017 }
3018
3019 uint16_t
3020 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
3021 {
3022 typedef std::pair<uint32_t,Value*> DerefPair;
3023 std::list<DerefPair> derefs;
3024
3025 uint16_t result = 0;
3026 while (deref->deref_type != nir_deref_type_var) {
3027 switch (deref->deref_type) {
3028 case nir_deref_type_array: {
3029 Value *indirect;
3030 uint8_t size = type_size(deref->type);
3031 result += size * getIndirect(&deref->arr.index, 0, indirect);
3032
3033 if (indirect) {
3034 derefs.push_front(std::make_pair(size, indirect));
3035 }
3036
3037 break;
3038 }
3039 case nir_deref_type_struct: {
3040 result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
3041 break;
3042 }
3043 case nir_deref_type_var:
3044 default:
3045 unreachable("nir_deref_type_var reached in handleDeref!");
3046 break;
3047 }
3048 deref = nir_deref_instr_parent(deref);
3049 }
3050
3051 indirect = NULL;
3052 for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
3053 Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
3054 if (indirect)
3055 indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
3056 else
3057 indirect = offset;
3058 }
3059
3060 tex = nir_deref_instr_get_variable(deref);
3061 assert(tex);
3062
3063 return result + tex->data.driver_location;
3064 }
3065
3066 CacheMode
3067 Converter::getCacheModeFromVar(const nir_variable *var)
3068 {
3069 if (var->data.image.access == ACCESS_VOLATILE)
3070 return CACHE_CV;
3071 if (var->data.image.access == ACCESS_COHERENT)
3072 return CACHE_CG;
3073 return CACHE_CA;
3074 }
3075
3076 bool
3077 Converter::visit(nir_tex_instr *insn)
3078 {
3079 switch (insn->op) {
3080 case nir_texop_lod:
3081 case nir_texop_query_levels:
3082 case nir_texop_tex:
3083 case nir_texop_texture_samples:
3084 case nir_texop_tg4:
3085 case nir_texop_txb:
3086 case nir_texop_txd:
3087 case nir_texop_txf:
3088 case nir_texop_txf_ms:
3089 case nir_texop_txl:
3090 case nir_texop_txs: {
3091 LValues &newDefs = convert(&insn->dest);
3092 std::vector<Value*> srcs;
3093 std::vector<Value*> defs;
3094 std::vector<nir_src*> offsets;
3095 uint8_t mask = 0;
3096 bool lz = false;
3097 Value *proj = NULL;
3098 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3099 operation op = getOperation(insn->op);
3100
3101 int r, s;
3102 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3103 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3104 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3105 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3106 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3107 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3108 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3109 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3110 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3111 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3112 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3113
3114 if (projIdx != -1)
3115 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3116
3117 srcs.resize(insn->coord_components);
3118 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3119 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3120
3121 // sometimes we get fewer args than target.getArgCount, but codegen expects the latter
3122 if (insn->coord_components) {
3123 uint32_t argCount = target.getArgCount();
3124
3125 if (target.isMS())
3126 argCount -= 1;
3127
3128 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3129 srcs.push_back(getSSA());
3130 }
3131
3132 if (insn->op == nir_texop_texture_samples)
3133 srcs.push_back(zero);
3134 else if (!insn->num_srcs)
3135 srcs.push_back(loadImm(NULL, 0));
3136 if (biasIdx != -1)
3137 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3138 if (lodIdx != -1)
3139 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3140 else if (op == OP_TXF)
3141 lz = true;
3142 if (msIdx != -1)
3143 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3144 if (offsetIdx != -1)
3145 offsets.push_back(&insn->src[offsetIdx].src);
3146 if (compIdx != -1)
3147 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3148 if (texOffIdx != -1) {
3149 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3150 texOffIdx = srcs.size() - 1;
3151 }
3152 if (sampOffIdx != -1) {
3153 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3154 sampOffIdx = srcs.size() - 1;
3155 }
3156
3157 r = insn->texture_index;
3158 s = insn->sampler_index;
3159
3160 defs.resize(newDefs.size());
3161 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3162 defs[d] = newDefs[d];
3163 mask |= 1 << d;
3164 }
3165 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3166 lz = true;
3167
3168 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3169 texi->tex.levelZero = lz;
3170 texi->tex.mask = mask;
3171
3172 if (texOffIdx != -1)
3173 texi->tex.rIndirectSrc = texOffIdx;
3174 if (sampOffIdx != -1)
3175 texi->tex.sIndirectSrc = sampOffIdx;
3176
3177 switch (insn->op) {
3178 case nir_texop_tg4:
3179 if (!target.isShadow())
3180 texi->tex.gatherComp = insn->component;
3181 break;
3182 case nir_texop_txs:
3183 texi->tex.query = TXQ_DIMS;
3184 break;
3185 case nir_texop_texture_samples:
3186 texi->tex.mask = 0x4;
3187 texi->tex.query = TXQ_TYPE;
3188 break;
3189 case nir_texop_query_levels:
3190 texi->tex.mask = 0x8;
3191 texi->tex.query = TXQ_DIMS;
3192 break;
3193 default:
3194 break;
3195 }
3196
3197 texi->tex.useOffsets = offsets.size();
3198 if (texi->tex.useOffsets) {
3199 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3200 for (uint32_t c = 0u; c < 3; ++c) {
3201 uint8_t s2 = std::min(c, target.getDim() - 1);
3202 texi->offset[s][c].set(getSrc(offsets[s], s2));
3203 texi->offset[s][c].setInsn(texi);
3204 }
3205 }
3206 }
3207
3208 if (ddxIdx != -1 && ddyIdx != -1) {
3209 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3210 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3211 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3212 }
3213 }
3214
3215 break;
3216 }
3217 default:
3218 ERROR("unknown nir_texop %u\n", insn->op);
3219 return false;
3220 }
3221 return true;
3222 }
3223
3224 bool
3225 Converter::visit(nir_deref_instr *deref)
3226 {
3227 // we just ignore these, because image intrinsics are the only place where
3228 // we should end up with deref sources, and those backtrack to the
3229 // nir_variable themselves. This code only exists to reject deref types
3230 // we cannot handle.
3231 switch (deref->deref_type) {
3232 case nir_deref_type_array:
3233 case nir_deref_type_struct:
3234 case nir_deref_type_var:
3235 break;
3236 default:
3237 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3238 return false;
3239 }
3240 return true;
3241 }
3242
3243 bool
3244 Converter::run()
3245 {
3246 bool progress;
3247
3248 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3249 nir_print_shader(nir, stderr);
3250
3251 struct nir_lower_subgroups_options subgroup_options = {
3252 .subgroup_size = 32,
3253 .ballot_bit_size = 32,
3254 };
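// warps on this hardware are 32 threads wide, so subgroup operations and
// ballots are lowered to plain 32 bit masks (cf. load_subgroup_size above
// returning 32)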
3255
3256 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3257 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3258 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3259 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3260 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3261 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
3262 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3263
3264 do {
3265 progress = false;
3266 NIR_PASS(progress, nir, nir_copy_prop);
3267 NIR_PASS(progress, nir, nir_opt_remove_phis);
3268 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3269 NIR_PASS(progress, nir, nir_opt_cse);
3270 NIR_PASS(progress, nir, nir_opt_algebraic);
3271 NIR_PASS(progress, nir, nir_opt_constant_folding);
3272 NIR_PASS(progress, nir, nir_copy_prop);
3273 NIR_PASS(progress, nir, nir_opt_dce);
3274 NIR_PASS(progress, nir, nir_opt_dead_cf);
3275 } while (progress);
3276
3277 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3278 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3279 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3280 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3281
3282 // Garbage collect dead instructions
3283 nir_sweep(nir);
3284
3285 if (!parseNIR()) {
3286 ERROR("Couldn't prase NIR!\n");
3287 return false;
3288 }
3289
3290 if (!assignSlots()) {
3291 ERROR("Couldn't assign slots!\n");
3292 return false;
3293 }
3294
3295 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3296 nir_print_shader(nir, stderr);
3297
3298 nir_foreach_function(function, nir) {
3299 if (!visit(function))
3300 return false;
3301 }
3302
3303 return true;
3304 }
3305
3306 } // unnamed namespace
3307
3308 namespace nv50_ir {
3309
3310 bool
3311 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3312 {
3313 nir_shader *nir = (nir_shader*)info->bin.source;
3314 Converter converter(this, nir, info);
3315 bool result = converter.run();
3316 if (!result)
3317 return result;
3318 LoweringHelper lowering;
3319 lowering.run(this);
3320 tlsSize = info->bin.tlsSpace;
3321 return result;
3322 }
3323
3324 } // namespace nv50_ir